#!/usr/bin/env python3
"""
Inspect the structure of a converted phoneme dataset.

Run as a script with an optional path to the pickle file as the first
command-line argument; without one, the built-in default path is used.
"""

import pickle
import sys


def _print_segment(segment):
    """Print every key/value of one segment, expanding ``original_timestamps``."""
    for key, value in segment.items():
        if key == 'original_timestamps':
            # Show only the container type, then list its entries one by one
            # when it is a dict.
            print(f" {key}: {type(value)}")
            if isinstance(value, dict):
                for ts_key, ts_value in value.items():
                    print(f" {ts_key}: {ts_value}")
        else:
            print(f" {key}: {value}")


def _print_phoneme_data(phoneme_data):
    """Print a summary of ``phoneme_data`` and the layout of its first entry."""
    print(f"\nphoneme_data类型: {type(phoneme_data)}")
    print(f"音素数量: {len(phoneme_data)}")
    print(f"音素列表: {list(phoneme_data.keys())[:10]}...")

    # An empty mapping has no first phoneme to inspect; the count printed
    # above already reports that, so stop instead of raising IndexError.
    if len(phoneme_data) == 0:
        return

    # Inspect the structure of the first phoneme.
    first_phoneme = next(iter(phoneme_data.keys()))
    segments = phoneme_data[first_phoneme]
    print(f"\n第一个音素 '{first_phoneme}':")
    print(f" segments类型: {type(segments)}")
    print(f" segments数量: {len(segments)}")

    # len() rather than truthiness: the container type comes from the pickle
    # and may not define an unambiguous boolean value.
    if len(segments) > 0:
        # NOTE(review): assumes each segment is a dict -- confirm against the
        # script that produces the dataset.
        first_segment = segments[0]
        print(f" 第一个segment类型: {type(first_segment)}")
        print(f" 第一个segment键: {list(first_segment.keys())}")

        # Show the segment's full content.
        print(f" 第一个segment内容:")
        _print_segment(first_segment)


def check_data_structure(pkl_path):
    """Load a pickle file and print a description of its data structure.

    Prints the top-level type and keys, then, when present, a summary of
    the ``phoneme_data`` entry (including the first segment of its first
    phoneme) and every key/value pair of the ``conversion_info`` entry.

    Args:
        pkl_path: Path of the pickle file to inspect.

    Returns:
        None. All results are written to standard output.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only point this tool at files from a trusted source.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    print("=== 数据结构分析 ===")
    print(f"数据类型: {type(data)}")
    print(f"顶层键: {list(data.keys())}")

    # Inspect phoneme_data.
    if 'phoneme_data' in data:
        _print_phoneme_data(data['phoneme_data'])

    # Inspect conversion_info.
    if 'conversion_info' in data:
        conversion_info = data['conversion_info']
        print(f"\nconversion_info:")
        for key, value in conversion_info.items():
            print(f" {key}: {value}")


if __name__ == "__main__":
    # An optional first argument overrides the default dataset location.
    if len(sys.argv) > 1:
        pkl_path = sys.argv[1]
    else:
        pkl_path = "../phoneme_segmented_data/phoneme_dataset_20251009_202457_with_original_timestamps.pkl"
    check_data_structure(pkl_path)