#!/usr/bin/env python3
"""Inspect the contents and summary statistics of a phoneme dataset file."""

import os
import pickle
import sys

# Dataset inspected when no path is given on the command line.
_DEFAULT_DATASET_PATH = "./data_analyse/phoneme_segmented_data/phoneme_dataset_20251008_233045.pkl"

# Directories searched for candidate .pkl files when the dataset is missing.
_FALLBACK_DIRS = (
    "./phoneme_segmented_data",
    "../phoneme_segmented_data",
    "../../phoneme_segmented_data",
)


def inspect_phoneme_dataset(file_path):
    """Print summary statistics and one sample segment of a pickled dataset.

    The unpickled object is used as a mapping of phoneme -> sized, indexable
    collection of segments, and the sample segment is used as a mapping of
    field name -> value (``.items()`` is called on both).

    Args:
        file_path: Path to the pickled dataset file.

    Returns:
        None. All results are written to stdout. A missing file, or any
        exception raised while loading or reporting, is reported with a
        printed message instead of being raised.
    """
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        return

    print(f"正在加载文件: {file_path}")

    try:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point this script at dataset files you trust.
        with open(file_path, 'rb') as f:
            dataset = pickle.load(f)

        print("\n=== 数据集统计信息 ===")
        print(f"音素类型数量: {len(dataset)}")

        total_segments = 0
        print("\n各音素片段数量:")
        for phoneme, segments in dataset.items():
            segment_count = len(segments)
            total_segments += segment_count
            print(f" {phoneme}: {segment_count} 个片段")

        print(f"\n总片段数: {total_segments}")

        # Show the first segment of the first phoneme that actually has
        # segments. len() is used instead of truthiness so array-like
        # segment containers do not raise on an ambiguous truth value.
        sample = next(
            (
                (phoneme, segments[0])
                for phoneme, segments in dataset.items()
                if len(segments) > 0
            ),
            None,
        )
        if sample is not None:
            sample_phoneme, sample_segment = sample
            print(f"\n=== 数据片段示例 (音素: {sample_phoneme}) ===")
            for key, value in sample_segment.items():
                if key == 'neural_features':
                    # Presumably an array-like with .shape/.dtype; print a
                    # summary rather than dumping the full contents.
                    print(f" {key}: shape {value.shape}, dtype {value.dtype}")
                else:
                    print(f" {key}: {value}")

    except Exception as e:
        # Best-effort inspection tool: report the problem and keep going
        # rather than crash with a traceback.
        print(f"加载文件时出错: {e}")


def main():
    """Inspect the dataset given as the first CLI argument, or the default.

    If the dataset file does not exist, list the ``.pkl`` files found in the
    first existing fallback directory to help locate the right file.
    """
    file_path = sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_DATASET_PATH

    if os.path.exists(file_path):
        inspect_phoneme_dataset(file_path)
        return

    print(f"文件不存在: {file_path}")
    # Look in other likely locations and show what is available there.
    for data_dir in _FALLBACK_DIRS:
        # isdir (not exists) so a regular file with this name is skipped
        # instead of making os.listdir raise.
        if os.path.isdir(data_dir):
            print(f"\n在 {data_dir} 中找到以下文件:")
            for name in sorted(os.listdir(data_dir)):
                if name.endswith('.pkl'):
                    print(f" {name}")
            break


if __name__ == "__main__":
    main()