73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | #!/usr/bin/env python3 | ||
"""
Inspect the contents and summary statistics of a phoneme dataset file.
"""
 | ||
|  | 
 | ||
|  | import pickle | ||
|  | import os | ||
|  | 
 | ||
def inspect_phoneme_dataset(file_path):
    """Print summary statistics and one sample segment of a pickled phoneme dataset.

    The unpickled object is used as a mapping of phoneme -> sequence of
    segments (``.items()`` and ``len()`` are called on it), and the sample
    segment is used as a mapping as well (``.items()`` is called on it).
    All output goes to stdout.

    Args:
        file_path: Path of the pickle file to inspect.

    Returns:
        None. A missing file, or any exception raised while loading or
        reporting, is printed as a message instead of being raised.
    """
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        return

    print(f"正在加载文件: {file_path}")

    try:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point this tool at dataset files from a trusted source.
        with open(file_path, 'rb') as f:
            dataset = pickle.load(f)

        print("\n=== 数据集统计信息 ===")
        print(f"音素类型数量: {len(dataset)}")

        total_segments = 0
        print("\n各音素片段数量:")

        for phoneme, segments in dataset.items():
            segment_count = len(segments)
            total_segments += segment_count
            print(f"  {phoneme}: {segment_count} 个片段")

        print(f"\n总片段数: {total_segments}")

        # Show the first segment of the first phoneme as an example.
        if dataset:
            # Take the first key in the mapping's iteration order. A fixed
            # index such as [2] would pick a different phoneme and fail on
            # datasets with fewer than three entries.
            first_phoneme = next(iter(dataset))
            first_phoneme_segments = dataset[first_phoneme]

            # Skip the example when that phoneme has no segments, rather
            # than failing on [0] and reporting a misleading load error.
            if len(first_phoneme_segments) > 0:
                first_segment = first_phoneme_segments[0]

                print(f"\n=== 数据片段示例 (音素: {first_phoneme}) ===")
                for key, value in first_segment.items():
                    if key == 'neural_features':
                        # Print only shape/dtype for this entry instead of
                        # the full value (presumably a large array -- the
                        # code only requires .shape and .dtype attributes).
                        print(f"  {key}: shape {value.shape}, dtype {value.dtype}")
                    else:
                        print(f"  {key}: {value}")

    except Exception as e:
        # Best-effort diagnostic tool: report the problem and carry on.
        print(f"加载文件时出错: {e}")
|  | 
 | ||
if __name__ == "__main__":
    # Dataset file this script inspects when run directly.
    file_path = "./data_analyse/phoneme_segmented_data/phoneme_dataset_20251008_233045.pkl"

    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")

        # The expected file is missing: look for the data directory in a few
        # other relative locations and list the .pkl files of the first one
        # that exists.
        candidate_dirs = (
            "./phoneme_segmented_data",
            "../phoneme_segmented_data",
            "../../phoneme_segmented_data",
        )
        found_dir = next(
            (candidate for candidate in candidate_dirs if os.path.exists(candidate)),
            None,
        )
        if found_dir is not None:
            print(f"\n在 {found_dir} 中找到以下文件:")
            for entry in os.listdir(found_dir):
                if entry.endswith('.pkl'):
                    print(f"  {entry}")
    else:
        inspect_phoneme_dataset(file_path)