Files
b2txt25/data_analyse/inspect_phoneme_data.py

73 lines
2.2 KiB
Python
Raw Normal View History

2025-10-12 09:11:32 +08:00
#!/usr/bin/env python3
"""
检查音素数据集文件的内容和统计信息
"""
import pickle
import os
def inspect_phoneme_dataset(file_path):
    """Print summary statistics and one sample segment of a phoneme dataset.

    The pickle at ``file_path`` is used as a mapping of phoneme -> sequence of
    segments (``.items()``, ``len()`` and ``[0]`` are called on it), and the
    sampled segment is used as a mapping of field name -> value.  For the
    ``'neural_features'`` field only ``.shape`` and ``.dtype`` are printed
    instead of the value itself.

    Args:
        file_path: Path of the pickle file to inspect.

    Returns:
        None.  Everything is written to stdout.  A missing file, or any
        exception raised while loading or inspecting the data, is reported
        as a printed message instead of being propagated.
    """
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        return

    print(f"正在加载文件: {file_path}")
    try:
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserialising; only use this on dataset files from a trusted source.
        with open(file_path, 'rb') as f:
            dataset = pickle.load(f)

        print("\n=== 数据集统计信息 ===")
        print(f"音素类型数量: {len(dataset)}")

        total_segments = 0
        print("\n各音素片段数量:")
        for phoneme, segments in dataset.items():
            segment_count = len(segments)
            total_segments += segment_count
            print(f" {phoneme}: {segment_count} 个片段")
        print(f"\n总片段数: {total_segments}")

        # Show the first segment of the first phoneme that has any segments.
        # The previous hard-coded key index 2 raised IndexError for datasets
        # with fewer than three phonemes (or an empty segment list), which was
        # then misreported as a file-loading error.
        # len() is used instead of truthiness so array-like containers work.
        sample = next(
            (
                (phoneme, segments[0])
                for phoneme, segments in dataset.items()
                if len(segments) > 0
            ),
            None,
        )
        if sample is not None:
            first_phoneme, first_segment = sample
            print(f"\n=== 数据片段示例 (音素: {first_phoneme}) ===")
            for key, value in first_segment.items():
                if key == 'neural_features':
                    # Feature arrays are summarised rather than dumped.
                    print(f" {key}: shape {value.shape}, dtype {value.dtype}")
                else:
                    print(f" {key}: {value}")
    except Exception as e:
        # Best-effort inspection tool: report the problem and carry on.
        print(f"加载文件时出错: {e}")
if __name__ == "__main__":
    # Dataset file to inspect; the path is relative to the working directory.
    file_path = "./data_analyse/phoneme_segmented_data/phoneme_dataset_20251008_233045.pkl"

    if os.path.exists(file_path):
        inspect_phoneme_dataset(file_path)
    else:
        print(f"文件不存在: {file_path}")

        # Help the user locate the data: list the .pkl files in the first
        # candidate directory that exists, then stop searching.
        possible_dirs = [
            "./phoneme_segmented_data",
            "../phoneme_segmented_data",
            "../../phoneme_segmented_data",
        ]
        for data_dir in possible_dirs:
            # isdir (not exists): os.listdir raises NotADirectoryError when
            # the candidate path happens to be a regular file.
            if not os.path.isdir(data_dir):
                continue
            print(f"\n{data_dir} 中找到以下文件:")
            for name in os.listdir(data_dir):
                if name.endswith('.pkl'):
                    print(f" {name}")
            break