73 lines
2.2 KiB
Python
73 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
检查音素数据集文件的内容和统计信息
|
|
"""
|
|
|
|
import pickle
|
|
import os
|
|
|
|
def inspect_phoneme_dataset(file_path):
    """Print summary statistics and one sample segment of a pickled dataset.

    The unpickled object is used as a mapping: ``len()`` gives the number of
    phoneme types, ``.items()`` yields ``(phoneme, segments)`` pairs, and each
    ``segments`` value must support ``len()`` and indexing. The sample segment
    is itself iterated with ``.items()``; its ``'neural_features'`` entry is
    reported through its ``.shape`` and ``.dtype`` attributes, every other
    entry is printed as-is.

    Args:
        file_path: Path of the pickle file to inspect.

    Returns:
        None. All results are written to standard output. A missing file and
        any exception raised while loading or reporting are printed as
        messages rather than propagated.
    """
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        return

    print(f"正在加载文件: {file_path}")

    try:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only run this on dataset files from a trusted source.
        with open(file_path, 'rb') as f:
            dataset = pickle.load(f)

        print("\n=== 数据集统计信息 ===")
        print(f"音素类型数量: {len(dataset)}")

        total_segments = 0
        print("\n各音素片段数量:")

        for phoneme, segments in dataset.items():
            segment_count = len(segments)
            total_segments += segment_count
            print(f" {phoneme}: {segment_count} 个片段")

        print(f"\n总片段数: {total_segments}")

        # Show the first segment of the first phoneme as an example.
        if dataset:
            # Take the first key directly; indexing a fixed position such as
            # [2] picks the wrong phoneme and fails on small datasets.
            first_phoneme = next(iter(dataset))
            first_segments = dataset[first_phoneme]

            # A phoneme with no segments has nothing to show; skipping it
            # avoids reporting an IndexError as a file-loading failure.
            if len(first_segments) > 0:
                first_segment = first_segments[0]

                print(f"\n=== 数据片段示例 (音素: {first_phoneme}) ===")
                for key, value in first_segment.items():
                    if key == 'neural_features':
                        # Summarize instead of dumping the whole value.
                        print(f" {key}: shape {value.shape}, dtype {value.dtype}")
                    else:
                        print(f" {key}: {value}")

    except Exception as e:
        # Best-effort inspection tool: report the problem and carry on.
        print(f"加载文件时出错: {e}")
|
|
|
|
if __name__ == "__main__":
    # Inspect the specified phoneme dataset file.
    file_path = "./data_analyse/phoneme_segmented_data/phoneme_dataset_20251008_233045.pkl"

    if os.path.exists(file_path):
        inspect_phoneme_dataset(file_path)
    else:
        print(f"文件不存在: {file_path}")

        # Try other likely locations, given relative to the current working
        # directory.
        possible_dirs = [
            "./phoneme_segmented_data",
            "../phoneme_segmented_data",
            "../../phoneme_segmented_data"
        ]

        for data_dir in possible_dirs:
            if os.path.exists(data_dir):
                print(f"\n在 {data_dir} 中找到以下文件:")
                files = os.listdir(data_dir)
                # Only list pickle files; nothing is loaded here.
                for f in files:
                    if f.endswith('.pkl'):
                        print(f" {f}")
                # NOTE(review): the original indentation was lost, so the
                # nesting of this break is assumed: stop after the first
                # candidate directory that exists. Confirm against the
                # upstream file.
                break