39 lines
1.4 KiB
Python
39 lines
1.4 KiB
Python
![]() |
import pickle
from pathlib import Path

# Simple check of the latest dataset
dataset_path = Path("phoneme_segmented_data/ctc_results_20251009_000024.pkl")


def iter_trial_report(key, trial, max_segments=3):
    """Yield the report lines for one trial entry, one string per line.

    Args:
        key: The dictionary key the trial was stored under (used only for
            display).
        trial: A mapping that contains an ``'alignment_info'`` sequence.
            Each element is unpacked as a 4-tuple
            ``(phoneme, start, end, conf)``.
        max_segments: How many leading segments to check for temporal
            ordering.

    Yields:
        First a header line with the total segment count, then one line per
        inspected segment, tagged ``OK`` when ``end >= start`` and ``BUG``
        otherwise.

    Raises:
        ValueError: If a segment does not unpack into exactly four items.
    """
    segments = trial['alignment_info']
    yield f"Trial {key}: {len(segments)} segments"

    # Check first few segments for temporal ordering
    for phoneme, start, end, _conf in segments[:max_segments]:
        status = "OK" if end >= start else "BUG"
        yield f" {phoneme}: {start}-{end} [{status}]"


def check_dataset(path, sample_size=3):
    """Print a short diagnostic report about a pickled dataset file.

    Args:
        path: Location of the pickle file (``str`` or ``Path``).
        sample_size: Number of leading dictionary keys to look at when
            searching for a trial entry to sample.

    Returns:
        ``True`` if the file exists and was loaded and inspected without an
        error, ``False`` otherwise.
    """
    path = Path(path)
    print(f"Checking: {path}")

    # Evaluate existence once so the printed value and the branch taken
    # below can never disagree.
    exists = path.exists()
    print(f"File exists: {exists}")
    if not exists:
        print("❌ Dataset file not found")
        return False

    print(f"File size: {path.stat().st_size / (1024*1024):.1f} MB")

    try:
        # NOTE(security): unpickling can execute arbitrary code. Only run
        # this check against files produced by a trusted pipeline.
        with open(path, 'rb') as f:
            data = pickle.load(f)

        print(f"Data type: {type(data)}")
        print(f"Keys: {len(data) if isinstance(data, dict) else 'N/A'}")

        # Quick sample
        if isinstance(data, dict):
            for key in list(data)[:sample_size]:
                if 'trial_' not in str(key):
                    continue
                trial = data[key]
                if 'alignment_info' in trial:
                    for line in iter_trial_report(key, trial):
                        print(line)
                    # NOTE(review): the original indentation was lost in
                    # extraction; this assumes the scan stops after the
                    # first sampled trial that has alignment info.
                    break

        print("✅ Dataset loaded successfully")
        return True

    except Exception as e:
        # Deliberately broad: this is a top-level diagnostic, so any failure
        # while loading or inspecting the data is reported, not raised.
        print(f"❌ Error: {e}")
        return False


check_dataset(dataset_path)