"""Quick sanity check of the latest phoneme-segmented CTC results pickle.

Run as a script, it prints whether the dataset file exists, its size, the
type and number of top-level keys of the unpickled object, and -- for the
first sampled trial that carries ``alignment_info`` -- whether the first few
segments are temporally ordered (``end >= start``).
"""

import pickle
from itertools import islice
from pathlib import Path

# Simple check of the latest dataset
dataset_path = Path("phoneme_segmented_data/ctc_results_20251009_000024.pkl")

# How many top-level keys to sample, and how many segments to show per trial.
SAMPLE_KEYS = 3
SAMPLE_SEGMENTS = 3


def segment_status(start, end):
    """Return ``"OK"`` when a segment ends at or after its start, else ``"BUG"``."""
    return "OK" if end >= start else "BUG"


def iter_trial_lines(key, trial, max_segments=SAMPLE_SEGMENTS):
    """Yield the report lines for one trial.

    Args:
        key: The trial's key in the dataset, used only for display.
        trial: A mapping with an ``"alignment_info"`` sequence whose entries
            unpack into exactly four values ``(phoneme, start, end, conf)``.
        max_segments: Number of leading segments to list.

    Raises:
        ValueError: If a listed segment does not unpack into four values.
    """
    alignment = trial["alignment_info"]
    yield f"Trial {key}: {len(alignment)} segments"

    # Check first few segments for temporal ordering.
    # NOTE(review): the leading whitespace of the per-segment line may have
    # been wider before the file was flattened -- confirm the intended indent.
    for phoneme, start, end, _conf in alignment[:max_segments]:
        yield f" {phoneme}: {start}-{end} [{segment_status(start, end)}]"


def iter_summary_lines(data, max_keys=SAMPLE_KEYS):
    """Yield the summary lines for an unpickled dataset object.

    Lines are produced lazily so the caller can print each one before a later
    entry raises, which keeps partial output visible on malformed data.

    Args:
        data: The unpickled object. Only dicts are sampled further.
        max_keys: Number of leading keys to inspect for a trial entry.
    """
    is_mapping = isinstance(data, dict)
    yield f"Data type: {type(data)}"
    yield f"Keys: {len(data) if is_mapping else 'N/A'}"
    if not is_mapping:
        return

    # Quick sample: islice avoids copying every key of a large dataset.
    for key in islice(data, max_keys):
        if "trial_" not in str(key):
            continue
        trial = data[key]
        if "alignment_info" in trial:
            yield from iter_trial_lines(key, trial)
            # NOTE(review): stops after the first trial that was reported; the
            # flattened source did not show whether the break also applied to
            # trials lacking 'alignment_info' -- confirm.
            break


def check_dataset(path=dataset_path):
    """Print a short health report for the pickle at *path*.

    Args:
        path: Location of the dataset pickle (``str`` or ``Path``).

    Returns:
        True if the file was loaded and summarized without error, else False.
    """
    path = Path(path)
    print(f"Checking: {path}")

    # Evaluate existence once so the printed flag and the branch agree.
    exists = path.exists()
    print(f"File exists: {exists}")
    if not exists:
        print("❌ Dataset file not found")
        return False

    print(f"File size: {path.stat().st_size / (1024 * 1024):.1f} MB")

    try:
        # SECURITY: pickle.load can execute arbitrary code; only point this
        # script at dataset files produced by a trusted pipeline.
        with path.open("rb") as f:
            data = pickle.load(f)

        for line in iter_summary_lines(data):
            print(line)

        print("✅ Dataset loaded successfully")
    except Exception as e:
        # Top-level boundary of a diagnostics script: report any load or
        # format problem instead of crashing with a traceback.
        print(f"❌ Error: {e}")
        return False
    return True


if __name__ == "__main__":
    check_dataset(dataset_path)