#!/usr/bin/env python3
"""Check temporal ordering of phoneme segments in the newest CTC results file.

The script picks the most recently modified ``ctc_results_*.pkl`` file in
``phoneme_segmented_data``, loads every pickled batch from it, and verifies
that no segment in the first 20 trials ends before it starts.

Exit status: 0 when no ordering bug is found, 1 when at least one segment
has ``end < start`` or when the dataset cannot be located/loaded.
"""
import pickle
import sys
from pathlib import Path
from typing import Any, List, Optional, Sequence, Tuple

DATA_DIR = Path("phoneme_segmented_data")
RESULTS_PATTERN = "ctc_results_*.pkl"
# Only a prefix of the dataset is inspected to keep the check quick.
MAX_TRIALS_TO_CHECK = 20

# (trial index, segment index, phoneme, start time, end time)
OrderingError = Tuple[int, int, Any, Any, Any]


def find_latest_results(data_dir: Path = DATA_DIR,
                        pattern: str = RESULTS_PATTERN) -> Path:
    """Return the most recently modified file in *data_dir* matching *pattern*.

    Raises:
        FileNotFoundError: if no file matches, instead of the opaque
            ``ValueError`` that ``max()`` raises on an empty sequence.
    """
    candidates = list(Path(data_dir).glob(pattern))
    if not candidates:
        raise FileNotFoundError(
            f"No files matching '{pattern}' found in '{data_dir}'"
        )
    return max(candidates, key=lambda path: path.stat().st_mtime)


def load_trials(results_path: Path) -> List[Any]:
    """Load and concatenate every pickled batch stored in *results_path*.

    The file is read as a sequence of back-to-back pickles; each one is
    expected to be an iterable of trials. Reading stops at end of file.
    """
    trials: List[Any] = []
    with open(results_path, 'rb') as f:
        while True:
            try:
                # SECURITY: pickle.load can execute arbitrary code while
                # unpickling. Only run this script on results files that
                # come from a trusted source.
                batch = pickle.load(f)
            except EOFError:
                break
            trials.extend(batch)
    return trials


def check_alignment(
    trials: Sequence[Any],
    max_trials: Optional[int] = MAX_TRIALS_TO_CHECK,
) -> Tuple[int, List[OrderingError]]:
    """Count segments and collect those whose end precedes their start.

    Args:
        trials: sequence of dict-like trials. Each trial's
            ``'alignment_info'`` entry (default: empty) is iterated as
            4-tuples ``(phoneme, start_time, end_time, confidence)``.
        max_trials: number of leading trials to inspect; ``None`` inspects
            all of them.

    Returns:
        ``(total_segments, errors)`` where each error is
        ``(trial_idx, segment_idx, phoneme, start_time, end_time)``.
    """
    total_segments = 0
    errors: List[OrderingError] = []
    for trial_idx, trial_data in enumerate(trials[:max_trials]):
        alignment_info = trial_data.get('alignment_info', [])
        for segment_idx, segment in enumerate(alignment_info):
            phoneme, start_time, end_time, _confidence = segment
            total_segments += 1
            if end_time < start_time:
                errors.append(
                    (trial_idx, segment_idx, phoneme, start_time, end_time)
                )
    return total_segments, errors


def report_results(total_segments: int, errors: List[OrderingError]) -> int:
    """Print the check summary and return the process exit status.

    Returns 1 when any ordering error was found, 0 otherwise.
    """
    for trial_idx, segment_idx, phoneme, start_time, end_time in errors:
        print(f"ERROR: Trial {trial_idx}, Segment {segment_idx}: '{phoneme}' has end={end_time} < start={start_time}")

    error_segments = len(errors)
    print(f"Total segments checked: {total_segments}")
    print(f"Error segments: {error_segments}")

    if total_segments == 0:
        # Nothing was inspected, so claiming success would be misleading.
        print("WARNING: No segments were checked; nothing to validate")
        return 0
    if error_segments == 0:
        print("SUCCESS: No temporal ordering bugs found!")
        return 0

    error_rate = (error_segments / total_segments) * 100
    print(f"FAILED: {error_rate:.1f}% of segments have temporal ordering bugs")
    return 1


def main() -> int:
    """Run the alignment check on the latest dataset; return an exit status."""
    try:
        latest_file = find_latest_results()
        print(f"Loading dataset: {latest_file}")

        all_trials = load_trials(latest_file)
        print(f"Loaded {len(all_trials)} trials")

        total_segments, errors = check_alignment(all_trials)
        return report_results(total_segments, errors)
    except Exception as e:
        # Top-level boundary: report any failure (missing data, corrupt
        # pickle, malformed segment tuple) and signal it via exit status.
        print(f"Error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())