#!/usr/bin/env python3
"""Check that alignment segments in the newest CTC results file are time-ordered.

The script looks for the most recently modified ``ctc_results_*.pkl`` file in
the data directory, loads every pickled batch from it, and reports any segment
whose end time precedes its start time.
"""
import pickle
from pathlib import Path


def _load_trials(results_file):
    """Return all trials from *results_file*, a sequence of back-to-back pickled batches."""
    trials = []
    with open(results_file, "rb") as f:
        while True:
            try:
                # SECURITY: pickle can execute arbitrary code while loading.
                # Only run this against result files produced by a trusted pipeline.
                batch = pickle.load(f)
            except EOFError:
                # End of file: every batch has been read.
                break
            trials.extend(batch)
    return trials


def verify_alignment_fix(data_dir="phoneme_segmented_data", max_trials=100):
    """Verify that the alignment fix worked by checking the latest dataset.

    Args:
        data_dir: Directory searched for ``ctc_results_*.pkl`` files. The most
            recently modified match is the one that gets checked.
        max_trials: Number of leading trials whose segments are inspected.
            Negative values are treated as zero.

    Returns:
        True when no inspected segment has ``end_time < start_time``.
        False when at least one such segment exists, when no results file is
        found, or when the results file cannot be loaded.
    """
    data_dir = Path(data_dir)

    # Materialise the glob first: max() on an empty iterable raises ValueError,
    # which previously crashed the script when no results file existed.
    candidates = list(data_dir.glob("ctc_results_*.pkl"))
    if not candidates:
        print(f"Error loading dataset: no ctc_results_*.pkl files found in {data_dir}")
        return False

    latest_file = max(candidates, key=lambda path: path.stat().st_mtime)
    print(f"Loading dataset: {latest_file}")

    try:
        all_trials = _load_trials(latest_file)
    except Exception as e:
        # Unpickling can fail in many ways (I/O errors, truncated data, missing
        # classes); for this diagnostic script any of them means "cannot verify".
        print(f"Error loading dataset: {e}")
        return False

    print(f"Loaded {len(all_trials)} trials")

    total_segments = 0
    error_segments = 0

    print(f"\n=== Checking alignment in first {max_trials} trials ===")

    for trial_idx, trial_data in enumerate(all_trials[:max(max_trials, 0)]):
        alignment_info = trial_data.get('alignment_info', [])

        # Each entry is unpacked as (phoneme, start_time, end_time, confidence).
        for segment_idx, (phoneme, start_time, end_time, _confidence) in enumerate(alignment_info):
            total_segments += 1

            if end_time < start_time:
                error_segments += 1
                if error_segments <= 5:  # Show only the first 5 errors
                    print(
                        f"❌ ERROR Trial {trial_idx}, Segment {segment_idx}: "
                        f"'{phoneme}' has end={end_time} < start={start_time}"
                    )

    print("\n=== Results ===")
    print(f"Total segments checked: {total_segments}")
    print(f"Error segments: {error_segments}")

    if total_segments == 0:
        # A pass with nothing inspected is vacuous; make that visible.
        print("WARNING: no segments were checked; the result below is vacuous.")

    if error_segments == 0:
        print("✅ SUCCESS: No temporal ordering bugs found!")
        error_rate = 0.0
    else:
        # error_segments > 0 implies total_segments > 0, so this cannot divide by zero.
        error_rate = (error_segments / total_segments) * 100
        print(f"❌ FAILED: {error_rate:.1f}% of segments have temporal ordering bugs")

    print(f"Error rate: {error_rate:.1f}%")

    return error_segments == 0


def main():
    """Run the verification with default settings and print a final verdict."""
    if verify_alignment_fix():
        print("\n🎉 ALIGNMENT FIX SUCCESSFUL! All temporal ordering issues resolved.")
    else:
        print("\n⚠️ Still has temporal ordering issues. Need further debugging.")


if __name__ == "__main__":
    main()