52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
![]() |
#!/usr/bin/env python3
|
||
|
|
||
|
import pickle
|
||
|
import sys
|
||
|
from pathlib import Path
|
||
|
|
||
|
def find_latest_results(data_dir: Path) -> Path:
    """Return the most recently modified ``ctc_results_*.pkl`` file in *data_dir*.

    Raises:
        FileNotFoundError: if no file in *data_dir* matches the pattern
            (this also covers a missing directory, where the glob is empty).
    """
    candidates = list(data_dir.glob("ctc_results_*.pkl"))
    if not candidates:
        # Without this guard, max() raises an opaque "empty sequence" ValueError.
        raise FileNotFoundError(
            f"no files matching 'ctc_results_*.pkl' found in '{data_dir}'"
        )
    return max(candidates, key=lambda path: path.stat().st_mtime)


def load_trials(results_file: Path) -> list:
    """Load every pickled batch from *results_file* into one flat list.

    The file is read as a sequence of back-to-back pickles; each unpickled
    batch is an iterable of trials that is appended to the result. Reading
    stops at end of file.
    """
    trials = []
    with open(results_file, "rb") as f:
        while True:
            try:
                # NOTE: pickle.load can execute arbitrary code while loading;
                # only run this script on result files from a trusted source.
                batch = pickle.load(f)
            except EOFError:
                break
            trials.extend(batch)
    return trials


def count_ordering_errors(trials, max_trials: int = 20) -> tuple:
    """Count segments whose end time precedes their start time.

    Only the first *max_trials* trials are inspected. Each trial must
    support ``.get('alignment_info', [])`` and each alignment entry is
    unpacked as ``(phoneme, start_time, end_time, confidence)``.
    A line is printed for every offending segment.

    Returns:
        ``(total_segments, error_segments)`` for the inspected trials.
    """
    total_segments = 0
    error_segments = 0

    for trial_idx, trial_data in enumerate(trials[:max_trials]):
        alignment_info = trial_data.get('alignment_info', [])

        for segment_idx, (phoneme, start_time, end_time, _confidence) in enumerate(alignment_info):
            total_segments += 1

            if end_time < start_time:
                error_segments += 1
                print(f"ERROR: Trial {trial_idx}, Segment {segment_idx}: '{phoneme}' has end={end_time} < start={start_time}")

    return total_segments, error_segments


def main() -> None:
    """Check the newest CTC results file for segments that end before they start.

    Prints a summary to stdout. Any exception is reported as ``Error: ...``
    and the process exits with status 1; a completed check exits normally
    even when ordering errors were found.
    """
    try:
        # Load the latest dataset (directory is resolved against the CWD)
        data_dir = Path("phoneme_segmented_data")
        latest_file = find_latest_results(data_dir)

        print(f"Loading dataset: {latest_file}")

        # Load all batches from the results file
        all_trials = load_trials(latest_file)

        print(f"Loaded {len(all_trials)} trials")

        # Check alignment in first 20 trials only
        total_segments, error_segments = count_ordering_errors(all_trials, max_trials=20)

        print(f"Total segments checked: {total_segments}")
        print(f"Error segments: {error_segments}")

        if total_segments == 0:
            # Nothing was inspected, so reporting success would be misleading.
            print("WARNING: No segments were checked; temporal ordering could not be verified")
        elif error_segments == 0:
            print("SUCCESS: No temporal ordering bugs found!")
        else:
            error_rate = (error_segments / total_segments) * 100
            print(f"FAILED: {error_rate:.1f}% of segments have temporal ordering bugs")

    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit status.
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|