159 lines
4.9 KiB
Python
159 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import pickle
|
|
import numpy as np
|
|
from pathlib import Path
|
|
|
|
def _count_temporal_errors(data):
    """Tally trials, phoneme segments and reversed segments in a results dict.

    Only entries whose key (as a string) contains ``'trial_'`` are counted as
    trials.  For each such entry that is a dict with a ``'phoneme_segments'``
    item, every dict-typed segment is checked for ``end_time < start_time``.

    Args:
        data: Mapping loaded from the results pickle.

    Returns:
        A ``(trial_count, total_segments, temporal_errors)`` tuple.
    """
    trial_count = 0
    total_segments = 0
    temporal_errors = 0

    for key, value in data.items():
        if 'trial_' not in str(key):
            continue
        trial_count += 1

        if not (isinstance(value, dict) and 'phoneme_segments' in value):
            continue
        segments = value['phoneme_segments']
        total_segments += len(segments)

        for seg in segments:
            if not isinstance(seg, dict):
                continue
            # A missing time is treated as 0, so a segment that lacks
            # 'end_time' but has a positive 'start_time' counts as an error.
            if seg.get('end_time', 0) < seg.get('start_time', 0):
                temporal_errors += 1

    return trial_count, total_segments, temporal_errors


def examine_latest_dataset(data_dir="phoneme_segmented_data",
                           pattern="ctc_results_*.pkl"):
    """Examine the latest processed dataset for temporal ordering errors.

    The newest file in ``data_dir`` matching ``pattern`` is unpickled and
    every phoneme segment is checked for ``end_time < start_time``.  A
    summary is printed to stdout.

    Args:
        data_dir: Directory holding the result pickles (str or Path).
        pattern: Glob pattern for candidate files.  Pass an exact filename
            to pin a specific dataset instead of the newest one.

    Returns:
        True when the dataset loads as a dict and no segment ends before it
        starts; False when no file matches, loading or analysis fails, the
        payload is not a dict, or at least one reversed segment is found.
    """
    data_dir = Path(data_dir)

    # Pick the newest file by name.  NOTE(review): this assumes the names embed
    # a sortable timestamp (e.g. ctc_results_20251009_000024.pkl) -- confirm.
    candidates = sorted(
        (p for p in data_dir.glob(pattern) if p.is_file()),
        key=lambda p: p.name,
    )
    if not candidates:
        print(f"❌ File not found: {data_dir / pattern}")
        return False
    latest_file = candidates[-1]

    print(f"📁 Loading dataset: {latest_file}")
    print(f"📏 File size: {latest_file.stat().st_size / (1024*1024):.1f} MB")

    # SECURITY: pickle.load can execute arbitrary code from the file; only
    # run this against result files produced by a trusted pipeline.
    try:
        with open(latest_file, 'rb') as f:
            data = pickle.load(f)
    except Exception as e:  # unpickling can raise almost any exception type
        print(f"❌ Error loading dataset: {e}")
        return False

    print("✅ Successfully loaded dataset")

    if not isinstance(data, dict):
        print(f"❌ Unexpected data format: {type(data)}")
        return False

    print(f"📊 Dataset keys: {list(data.keys())}")

    # Malformed segment containers or non-comparable times are reported as an
    # analysis failure rather than being mislabelled as a load failure.
    try:
        trial_count, total_segments, temporal_errors = \
            _count_temporal_errors(data)
    except (TypeError, AttributeError) as e:
        print(f"❌ Error analyzing dataset: {e}")
        return False

    print(f"🔢 Trials processed: {trial_count}")
    print(f"🔤 Total phoneme segments: {total_segments}")
    print(f"⏰ Temporal ordering errors: {temporal_errors}")

    if total_segments == 0:
        # Nothing was actually checked, so a pass here is vacuous.
        print("⚠️ No phoneme segments were found; nothing was verified")

    if temporal_errors == 0:
        print("✅ SUCCESS: No temporal ordering bugs found!")
        return True

    print(f"❌ FAILED: Found {temporal_errors} temporal ordering bugs!")
    return False
|
|
|
|
def _build_segments(sequence, path):
    """Collapse aligned (phoneme, time index) pairs into contiguous segments.

    Consecutive equal phonemes are merged into one segment whose
    ``start_time`` is the time index of the first item in the run and whose
    ``end_time`` is the time index of the last item in the run.

    Args:
        sequence: Phoneme labels, one per aligned step.
        path: Time indices, one per aligned step (same length as sequence).

    Returns:
        List of dicts with ``'phoneme'``, ``'start_time'`` and ``'end_time'``.

    Raises:
        ValueError: If ``sequence`` and ``path`` differ in length.
    """
    if len(sequence) != len(path):
        # zip() would silently drop the unmatched tail, hiding a bad case.
        raise ValueError(
            f"sequence has {len(sequence)} items but path has {len(path)}"
        )

    segments = []
    for phoneme, time_idx in zip(sequence, path):
        if segments and segments[-1]['phoneme'] == phoneme:
            # Same phoneme as the open segment: extend it to this step.
            segments[-1]['end_time'] = time_idx
        else:
            segments.append({
                'phoneme': phoneme,
                'start_time': time_idx,
                'end_time': time_idx,
            })
    return segments


def test_alignment_logic(test_cases=None):
    """Test the corrected alignment logic on small hand-written cases.

    Each case is turned into segments with the same run-merging rule the
    fixed alignment code is expected to follow, and every segment is checked
    for ``end_time >= start_time``.  Per-segment and per-case results are
    printed to stdout.

    Args:
        test_cases: Optional iterable of dicts with ``'sequence'``,
            ``'path'`` and ``'description'`` keys.  Defaults to two built-in
            cases: ``[1, 2, 1]`` at times ``[0, 1, 2]`` and a repeated
            phoneme ``[1, 1, 2]`` at times ``[0, 1, 3]``.

    Returns:
        True if every segment of every case is correctly ordered; False if
        any segment ends before it starts or a case has mismatched lengths.
    """
    print("\n=== Testing Alignment Logic ===")

    if test_cases is None:
        test_cases = [
            {
                "sequence": [1, 2, 1],
                "path": [0, 1, 2],
                "description": "Simple case",
            },
            {
                "sequence": [1, 1, 2],
                "path": [0, 1, 3],
                "description": "Repeated phoneme",
            },
        ]

    all_valid = True

    for case in test_cases:
        print(f"\nTesting: {case['description']}")

        try:
            segments = _build_segments(case['sequence'], case['path'])
        except ValueError as err:
            print(f" ❌ Invalid test case: {err}")
            print(" Result: ❌ FAIL")
            all_valid = False
            continue

        # Check temporal ordering; one comparison drives both the printed
        # status and the pass/fail flag so they cannot disagree.
        case_valid = True
        for seg in segments:
            start = seg['start_time']
            end = seg['end_time']
            ordered = end >= start
            status = "✅" if ordered else "❌ BUG!"
            if not ordered:
                case_valid = False

            print(f" Phoneme {seg['phoneme']}: {start}-{end} {status}")

        print(f" Result: {'✅ PASS' if case_valid else '❌ FAIL'}")
        all_valid = all_valid and case_valid

    return all_valid
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the dataset check and the alignment-logic
    # self-test, print a summary, and signal failure through the exit code.
    print("=== CTC Alignment Verification ===\n")

    # Both checks always run so the summary reports each one independently.
    dataset_ok = examine_latest_dataset()
    logic_ok = test_alignment_logic()

    print("\n=== FINAL RESULTS ===")
    print(f"Dataset check: {'✅ PASS' if dataset_ok else '❌ FAIL'}")
    print(f"Logic test: {'✅ PASS' if logic_ok else '❌ FAIL'}")

    if dataset_ok and logic_ok:
        print("\n🎉 VERIFICATION SUCCESSFUL: Bug fix appears to be working!")
    else:
        print("\n❌ VERIFICATION FAILED: Issues detected")
        # Exit non-zero so shells and CI jobs can detect the failure.
        raise SystemExit(1)