b2txt25/data_analyse/verify_results.py

#!/usr/bin/env python3

import pickle
import numpy as np
from pathlib import Path

def _scan_trial_segments(data):
    """Tally trials and phoneme segments in a loaded results dict.

    Only entries whose key contains ``'trial_'`` count as trials, and only
    dict-shaped segments are checked for ``end_time < start_time``.

    Returns:
        Tuple ``(trial_count, total_segments, checked_segments,
        temporal_errors)``.

    Raises:
        TypeError, ValueError: if a segment container has no length or the
            timestamps cannot be compared.
    """
    trial_count = 0
    total_segments = 0
    checked_segments = 0
    temporal_errors = 0

    for key, value in data.items():
        if 'trial_' not in str(key):
            continue
        trial_count += 1

        if not (isinstance(value, dict) and 'phoneme_segments' in value):
            continue

        segments = value['phoneme_segments']
        total_segments += len(segments)

        for seg in segments:
            if not isinstance(seg, dict):
                # Unknown segment shape: counted in the total but not checkable.
                continue
            checked_segments += 1

            # Missing timestamps default to 0, as in the original check.
            start_time = seg.get('start_time', 0)
            end_time = seg.get('end_time', 0)
            if end_time < start_time:
                temporal_errors += 1

    return trial_count, total_segments, checked_segments, temporal_errors


def examine_latest_dataset(data_dir="phoneme_segmented_data",
                           filename="ctc_results_20251009_000024.pkl") -> bool:
    """Load a pickled results file and check its segments' temporal ordering.

    Args:
        data_dir: Directory containing the results file.
        filename: Name of the pickle file inside ``data_dir``. The default is
            a fixed snapshot name; no "latest file" discovery is performed.

    Returns:
        True only if at least one segment was checked and none of the checked
        segments has ``end_time < start_time``. False if the file is missing,
        cannot be loaded, is not a dict, cannot be analysed, contains no
        checkable segments, or contains an inverted segment.
    """
    latest_file = Path(data_dir) / filename

    if not latest_file.exists():
        print(f"❌ File not found: {latest_file}")
        return False

    print(f"📁 Loading dataset: {latest_file}")
    print(f"📏 File size: {latest_file.stat().st_size / (1024*1024):.1f} MB")

    # SECURITY NOTE: pickle.load can execute arbitrary code from the file.
    # Only run this against result files produced by a trusted pipeline.
    try:
        with open(latest_file, 'rb') as f:
            data = pickle.load(f)
    except Exception as e:
        # Unpickling can raise many exception types (UnpicklingError,
        # EOFError, ImportError, ...); any of them means "could not load".
        print(f"❌ Error loading dataset: {e}")
        return False

    print("✅ Successfully loaded dataset")

    if not isinstance(data, dict):
        print(f"❌ Unexpected data format: {type(data)}")
        return False

    print(f"📊 Dataset keys: {list(data.keys())}")

    try:
        (trial_count, total_segments,
         checked_segments, temporal_errors) = _scan_trial_segments(data)
    except (TypeError, ValueError) as e:
        # Malformed content (e.g. None timestamps) is an analysis failure,
        # not a loading failure, so report it as such.
        print(f"❌ Error analysing dataset: {e}")
        return False

    print(f"🔢 Trials processed: {trial_count}")
    print(f"🔤 Total phoneme segments: {total_segments}")
    print(f"⏰ Temporal ordering errors: {temporal_errors}")

    skipped = total_segments - checked_segments
    if skipped:
        print(f"⚠️ Segments skipped (not dicts): {skipped}")

    if checked_segments == 0:
        # Without this guard an empty or differently-shaped dataset would be
        # reported as a successful verification.
        print("❌ FAILED: No phoneme segments could be checked")
        return False

    if temporal_errors == 0:
        print("✅ SUCCESS: No temporal ordering bugs found!")
        return True

    print(f"❌ FAILED: Found {temporal_errors} temporal ordering bugs!")
    return False

def test_alignment_logic():
    """Simulate segment creation on two hard-coded cases and check ordering.

    Each case pairs a phoneme sequence with a list of time indices. Runs of
    identical consecutive phonemes are collapsed into one segment, and every
    segment is checked for ``end >= start``. This exercises only the
    simulation written here; it does not call any alignment code elsewhere.

    Returns:
        True if no segment in any case has ``end < start``, else False.
    """
    print("\n=== Testing Alignment Logic ===")

    # Sequence [1, 2, 1] at times [0, 1, 2] yields three one-frame segments;
    # [1, 1, 2] at times [0, 1, 3] merges the repeated phoneme into one.
    scenarios = [
        {"sequence": [1, 2, 1], "path": [0, 1, 2], "description": "Simple case"},
        {"sequence": [1, 1, 2], "path": [0, 1, 3], "description": "Repeated phoneme"},
    ]

    failed_scenarios = 0

    for scenario in scenarios:
        print(f"\nTesting: {scenario['description']}")
        labels = scenario['sequence']
        frames = scenario['path']

        # Collapse runs of the same label into (label, start, end) records.
        runs = []
        active_label = None
        run_start = None
        for idx, (label, frame) in enumerate(zip(labels, frames)):
            if label == active_label:
                continue
            if active_label is not None:
                # The finished run ends on the frame just before this one.
                runs.append((active_label, run_start, frames[idx - 1]))
            active_label, run_start = label, frame

        # The last open run extends to the final frame of the path.
        if active_label is not None:
            runs.append((active_label, run_start, frames[-1]))

        # Report each run and note whether any of them is inverted.
        inverted_found = False
        for label, begin, finish in runs:
            marker = "✅" if finish >= begin else "❌ BUG!"
            if finish < begin:
                inverted_found = True
            print(f"  Phoneme {label}: {begin}-{finish} {marker}")

        if inverted_found:
            failed_scenarios += 1
            print("  Result: ❌ FAIL")
        else:
            print("  Result: ✅ PASS")

    return failed_scenarios == 0

if __name__ == "__main__":
    # Script entry point: run the dataset check and the logic self-test,
    # print a summary, and report the outcome through the exit status.
    print("=== CTC Alignment Verification ===\n")

    # Both checks always run so the summary can report each one separately.
    dataset_ok = examine_latest_dataset()
    logic_ok = test_alignment_logic()

    print("\n=== FINAL RESULTS ===")
    print(f"Dataset check: {'✅ PASS' if dataset_ok else '❌ FAIL'}")
    print(f"Logic test:   {'✅ PASS' if logic_ok else '❌ FAIL'}")

    verified = bool(dataset_ok and logic_ok)
    if verified:
        print("\n🎉 VERIFICATION SUCCESSFUL: Bug fix appears to be working!")
    else:
        print("\n❌ VERIFICATION FAILED: Issues detected")

    # Exit non-zero on failure so shells, CI jobs and wrapper scripts can
    # detect a failed verification instead of parsing the printed text.
    raise SystemExit(0 if verified else 1)
备份1 2025-10-12 09:11:32 +08:00			`#!/usr/bin/env python3`

			`import pickle`
			`import numpy as np`
			`from pathlib import Path`

			`def examine_latest_dataset():`
			`"""Examine the latest processed dataset"""`

			`data_dir = Path("phoneme_segmented_data")`

			`# Find latest file`
			`latest_file = data_dir / "ctc_results_20251009_000024.pkl"`

			`if not latest_file.exists():`
			`print(f"❌ File not found: {latest_file}")`
			`return False`

			`print(f"📁 Loading dataset: {latest_file}")`
			`print(f"📏 File size: {latest_file.stat().st_size / (1024*1024):.1f} MB")`

			`try:`
			`with open(latest_file, 'rb') as f:`
			`data = pickle.load(f)`

			`print(f"✅ Successfully loaded dataset")`

			`if isinstance(data, dict):`
			`print(f"📊 Dataset keys: {list(data.keys())}")`

			`# Look for trial data`
			`trial_count = 0`
			`temporal_errors = 0`
			`total_segments = 0`

			`for key, value in data.items():`
			`if 'trial_' in str(key):`
			`trial_count += 1`

			`if isinstance(value, dict) and 'phoneme_segments' in value:`
			`segments = value['phoneme_segments']`
			`total_segments += len(segments)`

			`for seg in segments:`
			`if isinstance(seg, dict):`
			`start_time = seg.get('start_time', 0)`
			`end_time = seg.get('end_time', 0)`

			`if end_time < start_time:`
			`temporal_errors += 1`

			`print(f"🔢 Trials processed: {trial_count}")`
			`print(f"🔤 Total phoneme segments: {total_segments}")`
			`print(f"⏰ Temporal ordering errors: {temporal_errors}")`

			`if temporal_errors == 0:`
			`print("✅ SUCCESS: No temporal ordering bugs found!")`
			`return True`
			`else:`
			`print(f"❌ FAILED: Found {temporal_errors} temporal ordering bugs!")`
			`return False`

			`else:`
			`print(f"❌ Unexpected data format: {type(data)}")`
			`return False`

			`except Exception as e:`
			`print(f"❌ Error loading dataset: {e}")`
			`return False`

			`def test_alignment_logic():`
			`"""Test the corrected alignment logic"""`

			`print("\n=== Testing Alignment Logic ===")`

			`# Simple manual test of the alignment logic`
			`# Simulate what the fixed code should do`

			`# Test case: sequence [1, 2, 1] at times [0, 1, 2]`
			`# This should create segments that don't overlap incorrectly`

			`test_cases = [`
			`{`
			`"sequence": [1, 2, 1],`
			`"path": [0, 1, 2],`
			`"description": "Simple case"`
			`},`
			`{`
			`"sequence": [1, 1, 2],`
			`"path": [0, 1, 3],`
			`"description": "Repeated phoneme"`
			`}`
			`]`

			`all_valid = True`

			`for case in test_cases:`
			`print(f"\nTesting: {case['description']}")`
			`sequence = case['sequence']`
			`path = case['path']`

			`# Simulate the corrected segment creation`
			`segments = []`
			`current_phoneme = None`
			`start_time = None`

			`for i, (phoneme, time_idx) in enumerate(zip(sequence, path)):`
			`if phoneme != current_phoneme:`
			`# End previous segment`
			`if current_phoneme is not None:`
			`end_time = path[i-1]`
			`segments.append({`
			`'phoneme': current_phoneme,`
			`'start_time': start_time,`
			`'end_time': end_time`
			`})`

			`# Start new segment`
			`current_phoneme = phoneme`
			`start_time = time_idx`

			`# Close final segment`
			`if current_phoneme is not None:`
			`segments.append({`
			`'phoneme': current_phoneme,`
			`'start_time': start_time,`
			`'end_time': path[-1]`
			`})`

			`# Check temporal ordering`
			`case_valid = True`
			`for seg in segments:`
			`start = seg['start_time']`
			`end = seg['end_time']`
			`status = "✅" if end >= start else "❌ BUG!"`
			`if end < start:`
			`case_valid = False`
			`all_valid = False`

			`print(f" Phoneme {seg['phoneme']}: {start}-{end} {status}")`

			`print(f" Result: {'✅ PASS' if case_valid else '❌ FAIL'}")`

			`return all_valid`

			`if __name__ == "__main__":`
			`print("=== CTC Alignment Verification ===\n")`

			`dataset_ok = examine_latest_dataset()`
			`logic_ok = test_alignment_logic()`

			`print(f"\n=== FINAL RESULTS ===")`
			`print(f"Dataset check: {'✅ PASS' if dataset_ok else '❌ FAIL'}")`
			`print(f"Logic test: {'✅ PASS' if logic_ok else '❌ FAIL'}")`

			`if dataset_ok and logic_ok:`
			`print(f"\n🎉 VERIFICATION SUCCESSFUL: Bug fix appears to be working!")`
			`else:`
			`print(f"\n❌ VERIFICATION FAILED: Issues detected")`