Files
b2txt25/language_model/tools/wav2dur.py

27 lines
660 B
Python
Raw Normal View History

2025-07-02 12:18:09 -07:00
#!/usr/bin/env python3
# encoding: utf-8
import sys
import torchaudio
def parse_scp_line(line):
    """Split one scp line into its ``(wav_id, path)`` pair.

    The line is split on whitespace; the first field is the utterance id
    and the second is the audio path. Any further fields are ignored.

    Args:
        line: One raw line read from the scp file.

    Returns:
        A ``(wav_id, path)`` tuple, or ``None`` when the line is blank.

    Raises:
        ValueError: If the line has an id but no path.
    """
    fields = line.strip().split()
    if not fields:
        # Blank (or whitespace-only) lines carry no entry; let the caller skip.
        return None
    if len(fields) < 2:
        raise ValueError(
            'malformed scp line (expected "<id> <path>"): {!r}'.format(line))
    return fields[0], fields[1]


def main(argv=None):
    """Write per-utterance durations (in seconds) for every entry of an scp.

    Reads ``<id> <path>`` lines from the input scp, loads each audio file
    with ``torchaudio.load``, and writes ``<id> <duration>`` lines to the
    output file. Prints the number of processed utterances and the summed
    duration when done.

    Args:
        argv: Command-line arguments without the program name
            (input scp path, output duration file path). Defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        # Fail with a readable message instead of an IndexError on sys.argv.
        sys.exit('usage: {} <wav.scp> <dur.scp>'.format(sys.argv[0]))
    scp, dur_scp = args[0], args[1]

    # NOTE(review): set_audio_backend appears to have been deprecated and then
    # removed in later torchaudio releases; only call it where it still
    # exists so the script keeps running on those versions - confirm against
    # the installed torchaudio.
    if hasattr(torchaudio, 'set_audio_backend'):
        torchaudio.set_audio_backend("sox_io")

    cnt = 0
    total_duration = 0
    with open(scp, 'r') as f, open(dur_scp, 'w') as fout:
        for l in f:
            entry = parse_scp_line(l)
            if entry is None:
                continue
            wav_id, fname = entry
            cnt += 1
            waveform, rate = torchaudio.load(fname)
            # Number of samples in the first channel of the loaded audio.
            frames = len(waveform[0])
            duration = frames / float(rate)
            total_duration += duration
            fout.write('{} {}\n'.format(wav_id, duration))
    print('process {} utts'.format(cnt))
    print('total {} s'.format(total_duration))


if __name__ == '__main__':
    main()