27 lines
		
	
	
		
			660 B
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			27 lines
		
	
	
		
			660 B
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
# encoding: utf-8
"""Compute the duration of every audio file named in a list file.

Usage:
    <this script> <scp> <dur_scp>

``scp`` is a text file in which each non-blank line holds at least two
whitespace-separated fields: an utterance id followed by an audio file
path.  For every such line, ``<id> <duration in seconds>`` is written to
``dur_scp``.  The number of utterances processed and the summed duration
are printed to stdout at the end.
"""

import sys

import torchaudio
torchaudio.set_audio_backend("sox_io")


def main():
    """Read the list file named by argv[1] and write durations to argv[2].

    Raises:
        SystemExit: if fewer than two command-line arguments are given.
        ValueError: if a non-blank line has fewer than two fields.
    """
    # Extra trailing arguments are ignored, as the script never read them.
    if len(sys.argv) < 3:
        sys.exit('usage: {} <scp> <dur_scp>'.format(sys.argv[0]))

    scp = sys.argv[1]
    dur_scp = sys.argv[2]

    cnt = 0
    total_duration = 0
    with open(scp, 'r') as f, open(dur_scp, 'w') as fout:
        for lineno, line in enumerate(f, start=1):
            items = line.split()
            if not items:
                # Blank (or whitespace-only) lines carry no utterance.
                continue
            if len(items) < 2:
                raise ValueError(
                    '{}:{}: expected "<wav_id> <path>", got {!r}'.format(
                        scp, lineno, line.rstrip('\n')))
            wav_id, fname = items[0], items[1]
            cnt += 1
            waveform, rate = torchaudio.load(fname)
            # Frames per channel; assumes torchaudio's default
            # (channel, time) layout -- same value as len(waveform[0]).
            frames = waveform.shape[1]
            duration = frames / float(rate)
            total_duration += duration
            fout.write('{} {}\n'.format(wav_id, duration))
    print('process {} utts'.format(cnt))
    print('total {} s'.format(total_duration))


if __name__ == '__main__':
    main()