competition update
This commit is contained in:
37
language_model/tools/cmvn_kaldi2json.py
Executable file
37
language_model/tools/cmvn_kaldi2json.py
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import logging
|
||||
import sys
|
||||
import json
|
||||
|
||||
def kaldi2json(kaldi_cmvn_file):
    """Convert a Kaldi text-format global CMVN stats file into a dict.

    The file content is treated as whitespace-separated tokens laid out as
    ``[ <feat_dim values> <count> <feat_dim values> 0 ]``: the first run of
    values is returned as the mean statistics, the token after it as the
    frame count, and the second run as the variance statistics.

    Args:
        kaldi_cmvn_file: Path to the CMVN stats file written in text mode
            (e.g. by ``compute-cmvn-stats --binary=false``).

    Returns:
        A dict with keys ``'mean_stat'`` (list of float), ``'var_stat'``
        (list of float) and ``'frame_num'`` (float).

    Raises:
        ValueError: If the token layout does not match the expected one.
        SystemExit: With status 1 if the file starts with the Kaldi binary
            marker ``'\\0B'``.
    """
    # Read raw bytes so the binary marker can be checked before any text
    # decoding; decoding a binary stats file could fail before the check.
    with open(kaldi_cmvn_file, 'rb') as fid:
        raw = fid.read()

    # kaldi binary file start with '\0B'
    if raw[:2] == b'\0B':
        logging.error('kaldi cmvn binary file is not supported, please '
                      'recompute it by: compute-cmvn-stats --binary=false '
                      ' scp:feats.scp global_cmvn')
        sys.exit(1)

    arr = raw.decode('utf-8').split()

    # Explicit checks instead of ``assert`` so that validation still runs
    # under ``python -O`` and an empty file gives a clear error.
    if len(arr) < 4 or arr[0] != '[' or arr[-2] != '0' or arr[-1] != ']':
        raise ValueError(
            'unexpected kaldi cmvn text layout in {}: expected '
            '"[ <means> <count> <variances> 0 ]"'.format(kaldi_cmvn_file))

    # Strip the 4 framing tokens: '[', the count, the trailing '0' and ']'.
    payload = len(arr) - 4
    if payload % 2 != 0:
        raise ValueError(
            'mean and variance rows differ in length in {}'.format(
                kaldi_cmvn_file))
    feat_dim = payload // 2

    means = [float(tok) for tok in arr[1:feat_dim + 1]]
    count = float(arr[feat_dim + 1])
    variance = [float(tok) for tok in arr[feat_dim + 2:2 * feat_dim + 2]]

    # NOTE: the key was previously 'mean_stat:' (stray trailing colon),
    # inconsistent with the sibling keys below.
    cmvn_info = {
        'mean_stat': means,
        'var_stat': variance,
        'frame_num': count,
    }
    return cmvn_info
|
||||
|
||||
if __name__ == '__main__':
    # Usage: cmvn_kaldi2json.py <kaldi_cmvn_file> <json_cmvn_file>
    if len(sys.argv) < 3:
        sys.exit('Usage: {} <kaldi_cmvn_file> <json_cmvn_file>'.format(
            sys.argv[0]))

    # Convert first: opening the output with 'w' truncates it, so doing the
    # conversion beforehand avoids leaving an empty or clobbered output file
    # when the input is rejected.
    cmvn = kaldi2json(sys.argv[1])
    with open(sys.argv[2], 'w') as fout:
        fout.write(json.dumps(cmvn))
|
||||
Reference in New Issue
Block a user