#!/bin/bash
#
# SLURM batch script: compile the pruned 5-gram ARPA language model into a
# grammar FST.
#
# Reads   lm_order_exp/5gram/data/local/lm/lm_pruned_4e-11.arpa
# Uses    lm_order_exp/5gram/data/lang_test/words.txt   (symbol table)
# Writes  lm_order_exp/5gram/data/lang_test/G_pruned_4e-11.fst
#
# All paths (including path.sh and tools/fst/*.pl) are relative, so the job
# must be submitted from the directory that contains them.
#
# Exit status: 0 on success; 1 if an input is missing or any stage of the
# conversion pipeline fails (no output FST is installed in that case).

# Parameters
#SBATCH --cpus-per-task=1
#SBATCH --job-name=lm
#SBATCH --mail-type=ALL
#SBATCH --mem=32GB
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --open-mode=append
#SBATCH --partition=shenoy,owners,henderj
#SBATCH --signal=USR1@120
#SBATCH --time=2880

# Print a message to stderr and abort the job with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# SRILM binaries (provides `ngram`, used by the optional pruning step below).
export PATH=$PATH:/oak/stanford/groups/shenoy/stfan/code/nptlrig2/LanguageModelDecoder/srilm-1.7.3/bin/i686-m64/
ml gcc/10.1.0

# NOTE(review): path.sh presumably puts arpa2fst and the OpenFst tools on
# PATH -- confirm. Without it the pipeline below cannot run, so fail early.
. path.sh || die "could not source path.sh from $PWD"

lm_dir=lm_order_exp/5gram/data/local/lm
tgt_lang=lm_order_exp/5gram/data/lang_test

arpa_in=$lm_dir/lm_pruned_4e-11.arpa
words=$tgt_lang/words.txt
fst_out=$tgt_lang/G_pruned_4e-11.fst
fst_tmp=$fst_out.tmp.$$

# Optional pruning step that produces the ARPA file consumed below
# (left disabled, as in the original script):
#ngram -prune 4e-11 -order 5 -lm $lm_dir/lm_orig.arpa -write-lm $lm_dir/lm_pruned_4e-11.arpa

[[ -r "$arpa_in" ]] || die "missing ARPA language model: $arpa_in"
[[ -r "$words" ]] || die "missing symbol table: $words"

# Never leave a half-written FST behind if the job fails or is killed.
trap 'rm -f -- "$fst_tmp"' EXIT

# Without pipefail only the exit status of fstarcsort would be seen, and a
# crash in an earlier stage would silently yield an empty or truncated FST.
set -o pipefail

# 1. Drop ARPA lines containing '<s> <s>', '</s> <s>' or '</s> </s>'
#    (case-sensitive), then lines mentioning <unk> or <spoken_noise>
#    (case-insensitive).
# 2. Convert ARPA -> FST using the word symbol table, print it as text and
#    run it through the two project filter scripts.
# 3. Recompile, remove epsilons and sort arcs by input label.
if ! grep -v -F -e '<s> <s>' -e '</s> <s>' -e '</s> </s>' "$arpa_in" \
  | grep -v -i -F -e '<unk>' -e '<spoken_noise>' \
  | arpa2fst --read-symbol-table="$words" --keep-symbols=true - \
  | fstprint \
  | tools/fst/eps2disambig.pl \
  | tools/fst/s2eps.pl \
  | fstcompile --isymbols="$words" --osymbols="$words" \
      --keep_isymbols=false --keep_osymbols=false \
  | fstrmepsilon \
  | fstarcsort --sort_type=ilabel >"$fst_tmp"; then
  die "failed to build $fst_out from $arpa_in"
fi

# Install the result only once the whole pipeline has succeeded.
mv -- "$fst_tmp" "$fst_out" || die "could not install $fst_out"
