45 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Awk
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			45 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Awk
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/local/bin/gawk -f
 | |
| #
 | |
| # subtract-ppls --
 | |
| #	Subtracts text statistics (from -ppl output)
 | |
| #
 | |
| #	The first input file contains a total, from which subsequent stats are
 | |
| #	discounted.  The result is printed in a format compatible with -ppl.
 | |
| #
 | |
| # Copyright (c) 1995, SRI International.  All Rights Reserved
 | |
| #
 | |
| # $Header: /home/srilm/CVS/srilm/utils/src/subtract-ppls.gawk,v 1.2 1997/07/12 05:01:08 stolcke Exp $
 | |
| #
 | |
#
# Handles one report: a summary line of the form
#	file NAME: N sentences, N words, N OOVs
# followed by a statistics line whose fields 1 and 4 are taken as the
# zeroprob count and the log probability.
#
# The report found in the first command-line argument (ARGIND == 1)
# initializes the running totals; every later report is subtracted from them.
#
# Whether this report is the initial total is decided once, before getline:
# a plain getline reads from the main input and may move on to the next
# file, which changes ARGIND.  If the statistics line is missing (end of
# input, read error, or the read crossed into another file) a warning is
# written to stderr and the probability totals are left untouched.
#
/^file .*: .* sentences/ {
	reportArg = ARGIND;
	isTotal = (reportArg == 1);

	if (isTotal) {
		totalsents = $3;
		totalwords = $5;
		totaloovs = $7;
	} else {
		totalsents -= $3;
		totalwords -= $5;
		totaloovs -= $7;
	}

	if ((getline) <= 0 || ARGIND != reportArg) {
		print "subtract-ppls: missing statistics line after summary" \
			" in argument " reportArg > "/dev/stderr";
		next;
	}

	if (isTotal) {
		zeroprobs = $1;
		totalprob = $4;
	} else {
		zeroprobs -= $1;
		totalprob -= $4;
	}
}
 | |
#
# Prints the remaining statistics in the same two-line layout as the input
# reports, under the file name TOTAL.
#
# Perplexity is exp(-M_LN10 * totalprob / denom), with
#	denom = totalwords - totaloovs - zeroprobs + totalsents
# The M_LN10 factor converts a base-10 logarithm into a natural one.
#
# When denom is 0 (no input at all, or everything was subtracted) the
# division would be a fatal gawk error and no output would be produced,
# so the perplexity is printed as "undefined" instead.
#
END {
	M_LN10 = 2.30258509299404568402;        # from <math.h>

	denom = totalwords - totaloovs - zeroprobs + totalsents;

	printf "file TOTAL: %d sentences, %d words, %d OOVs\n", \
			totalsents, totalwords, totaloovs;

	if (denom == 0) {
		printf "%d zeroprobs, logprob= %g ppl= undefined\n", \
				zeroprobs, totalprob;
	} else {
		ppl = exp (- M_LN10 * totalprob / denom);

		printf "%d zeroprobs, logprob= %g ppl= %g\n", \
				zeroprobs, totalprob, ppl;
	}
}
 | 
