Files
b2txt25/language_model/srilm-1.7.3/utils/src/subtract-ppls.gawk
2025-07-02 12:18:09 -07:00

45 lines
1.0 KiB
Awk
Executable File

#!/usr/local/bin/gawk -f
#
# subtract-ppls --
# Subtracts text statistics (from -ppl output)
#
# The first input file contains a total, from which subsequent stats are
# discounted. The result is printed in a format compatible with -ppl.
#
# Copyright (c) 1995, SRI International. All Rights Reserved
#
# $Header: /home/srilm/CVS/srilm/utils/src/subtract-ppls.gawk,v 1.2 1997/07/12 05:01:08 stolcke Exp $
#
/^file .*: .* sentences/ {
	# Matched the first line of a two-line statistics entry.  Fields 3, 5
	# and 7 hold the sentence, word and OOV counts.  Entries read while
	# ARGIND is 1 (the first input file) set the running totals; entries
	# from any later file are subtracted from them.
	isFirst = (ARGIND == 1);
	totalsents = isFirst ? $3 : (totalsents - $3);
	totalwords = isFirst ? $5 : (totalwords - $5);
	totaloovs = isFirst ? $7 : (totaloovs - $7);

	# Advance to the second line of the entry: field 1 is the zeroprob
	# count and field 4 the log probability.
	getline;

	# ARGIND is tested again after getline instead of reusing the value
	# computed above, so the check sees the state the new record was read in.
	isFirst = (ARGIND == 1);
	zeroprobs = isFirst ? $1 : (zeroprobs - $1);
	totalprob = isFirst ? $4 : (totalprob - $4);
}
END {
	# Print the remaining statistics as a two-line "file TOTAL" entry:
	# first the counts, then zeroprobs, log probability and perplexity.
	M_LN10 = 2.30258509299404568402;	# from <math.h>

	# Number of scored tokens the log probability is normalized by.
	denom = totalwords - totaloovs - zeroprobs + totalsents;

	printf "file TOTAL: %d sentences, %d words, %d OOVs\n", \
		totalsents, totalwords, totaloovs;

	if (denom > 0) {
		# totalprob is multiplied by ln(10) before exp(), i.e. it is
		# treated as a base-10 logarithm.
		ppl = exp (- M_LN10 * totalprob / denom);
		printf "%d zeroprobs, logprob= %g ppl= %g\n", \
			zeroprobs, totalprob, ppl;
	} else {
		# Nothing left to normalize by (e.g. the subtracted files cover
		# the whole total).  Dividing by zero is a fatal error in gawk
		# and would abort before any output is produced, so report the
		# counts and mark the perplexity as undefined instead.
		printf "%d zeroprobs, logprob= %g ppl= undefined\n", \
			zeroprobs, totalprob;
	}
}