Files
b2txt25/language_model/srilm-1.7.3/sbin/compare-outputs
2025-07-02 12:18:09 -07:00

110 lines
2.4 KiB
Awk
Executable File

#!/usr/local/bin/gawk -f
#
# compare test outputs allowing for small numerical differences
#
# Copyright (c) 2003-2006 SRI International, 2012 Microsoft Corp. All Right Reserved.
#
# $Header: /home/srilm/CVS/srilm/sbin/compare-outputs,v 1.5 2017/07/20 03:20:03 stolcke Exp $
#
BEGIN {
	# Pattern for a decimal number: optional sign, then either digits with
	# an optional fractional part ("12", "12.", "12.5") or a bare fraction
	# (".5"), an optional exponent, and an optional trailing comma.
	# The decimal point is escaped and at least one mantissa digit is
	# required, so ordinary words such as "a", "x1" or "" are not
	# classified as numbers.
	numerical = "^[-+]?([0-9]+(\\.[0-9]*)?|\\.[0-9]+)([Ee][-+]?[0-9]+)?,?$";

	# two values whose magnitudes are both below epsilon count as equal
	epsilon = 1e-5;

	# bound on |a - b| / |a + b| for two numbers to count as equal
	tolerance = 0.001;
}
#
# abs(x) -- absolute value of x, always returned as a number
#
# The argument is coerced to a number first.  Without that, a pure string
# argument (e.g. the result of substr()) would be compared to 0 as a
# string and handed back as a string, turning the caller's
# "abs(v) < limit" into a string comparison as well.
#
function abs(x) {
	x += 0;
	return (x < 0) ? -x : x;
}
#
# same_words(w1, w2) -- decide whether two words are equivalent
#
# Returns 1 if the words are identical after normalization, or if both
# match the global `numerical' pattern and differ only slightly;
# returns 0 otherwise.
# eq_sign, v1 and v2 are local variables, not arguments.
#
function same_words(w1, w2,    eq_sign, v1, v2) {
	# split words of the form STRING=NUMBER and compare each half
	# separately; only done when "=" is at the same position in both
	eq_sign = index(w1, "=");
	if (eq_sign > 0 && index(w2, "=") == eq_sign) {
		return same_words(substr(w1, 1, eq_sign-1), substr(w2, 1, eq_sign-1)) \
		    && same_words(substr(w1, eq_sign+1), substr(w2, eq_sign+1));
	}

	# ignore filename differences due to optional gzipping
	# (every occurrence of ".gz" in the word is removed)
	gsub("\\.gz", "", w1);
	gsub("\\.gz", "", w2);

	# MSVC infinity values
	if (w1 == "1.#INF") w1 = "inf";
	if (w2 == "1.#INF") w2 = "inf";
	if (w1 == "-1.#INF") w1 = "-inf";
	if (w2 == "-1.#INF") w2 = "-inf";

	# normalize case in infinity; note that any word containing "inf"
	# in any letter case is lowercased as a whole
	if (tolower(w1) ~ /inf/) w1 = tolower(w1);
	if (tolower(w2) ~ /inf/) w2 = tolower(w2);

	if (w1 == w2) {
		return 1;
	}

	if (w1 ~ numerical && w2 ~ numerical) {
		# Force numeric values.  The halves of STRING=NUMBER come from
		# substr() and are plain strings, so without this the
		# comparisons below could be carried out as string comparisons.
		# A trailing comma is ignored by the conversion.
		v1 = w1 + 0;
		v2 = w2 + 0;

		# allow some slippage when comparing numerical values:
		# two values that are both very close to zero are ok
		if (abs(v1) < epsilon && abs(v2) < epsilon) {
			return 1;
		}

		# two values that differ by a small relative difference are ok.
		# This is |v1-v2|/|v1+v2| < tolerance with the denominator
		# multiplied out, so v1 == -v2 yields "different" instead of
		# a fatal division by zero.
		if (abs(v1 - v2) < tolerance * abs(v1 + v2)) {
			return 1;
		}
	}
	return 0;
}
#
# same_contents(file1, file2) -- compare two files line by line
#
# Each pair of lines is stripped of a trailing carriage return, split
# into whitespace-separated words, and compared word by word with
# same_words().  Returns 1 if both files have the same number of lines
# and all words are equivalent.  Returns 0 at the first difference, or
# when either file cannot be read, after printing a diagnostic to
# /dev/stderr.
# Everything after file2 in the parameter list is a local variable.
#
function same_contents(file1, file2,    lineno, line1, line2, status1, status2, n1, n2, words1, words2, i) {
	lineno = 0;

	# getline yields 1 for a line read, 0 at end of file, -1 on error
	while ((status1 = (getline line1 < file1)) > 0) {
		status2 = (getline line2 < file2);
		if (status2 < 0) {
			print "cannot read " file2 > "/dev/stderr";
			return 0;
		}
		if (status2 == 0) {
			print "premature EOF on " file2 > "/dev/stderr";
			return 0;
		}

		lineno ++;

		sub(/\r$/, "", line1);	# strip MSDOS EOL
		n1 = split(line1, words1);

		sub(/\r$/, "", line2);	# strip MSDOS EOL
		n2 = split(line2, words2);

		if (n1 != n2) {
			print "line " lineno ": mismatched number of fields" \
				> "/dev/stderr";
			return 0;
		}

		for (i = 1; i <= n1; i ++) {
			if (!same_words(words1[i], words2[i])) {
				print "line " lineno ": words differ -- " \
					words1[i] " != " words2[i] \
					> "/dev/stderr";
				return 0;
			}
		}
	}

	# the loop also ends on a read error; do not mistake that for EOF
	if (status1 < 0) {
		print "cannot read " file1 > "/dev/stderr";
		return 0;
	}

	# file1 is exhausted; file2 must be exhausted (and readable) too
	status2 = (getline line2 < file2);
	if (status2 > 0) {
		print "premature EOF on " file1 > "/dev/stderr";
		return 0;
	}
	if (status2 < 0) {
		print "cannot read " file2 > "/dev/stderr";
		return 0;
	}
	return 1;
}
#
# Main program: compare the two files named on the command line.
# Exit status is 0 if they are equivalent, 1 if they differ, and 2 if
# fewer than two file names were supplied.
#
BEGIN {
	# ARGV[0] is the program name, so two file operands mean ARGC >= 3
	if (ARGC < 3) {
		print "usage: compare-outputs file1 file2" > "/dev/stderr";
		exit(2);
	}

	if (same_contents(ARGV[1], ARGV[2])) {
		exit(0);
	} else {
		exit(1);
	}
}