b2txt25/language_model/srilm-1.7.3/sbin/compare-outputs

#!/usr/local/bin/gawk -f
#
# compare test outputs allowing for small numerical differences
#
# Copyright (c) 2003-2006 SRI International, 2012 Microsoft Corp. All Right Reserved.
#
# $Header: /home/srilm/CVS/srilm/sbin/compare-outputs,v 1.5 2017/07/20 03:20:03 stolcke Exp $
#

# Global settings shared by the comparison functions.
BEGIN {
	# A word is numerical only if it contains at least one digit:
	# an optional sign, then either digits with an optional decimal point
	# and fraction or a decimal point followed by digits, then an optional
	# exponent and an optional trailing comma.
	# The decimal point is written as [.] so that it matches a literal
	# period; a bare "." would match any character and let arbitrary
	# non-numeric words (e.g. "a" or "x5") pass as numbers.
	numerical = "^[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+)([Ee][-+]?[0-9]+)?,?$";

	# two values whose magnitudes are both below epsilon are considered equal
	epsilon = 1e-5;
	# largest relative difference |a - b| / |a + b| still considered equal
	tolerance = 0.001;
}

#
# abs -- absolute value of x
#	x is first coerced to a number.  Without the coercion a string
#	argument (e.g. the result of substr()) would be compared with 0 as a
#	string and could be returned as a string, so that a later comparison
#	of the result against a numeric threshold would also be done as a
#	string comparison.
#
function abs(x) {
	x += 0;
	return (x < 0) ? -x : x;
}
		
#
# same_words -- decide whether two words should be considered equal
#	w1, w2		the words to compare
#	eq_sign, v1, v2	local variables (not passed by callers)
#
# Returns 1 if the words are identical after normalization, or if both
# match the global `numerical' pattern and are numerically close
# (both below `epsilon' in magnitude, or within relative `tolerance');
# returns 0 otherwise.
#
function same_words(w1, w2, eq_sign, v1, v2) {

	# split words of the form STRING=NUMBER and compare the parts
	# before and after the first "=" separately
	eq_sign = index(w1, "=");
	if (eq_sign > 0 && index(w2, "=") == eq_sign) {
		return same_words(substr(w1, 1, eq_sign-1), substr(w2, 1, eq_sign-1)) \
		   && same_words(substr(w1, eq_sign+1), substr(w2, eq_sign+1));
	}

	# ignore filename differences due to optional gzipping
	gsub("\\.gz", "", w1);
	gsub("\\.gz", "", w2);

	# MSVC infinity values
	if (w1 == "1.#INF") w1 = "inf";
	if (w2 == "1.#INF") w2 = "inf";
	if (w1 == "-1.#INF") w1 = "-inf";
	if (w2 == "-1.#INF") w2 = "-inf";

	# normalize case in infinity
	# (note: this lowercases any word that contains "inf")
	if (tolower(w1) ~ /inf/) w1 = tolower(w1);
	if (tolower(w2) ~ /inf/) w2 = tolower(w2);

	if (w1 == w2) {
		return 1;
	}

	# only numerical values are allowed to differ
	if (w1 !~ numerical || w2 !~ numerical) {
		return 0;
	}

	# force numeric values so that the comparisons below are never
	# carried out as string comparisons (w1/w2 may be plain strings,
	# e.g. when produced by substr() above or when ending in a comma)
	v1 = w1 + 0;
	v2 = w2 + 0;

	# two values that are both very close to zero are ok
	if (abs(v1) < epsilon && abs(v2) < epsilon) {
		return 1;
	}

	# two values that differ by a small relative difference are ok;
	# written without a division so that v1 == -v2 (sum of zero) is
	# simply a mismatch instead of a fatal division by zero
	if (abs(v1 - v2) < tolerance * abs(v1 + v2)) {
		return 1;
	}

	return 0;
}

#
# same_contents -- compare two files line by line, word by word
#	file1, file2	names of the files to compare
#	remaining parameters are local variables (not passed by callers)
#
# Each pair of lines is stripped of a trailing carriage return, split
# into whitespace-separated words, and the words are compared with
# same_words().  Returns 1 if all lines match and both files end together;
# otherwise prints a diagnostic to /dev/stderr and returns 0.
#
function same_contents(file1, file2,    lineno, line1, line2, words1, words2, n1, n2, i, status) {

	lineno = 0;

	while ((status = (getline line1 < file1)) > 0) {
		status = (getline line2 < file2);
		if (status < 0) {
			print "cannot read " file2 > "/dev/stderr";
			return 0;
		}
		if (status == 0) {
			print "premature EOF on " file2 > "/dev/stderr";
			return 0;
		}

		lineno ++;

		# strip MSDOS EOL; the carriage return is written as an escape
		# sequence rather than as a raw control character
		sub(/\r$/, "", line1);
		n1 = split(line1, words1);

		sub(/\r$/, "", line2);
		n2 = split(line2, words2);

		if (n1 != n2) {
			print "line " lineno ": mismatched number of fields" \
								> "/dev/stderr";
			return 0;
		}

		for (i = 1; i <= n1; i ++) {
			if (!same_words(words1[i], words2[i])) {
				print "line " lineno ": words differ -- " \
					words1[i] " != " words2[i] \
								> "/dev/stderr";
				return 0;
			}
		}
	}

	# getline returns -1 on error (e.g. the file cannot be opened);
	# that must not be mistaken for a normal end of file
	if (status < 0) {
		print "cannot read " file1 > "/dev/stderr";
		return 0;
	}

	# file1 is exhausted; file2 must be exhausted as well
	status = (getline line2 < file2);
	if (status > 0) {
		print "premature EOF on " file1 > "/dev/stderr";
		return 0;
	}
	if (status < 0) {
		print "cannot read " file2 > "/dev/stderr";
		return 0;
	}

	return 1;
}

# Entry point: compare the two files named on the command line.
# Exit status is 0 if same_contents() reports them equivalent, 1 if not,
# and 2 if fewer than two file names were given.
BEGIN {
	# ARGV[0] is the interpreter name, so two file arguments mean ARGC >= 3
	if (ARGC < 3) {
		print "usage: compare-outputs file1 file2" > "/dev/stderr";
		exit(2);
	}

	if (same_contents(ARGV[1], ARGV[2])) {
		exit(0);
	} else {
		exit(1);
	}
}
competition update 2025-07-02 12:18:09 -07:00			`#!/usr/local/bin/gawk -f`
			`#`
			`# compare test outputs allowing for small numerical differences`
			`#`
			`# Copyright (c) 2003-2006 SRI International, 2012 Microsoft Corp. All Right Reserved.`
			`#`
			`# $Header: /home/srilm/CVS/srilm/sbin/compare-outputs,v 1.5 2017/07/20 03:20:03 stolcke Exp $`
			`#`

			`BEGIN {`
			`numerical = "^([-+])?[0-9]*(.[0-9]*)?([Ee][-+]?[0-9][0-9]*)?[,]?$";`

			`epsilon = 1e-5;`
			`tolerance = 0.001;`
			`}`

			`function abs(x) {`
			`return (x < 0) ? -x : x;`
			`}`

			`function same_words(w1, w2, eq_sign) {`

			`# split words of the form STRING=NUMBER`
			`eq_sign = index(w1, "=");`
			`if (eq_sign > 0 && index(w2, "=") == eq_sign) {`
			`return same_words(substr(w1, 1, eq_sign-1), substr(w2, 1, eq_sign-1)) \`
			`&& same_words(substr(w1, eq_sign+1), substr(w2, eq_sign+1));`
			`}`

			`# ignore filename differences due to optional gzipping`
			`gsub("\\.gz", "", w1);`
			`gsub("\\.gz", "", w2);`

			`# MSVC infinity values`
			`if (w1 == "1.#INF") w1 = "inf";`
			`if (w2 == "1.#INF") w2 = "inf";`
			`if (w1 == "-1.#INF") w1 = "-inf";`
			`if (w2 == "-1.#INF") w2 = "-inf";`

			`# normalize case in infinity`
			`if (tolower(w1) ~ /inf/) w1 = tolower(w1);`
			`if (tolower(w2) ~ /inf/) w2 = tolower(w2);`

			`if (w1 == w2) {`
			`return 1;`
			`} else if (w1 ~ numerical && w2 ~ numerical) {`
			`# allow some slippage when comparing numerical values`

			`# two values that are both very close to zero are ok`
			`if (abs(w1) < epsilon && abs(w2) < epsilon) {`
			`return 1;`
			`# two values that differ by a small relative difference are ok`
			`} else if (abs((w1 - w2)/(w1 + w2)) < tolerance) {`
			`return 1;`
			`}`
			`}`
			`return 0;`
			`}`

			`function same_contents(file1, file2) {`

			`lineno = 0;`

			`while ((getline line1 < file1) > 0) {`
			`if ((getline line2 < file2) <= 0) {`
			`print "premature EOF on " file2 > "/dev/stderr";`
			`return 0;`
			`}`

			`lineno ++;`

			`sub("\r$", "", line1); # strip MSDOS EOL`
			`n1 = split(line1, words1);`

			`sub("\r$", "", line2); # strip MSDOS EOL`
			`n2 = split(line2, words2);`

			`if (n1 != n2) {`
			`print "line " lineno ": mismatched number of fields" \`
			`> "/dev/stderr";`
			`return 0;`
			`}`

			`for (i = 1; i <= n1; i ++) {`
			`if (!same_words(words1[i], words2[i])) {`
			`print "line " lineno ": words differ -- " \`
			`words1[i] " != " words2[i] \`
			`> "/dev/stderr";`
			`return 0;`
			`}`
			`}`
			`}`

			`if ((getline line2 < file2) > 0) {`
			`print "premature EOF on " file1 > "/dev/stderr";`
			`return 0;`
			`}`

			`return 1;`
			`}`

			`BEGIN {`
			`if (same_contents(ARGV[1], ARGV[2])) {`
			`exit(0);`
			`} else {`
			`exit(1);`
			`}`
			`}`