competition update

This commit is contained in:
nckcard
2025-07-02 12:18:09 -07:00
parent 9e17716a4a
commit 77dbcf868f
2615 changed files with 1648116 additions and 125 deletions

View File

@@ -0,0 +1,23 @@
/*
* Boolean Type
*
* Copyright (c) 1995,2006 SRI International. All Rights Reserved.
*
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/Boolean.h,v 1.5 2006/01/09 17:39:03 stolcke Exp $
*
*/
#ifndef _BOOLEAN_H_
#define _BOOLEAN_H_
/*
 * With these compilers Boolean is simply an alias for the built-in bool type.
 */
#if defined(__GNUG__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_CC) || defined (_MSC_VER)
typedef bool Boolean;
#else /* ! __GNUG__ && !__INTEL_COMPILER && !__SUNPRO_CC && !_MSC_VER */
/*
 * Any other compiler: Boolean is an int, and the constants false (0) and
 * true (1) are defined here.
 */
typedef int Boolean;
const Boolean false = 0;
const Boolean true = 1;
#endif /* __GNUG__ || __INTEL_COMPILER || __SUNPRO_CC || _MSC_VER */
#endif /* _BOOLEAN_H_ */

View File

@@ -0,0 +1,15 @@
/*
* Debug.cc --
* Generic debugging support
*
*/
#ifndef lint
/* Copyright and RCS identification strings; left out when lint is defined */
static char Copyright[] = "Copyright (c) 1995, SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/Debug.cc,v 1.2 1996/05/30 17:57:48 stolcke Exp $";
#endif
#include "Debug.h"
/*
 * Storage for the static member declared in class Debug.  It starts at 0
 * and is changed through Debug::debugall().
 */
unsigned Debug::debugAll = 0; /* global debugging level */

View File

@@ -0,0 +1,78 @@
/*
* Debug.h --
* General object debugging facility
*
* Debug is a Mix-in class that provides some simple, but consistent
* debugging output handling.
*
* Copyright (c) 1995-2010 SRI International. All Rights Reserved.
*
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/Debug.h,v 1.8 2013/03/30 15:55:25 stolcke Exp $
*
*/
#ifndef _Debug_h_
#define _Debug_h_
#ifdef PRE_ISO_CXX
# include <iostream.h>
#else
# include <iostream>
using namespace std;
#endif
#include <Boolean.h>
/*
* Here is the typical usage for this mixin class.
* First, include it in the parents of some class FOO
*
* class FOO: public OTHER_PARENT, public Debug { ... }
*
* Inside FOO's methods use code such as
*
* if (debug(3)) {
* dout() << "I'm feeling sick today\n";
* }
*
* Finally, use that code, after setting the debugging level
* of the object and/or redirecting the debugging output.
*
* FOO foo;
* foo.debugme(4); foo.dout(cout);
*
* Debugging can also be set globally (to affect all objects of
* all classes).
*
* foo.debugall(1);
*
*/
class Debug
{
public:
    /*
     * A new object has debugging enabled (nodebug == false), uses the
     * given per-object level, and writes to cerr.
     */
    Debug(unsigned level = 0)
        : nodebug(false), debugLevel(level), debugStream(&cerr)
    {
    }

    /* prevent warning about no virtual dtor */
    virtual ~Debug()
    {
    }

    /*
     * True if debugging output at `level' is wanted: debugging must not be
     * suppressed via nodebug, and either the global or the per-object
     * level must reach `level'.
     */
    Boolean debug(unsigned level) const
    {
        if (nodebug) {
            return false;
        }
        return (debugAll >= level) || (debugLevel >= level);
    }

    /* set object's debugging level */
    virtual void debugme(unsigned level)
    {
        debugLevel = level;
    }

    /* set global debugging level (shared by all Debug objects) */
    void debugall(unsigned level)
    {
        debugAll = level;
    }

    /* current per-object debugging level */
    unsigned debuglevel() const
    {
        return debugLevel;
    }

    /* output stream for use with << */
    virtual ostream &dout() const
    {
        return *debugStream;
    }

    /* redirect debugging output; returns the new stream */
    virtual ostream &dout(ostream &stream)
    {
        debugStream = &stream;
        return stream;
    }

    Boolean nodebug;            /* temporarily disable debugging */

private:
    static unsigned debugAll;   /* global debugging level */
    unsigned debugLevel;        /* level of output -- the higher the more */
    ostream *debugStream;       /* current debug output stream */
};
#endif /* _Debug_h_ */

View File

@@ -0,0 +1,807 @@
/*
* File.cc --
* File I/O for LM
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2011 SRI International, 2012-2013 Andreas Stolcke, Microsoft Corp. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/File.cc,v 1.37 2019/09/09 23:13:15 stolcke Exp $";
#endif
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <errno.h>
#include "zio.h"
#include "Boolean.h"
#include "File.h"
#include "Array.cc"
#include "srilm_iconv.h"
#if defined(sgi) || defined(_MSC_VER) || defined(WIN32) || defined(linux) && defined(__INTEL_COMPILER) && __INTEL_COMPILER<=700
#define fseeko fseek
#define ftello ftell
#endif
/*
* Deal with different types of iconv() input buffer argument
*/
#if defined(sun) || defined(sgi)
# define ICONV_CONST const
#else
# define ICONV_CONST
#endif
const char *wordSeparators = " \t\r\n";
#define START_BUF_LEN 128 // needs to be > 2
#define iconvNone ((void *)-1)
/*
 * Open the named file.  Names ending in GZIP_SUFFIX are opened through
 * gzopen(), all other non-null names through zopen().  If nothing could be
 * opened and exitOnError is non-zero, report via perror() and exit with
 * that status.
 */
File::File(const char *name, const char *mode, int exitOnError)
    : name(name?strdup(name):0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(NULL),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);

    // NB: inside this body `name' and `exitOnError' are the parameters
    const size_t suffixLen = sizeof(GZIP_SUFFIX) - 1;
    unsigned nameLen = name ? strlen(name) : 0;
    bool hasGzipSuffix = nameLen > suffixLen &&
                         strcmp(name + nameLen - suffixLen, GZIP_SUFFIX) == 0;

    if (hasGzipSuffix) {
        gzf = gzopen(name, mode);
    } else if (name) {
        fp = zopen(name, mode);
    }

    if (gzf == NULL && fp == NULL && exitOnError) {
        perror(name);
        exit(exitOnError);
    }

    strFile = "";
}
/*
 * Wrap an already open stdio stream.  No name is recorded, so the
 * destructor will not close the stream.
 */
File::File(FILE *fp, int exitOnError)
    : name(0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(fp),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);

    strFile = "";
}
/*
 * Set up string ("in memory") I/O, seeded from a C string.
 * Note that fileStrLen is not consulted: the text is taken up to its
 * terminating NUL.
 */
File::File(const char *fileStr, size_t fileStrLen, int exitOnError, int reserved_length)
    : name(0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(NULL),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);

    strFile = fileStr;
    strFileActive = 1;
    strFileLen = strFile.length();

    // only reserve space if bigger than the current contents
    if (reserved_length > strFileLen) {
        strFile.reserve(reserved_length);
    }
}
/*
 * Set up string ("in memory") I/O, seeded with a copy of fileStr.
 */
File::File(std::string& fileStr, int exitOnError, int reserved_length)
    : name(0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(NULL),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);

    strFile = fileStr;
    strFileActive = 1;
    strFileLen = strFile.length();

    // only reserve space if bigger than the current contents
    if (reserved_length > strFileLen) {
        strFile.reserve(reserved_length);
    }
}
/*
 * Release everything the object owns: the stream (only if we opened it
 * ourselves, i.e. a name was recorded), the iconv descriptor and the
 * line buffer.
 */
File::~File()
{
    const bool openedByName = (name != 0);
    if (openedByName) {
        close();
        free(name);
    }

    if (iconvID != iconvNone) {
        iconv_close((iconv_t)iconvID);
    }

    // free() accepts a null pointer, so no guard is needed
    free(buffer);
    buffer = NULL;
}
/*
 * Close whichever stream is open (zlib handle first, else stdio stream)
 * and forget both handles.  Returns the close status; a non-zero status
 * is reported through perror() and terminates the program when
 * exitOnError is set.
 */
int
File::close()
{
    int status = gzf ? gzclose(gzf)
               : fp  ? zclose(fp)
               : 0;

    gzf = NULL;
    fp = NULL;

    if (status != 0 && exitOnError != 0) {
        perror(name ? name : "");
        exit(exitOnError);
    }
    return status;
}
/*
 * Close the current file (if we opened it) and open newName instead,
 * exactly as the (name, mode) constructor would.
 *
 * Returns true on success.  On failure it returns false, or -- when
 * exitOnError is set -- reports via perror() and exits.
 */
Boolean
File::reopen(const char *newName, const char *mode)
{
    /* leave string I/O mode and forget any character encoding state */
    strFile = "";
    strFileLen = 0;
    strFilePos = 0;
    strFileActive = 0;
    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
        iconv_close((iconv_t)iconvID);
        iconvID = iconvNone;
    }

    /*
     * Duplicate the new name BEFORE releasing the old one: `name' is a
     * public member, so newName may point at the very string we are
     * about to free.
     */
    char *nameCopy = newName ? strdup(newName) : 0;

    if (name != 0) {
        /*
         * If we opened the file (name != 0), then we should close it
         * as well.
         */
        close();
        free(name);
    } else if (nameCopy != 0) {
        /*
         * The current stream belongs to the caller: do not close it, but
         * forget it so that a failed open below is not masked by the
         * stale handle.
         */
        fp = NULL;
        gzf = NULL;
    }

    /*
     * Open new file as in File::File()
     */
    name = nameCopy;
    unsigned len = name ? strlen(name) : 0;

    if (len > sizeof(GZIP_SUFFIX)-1 &&
        (strcmp(name + len - (sizeof(GZIP_SUFFIX)-1), GZIP_SUFFIX) == 0))
    {
        gzf = gzopen(name, mode);
    } else if (name) {
        fp = zopen(name, mode);
    }

    if (fp == 0 && gzf == 0) {
        if (exitOnError) {
            perror(name);
            exit(exitOnError);
        }
        return false;
    }

    return true;
}
/*
 * Re-associate the current stdio stream with a new mode via
 * fdopen(fileno(fp), mode).  Only works for stdio streams; returns false
 * when there is none or when fdopen() fails.
 */
Boolean
File::reopen(const char *mode)
{
    /* leave string I/O mode and forget any character encoding state */
    strFileActive = 0;
    strFilePos = 0;
    strFileLen = 0;
    strFile = "";
    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
        iconv_close((iconv_t)iconvID);
        iconvID = iconvNone;
    }

    if (fp == NULL) {
        return false;
    }

    /* a failed flush is fatal only when exitOnError is set */
    if (fflush(fp) != 0 && exitOnError != 0) {
        perror(name ? name : "");
        exit(exitOnError);
    }

    FILE *fpNew = fdopen(fileno(fp), mode);
    if (fpNew == 0) {
        return false;
    }

    // XXX: we can't fclose(fp), so the old stream object becomes garbage
    fp = fpNew;
    return true;
}
/*
 * Switch to string I/O, seeded from a C string.  A file we opened by name
 * is closed first.  fileStrLen is not consulted: the text is taken up to
 * its terminating NUL.  Always returns true.
 */
Boolean
File::reopen(const char *fileStr, size_t fileStrLen, int reserved_length)
{
    /* forget any character encoding state */
    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
        iconv_close((iconv_t)iconvID);
        iconvID = iconvNone;
    }

    if (name != 0) {
        close();
    }

    strFile = fileStr;
    strFileActive = 1;
    strFilePos = 0;
    strFileLen = strFile.length();

    // only reserve space if bigger than the current contents
    if (reserved_length > strFileLen) {
        strFile.reserve(reserved_length);
    }

    return true;
}
/*
 * Switch to string I/O, seeded with a copy of fileStr.  A file we opened
 * by name is closed first.  Always returns true.
 */
Boolean
File::reopen(std::string& fileStr, int reserved_length)
{
    /* forget any character encoding state */
    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
        iconv_close((iconv_t)iconvID);
        iconvID = iconvNone;
    }

    if (name != 0) {
        close();
    }

    strFile = fileStr;
    strFileActive = 1;
    strFilePos = 0;
    strFileLen = strFile.length();

    // only reserve space if bigger than the current contents
    if (reserved_length > strFileLen) {
        strFile.reserve(reserved_length);
    }

    return true;
}
/*
 * Report whether the underlying stream is in an error state.
 * String I/O never reports an error.
 */
Boolean
File::error()
{
    // i/o using strings not file pointer, so no error
    if (strFileActive) {
        return 0;
    }

    if (!gzf) {
        // plain stdio: a missing stream counts as an error
        return (fp == 0) || ferror(fp);
    }

    // zlib: a missing or non-empty message counts as an error
    const char *msg = gzerror(gzf, NULL);
    return msg == 0 || msg[0] != '\0';
}
/*
 * Byte sequences (written as octal escapes) that File::fgetsUTF8() looks
 * for at the very start of the input to select the character encoding.
 */
const char UTF8magic[] = "\357\273\277";	/* bytes EF BB BF */
const char UTF16LEmagic[] = "\377\376";	/* bytes FF FE */
const char UTF16BEmagic[] = "\376\377";	/* bytes FE FF */
/*
 * Like fgets(), but detects a UTF-8/UTF-16 byte-order mark on the first
 * read and converts UTF-16 input to UTF-8 using iconv.
 *
 * buffer/buflen describe the caller's destination; the whole buffer is
 * zero-filled first.  Returns buffer, or 0 at end of input, on an
 * unsupported conversion, or when iconv() fails.
 */
char *
File::fgetsUTF8(char *buffer, int buflen)
{
    // Sanity check - need at least space for NULL terminator
    if ((buflen < 1) || !buffer) {
        return 0;
    }

    memset(buffer, 0, buflen);

    /*
     * make sure 2-byte encodings have one extra byte for final \0
     */
    char *result = fgets(buffer, buflen % 2 ? buflen : buflen - 1);

    if (result == 0) {
        return 0;
    }

    /*
     * When at the start of the file, try to determine character encoding scheme
     */
    if (atFirstLine) {
        const unsigned UTF8magicLen = sizeof(UTF8magic)-1;
        const unsigned UTF16LEmagicLen = sizeof(UTF16LEmagic)-1;
        const unsigned UTF16BEmagicLen = sizeof(UTF16BEmagic)-1;

        unsigned magicLen = 0;

        atFirstLine = false;

        /* (void *)0 temporarily means "no conversion requested" */
        iconvID = (void *)0;

        if (strncmp(buffer, UTF8magic, UTF8magicLen) == 0) {
            encoding = UTF8;
            magicLen = UTF8magicLen;
        } else if (strncmp(buffer, UTF16LEmagic, UTF16LEmagicLen) == 0) {
            encoding = UTF16LE;
            magicLen = UTF16LEmagicLen;
            iconvID = (void *)iconv_open("UTF-8", "UTF-16LE");
        } else if (strncmp(buffer, UTF16BEmagic, UTF16BEmagicLen) == 0) {
            encoding = UTF16BE;
            magicLen = UTF16BEmagicLen;
            iconvID = (void *)iconv_open("UTF-8", "UTF-16BE");
        }

        if (iconvID == iconvNone) {
            /* iconv_open() failed */
            this->position() << "conversion from UTF-16" << (encoding == UTF16LE ? "LE" : "BE") << " not supported\n";
            return 0;
        } else if (iconvID == (void *)0) {
            iconvID = iconvNone;
        }

        /*
         * remove the magic string from the buffer
         */
        if (magicLen > 0) {
            memmove(buffer, buffer + magicLen, buflen - magicLen);
            memset(buffer + buflen - magicLen, 0, magicLen);
        }
    }

    /*
     * change 16-bit encoding to UTF-8 if needed
     */
    if (iconvID != iconvNone) {
        makeArray(char, buffer2, buflen);

        ICONV_CONST char *cp = buffer;
        size_t inSize = buflen % 2 ? buflen-1 : buflen;
        char *dp = buffer2;
        size_t outSize = buflen;

#ifdef DEBUG_ICONV
        ::fprintf(stderr, "insize = %d input chars = ", (int)inSize);
        for (unsigned j = 0; j < inSize; j ++) {
            ::fprintf(stderr, "'%c'(%03o) ", (buffer[j] == '\r' ? 'R' : buffer[j]), ((unsigned char *)buffer)[j]);
        }
        ::fprintf(stderr, "\n");
#endif
        if (iconv((iconv_t)iconvID, &cp, &inSize, &dp, &outSize) == (size_t)-1) {
            perror("iconv");
            return 0;
        }

        /*
         * iconv() decrements outSize by the number of bytes it produced,
         * so outSize is now the space LEFT in buffer2; the amount of
         * converted data is the difference to the original size.
         */
        size_t converted = (size_t)buflen - outSize;

#ifdef DEBUG_ICONV
        ::fprintf(stderr, "buflen = %d outsize = %d chars = ", buflen, (int)outSize);
        for (unsigned j = 0; j < converted; j ++) {
            ::fprintf(stderr, "'%c'(%03o) ", (buffer2[j] == '\r' ? 'R' : buffer2[j]), ((unsigned char *)(char *)buffer2)[j]);
        }
        ::fprintf(stderr, "\n");
#endif
        memcpy(buffer, buffer2, converted);

        // Make sure the result is NUL-terminated: clear whatever follows
        // the converted data, or force a terminator if the buffer is full.
        if (converted < (size_t)buflen) {
            memset(buffer + converted, 0, buflen - converted);
        } else {
            buffer[buflen - 1] = 0;
        }

        if (encoding == UTF16LE) {
            /*
             * fgets() only reads up the \n --
             * need to skip the following \0 byte
             */
            unsigned len = strlen(buffer);
            if (len > 0 && buffer[len-1] == '\n') fgetc();
        }
    }

    return buffer;
}
/*
 * Return the next input line that is neither blank nor (when
 * skipComments is set) a comment starting with "##", or 0 at end of
 * input.  The line lives in the object's internal buffer, which is grown
 * as needed; lineno is advanced for every line read, including skipped
 * ones.  After ungetline() the previous line is handed out again.
 */
char *
File::getline()
{
    if (reuseBuffer) {
        reuseBuffer = false;
        return buffer;
    }

    while (1) {
        unsigned bufOffset = 0;
        Boolean lineDone = false;

        do {
            if (fgetsUTF8(buffer + bufOffset, bufLen - bufOffset) == 0) {
                if (bufOffset == 0) {
                    return 0;
                } else {
                    /* end of input in the middle of a line: return what we have */
                    buffer[bufOffset] = '\0';
                    break;
                }
            }

            /*
             * Check if line end has been reached
             */
            unsigned numbytes = strlen(buffer+bufOffset);

            if (numbytes > 0 && buffer[bufOffset+numbytes-1] != '\n') {
                if (bufOffset + numbytes >= bufLen - START_BUF_LEN) {
                    /*
                     * enlarge buffer
                     */
                    //cerr << "!REALLOC!" << endl;
                    bufLen *= 2;
                    buffer = (char *)realloc(buffer, bufLen);
                    assert(buffer != 0);
                }
                bufOffset += numbytes;
            } else {
                lineDone = true;
            }
        } while (!lineDone);

        lineno ++;

        /*
         * skip entirely blank lines
         * (no `register' here: that storage class no longer exists in C++17)
         */
        const char *p = buffer;
        while (*p && isspace((unsigned char)*p)) p++;
        if (*p == '\0') {
            continue;
        }

        /*
         * skip comment lines (started with double '#')
         */
        if (skipComments && buffer[0] == '#' && buffer[1] == '#') {
            continue;
        }

        reuseBuffer = false;
        return buffer;
    }
}
/*
 * Push back the line most recently returned by getline(): the next
 * getline() call returns the current buffer contents again instead of
 * reading new input.
 */
void
File::ungetline()
{
reuseBuffer = true;
}
/*
 * Write a "<name>: line <lineno>: " prefix (the name part only if a name
 * is known) to stream and return the stream, so that a message can be
 * appended with <<.
 */
ostream &
File::position(ostream &stream)
{
    if (name) {
        stream << name << ": ";
    }
    stream << "line " << lineno << ": ";
    return stream;
}
/*
 * Write a "<name>: offset <n>: " prefix to stream, where n is the current
 * ftello() position of the stdio stream; without a stdio stream the
 * offset is reported as unknown.  Returns the stream.
 */
ostream &
File::offset(ostream &stream)
{
    if (name) {
        stream << name << ": ";
    }

    if (!fp) {
        return stream << "offset unknown " << ": ";
    }
    return stream << "offset " << ::ftello(fp) << ": ";
}
/*------------------------------------------------------------------------*
* "stdio" functions:
*------------------------------------------------------------------------*/
/*
 * Read one character from the zlib stream, the stdio stream, or the
 * in-memory string (in that order of preference).
 * Returns the character as an unsigned char converted to int, or EOF.
 */
int
File::fgetc()
{
    if (gzf) {
        return gzgetc(gzf);
    } else if (fp) {
        return ::fgetc(fp);
    }

    if (!strFileActive || strFileLen <= 0 || strFilePos >= strFileLen) return EOF;

    /*
     * Convert through unsigned char, as ::fgetc() does.  Returning the
     * plain char would make bytes >= 0x80 negative on platforms where
     * char is signed, and a 0xFF byte indistinguishable from EOF.
     */
    return (unsigned char)strFile.at(strFilePos++);
}
/*
 * fgets() replacement that also works when the object wraps a string.
 * Reads at most n-1 characters into str, stopping after a newline (which
 * is kept), and always NUL-terminates on the string path.
 * Returns str, or NULL if nothing could be read.
 */
char *
File::fgets(char *str, int n)
{
    if (gzf) {
        return gzgets(gzf, str, n);
    }
    if (fp) {
        return ::fgets(str, n, fp);
    }

    if (!str || n <= 0) {
        return NULL;
    }

    int count = 0;
    while (count < n - 1) {
        int c = fgetc();
        if (c == EOF) {
            break;
        }

        // include \n in result
        // xxx use \r on MacOS X?
        str[count++] = c;
        if (c == '\n') {
            break;
        }
    }

    // always terminate
    str[count] = '\0';

    return (count > 0) ? str : NULL;
}
/*
 * Write one character to the zlib stream, the stdio stream, or append it
 * to the in-memory string.  On the string path the result is 0 on
 * success and EOF when string I/O is not active.
 */
int
File::fputc(int c)
{
    if (gzf) {
        return gzputc(gzf, c);
    }
    if (fp) {
        return ::fputc(c, fp);
    }

    if (strFileActive) {
        strFile += c;
        return 0;
    }

    // error condition, no string active
    return EOF;
}
/*
 * Write a C string to the zlib stream, the stdio stream, or append it to
 * the in-memory string.  On the string path the result is 0 on success
 * and -1 when string I/O is not active.
 */
int
File::fputs(const char *str)
{
    if (gzf) {
        return gzputs(gzf, str);
    }
    if (fp) {
        return ::fputs(str, fp);
    }

    if (strFileActive) {
        strFile += str;
        return 0;
    }

    // error condition, no string active
    return -1;
}
/*
 * printf-style output to the zlib stream, the stdio stream, or the
 * in-memory string (in that order of preference).
 *
 * For zlib and stdio streams the result of gzvprintf()/vfprintf() is
 * returned.  For string output the formatted text is appended to strFile
 * and the result is 0, or -1 when string I/O is not active.
 */
int
File::fprintf(const char *format, ...)
{
if (gzf) {
va_list args;
va_start(args, format);
int num_written = gzvprintf(gzf, format, args);
va_end(args);
return num_written;
} else if (fp) {
va_list args;
va_start(args, format);
int num_written = vfprintf(fp, format, args);
va_end(args);
return num_written;
}
// error condition, no string active
if (!strFileActive) return -1;
// This is the default max size to append at any one time. On sgi we
// get a buffer overrun if we exceed this but elsewhere we manually
// allocate a larger buffer if needed.
const int maxMessage = 4096;
char message[maxMessage];
va_list args;
va_start(args, format);
#if defined(sgi)
// vsnprintf() doesn't exist in Irix 5.3
// Return value >= 0 is number of bytes written to buffer not including
// NULL terminator.
int nwritten = vsprintf(message, format, args);
if (nwritten >= maxMessage) {
// Buffer overflow!
if (exitOnError) {
exit(exitOnError);
}
// At least indicate overflow in output (if haven't crashed already)
sprintf(message, "In class File, BUFFER OVERFLOW %d >= %d\n", nwritten, maxMessage);
}
strFile += message;
#else
// Return value not consistent...
// Non-Windows: >= 0 is number of bytes needed in buffer not including
// NULL terminator.
// Windows: Returns -1 if output truncated.
int checkSize = vsnprintf(message, maxMessage, format, args);
if ((checkSize >= maxMessage) || (checkSize < 0)) {
// The stack buffer was too small (or the size is unknown): retry with
// heap buffers of increasing size.
int curSize;
if (checkSize >= maxMessage) {
// Should know exact size needed
curSize = checkSize + 1;
} else {
// Start with double initial size
curSize = maxMessage * 2;
}
bool success = false;
// Loop until successful but also impose 1GB cap on buffer size.
const int maxAlloc = 1000000000;
while (!success) {
// args was consumed by the previous vsnprintf() call, so restart it
va_end(args);
va_start(args, format);
char* buf = new char[curSize];
checkSize = vsnprintf(buf, curSize, format, args);
if ((checkSize >= 0) && (checkSize < curSize)) {
strFile += buf;
success = true;
} else {
// Try larger size
if (curSize <= maxAlloc / 2) {
curSize *= 2;
} else if (curSize < maxAlloc) {
// Don't exceed cap
curSize = maxAlloc;
} else {
// Fail
// (buf is released here because the break skips the delete[] below)
delete[] buf;
if (exitOnError) {
exit(exitOnError);
}
strFile += "In class File, failed writing to buffer\n";
break;
}
}
delete[] buf;
}
} else {
strFile += message;
}
#endif
va_end(args);
return 0;
}
/*
 * Read up to n items of `size' bytes each into data from the zlib or
 * stdio stream.  Returns the number of complete items read.
 * Not supported for input from a string (returns 0).
 */
size_t
File::fread(void *data, size_t size, size_t n)
{
    if (gzf) {
        // nothing to read; also avoids dividing by a zero item size below
        if (size == 0 || n == 0) {
            return 0;
        }

        // gzread() returns -1 on error: report that as "no items read"
        // instead of letting the negative value turn into a huge size_t
        int nbytes = gzread(gzf, data, size * n);
        if (nbytes <= 0) {
            return 0;
        }
        return (size_t)nbytes / size;
    } else if (fp) {
        return ::fread(data, size, n, fp);
    }

    // not supported for input from string
    return 0;
}
/*
 * Write n items of `size' bytes each from data to the zlib or stdio
 * stream.  Returns the number of complete items written.
 * Not supported for output to a string (returns 0).
 */
size_t
File::fwrite(const void *data, size_t size, size_t n)
{
    if (gzf) {
        // nothing to write; also avoids dividing by a zero item size below
        if (size == 0 || n == 0) {
            return 0;
        }

        // gzwrite() returns the number of bytes written, 0 on error
        int nbytes = gzwrite(gzf, data, size * n);
        if (nbytes <= 0) {
            return 0;
        }
        return (size_t)nbytes / size;
    } else if (fp) {
        return ::fwrite(data, size, n, fp);
    }

    // not supported for output to string
    return 0;
}
/*
 * Current position in the zlib stream, the stdio stream, or the
 * in-memory string; -1 if none of them is active.
 */
long long
File::ftell()
{
    if (gzf) {
        return gztell(gzf);
    }
    if (fp) {
        return ::ftello(fp);
    }

    // error condition (-1) when no string is active
    return strFileActive ? (long long) strFilePos : -1;
}
/*
 * Reposition the zlib stream, the stdio stream, or the in-memory string.
 * origin is SEEK_SET, SEEK_CUR or SEEK_END.
 * Returns 0 on success and -1 on failure, like ::fseek().
 */
int
File::fseek(long long offset, int origin)
{
    if (gzf) {
        /*
         * gzseek() returns the resulting offset, or -1 on error.  Map
         * that to the fseek() convention; returning the offset itself
         * would truncate it to int, and large offsets could then look
         * like errors.
         */
        return (gzseek(gzf, offset, origin) < 0) ? -1 : 0;
    } else if (fp) {
        return ::fseeko(fp, offset, origin);
    }

    // error condition, no string active
    if (!strFileActive) return -1;

    // xxx doesn't do (much) error checking
    if (origin == SEEK_CUR) {
        strFilePos += offset;
    } else if (origin == SEEK_END) {
        strFilePos = strFileLen + offset; // use negative offset!
    } else if (origin == SEEK_SET) {
        strFilePos = offset;
    } else {
        // invalid origin
        return -1;
    }

    // xxx we check that position is not negative, but (currently) allow it to be greater than length
    if (strFilePos < 0) strFilePos = 0;

    return 0;
}
/*
 * NUL-terminated contents of the in-memory string, or a null pointer
 * when the object wraps a stream or string I/O is not active.
 */
const char *
File::c_str()
{
    const bool stringIO = !fp && !gzf && strFileActive;

    return stringIO ? strFile.c_str() : 0;
}
/*
 * Raw contents of the in-memory string (std::string::data()), or a null
 * pointer when the object wraps a stream or string I/O is not active.
 */
const char *
File::data()
{
    const bool stringIO = !fp && !gzf && strFileActive;

    return stringIO ? strFile.data() : 0;
}
/*
 * Length of the in-memory string, or 0 when the object wraps a stream or
 * string I/O is not active.
 */
size_t
File::length()
{
    const bool stringIO = !fp && !gzf && strFileActive;

    return stringIO ? strFile.length() : 0;
}

View File

@@ -0,0 +1,144 @@
/*
* File.h
* File I/O utilities for LM
*
* Copyright (c) 1995-2011 SRI International, 2012-2013 Andreas Stolcke, Microsoft Corp. All Rights Reserved.
*
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/File.h,v 1.27 2019/09/09 23:13:15 stolcke Exp $
*
*/
#ifndef _File_h_
#define _File_h_
#ifdef PRE_ISO_CXX
# include <iostream.h>
#else
# include <iostream>
using namespace std;
#endif
#include <stdio.h>
#include "zio.h"
#include "zlib.h"
#include "Boolean.h"
/*
* Tell clients that we can handle .gz files regardless of zio working
*/
#undef GZIP_SUFFIX
#define GZIP_SUFFIX ".gz"
const unsigned int maxWordsPerLine = 50000;
extern const char *wordSeparators;
/*
* A File object is a wrapper around a stdio FILE pointer. It presently
* provides two kinds of convenience.
*
* - constructors and destructors manage opening and closing of the stream.
* The stream is checked for errors on closing, and the default behavior
* is to exit() with an error message if a problem was found.
* - the getline() method strips comments and keeps track of input line
* numbers for error reporting.
*
* File object can be cast to (FILE *) to perform most of the standard
* stdio operations in a seamless way.
*
* The File object can also read/write to a std::string, for file
* access "to memory".
*
* To read from an existing string, allocate the File object using:
* File(char *, size_t) or File(std::string&) and then call any File()
* accessor function. For reading, you can also allocate the File
* object using File(NULL, exitOnError) and then reopen it using
* File.reopen(char *, size_t) or File.reopen(std::string&).
*
* To write to a string, allocate the File object using: File("", 0,
* exitOnError, reserved_length). Alternatively, use File(NULL,
* exitOnError) followed by File.reopen("", 0, reserved_length).
*
* NOTE: String I/O does not yet support binary data (unless initialized from std::string?).
* NOTE: For backwards compatibility, File object preferentially uses FILE * object if it exists.
*/
/*
 * Wrapper around a stdio FILE pointer, a zlib gzFile, or an in-memory
 * std::string, with line-oriented reading (getline) and stdio-like
 * member functions.
 */
class File
{
public:
// Note that prior to September, 2014, internal member variable
// only stored exact pointer to name, now makes copy of name
// since otherwise user needs to ensure name is not changed
// or deleted (or stack variable) during lifetime of File object
// (or prior to reopen with new name).
File(const char *name, const char *mode, int exitOnError = 1);
// Wrap an existing stdio stream; it is not closed by the destructor.
File(FILE *fp = 0, int exitOnError = 1);
// Initialize strFile with contents of string. strFile will be
// resized to "reserved_length" if this value is bigger than the
// string size.
File(const char *fileStr, size_t fileStrLen, int exitOnError = 1, int reserved_length = 0);
File(std::string& fileStr, int exitOnError = 1, int reserved_length = 0);
~File();
// Next non-blank, non-comment input line (0 at end of input).
char *getline();
// Make the next getline() return the current line again.
void ungetline();
int close();
Boolean reopen(const char *name, const char *mode);
Boolean reopen(const char *mode); // switch to binary I/O
// [close() and] reopen File and initialize strFile with contents of string
Boolean reopen(const char *fileStr, size_t fileStrLen, int reserved_length = 0);
Boolean reopen(std::string& fileStr, int reserved_length = 0);
Boolean error();
// Write a "name: line N: " / "name: offset N: " prefix for messages.
ostream &position(ostream &stream = cerr);
ostream &offset(ostream &stream = cerr);
char *name; // private copy of the file name, 0 if none
unsigned int lineno; // number of lines read so far by getline()
// NOTE(review): the constructors take an int and this value is passed to
// exit(); when Boolean is bool the stored status collapses to 0/1.
Boolean exitOnError;
Boolean skipComments; // getline() skips lines starting with "##"
// Provide "stdio" equivalent functions for the case where the
// File class is wrapping a string instead of a FILE, since
// casting File to (FILE *) won't work in this case. The
// functions should perform the same as their namesakes, but will
// not set errno.
char *fgets(char *str, int n);
char *fgetsUTF8(char *str, int n); // also converts to UTF8
int fgetc();
int fputc(int c);
int fputs(const char *str);
// uses internal 4KB buffer
int fprintf(const char *format, ...);
size_t fread(void *data, size_t size, size_t n);
size_t fwrite(const void *data, size_t size, size_t n);
long long ftell();
int fseek(long long offset, int origin);
// get string contents from File() object, provided we are doing string I/O
const char *c_str();
const char *data();
size_t length();
private:
FILE *fp;
gzFile gzf; // when reading/writing via zlib
char *buffer; // line buffer used by getline(), malloc'ed
unsigned bufLen; // allocated size of buffer
Boolean reuseBuffer; // set by ungetline()
Boolean atFirstLine; // we haven't read the first line yet
enum { ASCII, UTF8, UTF16LE, UTF16BE } encoding; // char encoding scheme
void *iconvID; // iconv conversion descriptor, or (void *)-1 if none
// read/write from/to string instead of file
std::string strFile;
int strFileLen; // length of strFile when string I/O was set up
int strFilePos; // current read position in strFile
int strFileActive; // non-zero while string I/O is in use
};
#endif /* _File_h_ */

View File

@@ -0,0 +1,88 @@
/*
* MStringTokUtil.cc --
* Platform-independent version of strtok_r.
*
* @author SRI International
* @file MStringTokUtil.cc \brief Utility for portable string tokenization.
*
* Copyright (C) 2011 SRI International. Unpublished, All Rights Reserved.
*
* $Id: MStringTokUtil.cc,v 1.1 2011/04/01 17:47:18 victor Exp $
*/
#include <string.h>
#include <stdlib.h>
#include "MStringTokUtil.h"
/*
 * Reentrant tokenizer with strtok_r() semantics.
 *
 * s1    string to tokenize on the first call, NULL on later calls
 * s2    set of separator characters (NULL is treated as the empty set)
 * lasts caller-provided state carried between calls
 *
 * Returns the next token (NUL-terminated in place), or NULL when no
 * tokens remain or lasts is NULL.
 */
char*
MStringTokUtil::strtok_r(char* s1, const char* s2, char** lasts)
{
    if (lasts == NULL) {
        return NULL;
    }

    // Resume from the saved state unless a new string is supplied
    char* token = (s1 != NULL) ? s1 : *lasts;
    if (token == NULL) {
        // Saved state didn't have a string
        return NULL;
    }

    const char* separators = (s2 != NULL) ? s2 : "";

    // Skip any initial separator characters
    token += strspn(token, separators);
    if (*token == 0) {
        // Nothing but separators was left
        *lasts = NULL;
        return NULL;
    }

    // The token extends up to the next separator or the end of the string
    char* tokenEnd = token + strcspn(token, separators);
    if (*tokenEnd == 0) {
        // No separator found, so this is the last token
        *lasts = NULL;
        return token;
    }

    // Terminate the token and remember where to continue, if anywhere
    *tokenEnd = 0;
    char* rest = tokenEnd + 1;
    *lasts = (*rest != 0) ? rest : NULL;

    return token;
}

View File

@@ -0,0 +1,43 @@
/*
* MStringTokUtil.h --
* Platform-independent version of strtok_r.
*
* @author SRI International
* @file MStringTokUtil.h \brief Utility for portable string tokenization.
*
* Copyright (C) 2011 SRI International. Unpublished, All Rights Reserved.
*
* $Id: MStringTokUtil.h,v 1.1 2011/04/01 17:47:18 victor Exp $
*/
#ifndef MStringTokUtil_h
#define MStringTokUtil_h
/**
* Platform-independent version of strtok_r.
*/
class MStringTokUtil {
public:
/**
* Get next token from string based on character separators.
*
* @param s1 For the first call, this is a pointer to a string
* from which to extract tokens. This string will be updated
* with 0 characters. On subsequent calls, this parameter
* should be NULL.
* @param s2 Null-terminated set of delimiter characters.
* This may be changed between successive calls.
* @param lasts This is an address to a pointer used for
* storing state between calls. The value will be set
* on the first call and read/updated on successive calls.
* @return pointer to NULL-terminated next token in s1
* or NULL when no tokens remain.
*/
static char* strtok_r(char* s1, const char* s2, char** lasts);
private:
// Static; no constructor
// (declared private so the class cannot be instantiated from outside)
MStringTokUtil();
};
#endif // MStringTokUtil_h

View File

@@ -0,0 +1,178 @@
#
# File: Makefile.example
# Author: The SRI DECIPHER (TM) System
# Date: Thu Sep 9 12:04:47 1993
#
# Description:
# This is the example makefile to start from when adding new
# modules to the DECIPHER System. To use this makefile, first
# copy it to your directory as the file "Makefile". Second,
# replace the word "Example" in the text below with the real name
# of your library. Next replace the the example filenames with
# the names of your actual declarations and source files in the
# appropriate variable definitions. Finally clean up by deleting
# any lines not relevant to your module and updating this header
# to describe your new module. Do not forget to use the proper
# RCS keywords!
#
# Copyright (c) 1993, SRI International. All Rights Reserved.
#
# $Header: /home/srilm/CVS/srilm/misc/src/Makefile,v 1.41 2017/02/02 06:25:10 stolcke Exp $
#
# Include common SRILM variable definitions.
include $(SRILM)/common/Makefile.common.variables
# Define variables.
# Flags for generating "compact" data structures
COMPACT_FLAGS += -DUSE_SARRAY -DUSE_SARRAY_TRIE -DUSE_SARRAY_MAP2
# Flags for generating "short" data structures
SHORT_FLAGS = $(COMPACT_FLAGS) -DUSE_SHORT_VOCAB -DUSE_XCOUNTS
# Flags for generating "long long" data structures
LLONG_FLAGS = $(COMPACT_FLAGS) -DUSE_LONGLONG_COUNTS -DUSE_XCOUNTS
# Example Library declarations files.
EXTERNAL_LIB_HEADERS = \
$(SRCDIR)/cfuncproto.h \
$(SRCDIR)/option.h \
$(SRCDIR)/zio.h \
$(SRCDIR)/mkdir.h \
$(SRCDIR)/Boolean.h \
$(SRCDIR)/Debug.h \
$(SRCDIR)/File.h \
$(SRCDIR)/MStringTokUtil.h \
$(SRCDIR)/tls.h \
$(SRCDIR)/TLSWrapper.h \
$(SRCDIR)/tserror.h \
$(SRCDIR)/version.h
INTERNAL_LIB_HEADERS = \
$(SRCDIR)/srilm_iconv.h
# Example Library source files.
LIB_SOURCES = \
$(SRCDIR)/option.c \
$(SRCDIR)/zio.c \
$(SRCDIR)/fcheck.c \
$(SRCDIR)/rand48.c \
$(SRCDIR)/Debug.cc \
$(SRCDIR)/File.cc \
$(SRCDIR)/MStringTokUtil.cc \
$(SRCDIR)/tls.cc \
$(SRCDIR)/tserror.cc \
$(SRCDIR)/version.c
# Add the Tcl main program to the library only when NO_TCL is empty/unset.
ifeq ($(NO_TCL), )
LIB_SOURCES += $(SRCDIR)/tclmain.cc
endif
# When NO_ICONV is set, pass -DNO_ICONV to both the C and the C++ compiler.
ifneq ($(NO_ICONV), )
CFLAGS += -DNO_ICONV
CXXFLAGS += -DNO_ICONV
endif
# tell version.c about compile-time options
ifneq ($(HAVE_LIBLBFGS), )
ADDITIONAL_CFLAGS += -DHAVE_LIBLBFGS
ADDITIONAL_CXXFLAGS += -DHAVE_LIBLBFGS
endif
# Example Library object files.
LIB_OBJECTS = $(patsubst $(SRCDIR)/%.cc,$(OBJDIR)/%$(OBJ_SUFFIX),\
$(patsubst $(SRCDIR)/%.c,$(OBJDIR)/%$(OBJ_SUFFIX),$(LIB_SOURCES)))
# Example Library.
LIBRARY = $(OBJDIR)/$(LIB_PREFIX)misc$(LIB_SUFFIX)
ADDITIONAL_INCLUDES += \
-I$(OBJDIR) \
$(TCL_INCLUDE)
ADDITIONAL_LIBRARIES += \
$(SRILM_LIBDIR)/$(LIB_PREFIX)z$(LIB_SUFFIX) \
$(TCL_LIBRARY) \
$(MATH_LIBRARY)
# Example programs.
PROGRAM_NAMES = \
ztest \
testFile \
testRand
PROGRAMS = $(PROGRAM_NAMES:%=$(BINDIR)/%$(EXE_SUFFIX))
PROGRAM_SOURCES = $(foreach prog,$(PROGRAM_NAMES),\
$(wildcard $(SRCDIR)/$(prog).c) \
$(wildcard $(SRCDIR)/$(prog).cc))
PROGRAM_OBJECTS = $(PROGRAM_NAMES:%=$(OBJDIR)/%$(OBJ_SUFFIX))
# Libraries to be linked with the Example programs.
LIBRARIES = $(LIBRARY) \
$(ADDITIONAL_LIBRARIES)
# All of the types of files.
ALL_HEADERS = $(EXTERNAL_LIB_HEADERS) \
$(INTERNAL_LIB_HEADERS)
ALL_SOURCES = $(LIB_SOURCES) \
$(PROGRAM_SOURCES)
ALL_OBJECTS = $(LIB_OBJECTS) \
$(PROGRAM_OBJECTS)
ALL_LIBRARIES = $(LIBRARY)
ALL_PROGRAMS = $(PROGRAMS)
ALL_PROGRAM_NAMES = $(PROGRAM_NAMES)
# Define pseudo-targets.
# Make sure the library does not get deleted if the make is interrupted.
.PRECIOUS: $(LIBRARY)
# Define targets.
# Default target. "libraries" is not defined in this file; presumably it
# comes from Makefile.common.targets, included at the end.
all: libraries
# Archive all library objects into $(LIBRARY), then index it with ranlib.
$(LIBRARY): $(LIB_OBJECTS)
$(ARCHIVE) $(AR_OUTPUT_OPTION) $^
$(RANLIB) $@ $(DEMANGLE_FILTER)
# Generate SRILMversion.h: SRILM_RELEASE is the first line of
# $(SRILM)/RELEASE, followed by the copyright text processed by the
# stringify-copyright sed script.
SRILMversion.h: $(SRILM)/RELEASE $(SRILM)/Copyright
read version < $(SRILM)/RELEASE; echo "#define SRILM_RELEASE \"$$version\"" > $@
sed -f $(SRILM)/sbin/stringify-copyright $(SRILM)/Copyright >> $@
# Generate the per-object-directory SRILMoptions.h defining BUILD_OPTIONS.
# The rule has no prerequisites, so it only runs when the file is missing.
$(OBJDIR)/SRILMoptions.h:
echo "#define BUILD_OPTIONS \"$(OPTION_FLAGS)\"" > $@
# make sure SRILMoptions.h gets generated for each $(OPTION)
$(OBJDIR)/version$(OBJ_SUFFIX): $(OBJDIR)/SRILMoptions.h
# Variables and Targets for released system
EXPORTED_HEADERS = $(EXTERNAL_LIB_HEADERS)
EXPORTED_LIBRARIES = $(LIBRARY)
EXPORTED_PROGRAMS =
release: release-headers release-libraries release-programs
# The generated headers must exist before dependencies are computed.
depend: SRILMversion.h $(OBJDIR)/SRILMoptions.h
# Remove the generated headers.
cleaner:
rm -f SRILMversion.h $(OBJDIR)/SRILMoptions.h
# Include common SRILM target definitions.
include $(SRILM)/common/Makefile.common.targets

View File

@@ -0,0 +1,3 @@
This directory contains miscellaneous utility functions used by the
Language Modeling tools and libraries.

View File

@@ -0,0 +1,33 @@
/*
* Default SRILMoptions.h
* Should be overridden by automatically generated $(OBJDIR)/SRILMoptions.h
*
* $Header: /home/srilm/CVS/srilm/misc/src/SRILMoptions.h,v 1.1 2017/02/01 22:39:39 stolcke Exp $
*/
/*
 * Each *_OPTION macro below expands to the corresponding -D flag as a
 * string literal if that flag is defined for this compilation, and to
 * the empty string otherwise.
 */
#ifdef NDEBUG
# define NDEBUG_OPTION "-DNDEBUG"
#else
# define NDEBUG_OPTION ""
#endif
#ifdef USE_SARRAY
# define BUILD_OPTION_1 "-DUSE_SARRAY"
#else
# define BUILD_OPTION_1 ""
#endif
#ifdef USE_SARRAY_TRIE
# define BUILD_OPTION_2 "-DUSE_SARRAY_TRIE"
#else
# define BUILD_OPTION_2 ""
#endif
#ifdef USE_SARRAY_MAP2
# define BUILD_OPTION_3 "-DUSE_SARRAY_MAP2"
#else
# define BUILD_OPTION_3 ""
#endif
/*
 * Space-separated concatenation of the option strings above (adjacent
 * string literals are joined by the compiler).
 */
#define BUILD_OPTIONS NDEBUG_OPTION " " BUILD_OPTION_1 " " BUILD_OPTION_2 " " BUILD_OPTION_3

View File

@@ -0,0 +1,112 @@
/*
* TLSWrapper.h --
* A template that wraps a thread-local storage variable. If NO_TLS is
* defined then the macros in this file will simply create static
* variables, thereby producing a single-threaded build.
*
* Copyright (c) 2012, SRI International. All Rights Reserved.
*/
#ifndef TLSWrapper_h
#define TLSWrapper_h
#include <stdlib.h>
#include <assert.h>
#include "tls.h"
#include "Boolean.h"
#ifndef NO_TLS
// Declare and define a POD TLS variable
# define TLSW(type, name) TLSWrapper<type> name(1, false)
// Declare and define a non-POD TLS variable (call its constructor)
# define TLSWC(type, name) TLSWrapper<type> name(1, true)
// Declare and define a TLS array
# define TLSW_ARRAY(type, name, size) TLSWrapper<type> name(size)
// Declare a TLS variable of a given type
# define TLSW_DECL(type, name) TLSWrapper<type> name
// Declare a TLS array of a given type
# define TLSW_DECL_ARRAY(type, name, size) TLSWrapper<type> name
// Define a previously declared TLS variable
# define TLSW_DEF(type, name) TLSWrapper<type> name = TLSWrapper<type>(1, false)
// Define a previously declared non-POD TLS variable
# define TLSW_DEFC(type, name) TLSWrapper<type> name = TLSWrapper<type>(1, true)
// Define a previously declared array TLS variable
# define TLSW_DEF_ARRAY(type, name, size) TLSWrapper<type> name = TLSWrapper<type>(size)
// Get a T reference that is specific to the current thread
# define TLSW_GET(name) ((name).get())
// Get a T pointer to the beginning of the array that belongs to current thread.
// The expansion is fully parenthesized so that the result can be indexed or
// dereferenced directly (TLSW_GET_ARRAY(x)[i]) without the postfix operator
// binding to get()'s result instead of to the pointer.
# define TLSW_GET_ARRAY(name) (&(name).get())
// Free the thread-local memory (but not what it points to, if anything)
# define TLSW_FREE(name) (name).release()
template<class T>
class TLSWrapper {
public:
TLSWrapper(size_t numP = 1, Boolean constructP = false) {
key = TLS_CREATE_KEY();
num = numP;
construct = constructP;
}
~TLSWrapper() {
TLS_FREE_KEY(key);
}
T &get() {
T* mem = (T*)TLS_GET(key);
if (mem == 0) {
// Since we're imitating static memory, zero-init
if (construct)
mem = new T();
else
mem = (T*)calloc(num, sizeof(T));
assert(mem != 0);
TLS_SET(key, mem);
}
return *mem;
}
void release() {
T* mem = (T*)TLS_GET(key);
if (mem != 0) {
if (construct)
delete mem;
else
free(mem);
TLS_SET(key, 0);
}
}
private:
size_t num;
Boolean construct;
TLS_KEY key;
};
#else
// Just create static variables for single-threaded builds
# define TLSW(type, name) type name
# define TLSWC(type, name) type name
# define TLSW_ARRAY(type, name, size) type name[size]
# define TLSW_DECL(type, name) type name
# define TLSW_DECL_ARRAY(type, name, size) type name[size]
# define TLSW_DEF(type, name) type name
# define TLSW_DEFC(type, name) type name
# define TLSW_DEF_ARRAY(type, name, size) type name[size]
# define TLSW_GET(name) name
# define TLSW_GET_ARRAY(name) name
# define TLSW_FREE(name)
#endif
#endif /* TLSWrapper_h */

View File

@@ -0,0 +1,78 @@
/*
* cfuncproto.h --
*
* Declarations of a macro supporting ANSI C function prototypes in
* Sprite. This macro allows function prototypes to be defined
* such that the code works on both standard and K&R C.
*
* Copyright 1990 Regents of the University of California
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies. The University of California
* makes no representations about the suitability of this
* software for any purpose. It is provided "as is" without
* express or implied warranty.
*
* $Header: /home/srilm/CVS/srilm/misc/src/cfuncproto.h,v 1.9 2011/04/06 03:23:00 stolcke Exp $ SPRITE (Berkeley)
*/
#ifndef _CFUNCPROTO
#define _CFUNCPROTO
/*
* Definition of the _ARGS_ macro. The _ARGS_ macro should be used to
* enclose the argument list of a function prototype. For example, the
* function:
* extern int main(argc, argv)
* int argc;
* char **argv;
*
* Would have a prototype of:
*
* extern int main _ARGS_((int argc, char **argv))
*
*/
#ifndef _ASM
#if defined(__STDC__) || defined(_MSC_VER)
#define _HAS_PROTOTYPES
#define _HAS_VOIDPTR
#define _HAS_CONST
#endif
#if defined(__cplusplus)
#define _EXTERN extern "C"
#define _NULLARGS (void)
#define _HAS_PROTOTYPES
#define _HAS_VOIDPTR
#define _HAS_CONST
#else
#define _EXTERN extern
#define _NULLARGS ()
#endif
#if defined(_HAS_PROTOTYPES) && !defined(lint)
#define _ARGS_(x) x
#else
#define _ARGS_(x) ()
#endif
#ifndef _CONST
#ifdef _HAS_CONST
#define _CONST const
#else
#define _CONST
#endif
#endif
#ifdef _HAS_VOIDPTR
typedef void *_VoidPtr;
#else
typedef char *_VoidPtr;
#endif
#endif /* _ASM */
#endif /* _CFUNCPROTO */

View File

@@ -0,0 +1,33 @@
/*
* fcheck.c --
* stdio file handling with error checking
*
* $Header: /home/srilm/CVS/srilm/misc/src/fcheck.c,v 1.2 2003/02/21 22:01:23 stolcke Exp $
*/
#include <stdlib.h>
#define ZIO_HACK
#include "zio.h"
#include "fcheck.h"
/*
 * Open a file like fopen(); if that fails, report the error for `name`
 * with perror() and terminate the program with exit status 1.
 */
FILE *fopen_check(const char *name, const char *mode)
{
    FILE *stream = fopen(name, mode);

    if (stream != 0) {
        return stream;
    }

    perror(name);
    exit(1);
}
/*
 * Close a stream like fclose(); if that fails, report the error for `name`
 * with perror() and terminate the program with exit status 1.
 */
void fclose_check(const char *name, FILE *file)
{
    const int status = fclose(file);

    if (status == 0) {
        return;
    }

    perror(name);
    exit(1);
}

View File

@@ -0,0 +1,22 @@
/*
* fcheck.h --
* stdio file handling with error checking
*/
#ifndef _FCHECK_H_
#define _FCHECK_H_
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * fopen() wrapper: returns the opened stream; on failure prints the error
 * for `name` via perror() and exits with status 1.
 */
FILE *fopen_check(const char *name, const char *mode);
/*
 * fclose() wrapper: on failure prints the error for `name` via perror()
 * and exits with status 1.
 */
void fclose_check(const char *name, FILE *file);
#ifdef __cplusplus
}
#endif
#endif /* _FCHECK_H_ */

View File

@@ -0,0 +1,40 @@
/*
File: mkdir.h
Author: Andreas Stolcke
Date: Wed Feb 15 15:19:44 PST 1995
Description: Portability for the mkdir function
Copyright (c) 2006, SRI International. All Rights Reserved.
RCS ID: $Id: mkdir.h,v 1.2 2006/10/17 18:53:33 stolcke Exp $
*/
/*
* $Log: mkdir.h,v $
* Revision 1.2 2006/10/17 18:53:33 stolcke
* win32 portability
*
* Revision 1.1 2006/01/09 19:14:04 stolcke
* Initial revision
*
*/
#ifndef _MKDIR_H
#define _MKDIR_H
/*
 * MKDIR(d) creates directory d with a single-argument call on every
 * platform.
 */
#if defined(_MSC_VER) || defined(WIN32)
# include <direct.h>
# define MKDIR(d) _mkdir(d)
#else
# include <sys/stat.h>
# include <sys/types.h>
/* When the permission bits exist, request rwx for user, group and others. */
# ifdef S_IRWXO
# define MKDIR(d) mkdir(d, S_IRWXU|S_IRWXG|S_IRWXO)
# else
# define MKDIR(d) mkdir(d)
# endif
#endif /* _MSC_VER */
#endif /* _MKDIR_H */

View File

@@ -0,0 +1,479 @@
/*
* option.c --
*
* Routines to do command line option processing.
*
* Copyright 1986, 1991 Regents of the University of California
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies. The University of California
* makes no representations about the suitability of this
* software for any purpose. It is provided "as is" without
* express or implied warranty.
*/
#ifndef lint
static char rcsid[] = "$Header: /home/srilm/CVS/srilm/misc/src/option.c,v 1.17 2013/04/09 06:07:02 stolcke Exp $ SPRITE (Berkeley)";
#endif
#include <option.h>
#include <cfuncproto.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define OptNoArg(progName, opt) fprintf(stderr, \
"Warning: %s option \"-%s\" needs an argument\n", \
(progName), (opt))
/* Forward references: */
static void ParseTime _ARGS_ ((_CONST char *progName, char *str,
time_t *resultPtr));
/*
*----------------------------------------------------------------------
*
* Opt_Parse --
*
* Process a command line according to a template of accepted
* options. See the manual page and header file for more details.
*
* Results:
* The number of options that weren't processed by this procedure
* is returned, and argv points to an array of unprocessed
* options. (This is all of the options that didn't start with
* "-", except for those used as arguments to the options
* processed here; it's also anything after an OPT_REST option.)
*
* Side effects:
* The variables referenced from the option array get modified
* if their option was present on the command line. Can clobber
* the global buffer used by localtime(3).
*
*----------------------------------------------------------------------
*/
int
Opt_Parse(
int argc, /* Number of arguments in argv. */
char **argv, /* Array of arguments */
Option optionArray[], /* Array of option descriptions */
int numOptions, /* Size of optionArray */
int flags) /* Or'ed combination of various flag bits:
* see option.h for definitions. */
{
register Option *optionPtr; /* pointer to the current option in the
* array of option specifications */
register char *curOpt; /* Current flag argument */
register char **curArg; /* Current argument */
register int argIndex; /* Index into argv to which next unused
* argument should be copied */
int stop=0; /* Set non-zero to stop processing
* arguments when an OPT_REST flag is
* encountered */
int error=0; /* A bad option was found */
int length; /* Number of characters in current
* option. */
/*
* argv[0] is skipped: scanning starts at argv[1], and from here on argc
* counts the arguments that have not been consumed yet.
*/
argIndex = 1;
argc -= 1;
curArg = &argv[1];
while (argc && !stop) {
if (**curArg == '-') {
curOpt = &curArg[0][1];
curArg += 1;
argc -= 1;
/*
* Check for the special options "?" and "help". If found,
* print documentation and exit.
*/
if ((strcmp(curOpt, "?") == 0) || (strcmp(curOpt, "help") == 0)) {
Opt_PrintUsage (argv[0], optionArray, numOptions);
exit(0);
}
/*
* Loop over all the options specified in a single argument
* (must be 1 unless OPT_ALLOW_CLUSTERING was specified).
*/
while (1) {
/*
* Loop over the array of options searching for one with the
* matching key string. If found, it is left pointed to by
* optionPtr.
*
* The table is scanned from its last entry to its first, so when
* several entries match, the one latest in the array wins.
* Entries with a NULL key can never match.
*/
for (optionPtr = &optionArray[numOptions - 1];
optionPtr >= optionArray;
optionPtr -= 1) {
if (optionPtr->key == NULL) {
continue;
}
if (*optionPtr->key == *curOpt) {
if (flags & OPT_ALLOW_CLUSTERING) {
/* With clustering the key only has to be a prefix of curOpt. */
length = strlen(optionPtr->key);
if (strncmp(optionPtr->key, curOpt, length) == 0) {
break;
}
} else {
if (strcmp(optionPtr->key, curOpt) == 0) {
break;
}
}
}
}
if (optionPtr < optionArray) {
/*
* No match. Print error message and skip option.
*/
if (flags & OPT_UNKNOWN_IS_ERROR) {
error = 1;
stop = 1;
} else {
fprintf(stderr, "Unknown option \"-%s\";", curOpt);
fprintf(stderr, " type \"%s -help\" for information\n",
argv[0]);
}
break;
}
/*
* Take the appropriate action based on the option type
*/
if (optionPtr->type >= 0) {
/* A non-negative type is itself the value to store in the variable. */
*((int *) optionPtr->address) = optionPtr->type;
} else {
switch (optionPtr->type) {
case OPT_REST:
stop = 1;
*((int *) optionPtr->address) = argIndex;
break;
case OPT_STRING:
if (argc == 0) {
OptNoArg(argv[0], optionPtr->key);
} else {
/* The variable points into argv; the string is not copied. */
*((char **)optionPtr->address) = *curArg;
curArg++;
argc--;
}
break;
case OPT_INT:
case OPT_UINT:
if (argc == 0) {
OptNoArg(argv[0], optionPtr->key);
} else {
char *endPtr;
int value = strtol(*curArg, &endPtr, 0);
/* A rejected value leaves the variable unchanged, but the argument is still consumed. */
if (endPtr == *curArg) {
fprintf(stderr,
"Warning: option \"-%s\" got a non-numeric argument \"%s\". Using default: %d\n",
optionPtr->key, *curArg, *((int *) optionPtr->address));
} else if (optionPtr->type == OPT_UINT &&
value < 0)
{
fprintf(stderr,
"Warning: option \"-%s\" got a negative argument \"%s\". Using default: %u.\n",
optionPtr->key, *curArg, *((unsigned *) optionPtr->address));
} else {
*((int *) optionPtr->address) = value;
}
curArg++;
argc--;
}
break;
case OPT_TIME:
if (argc == 0) {
OptNoArg(argv[0], optionPtr->key);
} else {
ParseTime(argv[0], *curArg,
(time_t *)optionPtr->address);
curArg++;
argc--;
}
break;
case OPT_FLOAT:
if (argc == 0) {
OptNoArg(argv[0], optionPtr->key);
} else {
char *endPtr;
double value = strtod(*curArg, &endPtr);
if (endPtr == *curArg) {
fprintf(stderr,
"Warning: option \"-%s\" got non-floating-point argument \"%s\". Using default: %lg.\n",
optionPtr->key, *curArg, *((double *) optionPtr->address));
} else {
*((double *) optionPtr->address) = value;
}
curArg++;
argc--;
}
break;
case OPT_GENFUNC: {
/* The handler's return value becomes the new count of remaining arguments. */
int (*handlerProc)();
handlerProc = (int (*)())optionPtr->address;
argc = (* handlerProc) (optionPtr->key, argc,
curArg);
break;
}
case OPT_FUNC: {
/*
* The next argument is consumed only if the handler returns non-zero.
* NOTE(review): there is no argc == 0 check here, so the handler can be
* called with the entry that follows the last argument -- confirm that
* callers always pass a NULL-terminated argv.
*/
int (*handlerProc)();
handlerProc = (int (*)())optionPtr->address;
if ((* handlerProc) (optionPtr->key, *curArg)) {
curArg += 1;
argc -= 1;
}
break;
}
case OPT_DOC:
Opt_PrintUsage (argv[0], optionArray, numOptions);
exit(0);
/*NOTREACHED*/
}
}
/*
* Advance to next option
*/
if (flags & OPT_ALLOW_CLUSTERING) {
/* length was set by the prefix match that selected optionPtr above. */
curOpt += length;
if (*curOpt == 0) {
break;
}
} else {
break;
}
}
} else {
/*
* *curArg is an argument for which we have no use, so copy it
* down.
*/
argv[argIndex] = *curArg;
argIndex += 1;
curArg += 1;
argc -= 1;
/*
* If this wasn't an option, and we're supposed to stop parsing
* the first time we see something other than "-", quit.
*/
if (flags & OPT_OPTIONS_FIRST) {
stop = 1;
}
}
}
/*
* If we broke out of the loop because of an OPT_REST argument, we want
* to copy the rest of the arguments down, so we do.
*/
while (argc) {
argv[argIndex] = *curArg;
argIndex += 1;
curArg += 1;
argc -= 1;
}
argv[argIndex] = (char *)NULL;
/* -1 reports an unknown option under OPT_UNKNOWN_IS_ERROR; otherwise the new argument count. */
if ((flags & OPT_UNKNOWN_IS_ERROR) && error) {
return -1;
} else {
return argIndex;
}
}
/*
*----------------------------------------------------------------------
*
* Opt_PrintUsage --
*
* Print out a usage message for a command. This prints out the
* documentation strings associated with each option.
*
* Results:
* none.
*
* Side effects:
* Messages printed onto the console.
*
*----------------------------------------------------------------------
*/
void
Opt_PrintUsage(
    _CONST char *commandName,	/* program name for the heading, or NULL
				 * to suppress heading and "-help" line */
    Option optionArray[],	/* Array of option descriptions */
    int numOptions)		/* Size of optionArray */
{
    int i;
    int width;

    /*
     * First, compute the width of the widest option key, so that we
     * can make everything line up.
     */
    width = 4;
    for (i = 0; i < numOptions; i++) {
	int length;

	if (optionArray[i].key == NULL) {
	    continue;
	}
	length = strlen(optionArray[i].key);
	if (length > width) {
	    width = length;
	}
    }

    if (commandName != NULL) {
	fprintf(stderr, "Usage of command \"%s\"\n", commandName);
    }

    for (i = 0; i < numOptions; i++) {
	if (optionArray[i].type == OPT_DOC) {
	    /* Documentation-only entry: just its message. */
	    fprintf(stderr, " %s\n", optionArray[i].docMsg);
	    continue;
	}

	/*
	 * Opt_Parse never matches an entry whose key is NULL, and the
	 * width computation above skips such entries too.  Skip them
	 * here as well instead of handing NULL to strlen() and "%s".
	 */
	if (optionArray[i].key == NULL) {
	    continue;
	}

	fprintf(stderr, " -%s%-*s %s\n", optionArray[i].key,
		width+1-(int)strlen(optionArray[i].key), ":",
		optionArray[i].docMsg);

	/* Show the current value of the variable as the default. */
	switch (optionArray[i].type) {
	    case OPT_INT:
		fprintf(stderr, "\t\tDefault value: %d\n",
			*((int *) optionArray[i].address));
		break;
	    case OPT_UINT:
		fprintf(stderr, "\t\tDefault value: %u\n",
			*((unsigned *) optionArray[i].address));
		break;
	    case OPT_FLOAT:
		fprintf(stderr, "\t\tDefault value: %lg\n",
			*((double *) optionArray[i].address));
		break;
	    case OPT_STRING:
		/* A NULL string default is simply not shown. */
		if (*(char **)optionArray[i].address != (char *) NULL) {
		    fprintf(stderr, "\t\tDefault value: \"%s\"\n",
			    *(char **) optionArray[i].address);
		}
		break;
	    default:
		break;
	}
    }

    if (commandName != NULL) {
	fprintf(stderr, " -help%-*s Print this message\n", width-3, ":");
    }
}
/*
*----------------------------------------------------------------------
*
* ParseTime --
*
* Convert a date and time from some string representation to
* something we can compute with.
*
* Results:
* If str points to a parsable time, the corresponding UNIX time
* value (seconds past the epoch) is returned through resultPtr.
*
* Side effects:
* Can clobber the global buffer used by localtime(3).
*
*----------------------------------------------------------------------
*/
static void
ParseTime(
    _CONST char *progName,	/* name that the program was called as */
    char *str,			/* the string to parse */
    time_t *resultPtr)		/* pointer to result time value; left
				 * untouched if str cannot be used */
{
    long fields[6];		/* year, month, day, hour, minute, second */
    char *fieldStart;		/* start of the field being parsed */
    char *endPtr;		/* pointer into str, for parsing */
    struct tm pieces;		/* year, month, etc. as integers */
    time_t seconds;		/* the answer for form 2 */
    int i;

    /*
     * We currently accept the following formats:
     *
     * (1) an integer number of seconds past the epoch.
     * (2) a string of the form "yy.mm.dd.hh.mm.ss"
     */
    fields[0] = strtol(str, &endPtr, 0);
    if (endPtr == str) {
	goto parseError;
    }
    if (*endPtr == '\0') {
	*resultPtr = fields[0];
	return;
    }

    /*
     * Not a simple integer, so try form 2.  The six fields are read as
     * decimal numbers: with base 0 a zero-padded field such as "08" or
     * "09" would be taken for a malformed octal constant.  Every field
     * must contain at least one digit; the first five must be followed
     * by '.' and the last one by the end of the string.
     */
    fieldStart = str;
    for (i = 0; i < 6; i++) {
	fields[i] = strtol(fieldStart, &endPtr, 10);
	if (endPtr == fieldStart) {
	    goto parseError;
	}
	if (*endPtr != (i < 5 ? '.' : '\0')) {
	    goto parseError;
	}
	fieldStart = endPtr + 1;
    }

    /*
     * Clear the whole structure first so mktime() never reads an
     * uninitialized member, and let it work out by itself whether
     * daylight saving time is in effect (tm_isdst < 0).
     */
    memset(&pieces, 0, sizeof(pieces));
    pieces.tm_year = (int) fields[0];
    if (pieces.tm_year > 1900) {
	pieces.tm_year -= 1900;
    }
    pieces.tm_mon = (int) fields[1] - 1;
    pieces.tm_mday = (int) fields[2];
    pieces.tm_hour = (int) fields[3];
    pieces.tm_min = (int) fields[4];
    pieces.tm_sec = (int) fields[5];
    pieces.tm_isdst = -1;

    seconds = mktime(&pieces);
    if (seconds == (time_t) -1) {
	fprintf(stderr, "%s: can't represent the time \"%s\".\n",
		progName, str);
    } else {
	*resultPtr = seconds;
    }
    return;

parseError:
    fprintf(stderr, "%s: can't parse \"%s\" as a time.\n", progName, str);
    return;
}

View File

@@ -0,0 +1,161 @@
/*
* option.h --
* This defines the Option type and the interface to the
* Opt_Parse library call that parses command lines.
*
* Copyright 1988, 1991 Regents of the University of California
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies. The University of California
* makes no representations about the suitability of this
* software for any purpose. It is provided "as is" without
* express or implied warranty.
*
* $Header: /home/srilm/CVS/srilm/misc/src/option.h,v 1.13 2013/04/09 06:07:02 stolcke Exp $ SPRITE (Berkeley)
*/
#ifndef _OPTION
#define _OPTION
#ifdef __cplusplus
extern "C" {
#endif
#include <cfuncproto.h>
/*
* An array of option descriptions (type Option) is passed into the
* routine which interprets the command line. Each option description
* includes the key-string that indicates the option, a type for the option,
* the address of an associated variable, and a documentation message
* that is printed when the command is invoked with a single argument
* of '?'
*/
/* One entry of the option table passed to Opt_Parse and Opt_PrintUsage. */
typedef struct Option {
int type; /* Indicates option type; see below */
_CONST char *key; /* The key string that flags option */
_VoidPtr address; /* Address of variable to modify */
_CONST char *docMsg; /* Documentation message */
} Option;
/*
* Values for type:
*
* OPT_CONSTANT(val) - if the flag is present then set the
* associated (integer) variable to val.
* Val must be a non-negative integer.
* OPT_TRUE - if the flag is present then set the
* associated (integer) variable to TRUE (1).
* OPT_FALSE - if the flag is present then set the
* associated (integer) variable to FALSE (0).
* OPT_INT - if the flag is present then the next argument
* on the command line is interpreted as a
* signed integer and that value is assigned to
* the option's associated variable.
* OPT_UINT - if the flag is present then the next argument
* on the command line is interpreted as an
* unsigned integer and that value is assigned to
* the option's associated variable.
* OPT_STRING - if the flag is present then the next argument
* on the command line is copied into the string
* variable associated with the option.
* OPT_REST - if the flag is present, inhibit processing of
* later options, so that they're all returned
* to the caller in argv. In addition, set the
* associated variable to the index of the first
* of these arguments in the returned argv.
* This permits a program to allow a flag to
* separate its own options from options it will
* pass to another program.
* OPT_FLOAT - if the flag is present then the next argument
* on the command line is interpreted as a
* "double" and that value is assigned to the
* option's associated variable.
* OPT_TIME - if the flag is present then the next argument
* on the command line is interpreted as a date
* and time. The corresponding time value
* (number of seconds past the epoch) is assigned
* to the option's associated variable.
* OPT_FUNC - if the flag is present, pass the next argument
* to "address" as a function. The function
* should be declared:
* int
* func(optString, arg)
* char *optString;
* char *arg;
* Func should return non-zero if the argument
* was consumed or zero if not. "optString" is
* the option key string that caused the
* function to be called and "arg" is the next
* argument (if there is no next argument then
* "arg" will be NULL).
* OPT_GENFUNC - if the flag is present, pass the remaining
* arguments and the number of arguments to
* "address" as a function. The function should
* be declared:
* int
* func(optString, argc, argv)
* char *optString;
* int argc;
* char **argv;
* and should return the new number of arguments
* left in argv. argv should have been shuffled
* to eliminate the arguments func consumed.
* OPT_DOC - a dummy entry. Exists mostly for its
* documentation string. As an additional side
* effect, if its key string is given as an argument,
* Opt_Parse will treat it like a question mark
* (i.e. print out the program's usage and exit).
*/
/*
 * Option type codes.  Non-negative codes are constants to be stored in the
 * option's variable; negative codes select one of the argument-taking or
 * special behaviours.  The macro argument and the negative values are
 * parenthesized so that they stay intact inside larger expressions.
 */
#define OPT_CONSTANT(val) ((int) (val))
#define OPT_FALSE 0
#define OPT_TRUE 1
#define OPT_INT (-1)
#define OPT_UINT (-2)
#define OPT_STRING (-3)
#define OPT_REST (-4)
#define OPT_FLOAT (-5)
#define OPT_FUNC (-6)
#define OPT_GENFUNC (-7)
#define OPT_DOC (-8)
#define OPT_TIME (-9)
/*
* Flag values for Opt_Parse:
*
* OPT_ALLOW_CLUSTERING - Permit many flags to be clustered under
* a single "-". In other words, treat
* "foo -abc" the same as "foo -a -b -c".
* OPT_OPTIONS_FIRST - Stop parsing if something other than an
* option (starting with a hyphen) is encountered.
* OPT_UNKNOWN_IS_ERROR - Stop parsing if an unknown option is encountered
* and do not print -help message hint.
*/
#define OPT_ALLOW_CLUSTERING 1
#define OPT_OPTIONS_FIRST 2
#define OPT_UNKNOWN_IS_ERROR 4
/*
* Exported procedures:
*/
/*
 * Opt_Parse processes argv according to optionArray and compacts the
 * unprocessed arguments to the front of argv; it returns their count
 * (counting argv[0]), or -1 for an unknown option when
 * OPT_UNKNOWN_IS_ERROR is set in flags.
 */
int Opt_Parse _ARGS_ ((int argc, char *argv[], Option *optionArray,
int numOptions, int flags));
/*
 * Opt_PrintUsage writes the documentation of every option to stderr.
 */
void Opt_PrintUsage _ARGS_ ((_CONST char *commandName, Option *optionArray,
int numOptions));
/*
* Macro to determine size of option array:
*/
#define Opt_Number(optionArray) (sizeof(optionArray)/sizeof((optionArray)[0]))
#ifdef __cplusplus
}
#endif
#endif /* _OPTION */

View File

@@ -0,0 +1,153 @@
/*
* rand48.c --
* Replacement *rand48 functions (for systems that don't have them)
*
* $Header: /home/srilm/CVS/srilm/misc/src/rand48.c,v 1.3 2016/05/25 16:26:15 stolcke Exp $
*/
#ifdef NEED_RAND48
/************************************************************************
* *
* Copyright (c) 1993 Martin Birgmeier *
* All rights reserved. *
* *
* You may redistribute unmodified or modified versions of this source *
* code provided that the above copyright notice and this and the *
* following conditions are retained. *
* *
* This software is provided ``as is'', and comes with no warranties *
* of any kind. I shall in no event be liable for anything that happens *
* to anyone/anything when using this software. *
* *
************************************************************************/
#include <math.h>
#include <stdlib.h>
/* Default generator state, multiplier (0x5DEECE66D) and addend, in 16-bit words. */
#define RAND48_SEED_0 (0x330e)
#define RAND48_SEED_1 (0xabcd)
#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
#define RAND48_ADD (0x000b)

/* Current 48-bit state, least significant 16-bit word first. */
unsigned short _rand48_seed[3] = {
    RAND48_SEED_0,
    RAND48_SEED_1,
    RAND48_SEED_2
};
/* Current 48-bit multiplier, least significant 16-bit word first. */
unsigned short _rand48_mult[3] = {
    RAND48_MULT_0,
    RAND48_MULT_1,
    RAND48_MULT_2
};
/* Current addend. */
unsigned short _rand48_add = RAND48_ADD;

/*
 * Advance the 48-bit state in xseed by one step of the linear congruential
 * recurrence  X' = (mult * X + add) mod 2^48,  using 16-bit limbs.
 */
void
_dorand48(unsigned short xseed[3])
{
    unsigned long accu;
    unsigned short temp[2];

    accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] +
	   (unsigned long) _rand48_add;
    temp[0] = (unsigned short) accu;	/* lower 16 bits */
    accu >>= sizeof(unsigned short) * 8;
    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] +
	    (unsigned long) _rand48_mult[1] * (unsigned long) xseed[0];
    temp[1] = (unsigned short) accu;	/* middle 16 bits */
    accu >>= sizeof(unsigned short) * 8;
    /*
     * The operands are widened before multiplying: unsigned short values
     * would otherwise be promoted to (signed) int, and a product such as
     * 0xe66d * 0xffff overflows a 32-bit int.  Only the low 16 bits of
     * the sum are kept, so unsigned wrap-around is harmless.
     */
    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[2] +
	    (unsigned long) _rand48_mult[1] * (unsigned long) xseed[1] +
	    (unsigned long) _rand48_mult[2] * (unsigned long) xseed[0];
    xseed[0] = temp[0];
    xseed[1] = temp[1];
    xseed[2] = (unsigned short) accu;	/* upper 16 bits */
}
/*
 * Advance the caller-supplied state and return it scaled to a double:
 * the three 16-bit words are weighted by 2^-48, 2^-32 and 2^-16.
 */
double
erand48(unsigned short xseed[3])
{
    double low, middle, high;

    _dorand48(xseed);

    low = ldexp((double) xseed[0], -48);
    middle = ldexp((double) xseed[1], -32);
    high = ldexp((double) xseed[2], -16);
    return low + middle + high;
}
/*
 * Same as erand48(), but operating on the generator's global state.
 */
double
drand48(void)
{
    double value = erand48(_rand48_seed);

    return value;
}
/*
 * Advance the global state and return its 31 most significant bits as a
 * non-negative long.
 */
long
lrand48(void)
{
    long high, low;

    _dorand48(_rand48_seed);

    high = (long) _rand48_seed[2] << 15;
    low = (long) _rand48_seed[1] >> 1;
    return high + low;
}
/*
 * Advance the caller-supplied state and return its 31 most significant
 * bits as a non-negative long.
 */
long
nrand48(unsigned short xseed[3])
{
    long high, low;

    _dorand48(xseed);

    high = (long) xseed[2] << 15;
    low = (long) xseed[1] >> 1;
    return high + low;
}
/*
 * Advance the global state and return its 32 most significant bits as a
 * signed value in [-2^31, 2^31).
 *
 * The bits are assembled as an unsigned quantity and then mapped to the
 * signed range explicitly, so the result is the same whether long has 32
 * or 64 bits (a plain "(long) word << 16" overflows with a 32-bit long and
 * never becomes negative with a 64-bit long).
 */
long
mrand48(void)
{
    unsigned long bits;

    _dorand48(_rand48_seed);

    bits = ((unsigned long) _rand48_seed[2] << 16) |
	   (unsigned long) _rand48_seed[1];
    if (bits & 0x80000000UL) {
	/* bit 31 set: value is bits - 2^32 */
	return (long) (bits & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }
    return (long) bits;
}
/*
 * Advance the caller-supplied state and return its 32 most significant
 * bits as a signed value in [-2^31, 2^31).
 *
 * The bits are assembled as an unsigned quantity and then mapped to the
 * signed range explicitly, so the result is the same whether long has 32
 * or 64 bits (a plain "(long) word << 16" overflows with a 32-bit long and
 * never becomes negative with a 64-bit long).
 */
long
jrand48(unsigned short xseed[3])
{
    unsigned long bits;

    _dorand48(xseed);

    bits = ((unsigned long) xseed[2] << 16) | (unsigned long) xseed[1];
    if (bits & 0x80000000UL) {
	/* bit 31 set: value is bits - 2^32 */
	return (long) (bits & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }
    return (long) bits;
}
/*
 * Seed the global generator: the low word of the state gets its default
 * value, the two upper words come from the low 32 bits of `seed`, and the
 * multiplier and addend are reset to their defaults.
 */
void
srand48(long seed)
{
    /* Restore the default recurrence parameters. */
    _rand48_add = RAND48_ADD;
    _rand48_mult[2] = RAND48_MULT_2;
    _rand48_mult[1] = RAND48_MULT_1;
    _rand48_mult[0] = RAND48_MULT_0;

    /* Build the new state. */
    _rand48_seed[2] = (unsigned short) (seed >> 16);
    _rand48_seed[1] = (unsigned short) seed;
    _rand48_seed[0] = RAND48_SEED_0;
}
/*
 * Install xseed as the global state, reset multiplier and addend to their
 * defaults, and return a pointer to a static copy of the previous state.
 */
unsigned short *
seed48(unsigned short xseed[3])
{
    static unsigned short sseed[3];
    int word;

    /* Save the complete old state first, then load the new one. */
    for (word = 0; word < 3; word++) {
	sseed[word] = _rand48_seed[word];
    }
    for (word = 0; word < 3; word++) {
	_rand48_seed[word] = xseed[word];
    }

    _rand48_mult[0] = RAND48_MULT_0;
    _rand48_mult[1] = RAND48_MULT_1;
    _rand48_mult[2] = RAND48_MULT_2;
    _rand48_add = RAND48_ADD;

    return sseed;
}
/*
 * Set the complete generator configuration from p:
 * p[0..2] is the state, p[3..5] the multiplier and p[6] the addend.
 */
void
lcong48(unsigned short p[7])
{
    int word;

    for (word = 0; word < 3; word++) {
	_rand48_seed[word] = p[word];
    }
    for (word = 0; word < 3; word++) {
	_rand48_mult[word] = p[3 + word];
    }
    _rand48_add = p[6];
}
#endif /* NEED_RAND48 */

View File

@@ -0,0 +1,69 @@
/*
File: srilm_iconv.h
Author: Andreas Stolcke
Date: Sun Jan 22 12:48:55 2012
Description: Portability for the iconv function
Copyright (c) 2012 Andreas Stolcke, Microsoft Corp. All Rights Reserved.
RCS ID: $Id: srilm_iconv.h,v 1.6 2019/09/09 23:13:15 stolcke Exp $
*/
#if !defined(NO_ICONV) && defined(__GNUC__) && !defined(WIN32)
# include_next <iconv.h>
#else
# if !defined(NO_ICONV) && defined(sun)
# include "/usr/include/iconv.h"
# else
#ifndef _SRILM_ICONV_H
#define _SRILM_ICONV_H
#include <errno.h>
#undef iconv_open
#undef iconv_close
#undef iconv
# ifdef NO_ICONV
/*
* Avoid libiconv references, disallow UTF-16 conversion.
*/
typedef void *iconv_t; // unused
/* Opening a conversion always fails with errno set to EINVAL. */
#define iconv_open(to, from) (errno = EINVAL, (iconv_t)-1)
#define iconv_close(x) /* nothing to do */
/* Evaluates none of its arguments; always yields (size_t)-1. */
#define iconv(cp, in, nin, out, nout) ((size_t)-1) // unused
# else /* ! NO_ICONV */
# if defined(_MSC_VER) || defined(WIN32)
/*
* Emulate simple iconv() usage using Windows API.
* (Not pretty, but keeps the code below from being littered with #ifdefs)
*/
#include "Windows.h"
typedef void *iconv_t; // unused
/* Only the UTF-16LE to UTF-8 direction is accepted; anything else fails with EINVAL. */
#define iconv_open(to, from) ((strcmp(to,"UTF-8")==0 && strcmp(from,"UTF-16LE")==0) ? \
(iconv_t)1 : \
(errno = EINVAL, (iconv_t)-1))
#define iconv_close(x) /* nothing to do */
/*
* The cp and nin arguments are ignored: the input length is passed as -1.
* *(nout) is overwritten with the WideCharToMultiByte() result, and the
* macro yields -1 when that result is 0.
* NOTE(review): this differs from POSIX iconv(), which advances the
* buffer pointers and leaves the remaining space in *nout -- confirm
* that callers only rely on the failure check.
*/
#define iconv(cp, in, nin, out, nout) \
((*(nout) = WideCharToMultiByte(CP_UTF8, 0, \
(LPCWSTR)*(in), -1, \
(*out), *(nout), \
NULL, NULL)) == 0 ? -1 : *(nout))
# endif /* _MSC_VER */
# endif /* NO_ICONV */
#endif /* _SRILM_ICONV_H */
# endif
#endif

View File

@@ -0,0 +1,22 @@
/*
* tclmain.c --
* main() function for tcl clients
*
* $Header: /home/srilm/CVS/srilm/misc/src/tclmain.cc,v 1.6 2003/07/01 02:54:12 stolcke Exp $
*/
#include <tcl.h>
/*
* Tcl versions up to 7.3 defined main() in the libtcl.a
*/
#if (TCL_MAJOR_VERSION > 7) || (TCL_MAJOR_VERSION == 7 && TCL_MINOR_VERSION > 3)
/*
 * Entry point for Tcl clients: hands control to the Tcl main loop, with
 * Tcl_AppInit as the application initialization procedure.
 */
int
main(int argc, char **argv)
{
    Tcl_Main(argc, argv, Tcl_AppInit);
    return 0;
}
#endif

View File

@@ -0,0 +1,70 @@
/*
* Test File class
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1998-2010 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/testFile.cc,v 1.7 2012/07/11 22:07:58 stolcke Exp $";
#endif
#include <stdlib.h>
#include <string.h>
#include "File.h"
/*
 * Return 1 if the string ends in a newline character, 0 otherwise
 * (including for the empty string).
 */
int hasNL(const char *line)
{
    unsigned length = strlen(line);

    if (length == 0) {
        return 0;
    }
    return (line[length - 1] == '\n') ? 1 : 0;
}
int
main()
{
File file(stdin);
File buffer("", (size_t)0);
char *line;
cout << "=== input data ===\n";
while ((line = file.getline())) {
file.position(cout) << line;
if (!hasNL(line)) {
cout << "(MISSING NEWLINE)\n";
}
// save the line in our buffer
buffer.fputs(line);
}
buffer.fputs("LINE WITHOUT NEWLINE");
cout << "=== buffer contents ===\n";
unsigned len = strlen(buffer.c_str());
cout << "(length = " << len << ")\n";
cout << buffer.c_str();
cout << "\n=== buffer read back ===\n";
File sfile(buffer.c_str(), len);
while ((line = sfile.getline())) {
sfile.position(cout) << line;
if (!hasNL(line)) {
cout << "(MISSING NEWLINE)\n";
}
}
exit(0);
}

View File

@@ -0,0 +1,36 @@
/*
* testRand --
* Test random number generator
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifdef NEED_RAND48
extern "C" {
void srand48(long);
double drand48();
long lrand48();
}
#endif
/*
 * Seed the generator with 1, then print 20 values from lrand48() and
 * 20 values from drand48(), one line each.
 */
int
main()
{
    int remaining;

    srand48(1);

    for (remaining = 20; remaining > 0; remaining--) {
        printf(" %ld", lrand48());
    }
    printf("\n");

    for (remaining = 20; remaining > 0; remaining--) {
        printf(" %lg", drand48());
    }
    printf("\n");

    exit(0);
}

View File

@@ -0,0 +1,18 @@
#include <stdlib.h>
/*
* tls.cc --
* Abstracts pthread and Windows thread-local storage mechanisms
*
* Copyright (c) 2012, SRI International. All Rights Reserved.
*/
#include "tls.h"
#if !defined(NO_TLS) && !defined(_MSC_VER) && !defined(WIN32)
// Needed for non-windows TLS
// Create a new pthread TLS key with no per-thread destructor.
// There is no way to report failure through the return value, and handing
// back an uninitialized key would make every later get/set on it undefined,
// so a failed key creation terminates the program.
TLS_KEY srilm_tls_get_key() {
    TLS_KEY key;
    if (pthread_key_create(&key, 0) != 0) {
        abort();
    }
    return key;
}
#endif

View File

@@ -0,0 +1,31 @@
/*
* tls.h --
* Abstracts pthread and Windows thread-local storage mechanisms
*
* Copyright (c) 2012, SRI International. All Rights Reserved.
*/
#ifndef tls_h
#define tls_h
/*
 * Platform-neutral names for the thread-local-storage primitives:
 * TLS_KEY (key type), TLS_CREATE_KEY (function returning a new key),
 * TLS_GET / TLS_SET (read / write the calling thread's value) and
 * TLS_FREE_KEY (delete the key).  Nothing is defined when NO_TLS is set.
 */
#ifndef NO_TLS
# if defined(_MSC_VER) || defined(WIN32)
# include <windows.h>
# define TLS_KEY DWORD
# define TLS_CREATE_KEY TlsAlloc
# define TLS_GET(key) TlsGetValue(key)
# define TLS_SET(key, value) TlsSetValue(key, value)
# define TLS_FREE_KEY(key) TlsFree(key)
# else
# include <pthread.h>
# define TLS_KEY pthread_key_t
# define TLS_CREATE_KEY srilm_tls_get_key
# define TLS_GET(key) pthread_getspecific(key)
# define TLS_SET(key, value) pthread_setspecific(key, value)
# define TLS_FREE_KEY(key) pthread_key_delete(key)
/* Helper that returns a key by value, since pthread_key_create() fills one in through a pointer. */
TLS_KEY srilm_tls_get_key();
# endif /* _MSC_VER */
#endif /* !NO_TLS */
#endif /* tls_h */

View File

@@ -0,0 +1,38 @@
#include <stdlib.h>
/*
* tserror.cc --
* Provide thread-safe strerror calls
*
* Copyright (c) 2012, SRI International. All Rights Reserved.
*/
#define ERR_BUFF_SZ 256
#include <string.h>
#ifndef NO_TLS
#include "tserror.h"
#include "TLSWrapper.h"
static TLSW_ARRAY(char, errBuffTLS, ERR_BUFF_SZ);
#if !defined(WIN32) && !defined(_MSC_VER)
// strerror_r() comes in two flavours: the XSI one returns an int status and
// always writes the message into the caller's buffer, while the GNU one
// returns a char * that may point to an internal constant string and can
// leave the buffer untouched.  Overload resolution on the return type picks
// the matching adapter, so the right message is used with either flavour.
static inline char *srilm_strerror_message(int /* status */, char *buff) {
    return buff;
}
static inline char *srilm_strerror_message(char *message, char * /* buff */) {
    return message;
}
#endif

// Thread-safe replacement for strerror(): returns the message for errnum.
// The result is valid until the calling thread's next call (it normally
// lives in a per-thread buffer) and must not be modified or freed.
char *srilm_ts_strerror(int errnum) {
#if defined(WIN32)
    char *buff = strerror(errnum); // mingw doesn't have strerror_s()
#else
    char *buff = TLSW_GET_ARRAY(errBuffTLS);
#if defined(_MSC_VER)
    strerror_s(buff, ERR_BUFF_SZ, errnum);
#else
    // Don't let a message from an earlier call survive if nothing is written.
    buff[0] = '\0';
    buff = srilm_strerror_message(strerror_r(errnum, buff, ERR_BUFF_SZ), buff);
#endif /* _MSC_VER */
#endif /* WIN32 */
    return buff;
}
// Release the calling thread's error-message buffer.
// NOTE(review): this definition sits inside the surrounding
// "#ifndef NO_TLS" section -- confirm nothing calls it in NO_TLS builds.
void srilm_tserror_freeThread() {
TLSW_FREE(errBuffTLS);
}
#endif /* NO_TLS */

View File

@@ -0,0 +1,28 @@
/*
* tserror.h --
* Provide thread-safe strerror calls
*
* Copyright (c) 2012, SRI International. All Rights Reserved.
*/
#ifndef tserror_h
#define tserror_h
#ifdef __cplusplus
extern "C" {
#endif
/*
 * srilm_ts_strerror(errnum) returns the error message for errnum.  In
 * builds with NO_TLS defined it is simply an alias for strerror().
 */
#ifndef NO_TLS
char *srilm_ts_strerror(int errnum);
#else
# define srilm_ts_strerror strerror
#endif
/*
 * Frees the calling thread's error-message storage.
 * NOTE(review): declared unconditionally here -- confirm a definition is
 * also available when NO_TLS is defined.
 */
void srilm_tserror_freeThread();
#ifdef __cplusplus
}
#endif
#endif /* tserror_h */

View File

@@ -0,0 +1,61 @@
/*
* version.c --
* Print version information
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 2004 SRI International, 2015 Andreas Stolcke, Microsoft Corp. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/version.c,v 1.10 2019/09/09 23:13:15 stolcke Exp $";
#endif
#include <stdio.h>
#include "zio.h"
#include "version.h"
#include "SRILMversion.h"
#include <SRILMoptions.h>
#if defined(_OPENMP) && defined(_MSC_VER)
#include <omp.h>
#endif
/*
 * Print the SRILM release, the compiler and build options, the caller's
 * program version string (rcsid), the optional features that were
 * compiled in, and finally the copyright text, all to standard output.
 */
void
printVersion(const char *rcsid)
{
/* Release line; the note is appended unless EXCLUDE_CONTRIB is defined. */
printf("SRILM release %s", SRILM_RELEASE);
#ifndef EXCLUDE_CONTRIB
printf(" (with third-party contributions)");
#endif /* EXCLUDE_CONTRIB_END */
printf("\n");
/* Compiler identification; clang is excluded from the GCC line. */
#if defined(__GNUC__) && !defined(__clang__)
printf("Built with GCC %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
#endif
#ifdef __clang__
printf("Built with Clang %d.%d.%d\n", __clang_major__, __clang_minor__, __clang_patchlevel__);
#endif
#ifdef __INTEL_COMPILER
printf("Built with IntelC %d\n", __INTEL_COMPILER);
#endif
#ifdef _MSC_VER
printf("Built with MSVC %d\n", _MSC_VER);
#endif
/* BUILD_OPTIONS comes from whichever SRILMoptions.h was included. */
#ifdef BUILD_OPTIONS
printf("and options %s\n", BUILD_OPTIONS);
#endif
printf("\nProgram version %s\n", rcsid);
/* The wording of the compression note depends on whether NO_ZIO is defined. */
#ifndef NO_ZIO
printf("\nSupport for compressed files is included.\n");
#else
printf("\nSupport for gzipped files is included.\n");
#endif
#ifdef HAVE_LIBLBFGS
printf("Using libLBFGS.\n");
#endif
#ifdef _OPENMP
printf("Using OpenMP version %d.\n", _OPENMP);
#endif
puts(SRILM_COPYRIGHT);
}

View File

@@ -0,0 +1,25 @@
/*
* version.h --
* Print version information
*
* Copyright (c) 2004, SRI International. All Rights Reserved.
*
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/version.h,v 1.1 2004/12/03 04:24:36 stolcke Exp $
*
*/
#ifndef _version_h_
#define _version_h_
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Print the SRILM release banner, build information and copyright notice to
 * stdout.  rcsid is the calling program's version string; it is shown on the
 * "Program version" line.
 */
void printVersion(const char *rcsid);
#ifdef __cplusplus
}
#endif
#endif /* _version_h_ */

View File

@@ -0,0 +1,501 @@
/*
File: zio.c
Author: Andreas Stolcke
Date: Wed Feb 15 15:19:44 PST 1995
Description:
Compressed file stdio extension
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2010 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/zio.c,v 1.31 2011/04/07 07:43:24 stolcke Exp $";
#endif
/*
* $Log: zio.c,v $
* Revision 1.31 2011/04/07 07:43:24 stolcke
* Suppress unused functions if NO_ZIO is defined
*
* Revision 1.30 2010/06/02 04:47:32 stolcke
* avoid compiler warning
*
* Revision 1.29 2010/04/05 15:12:03 stolcke
* avoid using gunzip to avoid script wrapper overhead
*
* Revision 1.28 2009/08/22 22:41:19 stolcke
* support for xz compressed files
*
* Revision 1.27 2008/05/27 03:21:41 stolcke
* avoid compiler warnings about exit()
*
* Revision 1.26 2007/11/11 19:49:11 stolcke
* use 7z e to uncompress (probably doesn't matter)
*
* Revision 1.25 2007/11/11 16:06:53 stolcke
* 7zip compression support
*
* Revision 1.24 2006/03/06 05:46:43 stolcke
* define NO_ZIO in zio.h instead of zio.c
*
* Revision 1.23 2006/03/01 00:45:45 stolcke
* allow disabling of zio for windows environment (NO_ZIO)
*
* Revision 1.22 2006/01/09 17:39:03 stolcke
* MSVC port
*
* Revision 1.21 2006/01/05 19:32:42 stolcke
* ms visual c portability
*
* Revision 1.20 2005/12/16 23:30:09 stolcke
* added support for bzip2-compressed files
*
* Revision 1.19 2005/07/28 21:08:15 stolcke
* include signal.h for portability
*
* Revision 1.18 2005/07/28 18:37:47 stolcke
* portability for systems w/o pipes
*
* Revision 1.17 2004/01/31 01:17:51 stolcke
* don't declare errno, get it from errno.h
*
* Revision 1.16 2003/11/09 21:09:11 stolcke
* use gunzip -f to allow uncompressed files ending in .gz
*
* Revision 1.15 2003/11/01 06:18:30 stolcke
* issue stdin/stdout warning only once
*
* Revision 1.14 1999/10/13 09:07:13 stolcke
* make filename checking functions public
*
* Revision 1.13 1997/06/07 15:58:47 stolcke
* fixed some gcc warnings
*
* Revision 1.13 1997/06/07 15:56:24 stolcke
* fixed some gcc warnings
*
* Revision 1.12 1997/01/23 20:38:35 stolcke
* *** empty log message ***
*
* Revision 1.11 1997/01/23 20:02:59 stolcke
* handle SIGPIPE termination
*
* Revision 1.10 1997/01/22 07:52:08 stolcke
* warn about multiple uses of -
*
* Revision 1.9 1996/11/30 21:08:59 stolcke
* use exec in compress commands
*
* Revision 1.8 1995/07/19 16:51:31 stolcke
* remove PATH assignment to account for local setup
*
* Revision 1.7 1995/06/22 20:47:16 stolcke
* dup stdio descriptors so fclose won't disturb them
*
* Revision 1.6 1995/06/22 20:44:39 stolcke
* return more error info
*
* Revision 1.5 1995/06/22 19:58:11 stolcke
* ansi-fied
*
* Revision 1.4 1995/06/12 22:57:12 tmk
* Added ifdef around the redefinitions of fopen() and fclose().
*
*/
/*******************************************************************
Copyright 1994,1997 SRI International. All rights reserved.
This is an unpublished work of SRI International and is not to be
used or disclosed except as provided in a license agreement or
nondisclosure agreement with SRI International.
********************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef _MSC_VER
#include <unistd.h>
#include <sys/param.h>
#endif
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <signal.h>
#include <errno.h>
#ifndef MAXPATHLEN
#define MAXPATHLEN 1024
#endif
#include "zio.h"
#ifdef ZIO_HACK
#undef fopen
#undef fclose
#endif
#define STDIO_NAME "-"
#define STD_PATH ":" /* "PATH=/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin" */
#define COMPRESS_CMD "exec compress -c"
#define UNCOMPRESS_CMD "exec uncompress -c"
#define GZIP_CMD "exec gzip -c"
#define GUNZIP_CMD "exec gzip -dcf"
#define BZIP2_CMD "exec bzip2"
#define BUNZIP2_CMD "exec bzip2 -dcf"
#define SEVENZIP_CMD "exec 7z a -si"
#define SEVENUNZIP_CMD "exec 7z e -so"
#define XZ_CMD "exec xz"
#define XZ_DECOMPRESS_CMD "exec xz -dcf"
/*
 * stdio_filename_p --
 *	Test whether a filename is the special name that stands for
 *	stdin/stdout (STDIO_NAME).
 *
 *	Returns 1 on an exact match, 0 otherwise.
 */
int
stdio_filename_p (const char *name)
{
    if (strcmp(name, STDIO_NAME) != 0) {
	return 0;
    }
    return 1;
}
/*
 * compressed_filename_p --
 *	Test whether a filename carries the COMPRESS_SUFFIX extension.
 *
 *	Returns 1 if name ends in COMPRESS_SUFFIX and is strictly longer than
 *	the suffix itself, 0 otherwise.  An empty suffix never matches.
 */
int
compressed_filename_p (const char *name)
{
    /* suffix length without the terminating NUL */
    const unsigned suffix_len = sizeof(COMPRESS_SUFFIX) - 1;
    const unsigned name_len = strlen(name);

    if (suffix_len == 0 || name_len <= suffix_len) {
	return 0;
    }
    return strcmp(name + name_len - suffix_len, COMPRESS_SUFFIX) == 0;
}
/*
 * gzipped_filename_p --
 *	Test whether a filename carries one of the gzip extensions
 *	(GZIP_SUFFIX or OLD_GZIP_SUFFIX).
 *
 *	Returns 1 if name ends in either suffix and is strictly longer than
 *	that suffix, 0 otherwise.  An empty suffix never matches.
 */
int
gzipped_filename_p (const char *name)
{
    /* suffix lengths without the terminating NUL */
    const unsigned gz_len = sizeof(GZIP_SUFFIX) - 1;
    const unsigned old_gz_len = sizeof(OLD_GZIP_SUFFIX) - 1;
    const unsigned name_len = strlen(name);

    if (gz_len != 0 && name_len > gz_len &&
	strcmp(name + name_len - gz_len, GZIP_SUFFIX) == 0)
    {
	return 1;
    }
    if (old_gz_len != 0 && name_len > old_gz_len &&
	strcmp(name + name_len - old_gz_len, OLD_GZIP_SUFFIX) == 0)
    {
	return 1;
    }
    return 0;
}
/*
 * bzipped_filename_p --
 *	Test whether a filename carries the BZIP2_SUFFIX extension.
 *
 *	Returns 1 if name ends in BZIP2_SUFFIX and is strictly longer than
 *	the suffix itself, 0 otherwise.  An empty suffix never matches.
 */
int
bzipped_filename_p (const char *name)
{
    /* suffix length without the terminating NUL */
    const unsigned suffix_len = sizeof(BZIP2_SUFFIX) - 1;
    const unsigned name_len = strlen(name);

    if (suffix_len == 0 || name_len <= suffix_len) {
	return 0;
    }
    return strcmp(name + name_len - suffix_len, BZIP2_SUFFIX) == 0;
}
/*
 * sevenzipped_filename_p --
 *	Test whether a filename carries the SEVENZIP_SUFFIX extension.
 *
 *	Returns 1 if name ends in SEVENZIP_SUFFIX and is strictly longer than
 *	the suffix itself, 0 otherwise.  An empty suffix never matches.
 */
int
sevenzipped_filename_p (const char *name)
{
    /* suffix length without the terminating NUL */
    const unsigned suffix_len = sizeof(SEVENZIP_SUFFIX) - 1;
    const unsigned name_len = strlen(name);

    if (suffix_len == 0 || name_len <= suffix_len) {
	return 0;
    }
    return strcmp(name + name_len - suffix_len, SEVENZIP_SUFFIX) == 0;
}
/*
 * xz_filename_p --
 *	Test whether a filename carries the XZ_SUFFIX extension.
 *
 *	Returns 1 if name ends in XZ_SUFFIX and is strictly longer than the
 *	suffix itself, 0 otherwise.  An empty suffix never matches.
 */
int
xz_filename_p (const char *name)
{
    /* suffix length without the terminating NUL */
    const unsigned suffix_len = sizeof(XZ_SUFFIX) - 1;
    const unsigned name_len = strlen(name);

    if (suffix_len == 0 || name_len <= suffix_len) {
	return 0;
    }
    return strcmp(name + name_len - suffix_len, XZ_SUFFIX) == 0;
}
/*
* Check file readability
*/
#ifndef NO_ZIO
/*
 * readable_p --
 *	Probe whether name can be opened for reading.  The descriptor is
 *	closed again immediately.
 *
 *	Returns 1 if open() succeeded, 0 otherwise.
 */
static int
readable_p (const char *name)
{
    const int probe = open(name, O_RDONLY);

    if (probe >= 0) {
	close(probe);
	return 1;
    }
    return 0;
}
/*
 * writable_p --
 *	Probe whether name can be opened for writing.  The file is created
 *	(mode 0666) if it does not exist yet; the descriptor is closed again
 *	immediately.
 *
 *	Returns 1 if open() succeeded, 0 otherwise.
 */
static int
writable_p (const char *name)
{
    const int probe = open(name, O_WRONLY|O_CREAT, 0666);

    if (probe >= 0) {
	close(probe);
	return 1;
    }
    return 0;
}
#endif /* !NO_ZIO */
#ifndef NO_ZIO
/*
 * zio_build_command --
 *	Assemble the shell command
 *		<STD_PATH>;<cmd> <redirect>'<name>'
 *	in command[0..size-1].  The file name is wrapped in single quotes, and
 *	single quotes inside it are emitted as '\'' , so that blanks and shell
 *	metacharacters in the name are taken literally by the shell.
 *
 *	Returns 0 on success, -1 if the command does not fit into the buffer.
 */
static int
zio_build_command (char *command, size_t size, const char *cmd,
		   const char *redirect, const char *name)
{
    static const char escaped_quote[] = "'\\''";
    const size_t escaped_len = sizeof(escaped_quote) - 1;
    const char *p;
    size_t used;
    int n;

    n = snprintf(command, size, "%s;%s %s'", STD_PATH, cmd, redirect);
    if (n < 0 || (size_t)n >= size) {
	return -1;
    }
    used = (size_t)n;

    /* Invariant: used < size, i.e., there is always room for a NUL */
    for (p = name; *p != '\0'; p++) {
	if (*p == '\'') {
	    if (size - used <= escaped_len) {
		return -1;
	    }
	    memcpy(command + used, escaped_quote, escaped_len);
	    used += escaped_len;
	} else {
	    if (size - used <= 1) {
		return -1;
	    }
	    command[used++] = *p;
	}
    }

    /* closing quote plus terminating NUL */
    if (size - used < 2) {
	return -1;
    }
    command[used++] = '\'';
    command[used] = '\0';
    return 0;
}
#endif /* !NO_ZIO */

/*
 * zopen --
 *	Open a stdio stream, handling special filenames:
 *	- "-" yields a stream on a dup() of stdin (mode r) or stdout (mode w/a);
 *	- names with a recognized compression suffix yield a pipe to/from the
 *	  matching (un)compression command (modes r and w only);
 *	- anything else is passed to fopen().
 *
 *	name	file name
 *	mode	stdio mode string; only its first character is inspected for
 *		the special cases
 *
 *	Returns the stream, or NULL on failure.  errno is EINVAL for an
 *	unsupported mode (or when compressed I/O is not compiled in) and
 *	ENAMETOOLONG when the file name does not fit into the command buffer.
 *	Streams must be closed with zclose().
 */
FILE *zopen(const char *name, const char *mode)
{
    if (stdio_filename_p(name)) {
	/*
	 * Return stream to stdin or stdout
	 */
	if (*mode == 'r') {
		static int stdin_used = 0;
		static int stdin_warning = 0;
		int fd;

		/* warn once if several inputs are mapped to stdin */
		if (stdin_used) {
		    if (!stdin_warning) {
			fprintf(stderr,
				"warning: '-' used multiple times for input\n");
			stdin_warning = 1;
		    }
		} else {
		    stdin_used = 1;
		}

		/* dup so that closing the stream leaves fd 0 alone */
		fd = dup(0);
		return fd < 0 ? NULL : fdopen(fd, mode);
	} else if (*mode == 'w' || *mode == 'a') {
		static int stdout_used = 0;
		static int stdout_warning = 0;
		int fd;

		/* warn once if several outputs are mapped to stdout */
		if (stdout_used) {
		    if (!stdout_warning) {
			fprintf(stderr,
				"warning: '-' used multiple times for output\n");
			stdout_warning = 1;
		    }
		} else {
		    stdout_used = 1;
		}

		/* dup so that closing the stream leaves fd 1 alone */
		fd = dup(1);
		return fd < 0 ? NULL : fdopen(fd, mode);
	} else {
		errno = EINVAL;
		return NULL;
	}
    } else {
	const char *compress_cmd = NULL;
	const char *uncompress_cmd = NULL;
	int zip_to_stdout = 1;	/* compressor writes stdout, we redirect it */

	if (compressed_filename_p(name)) {
	    compress_cmd = COMPRESS_CMD;
	    uncompress_cmd = UNCOMPRESS_CMD;
	} else if (gzipped_filename_p(name)) {
	    compress_cmd = GZIP_CMD;
	    uncompress_cmd = GUNZIP_CMD;
	} else if (bzipped_filename_p(name)) {
	    compress_cmd = BZIP2_CMD;
	    uncompress_cmd = BUNZIP2_CMD;
	} else if (sevenzipped_filename_p(name)) {
	    compress_cmd = SEVENZIP_CMD;
	    uncompress_cmd = SEVENUNZIP_CMD;
	    zip_to_stdout = 0;	/* 7z takes the archive name as argument */
	} else if (xz_filename_p(name)) {
	    compress_cmd = XZ_CMD;
	    uncompress_cmd = XZ_DECOMPRESS_CMD;
	}

	if (compress_cmd != NULL) {
#ifdef NO_ZIO
	    fprintf(stderr, "Sorry, compressed I/O not available on this machine\n");
	    errno = EINVAL;
	    return NULL;
#else /* !NO_ZIO */
	    char command[MAXPATHLEN + 100];

	    /*
	     * Return stream to compress pipe
	     */
	    if (*mode == 'r') {
		if (!readable_p(name))
		    return NULL;

		if (zio_build_command(command, sizeof(command),
				      uncompress_cmd, "", name) < 0)
		{
		    errno = ENAMETOOLONG;
		    return NULL;
		}
		return popen(command, mode);
	    } else if (*mode == 'w') {
		if (!writable_p(name))
		    return NULL;

		if (zio_build_command(command, sizeof(command), compress_cmd,
				      zip_to_stdout ? ">" : "", name) < 0)
		{
		    errno = ENAMETOOLONG;
		    return NULL;
		}

		if (!zip_to_stdout) {
		    /*
		     * This is necessary because the compression program might
		     * complain if a zero-length file already exists.
		     * However, it means that existing file owner & permission
		     * attributes are not preserved.
		     */
		    unlink(name);
		}
		return popen(command, mode);
	    } else {
		errno = EINVAL;
		return NULL;
	    }
#endif /* !NO_ZIO */
	} else {
	    return fopen(name, mode);
	}
    }
}
/*
 * zclose --
 *	Close a stream created by zopen().
 *
 *	Streams that are not pipes (and anything on descriptor 0 or 1) are
 *	closed with fclose(); pipes are closed with pclose().
 *
 *	Returns 0 on success (including an uncompressor that was terminated
 *	by SIGPIPE), -1 if the stream could not be examined, the fclose()
 *	result for plain files, and otherwise the non-zero pclose() status of
 *	the (un)compression command, with errno set to EIO if it was 0.
 */
int
zclose(FILE *stream)
{
#ifdef NO_ZIO
    return fclose(stream);
#else /* !NO_ZIO */
    int status;
    int had_error;
    int fd;
    struct stat statb;

    /*
     * pclose(), according to the man page, should diagnose streams not
     * created by popen() and return -1.  however, on SGIs, it core dumps
     * in that case.  So we better be careful and try to figure out
     * what type of stream it is.
     */
    fd = fileno(stream);
    if (fstat(fd, &statb) < 0)
	return -1;

    if ((statb.st_mode & S_IFMT) != S_IFIFO || fd == 0 || fd == 1) {
	/* not a pipe (or stdin/stdout): an ordinary stdio stream */
	return fclose(stream);
    }

    /*
     * Sample the error indicator now: the stream must not be touched
     * again once pclose() has released it.
     */
    had_error = ferror(stream);

    status = pclose(stream);
    if (status == -1) {
	/*
	 * stream was not created by popen(), but pclose() does fclose
	 * for us in this case.
	 */
	return had_error;
    } else if (status == 0) {
	/* command finished successfully; leave errno alone */
	return 0;
    } else if (status == SIGPIPE) {
	/*
	 * It's normal for the uncompressor to terminate by SIGPIPE,
	 * i.e., if the user program closed the file before reaching
	 * EOF.
	 */
	return 0;
    } else {
	/*
	 * The compressor program terminated with an error, and supposedly
	 * has printed a message to stderr.
	 * Set errno to a generic error code if it hasn't been set already.
	 */
	if (errno == 0) {
	    errno = EIO;
	}
	return status;
    }
#endif /* NO_ZIO */
}
#ifdef STAND
/*
 * Stand-alone test driver (compiled with -DSTAND):
 *	zio file r	copies the (possibly compressed) file to stdout
 *	zio file w	copies stdin to the (possibly compressed) file
 *
 * Exit status: 0 on success, 1 on I/O or open errors, 2 on usage errors.
 */
int
main (int argc, char **argv)
{
    char buffer[BUFSIZ];
    size_t nread;
    FILE *stream;
    FILE *from, *to;
    int status = 0;

    if (argc < 3) {
	printf("usage: %s file {r|w}\n", argv[0]);
	exit(2);
    }

    stream = zopen(argv[1], argv[2]);
    if (!stream) {
	perror(argv[1]);
	exit(1);
    }

    /* Reading copies file -> stdout, anything else copies stdin -> file */
    if (*argv[2] == 'r') {
	from = stream;
	to = stdout;
    } else {
	from = stdin;
	to = stream;
    }

    while (!ferror(from) && !feof(from) && !ferror(to)) {
	nread = fread(buffer, 1, sizeof(buffer), from);
	(void)fwrite(buffer, 1, nread, to);
    }

    if (ferror(stdin)) {
	perror("stdin");
	status = 1;
    } else if (ferror(stdout)) {
	perror("stdout");
	status = 1;
    } else if (ferror(stream)) {
	perror(argv[1]);
	status = 1;
    }

    /* a failing (un)compression command is reported through zclose() */
    if (zclose(stream) != 0) {
	status = 1;
    }
    exit(status);
}
#endif /* STAND */

View File

@@ -0,0 +1,120 @@
/*
File: zio.h
Author: Andreas Stolcke
Date: Wed Feb 15 15:19:44 PST 1995
Description:
Copyright (c) 1994-2007, SRI International. All Rights Reserved.
RCS ID: $Id: zio.h,v 1.14 2009/08/22 22:41:19 stolcke Exp $
*/
/*
* $Log: zio.h,v $
* Revision 1.14 2009/08/22 22:41:19 stolcke
* support for xz compressed files
*
* Revision 1.13 2007/11/11 16:06:53 stolcke
* 7zip compression support
*
* Revision 1.12 2006/08/04 23:59:09 stolcke
* MSVC portability
*
* Revision 1.11 2006/03/28 01:15:10 stolcke
* include sys/signal.h to check for SIGPIPE
*
* Revision 1.10 2006/03/06 05:46:43 stolcke
* define NO_ZIO in zio.h instead of zio.c
*
* Revision 1.9 2006/03/01 00:45:45 stolcke
* allow disabling of zio for windows environment (NO_ZIO)
*
* Revision 1.8 2005/12/16 23:30:09 stolcke
* added support for bzip2-compressed files
*
* Revision 1.7 2003/02/21 20:18:53 stolcke
* avoid conflict if zopen is already defined in library
*
* Revision 1.6 1999/10/13 09:07:13 stolcke
* make filename checking functions public
*
* Revision 1.5 1995/06/22 19:58:26 stolcke
* ansi-fied
*
* Revision 1.4 1995/06/12 22:56:37 tmk
* Added ifdef around the redefinitions of fopen() and fclose().
*
*/
/*******************************************************************
Copyright 1994 SRI International. All rights reserved.
This is an unpublished work of SRI International and is not to be
used or disclosed except as provided in a license agreement or
nondisclosure agreement with SRI International.
********************************************************************/
#ifndef _ZIO_H
#define _ZIO_H
#ifdef __cplusplus
extern "C" {
#endif
/* Include declarations files. */
#include <stdio.h>
#include <signal.h> /* to check for SIGPIPE */
/* Avoid conflict with library function */
#ifdef HAVE_ZOPEN
#define zopen my_zopen
#endif
/* Constants */
/* Platforms without SIGPIPE get compressed I/O disabled automatically */
#if !defined(SIGPIPE)
#define NO_ZIO
#endif
/*
 * File name suffixes recognized by the *_filename_p() functions.
 * With NO_ZIO they are all empty, so no file name is treated as compressed.
 */
#ifdef NO_ZIO
# define COMPRESS_SUFFIX ""
# define GZIP_SUFFIX ""
# define OLD_GZIP_SUFFIX ""
# define BZIP2_SUFFIX ""
# define SEVENZIP_SUFFIX ""
# define XZ_SUFFIX ""
#else
# define COMPRESS_SUFFIX ".Z"
# define GZIP_SUFFIX ".gz"
# define OLD_GZIP_SUFFIX ".z"
# define BZIP2_SUFFIX ".bz2"
# define SEVENZIP_SUFFIX ".7z"
# define XZ_SUFFIX ".xz"
#endif /* NO_ZIO */
/* Define function prototypes. */
/* Each predicate returns non-zero if name matches, 0 otherwise */
int stdio_filename_p (const char *name);
int compressed_filename_p (const char *name);
int gzipped_filename_p (const char *name);
int bzipped_filename_p (const char *name);
int sevenzipped_filename_p (const char *name);
int xz_filename_p (const char *name);
/* Open/close a stream; zopen returns NULL on failure */
FILE * zopen (const char *name, const char *mode);
int zclose (FILE *stream);
/* With ZIO_HACK defined, users of this header implicitly use zopen/zclose in place of fopen/fclose */
#ifdef ZIO_HACK
#define fopen(name,mode) zopen(name,mode)
#define fclose(stream) zclose(stream)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _ZIO_H */

View File

@@ -0,0 +1,105 @@
/*
* ztest --
* test for zio.
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1997,2006 SRI International, 2013 Andreas Stolcke, Microsoft Corp. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/ztest.c,v 1.5 2019/09/09 23:13:15 stolcke Exp $";
#endif
#include <stdio.h>
#include <stdlib.h>
#include "zio.h"
#include "zlib.h"
#include "option.h"
#include "version.h"
/* Command line settings, filled in by Opt_Parse() from the options table */
char *inFile = "-";	/* -read: input file name */
char *outFile = "-";	/* -write: output file name */
int numLines = 0;	/* -lines: number of lines to copy; 0 copies everything */
int version = 0;	/* -version: print version information and exit */
int useZlib = 0;	/* -zlib: use the zlib gz* calls instead of zopen/zclose */
/* Option table: type, option name, address of the variable set, help text */
static Option options[] = {
{ OPT_TRUE, "version", (void *)&version, "print version information" },
{ OPT_TRUE, "zlib", (void *)&useZlib, "use zlib" },
{ OPT_STRING, "read", (void *)&inFile, "input file" },
{ OPT_STRING, "write", (void *)&outFile, "output file" },
{ OPT_INT, "lines", (void *)&numLines, "number of lines to copy" },
};
/*
 * ztest main --
 *	Copy lines from inFile to outFile, either through zopen()/zclose() or,
 *	with -zlib, through the zlib gz* interface.  At most numLines lines are
 *	copied (all of them if numLines is 0).  If at least one line was
 *	copied, a "THE END AFTER n LINES" trailer is appended.  The results of
 *	closing both streams are reported on stderr.
 */
int
main(int argc, char **argv)
{
    char buffer[1024];
    FILE *in, *out;
    gzFile gzin, gzout;
    int result;
    int lineno;

    Opt_Parse(argc, argv, options, Opt_Number(options), 0);

    if (version) {
	printVersion(RcsId);
	exit(0);
    }

    /* Open input first, then output; give up on the first failure */
    if (!useZlib) {
	in = zopen(inFile, "r");
	if (in == NULL) {
	    perror(inFile);
	    exit(1);
	}

	out = zopen(outFile, "w");
	if (out == NULL) {
	    perror(outFile);
	    exit(1);
	}
    } else {
	gzin = gzopen(inFile, "r");
	if (gzin == NULL) {
	    perror(inFile);
	    exit(1);
	}

	gzout = gzopen(outFile, "w");
	if (gzout == NULL) {
	    perror(outFile);
	    exit(1);
	}
    }

    /* Copy loop: stop at the line limit (if any) or at end of input */
    for (lineno = 0; numLines == 0 || lineno < numLines; lineno++) {
	char *line;

	if (useZlib) {
	    line = gzgets(gzin, buffer, sizeof(buffer));
	} else {
	    line = fgets(buffer, sizeof(buffer), in);
	}
	if (!line) {
	    break;
	}

	if (useZlib) {
	    gzputs(gzout, buffer);
	} else {
	    fputs(buffer, out);
	}
    }

    if (lineno > 0) {
	if (useZlib) {
	    gzprintf(gzout, "THE END AFTER %d LINES\n", lineno);
	} else {
	    fprintf(out, "THE END AFTER %d LINES\n", lineno);
	}
    }

    /* Close input, then output, reporting each status */
    if (useZlib) {
	result = gzclose(gzin);
    } else {
	result = zclose(in);
    }
    fprintf(stderr, "zclose(in) = %d\n", result);

    if (useZlib) {
	result = gzclose(gzout);
    } else {
	result = zclose(out);
    }
    fprintf(stderr, "zclose(out) = %d\n", result);

    exit(0);
}