competition update
This commit is contained in:
23
language_model/srilm-1.7.3/misc/src/Boolean.h
Normal file
23
language_model/srilm-1.7.3/misc/src/Boolean.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Boolean Type
|
||||
*
|
||||
* Copyright (c) 1995,2006 SRI International. All Rights Reserved.
|
||||
*
|
||||
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/Boolean.h,v 1.5 2006/01/09 17:39:03 stolcke Exp $
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _BOOLEAN_H_
#define _BOOLEAN_H_

/*
 * Boolean is the truth-value type used throughout this code.
 *
 * Compilers known to provide a built-in `bool' -- and, more generally,
 * any compiler that announces ISO C++ conformance through __cplusplus --
 * get a plain alias for it.  Everything else falls back to an int-based
 * emulation that also has to supply the `false' and `true' constants
 * (which would be ill-formed on a compiler where they are keywords).
 */
#if defined(__GNUG__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_CC) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 199711L)

typedef bool Boolean;

#else /* ! __GNUG__ && !__INTEL_COMPILER && !__SUNPRO_CC && !_MSC_VER && pre-ISO C++ */

typedef int Boolean;
const Boolean false = 0;
const Boolean true = 1;

#endif /* __GNUG__ || __INTEL_COMPILER || __SUNPRO_CC || _MSC_VER || ISO C++ */

#endif /* _BOOLEAN_H_ */
|
||||
15
language_model/srilm-1.7.3/misc/src/Debug.cc
Normal file
15
language_model/srilm-1.7.3/misc/src/Debug.cc
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Debug.cc --
|
||||
* Generic debugging support
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char Copyright[] = "Copyright (c) 1995, SRI International. All Rights Reserved.";
|
||||
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/Debug.cc,v 1.2 1996/05/30 17:57:48 stolcke Exp $";
|
||||
#endif
|
||||
|
||||
#include "Debug.h"
|
||||
|
||||
/* Storage for the static member declared in class Debug; it starts out at level 0. */
unsigned Debug::debugAll = 0; /* global debugging level */
|
||||
|
||||
78
language_model/srilm-1.7.3/misc/src/Debug.h
Normal file
78
language_model/srilm-1.7.3/misc/src/Debug.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Debug.h --
|
||||
* General object debugging facility
|
||||
*
|
||||
* Debug is a Mix-in class that provides some simple, but consistent
|
||||
* debugging output handling.
|
||||
*
|
||||
* Copyright (c) 1995-2010 SRI International. All Rights Reserved.
|
||||
*
|
||||
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/Debug.h,v 1.8 2013/03/30 15:55:25 stolcke Exp $
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _Debug_h_
|
||||
#define _Debug_h_
|
||||
|
||||
#ifdef PRE_ISO_CXX
|
||||
# include <iostream.h>
|
||||
#else
|
||||
# include <iostream>
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
#include <Boolean.h>
|
||||
|
||||
/*
|
||||
* Here is the typical usage for this mixin class.
|
||||
* First, include it in the parents of some class FOO
|
||||
*
|
||||
 *	class FOO: public OTHER_PARENT, public Debug { ... }
|
||||
*
|
||||
* Inside FOO's methods use code such as
|
||||
*
|
||||
* if (debug(3)) {
|
||||
* dout() << "I'm feeling sick today\n";
|
||||
* }
|
||||
*
|
||||
* Finally, use that code, after setting the debugging level
|
||||
* of the object and/or redirecting the debugging output.
|
||||
*
|
||||
* FOO foo;
|
||||
* foo.debugme(4); foo.dout(cout);
|
||||
*
|
||||
* Debugging can also be set globally (to affect all objects of
|
||||
 * all classes).
|
||||
*
|
||||
* foo.debugall(1);
|
||||
*
|
||||
*/
|
||||
class Debug
|
||||
{
|
||||
public:
|
||||
Debug(unsigned level = 0)
|
||||
: nodebug(false), debugLevel(level), debugStream(&cerr) {};
|
||||
virtual ~Debug() {}; /* prevent warning about no virtual dtor */
|
||||
|
||||
Boolean debug(unsigned level) const /* true if debugging */
|
||||
{ return (!nodebug && (debugAll >= level || debugLevel >= level)); };
|
||||
virtual void debugme(unsigned level) { debugLevel = level; };
|
||||
/* set object's debugging level */
|
||||
void debugall(unsigned level) { debugAll = level; };
|
||||
/* set global debugging level */
|
||||
unsigned debuglevel() const { return debugLevel; };
|
||||
|
||||
virtual ostream &dout() const { return *debugStream; };
|
||||
/* output stream for use with << */
|
||||
virtual ostream &dout(ostream &stream) /* redirect debugging output */
|
||||
{ debugStream = &stream; return stream; };
|
||||
|
||||
Boolean nodebug; /* temporarily disable debugging */
|
||||
private:
|
||||
static unsigned debugAll; /* global debugging level */
|
||||
unsigned debugLevel; /* level of output -- the higher the more*/
|
||||
ostream *debugStream; /* current debug output stream */
|
||||
};
|
||||
|
||||
#endif /* _Debug_h_ */
|
||||
|
||||
807
language_model/srilm-1.7.3/misc/src/File.cc
Normal file
807
language_model/srilm-1.7.3/misc/src/File.cc
Normal file
@@ -0,0 +1,807 @@
|
||||
/*
|
||||
* File.cc --
|
||||
* File I/O for LM
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char Copyright[] = "Copyright (c) 1995-2011 SRI International, 2012-2013 Andreas Stolcke, Microsoft Corp. All Rights Reserved.";
|
||||
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/File.cc,v 1.37 2019/09/09 23:13:15 stolcke Exp $";
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "zio.h"
|
||||
#include "Boolean.h"
|
||||
#include "File.h"
|
||||
#include "Array.cc"
|
||||
|
||||
#include "srilm_iconv.h"
|
||||
|
||||
/*
 * On these platforms the fseeko()/ftello() names are mapped onto plain
 * fseek()/ftell().  (The Intel compiler case only applies on linux and
 * only up to version 700.)
 */
#if defined(sgi) || defined(_MSC_VER) || defined(WIN32) || \
    (defined(linux) && defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 700)
# define fseeko fseek
# define ftello ftell
#endif

/*
 * Deal with different types of iconv() input buffer argument:
 * ICONV_CONST expands to `const' on sun and sgi and to nothing elsewhere.
 */
#if !defined(sun) && !defined(sgi)
# define ICONV_CONST
#else
# define ICONV_CONST const
#endif

/* blank, tab, carriage return and newline */
const char *wordSeparators = " \t\r\n";

/* initial size of a File's line buffer */
#define START_BUF_LEN 128	// needs to be > 2

/* value of File::iconvID while no iconv conversion is in use */
#define iconvNone ((void *)-1)
|
||||
|
||||
/*
 * Open the named file.  Names ending in GZIP_SUFFIX are opened through
 * gzopen(), all other names through zopen().  A private copy of `name'
 * is kept.  If nothing could be opened and exitOnError is non-zero, the
 * error is reported with perror() and the process exits with that status.
 */
File::File(const char *name, const char *mode, int exitOnError)
    : name(name ? strdup(name) : 0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(NULL),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);

    /* from here on `name' and `exitOnError' are the parameters, not the members */
    if (name) {
	const size_t suffixLen = sizeof(GZIP_SUFFIX) - 1;
	const unsigned nameLen = strlen(name);

	const bool gzipped =
	    nameLen > suffixLen &&
	    strcmp(name + nameLen - suffixLen, GZIP_SUFFIX) == 0;

	if (gzipped) {
	    gzf = gzopen(name, mode);
	} else {
	    fp = zopen(name, mode);
	}
    }

    if (fp == NULL && gzf == NULL && exitOnError) {
	perror(name);
	exit(exitOnError);
    }

    strFile = "";
}
|
||||
|
||||
/*
 * Wrap a stream supplied by the caller.  No name is recorded, so neither
 * the destructor nor the reopen() variants will close this stream.
 */
File::File(FILE *fp, int exitOnError)
    : name(0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(fp),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);
    strFile.clear();
}
|
||||
|
||||
/*
 * Set up string ("in-memory") I/O, starting from the contents of fileStr.
 *
 * fileStr		NUL-terminated initial contents; a null pointer is
 *			treated as an empty string
 * fileStrLen		currently not consulted: the contents are taken up to
 *			the first NUL byte of fileStr
 * exitOnError		stored in the object
 * reserved_length	capacity to reserve for the string; only applied when
 *			it exceeds the initial length
 */
File::File(const char *fileStr, size_t fileStrLen, int exitOnError, int reserved_length)
    : name(0), lineno(0), exitOnError(exitOnError), skipComments(true),
      fp(NULL), gzf(NULL), buffer((char *)malloc(START_BUF_LEN)), bufLen(START_BUF_LEN),
      reuseBuffer(false), atFirstLine(true), encoding(ASCII), iconvID(iconvNone),
      strFileLen(0), strFilePos(0), strFileActive(0)
{
    assert(buffer != 0);

    (void)fileStrLen;

    /* assigning a null pointer to a std::string is undefined behaviour */
    strFile = fileStr ? fileStr : "";
    strFileLen = strFile.length();
    strFileActive = 1;

    // only reserve space if bigger than current capacity
    if (reserved_length > strFileLen) strFile.reserve(reserved_length);
}
|
||||
|
||||
/*
 * Set up string ("in-memory") I/O from a copy of fileStr.  Extra capacity
 * is reserved only when reserved_length exceeds the string's length.
 */
File::File(std::string& fileStr, int exitOnError, int reserved_length)
    : name(0),
      lineno(0),
      exitOnError(exitOnError),
      skipComments(true),
      fp(NULL),
      gzf(NULL),
      buffer((char *)malloc(START_BUF_LEN)),
      bufLen(START_BUF_LEN),
      reuseBuffer(false),
      atFirstLine(true),
      encoding(ASCII),
      iconvID(iconvNone),
      strFileLen(0),
      strFilePos(0),
      strFileActive(0)
{
    assert(buffer != 0);

    strFile = fileStr;
    strFileActive = 1;
    strFileLen = strFile.length();

    if (strFileLen < reserved_length) {
	strFile.reserve(reserved_length);
    }
}
|
||||
|
||||
|
||||
/*
 * Release everything the object owns: the stream (only if it was opened
 * by name), the copy of the name, any iconv descriptor, the line buffer.
 */
File::~File()
{
    /* a non-null name means we opened the file, so closing it is our job */
    if (name) {
	close();
	free(name);
    }

    if (iconvID != iconvNone) {
	iconv_close((iconv_t)iconvID);
    }

    /* free() ignores null pointers */
    free(buffer);
    buffer = NULL;
}
|
||||
|
||||
int
|
||||
File::close()
|
||||
{
|
||||
int status = 0;
|
||||
|
||||
if (gzf) {
|
||||
status = gzclose(gzf);
|
||||
}
|
||||
else if (fp) {
|
||||
status = zclose(fp);
|
||||
}
|
||||
|
||||
fp = NULL;
|
||||
gzf = NULL;
|
||||
if (status != 0) {
|
||||
if (exitOnError != 0) {
|
||||
perror(name ? name : "");
|
||||
exit(exitOnError);
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
 * Switch the object to a different named file.
 *
 * String I/O is turned off and the encoding detection state is reset.
 * If the current file was opened by name it is closed first.  The new
 * file is then opened exactly as in File::File(name, mode).
 *
 * Returns true if a stream is open afterwards, false otherwise.  With
 * exitOnError set, a failed open is reported through perror() and ends
 * the process.
 */
Boolean
File::reopen(const char *newName, const char *mode)
{
    strFile = "";
    strFileLen = 0;
    strFilePos = 0;
    strFileActive = 0;

    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
	iconv_close((iconv_t)iconvID);
	iconvID = iconvNone;
    }

    /*
     * Duplicate the new name BEFORE the old one is released: `name' is a
     * public member, so a caller may legitimately pass it back in
     * (file.reopen(file.name, mode)), and it must not be read after free().
     */
    char *nameCopy = newName ? strdup(newName) : 0;

    /*
     * If we opened the file (name != 0), then we should close it
     * as well.
     */
    if (name != 0) {
	close();
	free(name);
    }

    /*
     * Open new file as in File::File()
     */
    name = nameCopy;

    unsigned len = name ? strlen(name) : 0;
    if (len > sizeof(GZIP_SUFFIX)-1 &&
	(strcmp(name + len - (sizeof(GZIP_SUFFIX)-1), GZIP_SUFFIX) == 0))
    {
	gzf = gzopen(name, mode);
    } else if (name) {
	fp = zopen(name, mode);
    }

    if (fp == 0 && gzf == 0) {
	if (exitOnError) {
	    perror(name);
	    exit(exitOnError);
	}

	return false;
    }

    return true;
}
|
||||
|
||||
/*
 * Re-open the current stdio stream with a different mode, by flushing it
 * and calling fdopen() on its descriptor.  String I/O is turned off and
 * the encoding detection state is reset first.  Returns false when there
 * is no stdio stream or fdopen() fails, true otherwise.
 */
Boolean
File::reopen(const char *mode)
{
    /* leave string mode */
    strFileActive = 0;
    strFilePos = 0;
    strFileLen = 0;
    strFile = "";

    /* encoding gets detected afresh on the next read */
    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
	iconv_close((iconv_t)iconvID);
	iconvID = iconvNone;
    }

    if (fp == NULL) {
	return false;
    }

    if (fflush(fp) != 0 && exitOnError != 0) {
	perror(name ? name : "");
	exit(exitOnError);
    }

    FILE *replacement = fdopen(fileno(fp), mode);
    if (replacement == 0) {
	return false;
    }

    // XXX: we can't fclose(fp), so the old stream object becomes garbage
    fp = replacement;
    return true;
}
|
||||
|
||||
/*
 * Switch the object to string ("in-memory") I/O on the contents of fileStr.
 *
 * fileStr		NUL-terminated initial contents; a null pointer is
 *			treated as an empty string
 * fileStrLen		currently not consulted: the contents are taken up to
 *			the first NUL byte of fileStr
 * reserved_length	capacity to reserve for the string; only applied when
 *			it exceeds the initial length
 *
 * A file that was opened by name is closed first.  Always returns true.
 */
Boolean
File::reopen(const char *fileStr, size_t fileStrLen, int reserved_length)
{
    (void)fileStrLen;

    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
	iconv_close((iconv_t)iconvID);
	iconvID = iconvNone;
    }

    if (name != 0) {
	close();
    }

    /* assigning a null pointer to a std::string is undefined behaviour */
    strFile = fileStr ? fileStr : "";
    strFileLen = strFile.length();
    strFilePos = 0;
    strFileActive = 1;

    // only reserve space if bigger than current capacity
    if (reserved_length > strFileLen) strFile.reserve(reserved_length);

    return true;
}
|
||||
|
||||
/*
 * Switch the object to string ("in-memory") I/O on a copy of fileStr.
 * A file that was opened by name is closed first; the read position is
 * moved to the start.  Always returns true.
 */
Boolean
File::reopen(std::string& fileStr, int reserved_length)
{
    /* encoding gets detected afresh on the next read */
    atFirstLine = true;
    encoding = ASCII;
    if (iconvID != iconvNone) {
	iconv_close((iconv_t)iconvID);
	iconvID = iconvNone;
    }

    if (name) {
	close();
    }

    strFile = fileStr;
    strFileActive = 1;
    strFilePos = 0;
    strFileLen = strFile.length();

    /* reserve only when asked for more than the string already holds */
    if (strFileLen < reserved_length) {
	strFile.reserve(reserved_length);
    }

    return true;
}
|
||||
|
||||
/*
 * Report whether the underlying stream is in an error state.
 * String I/O never reports an error.  For a zlib stream the message
 * from gzerror() decides (missing or non-empty message => error); for a
 * stdio stream a missing stream or a set ferror() flag counts as an error.
 */
Boolean
File::error()
{
    // i/o using strings not file pointer, so no error
    if (strFileActive) {
	return 0;
    }

    if (!gzf) {
	return (fp == 0) || ferror(fp);
    }

    const char *message = gzerror(gzf, NULL);
    return message == 0 || message[0] != '\0';
}
|
||||
|
||||
/* Byte sequences that File::fgetsUTF8() looks for at the very start of the input */
const char UTF8magic[] = "\xEF\xBB\xBF";
const char UTF16LEmagic[] = "\xFF\xFE";
const char UTF16BEmagic[] = "\xFE\xFF";
|
||||
|
||||
char *
|
||||
File::fgetsUTF8(char *buffer, int buflen)
|
||||
{
|
||||
// Sanity check - need at least space for NULL terminator
|
||||
if ((buflen < 1) || !buffer) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
memset(buffer, 0, buflen);
|
||||
|
||||
/*
|
||||
* make sure 2-byte encodings have one extra byte for final \0
|
||||
*/
|
||||
char *result = fgets(buffer, buflen % 2 ? buflen : buflen - 1);
|
||||
|
||||
if (result == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* When at the start of the file, try to determine charcter encoding scheme
|
||||
*/
|
||||
if (atFirstLine) {
|
||||
const unsigned UTF8magicLen = sizeof(UTF8magic)-1;
|
||||
const unsigned UTF16LEmagicLen = sizeof(UTF16LEmagic)-1;
|
||||
const unsigned UTF16BEmagicLen = sizeof(UTF16BEmagic)-1;
|
||||
unsigned magicLen = 0;
|
||||
|
||||
atFirstLine = false;
|
||||
iconvID = (void *)0;
|
||||
|
||||
if (strncmp(buffer, UTF8magic, UTF8magicLen) == 0) {
|
||||
encoding = UTF8;
|
||||
magicLen = UTF8magicLen;
|
||||
} else if (strncmp(buffer, UTF16LEmagic, UTF16LEmagicLen) == 0) {
|
||||
encoding = UTF16LE;
|
||||
magicLen = UTF16LEmagicLen;
|
||||
iconvID = (void *)iconv_open("UTF-8", "UTF-16LE");
|
||||
} else if (strncmp(buffer, UTF16BEmagic, UTF16BEmagicLen) == 0) {
|
||||
encoding = UTF16BE;
|
||||
magicLen = UTF16BEmagicLen;
|
||||
iconvID = (void *)iconv_open("UTF-8", "UTF-16BE");
|
||||
}
|
||||
|
||||
if (iconvID == iconvNone) {
|
||||
this->position() << "conversion from UTF-16" << (encoding == UTF16LE ? "LE" : "BE") << " not supported\n";
|
||||
return 0;
|
||||
} else if (iconvID == (void *)0) {
|
||||
iconvID = iconvNone;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove the magic string from the buffer
|
||||
*/
|
||||
if (magicLen > 0) {
|
||||
memmove(buffer, buffer + magicLen, buflen - magicLen);
|
||||
memset(buffer + buflen - magicLen, 0, magicLen);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* change 16-bit encoding to UTF-8 if needed
|
||||
*/
|
||||
if (iconvID != iconvNone) {
|
||||
makeArray(char, buffer2, buflen);
|
||||
|
||||
ICONV_CONST char *cp = buffer;
|
||||
size_t inSize = buflen % 2 ? buflen-1 : buflen;
|
||||
char *dp = buffer2;
|
||||
size_t outSize = buflen;
|
||||
|
||||
#ifdef DEBUG_ICONV
|
||||
::fprintf(stderr, "insize = %d input chars = ", (int)inSize);
|
||||
for (unsigned j = 0; j < inSize; j ++) {
|
||||
::fprintf(stderr, "'%c'(%03o) ", (buffer2[j] == '\r' ? 'R' : buffer[j]), ((unsigned char *)buffer)[j]);
|
||||
}
|
||||
::fprintf(stderr, "\n");
|
||||
#endif
|
||||
|
||||
if (iconv((iconv_t)iconvID, &cp, &inSize, &dp, &outSize) == (size_t)-1) {
|
||||
perror("iconv");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ICONV
|
||||
::fprintf(stderr, "buflen = %d outsize = %d chars = ", buflen, (int)outSize);
|
||||
for (unsigned j = 0; j < outSize; j ++) {
|
||||
::fprintf(stderr, "'%c'(%03o) ", (buffer2[j] == '\r' ? 'R' : buffer2[j]), ((unsigned char *)(char *)buffer2)[j]);
|
||||
}
|
||||
::fprintf(stderr, "\n");
|
||||
#endif
|
||||
|
||||
memcpy(buffer, buffer2, outSize);
|
||||
|
||||
// Makes it clear to static code analysis that buffer will
|
||||
// be NULL-terminated; even though we expect outSize above
|
||||
// includes the NULL-terminator.
|
||||
if (outSize < (size_t)buflen) {
|
||||
memset(buffer + outSize, 0, buflen - outSize);
|
||||
} else {
|
||||
buffer[buflen - 1] = 0;
|
||||
}
|
||||
|
||||
if (encoding == UTF16LE) {
|
||||
/*
|
||||
* fgets() only reads up the \n --
|
||||
* need to skip the following \0 byte
|
||||
*/
|
||||
unsigned len = strlen(buffer);
|
||||
if (len > 0 && buffer[len-1] == '\n') fgetc();
|
||||
}
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
char *
|
||||
File::getline()
|
||||
{
|
||||
if (reuseBuffer) {
|
||||
reuseBuffer = false;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
unsigned bufOffset = 0;
|
||||
Boolean lineDone = false;
|
||||
|
||||
do {
|
||||
if (fgetsUTF8(buffer + bufOffset, bufLen - bufOffset) == 0) {
|
||||
if (bufOffset == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
buffer[bufOffset] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if line end has been reached
|
||||
*/
|
||||
unsigned numbytes = strlen(buffer+bufOffset);
|
||||
|
||||
if (numbytes > 0 && buffer[bufOffset+numbytes-1] != '\n') {
|
||||
if (bufOffset + numbytes >= bufLen - START_BUF_LEN) {
|
||||
/*
|
||||
* enlarge buffer
|
||||
*/
|
||||
//cerr << "!REALLOC!" << endl;
|
||||
bufLen *= 2;
|
||||
buffer = (char *)realloc(buffer, bufLen);
|
||||
assert(buffer != 0);
|
||||
}
|
||||
bufOffset += numbytes;
|
||||
} else {
|
||||
lineDone = true;
|
||||
}
|
||||
} while (!lineDone);
|
||||
|
||||
lineno ++;
|
||||
|
||||
/*
|
||||
* skip entirely blank lines
|
||||
*/
|
||||
register const char *p = buffer;
|
||||
while (*p && isspace((unsigned char)*p)) p++;
|
||||
if (*p == '\0') {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* skip comment lines (started with double '#')
|
||||
*/
|
||||
if (skipComments && buffer[0] == '#' && buffer[1] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
reuseBuffer = false;
|
||||
return buffer;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
File::ungetline()
|
||||
{
|
||||
reuseBuffer = true;
|
||||
}
|
||||
|
||||
/*
 * Write a "<name>: line <lineno>: " prefix (the name part only if a name
 * is known) to the stream and return the stream for further output.
 */
ostream &
File::position(ostream &stream)
{
    if (name != 0) {
	stream << name;
	stream << ": ";
    }

    stream << "line ";
    stream << lineno;
    stream << ": ";
    return stream;
}
|
||||
|
||||
/*
 * Write a "<name>: offset <n>: " prefix to the stream, where n is the
 * ftello() position of the stdio stream.  Without a stdio stream the
 * offset is reported as unknown.  Returns the stream.
 */
ostream &
File::offset(ostream &stream)
{
    if (name != 0) {
	stream << name;
	stream << ": ";
    }

    if (!fp) {
	return stream << "offset unknown " << ": ";
    }

    return stream << "offset " << ::ftello(fp) << ": ";
}
|
||||
|
||||
|
||||
|
||||
/*------------------------------------------------------------------------*
|
||||
* "stdio" functions:
|
||||
*------------------------------------------------------------------------*/
|
||||
|
||||
int
|
||||
File::fgetc()
|
||||
{
|
||||
if (gzf) {
|
||||
return gzgetc(gzf);
|
||||
} else if (fp) {
|
||||
return ::fgetc(fp);
|
||||
}
|
||||
|
||||
if (!strFileActive || strFileLen <= 0 || strFilePos >= strFileLen) return EOF;
|
||||
|
||||
return strFile.at(strFilePos++);
|
||||
}
|
||||
|
||||
// override fgets in case object using strFile
|
||||
char *
|
||||
File::fgets(char *str, int n)
|
||||
{
|
||||
if (gzf) {
|
||||
return gzgets(gzf, str, n);
|
||||
} else if (fp) {
|
||||
return ::fgets(str, n, fp);
|
||||
}
|
||||
|
||||
if (!str || n <= 0) return NULL;
|
||||
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < n - 1; i++) {
|
||||
int c = fgetc();
|
||||
if (c == EOF) {
|
||||
break;
|
||||
}
|
||||
|
||||
str[i] = c;
|
||||
// xxx use \r on MacOS X?
|
||||
if (c == '\n') {
|
||||
// include \n in result
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// always terminate
|
||||
str[i] = '\0';
|
||||
if (i == 0)
|
||||
return NULL;
|
||||
else
|
||||
return str;
|
||||
}
|
||||
|
||||
int
|
||||
File::fputc(int c)
|
||||
{
|
||||
if (gzf) {
|
||||
return gzputc(gzf, c);
|
||||
} else if (fp) {
|
||||
return ::fputc(c, fp);
|
||||
}
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return EOF;
|
||||
|
||||
strFile += c;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
File::fputs(const char *str)
|
||||
{
|
||||
if (gzf) {
|
||||
return gzputs(gzf, str);
|
||||
} else if (fp) {
|
||||
return ::fputs(str, fp);
|
||||
}
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return -1;
|
||||
|
||||
strFile += str;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
File::fprintf(const char *format, ...)
|
||||
{
|
||||
if (gzf) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
int num_written = gzvprintf(gzf, format, args);
|
||||
va_end(args);
|
||||
return num_written;
|
||||
} else if (fp) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
int num_written = vfprintf(fp, format, args);
|
||||
va_end(args);
|
||||
return num_written;
|
||||
}
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return -1;
|
||||
|
||||
// This is the default max size to append at any one time. On sgi we
|
||||
// get a buffer overrrun if we exceed this but elsewhere we manually
|
||||
// allocate a larger buffer if needed.
|
||||
const int maxMessage = 4096;
|
||||
char message[maxMessage];
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
#if defined(sgi)
|
||||
// vsnprintf() doesn't exist in Irix 5.3
|
||||
// Return value >= 0 is number of bytes written to buffer not including
|
||||
// NULL terminator.
|
||||
int nwritten = vsprintf(message, format, args);
|
||||
if (nwritten >= maxMessage) {
|
||||
// Buffer overflow!
|
||||
if (exitOnError) {
|
||||
exit(exitOnError);
|
||||
}
|
||||
// At least indicate overflow in output (if haven't crashed already)
|
||||
sprintf(message, "In class File, BUFFER OVERFLOW %d >= %d\n", nwritten, maxMessage);
|
||||
}
|
||||
strFile += message;
|
||||
#else
|
||||
// Return value not consistent...
|
||||
// Non-Windows: >= 0 is number of bytes needed in buffer not including
|
||||
// NULL terminator.
|
||||
// Windows: Returns -1 if output truncated.
|
||||
int checkSize = vsnprintf(message, maxMessage, format, args);
|
||||
if ((checkSize >= maxMessage) || (checkSize < 0)) {
|
||||
int curSize;
|
||||
if (checkSize >= maxMessage) {
|
||||
// Should know exact size needed
|
||||
curSize = checkSize + 1;
|
||||
} else {
|
||||
// Start with double initial size
|
||||
curSize = maxMessage * 2;
|
||||
}
|
||||
bool success = false;
|
||||
// Loop until successful but also impose 1GB cap on buffer size.
|
||||
const int maxAlloc = 1000000000;
|
||||
while (!success) {
|
||||
va_end(args);
|
||||
va_start(args, format);
|
||||
char* buf = new char[curSize];
|
||||
checkSize = vsnprintf(buf, curSize, format, args);
|
||||
if ((checkSize >= 0) && (checkSize < curSize)) {
|
||||
strFile += buf;
|
||||
success = true;
|
||||
} else {
|
||||
// Try larger size
|
||||
if (curSize <= maxAlloc / 2) {
|
||||
curSize *= 2;
|
||||
} else if (curSize < maxAlloc) {
|
||||
// Don't exceed cap
|
||||
curSize = maxAlloc;
|
||||
} else {
|
||||
// Fail
|
||||
delete[] buf;
|
||||
if (exitOnError) {
|
||||
exit(exitOnError);
|
||||
}
|
||||
strFile += "In class File, failed writing to buffer\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
delete[] buf;
|
||||
}
|
||||
} else {
|
||||
strFile += message;
|
||||
}
|
||||
#endif
|
||||
|
||||
va_end(args);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t
|
||||
File::fread(void *data, size_t size, size_t n)
|
||||
{
|
||||
if (gzf) {
|
||||
return gzread(gzf, data, size * n)/size;
|
||||
} else if (fp) {
|
||||
return ::fread(data, size, n, fp);
|
||||
}
|
||||
|
||||
// not supported for input from string
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t
|
||||
File::fwrite(const void *data, size_t size, size_t n)
|
||||
{
|
||||
if (gzf) {
|
||||
return gzwrite(gzf, data, size * n)/size;
|
||||
} else if (fp) {
|
||||
return ::fwrite(data, size, n, fp);
|
||||
}
|
||||
|
||||
// not supported for output to string
|
||||
return 0;
|
||||
}
|
||||
|
||||
long long
|
||||
File::ftell()
|
||||
{
|
||||
if (gzf) {
|
||||
return gztell(gzf);
|
||||
} else if (fp) {
|
||||
return ::ftello(fp);
|
||||
}
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return -1;
|
||||
|
||||
return (long long) strFilePos;
|
||||
}
|
||||
|
||||
int
|
||||
File::fseek(long long offset, int origin)
|
||||
{
|
||||
if (gzf) {
|
||||
return gzseek(gzf, offset, origin);
|
||||
} else if (fp) {
|
||||
return ::fseeko(fp, offset, origin);
|
||||
}
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return -1;
|
||||
|
||||
// xxx doesn't do (much) error checking
|
||||
if (origin == SEEK_CUR) {
|
||||
strFilePos += offset;
|
||||
} else if (origin == SEEK_END) {
|
||||
strFilePos = strFileLen + offset; // use negative offset!
|
||||
} else if (origin == SEEK_SET) {
|
||||
strFilePos = offset;
|
||||
} else {
|
||||
// invalid origin
|
||||
return -1;
|
||||
}
|
||||
|
||||
// xxx we check that position is not negative, but (currently) allow it to be greater than length
|
||||
if (strFilePos < 0) strFilePos = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *
|
||||
File::c_str()
|
||||
{
|
||||
if (fp || gzf) return 0;
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return NULL;
|
||||
|
||||
return strFile.c_str();
|
||||
}
|
||||
|
||||
const char *
|
||||
File::data()
|
||||
{
|
||||
if (fp || gzf) return 0;
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return NULL;
|
||||
|
||||
return strFile.data();
|
||||
}
|
||||
|
||||
size_t
|
||||
File::length()
|
||||
{
|
||||
if (fp || gzf) return 0;
|
||||
|
||||
// error condition, no string active
|
||||
if (!strFileActive) return 0;
|
||||
|
||||
return strFile.length();
|
||||
}
|
||||
|
||||
144
language_model/srilm-1.7.3/misc/src/File.h
Normal file
144
language_model/srilm-1.7.3/misc/src/File.h
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
* File.h
|
||||
* File I/O utilities for LM
|
||||
*
|
||||
* Copyright (c) 1995-2011 SRI International, 2012-2013 Andreas Stolcke, Microsoft Corp. All Rights Reserved.
|
||||
*
|
||||
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/File.h,v 1.27 2019/09/09 23:13:15 stolcke Exp $
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _File_h_
|
||||
#define _File_h_
|
||||
|
||||
#ifdef PRE_ISO_CXX
|
||||
# include <iostream.h>
|
||||
#else
|
||||
# include <iostream>
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "zio.h"
|
||||
#include "zlib.h"
|
||||
|
||||
#include "Boolean.h"
|
||||
|
||||
/*
|
||||
* Tell clients that we can handle .gz files regardless of zio working
|
||||
*/
|
||||
#ifdef GZIP_SUFFIX
# undef GZIP_SUFFIX
#endif
#define GZIP_SUFFIX	".gz"

const unsigned maxWordsPerLine = 50000;

/* defined in File.cc */
extern const char *wordSeparators;
|
||||
|
||||
/*
|
||||
 * A File object is a wrapper around a stdio FILE pointer.  It presently
|
||||
* provides two kinds of convenience.
|
||||
*
|
||||
* - constructors and destructors manage opening and closing of the stream.
|
||||
* The stream is checked for errors on closing, and the default behavior
|
||||
* is to exit() with an error message if a problem was found.
|
||||
* - the getline() method strips comments and keeps track of input line
|
||||
* numbers for error reporting.
|
||||
*
|
||||
* File object can be cast to (FILE *) to perform most of the standard
|
||||
* stdio operations in a seamless way.
|
||||
*
|
||||
* The File object can also read/write to a std::string, for file
|
||||
* access "to memory".
|
||||
*
|
||||
* To read from an existing string, allocate the File object using:
|
||||
* File(char *, size_t) or File(std::string&) and then call any File()
|
||||
* accessor function. For reading, you can also allocate the File
|
||||
* object using File(NULL, exitOnError) and then reopen it using
|
||||
* File.reopen(char *, size_t) or File.reopen(std::string&).
|
||||
*
|
||||
* To write to a string, allocate the File object using: File("", 0,
|
||||
* exitOnError, reserved_length). Alternatively, use File(NULL,
|
||||
* exitOnError) followed by File.reopen("", 0, reserved_length).
|
||||
*
|
||||
* NOTE: String I/O does not yet support binary data (unless initialized from std::string?).
|
||||
* NOTE: For backwards compatibility, File object preferentially uses FILE * object if it exists.
|
||||
*/
|
||||
class File
|
||||
{
|
||||
public:
|
||||
// Note that prior to September, 2014, internal member variable
|
||||
// only stored exact pointer to name, now makes copy of name
|
||||
// since otherwise user needs to ensure name is not changed
|
||||
// or deleted (or stack variable) during lifetime of File object
|
||||
// (or prior to reopen with new name).
|
||||
File(const char *name, const char *mode, int exitOnError = 1);
|
||||
File(FILE *fp = 0, int exitOnError = 1);
|
||||
// Initialize strFile with contents of string. strFile will be
|
||||
// resized to "reserved_length" if this value is bigger than the
|
||||
// string size.
|
||||
File(const char *fileStr, size_t fileStrLen, int exitOnError = 1, int reserved_length = 0);
|
||||
File(std::string& fileStr, int exitOnError = 1, int reserved_length = 0);
|
||||
~File();
|
||||
|
||||
char *getline();
|
||||
void ungetline();
|
||||
int close();
|
||||
Boolean reopen(const char *name, const char *mode);
|
||||
Boolean reopen(const char *mode); // switch to binary I/O
|
||||
// [close() and] reopen File and initialize strFile with contents of string
|
||||
Boolean reopen(const char *fileStr, size_t fileStrLen, int reserved_length = 0);
|
||||
Boolean reopen(std::string& fileStr, int reserved_length = 0);
|
||||
Boolean error();
|
||||
|
||||
ostream &position(ostream &stream = cerr);
|
||||
ostream &offset(ostream &stream = cerr);
|
||||
|
||||
char *name;
|
||||
unsigned int lineno;
|
||||
Boolean exitOnError;
|
||||
Boolean skipComments;
|
||||
|
||||
// Provide "stdio" equivalent functions for the case where the
|
||||
// File class is wrapping a string instead of a FILE, since
|
||||
// casting File to (FILE *) won't work in this case. The
|
||||
// functions should perform the same as their namesakes, but will
|
||||
// not set errno.
|
||||
char *fgets(char *str, int n);
|
||||
char *fgetsUTF8(char *str, int n); // also converts to UTF8
|
||||
int fgetc();
|
||||
int fputc(int c);
|
||||
int fputs(const char *str);
|
||||
// uses internal 4KB buffer
|
||||
int fprintf(const char *format, ...);
|
||||
size_t fread(void *data, size_t size, size_t n);
|
||||
size_t fwrite(const void *data, size_t size, size_t n);
|
||||
long long ftell();
|
||||
int fseek(long long offset, int origin);
|
||||
|
||||
// get string contents from File() object, provided we are doing string I/O
|
||||
const char *c_str();
|
||||
const char *data();
|
||||
size_t length();
|
||||
|
||||
private:
|
||||
|
||||
FILE *fp;
|
||||
gzFile gzf; // when reading/writing via zlib
|
||||
|
||||
char *buffer;
|
||||
unsigned bufLen;
|
||||
Boolean reuseBuffer;
|
||||
Boolean atFirstLine; // we haven't read the first line yet
|
||||
enum { ASCII, UTF8, UTF16LE, UTF16BE } encoding; // char encoding scheme
|
||||
void *iconvID;
|
||||
|
||||
// read/write from/to string instead of file
|
||||
std::string strFile;
|
||||
int strFileLen;
|
||||
int strFilePos;
|
||||
int strFileActive;
|
||||
};
|
||||
|
||||
#endif /* _File_h_ */
|
||||
|
||||
88
language_model/srilm-1.7.3/misc/src/MStringTokUtil.cc
Normal file
88
language_model/srilm-1.7.3/misc/src/MStringTokUtil.cc
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* MStringTokUtil.cc --
|
||||
* Platform-independent version of strtok_r.
|
||||
*
|
||||
* @author SRI International
|
||||
* @file MStringTokUtil.h \brief Utility for portable string tokenization.
|
||||
*
|
||||
* Copyright (C) 2011 SRI International. Unpublished, All Rights Reserved.
|
||||
*
|
||||
* $Id: MStringTokUtil.cc,v 1.1 2011/04/01 17:47:18 victor Exp $
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "MStringTokUtil.h"
|
||||
|
||||
char*
|
||||
MStringTokUtil::strtok_r(char* s1, const char* s2, char** lasts)
|
||||
{
|
||||
if (lasts == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char* retval = NULL;
|
||||
if (s1 != NULL) {
|
||||
// First call
|
||||
retval = s1;
|
||||
} else if (*lasts != NULL) {
|
||||
// Get the input from the stored pointer state
|
||||
retval = *lasts;
|
||||
} else {
|
||||
// Saved state didn't have a string
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Count the number of separator characters in s2
|
||||
int numcheck = 0;
|
||||
if (s2 != NULL) {
|
||||
numcheck = strlen(s2);
|
||||
}
|
||||
|
||||
// Skip any initial separator characters
|
||||
char ch;
|
||||
bool match = true;
|
||||
while (((ch = *retval) != 0) && match) {
|
||||
match = false;
|
||||
for (int i = 0; i < numcheck; i++) {
|
||||
if (ch == s2[i]) {
|
||||
retval++;
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Did we hit the end of the string?
|
||||
if (*retval == 0) {
|
||||
*lasts = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Else we are on a non-separator, non-terminal character and will
|
||||
// have something non-zero length to return.
|
||||
|
||||
char* ptr = retval;
|
||||
// Loop until match separator character or find NULL-terminator
|
||||
while ((ch = *ptr) != 0) {
|
||||
for (int i = 0; i < numcheck; i++) {
|
||||
if (ch == s2[i]) {
|
||||
*ptr = 0;
|
||||
ptr++;
|
||||
if (*ptr != 0) {
|
||||
*lasts = ptr;
|
||||
} else {
|
||||
*lasts = NULL;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
// If here, no separator character was found so retval is the last thing we return
|
||||
*lasts = NULL;
|
||||
|
||||
return retval;
|
||||
}
|
||||
43
language_model/srilm-1.7.3/misc/src/MStringTokUtil.h
Normal file
43
language_model/srilm-1.7.3/misc/src/MStringTokUtil.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* MStringTokUtil.h --
|
||||
* Platform-independent version of strtok_r.
|
||||
*
|
||||
* @author SRI International
|
||||
* @file MStringTokUtil.h \brief Utility for portable string tokenization.
|
||||
*
|
||||
* Copyright (C) 2011 SRI International. Unpublished, All Rights Reserved.
|
||||
*
|
||||
* $Id: MStringTokUtil.h,v 1.1 2011/04/01 17:47:18 victor Exp $
|
||||
*/
|
||||
|
||||
#ifndef MStringTokUtil_h
|
||||
#define MStringTokUtil_h
|
||||
|
||||
/**
|
||||
* Platform-independent version of strtok_r.
|
||||
*/
|
||||
class MStringTokUtil {
|
||||
public:
|
||||
/**
|
||||
* Get next token from string based on character separators.
|
||||
*
|
||||
* @param s1 For the first call, this is a pointer to a string
|
||||
* from which to extract tokens. This string will be updated
|
||||
* with 0 characters. On subsequent calls, this parameter
|
||||
* should be NULL.
|
||||
* @param s2 Null-terminated set of delimiter characters.
|
||||
* This may be updated on subsequent calls.
|
||||
* @param lasts This is an address to a pointer used for
|
||||
* storing state between calls. The value will be set
|
||||
* on the first call and read/updated on successive calls.
|
||||
* @return pointer to NULL-terminated next token in s1
|
||||
* or NULL when no tokens remain.
|
||||
*/
|
||||
static char* strtok_r(char* s1, const char* s2, char** lasts);
|
||||
|
||||
private:
|
||||
// Static; no constructor
|
||||
MStringTokUtil();
|
||||
};
|
||||
|
||||
#endif // MStringTokUtil_h
|
||||
178
language_model/srilm-1.7.3/misc/src/Makefile
Normal file
178
language_model/srilm-1.7.3/misc/src/Makefile
Normal file
@@ -0,0 +1,178 @@
|
||||
#
|
||||
# File: Makefile.example
|
||||
# Author: The SRI DECIPHER (TM) System
|
||||
# Date: Thu Sep 9 12:04:47 1993
|
||||
#
|
||||
# Description:
|
||||
# This is the example makefile to start from when adding new
|
||||
# modules to the DECIPHER System. To use this makefile, first
|
||||
# copy it to your directory as the file "Makefile". Second,
|
||||
# replace the word "Example" in the text below with the real name
|
||||
# of your library. Next replace the example filenames with
|
||||
# the names of your actual declarations and source files in the
|
||||
# appropriate variable definitions. Finally clean up by deleting
|
||||
# any lines not relevant to your module and updating this header
|
||||
# to describe your new module. Do not forget to use the proper
|
||||
# RCS keywords!
|
||||
#
|
||||
# Copyright (c) 1993, SRI International. All Rights Reserved.
|
||||
#
|
||||
# $Header: /home/srilm/CVS/srilm/misc/src/Makefile,v 1.41 2017/02/02 06:25:10 stolcke Exp $
|
||||
#
|
||||
|
||||
# Include common SRILM variable definitions.
|
||||
include $(SRILM)/common/Makefile.common.variables
|
||||
|
||||
# Define variables.
|
||||
|
||||
# Flags for generating "compact" data structures
|
||||
COMPACT_FLAGS += -DUSE_SARRAY -DUSE_SARRAY_TRIE -DUSE_SARRAY_MAP2
|
||||
|
||||
# Flags for generating "short" data structures
|
||||
SHORT_FLAGS = $(COMPACT_FLAGS) -DUSE_SHORT_VOCAB -DUSE_XCOUNTS
|
||||
|
||||
# Flags for generating "long long" data structures
|
||||
LLONG_FLAGS = $(COMPACT_FLAGS) -DUSE_LONGLONG_COUNTS -DUSE_XCOUNTS
|
||||
|
||||
# Example Library declarations files.
|
||||
|
||||
EXTERNAL_LIB_HEADERS = \
|
||||
$(SRCDIR)/cfuncproto.h \
|
||||
$(SRCDIR)/option.h \
|
||||
$(SRCDIR)/zio.h \
|
||||
$(SRCDIR)/mkdir.h \
|
||||
$(SRCDIR)/Boolean.h \
|
||||
$(SRCDIR)/Debug.h \
|
||||
$(SRCDIR)/File.h \
|
||||
$(SRCDIR)/MStringTokUtil.h \
|
||||
$(SRCDIR)/tls.h \
|
||||
$(SRCDIR)/TLSWrapper.h \
|
||||
$(SRCDIR)/tserror.h \
|
||||
$(SRCDIR)/version.h
|
||||
|
||||
INTERNAL_LIB_HEADERS = \
|
||||
$(SRCDIR)/srilm_iconv.h
|
||||
|
||||
# Example Library source files.
|
||||
LIB_SOURCES = \
|
||||
$(SRCDIR)/option.c \
|
||||
$(SRCDIR)/zio.c \
|
||||
$(SRCDIR)/fcheck.c \
|
||||
$(SRCDIR)/rand48.c \
|
||||
$(SRCDIR)/Debug.cc \
|
||||
$(SRCDIR)/File.cc \
|
||||
$(SRCDIR)/MStringTokUtil.cc \
|
||||
$(SRCDIR)/tls.cc \
|
||||
$(SRCDIR)/tserror.cc \
|
||||
$(SRCDIR)/version.c
|
||||
|
||||
ifeq ($(NO_TCL), )
|
||||
LIB_SOURCES += $(SRCDIR)/tclmain.cc
|
||||
endif
|
||||
|
||||
ifneq ($(NO_ICONV), )
|
||||
CFLAGS += -DNO_ICONV
|
||||
CXXFLAGS += -DNO_ICONV
|
||||
endif
|
||||
|
||||
# tell version.c about compile-time options
|
||||
ifneq ($(HAVE_LIBLBFGS), )
|
||||
ADDITIONAL_CFLAGS += -DHAVE_LIBLBFGS
|
||||
ADDITIONAL_CXXFLAGS += -DHAVE_LIBLBFGS
|
||||
endif
|
||||
|
||||
# Example Library object files.
|
||||
LIB_OBJECTS = $(patsubst $(SRCDIR)/%.cc,$(OBJDIR)/%$(OBJ_SUFFIX),\
|
||||
$(patsubst $(SRCDIR)/%.c,$(OBJDIR)/%$(OBJ_SUFFIX),$(LIB_SOURCES)))
|
||||
|
||||
# Example Library.
|
||||
LIBRARY = $(OBJDIR)/$(LIB_PREFIX)misc$(LIB_SUFFIX)
|
||||
|
||||
ADDITIONAL_INCLUDES += \
|
||||
-I$(OBJDIR) \
|
||||
$(TCL_INCLUDE)
|
||||
|
||||
ADDITIONAL_LIBRARIES += \
|
||||
$(SRILM_LIBDIR)/$(LIB_PREFIX)z$(LIB_SUFFIX) \
|
||||
$(TCL_LIBRARY) \
|
||||
$(MATH_LIBRARY)
|
||||
|
||||
# Example programs.
|
||||
PROGRAM_NAMES = \
|
||||
ztest \
|
||||
testFile \
|
||||
testRand
|
||||
|
||||
PROGRAMS = $(PROGRAM_NAMES:%=$(BINDIR)/%$(EXE_SUFFIX))
|
||||
|
||||
PROGRAM_SOURCES = $(foreach prog,$(PROGRAM_NAMES),\
|
||||
$(wildcard $(SRCDIR)/$(prog).c) \
|
||||
$(wildcard $(SRCDIR)/$(prog).cc))
|
||||
PROGRAM_OBJECTS = $(PROGRAM_NAMES:%=$(OBJDIR)/%$(OBJ_SUFFIX))
|
||||
|
||||
|
||||
# Libraries to be linked with the Example programs.
|
||||
LIBRARIES = $(LIBRARY) \
|
||||
$(ADDITIONAL_LIBRARIES)
|
||||
|
||||
# All of the types of files.
|
||||
|
||||
ALL_HEADERS = $(EXTERNAL_LIB_HEADERS) \
|
||||
$(INTERNAL_LIB_HEADERS)
|
||||
|
||||
ALL_SOURCES = $(LIB_SOURCES) \
|
||||
$(PROGRAM_SOURCES)
|
||||
|
||||
ALL_OBJECTS = $(LIB_OBJECTS) \
|
||||
$(PROGRAM_OBJECTS)
|
||||
|
||||
ALL_LIBRARIES = $(LIBRARY)
|
||||
|
||||
ALL_PROGRAMS = $(PROGRAMS)
|
||||
|
||||
ALL_PROGRAM_NAMES = $(PROGRAM_NAMES)
|
||||
|
||||
|
||||
# Define pseudo-targets.
|
||||
|
||||
|
||||
# Make sure the library does not get deleted if the make is interrupted.
|
||||
.PRECIOUS: $(LIBRARY)
|
||||
|
||||
|
||||
# Define targets.
|
||||
|
||||
|
||||
all: libraries
|
||||
|
||||
$(LIBRARY): $(LIB_OBJECTS)
|
||||
$(ARCHIVE) $(AR_OUTPUT_OPTION) $^
|
||||
$(RANLIB) $@ $(DEMANGLE_FILTER)
|
||||
|
||||
SRILMversion.h: $(SRILM)/RELEASE $(SRILM)/Copyright
|
||||
read version < $(SRILM)/RELEASE; echo "#define SRILM_RELEASE \"$$version\"" > $@
|
||||
sed -f $(SRILM)/sbin/stringify-copyright $(SRILM)/Copyright >> $@
|
||||
|
||||
$(OBJDIR)/SRILMoptions.h:
|
||||
echo "#define BUILD_OPTIONS \"$(OPTION_FLAGS)\"" > $@
|
||||
|
||||
# make sure SRILMoptions.h gets generated for each $(OPTION)
|
||||
$(OBJDIR)/version$(OBJ_SUFFIX): $(OBJDIR)/SRILMoptions.h
|
||||
|
||||
|
||||
# Variables and Targets for released system
|
||||
|
||||
EXPORTED_HEADERS = $(EXTERNAL_LIB_HEADERS)
|
||||
EXPORTED_LIBRARIES = $(LIBRARY)
|
||||
EXPORTED_PROGRAMS =
|
||||
|
||||
release: release-headers release-libraries release-programs
|
||||
|
||||
depend: SRILMversion.h $(OBJDIR)/SRILMoptions.h
|
||||
|
||||
cleaner:
|
||||
rm -f SRILMversion.h $(OBJDIR)/SRILMoptions.h
|
||||
|
||||
# Include common SRILM target definitions.
|
||||
include $(SRILM)/common/Makefile.common.targets
|
||||
|
||||
3
language_model/srilm-1.7.3/misc/src/README
Normal file
3
language_model/srilm-1.7.3/misc/src/README
Normal file
@@ -0,0 +1,3 @@
|
||||
|
||||
This directory contains miscellaneous utility functions used by the
|
||||
Language Modeling tools and libraries.
|
||||
33
language_model/srilm-1.7.3/misc/src/SRILMoptions.h
Normal file
33
language_model/srilm-1.7.3/misc/src/SRILMoptions.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Default SRILMoptions.h
|
||||
* Should be overridden by automatically generated $(OBJDIR)/SRILMoptions.h
|
||||
*
|
||||
* $Header: /home/srilm/CVS/srilm/misc/src/SRILMoptions.h,v 1.1 2017/02/01 22:39:39 stolcke Exp $
|
||||
*/
|
||||
|
||||
#ifdef NDEBUG
|
||||
# define NDEBUG_OPTION "-DNDEBUG"
|
||||
#else
|
||||
# define NDEBUG_OPTION ""
|
||||
#endif
|
||||
|
||||
#ifdef USE_SARRAY
|
||||
# define BUILD_OPTION_1 "-DUSE_SARRAY"
|
||||
#else
|
||||
# define BUILD_OPTION_1 ""
|
||||
#endif
|
||||
|
||||
#ifdef USE_SARRAY_TRIE
|
||||
# define BUILD_OPTION_2 "-DUSE_SARRAY_TRIE"
|
||||
#else
|
||||
# define BUILD_OPTION_2 ""
|
||||
#endif
|
||||
|
||||
#ifdef USE_SARRAY_MAP2
|
||||
# define BUILD_OPTION_3 "-DUSE_SARRAY_MAP2"
|
||||
#else
|
||||
# define BUILD_OPTION_3 ""
|
||||
#endif
|
||||
|
||||
#define BUILD_OPTIONS NDEBUG_OPTION " " BUILD_OPTION_1 " " BUILD_OPTION_2 " " BUILD_OPTION_3
|
||||
|
||||
112
language_model/srilm-1.7.3/misc/src/TLSWrapper.h
Normal file
112
language_model/srilm-1.7.3/misc/src/TLSWrapper.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* TLSWrapper.h --
|
||||
* A template that wraps a thread-local storage variable. If NO_TLS is
|
||||
* defined then the macros in this file will simply create static
|
||||
* variables, thereby producing a single-threaded build.
|
||||
*
|
||||
* Copyright (c) 2012, SRI International. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef TLSWrapper_h
|
||||
#define TLSWrapper_h
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "tls.h"
|
||||
#include "Boolean.h"
|
||||
|
||||
#ifndef NO_TLS
|
||||
// Declare and define a POD TLS variable
|
||||
# define TLSW(type, name) TLSWrapper<type> name(1, false)
|
||||
// Declare and define a non-POD TLS variable (call its constructor)
|
||||
# define TLSWC(type, name) TLSWrapper<type> name(1, true)
|
||||
// Declare and define a TLS array
|
||||
# define TLSW_ARRAY(type, name, size) TLSWrapper<type> name(size)
|
||||
|
||||
// Declare a TLS variable of a given type
|
||||
# define TLSW_DECL(type, name) TLSWrapper<type> name
|
||||
// Declare a TLS array of a given type
|
||||
# define TLSW_DECL_ARRAY(type, name, size) TLSWrapper<type> name
|
||||
|
||||
// Define a previously declared TLS variable
|
||||
# define TLSW_DEF(type, name) TLSWrapper<type> name = TLSWrapper<type>(1, false)
|
||||
// Define a previously declared non-POD TLS variable
|
||||
# define TLSW_DEFC(type, name) TLSWrapper<type> name = TLSWrapper<type>(1, true)
|
||||
// Define a previously declared array TLS variable
|
||||
# define TLSW_DEF_ARRAY(type, name, size) TLSWrapper<type> name = TLSWrapper<type>(size)
|
||||
|
||||
// Get a T reference that is specific to the current thread
|
||||
# define TLSW_GET(name) (name.get())
|
||||
// Get a T pointer to the beginning of the array that belongs to current thread.
// The expansion is fully parenthesized so that the result can be indexed or
// dereferenced directly, e.g. TLSW_GET_ARRAY(buf)[i]; without the parentheses
// the subscript would bind to the value returned by get() before the address
// is taken, which differs from what the NO_TLS definition of this macro yields.
# define TLSW_GET_ARRAY(name) (&(name).get())
|
||||
|
||||
// Free the thread-local memory (but not what it points to, if anything)
|
||||
# define TLSW_FREE(name) name.release()
|
||||
|
||||
template<class T>
|
||||
class TLSWrapper {
|
||||
public:
|
||||
TLSWrapper(size_t numP = 1, Boolean constructP = false) {
|
||||
key = TLS_CREATE_KEY();
|
||||
num = numP;
|
||||
construct = constructP;
|
||||
}
|
||||
|
||||
~TLSWrapper() {
|
||||
TLS_FREE_KEY(key);
|
||||
}
|
||||
|
||||
T &get() {
|
||||
T* mem = (T*)TLS_GET(key);
|
||||
if (mem == 0) {
|
||||
// Since we're imitating static memory, zero-init
|
||||
if (construct)
|
||||
mem = new T();
|
||||
else
|
||||
mem = (T*)calloc(num, sizeof(T));
|
||||
assert(mem != 0);
|
||||
TLS_SET(key, mem);
|
||||
}
|
||||
return *mem;
|
||||
}
|
||||
|
||||
void release() {
|
||||
T* mem = (T*)TLS_GET(key);
|
||||
|
||||
if (mem != 0) {
|
||||
if (construct)
|
||||
delete mem;
|
||||
else
|
||||
free(mem);
|
||||
|
||||
TLS_SET(key, 0);
|
||||
}
|
||||
}
|
||||
private:
|
||||
size_t num;
|
||||
Boolean construct;
|
||||
TLS_KEY key;
|
||||
};
|
||||
|
||||
#else
|
||||
// Just create static variables for single-threaded builds
|
||||
# define TLSW(type, name) type name
|
||||
# define TLSWC(type, name) type name
|
||||
# define TLSW_ARRAY(type, name, size) type name[size]
|
||||
|
||||
# define TLSW_DECL(type, name) type name
|
||||
# define TLSW_DECL_ARRAY(type, name, size) type name[size]
|
||||
|
||||
# define TLSW_DEF(type, name) type name
|
||||
# define TLSW_DEFC(type, name) type name
|
||||
# define TLSW_DEF_ARRAY(type, name, size) type name[size]
|
||||
|
||||
# define TLSW_GET(name) name
|
||||
# define TLSW_GET_ARRAY(name) name
|
||||
|
||||
# define TLSW_FREE(name)
|
||||
#endif
|
||||
|
||||
#endif /* TLSWrapper_h */
|
||||
|
||||
78
language_model/srilm-1.7.3/misc/src/cfuncproto.h
Normal file
78
language_model/srilm-1.7.3/misc/src/cfuncproto.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* cfuncproto.h --
|
||||
*
|
||||
* Declarations of a macro supporting Ansi-C function prototypes in
|
||||
* Sprite. This macro allows function prototypes to be defined
|
||||
* such that the code works on both standard and K&R C.
|
||||
*
|
||||
* Copyright 1990 Regents of the University of California
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose and without
|
||||
* fee is hereby granted, provided that the above copyright
|
||||
* notice appear in all copies. The University of California
|
||||
* makes no representations about the suitability of this
|
||||
* software for any purpose. It is provided "as is" without
|
||||
* express or implied warranty.
|
||||
*
|
||||
* $Header: /home/srilm/CVS/srilm/misc/src/cfuncproto.h,v 1.9 2011/04/06 03:23:00 stolcke Exp $ SPRITE (Berkeley)
|
||||
*/
|
||||
|
||||
#ifndef _CFUNCPROTO
|
||||
#define _CFUNCPROTO
|
||||
|
||||
/*
|
||||
* Definition of the _ARGS_ macro. The _ARGS_ macro should be used to
|
||||
* enclose the argument list of a function prototype. For example, the
|
||||
* function:
|
||||
* extern int main(argc, argv)
|
||||
* int argc;
|
||||
* char **argv;
|
||||
*
|
||||
* Would have a prototype of:
|
||||
*
|
||||
* extern int main _ARGS_((int argc, char **argv))
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ASM
|
||||
|
||||
#if defined(__STDC__) || defined(_MSC_VER)
|
||||
#define _HAS_PROTOTYPES
|
||||
#define _HAS_VOIDPTR
|
||||
#define _HAS_CONST
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#define _EXTERN extern "C"
|
||||
#define _NULLARGS (void)
|
||||
#define _HAS_PROTOTYPES
|
||||
#define _HAS_VOIDPTR
|
||||
#define _HAS_CONST
|
||||
#else
|
||||
#define _EXTERN extern
|
||||
#define _NULLARGS ()
|
||||
#endif
|
||||
|
||||
#if defined(_HAS_PROTOTYPES) && !defined(lint)
|
||||
#define _ARGS_(x) x
|
||||
#else
|
||||
#define _ARGS_(x) ()
|
||||
#endif
|
||||
|
||||
#ifndef _CONST
|
||||
#ifdef _HAS_CONST
|
||||
#define _CONST const
|
||||
#else
|
||||
#define _CONST
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _HAS_VOIDPTR
|
||||
typedef void *_VoidPtr;
|
||||
#else
|
||||
typedef char *_VoidPtr;
|
||||
#endif
|
||||
|
||||
#endif /* _ASM */
|
||||
#endif /* _CFUNCPROTO */
|
||||
|
||||
33
language_model/srilm-1.7.3/misc/src/fcheck.c
Normal file
33
language_model/srilm-1.7.3/misc/src/fcheck.c
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* fcheck.c --
|
||||
* stdio file handling with error checking
|
||||
*
|
||||
* $Header: /home/srilm/CVS/srilm/misc/src/fcheck.c,v 1.2 2003/02/21 22:01:23 stolcke Exp $
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ZIO_HACK
|
||||
#include "zio.h"
|
||||
#include "fcheck.h"
|
||||
|
||||
/*
 * Open `name' with fopen() in the given mode and return the stream.
 * If the open fails, report the error with perror(name) and terminate
 * the program with exit status 1.
 */
FILE *fopen_check(const char *name, const char *mode)
{
    FILE *stream = fopen(name, mode);

    if (stream != 0) {
	return stream;
    }

    perror(name);
    exit(1);
}
|
||||
|
||||
/*
 * Close `file' with fclose().  If fclose() reports a failure, print the
 * error with perror(name) and terminate the program with exit status 1.
 */
void fclose_check(const char *name, FILE *file)
{
    int status = fclose(file);

    if (status == 0) {
	return;
    }

    perror(name);
    exit(1);
}
|
||||
|
||||
22
language_model/srilm-1.7.3/misc/src/fcheck.h
Normal file
22
language_model/srilm-1.7.3/misc/src/fcheck.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* fcheck.h --
|
||||
* stdio file handling with error checking
|
||||
*/
|
||||
|
||||
#ifndef _FCHECK_H_
|
||||
#define _FCHECK_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
FILE *fopen_check(const char *name, const char *mode);
|
||||
void fclose_check(const char *name, FILE *file);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _FCHECK_H_ */
|
||||
40
language_model/srilm-1.7.3/misc/src/mkdir.h
Normal file
40
language_model/srilm-1.7.3/misc/src/mkdir.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
File: mkdir.h
|
||||
Author: Andreas Stolcke
|
||||
Date: Wed Feb 15 15:19:44 PST 1995
|
||||
|
||||
Description: Portability for the mkdir function
|
||||
|
||||
Copyright (c) 2006, SRI International. All Rights Reserved.
|
||||
|
||||
RCS ID: $Id: mkdir.h,v 1.2 2006/10/17 18:53:33 stolcke Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
* $Log: mkdir.h,v $
|
||||
* Revision 1.2 2006/10/17 18:53:33 stolcke
|
||||
* win32 portability
|
||||
*
|
||||
* Revision 1.1 2006/01/09 19:14:04 stolcke
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _MKDIR_H
|
||||
#define _MKDIR_H
|
||||
|
||||
#if defined(_MSC_VER) || defined(WIN32)
|
||||
# include <direct.h>
|
||||
# define MKDIR(d) _mkdir(d)
|
||||
#else
|
||||
# include <sys/stat.h>
|
||||
# include <sys/types.h>
|
||||
# ifdef S_IRWXO
|
||||
# define MKDIR(d) mkdir(d, S_IRWXU|S_IRWXG|S_IRWXO)
|
||||
# else
|
||||
# define MKDIR(d) mkdir(d)
|
||||
# endif
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
#endif /* _MKDIR_H */
|
||||
|
||||
479
language_model/srilm-1.7.3/misc/src/option.c
Normal file
479
language_model/srilm-1.7.3/misc/src/option.c
Normal file
@@ -0,0 +1,479 @@
|
||||
/*
|
||||
* option.c --
|
||||
*
|
||||
* Routines to do command line option processing.
|
||||
*
|
||||
* Copyright 1986, 1991 Regents of the University of California
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose and without
|
||||
* fee is hereby granted, provided that the above copyright
|
||||
* notice appear in all copies. The University of California
|
||||
* makes no representations about the suitability of this
|
||||
* software for any purpose. It is provided "as is" without
|
||||
* express or implied warranty.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Header: /home/srilm/CVS/srilm/misc/src/option.c,v 1.17 2013/04/09 06:07:02 stolcke Exp $ SPRITE (Berkeley)";
|
||||
#endif
|
||||
|
||||
#include <option.h>
|
||||
#include <cfuncproto.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#define OptNoArg(progName, opt) fprintf(stderr, \
|
||||
"Warning: %s option \"-%s\" needs an argument\n", \
|
||||
(progName), (opt))
|
||||
|
||||
/* Forward references: */
|
||||
|
||||
static void ParseTime _ARGS_ ((_CONST char *progName, char *str,
|
||||
time_t *resultPtr));
|
||||
|
||||
|
||||
/*
|
||||
*----------------------------------------------------------------------
|
||||
*
|
||||
* Opt_Parse --
|
||||
*
|
||||
* Process a command line according to a template of accepted
|
||||
* options. See the manual page and header file for more details.
|
||||
*
|
||||
* Results:
|
||||
* The number of options that weren't processed by this procedure
|
||||
* is returned, and argv points to an array of unprocessed
|
||||
* options. (This is all of the options that didn't start with
|
||||
* "-", except for those used as arguments to the options
|
||||
* processed here; it's also anything after an OPT_REST option.)
|
||||
*
|
||||
* Side effects:
|
||||
* The variables referenced from the option array get modified
|
||||
* if their option was present on the command line. Can clobber
|
||||
* the global buffer used by localtime(3).
|
||||
*
|
||||
*----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
int
|
||||
Opt_Parse(
|
||||
int argc, /* Number of arguments in argv. */
|
||||
char **argv, /* Array of arguments */
|
||||
Option optionArray[], /* Array of option descriptions */
|
||||
int numOptions, /* Size of optionArray */
|
||||
int flags) /* Or'ed combination of various flag bits:
|
||||
* see option.h for definitions. */
|
||||
{
|
||||
register Option *optionPtr; /* pointer to the current option in the
|
||||
* array of option specifications */
|
||||
register char *curOpt; /* Current flag argument */
|
||||
register char **curArg; /* Current argument */
|
||||
register int argIndex; /* Index into argv to which next unused
|
||||
* argument should be copied */
|
||||
int stop=0; /* Set non-zero to stop processing
|
||||
* arguments when an OPT_REST flag is
|
||||
* encountered */
|
||||
int error=0; /* A bad option was found */
|
||||
int length; /* Number of characters in current
|
||||
* option. */
|
||||
|
||||
argIndex = 1;
|
||||
argc -= 1;
|
||||
curArg = &argv[1];
|
||||
|
||||
while (argc && !stop) {
|
||||
if (**curArg == '-') {
|
||||
curOpt = &curArg[0][1];
|
||||
curArg += 1;
|
||||
argc -= 1;
|
||||
|
||||
/*
|
||||
* Check for the special options "?" and "help". If found,
|
||||
* print documentation and exit.
|
||||
*/
|
||||
|
||||
if ((strcmp(curOpt, "?") == 0) || (strcmp(curOpt, "help") == 0)) {
|
||||
Opt_PrintUsage (argv[0], optionArray, numOptions);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over all the options specified in a single argument
|
||||
* (must be 1 unless OPT_ALLOW_CLUSTERING was specified).
|
||||
*/
|
||||
|
||||
while (1) {
|
||||
/*
|
||||
* Loop over the array of options searching for one with the
|
||||
* matching key string. If found, it is left pointed to by
|
||||
* optionPtr.
|
||||
*/
|
||||
for (optionPtr = &optionArray[numOptions - 1];
|
||||
optionPtr >= optionArray;
|
||||
optionPtr -= 1) {
|
||||
if (optionPtr->key == NULL) {
|
||||
continue;
|
||||
}
|
||||
if (*optionPtr->key == *curOpt) {
|
||||
if (flags & OPT_ALLOW_CLUSTERING) {
|
||||
length = strlen(optionPtr->key);
|
||||
if (strncmp(optionPtr->key, curOpt, length) == 0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (strcmp(optionPtr->key, curOpt) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (optionPtr < optionArray) {
|
||||
/*
|
||||
* No match. Print error message and skip option.
|
||||
*/
|
||||
|
||||
if (flags & OPT_UNKNOWN_IS_ERROR) {
|
||||
error = 1;
|
||||
stop = 1;
|
||||
} else {
|
||||
fprintf(stderr, "Unknown option \"-%s\";", curOpt);
|
||||
fprintf(stderr, " type \"%s -help\" for information\n",
|
||||
argv[0]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the appropriate action based on the option type
|
||||
*/
|
||||
|
||||
if (optionPtr->type >= 0) {
|
||||
*((int *) optionPtr->address) = optionPtr->type;
|
||||
} else {
|
||||
switch (optionPtr->type) {
|
||||
case OPT_REST:
|
||||
stop = 1;
|
||||
*((int *) optionPtr->address) = argIndex;
|
||||
break;
|
||||
case OPT_STRING:
|
||||
if (argc == 0) {
|
||||
OptNoArg(argv[0], optionPtr->key);
|
||||
} else {
|
||||
*((char **)optionPtr->address) = *curArg;
|
||||
curArg++;
|
||||
argc--;
|
||||
}
|
||||
break;
|
||||
case OPT_INT:
|
||||
case OPT_UINT:
|
||||
if (argc == 0) {
|
||||
OptNoArg(argv[0], optionPtr->key);
|
||||
} else {
|
||||
char *endPtr;
|
||||
|
||||
int value = strtol(*curArg, &endPtr, 0);
|
||||
|
||||
if (endPtr == *curArg) {
|
||||
fprintf(stderr,
|
||||
"Warning: option \"-%s\" got a non-numeric argument \"%s\". Using default: %d\n",
|
||||
optionPtr->key, *curArg, *((int *) optionPtr->address));
|
||||
} else if (optionPtr->type == OPT_UINT &&
|
||||
value < 0)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Warning: option \"-%s\" got a negative argument \"%s\". Using default: %u.\n",
|
||||
optionPtr->key, *curArg, *((unsigned *) optionPtr->address));
|
||||
} else {
|
||||
*((int *) optionPtr->address) = value;
|
||||
}
|
||||
curArg++;
|
||||
argc--;
|
||||
}
|
||||
break;
|
||||
case OPT_TIME:
|
||||
if (argc == 0) {
|
||||
OptNoArg(argv[0], optionPtr->key);
|
||||
} else {
|
||||
ParseTime(argv[0], *curArg,
|
||||
(time_t *)optionPtr->address);
|
||||
curArg++;
|
||||
argc--;
|
||||
}
|
||||
break;
|
||||
case OPT_FLOAT:
|
||||
if (argc == 0) {
|
||||
OptNoArg(argv[0], optionPtr->key);
|
||||
} else {
|
||||
char *endPtr;
|
||||
|
||||
double value = strtod(*curArg, &endPtr);
|
||||
|
||||
if (endPtr == *curArg) {
|
||||
fprintf(stderr,
|
||||
"Warning: option \"-%s\" got non-floating-point argument \"%s\". Using default: %lg.\n",
|
||||
optionPtr->key, *curArg, *((double *) optionPtr->address));
|
||||
} else {
|
||||
*((double *) optionPtr->address) = value;
|
||||
}
|
||||
curArg++;
|
||||
argc--;
|
||||
}
|
||||
break;
|
||||
case OPT_GENFUNC: {
|
||||
int (*handlerProc)();
|
||||
|
||||
handlerProc = (int (*)())optionPtr->address;
|
||||
|
||||
argc = (* handlerProc) (optionPtr->key, argc,
|
||||
curArg);
|
||||
break;
|
||||
}
|
||||
case OPT_FUNC: {
|
||||
int (*handlerProc)();
|
||||
|
||||
handlerProc = (int (*)())optionPtr->address;
|
||||
|
||||
if ((* handlerProc) (optionPtr->key, *curArg)) {
|
||||
curArg += 1;
|
||||
argc -= 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OPT_DOC:
|
||||
Opt_PrintUsage (argv[0], optionArray, numOptions);
|
||||
exit(0);
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Advance to next option
|
||||
*/
|
||||
|
||||
if (flags & OPT_ALLOW_CLUSTERING) {
|
||||
curOpt += length;
|
||||
if (*curOpt == 0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* *curArg is an argument for which we have no use, so copy it
|
||||
* down.
|
||||
*/
|
||||
argv[argIndex] = *curArg;
|
||||
argIndex += 1;
|
||||
curArg += 1;
|
||||
argc -= 1;
|
||||
|
||||
/*
|
||||
* If this wasn't an option, and we're supposed to stop parsing
|
||||
* the first time we see something other than "-", quit.
|
||||
*/
|
||||
if (flags & OPT_OPTIONS_FIRST) {
|
||||
stop = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we broke out of the loop because of an OPT_REST argument, we want
|
||||
* to copy the rest of the arguments down, so we do.
|
||||
*/
|
||||
while (argc) {
|
||||
argv[argIndex] = *curArg;
|
||||
argIndex += 1;
|
||||
curArg += 1;
|
||||
argc -= 1;
|
||||
}
|
||||
argv[argIndex] = (char *)NULL;
|
||||
if ((flags & OPT_UNKNOWN_IS_ERROR) && error) {
|
||||
return -1;
|
||||
} else {
|
||||
return argIndex;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*----------------------------------------------------------------------
|
||||
*
|
||||
* Opt_PrintUsage --
|
||||
*
|
||||
* Print out a usage message for a command. This prints out the
|
||||
* documentation strings associated with each option.
|
||||
*
|
||||
* Results:
|
||||
* none.
|
||||
*
|
||||
* Side effects:
|
||||
* Messages printed onto the console.
|
||||
*
|
||||
*----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
void
|
||||
Opt_PrintUsage(
|
||||
_CONST char *commandName,
|
||||
Option optionArray[],
|
||||
int numOptions)
|
||||
{
|
||||
register int i;
|
||||
int width;
|
||||
|
||||
/*
|
||||
* First, compute the width of the widest option key, so that we
|
||||
* can make everything line up.
|
||||
*/
|
||||
|
||||
width = 4;
|
||||
for (i=0; i<numOptions; i++) {
|
||||
int length;
|
||||
if (optionArray[i].key == NULL) {
|
||||
continue;
|
||||
}
|
||||
length = strlen(optionArray[i].key);
|
||||
if (length > width) {
|
||||
width = length;
|
||||
}
|
||||
}
|
||||
|
||||
if (commandName != NULL) {
|
||||
fprintf(stderr, "Usage of command \"%s\"\n", commandName);
|
||||
}
|
||||
|
||||
for (i=0; i<numOptions; i++) {
|
||||
if (optionArray[i].type != OPT_DOC) {
|
||||
fprintf(stderr, " -%s%-*s %s\n", optionArray[i].key,
|
||||
width+1-(int)strlen(optionArray[i].key), ":",
|
||||
optionArray[i].docMsg);
|
||||
switch (optionArray[i].type) {
|
||||
case OPT_INT: {
|
||||
fprintf(stderr, "\t\tDefault value: %d\n",
|
||||
*((int *) optionArray[i].address));
|
||||
break;
|
||||
}
|
||||
case OPT_UINT: {
|
||||
fprintf(stderr, "\t\tDefault value: %u\n",
|
||||
*((unsigned *) optionArray[i].address));
|
||||
break;
|
||||
}
|
||||
case OPT_FLOAT: {
|
||||
fprintf(stderr, "\t\tDefault value: %lg\n",
|
||||
*((double *) optionArray[i].address));
|
||||
break;
|
||||
}
|
||||
case OPT_STRING: {
|
||||
if (*(char **)optionArray[i].address != (char *) NULL) {
|
||||
fprintf(stderr, "\t\tDefault value: \"%s\"\n",
|
||||
*(char **) optionArray[i].address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, " %s\n", optionArray[i].docMsg);
|
||||
}
|
||||
}
|
||||
if (commandName != NULL) {
|
||||
fprintf(stderr, " -help%-*s Print this message\n", width-3, ":");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*----------------------------------------------------------------------
|
||||
*
|
||||
* ParseTime --
|
||||
*
|
||||
* Convert a date and time from some string representation to
|
||||
* something we can compute with.
|
||||
*
|
||||
* Results:
|
||||
* If str points to a parsable time, the corresponding UNIX time
|
||||
* value (seconds past the epoch) is returned through resultPtr.
|
||||
*
|
||||
* Side effects:
|
||||
* Can clobber the global buffer used by localtime(3).
|
||||
*
|
||||
*----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static void
ParseTime(
    _CONST char *progName,  /* name that the program was called as */
    char *str,              /* the string to parse */
    time_t *resultPtr)      /* pointer to result time value */
{
    long seconds;           /* value when str is a plain integer */
    long fields[6];         /* year, month, day, hour, minute, second */
    char *fieldStart;       /* where the field being converted begins */
    char *endPtr;           /* pointer into str, for parsing */
    struct tm pieces;       /* year, month, etc. as integers */
    time_t when;            /* the answer for form 2 */
    int i;

    /*
     * We currently accept the following formats:
     *
     * (1) an integer number of seconds past the epoch.
     * (2) a string of the form "yy.mm.dd.hh.mm.ss"
     *
     * On any failure a message is printed and *resultPtr is left alone.
     */

    seconds = strtol(str, &endPtr, 0);
    if (endPtr == str) {
        goto parseError;
    }
    if (*endPtr == '\0') {
        *resultPtr = seconds;
        return;
    }

    /*
     * Not a simple integer, so try form 2.  The whole string is read
     * again in base 10: with base 0 a zero-padded field such as "08"
     * would be taken as a malformed octal number.  Each field is checked
     * against its own starting point so that an empty field is rejected,
     * and must be followed by '.' (or by the end of the string for the
     * seconds).
     */
    fieldStart = str;
    for (i = 0; i < 6; i++) {
        fields[i] = strtol(fieldStart, &endPtr, 10);
        if (endPtr == fieldStart) {
            goto parseError;
        }
        if (*endPtr != ((i < 5) ? '.' : '\0')) {
            goto parseError;
        }
        fieldStart = endPtr + 1;
    }

    /*
     * Clear every member first: mktime() reads members that are not set
     * from the string.  A negative tm_isdst asks mktime() to work out
     * itself whether daylight saving time is in effect.
     */
    memset(&pieces, 0, sizeof(pieces));
    pieces.tm_year = (int) fields[0];
    if (pieces.tm_year > 1900) {
        pieces.tm_year -= 1900;     /* a full year was given */
    }
    pieces.tm_mon = (int) fields[1] - 1;    /* tm_mon counts from 0 */
    pieces.tm_mday = (int) fields[2];
    pieces.tm_hour = (int) fields[3];
    pieces.tm_min = (int) fields[4];
    pieces.tm_sec = (int) fields[5];
    pieces.tm_isdst = -1;

    when = mktime(&pieces);
    if (when == (time_t) -1) {
        fprintf(stderr, "%s: can't represent the time \"%s\".\n",
                progName, str);
    } else {
        *resultPtr = when;
    }
    return;

parseError:
    fprintf(stderr, "%s: can't parse \"%s\" as a time.\n", progName, str);
    return;
}
|
||||
161
language_model/srilm-1.7.3/misc/src/option.h
Normal file
161
language_model/srilm-1.7.3/misc/src/option.h
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* option.h --
|
||||
* This defines the Option type and the interface to the
|
||||
* Opt_Parse library call that parses command lines.
|
||||
*
|
||||
* Copyright 1988, 1991 Regents of the University of California
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose and without
|
||||
* fee is hereby granted, provided that the above copyright
|
||||
* notice appear in all copies. The University of California
|
||||
* makes no representations about the suitability of this
|
||||
* software for any purpose. It is provided "as is" without
|
||||
* express or implied warranty.
|
||||
*
|
||||
* $Header: /home/srilm/CVS/srilm/misc/src/option.h,v 1.13 2013/04/09 06:07:02 stolcke Exp $ SPRITE (Berkeley)
|
||||
*/
|
||||
|
||||
#ifndef _OPTION
|
||||
#define _OPTION
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <cfuncproto.h>
|
||||
|
||||
/*
|
||||
* An array of option descriptions (type Option) is passed into the
|
||||
* routine which interprets the command line. Each option description
|
||||
* includes the key-string that indicates the option, a type for the option,
|
||||
* the address of an associated variable, and a documentation message
|
||||
* that is printed when the command is invoked with a single argument
|
||||
* of '?'
|
||||
*/
|
||||
|
||||
typedef struct Option {
|
||||
int type; /* Indicates option type; see below */
|
||||
_CONST char *key; /* The key string that flags option */
|
||||
_VoidPtr address; /* Address of variable to modify */
|
||||
_CONST char *docMsg; /* Documentation message */
|
||||
} Option;
|
||||
/*
|
||||
* Values for type:
|
||||
*
|
||||
* OPT_CONSTANT(val) - if the flag is present then set the
|
||||
* associated (integer) variable to val.
|
||||
* Val must be a non-negative integer.
|
||||
* OPT_TRUE - if the flag is present then set the
|
||||
* associated (integer) variable to TRUE (1).
|
||||
* OPT_FALSE - if the flag is present then set the
|
||||
* associated (integer) variable to FALSE (0).
|
||||
* OPT_INT - if the flag is present then the next argument
|
||||
* on the command line is interpreted as a
|
||||
* signed integer and that value is assigned to
|
||||
* the options associated variable.
|
||||
* OPT_UINT - if the flag is present then the next argument
|
||||
* on the command line is interpreted as an
|
||||
* unsigned integer and that value is assigned to
|
||||
* the options associated variable.
|
||||
* OPT_STRING - if the flag is present then the next argument
|
||||
* on the command line is copied into the string
|
||||
* variable associated with the option.
|
||||
* OPT_REST - if the flag is present, inhibit processing of
|
||||
* later options, so that they're all returned
|
||||
* to the caller in argv. In addition, set the
|
||||
* associated variable to the index of the first
|
||||
* of these arguments in the returned argv.
|
||||
* This permits a program to allow a flag to
|
||||
* separate its own options from options it will
|
||||
* pass to another program.
|
||||
* OPT_FLOAT - if the flag is present then the next argument
|
||||
* on the command line is interpreted as a
|
||||
* "double" and that value is assigned to the
|
||||
* option's associated variable.
|
||||
* OPT_TIME - if the flag is present then the next argument
|
||||
* on the command line is interpreted as a date
|
||||
* and time. The corresponding time value
|
||||
* (number of seconds past the epoch) is assigned
|
||||
* to the option's associated variable.
|
||||
* OPT_FUNC - if the flag is present, pass the next argument
|
||||
* to "address" as a function. The function
|
||||
* should be declared:
|
||||
* int
|
||||
* func(optString, arg)
|
||||
* char *optString;
|
||||
* char *arg;
|
||||
* Func should return non-zero if the argument
|
||||
* was consumed or zero if not. "optString" is
|
||||
* the option key string that caused the
|
||||
* function to be called and "arg" is the next
|
||||
* argument (if there is no next argument then
|
||||
* "arg" will be NULL).
|
||||
* OPT_GENFUNC - if the flag is present, pass the remaining
|
||||
* arguments and the number of arguments to
|
||||
* "address" as a function. The function should
|
||||
* be declared:
|
||||
* int
|
||||
* func(optString, argc, argv)
|
||||
* char *optString;
|
||||
* int argc;
|
||||
* char **argv;
|
||||
* and should return the new number of arguments
|
||||
* left in argv. argv should have been shuffled
|
||||
* to eliminate the arguments func consumed.
|
||||
* OPT_DOC - a dummy entry. Exists mostly for its
|
||||
* documentation string. As an additional side
|
||||
 * effect, if its key string is given as an argument,
|
||||
* Opt_Parse will treat it like a question mark
|
||||
* (i.e. print out the program's usage and exit).
|
||||
*/
|
||||
|
||||
/* The argument is parenthesized so a compound expression is cast as a whole. */
#define OPT_CONSTANT(val)	((int) (val))
|
||||
#define OPT_FALSE 0
|
||||
#define OPT_TRUE 1
|
||||
#define OPT_INT -1
|
||||
#define OPT_UINT -2
|
||||
#define OPT_STRING -3
|
||||
#define OPT_REST -4
|
||||
#define OPT_FLOAT -5
|
||||
#define OPT_FUNC -6
|
||||
#define OPT_GENFUNC -7
|
||||
#define OPT_DOC -8
|
||||
#define OPT_TIME -9
|
||||
|
||||
/*
|
||||
* Flag values for Opt_Parse:
|
||||
*
|
||||
* OPT_ALLOW_CLUSTERING - Permit many flags to be clustered under
|
||||
 * a single "-". In other words, treat
|
||||
* "foo -abc" the same as "foo -a -b -c".
|
||||
* OPT_OPTIONS_FIRST - Stop parsing if something other than an
|
||||
* option (starting with a hyphen) is encountered.
|
||||
 * OPT_UNKNOWN_IS_ERROR - Stop parsing if unknown option is encountered
|
||||
* and do not print -help message hint.
|
||||
*/
|
||||
|
||||
#define OPT_ALLOW_CLUSTERING 1
|
||||
#define OPT_OPTIONS_FIRST 2
|
||||
#define OPT_UNKNOWN_IS_ERROR 4
|
||||
|
||||
/*
|
||||
* Exported procedures:
|
||||
*/
|
||||
|
||||
int Opt_Parse _ARGS_ ((int argc, char *argv[], Option *optionArray,
|
||||
int numOptions, int flags));
|
||||
|
||||
void Opt_PrintUsage _ARGS_ ((_CONST char *commandName, Option *optionArray,
|
||||
int numOptions));
|
||||
|
||||
/*
|
||||
* Macro to determine size of option array:
|
||||
*/
|
||||
|
||||
#define Opt_Number(optionArray) (sizeof(optionArray)/sizeof((optionArray)[0]))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _OPTION */
|
||||
153
language_model/srilm-1.7.3/misc/src/rand48.c
Normal file
153
language_model/srilm-1.7.3/misc/src/rand48.c
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* rand48.c --
|
||||
* Replacement *rand48 functions (for systems that don't have them)
|
||||
*
|
||||
* $Header: /home/srilm/CVS/srilm/misc/src/rand48.c,v 1.3 2016/05/25 16:26:15 stolcke Exp $
|
||||
*/
|
||||
|
||||
#ifdef NEED_RAND48
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Copyright (c) 1993 Martin Birgmeier *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* You may redistribute unmodified or modified versions of this source *
|
||||
* code provided that the above copyright notice and this and the *
|
||||
* following conditions are retained. *
|
||||
* *
|
||||
* This software is provided ``as is'', and comes with no warranties *
|
||||
* of any kind. I shall in no event be liable for anything that happens *
|
||||
* to anyone/anything when using this software. *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Default parameters of the 48-bit linear congruential generator
 *	X(n+1) = (a * X(n) + c) mod 2^48
 * with a = 0x5DEECE66D and c = 0xB.  48-bit quantities are kept as three
 * 16-bit words, least significant word first.
 */
#define RAND48_SEED_0	(0x330e)
#define RAND48_SEED_1	(0xabcd)
#define RAND48_SEED_2	(0x1234)
#define RAND48_MULT_0	(0xe66d)
#define RAND48_MULT_1	(0xdeec)
#define RAND48_MULT_2	(0x0005)
#define RAND48_ADD	(0x000b)

/* Generator state shared by drand48(), lrand48() and mrand48(). */
unsigned short _rand48_seed[3] = {
    RAND48_SEED_0,
    RAND48_SEED_1,
    RAND48_SEED_2
};

/* Multiplier words; replaced by lcong48(), restored by srand48()/seed48(). */
unsigned short _rand48_mult[3] = {
    RAND48_MULT_0,
    RAND48_MULT_1,
    RAND48_MULT_2
};

/* Additive constant; replaced by lcong48(), restored by srand48()/seed48(). */
unsigned short _rand48_add = RAND48_ADD;

/*
 * Advance the 48-bit state in xseed by one step of the recurrence.
 *
 * The partial products are accumulated in an unsigned long long: two
 * 16x16-bit products plus a carry do not fit in 32 bits, and multiplying
 * the unsigned short operands directly would promote them to (signed) int
 * and could overflow.
 */
void
_dorand48(unsigned short xseed[3])
{
    unsigned long long accu;
    unsigned short temp[2];

    accu = (unsigned long long) _rand48_mult[0] * xseed[0] +
           (unsigned long long) _rand48_add;
    temp[0] = (unsigned short) (accu & 0xffff);	/* lower 16 bits */
    accu >>= 16;

    accu += (unsigned long long) _rand48_mult[0] * xseed[1] +
            (unsigned long long) _rand48_mult[1] * xseed[0];
    temp[1] = (unsigned short) (accu & 0xffff);	/* middle 16 bits */
    accu >>= 16;

    /* only the low 16 bits of this sum survive the mod 2^48 */
    accu += (unsigned long long) _rand48_mult[0] * xseed[2] +
            (unsigned long long) _rand48_mult[1] * xseed[1] +
            (unsigned long long) _rand48_mult[2] * xseed[0];

    xseed[0] = temp[0];
    xseed[1] = temp[1];
    xseed[2] = (unsigned short) (accu & 0xffff);	/* upper 16 bits */
}

/*
 * Combine two 16-bit words into a signed value in [-2^31, 2^31) without
 * relying on signed overflow or on the width of long.
 */
static long
rand48_signed32(unsigned short hi, unsigned short lo)
{
    unsigned long bits = ((unsigned long) hi << 16) | (unsigned long) lo;

    if (bits & 0x80000000UL) {
        /* two's-complement value of a 32-bit pattern with the top bit set */
        return -(long) (0xffffffffUL - bits) - 1;
    }
    return (long) bits;
}

/* Step the caller-supplied state and return a double in [0, 1). */
double
erand48(unsigned short xseed[3])
{
    _dorand48(xseed);
    return ldexp((double) xseed[0], -48) +
           ldexp((double) xseed[1], -32) +
           ldexp((double) xseed[2], -16);
}

/* Like erand48(), but on the shared state. */
double
drand48(void)
{
    return erand48(_rand48_seed);
}

/* Step the shared state and return its top 31 bits, in [0, 2^31). */
long
lrand48(void)
{
    _dorand48(_rand48_seed);
    return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1);
}

/* Like lrand48(), but on the caller-supplied state. */
long
nrand48(unsigned short xseed[3])
{
    _dorand48(xseed);
    return ((long) xseed[2] << 15) + ((long) xseed[1] >> 1);
}

/* Step the shared state and return its top 32 bits as a signed value. */
long
mrand48(void)
{
    _dorand48(_rand48_seed);
    return rand48_signed32(_rand48_seed[2], _rand48_seed[1]);
}

/* Like mrand48(), but on the caller-supplied state. */
long
jrand48(unsigned short xseed[3])
{
    _dorand48(xseed);
    return rand48_signed32(xseed[2], xseed[1]);
}

/*
 * Seed the shared state from the low 32 bits of seed (the lowest state
 * word is set to RAND48_SEED_0) and restore the default multiplier and
 * additive constant.
 */
void
srand48(long seed)
{
    _rand48_seed[0] = RAND48_SEED_0;
    _rand48_seed[1] = (unsigned short) seed;
    _rand48_seed[2] = (unsigned short) (seed >> 16);
    _rand48_mult[0] = RAND48_MULT_0;
    _rand48_mult[1] = RAND48_MULT_1;
    _rand48_mult[2] = RAND48_MULT_2;
    _rand48_add = RAND48_ADD;
}

/*
 * Install xseed as the shared state and restore the default multiplier
 * and additive constant.  Returns a pointer to a static buffer holding
 * the previous state; the buffer is overwritten by the next call.
 */
unsigned short *
seed48(unsigned short xseed[3])
{
    static unsigned short sseed[3];

    sseed[0] = _rand48_seed[0];
    sseed[1] = _rand48_seed[1];
    sseed[2] = _rand48_seed[2];
    _rand48_seed[0] = xseed[0];
    _rand48_seed[1] = xseed[1];
    _rand48_seed[2] = xseed[2];
    _rand48_mult[0] = RAND48_MULT_0;
    _rand48_mult[1] = RAND48_MULT_1;
    _rand48_mult[2] = RAND48_MULT_2;
    _rand48_add = RAND48_ADD;
    return sseed;
}

/*
 * Set state, multiplier and additive constant in one call:
 * p[0..2] = state, p[3..5] = multiplier, p[6] = additive constant.
 */
void
lcong48(unsigned short p[7])
{
    _rand48_seed[0] = p[0];
    _rand48_seed[1] = p[1];
    _rand48_seed[2] = p[2];
    _rand48_mult[0] = p[3];
    _rand48_mult[1] = p[4];
    _rand48_mult[2] = p[5];
    _rand48_add = p[6];
}
|
||||
|
||||
#endif /* NEED_RAND48 */
|
||||
69
language_model/srilm-1.7.3/misc/src/srilm_iconv.h
Normal file
69
language_model/srilm-1.7.3/misc/src/srilm_iconv.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
File: srilm_iconv.h
|
||||
Author: Andreas Stolcke
|
||||
Date: Sun Jan 22 12:48:55 2012
|
||||
|
||||
Description: Portability for the iconv function
|
||||
|
||||
Copyright (c) 2012 Andreas Stolcke, Microsoft Corp. All Rights Reserved.
|
||||
|
||||
RCS ID: $Id: srilm_iconv.h,v 1.6 2019/09/09 23:13:15 stolcke Exp $
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(NO_ICONV) && defined(__GNUC__) && !defined(WIN32)
|
||||
# include_next <iconv.h>
|
||||
#else
|
||||
# if !defined(NO_ICONV) && defined(sun)
|
||||
# include "/usr/include/iconv.h"
|
||||
# else
|
||||
|
||||
#ifndef _SRILM_ICONV_H
|
||||
#define _SRILM_ICONV_H
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#undef iconv_open
|
||||
#undef iconv_close
|
||||
#undef iconv
|
||||
|
||||
# ifdef NO_ICONV
|
||||
|
||||
/*
|
||||
* Avoid libiconv references, disallow UTF-16 conversion.
|
||||
*/
|
||||
typedef void *iconv_t; // unused
|
||||
|
||||
#define iconv_open(to, from) (errno = EINVAL, (iconv_t)-1)
|
||||
#define iconv_close(x) /* nothing to do */
|
||||
#define iconv(cp, in, nin, out, nout) ((size_t)-1) // unused
|
||||
|
||||
# else /* ! NO_ICONV */
|
||||
|
||||
# if defined(_MSC_VER) || defined(WIN32)
|
||||
/*
|
||||
* Emulate simple iconv() usage using Windows API.
|
||||
* (Not pretty, but keeps the code below from being littered with #ifdefs)
|
||||
*/
|
||||
#include "Windows.h"
|
||||
|
||||
typedef void *iconv_t; // unused
|
||||
|
||||
#define iconv_open(to, from) ((strcmp(to,"UTF-8")==0 && strcmp(from,"UTF-16LE")==0) ? \
|
||||
(iconv_t)1 : \
|
||||
(errno = EINVAL, (iconv_t)-1))
|
||||
#define iconv_close(x) /* nothing to do */
|
||||
/*
 * Converts the NUL-terminated wide string at *in to UTF-8 in the buffer
 * *out, whose size is taken from *nout.  The WideCharToMultiByte() result
 * is stored back into *nout and is also the value of the expression;
 * a result of 0 yields (size_t)-1.  cp and nin are ignored, and *in / *out
 * are not advanced.
 */
#define iconv(cp, in, nin, out, nout) \
	((*(nout) = WideCharToMultiByte(CP_UTF8, 0, \
					(LPCWSTR)*(in), -1, \
					*(out), *(nout), \
					NULL, NULL)) == 0 ? (size_t)-1 : *(nout))
|
||||
# endif /* _MSC_VER */
|
||||
|
||||
# endif /* NO_ICONV */
|
||||
|
||||
#endif /* _SRILM_ICONV_H */
|
||||
|
||||
# endif
|
||||
#endif
|
||||
|
||||
22
language_model/srilm-1.7.3/misc/src/tclmain.cc
Normal file
22
language_model/srilm-1.7.3/misc/src/tclmain.cc
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
 * tclmain.cc --
|
||||
* main() function for tcl clients
|
||||
*
|
||||
* $Header: /home/srilm/CVS/srilm/misc/src/tclmain.cc,v 1.6 2003/07/01 02:54:12 stolcke Exp $
|
||||
*/
|
||||
|
||||
#include <tcl.h>
|
||||
|
||||
/*
|
||||
* Tcl versions up to 7.3 defined main() in the libtcl.a
|
||||
*/
|
||||
#if (TCL_MAJOR_VERSION == 7 && TCL_MINOR_VERSION > 3) || (TCL_MAJOR_VERSION > 7)
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
Tcl_Main(argc, argv, Tcl_AppInit);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
70
language_model/srilm-1.7.3/misc/src/testFile.cc
Normal file
70
language_model/srilm-1.7.3/misc/src/testFile.cc
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Test File class
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char Copyright[] = "Copyright (c) 1998-2010 SRI International. All Rights Reserved.";
|
||||
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/testFile.cc,v 1.7 2012/07/11 22:07:58 stolcke Exp $";
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "File.h"
|
||||
|
||||
/*
 * Returns 1 if line is non-empty and its last character is a newline,
 * 0 otherwise.
 */
int hasNL(const char *line)
{
    unsigned length = strlen(line);

    return (length > 0 && line[length - 1] == '\n') ? 1 : 0;
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
File file(stdin);
|
||||
|
||||
File buffer("", (size_t)0);
|
||||
|
||||
char *line;
|
||||
|
||||
cout << "=== input data ===\n";
|
||||
|
||||
while ((line = file.getline())) {
|
||||
file.position(cout) << line;
|
||||
|
||||
if (!hasNL(line)) {
|
||||
cout << "(MISSING NEWLINE)\n";
|
||||
}
|
||||
|
||||
// save the line in our buffer
|
||||
buffer.fputs(line);
|
||||
}
|
||||
|
||||
buffer.fputs("LINE WITHOUT NEWLINE");
|
||||
|
||||
cout << "=== buffer contents ===\n";
|
||||
|
||||
unsigned len = strlen(buffer.c_str());
|
||||
cout << "(length = " << len << ")\n";
|
||||
cout << buffer.c_str();
|
||||
|
||||
cout << "\n=== buffer read back ===\n";
|
||||
|
||||
File sfile(buffer.c_str(), len);
|
||||
|
||||
while ((line = sfile.getline())) {
|
||||
sfile.position(cout) << line;
|
||||
|
||||
if (!hasNL(line)) {
|
||||
cout << "(MISSING NEWLINE)\n";
|
||||
}
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
36
language_model/srilm-1.7.3/misc/src/testRand.cc
Normal file
36
language_model/srilm-1.7.3/misc/src/testRand.cc
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* testRand --
|
||||
* Test random number generator
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef NEED_RAND48
|
||||
extern "C" {
|
||||
void srand48(long);
|
||||
double drand48();
|
||||
long lrand48();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Seed the generator with 1, then print 20 lrand48() values on one line
 * and 20 drand48() values on the next.
 */
int
main()
{
    const int numSamples = 20;
    int n;

    srand48(1);

    n = 0;
    while (n < numSamples) {
        printf(" %ld", lrand48());
        n++;
    }
    printf("\n");

    n = 0;
    while (n < numSamples) {
        printf(" %lg", drand48());
        n++;
    }
    printf("\n");

    exit(0);
}
|
||||
18
language_model/srilm-1.7.3/misc/src/tls.cc
Normal file
18
language_model/srilm-1.7.3/misc/src/tls.cc
Normal file
@@ -0,0 +1,18 @@
|
||||
#include <stdlib.h>
|
||||
/*
|
||||
* tls.cc --
|
||||
* Abstracts pthread and Windows thread-local storage mechanisms
|
||||
*
|
||||
* Copyright (c) 2012, SRI International. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "tls.h"
|
||||
|
||||
#if !defined(NO_TLS) && !defined(_MSC_VER) && !defined(WIN32)
|
||||
// Needed for non-windows TLS
|
||||
/*
 * Create and return a new thread-local storage key (no destructor is
 * registered).  The return type cannot express failure, so the process is
 * aborted if the key cannot be created; returning would hand the caller
 * an indeterminate key.
 */
TLS_KEY srilm_tls_get_key() {
    TLS_KEY key;

    if (pthread_key_create(&key, 0) != 0) {
        abort();
    }
    return key;
}
|
||||
#endif
|
||||
31
language_model/srilm-1.7.3/misc/src/tls.h
Normal file
31
language_model/srilm-1.7.3/misc/src/tls.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* tls.h --
|
||||
* Abstracts pthread and Windows thread-local storage mechanisms
|
||||
*
|
||||
* Copyright (c) 2012, SRI International. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef tls_h
|
||||
#define tls_h
|
||||
|
||||
#ifndef NO_TLS
|
||||
# if defined(_MSC_VER) || defined(WIN32)
|
||||
# include <windows.h>
|
||||
# define TLS_KEY DWORD
|
||||
# define TLS_CREATE_KEY TlsAlloc
|
||||
# define TLS_GET(key) TlsGetValue(key)
|
||||
# define TLS_SET(key, value) TlsSetValue(key, value)
|
||||
# define TLS_FREE_KEY(key) TlsFree(key)
|
||||
# else
|
||||
# include <pthread.h>
|
||||
# define TLS_KEY pthread_key_t
|
||||
# define TLS_CREATE_KEY srilm_tls_get_key
|
||||
# define TLS_GET(key) pthread_getspecific(key)
|
||||
# define TLS_SET(key, value) pthread_setspecific(key, value)
|
||||
# define TLS_FREE_KEY(key) pthread_key_delete(key)
|
||||
TLS_KEY srilm_tls_get_key();
|
||||
# endif /* _MSC_VER */
|
||||
#endif /* !NO_TLS */
|
||||
|
||||
#endif /* tls_h */
|
||||
|
||||
38
language_model/srilm-1.7.3/misc/src/tserror.cc
Normal file
38
language_model/srilm-1.7.3/misc/src/tserror.cc
Normal file
@@ -0,0 +1,38 @@
|
||||
#include <stdlib.h>
|
||||
/*
|
||||
* tserror.cc --
|
||||
* Provide thread-safe strerror calls
|
||||
*
|
||||
* Copyright (c) 2012, SRI International. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#define ERR_BUFF_SZ 256
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifndef NO_TLS
|
||||
#include "tserror.h"
|
||||
#include "TLSWrapper.h"
|
||||
static TLSW_ARRAY(char, errBuffTLS, ERR_BUFF_SZ);
|
||||
char *srilm_ts_strerror(int errnum) {
|
||||
|
||||
#if defined(WIN32)
|
||||
char *buff = strerror(errnum); // mingw doesn't have strerror_s()
|
||||
#else
|
||||
char *buff = TLSW_GET_ARRAY(errBuffTLS);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
strerror_s(buff, ERR_BUFF_SZ, errnum);
|
||||
#else
|
||||
strerror_r(errnum, buff, ERR_BUFF_SZ);
|
||||
#endif /* _MSC_VER */
|
||||
#endif /* WIN32 */
|
||||
|
||||
return buff;
|
||||
}
|
||||
|
||||
void srilm_tserror_freeThread() {
|
||||
TLSW_FREE(errBuffTLS);
|
||||
}
|
||||
|
||||
#endif /* NO_TLS */
|
||||
28
language_model/srilm-1.7.3/misc/src/tserror.h
Normal file
28
language_model/srilm-1.7.3/misc/src/tserror.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* tserror.h --
|
||||
* Provide thread-safe strerror calls
|
||||
*
|
||||
* Copyright (c) 2012, SRI International. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef tserror_h
|
||||
#define tserror_h
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef NO_TLS
|
||||
char *srilm_ts_strerror(int errnum);
|
||||
#else
|
||||
# define srilm_ts_strerror strerror
|
||||
#endif
|
||||
|
||||
void srilm_tserror_freeThread();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* tserror_h */
|
||||
|
||||
61
language_model/srilm-1.7.3/misc/src/version.c
Normal file
61
language_model/srilm-1.7.3/misc/src/version.c
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* version.c --
|
||||
* Print version information
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char Copyright[] = "Copyright (c) 2004 SRI International, 2015 Andreas Stolcke, Microsoft Corp. All Rights Reserved.";
|
||||
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/version.c,v 1.10 2019/09/09 23:13:15 stolcke Exp $";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "zio.h"
|
||||
#include "version.h"
|
||||
#include "SRILMversion.h"
|
||||
#include <SRILMoptions.h>
|
||||
|
||||
#if defined(_OPENMP) && defined(_MSC_VER)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
void
|
||||
printVersion(const char *rcsid)
|
||||
{
|
||||
printf("SRILM release %s", SRILM_RELEASE);
|
||||
#ifndef EXCLUDE_CONTRIB
|
||||
printf(" (with third-party contributions)");
|
||||
#endif /* EXCLUDE_CONTRIB_END */
|
||||
printf("\n");
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
printf("Built with GCC %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
||||
#endif
|
||||
#ifdef __clang__
|
||||
printf("Built with Clang %d.%d.%d\n", __clang_major__, __clang_minor__, __clang_patchlevel__);
|
||||
#endif
|
||||
#ifdef __INTEL_COMPILER
|
||||
printf("Built with IntelC %d\n", __INTEL_COMPILER);
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
printf("Built with MSVC %d\n", _MSC_VER);
|
||||
#endif
|
||||
#ifdef BUILD_OPTIONS
|
||||
printf("and options %s\n", BUILD_OPTIONS);
|
||||
#endif
|
||||
|
||||
printf("\nProgram version %s\n", rcsid);
|
||||
#ifndef NO_ZIO
|
||||
printf("\nSupport for compressed files is included.\n");
|
||||
#else
|
||||
printf("\nSupport for gzipped files is included.\n");
|
||||
#endif
|
||||
#ifdef HAVE_LIBLBFGS
|
||||
printf("Using libLBFGS.\n");
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
printf("Using OpenMP version %d.\n", _OPENMP);
|
||||
#endif
|
||||
puts(SRILM_COPYRIGHT);
|
||||
}
|
||||
|
||||
25
language_model/srilm-1.7.3/misc/src/version.h
Normal file
25
language_model/srilm-1.7.3/misc/src/version.h
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* version.h --
|
||||
* Print version information
|
||||
*
|
||||
* Copyright (c) 2004, SRI International. All Rights Reserved.
|
||||
*
|
||||
* @(#)$Header: /home/srilm/CVS/srilm/misc/src/version.h,v 1.1 2004/12/03 04:24:36 stolcke Exp $
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _version_h_
|
||||
#define _version_h_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void printVersion(const char *rcsid);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _version_h_ */
|
||||
|
||||
501
language_model/srilm-1.7.3/misc/src/zio.c
Normal file
501
language_model/srilm-1.7.3/misc/src/zio.c
Normal file
@@ -0,0 +1,501 @@
|
||||
/*
|
||||
File: zio.c
|
||||
Author: Andreas Stolcke
|
||||
Date: Wed Feb 15 15:19:44 PST 1995
|
||||
|
||||
Description:
|
||||
Compressed file stdio extension
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char Copyright[] = "Copyright (c) 1995-2010 SRI International. All Rights Reserved.";
|
||||
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/zio.c,v 1.31 2011/04/07 07:43:24 stolcke Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* $Log: zio.c,v $
|
||||
* Revision 1.31 2011/04/07 07:43:24 stolcke
|
||||
* Suppress unused functions if NO_ZIO is defined
|
||||
*
|
||||
* Revision 1.30 2010/06/02 04:47:32 stolcke
|
||||
* avoid compiler warning
|
||||
*
|
||||
* Revision 1.29 2010/04/05 15:12:03 stolcke
|
||||
* avoid using gunzip to avoid script wrapper overhead
|
||||
*
|
||||
* Revision 1.28 2009/08/22 22:41:19 stolcke
|
||||
* support for xz compressed files
|
||||
*
|
||||
* Revision 1.27 2008/05/27 03:21:41 stolcke
|
||||
* avoid compiler warnings about exit()
|
||||
*
|
||||
* Revision 1.26 2007/11/11 19:49:11 stolcke
|
||||
* use 7z e to uncompress (probably doesn't matter)
|
||||
*
|
||||
* Revision 1.25 2007/11/11 16:06:53 stolcke
|
||||
* 7zip compression support
|
||||
*
|
||||
* Revision 1.24 2006/03/06 05:46:43 stolcke
|
||||
* define NO_ZIO in zio.h instead of zio.c
|
||||
*
|
||||
* Revision 1.23 2006/03/01 00:45:45 stolcke
|
||||
* allow disabling of zio for windows environment (NO_ZIO)
|
||||
*
|
||||
* Revision 1.22 2006/01/09 17:39:03 stolcke
|
||||
* MSVC port
|
||||
*
|
||||
* Revision 1.21 2006/01/05 19:32:42 stolcke
|
||||
* ms visual c portability
|
||||
*
|
||||
* Revision 1.20 2005/12/16 23:30:09 stolcke
|
||||
* added support for bzip2-compressed files
|
||||
*
|
||||
* Revision 1.19 2005/07/28 21:08:15 stolcke
|
||||
* include signal.h for portability
|
||||
*
|
||||
* Revision 1.18 2005/07/28 18:37:47 stolcke
|
||||
* portability for systems w/o pipes
|
||||
*
|
||||
* Revision 1.17 2004/01/31 01:17:51 stolcke
|
||||
* don't declare errno, get it from errno.h
|
||||
*
|
||||
* Revision 1.16 2003/11/09 21:09:11 stolcke
|
||||
* use gunzip -f to allow uncompressed files ending in .gz
|
||||
*
|
||||
* Revision 1.15 2003/11/01 06:18:30 stolcke
|
||||
* issue stdin/stdout warning only once
|
||||
*
|
||||
* Revision 1.14 1999/10/13 09:07:13 stolcke
|
||||
* make filename checking functions public
|
||||
*
|
||||
* Revision 1.13 1997/06/07 15:58:47 stolcke
|
||||
* fixed some gcc warnings
|
||||
*
|
||||
* Revision 1.13 1997/06/07 15:56:24 stolcke
|
||||
* fixed some gcc warnings
|
||||
*
|
||||
* Revision 1.12 1997/01/23 20:38:35 stolcke
|
||||
* *** empty log message ***
|
||||
*
|
||||
* Revision 1.11 1997/01/23 20:02:59 stolcke
|
||||
* handle SIGPIPE termination
|
||||
*
|
||||
* Revision 1.10 1997/01/22 07:52:08 stolcke
|
||||
* warn about multiple uses of -
|
||||
*
|
||||
* Revision 1.9 1996/11/30 21:08:59 stolcke
|
||||
* use exec in compress commands
|
||||
*
|
||||
* Revision 1.8 1995/07/19 16:51:31 stolcke
|
||||
* remove PATH assignment to account for local setup
|
||||
*
|
||||
* Revision 1.7 1995/06/22 20:47:16 stolcke
|
||||
* dup stdio descriptors so fclose won't disturb them
|
||||
*
|
||||
* Revision 1.6 1995/06/22 20:44:39 stolcke
|
||||
* return more error info
|
||||
*
|
||||
* Revision 1.5 1995/06/22 19:58:11 stolcke
|
||||
* ansi-fied
|
||||
*
|
||||
* Revision 1.4 1995/06/12 22:57:12 tmk
|
||||
* Added ifdef around the redefinitions of fopen() and fclose().
|
||||
*
|
||||
*/
|
||||
|
||||
/*******************************************************************
|
||||
Copyright 1994,1997 SRI International. All rights reserved.
|
||||
This is an unpublished work of SRI International and is not to be
|
||||
used or disclosed except as provided in a license agreement or
|
||||
nondisclosure agreement with SRI International.
|
||||
********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#ifndef _MSC_VER
|
||||
#include <unistd.h>
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifndef MAXPATHLEN
|
||||
#define MAXPATHLEN 1024
|
||||
#endif
|
||||
|
||||
#include "zio.h"
|
||||
|
||||
#ifdef ZIO_HACK
|
||||
#undef fopen
|
||||
#undef fclose
|
||||
#endif
|
||||
|
||||
#define STDIO_NAME "-"
|
||||
|
||||
#define STD_PATH ":" /* "PATH=/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin" */
|
||||
|
||||
#define COMPRESS_CMD "exec compress -c"
|
||||
#define UNCOMPRESS_CMD "exec uncompress -c"
|
||||
|
||||
#define GZIP_CMD "exec gzip -c"
|
||||
#define GUNZIP_CMD "exec gzip -dcf"
|
||||
|
||||
#define BZIP2_CMD "exec bzip2"
|
||||
#define BUNZIP2_CMD "exec bzip2 -dcf"
|
||||
|
||||
#define SEVENZIP_CMD "exec 7z a -si"
|
||||
#define SEVENUNZIP_CMD "exec 7z e -so"
|
||||
|
||||
#define XZ_CMD "exec xz"
|
||||
#define XZ_DECOMPRESS_CMD "exec xz -dcf"
|
||||
|
||||
/*
|
||||
* Does the filename refer to stdin/stdout ?
|
||||
*/
|
||||
int
|
||||
stdio_filename_p (const char *name)
|
||||
{
|
||||
return (strcmp(name, STDIO_NAME) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the filename refer to a compressed file ?
|
||||
*/
|
||||
int
|
||||
compressed_filename_p (const char *name)
|
||||
{
|
||||
unsigned len = strlen(name);
|
||||
|
||||
return
|
||||
(sizeof(COMPRESS_SUFFIX) > 1) &&
|
||||
(len > sizeof(COMPRESS_SUFFIX)-1) &&
|
||||
(strcmp(name + len - (sizeof(COMPRESS_SUFFIX)-1),
|
||||
COMPRESS_SUFFIX) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the filename refer to a gzipped file ?
|
||||
*/
|
||||
int
|
||||
gzipped_filename_p (const char *name)
|
||||
{
|
||||
unsigned len = strlen(name);
|
||||
|
||||
return
|
||||
((sizeof(GZIP_SUFFIX) > 1) &&
|
||||
(len > sizeof(GZIP_SUFFIX)-1) &&
|
||||
(strcmp(name + len - (sizeof(GZIP_SUFFIX)-1),
|
||||
GZIP_SUFFIX) == 0)) ||
|
||||
((sizeof(OLD_GZIP_SUFFIX) > 1) &&
|
||||
(len > sizeof(OLD_GZIP_SUFFIX)-1) &&
|
||||
(strcmp(name + len - (sizeof(OLD_GZIP_SUFFIX)-1),
|
||||
OLD_GZIP_SUFFIX) == 0));
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the filename refer to a bzipped file ?
|
||||
*/
|
||||
int
|
||||
bzipped_filename_p (const char *name)
|
||||
{
|
||||
unsigned len = strlen(name);
|
||||
|
||||
return
|
||||
(sizeof(BZIP2_SUFFIX) > 1) &&
|
||||
(len > sizeof(BZIP2_SUFFIX)-1) &&
|
||||
(strcmp(name + len - (sizeof(BZIP2_SUFFIX)-1),
|
||||
BZIP2_SUFFIX) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the filename refer to a 7-zip file ?
|
||||
*/
|
||||
int
|
||||
sevenzipped_filename_p (const char *name)
|
||||
{
|
||||
unsigned len = strlen(name);
|
||||
|
||||
return
|
||||
(sizeof(SEVENZIP_SUFFIX) > 1) &&
|
||||
(len > sizeof(SEVENZIP_SUFFIX)-1) &&
|
||||
(strcmp(name + len - (sizeof(SEVENZIP_SUFFIX)-1),
|
||||
SEVENZIP_SUFFIX) == 0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Does the filename refer to a xz-compressed file ?
|
||||
*/
|
||||
int
|
||||
xz_filename_p (const char *name)
|
||||
{
|
||||
unsigned len = strlen(name);
|
||||
|
||||
return
|
||||
(sizeof(XZ_SUFFIX) > 1) &&
|
||||
(len > sizeof(XZ_SUFFIX)-1) &&
|
||||
(strcmp(name + len - (sizeof(XZ_SUFFIX)-1),
|
||||
XZ_SUFFIX) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check file readability
|
||||
*/
|
||||
#ifndef NO_ZIO
|
||||
static int
readable_p (const char *name)
{
    /*
     * Probe by actually opening the file read-only; the descriptor is
     * needed only for the check and is released right away.
     */
    int probe = open(name, O_RDONLY);

    if (probe >= 0) {
        close(probe);
        return 1;
    }
    return 0;
}
|
||||
|
||||
/*
 * Check file writability
 * Note: the probe opens with O_CREAT, so a file that does not exist yet
 * is created (empty, mode 0666 subject to umask) as a side effect.
 */
static int
writable_p (const char *name)
{
    int probe = open(name, O_WRONLY|O_CREAT, 0666);

    if (probe >= 0) {
        /* The descriptor was needed only for the check. */
        close(probe);
        return 1;
    }
    return 0;
}
|
||||
#endif /* !NO_ZIO */
|
||||
|
||||
/*
|
||||
* Open a stdio stream, handling special filenames
|
||||
*/
|
||||
FILE *zopen(const char *name, const char *mode)
|
||||
{
|
||||
char command[MAXPATHLEN + 100];
|
||||
|
||||
if (stdio_filename_p(name)) {
|
||||
/*
|
||||
* Return stream to stdin or stdout
|
||||
*/
|
||||
if (*mode == 'r') {
|
||||
static int stdin_used = 0;
|
||||
static int stdin_warning = 0;
|
||||
int fd;
|
||||
|
||||
if (stdin_used) {
|
||||
if (!stdin_warning) {
|
||||
fprintf(stderr,
|
||||
"warning: '-' used multiple times for input\n");
|
||||
stdin_warning = 1;
|
||||
}
|
||||
} else {
|
||||
stdin_used = 1;
|
||||
}
|
||||
|
||||
fd = dup(0);
|
||||
return fd < 0 ? NULL : fdopen(fd, mode);
|
||||
} else if (*mode == 'w' || *mode == 'a') {
|
||||
static int stdout_used = 0;
|
||||
static int stdout_warning = 0;
|
||||
int fd;
|
||||
|
||||
if (stdout_used) {
|
||||
if (!stdout_warning) {
|
||||
fprintf(stderr,
|
||||
"warning: '-' used multiple times for output\n");
|
||||
stdout_warning = 1;
|
||||
}
|
||||
} else {
|
||||
stdout_used = 1;
|
||||
}
|
||||
|
||||
fd = dup(1);
|
||||
return fd < 0 ? NULL : fdopen(fd, mode);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
char *compress_cmd = NULL;
|
||||
char *uncompress_cmd = NULL;
|
||||
int zip_to_stdout = 1;
|
||||
|
||||
if (compressed_filename_p(name)) {
|
||||
compress_cmd = COMPRESS_CMD;
|
||||
uncompress_cmd = UNCOMPRESS_CMD;
|
||||
} else if (gzipped_filename_p(name)) {
|
||||
compress_cmd = GZIP_CMD;
|
||||
uncompress_cmd = GUNZIP_CMD;
|
||||
} else if (bzipped_filename_p(name)) {
|
||||
compress_cmd = BZIP2_CMD;
|
||||
uncompress_cmd = BUNZIP2_CMD;
|
||||
} else if (sevenzipped_filename_p(name)) {
|
||||
compress_cmd = SEVENZIP_CMD;
|
||||
uncompress_cmd = SEVENUNZIP_CMD;
|
||||
zip_to_stdout = 0;
|
||||
} else if (xz_filename_p(name)) {
|
||||
compress_cmd = XZ_CMD;
|
||||
uncompress_cmd = XZ_DECOMPRESS_CMD;
|
||||
}
|
||||
|
||||
if (compress_cmd != NULL) {
|
||||
#ifdef NO_ZIO
|
||||
fprintf(stderr, "Sorry, compressed I/O not available on this machine\n");
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
#else /* !NO_ZIO */
|
||||
/*
|
||||
* Return stream to compress pipe
|
||||
*/
|
||||
if (*mode == 'r') {
|
||||
if (!readable_p(name))
|
||||
return NULL;
|
||||
sprintf(command, "%s;%s %s", STD_PATH, uncompress_cmd, name);
|
||||
return popen(command, mode);
|
||||
} else if (*mode == 'w') {
|
||||
if (!writable_p(name))
|
||||
return NULL;
|
||||
if (zip_to_stdout) {
|
||||
sprintf(command, "%s;%s >%s", STD_PATH, compress_cmd, name);
|
||||
} else {
|
||||
/*
|
||||
* This is necessary because the compression program might
|
||||
* complain if a zero-length file already exists.
|
||||
* However, it means that existing file owner & permission
|
||||
* attributes are not preserved.
|
||||
*/
|
||||
unlink(name);
|
||||
sprintf(command, "%s;%s %s", STD_PATH, compress_cmd, name);
|
||||
}
|
||||
return popen(command, mode);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
#endif /* !NO_ZIO */
|
||||
} else {
|
||||
return fopen(name, mode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a stream created by zopen()
|
||||
*/
|
||||
int
|
||||
zclose(FILE *stream)
|
||||
{
|
||||
#ifdef NO_ZIO
|
||||
return fclose(stream);
|
||||
#else /* !NO_ZIO */
|
||||
|
||||
int status;
|
||||
struct stat statb;
|
||||
|
||||
/*
|
||||
* pclose(), according to the man page, should diagnose streams not
|
||||
* created by popen() and return -1. however, on SGIs, it core dumps
|
||||
* in that case. So we better be careful and try to figure out
|
||||
* what type of stream it is.
|
||||
*/
|
||||
if (fstat(fileno(stream), &statb) < 0)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* First try pclose(). It will tell us if stream is not a pipe
|
||||
*/
|
||||
if ((statb.st_mode & S_IFMT) != S_IFIFO ||
|
||||
fileno(stream) == 0 || fileno(stream) == 1)
|
||||
{
|
||||
return fclose(stream);
|
||||
} else {
|
||||
status = pclose(stream);
|
||||
if (status == -1) {
|
||||
/*
|
||||
* stream was not created by popen(), but popen() does fclose
|
||||
* for us in thise case.
|
||||
*/
|
||||
return ferror(stream);
|
||||
} else if (status == SIGPIPE) {
|
||||
/*
|
||||
* It's normal for the uncompressor to terminate by SIGPIPE,
|
||||
* i.e., if the user program closed the file before reaching
|
||||
* EOF.
|
||||
*/
|
||||
return 0;
|
||||
} else {
|
||||
/*
|
||||
* The compressor program terminated with an error, and supposedly
|
||||
* has printed a message to stderr.
|
||||
* Set errno to a generic error code if it hasn't been set already.
|
||||
*/
|
||||
if (errno == 0) {
|
||||
errno = EIO;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
}
|
||||
#endif /* NO_ZIO */
|
||||
}
|
||||
|
||||
#ifdef STAND
/*
 * Stand-alone test driver (compiled only when STAND is defined).
 *
 * usage: prog file {r|w}
 *	r	copy the contents of file to stdout
 *	other	copy stdin to file
 * The file is opened with zopen(), so "-" and compressed files work.
 */
int
main (int argc, char **argv)
{
    char buffer[BUFSIZ];
    size_t nread;
    FILE *stream;
    FILE *from;
    FILE *to;

    if (argc < 3) {
        printf("usage: %s file {r|w}\n", argv[0]);
        exit(2);
    }

    stream = zopen(argv[1], argv[2]);

    if (!stream) {
        perror(argv[1]);
        exit(1);
    }

    /* Pick the copy direction from the first character of the mode. */
    if (*argv[2] == 'r') {
        from = stream;
        to = stdout;
    } else {
        from = stdin;
        to = stream;
    }

    while (!ferror(from) && !feof(from) && !ferror(to)) {
        nread = fread(buffer, 1, sizeof(buffer), from);
        (void)fwrite(buffer, 1, nread, to);
    }

    if (ferror(stdin)) {
        perror("stdin");
    } else if (ferror(stdout)) {
        perror("stdout");
    } else if (ferror(stream)) {
        perror(argv[1]);
    }
    zclose(stream);

    exit(0);
}
#endif /* STAND */
|
||||
120
language_model/srilm-1.7.3/misc/src/zio.h
Normal file
120
language_model/srilm-1.7.3/misc/src/zio.h
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
File: zio.h
|
||||
Author: Andreas Stolcke
|
||||
Date: Wed Feb 15 15:19:44 PST 1995
|
||||
|
||||
Description:
|
||||
|
||||
Copyright (c) 1994-2007, SRI International. All Rights Reserved.
|
||||
|
||||
RCS ID: $Id: zio.h,v 1.14 2009/08/22 22:41:19 stolcke Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
* $Log: zio.h,v $
|
||||
* Revision 1.14 2009/08/22 22:41:19 stolcke
|
||||
* support for xz compressed files
|
||||
*
|
||||
* Revision 1.13 2007/11/11 16:06:53 stolcke
|
||||
* 7zip compression support
|
||||
*
|
||||
* Revision 1.12 2006/08/04 23:59:09 stolcke
|
||||
* MSVC portability
|
||||
*
|
||||
* Revision 1.11 2006/03/28 01:15:10 stolcke
|
||||
* include sys/signal.h to check for SIGPIPE
|
||||
*
|
||||
* Revision 1.10 2006/03/06 05:46:43 stolcke
|
||||
* define NO_ZIO in zio.h instead of zio.c
|
||||
*
|
||||
* Revision 1.9 2006/03/01 00:45:45 stolcke
|
||||
* allow disabling of zio for windows environment (NO_ZIO)
|
||||
*
|
||||
* Revision 1.8 2005/12/16 23:30:09 stolcke
|
||||
* added support for bzip2-compressed files
|
||||
*
|
||||
* Revision 1.7 2003/02/21 20:18:53 stolcke
|
||||
* avoid conflict if zopen is already defined in library
|
||||
*
|
||||
* Revision 1.6 1999/10/13 09:07:13 stolcke
|
||||
* make filename checking functions public
|
||||
*
|
||||
* Revision 1.5 1995/06/22 19:58:26 stolcke
|
||||
* ansi-fied
|
||||
*
|
||||
* Revision 1.4 1995/06/12 22:56:37 tmk
|
||||
* Added ifdef around the redefinitions of fopen() and fclose().
|
||||
*
|
||||
*/
|
||||
|
||||
/*******************************************************************
|
||||
Copyright 1994 SRI International. All rights reserved.
|
||||
This is an unpublished work of SRI International and is not to be
|
||||
used or disclosed except as provided in a license agreement or
|
||||
nondisclosure agreement with SRI International.
|
||||
********************************************************************/
|
||||
|
||||
|
||||
#ifndef _ZIO_H
#define _ZIO_H

/*
 * Include declarations files.
 * System headers are included before the extern "C" block below so that
 * their declarations are not forced into C linkage in C++ translation
 * units.
 */

#include <stdio.h>
#include <signal.h>		/* to check for SIGPIPE */

/* Avoid conflict with library function */
#ifdef HAVE_ZOPEN
#define zopen my_zopen
#endif

/* Constants */

/* Without SIGPIPE (e.g., non-POSIX environments) compressed I/O is disabled */
#if !defined(SIGPIPE)
#define NO_ZIO
#endif

/*
 * Filename suffixes that select compressed I/O.
 * With NO_ZIO all suffixes are empty, so no filename is ever treated as
 * compressed.
 */
#ifdef NO_ZIO
# define COMPRESS_SUFFIX	""
# define GZIP_SUFFIX		""
# define OLD_GZIP_SUFFIX	""
# define BZIP2_SUFFIX		""
# define SEVENZIP_SUFFIX	""
# define XZ_SUFFIX		""
#else
# define COMPRESS_SUFFIX	".Z"
# define GZIP_SUFFIX		".gz"
# define OLD_GZIP_SUFFIX	".z"
# define BZIP2_SUFFIX		".bz2"
# define SEVENZIP_SUFFIX	".7z"
# define XZ_SUFFIX		".xz"
#endif /* NO_ZIO */

#ifdef __cplusplus
extern "C" {
#endif

/* Define function prototypes. */

/* Filename classification: each returns non-zero if name matches */
int stdio_filename_p (const char *name);
int compressed_filename_p (const char *name);
int gzipped_filename_p (const char *name);
int bzipped_filename_p (const char *name);
int sevenzipped_filename_p (const char *name);
int xz_filename_p (const char *name);

/* Open/close a stream, handling stdin/stdout and compressed files */
FILE *	zopen (const char *name, const char *mode);
int	zclose (FILE *stream);

#ifdef __cplusplus
}
#endif

/* Users of this header implicitly always use zopen/zclose in stdio */

#ifdef ZIO_HACK
#define fopen(name,mode)	zopen(name,mode)
#define fclose(stream)		zclose(stream)
#endif

#endif /* _ZIO_H */
|
||||
|
||||
105
language_model/srilm-1.7.3/misc/src/ztest.c
Normal file
105
language_model/srilm-1.7.3/misc/src/ztest.c
Normal file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* ztest --
|
||||
* test for zio.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char Copyright[] = "Copyright (c) 1997,2006 SRI International, 2013 Andreas Stolcke, Microsoft Corp. All Rights Reserved.";
|
||||
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/misc/src/ztest.c,v 1.5 2019/09/09 23:13:15 stolcke Exp $";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "zio.h"
|
||||
#include "zlib.h"
|
||||
#include "option.h"
|
||||
#include "version.h"
|
||||
|
||||
char *inFile = "-";
|
||||
char *outFile = "-";
|
||||
int numLines = 0;
|
||||
int version = 0;
|
||||
int useZlib = 0;
|
||||
|
||||
static Option options[] = {
|
||||
{ OPT_TRUE, "version", (void *)&version, "print version information" },
|
||||
{ OPT_TRUE, "zlib", (void *)&useZlib, "use zlib" },
|
||||
{ OPT_STRING, "read", (void *)&inFile, "input file" },
|
||||
{ OPT_STRING, "write", (void *)&outFile, "output file" },
|
||||
{ OPT_INT, "lines", (void *)&numLines, "number of lines to copy" },
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
char buffer[1024];
|
||||
FILE *in, *out;
|
||||
gzFile gzin, gzout;
|
||||
int result;
|
||||
int lineno;
|
||||
|
||||
Opt_Parse(argc, argv, options, Opt_Number(options), 0);
|
||||
|
||||
if (version) {
|
||||
printVersion(RcsId);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (useZlib) {
|
||||
gzin = gzopen(inFile, "r");
|
||||
if (gzin == NULL) {
|
||||
perror(inFile);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
gzout = gzopen(outFile, "w");
|
||||
if (gzout == NULL) {
|
||||
perror(outFile);
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
in = zopen(inFile, "r");
|
||||
if (in == NULL) {
|
||||
perror(inFile);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
out = zopen(outFile, "w");
|
||||
if (out == NULL) {
|
||||
perror(outFile);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
lineno = 0;
|
||||
while ((numLines == 0 || lineno < numLines) &&
|
||||
(useZlib ?
|
||||
gzgets(gzin, buffer, sizeof(buffer)) :
|
||||
fgets(buffer, sizeof(buffer), in)))
|
||||
{
|
||||
if (useZlib) {
|
||||
gzputs(gzout, buffer);
|
||||
} else {
|
||||
fputs(buffer, out);
|
||||
}
|
||||
lineno ++;
|
||||
}
|
||||
|
||||
if (lineno > 0) {
|
||||
if (useZlib) {
|
||||
gzprintf(gzout, "THE END AFTER %d LINES\n", lineno);
|
||||
} else {
|
||||
fprintf(out, "THE END AFTER %d LINES\n", lineno);
|
||||
}
|
||||
}
|
||||
|
||||
result = useZlib ? gzclose(gzin) : zclose(in);
|
||||
fprintf(stderr, "zclose(in) = %d\n", result);
|
||||
|
||||
result = useZlib ? gzclose(gzout) : zclose(out);
|
||||
fprintf(stderr, "zclose(out) = %d\n", result);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user