37 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Groff
		
	
	
	
	
	
		
		
			
		
	
	
			37 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Groff
		
	
	
	
	
	
|   | .\" $Id: classes-format.5,v 1.3 2007/12/19 22:08:05 stolcke Exp $ | ||
|  | .TH classes-format 5 "$Date: 2007/12/19 22:08:05 $" "SRILM File Formats" | ||
|  | .SH NAME | ||
|  | classes-format \- File format for word class definitions | ||
|  | .SH SYNOPSIS | ||
|  | .nf | ||
|  | \fIclass\fP [\fIp\fP] \fIword1\fP \fIword2\fP ... | ||
|  | .fi | ||
|  | .SH DESCRIPTION | ||
|  | Various programs dealing with word classes use this format to define | ||
|  | the possible expansions of classes and their respective probabilities. | ||
|  | Each expansion appears on a separate line as in  | ||
|  | the synopsis, where | ||
|  | .I class | ||
|  | names a word class, | ||
|  | .I p | ||
|  | gives the probability for the class expansion, and | ||
|  | .I "word1 word2 ..." | ||
|  | defines the word string that the class expands to. | ||
|  | If  | ||
|  | .I p | ||
|  | is omitted it is assumed to be 1. | ||
|  | (All expansion probabilities for a given class should sum to one. | ||
|  | This is not necessarily enforced by the software, but expansions that | ||
|  | do not sum to one would lead to improper models.) | ||
|  | .PP | ||
|  | Note that the concept of word class here is generalized to include | ||
|  | ``multi-words'', or phrases consisting of more than one word. | ||
|  | All expansions must have at least one word. | ||
|  | Certain models might impose more restrictive formats. | ||
|  | .SH "SEE ALSO" | ||
|  | ngram(1), ngram-class(1), disambig(1), training-scripts(1), pfsg-scripts(5). | ||
|  | .SH AUTHOR | ||
|  | Andreas Stolcke <stolcke@speech.sri.com>. | ||
|  | .br | ||
|  | Copyright 1999 SRI International |