21 #ifndef KALDI_LM_ARPA_FILE_PARSER_H_    22 #define KALDI_LM_ARPA_FILE_PARSER_H_    24 #include <fst/fst-decl.h>    54                    "Maximum warnings to report on ARPA parsing, "    55                    "0 to disable, -1 to show all");
    96   void Read(std::istream &is);
   113   virtual void ConsumeNGram(
const NGram&) = 0;
   119   const fst::SymbolTable* 
Symbols()
 const { 
return symbols_; }
   126   std::string LineReference() 
const;
   133   const std::vector<int32>& 
NgramCounts()
 const { 
return ngram_counts_; }
   146 #endif  // KALDI_LM_ARPA_FILE_PARSER_H_ ArpaFileParser is an abstract base class for ARPA LM file conversion. 
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
ArpaParseOptions options_
 
const fst::SymbolTable * Symbols() const
Read-only access to symbol table. Not owned, do not make public. 
 
virtual void ReadStarted()
Override called before reading starts. 
 
Options that control ArpaFileParser. 
 
virtual void ReadComplete()
Override function called after the last n-gram has been consumed. 
 
const ArpaParseOptions & Options() const
Parser options. 
 
int32 unk_symbol
Symbol for <unk>; required for kReplaceWithUnk. 
 
float logprob
Log-prob of the n-gram. 
 
void Register(OptionsItf *opts)
 
int32 LineNumber() const
Inside ConsumeNGram(), provides the current line number. 
 
virtual void HeaderAvailable()
Override function called to signal that the ARPA header with the expected number of n-grams has been read; NgramCounts() is valid from this point on. 
 
Add novel words to the symbol table. 
 
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
 
int32 eos_symbol
Symbol for </s>; required to be non-epsilon. 
 
float backoff
Log-backoff weight of the n-gram. 
 
std::vector< int32 > ngram_counts_
 
std::vector< int32 > words
Symbols in left to right order. 
 
std::string current_line_
 
Skip n-gram with OOV word and continue. 
 
int32 bos_symbol
Symbol for <s>; required to be non-epsilon. 
 
int32 max_warnings
Maximum number of warnings to report; 0 disables warnings, a negative value means unlimited. 
 
fst::SymbolTable * symbols_
 
const std::vector< int32 > & NgramCounts() const
N-gram counts. Valid from the point when HeaderAvailable() is called. 
 
OovHandling oov_handling
How to handle OOV words in the file. 
 
Replace OOV words with <unk>. 
 
A parsed n-gram from ARPA LM file.