21 #ifndef KALDI_LM_ARPA_FILE_PARSER_H_ 22 #define KALDI_LM_ARPA_FILE_PARSER_H_ 24 #include <fst/fst-decl.h> 54 "Maximum warnings to report on ARPA parsing, " 55 "0 to disable, -1 to show all");
// Reads from the input stream `is`.
// NOTE(review): presumably parses ARPA LM text and drives the ReadStarted(),
// HeaderAvailable(), ConsumeNGram() and ReadComplete() overrides -- confirm
// against the implementation, which is not visible here.
96 void Read(std::istream &is);
// Pure virtual: every concrete subclass must implement this to receive an
// NGram (a parsed n-gram from the ARPA LM file).
// NOTE(review): presumably invoked once per n-gram while Read() is running --
// confirm against the implementation.
113 virtual void ConsumeNGram(
const NGram&) = 0;
// Read-only access to the symbol table. The table is not owned by this
// object; this accessor is not meant to be made public.
119 const fst::SymbolTable *Symbols() const {
  return symbols_;
}
// Returns a std::string by value; const, so it does not modify the parser.
// NOTE(review): presumably a printable reference to the current input line
// for use in diagnostics -- confirm against the implementation.
126 std::string LineReference()
const;
// N-gram counts, returned by const reference to the internal vector.
// Valid from the point when HeaderAvailable() is called.
133 const std::vector<int32> &NgramCounts() const {
  return ngram_counts_;
}
146 #endif // KALDI_LM_ARPA_FILE_PARSER_H_
ArpaFileParser is an abstract base class for ARPA LM file conversion.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
ArpaParseOptions options_
const fst::SymbolTable * Symbols() const
Read-only access to symbol table. Not owned, do not make public.
virtual void ReadStarted()
Override function called before reading starts.
Options that control ArpaFileParser.
virtual void ReadComplete()
Override function called after the last n-gram has been consumed.
const ArpaParseOptions & Options() const
Parser options.
int32 unk_symbol
Symbol for <unk>; required for kReplaceWithUnk.
float logprob
Log-prob of the n-gram.
void Register(OptionsItf *opts)
int32 LineNumber() const
Inside ConsumeNGram(), provides the current line number.
virtual void HeaderAvailable()
Override function called to signal that ARPA header with the expected number of n-grams has been read...
Add novel words to the symbol table.
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
int32 eos_symbol
Symbol for </s>; required, must be non-epsilon.
float backoff
Log-backoff weight of the n-gram.
std::vector< int32 > ngram_counts_
std::vector< int32 > words
Symbols in left to right order.
std::string current_line_
Skip n-gram with OOV word and continue.
int32 bos_symbol
Symbol for <s>; required, must be non-epsilon.
int32 max_warnings
Maximum number of warnings to report; 0 disables warnings, a negative value means unlimited.
fst::SymbolTable * symbols_
const std::vector< int32 > & NgramCounts() const
N-gram counts. Valid from the point when HeaderAvailable() is called.
OovHandling oov_handling
How to handle OOV words in the file.
Replace OOV words with <unk>.
A parsed n-gram from ARPA LM file.