20 #ifndef KALDI_LM_CONST_ARPA_LM_H_ 21 #define KALDI_LM_CONST_ARPA_LM_H_ 218 unigram_states_ = NULL;
219 overflow_buffer_ = NULL;
220 memory_assigned_ =
false;
221 initialized_ =
false;
227 const int32 unk_symbol,
const int32 ngram_order,
228 const int32 num_words,
const int32 overflow_buffer_size,
229 const int64 lm_states_size,
int32** unigram_states,
231 bos_symbol_(bos_symbol), eos_symbol_(eos_symbol),
232 unk_symbol_(unk_symbol), ngram_order_(ngram_order),
233 num_words_(num_words), overflow_buffer_size_(overflow_buffer_size),
234 lm_states_size_(lm_states_size), unigram_states_(unigram_states),
235 overflow_buffer_(overflow_buffer), lm_states_(lm_states) {
240 KALDI_ASSERT(bos_symbol_ < num_words_ && bos_symbol_ > 0);
241 KALDI_ASSERT(eos_symbol_ < num_words_ && eos_symbol_ > 0);
243 (unk_symbol_ > 0 || unk_symbol_ == -1));
244 lm_states_end_ = lm_states_ + lm_states_size_ - 1;
245 memory_assigned_ =
false;
250 if (memory_assigned_) {
252 delete[] unigram_states_;
253 delete[] overflow_buffer_;
// Non-const member taking an input stream and a `binary` flag. Only the
// declaration is visible here; the definition lives elsewhere.
// NOTE(review): presumably loads the model from `is`, with `binary` selecting
// binary vs. text encoding -- confirm against the implementation.
259 void Read(std::istream &is,
bool binary);
// Const member taking an output stream and a `binary` flag; being const, it
// cannot modify non-mutable members. Declaration only.
// NOTE(review): presumably serializes the model to `os`, with `binary`
// selecting binary vs. text encoding -- confirm against the implementation.
262 void Write(std::ostream &os,
bool binary)
const;
// Const member taking only an output stream. Declaration only.
// NOTE(review): the name suggests it emits the model in ARPA text format to
// `os` -- confirm against the implementation.
266 void WriteArpa(std::ostream &os)
const;
// Const lookup returning a float for a word id plus a history vector of word
// ids (passed by const reference, so it is not modified). Declaration only.
// NOTE(review): presumably returns the log-probability of `word` given
// `hist`; the ordering of `hist` and the log base are not visible here --
// confirm against the implementation.
270 float GetNgramLogprob(
const int32 word,
const std::vector<int32>& hist)
const;
// Const predicate over a history vector of word ids (not modified).
// Declaration only.
// NOTE(review): presumably returns true when the model has a state for
// `hist` -- confirm against the implementation.
274 bool HistoryStateExists(
const std::vector<int32>& hist)
const;
// Non-const member with the same parameter list as Read(). Declaration only.
// NOTE(review): looks like a helper that Read() delegates to for the current
// on-disk format -- confirm against the implementation.
283 void ReadInternal(std::istream &is,
bool binary);
// Non-const member with the same parameter list as Read(). Declaration only.
// NOTE(review): the name suggests a reader for an older on-disk format kept
// for backward compatibility -- confirm against the implementation.
289 void ReadInternalOldFormat(std::istream &is,
bool binary);
// Const member with the same signature as GetNgramLogprob(). Declaration only.
// NOTE(review): presumably the recursive worker behind GetNgramLogprob()
// (e.g. for back-off to shorter histories) -- confirm against the
// implementation.
293 float GetNgramLogprobRecurse(
const int32 word,
294 const std::vector<int32>& hist)
const;
// Const member mapping a word-id sequence (not modified) to an int32 pointer.
// Declaration only.
// NOTE(review): presumably returns a pointer to the stored state for `seq`;
// what is returned when no such state exists is not visible here -- confirm
// against the implementation before dereferencing.
302 int32* GetLmState(
const std::vector<int32>& seq)
const;
// Const member returning bool; takes a word id and two non-const int32
// pointers. Declaration only.
// NOTE(review): presumably looks up `word` under the state at `parent` and
// writes the result through `child_info`, returning whether it was found --
// confirm the in/out roles of both pointers against the implementation.
313 bool GetChildInfo(
const int32 word,
int32* parent,
int32* child_info)
const;
317 void DecodeChildInfo(
const int32 child_info,
int32* parent,
// Const member taking a state pointer, the word-id sequence (not modified)
// and an output vector of ArpaLine passed by pointer. Declaration only.
// NOTE(review): presumably the recursive worker behind WriteArpa() that
// appends entries to `*output` -- confirm against the implementation.
320 void WriteArpaRecurse(
int32* lm_state,
321 const std::vector<int32>& seq,
322 std::vector<ArpaLine> *output)
const;
// Returns the value of the start_state_ member. Declared virtual.
// NOTE(review): likely overrides a base-class Start() -- confirm.
402 virtual StateId
Start() {
return start_state_; }
// Virtual member returning a Weight for state id `s`. Declaration only.
// NOTE(review): presumably the final weight of state `s` -- confirm
// against the implementation.
406 virtual Weight Final(StateId s);
// Virtual member returning bool; takes a state id, an input label and a
// pointer to an fst::StdArc. Declaration only.
// NOTE(review): presumably fills `*oarc` with the arc leaving `s` on
// `ilabel` and returns whether one exists -- confirm against the
// implementation.
408 virtual bool GetArc(StateId s, Label ilabel,
fst::StdArc* oarc);
411 typedef unordered_map<std::vector<Label>,
423 const std::string& arpa_rxfilename,
424 const std::string& const_arpa_wxfilename);
428 #endif // KALDI_LM_CONST_ARPA_LM_H_ fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
fst::StdArc::Weight Weight
std::vector< std::vector< Label > > state_to_wseq_
A hashing function-object for vectors.
Options that control ArpaFileParser.
int32 overflow_buffer_size_
Int32AndFloat(int32 input_i)
class DeterministicOnDemandFst is an "FST-like" base-class.
ConstArpaLm(const int32 bos_symbol, const int32 eos_symbol, const int32 unk_symbol, const int32 ngram_order, const int32 num_words, const int32 overflow_buffer_size, const int64 lm_states_size, int32 **unigram_states, int32 **overflow_buffer, int32 *lm_states)
int32 ** overflow_buffer_
fst::StdArc::Weight Weight
Int32AndFloat(float input_f)
fst::StdArc::StateId StateId
#define KALDI_ASSERT(cond)
bool BuildConstArpaLm(const ArpaParseOptions &options, const std::string &arpa_rxfilename, const std::string &const_arpa_wxfilename)
unordered_map< std::vector< Label >, StateId, VectorHasher< Label > > MapType
This class wraps a ConstArpaLm format language model with the interface defined in DeterministicOnDemandFst.