20 #ifndef KALDI_HMM_HMM_UTILS_H_ 21 #define KALDI_HMM_HMM_UTILS_H_ 44 transition_scale(1.0),
45 nonterm_phones_offset(-1) { }
48 opts->
Register(
"transition-scale", &transition_scale,
49 "Scale of transition probs (relative to LM)");
50 opts->
Register(
"nonterm-phones-offset", &nonterm_phones_offset,
51 "The integer id of #nonterm_bos in phones.txt, if present. " 52 "Only needs to be set if you are doing grammar decoding, " 53 "see doc/grammar.dox.");
59 int operator () (
const std::pair<
int32, std::vector<int32> >&p)
const {
62 return prime*p.first + v(p.second);
68 typedef unordered_map<std::pair<int32, std::vector<int32> >,
69 fst::VectorFst<fst::StdArc>*,
94 std::vector<int32> context_window,
98 HmmCacheType *cache = NULL);
104 fst::VectorFst<fst::StdArc>*
126 fst::VectorFst<fst::StdArc>*
127 GetHTransducer(
const std::vector<std::vector<int32> > &ilabel_info,
131 std::vector<int32> *disambig_syms_left);
149 void GetIlabelMapping(
const std::vector<std::vector<int32> > &ilabel_info_old,
152 std::vector<int32> *old2new_map);
186 const std::vector<int32> &disambig_syms,
189 bool check_no_self_loops,
190 fst::VectorFst<fst::StdArc> *
fst);
210 const std::vector<int32> &disambig_syms,
213 fst::VectorFst<fst::StdArc> *
fst);
227 fst::VectorFst<fst::StdArc>*
233 const std::vector<int32> &disambig_syms,
234 fst::VectorFst<fst::StdArc> *
fst);
252 const std::vector<int32> &alignment,
253 std::vector<std::vector<int32> > *split_alignment);
290 const std::vector<int32> &old_alignment,
291 int32 subsample_factor,
294 const std::vector<int32> *phone_map,
295 std::vector<int32> *new_alignment);
310 const std::vector<int32> &
words,
311 int32 word_start_sym,
313 std::vector<std::vector<int32> > *prons);
323 const std::vector<int32> &phone_window,
324 std::vector<int32> *alignment);
330 std::vector<int32> *alignment);
336 std::vector<std::set<int32> > *pdf2phones);
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
A hashing function-object for vectors.
void Register(OptionsItf *opts)
void GetRandomAlignmentForPhone(const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const std::vector< int32 > &phone_window, std::vector< int32 > *alignment)
bool ConvertPhnxToProns(const std::vector< int32 > &phnx, const std::vector< int32 > &words, int32 word_start_sym, int32 word_end_sym, std::vector< std::vector< int32 > > *prons)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void AddSelfLoops(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool reorder, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
For context, see AddSelfLoops().
void GetIlabelMapping(const std::vector< std::vector< int32 > > &ilabel_info_old, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, std::vector< int32 > *old2new_map)
GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical model mapping (i...
unordered_map< std::pair< int32, std::vector< int32 > >, fst::VectorFst< fst::StdArc > *, HmmCacheHash > HmmCacheType
HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used as cache in GetHmmAsFsa...
int32 nonterm_phones_offset
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
Configuration class for the GetHTransducer() function; see The HTransducerConfig configuration class ...
void AddTransitionProbs(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat transition_scale, BaseFloat self_loop_scale, fst::VectorFst< fst::StdArc > *fst)
Adds transition-probs, with the supplied scales (see Scaling of transition and acoustic probabilities...
void GetPdfToPhonesMap(const TransitionModel &trans_model, std::vector< std::set< int32 > > *pdf2phones)
fst::VectorFst< LatticeArc > Lattice
fst::VectorFst< fst::StdArc > * GetHTransducer(const std::vector< std::vector< int32 > > &ilabel_info, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, std::vector< int32 > *disambig_syms_left)
Returns the H transducer; result owned by caller.
context-dep-itf.h provides a link between the tree-building code in ../tree/, and the FST code in ...
BaseFloat transition_scale
Transition log-prob scale, see Scaling of transition and acoustic probabilities.
fst::VectorFst< fst::StdArc > * GetHmmAsFsa(std::vector< int32 > phone_window, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, HmmCacheType *cache)
Called by GetHTransducer() and probably will not need to be called directly; it creates and returns t...
void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, fst::VectorFst< fst::StdArc > *fst)
Converts all transition-ids in the FST to pdfs plus one.
void ChangeReorderingOfAlignment(const TransitionModel &trans_model, std::vector< int32 > *alignment)
bool ConvertAlignment(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_alignment, int32 subsample_factor, bool repeat_frames, bool new_is_reordered, const std::vector< int32 > *phone_map, std::vector< int32 > *new_alignment)
ConvertAlignment converts an alignment that was created using one model, to another model...
fst::VectorFst< fst::StdArc > * GetHmmAsFsaSimple(std::vector< int32 > phone_window, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, BaseFloat prob_scale)
Included mainly as a form of documentation, not used in any other code currently. ...
fst::VectorFst< fst::StdArc > * GetPdfToTransitionIdTransducer(const TransitionModel &trans_model)
Returns a transducer from pdfs plus one (input) to transition-ids (output).