hmm-utils.cc
Go to the documentation of this file.
1 // hmm/hmm-utils.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2018 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include <vector>
22 
23 #include "hmm/hmm-utils.h"
24 #include "fst/fstlib.h"
25 #include "fstext/fstext-lib.h"
27 
28 namespace kaldi {
29 
30 
31 
32 fst::VectorFst<fst::StdArc> *GetHmmAsFsa(
33  std::vector<int32> phone_window,
34  const ContextDependencyInterface &ctx_dep,
35  const TransitionModel &trans_model,
36  const HTransducerConfig &config,
37  HmmCacheType *cache) {
38  using namespace fst;
39 
40  if (static_cast<int32>(phone_window.size()) != ctx_dep.ContextWidth())
41  KALDI_ERR << "Context size mismatch, ilabel-info [from context FST is "
42  << phone_window.size() << ", context-dependency object "
43  "expects " << ctx_dep.ContextWidth();
44 
45  int P = ctx_dep.CentralPosition();
46  int32 phone = phone_window[P];
47  if (phone == 0)
48  KALDI_ERR << "phone == 0. Some mismatch happened, or there is "
49  "a code error.";
50 
51  const HmmTopology &topo = trans_model.GetTopo();
52  const HmmTopology::TopologyEntry &entry = topo.TopologyForPhone(phone);
53 
54  // vector of the pdfs, indexed by pdf-class (pdf-classes must start from zero
55  // and be contiguous).
56  std::vector<int32> pdfs(topo.NumPdfClasses(phone));
57  for (int32 pdf_class = 0;
58  pdf_class < static_cast<int32>(pdfs.size());
59  pdf_class++) {
60  if (! ctx_dep.Compute(phone_window, pdf_class, &(pdfs[pdf_class])) ) {
61  std::ostringstream ctx_ss;
62  for (size_t i = 0; i < phone_window.size(); i++)
63  ctx_ss << phone_window[i] << ' ';
64  KALDI_ERR << "GetHmmAsFsa: context-dependency object could not produce "
65  << "an answer: pdf-class = " << pdf_class << " ctx-window = "
66  << ctx_ss.str() << ". This probably points "
67  "to either a coding error in some graph-building process, "
68  "a mismatch of topology with context-dependency object, the "
69  "wrong FST being passed on a command-line, or something of "
70  " that general nature.";
71  }
72  }
73  std::pair<int32, std::vector<int32> > cache_index(phone, pdfs);
74  if (cache != NULL) {
75  HmmCacheType::iterator iter = cache->find(cache_index);
76  if (iter != cache->end())
77  return iter->second;
78  }
79 
80  VectorFst<StdArc> *ans = new VectorFst<StdArc>;
81 
82  typedef StdArc Arc;
83  typedef Arc::Weight Weight;
84  typedef Arc::StateId StateId;
85  typedef Arc::Label Label;
86 
87  std::vector<StateId> state_ids;
88  for (size_t i = 0; i < entry.size(); i++)
89  state_ids.push_back(ans->AddState());
90  KALDI_ASSERT(state_ids.size() != 0); // Or empty topology entry.
91  ans->SetStart(state_ids[0]);
92  StateId final = state_ids.back();
93  ans->SetFinal(final, Weight::One());
94 
95  for (int32 hmm_state = 0;
96  hmm_state < static_cast<int32>(entry.size());
97  hmm_state++) {
98  int32 forward_pdf_class = entry[hmm_state].forward_pdf_class, forward_pdf;
99  int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, self_loop_pdf;
100  if (forward_pdf_class == kNoPdf) { // nonemitting state.
101  forward_pdf = kNoPdf;
102  self_loop_pdf = kNoPdf;
103  } else {
104  KALDI_ASSERT(forward_pdf_class < static_cast<int32>(pdfs.size()));
105  KALDI_ASSERT(self_loop_pdf_class < static_cast<int32>(pdfs.size()));
106  forward_pdf = pdfs[forward_pdf_class];
107  self_loop_pdf = pdfs[self_loop_pdf_class];
108  }
109  int32 trans_idx;
110  for (trans_idx = 0;
111  trans_idx < static_cast<int32>(entry[hmm_state].transitions.size());
112  trans_idx++) {
113  BaseFloat log_prob;
114  Label label;
115  int32 dest_state = entry[hmm_state].transitions[trans_idx].first;
116  bool is_self_loop = (dest_state == hmm_state);
117  if (is_self_loop)
118  continue; // We will add self-loops in at a later stage of processing,
119  // not in this function.
120  if (forward_pdf_class == kNoPdf) {
121  // no pdf, hence non-estimated probability.
122  // [would not happen with normal topology] . There is no transition-state
123  // involved in this case.
124  log_prob = Log(entry[hmm_state].transitions[trans_idx].second);
125  label = 0;
126  } else { // normal probability.
127  int32 trans_state =
128  trans_model.TupleToTransitionState(phone, hmm_state, forward_pdf, self_loop_pdf);
129  int32 trans_id =
130  trans_model.PairToTransitionId(trans_state, trans_idx);
131  log_prob = trans_model.GetTransitionLogProbIgnoringSelfLoops(trans_id);
132  // log_prob is a negative number (or zero)...
133  label = trans_id;
134  }
135  // Will add probability-scale later (we may want to push first).
136  ans->AddArc(state_ids[hmm_state],
137  Arc(label, label, Weight(-log_prob), state_ids[dest_state]));
138  }
139  }
140 
141  fst::RemoveEpsLocal(ans); // this is safe and will not blow up.
142 
143  // Now apply probability scale.
144  // We waited till after the possible weight-pushing steps,
145  // because weight-pushing needs "real" weights in order to work.
147  if (cache != NULL)
148  (*cache)[cache_index] = ans;
149  return ans;
150 }
151 
152 
153 
154 fst::VectorFst<fst::StdArc>*
155 GetHmmAsFsaSimple(std::vector<int32> phone_window,
156  const ContextDependencyInterface &ctx_dep,
157  const TransitionModel &trans_model,
158  BaseFloat prob_scale) {
159  using namespace fst;
160 
161  if (static_cast<int32>(phone_window.size()) != ctx_dep.ContextWidth())
162  KALDI_ERR <<"Context size mismatch, ilabel-info [from context FST is "
163  <<(phone_window.size())<<", context-dependency object "
164  "expects "<<(ctx_dep.ContextWidth());
165 
166  int P = ctx_dep.CentralPosition();
167  int32 phone = phone_window[P];
168  KALDI_ASSERT(phone != 0);
169 
170  const HmmTopology &topo = trans_model.GetTopo();
171  const HmmTopology::TopologyEntry &entry = topo.TopologyForPhone(phone);
172 
173  VectorFst<StdArc> *ans = new VectorFst<StdArc>;
174 
175  // Create a mini-FST with a superfinal state [in case we have emitting
176  // final-states, which we usually will.]
177  typedef StdArc Arc;
178  typedef Arc::Weight Weight;
179  typedef Arc::StateId StateId;
180  typedef Arc::Label Label;
181 
182  std::vector<StateId> state_ids;
183  for (size_t i = 0; i < entry.size(); i++)
184  state_ids.push_back(ans->AddState());
185  KALDI_ASSERT(state_ids.size() > 1); // Or invalid topology entry.
186  ans->SetStart(state_ids[0]);
187  StateId final = state_ids.back();
188  ans->SetFinal(final, Weight::One());
189 
190  for (int32 hmm_state = 0;
191  hmm_state < static_cast<int32>(entry.size());
192  hmm_state++) {
193  int32 forward_pdf_class = entry[hmm_state].forward_pdf_class, forward_pdf;
194  int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, self_loop_pdf;
195  if (forward_pdf_class == kNoPdf) { // nonemitting state; not generally used.
196  forward_pdf = kNoPdf;
197  self_loop_pdf = kNoPdf;
198  } else {
199  bool ans = ctx_dep.Compute(phone_window, forward_pdf_class, &forward_pdf);
200  KALDI_ASSERT(ans && "Context-dependency computation failed.");
201  ans = ctx_dep.Compute(phone_window, self_loop_pdf_class, &self_loop_pdf);
202  KALDI_ASSERT(ans && "Context-dependency computation failed.");
203  }
204  int32 trans_idx;
205  for (trans_idx = 0;
206  trans_idx < static_cast<int32>(entry[hmm_state].transitions.size());
207  trans_idx++) {
208  BaseFloat log_prob;
209  Label label;
210  int32 dest_state = entry[hmm_state].transitions[trans_idx].first;
211  if (forward_pdf_class == kNoPdf) {
212  // no pdf, hence non-estimated probability. very unusual case. [would
213  // not happen with normal topology] . There is no transition-state
214  // involved in this case.
215  KALDI_ASSERT(hmm_state != dest_state);
216  log_prob = Log(entry[hmm_state].transitions[trans_idx].second);
217  label = 0;
218  } else { // normal probability.
219  int32 trans_state =
220  trans_model.TupleToTransitionState(phone, hmm_state, forward_pdf, self_loop_pdf);
221  int32 trans_id =
222  trans_model.PairToTransitionId(trans_state, trans_idx);
223  log_prob = prob_scale * trans_model.GetTransitionLogProb(trans_id);
224  // log_prob is a negative number (or zero)...
225  label = trans_id;
226  }
227  ans->AddArc(state_ids[hmm_state],
228  Arc(label, label, Weight(-log_prob), state_ids[dest_state]));
229  }
230  }
231  return ans;
232 }
233 
234 
235 
239 static inline fst::VectorFst<fst::StdArc> *MakeTrivialAcceptor(int32 label) {
240  typedef fst::StdArc Arc;
241  typedef Arc::Weight Weight;
242  fst::VectorFst<Arc> *ans = new fst::VectorFst<Arc>;
243  ans->AddState();
244  ans->AddState();
245  ans->SetStart(0);
246  ans->SetFinal(1, Weight::One());
247  ans->AddArc(0, Arc(label, label, Weight::One(), 1));
248  return ans;
249 }
250 
251 
252 
253 // The H transducer has a separate outgoing arc for each of the symbols in ilabel_info.
254 fst::VectorFst<fst::StdArc> *GetHTransducer(const std::vector<std::vector<int32> > &ilabel_info,
255  const ContextDependencyInterface &ctx_dep,
256  const TransitionModel &trans_model,
257  const HTransducerConfig &config,
258  std::vector<int32> *disambig_syms_left) {
259  KALDI_ASSERT(ilabel_info.size() >= 1 && ilabel_info[0].size() == 0); // make sure that eps == eps.
260  HmmCacheType cache;
261  // "cache" is an optimization that prevents GetHmmAsFsa repeating work
262  // unnecessarily.
263  using namespace fst;
264  typedef StdArc Arc;
265  typedef Arc::Weight Weight;
266  typedef Arc::StateId StateId;
267  typedef Arc::Label Label;
268 
269  std::vector<const ExpandedFst<Arc>* > fsts(ilabel_info.size(), NULL);
270  std::vector<int32> phones = trans_model.GetPhones();
271 
272  KALDI_ASSERT(disambig_syms_left != 0);
273  disambig_syms_left->clear();
274 
275  int32 first_disambig_sym = trans_model.NumTransitionIds() + 1; // First disambig symbol we can have on the input side.
276  int32 next_disambig_sym = first_disambig_sym;
277 
278  if (ilabel_info.size() > 0)
279  KALDI_ASSERT(ilabel_info[0].size() == 0); // make sure epsilon is epsilon...
280 
281  for (int32 j = 1; j < static_cast<int32>(ilabel_info.size()); j++) { // zero is eps.
282  KALDI_ASSERT(!ilabel_info[j].empty());
283  if (ilabel_info[j][0] < 0 ||
284  (ilabel_info[j][0] == 0 && ilabel_info[j].size() == 1)) {
285  // disambig symbol or special symbol for grammar FSTs.
286  if (ilabel_info[j].size() == 1) {
287  // disambiguation symbol.
288  int32 disambig_sym_left = next_disambig_sym++;
289  disambig_syms_left->push_back(disambig_sym_left);
290  fsts[j] = MakeTrivialAcceptor(disambig_sym_left);
291  } else if (ilabel_info[j].size() == 2) {
292  if (config.nonterm_phones_offset <= 0) {
293  KALDI_ERR << "ilabel-info seems to be for grammar-FST. You need to "
294  "supply the --nonterm-phones-offset option.";
295  }
296  int32 nonterm_phones_offset = config.nonterm_phones_offset,
297  nonterminal = -ilabel_info[j][0],
298  left_context_phone = ilabel_info[j][1];
299  if (nonterminal <= nonterm_phones_offset ||
300  left_context_phone <= 0 ||
301  left_context_phone > nonterm_phones_offset) {
302  KALDI_ERR << "Could not interpret this ilabel-info with "
303  "--nonterm-phones-offset=" << nonterm_phones_offset
304  << ": nonterminal,left-context-phone="
305  << nonterminal << ',' << left_context_phone;
306  }
307  int32 big_number = static_cast<int32>(fst::kNontermBigNumber),
308  encoding_multiple = fst::GetEncodingMultiple(nonterm_phones_offset);
309  int32 encoded_symbol = big_number + nonterminal * encoding_multiple +
310  left_context_phone;
311  fsts[j] = MakeTrivialAcceptor(encoded_symbol);
312  } else {
313  KALDI_ERR << "Could not decode this ilabel_info entry.";
314  }
315  } else { // Real phone-in-context.
316  std::vector<int32> phone_window = ilabel_info[j];
317 
318  VectorFst<Arc> *fst = GetHmmAsFsa(phone_window,
319  ctx_dep,
320  trans_model,
321  config,
322  &cache);
323  fsts[j] = fst;
324  }
325  }
326 
327  VectorFst<Arc> *ans = MakeLoopFst(fsts);
328  SortAndUniq(&fsts); // remove duplicate pointers, which we will have
329  // in general, since we used the cache.
330  DeletePointers(&fsts);
331  return ans;
332 }
333 
334 
335 void GetIlabelMapping (const std::vector<std::vector<int32> > &ilabel_info_old,
336  const ContextDependencyInterface &ctx_dep,
337  const TransitionModel &trans_model,
338  std::vector<int32> *old2new_map) {
339  KALDI_ASSERT(old2new_map != NULL);
340 
348  std::map<std::pair<int32, std::vector<int32> >, int32 >
349  pair_to_physical;
350 
351  int32 N = ctx_dep.ContextWidth(),
352  P = ctx_dep.CentralPosition();
353  int32 num_syms_old = ilabel_info_old.size();
354 
357  std::vector<int32> old2old_map(num_syms_old);
358  old2old_map[0] = 0;
359  for (int32 i = 1; i < num_syms_old; i++) {
360  const std::vector<int32> &vec = ilabel_info_old[i];
361  if (vec.size() == 1 && vec[0] <= 0) { // disambig.
362  old2old_map[i] = i;
363  } else {
364  KALDI_ASSERT(vec.size() == static_cast<size_t>(N));
365  // work out the vector of context-dependent phone
366  int32 central_phone = vec[P];
367  int32 num_pdf_classes = trans_model.GetTopo().NumPdfClasses(central_phone);
368  std::vector<int32> state_seq(num_pdf_classes); // Indexed by pdf-class
369  for (int32 pdf_class = 0; pdf_class < num_pdf_classes; pdf_class++) {
370  if (!ctx_dep.Compute(vec, pdf_class, &(state_seq[pdf_class]))) {
371  std::ostringstream ss;
372  WriteIntegerVector(ss, false, vec);
373  KALDI_ERR << "tree did not succeed in converting phone window "<<ss.str();
374  }
375  }
376  std::pair<int32, std::vector<int32> > pr(central_phone, state_seq);
377  std::map<std::pair<int32, std::vector<int32> >, int32 >::iterator iter
378  = pair_to_physical.find(pr);
379  if (iter == pair_to_physical.end()) { // first time we saw something like this.
380  pair_to_physical[pr] = i;
381  old2old_map[i] = i;
382  } else { // seen it before. look up in the map, the index we point to.
383  old2old_map[i] = iter->second;
384  }
385  }
386  }
387 
388  std::vector<bool> seen(num_syms_old, false);
389  for (int32 i = 0; i < num_syms_old; i++)
390  seen[old2old_map[i]] = true;
391 
392  // Now work out the elements of old2new_map corresponding to
393  // things that are first in their equivalence class. We're just
394  // compacting the labels to those for which seen[i] == true.
395  int32 cur_id = 0;
396  old2new_map->resize(num_syms_old);
397  for (int32 i = 0; i < num_syms_old; i++)
398  if (seen[i])
399  (*old2new_map)[i] = cur_id++;
400  // fill in the other elements of old2new_map.
401  for (int32 i = 0; i < num_syms_old; i++)
402  (*old2new_map)[i] = (*old2new_map)[old2old_map[i]];
403 }
404 
405 
406 
407 fst::VectorFst<fst::StdArc> *GetPdfToTransitionIdTransducer(const TransitionModel &trans_model) {
408  using namespace fst;
409  VectorFst<StdArc> *ans = new VectorFst<StdArc>;
411  typedef StdArc Arc;
412  ans->AddState();
413  ans->SetStart(0);
414  ans->SetFinal(0, Weight::One());
415  for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) {
416  int32 pdf = trans_model.TransitionIdToPdf(tid);
417  ans->AddArc(0, Arc(pdf+1, tid, Weight::One(), 0)); // note the offset of 1 on the pdfs.
418  // it's because 0 is a valid pdf.
419  }
420  return ans;
421 }
422 
423 
424 
426 public:
427  // Function object used in MakePrecedingInputSymbolsSameClass and
428  // MakeFollowingInputSymbolsSameClass (as called by AddSelfLoopsReorder and
429  // AddSelfLoopsNoReorder). It maps transition-ids to transition-states (and
430  // -1 to -1, 0 to 0 and disambiguation symbols to 0). If check_no_self_loops
431  // == true, it also checks that there are no self-loops in the graph (i.e. in
432  // the labels it is called with). This is just a convenient place to put this
433  // check.
434 
435  // This maps valid transition-ids to transition states, maps kNoLabel to -1, and
436  // maps all other symbols (i.e. epsilon symbols, disambig symbols, and symbols
437  // with values over 100000/kNontermBigNumber) to zero.
438  // Its point is to provide an equivalence class on labels that's relevant to what
439  // the self-loop will be on the following (or preceding) state.
440  TidToTstateMapper(const TransitionModel &trans_model,
441  const std::vector<int32> &disambig_syms,
442  bool check_no_self_loops):
443  trans_model_(trans_model),
444  disambig_syms_(disambig_syms),
445  check_no_self_loops_(check_no_self_loops) { }
446  typedef int32 Result;
447  int32 operator() (int32 label) const {
448  if (label == static_cast<int32>(fst::kNoLabel)) return -1; // -1 -> -1
449  else if (label >= 1 && label <= trans_model_.NumTransitionIds()) {
450  if (check_no_self_loops_ && trans_model_.IsSelfLoop(label))
451  KALDI_ERR << "AddSelfLoops: graph already has self-loops.";
452  return trans_model_.TransitionIdToTransitionState(label);
453  } else { // 0 or (presumably) disambiguation symbol. Map to zero
454  int32 big_number = fst::kNontermBigNumber; // 1000000
455  if (label != 0 && label < big_number)
456  KALDI_ASSERT(std::binary_search(disambig_syms_.begin(),
457  disambig_syms_.end(),
458  label)); // or invalid label
459  return 0;
460  }
461  }
462 
463 private:
465  const std::vector<int32> &disambig_syms_; // sorted.
467 };
468 
469 // This is the code that expands an FST from transition-states to
470 // transition-ids, in the case where reorder == true, i.e. the non-optional
471 // transition is before the self-loop.
472 static void AddSelfLoopsReorder(const TransitionModel &trans_model,
473  const std::vector<int32> &disambig_syms,
474  BaseFloat self_loop_scale,
475  bool check_no_self_loops,
476  fst::VectorFst<fst::StdArc> *fst) {
477  using namespace fst;
478  typedef StdArc Arc;
479  typedef Arc::Label Label;
480  typedef Arc::StateId StateId;
481  typedef Arc::Weight Weight;
482 
483  TidToTstateMapper f(trans_model, disambig_syms, check_no_self_loops);
484  // Duplicate states as necessary so that each state will require at most one
485  // self-loop to be added to it. Approximately this means that if a
486  // state has multiple different symbols on arcs entering it, it will be
487  // duplicated, with one copy per incoming symbol.
489 
490  int32 kNoTransState = f(kNoLabel);
491  KALDI_ASSERT(kNoTransState == -1);
492 
493  // use the following to keep track of the transition-state for each state.
494  std::vector<int32> state_in(fst->NumStates(), kNoTransState);
495 
496  // This first loop just works out the label into each state,
497  // and converts the transitions in the graph from transition-states
498  // to transition-ids.
499 
500  for (StateIterator<VectorFst<Arc> > siter(*fst);
501  !siter.Done();
502  siter.Next()) {
503  StateId s = siter.Value();
504  for (MutableArcIterator<VectorFst<Arc> > aiter(fst, s);
505  !aiter.Done();
506  aiter.Next()) {
507  Arc arc = aiter.Value();
508  int32 trans_state = f(arc.ilabel);
509  if (state_in[arc.nextstate] == kNoTransState)
510  state_in[arc.nextstate] = trans_state;
511  else {
512  KALDI_ASSERT(state_in[arc.nextstate] == trans_state);
513  // or probably an error in MakePrecedingInputSymbolsSame.
514  }
515  }
516  }
517 
518  KALDI_ASSERT(state_in[fst->Start()] == kNoStateId || state_in[fst->Start()] == 0);
519  // or MakePrecedingInputSymbolsSame failed.
520 
521  // The next loop looks at each graph state, adds the self-loop [if needed] and
522  // multiples all the out-transitions' probs (and final-prob) by the
523  // forward-prob for that state (which is one minus self-loop-prob). We do it
524  // like this to maintain stochasticity (i.e. rather than multiplying the arcs
525  // with the corresponding labels on them by this probability).
526 
527  for (StateId s = 0; s < static_cast<StateId>(state_in.size()); s++) {
528  if (state_in[s] > 0) { // defined, and not eps or a disambiguation symbol or a
529  // nonterminal-related sybol for grammar decoding...
530  int32 trans_state = static_cast<int32>(state_in[s]);
531  // First multiply all probabilities by "forward" probability.
532  BaseFloat log_prob = trans_model.GetNonSelfLoopLogProb(trans_state);
533  fst->SetFinal(s, Times(fst->Final(s), Weight(-log_prob*self_loop_scale)));
534  for (MutableArcIterator<MutableFst<Arc> > aiter(fst, s);
535  !aiter.Done();
536  aiter.Next()) {
537  Arc arc = aiter.Value();
538  arc.weight = Times(arc.weight, Weight(-log_prob*self_loop_scale));
539  aiter.SetValue(arc);
540  }
541  // Now add self-loop, if needed.
542  int32 trans_id = trans_model.SelfLoopOf(trans_state);
543  if (trans_id != 0) { // has self-loop.
544  BaseFloat log_prob = trans_model.GetTransitionLogProb(trans_id);
545  fst->AddArc(s, Arc(trans_id, 0, Weight(-log_prob*self_loop_scale), s));
546  }
547  }
548  }
549 }
550 
551 
552 // this is the code that expands an FST from transition-states to
553 // transition-ids, in the case where reorder == false, i.e. non-optional
554 // transition is after the self-loop.
556  const TransitionModel &trans_model,
557  const std::vector<int32> &disambig_syms,
558  BaseFloat self_loop_scale,
559  bool check_no_self_loops,
560  fst::VectorFst<fst::StdArc> *fst) {
561  using namespace fst;
562  typedef StdArc Arc;
563  typedef Arc::Label Label;
564  typedef Arc::StateId StateId;
565  typedef Arc::Weight Weight;
566 
567  // Duplicate states as necessary so that each state has at most one self-loop
568  // on it.
569  TidToTstateMapper f(trans_model, disambig_syms, check_no_self_loops);
571 
572  StateId num_states = fst->NumStates();
573  for (StateId s = 0; s < num_states; s++) {
574  int32 my_trans_state = f(kNoLabel);
575  KALDI_ASSERT(my_trans_state == -1);
576  for (MutableArcIterator<VectorFst<Arc> > aiter(fst, s);
577  !aiter.Done();
578  aiter.Next()) {
579  Arc arc = aiter.Value();
580  if (my_trans_state == -1) my_trans_state = f(arc.ilabel);
581  else KALDI_ASSERT(my_trans_state == f(arc.ilabel)); // or MakeFollowingInputSymbolsSameClass failed.
582  if (my_trans_state > 0) { // transition-id; multiply weight...
583  BaseFloat log_prob = trans_model.GetNonSelfLoopLogProb(my_trans_state);
584  arc.weight = Times(arc.weight, Weight(-log_prob*self_loop_scale));
585  aiter.SetValue(arc);
586  }
587  }
588  if (fst->Final(s) != Weight::Zero()) {
589  KALDI_ASSERT(my_trans_state == kNoLabel || my_trans_state == 0); // or MakeFollowingInputSymbolsSameClass failed.
590  }
591  if (my_trans_state != kNoLabel && my_trans_state != 0) {
592  // a transition-state; add self-loop, if it has one.
593  int32 trans_id = trans_model.SelfLoopOf(my_trans_state);
594  if (trans_id != 0) { // has self-loop.
595  BaseFloat log_prob = trans_model.GetTransitionLogProb(trans_id);
596  fst->AddArc(s, Arc(trans_id, 0, Weight(-log_prob*self_loop_scale), s));
597  }
598  }
599  }
600 }
601 
602 void AddSelfLoops(const TransitionModel &trans_model,
603  const std::vector<int32> &disambig_syms,
604  BaseFloat self_loop_scale,
605  bool reorder,
606  bool check_no_self_loops,
607  fst::VectorFst<fst::StdArc> *fst) {
608  KALDI_ASSERT(fst->Start() != fst::kNoStateId);
609  if (reorder)
610  AddSelfLoopsReorder(trans_model, disambig_syms, self_loop_scale,
611  check_no_self_loops, fst);
612  else
613  AddSelfLoopsNoReorder(trans_model, disambig_syms, self_loop_scale,
614  check_no_self_loops, fst);
615 }
616 
617 // IsReordered returns true if the transitions were possibly reordered. This reordering
618 // can happen in AddSelfLoops, if the "reorder" option was true.
619 // This makes the out-transition occur before the self-loop transition.
620 // The function returns false (no reordering) if there is not enough information in
621 // the alignment to tell (i.e. no self-loop were taken), and in this case the calling
622 // code doesn't care what the answer is.
623 // The "alignment" vector contains a sequence of TransitionIds.
624 
625 static bool IsReordered(const TransitionModel &trans_model,
626  const std::vector<int32> &alignment) {
627  for (size_t i = 0; i + 1 < alignment.size(); i++) {
628  int32 tstate1 = trans_model.TransitionIdToTransitionState(alignment[i]),
629  tstate2 = trans_model.TransitionIdToTransitionState(alignment[i+1]);
630  if (tstate1 != tstate2) {
631  bool is_loop_1 = trans_model.IsSelfLoop(alignment[i]),
632  is_loop_2 = trans_model.IsSelfLoop(alignment[i+1]);
633  KALDI_ASSERT(!(is_loop_1 && is_loop_2)); // Invalid.
634  if (is_loop_1) return true; // Reordered. self-loop is last.
635  if (is_loop_2) return false; // Not reordered. self-loop is first.
636  }
637  }
638 
639  // Just one trans-state in whole sequence.
640  if (alignment.empty()) return false;
641  else {
642  bool is_loop_front = trans_model.IsSelfLoop(alignment.front()),
643  is_loop_back = trans_model.IsSelfLoop(alignment.back());
644  if (is_loop_front) return false; // Not reordered. Self-loop is first.
645  if (is_loop_back) return true; // Reordered. Self-loop is last.
646  return false; // We really don't know in this case but calling code should
647  // not care.
648  }
649 }
650 
651 // SplitToPhonesInternal takes as input the "alignment" vector containing
652 // a sequence of transition-ids, and appends a single vector to
653 // "split_output" for each instance of a phone that occurs in the
654 // output.
655 // Returns true if the alignment passes some non-exhaustive consistency
656 // checks (if the input does not start at the start of a phone or does not
657 // end at the end of a phone, we should expect that false will be returned).
658 
659 static bool SplitToPhonesInternal(const TransitionModel &trans_model,
660  const std::vector<int32> &alignment,
661  bool reordered,
662  std::vector<std::vector<int32> > *split_output) {
663  if (alignment.empty()) return true; // nothing to split.
664  std::vector<size_t> end_points; // points at which phones end [in an
665  // stl iterator sense, i.e. actually one past the last transition-id within
666  // each phone]..
667 
668  bool was_ok = true;
669  for (size_t i = 0; i < alignment.size(); i++) {
670  int32 trans_id = alignment[i];
671  if (trans_model.IsFinal(trans_id)) { // is final-prob
672  if (!reordered) end_points.push_back(i+1);
673  else { // reordered.
674  while (i+1 < alignment.size() &&
675  trans_model.IsSelfLoop(alignment[i+1])) {
676  KALDI_ASSERT(trans_model.TransitionIdToTransitionState(alignment[i]) ==
677  trans_model.TransitionIdToTransitionState(alignment[i+1]));
678  i++;
679  }
680  end_points.push_back(i+1);
681  }
682  } else if (i+1 == alignment.size()) {
683  // need to have an end-point at the actual end.
684  // but this is an error- should have been detected already.
685  was_ok = false;
686  end_points.push_back(i+1);
687  } else {
688  int32 this_state = trans_model.TransitionIdToTransitionState(alignment[i]),
689  next_state = trans_model.TransitionIdToTransitionState(alignment[i+1]);
690  if (this_state == next_state) continue; // optimization.
691  int32 this_phone = trans_model.TransitionStateToPhone(this_state),
692  next_phone = trans_model.TransitionStateToPhone(next_state);
693  if (this_phone != next_phone) {
694  // The phone changed, but this is an error-- we should have detected this via the
695  // IsFinal check.
696  was_ok = false;
697  end_points.push_back(i+1);
698  }
699  }
700  }
701 
702  size_t cur_point = 0;
703  for (size_t i = 0; i < end_points.size(); i++) {
704  split_output->push_back(std::vector<int32>());
705  // The next if-statement checks if the initial trans-id at the current end
706  // point is the initial-state of the current phone if that initial-state
707  // is emitting (a cursory check that the alignment is plausible).
708  int32 trans_state =
709  trans_model.TransitionIdToTransitionState(alignment[cur_point]);
710  int32 phone = trans_model.TransitionStateToPhone(trans_state);
711  int32 forward_pdf_class = trans_model.GetTopo().TopologyForPhone(phone)[0].forward_pdf_class;
712  if (forward_pdf_class != kNoPdf) // initial-state of the current phone is emitting
713  if (trans_model.TransitionStateToHmmState(trans_state) != 0)
714  was_ok = false;
715  for (size_t j = cur_point; j < end_points[i]; j++)
716  split_output->back().push_back(alignment[j]);
717  cur_point = end_points[i];
718  }
719  return was_ok;
720 }
721 
722 
723 bool SplitToPhones(const TransitionModel &trans_model,
724  const std::vector<int32> &alignment,
725  std::vector<std::vector<int32> > *split_alignment) {
726  KALDI_ASSERT(split_alignment != NULL);
727  split_alignment->clear();
728 
729  bool is_reordered = IsReordered(trans_model, alignment);
730  return SplitToPhonesInternal(trans_model, alignment,
731  is_reordered, split_alignment);
732 }
733 
734 
742 static inline void ConvertAlignmentForPhone(
743  const TransitionModel &old_trans_model,
744  const TransitionModel &new_trans_model,
745  const ContextDependencyInterface &new_ctx_dep,
746  const std::vector<int32> &old_phone_alignment,
747  const std::vector<int32> &new_phone_window,
748  bool old_is_reordered,
749  bool new_is_reordered,
750  std::vector<int32> *new_phone_alignment) {
751  int32 alignment_size = old_phone_alignment.size();
752  static bool warned_topology = false;
753  int32 P = new_ctx_dep.CentralPosition(),
754  old_central_phone = old_trans_model.TransitionIdToPhone(
755  old_phone_alignment[0]),
756  new_central_phone = new_phone_window[P];
757  const HmmTopology &old_topo = old_trans_model.GetTopo(),
758  &new_topo = new_trans_model.GetTopo();
759 
760  bool topology_mismatch = !(old_topo.TopologyForPhone(old_central_phone) ==
761  new_topo.TopologyForPhone(new_central_phone));
762  if (topology_mismatch) {
763  if (!warned_topology) {
764  warned_topology = true;
765  KALDI_WARN << "Topology mismatch detected; automatically converting. "
766  << "Won't warn again.";
767  }
768  }
769  bool length_mismatch =
770  (new_phone_alignment->size() != old_phone_alignment.size());
771  if (length_mismatch || topology_mismatch) {
772  // We generate a random path from this FST, ignoring the
773  // old alignment.
774  GetRandomAlignmentForPhone(new_ctx_dep, new_trans_model,
775  new_phone_window, new_phone_alignment);
776  if (new_is_reordered)
777  ChangeReorderingOfAlignment(new_trans_model, new_phone_alignment);
778  return;
779  }
780 
781  KALDI_ASSERT(!old_phone_alignment.empty());
782 
783  int32 new_num_pdf_classes = new_topo.NumPdfClasses(new_central_phone);
784  std::vector<int32> pdf_ids(new_num_pdf_classes); // Indexed by pdf-class
785  for (int32 pdf_class = 0; pdf_class < new_num_pdf_classes; pdf_class++) {
786  if (!new_ctx_dep.Compute(new_phone_window, pdf_class,
787  &(pdf_ids[pdf_class]))) {
788  std::ostringstream ss;
789  WriteIntegerVector(ss, false, new_phone_window);
790  KALDI_ERR << "tree did not succeed in converting phone window "
791  << ss.str();
792  }
793  }
794 
795  // the topologies and lengths match -> we can directly transfer
796  // the alignment.
797  for (int32 j = 0; j < alignment_size; j++) {
798  int32 old_tid = old_phone_alignment[j],
799  old_tstate = old_trans_model.TransitionIdToTransitionState(old_tid);
800  int32 forward_pdf_class =
801  old_trans_model.TransitionStateToForwardPdfClass(old_tstate),
802  self_loop_pdf_class =
803  old_trans_model.TransitionStateToSelfLoopPdfClass(old_tstate);
804  int32 hmm_state = old_trans_model.TransitionIdToHmmState(old_tid);
805  int32 trans_idx = old_trans_model.TransitionIdToTransitionIndex(old_tid);
806  int32 new_forward_pdf = pdf_ids[forward_pdf_class];
807  int32 new_self_loop_pdf = pdf_ids[self_loop_pdf_class];
808  int32 new_trans_state =
809  new_trans_model.TupleToTransitionState(new_central_phone, hmm_state,
810  new_forward_pdf, new_self_loop_pdf);
811  int32 new_tid =
812  new_trans_model.PairToTransitionId(new_trans_state, trans_idx);
813  (*new_phone_alignment)[j] = new_tid;
814  }
815 
816  if (new_is_reordered != old_is_reordered)
817  ChangeReorderingOfAlignment(new_trans_model, new_phone_alignment);
818 }
819 
820 
821 
849 static bool ComputeNewPhoneLengths(const HmmTopology &topology,
850  const std::vector<int32> &mapped_phones,
851  const std::vector<int32> &old_lengths,
852  int32 conversion_shift,
853  int32 subsample_factor,
854  std::vector<int32> *new_lengths) {
855  int32 phone_sequence_length = old_lengths.size();
856  std::vector<int32> min_lengths(phone_sequence_length);
857  new_lengths->resize(phone_sequence_length);
858  for (int32 i = 0; i < phone_sequence_length; i++)
859  min_lengths[i] = topology.MinLength(mapped_phones[i]);
860  int32 cur_time_elapsed = 0;
861  for (int32 i = 0; i < phone_sequence_length; i++) {
862  // Note: the '+ subsample_factor - 1' here is needed so that
863  // the subsampled alignments have the same length as features
864  // subsampled with 'subsample-feats'.
865  int32 subsampled_time =
866  (cur_time_elapsed + conversion_shift) / subsample_factor;
867  cur_time_elapsed += old_lengths[i];
868  int32 next_subsampled_time =
869  (cur_time_elapsed + conversion_shift) / subsample_factor;
870  (*new_lengths)[i] = next_subsampled_time - subsampled_time;
871  }
872  bool changed = true;
873  while (changed) {
874  changed = false;
875  for (int32 i = 0; i < phone_sequence_length; i++) {
876  if ((*new_lengths)[i] < min_lengths[i]) {
877  changed = true;
878  // we need at least one extra frame.. just try to get one frame for now.
879  // Get it from the left or the right, depending which one has the closest
880  // availability of a 'spare' frame.
881  int32 min_distance = std::numeric_limits<int32>::max(),
882  best_other_phone_index = -1,
883  cur_distance = 0;
884  // first try to the left.
885  for (int32 j = i - 1; j >= 0; j--) {
886  if ((*new_lengths)[j] > min_lengths[j]) {
887  min_distance = cur_distance;
888  best_other_phone_index = j;
889  break;
890  } else {
891  cur_distance += (*new_lengths)[j];
892  }
893  }
894  // .. now to the right.
895  cur_distance = 0;
896  for (int32 j = i + 1; j < phone_sequence_length; j++) {
897  if ((*new_lengths)[j] > min_lengths[j]) {
898  if (cur_distance < min_distance) {
899  min_distance = cur_distance;
900  best_other_phone_index = j;
901  }
902  break;
903  } else {
904  cur_distance += (*new_lengths)[j];
905  }
906  }
907  if (best_other_phone_index == -1)
908  return false;
909  // assign an extra frame to this phone...
910  (*new_lengths)[i]++;
911  // and borrow it from the place that we found.
912  (*new_lengths)[best_other_phone_index]--;
913  }
914  }
915  }
916  return true;
917 }
918 
926 static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model,
927  const TransitionModel &new_trans_model,
928  const ContextDependencyInterface &new_ctx_dep,
929  const std::vector<int32> &old_alignment,
930  int32 conversion_shift,
931  int32 subsample_factor,
932  bool new_is_reordered,
933  const std::vector<int32> *phone_map,
934  std::vector<int32> *new_alignment) {
935  KALDI_ASSERT(0 <= conversion_shift && conversion_shift < subsample_factor);
936  bool old_is_reordered = IsReordered(old_trans_model, old_alignment);
937  KALDI_ASSERT(new_alignment != NULL);
938  new_alignment->clear();
939  new_alignment->reserve(old_alignment.size());
940  std::vector<std::vector<int32> > old_split; // split into phones.
941  if (!SplitToPhones(old_trans_model, old_alignment, &old_split))
942  return false;
943  int32 phone_sequence_length = old_split.size();
944  std::vector<int32> mapped_phones(phone_sequence_length);
945  for (size_t i = 0; i < phone_sequence_length; i++) {
946  KALDI_ASSERT(!old_split[i].empty());
947  mapped_phones[i] = old_trans_model.TransitionIdToPhone(old_split[i][0]);
948  if (phone_map != NULL) { // Map the phone sequence.
949  int32 sz = phone_map->size();
950  if (mapped_phones[i] < 0 || mapped_phones[i] >= sz ||
951  (*phone_map)[mapped_phones[i]] == -1)
952  KALDI_ERR << "ConvertAlignment: could not map phone " << mapped_phones[i];
953  mapped_phones[i] = (*phone_map)[mapped_phones[i]];
954  }
955  }
956 
957  // the sizes of each element of 'new_split' indicate the length of alignment
958  // that we want for each phone in the new sequence.
959  std::vector<std::vector<int32> > new_split(phone_sequence_length);
960  if (subsample_factor == 1 &&
961  old_trans_model.GetTopo() == new_trans_model.GetTopo()) {
962  // we know the old phone lengths will be fine.
963  for (size_t i = 0; i < phone_sequence_length; i++)
964  new_split[i].resize(old_split[i].size());
965  } else {
966  // .. they may not be fine.
967  std::vector<int32> old_lengths(phone_sequence_length), new_lengths;
968  for (int32 i = 0; i < phone_sequence_length; i++)
969  old_lengths[i] = old_split[i].size();
970  if (!ComputeNewPhoneLengths(new_trans_model.GetTopo(),
971  mapped_phones, old_lengths, conversion_shift,
972  subsample_factor, &new_lengths)) {
973  KALDI_WARN << "Failed to produce suitable phone lengths";
974  return false;
975  }
976  for (int32 i = 0; i < phone_sequence_length; i++)
977  new_split[i].resize(new_lengths[i]);
978  }
979 
980  int32 N = new_ctx_dep.ContextWidth(),
981  P = new_ctx_dep.CentralPosition();
982 
983  // by starting at -N and going to phone_sequence_length + N, we're
984  // being generous and not bothering to work out the exact
985  // array bounds.
986  for (int32 win_start = -N;
987  win_start < static_cast<int32>(phone_sequence_length + N);
988  win_start++) { // start of a context window.
989  int32 central_pos = win_start + P;
990  if (static_cast<size_t>(central_pos) < phone_sequence_length) {
991  // i.e. if (central_pos >= 0 && central_pos < phone_sequence_length)
992  std::vector<int32> new_phone_window(N, 0);
993  for (int32 offset = 0; offset < N; offset++)
994  if (static_cast<size_t>(win_start+offset) < phone_sequence_length)
995  new_phone_window[offset] = mapped_phones[win_start+offset];
996  const std::vector<int32> &old_alignment_for_phone = old_split[central_pos];
997  std::vector<int32> &new_alignment_for_phone = new_split[central_pos];
998 
999  ConvertAlignmentForPhone(old_trans_model, new_trans_model, new_ctx_dep,
1000  old_alignment_for_phone, new_phone_window,
1001  old_is_reordered, new_is_reordered,
1002  &new_alignment_for_phone);
1003  new_alignment->insert(new_alignment->end(),
1004  new_alignment_for_phone.begin(),
1005  new_alignment_for_phone.end());
1006  }
1007  }
1008  KALDI_ASSERT(new_alignment->size() ==
1009  (old_alignment.size() + conversion_shift)/subsample_factor);
1010  return true;
1011 }
1012 
1013 bool ConvertAlignment(const TransitionModel &old_trans_model,
1014  const TransitionModel &new_trans_model,
1015  const ContextDependencyInterface &new_ctx_dep,
1016  const std::vector<int32> &old_alignment,
1017  int32 subsample_factor,
1018  bool repeat_frames,
1019  bool new_is_reordered,
1020  const std::vector<int32> *phone_map,
1021  std::vector<int32> *new_alignment) {
1022  if (!repeat_frames || subsample_factor == 1) {
1023  return ConvertAlignmentInternal(old_trans_model,
1024  new_trans_model,
1025  new_ctx_dep,
1026  old_alignment,
1027  subsample_factor - 1,
1028  subsample_factor,
1029  new_is_reordered,
1030  phone_map,
1031  new_alignment);
1032  // The value "subsample_factor - 1" for conversion_shift above ensures the
1033  // alignments have the same length as the output of 'subsample-feats'
1034  } else {
1035  std::vector<std::vector<int32> > shifted_alignments(subsample_factor);
1036  for (int32 conversion_shift = subsample_factor - 1;
1037  conversion_shift >= 0; conversion_shift--) {
1038  if (!ConvertAlignmentInternal(old_trans_model,
1039  new_trans_model,
1040  new_ctx_dep,
1041  old_alignment,
1042  conversion_shift,
1043  subsample_factor,
1044  new_is_reordered,
1045  phone_map,
1046  &shifted_alignments[conversion_shift]))
1047  return false;
1048  }
1049  KALDI_ASSERT(new_alignment != NULL);
1050  new_alignment->clear();
1051  new_alignment->reserve(old_alignment.size());
1052  int32 max_shifted_ali_length = (old_alignment.size() / subsample_factor)
1053  + (old_alignment.size() % subsample_factor);
1054  for (int32 i = 0; i < max_shifted_ali_length; i++)
1055  for (int32 conversion_shift = subsample_factor - 1;
1056  conversion_shift >= 0; conversion_shift--)
1057  if (i < static_cast<int32>(shifted_alignments[conversion_shift].size()))
1058  new_alignment->push_back(shifted_alignments[conversion_shift][i]);
1059  }
1060  KALDI_ASSERT(new_alignment->size() == old_alignment.size());
1061  return true;
1062 }
1063 
1064 // Returns the scaled, but not negated, log-prob, with the given scaling factors.
1066  int32 trans_id,
1067  BaseFloat transition_scale,
1068  BaseFloat self_loop_scale) {
1069  if (transition_scale == self_loop_scale) {
1070  return trans_model.GetTransitionLogProb(trans_id) * transition_scale;
1071  } else {
1072  if (trans_model.IsSelfLoop(trans_id)) {
1073  return self_loop_scale * trans_model.GetTransitionLogProb(trans_id);
1074  } else {
1075  int32 trans_state = trans_model.TransitionIdToTransitionState(trans_id);
1076  return self_loop_scale * trans_model.GetNonSelfLoopLogProb(trans_state)
1077  + transition_scale * trans_model.GetTransitionLogProbIgnoringSelfLoops(trans_id);
1078  // This could be simplified to
1079  // (self_loop_scale - transition_scale) * trans_model.GetNonSelfLoopLogProb(trans_state)
1080  // + trans_model.GetTransitionLogProb(trans_id);
1081  // this simplifies if self_loop_scale == 0.0
1082  }
1083  }
1084 }
1085 
1086 
1087 
1088 void AddTransitionProbs(const TransitionModel &trans_model,
1089  const std::vector<int32> &disambig_syms, // may be empty
1090  BaseFloat transition_scale,
1091  BaseFloat self_loop_scale,
1092  fst::VectorFst<fst::StdArc> *fst) {
1093  using namespace fst;
1094  KALDI_ASSERT(IsSortedAndUniq(disambig_syms));
1095  int num_tids = trans_model.NumTransitionIds();
1096  for (StateIterator<VectorFst<StdArc> > siter(*fst);
1097  !siter.Done();
1098  siter.Next()) {
1099  for (MutableArcIterator<VectorFst<StdArc> > aiter(fst, siter.Value());
1100  !aiter.Done();
1101  aiter.Next()) {
1102  StdArc arc = aiter.Value();
1103  StdArc::Label l = arc.ilabel;
1104  if (l >= 1 && l <= num_tids) { // a transition-id.
1105  BaseFloat scaled_log_prob = GetScaledTransitionLogProb(trans_model,
1106  l,
1107  transition_scale,
1108  self_loop_scale);
1109  arc.weight = Times(arc.weight, TropicalWeight(-scaled_log_prob));
1110  } else if (l != 0) {
1111  if (!std::binary_search(disambig_syms.begin(), disambig_syms.end(),
1112  arc.ilabel))
1113  KALDI_ERR << "AddTransitionProbs: invalid symbol " << arc.ilabel
1114  << " on graph input side.";
1115  }
1116  aiter.SetValue(arc);
1117  }
1118  }
1119 }
1120 
1121 void AddTransitionProbs(const TransitionModel &trans_model,
1122  BaseFloat transition_scale,
1123  BaseFloat self_loop_scale,
1124  Lattice *lat) {
1125  using namespace fst;
1126  int num_tids = trans_model.NumTransitionIds();
1127  for (fst::StateIterator<Lattice> siter(*lat);
1128  !siter.Done();
1129  siter.Next()) {
1130  for (MutableArcIterator<Lattice> aiter(lat, siter.Value());
1131  !aiter.Done();
1132  aiter.Next()) {
1133  LatticeArc arc = aiter.Value();
1134  LatticeArc::Label l = arc.ilabel;
1135  if (l >= 1 && l <= num_tids) { // a transition-id.
1136  BaseFloat scaled_log_prob = GetScaledTransitionLogProb(trans_model,
1137  l,
1138  transition_scale,
1139  self_loop_scale);
1140  // cost is negated log prob.
1141  arc.weight.SetValue1(arc.weight.Value1() - scaled_log_prob);
1142  } else if (l != 0) {
1143  KALDI_ERR << "AddTransitionProbs: invalid symbol " << arc.ilabel
1144  << " on lattice input side.";
1145  }
1146  aiter.SetValue(arc);
1147  }
1148  }
1149 }
1150 
1151 
1152 // This function takes a phone-sequence with word-start and word-end
1153 // tokens in it, and a word-sequence, and outputs the pronunciations
1154 // "prons"... the format of "prons" is, each element is a vector,
1155 // where the first element is the word (or zero meaning no word, e.g.
1156 // for optional silence introduced by the lexicon), and the remaining
1157 // elements are the phones in the word's pronunciation.
1158 // It returns false if it encounters a problem of some kind, e.g.
1159 // if the phone-sequence doesn't seem to have the right number of
1160 // words in it.
1161 bool ConvertPhnxToProns(const std::vector<int32> &phnx,
1162  const std::vector<int32> &words,
1163  int32 word_start_sym,
1164  int32 word_end_sym,
1165  std::vector<std::vector<int32> > *prons) {
1166  size_t i = 0, j = 0;
1167 
1168  while (i < phnx.size()) {
1169  if (phnx[i] == 0) return false; // zeros not valid here.
1170  if (phnx[i] == word_start_sym) { // start a word...
1171  std::vector<int32> pron;
1172  if (j >= words.size()) return false; // no word left..
1173  if (words[j] == 0) return false; // zero word disallowed.
1174  pron.push_back(words[j++]);
1175  i++;
1176  while (i < phnx.size()) {
1177  if (phnx[i] == 0) return false;
1178  if (phnx[i] == word_start_sym) return false; // error.
1179  if (phnx[i] == word_end_sym) { i++; break; }
1180  pron.push_back(phnx[i]);
1181  i++;
1182  }
1183  // check we did see the word-end symbol.
1184  if (!(i > 0 && phnx[i-1] == word_end_sym))
1185  return false;
1186  prons->push_back(pron);
1187  } else if (phnx[i] == word_end_sym) {
1188  return false; // error.
1189  } else {
1190  // start a non-word sequence of phones (e.g. opt-sil).
1191  std::vector<int32> pron;
1192  pron.push_back(0); // 0 serves as the word-id.
1193  while (i < phnx.size()) {
1194  if (phnx[i] == 0) return false;
1195  if (phnx[i] == word_start_sym) break;
1196  if (phnx[i] == word_end_sym) return false; // error.
1197  pron.push_back(phnx[i]);
1198  i++;
1199  }
1200  prons->push_back(pron);
1201  }
1202  }
1203  return (j == words.size());
1204 }
1205 
1206 
1208  const TransitionModel &trans_model,
1209  const std::vector<int32> &phone_window,
1210  std::vector<int32> *alignment) {
1211  typedef fst::StdArc Arc;
1212  int32 length = alignment->size();
1213  BaseFloat prob_scale = 0.0;
1214  fst::VectorFst<Arc> *fst = GetHmmAsFsaSimple(phone_window, ctx_dep,
1215  trans_model, prob_scale);
1216  fst::RmEpsilon(fst);
1217 
1218  fst::VectorFst<Arc> length_constraint_fst;
1219  { // set up length_constraint_fst.
1220  std::vector<int32> symbols;
1221  bool include_epsilon = false;
1222  // note: 'fst' is an acceptor so ilabels == olabels.
1223  GetInputSymbols(*fst, include_epsilon, &symbols);
1224  int32 cur_state = length_constraint_fst.AddState();
1225  length_constraint_fst.SetStart(cur_state);
1226  for (int32 i = 0; i < length; i++) {
1227  int32 next_state = length_constraint_fst.AddState();
1228  for (size_t j = 0; j < symbols.size(); j++) {
1229  length_constraint_fst.AddArc(cur_state,
1230  Arc(symbols[j], symbols[j],
1231  fst::TropicalWeight::One(),
1232  next_state));
1233  }
1234  cur_state = next_state;
1235  }
1236  length_constraint_fst.SetFinal(cur_state, fst::TropicalWeight::One());
1237  }
1238  fst::VectorFst<Arc> composed_fst;
1239  fst::Compose(*fst, length_constraint_fst, &composed_fst);
1240  fst::VectorFst<Arc> single_path_fst;
1241  { // randomly generate a single path.
1242  fst::UniformArcSelector<Arc> selector;
1243  fst::RandGenOptions<fst::UniformArcSelector<Arc> > randgen_opts(selector);
1244  fst::RandGen(composed_fst, &single_path_fst, randgen_opts);
1245  }
1246  if (single_path_fst.NumStates() == 0) {
1247  KALDI_ERR << "Error generating random alignment (wrong length?): "
1248  << "requested length is " << length << " versus min-length "
1249  << trans_model.GetTopo().MinLength(
1250  phone_window[ctx_dep.CentralPosition()]);
1251  }
1252  std::vector<int32> symbol_sequence;
1253  bool ans = fst::GetLinearSymbolSequence<Arc, int32>(
1254  single_path_fst, &symbol_sequence, NULL, NULL);
1255  KALDI_ASSERT(ans && symbol_sequence.size() == length);
1256  symbol_sequence.swap(*alignment);
1257  delete fst;
1258 }
1259 
1261  std::vector<int32> *alignment) {
1262  int32 start_pos = 0, size = alignment->size();
1263  while (start_pos != size) {
1264  int32 start_tid = (*alignment)[start_pos];
1265  int32 cur_tstate = trans_model.TransitionIdToTransitionState(start_tid);
1266  bool start_is_self_loop = trans_model.IsSelfLoop(start_tid) ? 0 : 1;
1267  int32 end_pos = start_pos + 1;
1268  // If the first instance of this transition-state was a self-loop, then eat
1269  // only non-self-loops of this state; if it was a non-self-loop, then eat
1270  // only self-loops of this state. Imposing this condition on self-loops
1271  // would only actually matter in the rare circumstances that phones can
1272  // have length 1.
1273  while (end_pos != size &&
1274  trans_model.TransitionIdToTransitionState((*alignment)[end_pos]) ==
1275  cur_tstate) {
1276  bool this_is_self_loop = trans_model.IsSelfLoop((*alignment)[end_pos]);
1277  if (!this_is_self_loop) {
1278  if (start_is_self_loop) {
1279  break; // stop before including this transition-id.
1280  } else {
1281  end_pos++;
1282  break; // stop after including this transition-id.
1283  }
1284  }
1285  end_pos++;
1286  }
1287  std::swap((*alignment)[start_pos], (*alignment)[end_pos - 1]);
1288  start_pos = end_pos;
1289  }
1290 }
1291 
1292 void GetPdfToPhonesMap(const TransitionModel &trans_model,
1293  std::vector<std::set<int32> > *pdf2phones) {
1294  pdf2phones->clear();
1295  pdf2phones->resize(trans_model.NumPdfs());
1296  for (int32 i = 0; i < trans_model.NumTransitionIds(); i++) {
1297  int32 trans_id = i + 1;
1298  int32 pdf_id = trans_model.TransitionIdToPdf(trans_id);
1299  int32 phone = trans_model.TransitionIdToPhone(trans_id);
1300  (*pdf2phones)[pdf_id].insert(phone);
1301  }
1302 }
1303 
1304 } // namespace kaldi
int32 words[kMaxOrder]
fst::StdArc::StateId StateId
fst::StdArc::Label Label
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
fst::ArcTpl< LatticeWeight > LatticeArc
Definition: kaldi-lattice.h:40
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
int32 PairToTransitionId(int32 trans_state, int32 trans_index) const
A class for storing topology information for phones.
Definition: hmm-topology.h:93
const std::vector< int32 > & GetPhones() const
Returns a sorted, unique list of phones.
void GetRandomAlignmentForPhone(const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const std::vector< int32 > &phone_window, std::vector< int32 > *alignment)
Definition: hmm-utils.cc:1207
virtual bool Compute(const std::vector< int32 > &phoneseq, int32 pdf_class, int32 *pdf_id) const =0
The "new" Compute interface.
void RemoveEpsLocal(MutableFst< Arc > *fst)
RemoveEpsLocal remove some (but not necessarily all) epsilons in an FST, using an algorithm that is g...
bool ConvertPhnxToProns(const std::vector< int32 > &phnx, const std::vector< int32 > &words, int32 word_start_sym, int32 word_end_sym, std::vector< std::vector< int32 > > *prons)
Definition: hmm-utils.cc:1161
static fst::VectorFst< fst::StdArc > * MakeTrivialAcceptor(int32 label)
This utility function, used in GetHTransducer(), creates an FSA (finite state acceptor, i.e.
Definition: hmm-utils.cc:239
Lattice::StateId StateId
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
int32 TransitionStateToSelfLoopPdfClass(int32 trans_state) const
int32 TransitionStateToForwardPdfClass(int32 trans_state) const
fst::StdArc StdArc
void AddSelfLoops(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool reorder, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
For context, see AddSelfLoops().
Definition: hmm-utils.cc:602
void GetIlabelMapping(const std::vector< std::vector< int32 > > &ilabel_info_old, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, std::vector< int32 > *old2new_map)
GetIlabelMapping produces a mapping that&#39;s similar to HTK&#39;s logical-to-physical model mapping (i...
Definition: hmm-utils.cc:335
static bool IsReordered(const TransitionModel &trans_model, const std::vector< int32 > &alignment)
Definition: hmm-utils.cc:625
int32 TransitionStateToHmmState(int32 trans_state) const
unordered_map< std::pair< int32, std::vector< int32 > >, fst::VectorFst< fst::StdArc > *, HmmCacheHash > HmmCacheType
HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used as cache in GetHmmAsFsa...
Definition: hmm-utils.h:70
int32 SelfLoopOf(int32 trans_state) const
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
int32 TupleToTransitionState(int32 phone, int32 hmm_state, int32 pdf, int32 self_loop_pdf) const
kaldi::int32 int32
std::vector< HmmState > TopologyEntry
TopologyEntry is a typedef that represents the topology of a single (prototype) state.
Definition: hmm-topology.h:133
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq&#39;s (removes duplicates) from a vector.
Definition: stl-utils.h:39
int32 TransitionIdToPdf(int32 trans_id) const
static const int32 kNoPdf
A constant used in the HmmTopology class as the pdf-class kNoPdf, which is used when a HMM-state is n...
Definition: hmm-topology.h:86
void GetInputSymbols(const Fst< Arc > &fst, bool include_eps, std::vector< I > *symbols)
GetInputSymbols gets the list of symbols on the input of fst (including epsilon, if include_eps == tr...
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws exception if phone not covered by this topol...
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
Definition: hmm-utils.cc:723
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_alignment, int32 conversion_shift, int32 subsample_factor, bool new_is_reordered, const std::vector< int32 > *phone_map, std::vector< int32 > *new_alignment)
This function is the same as &#39;ConvertAligment&#39;, but instead of the &#39;repeat_frames&#39; option it supports...
Definition: hmm-utils.cc:926
const TransitionModel & trans_model_
Definition: hmm-utils.cc:464
Configuration class for the GetHTransducer() function; see The HTransducerConfig configuration class ...
Definition: hmm-utils.h:36
static bool ComputeNewPhoneLengths(const HmmTopology &topology, const std::vector< int32 > &mapped_phones, const std::vector< int32 > &old_lengths, int32 conversion_shift, int32 subsample_factor, std::vector< int32 > *new_lengths)
This function, called from ConvertAlignmentInternal(), works out suitable new lengths of phones in th...
Definition: hmm-utils.cc:849
void AddTransitionProbs(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat transition_scale, BaseFloat self_loop_scale, fst::VectorFst< fst::StdArc > *fst)
Adds transition-probs, with the supplied scales (see Scaling of transition and acoustic probabilities...
Definition: hmm-utils.cc:1088
float BaseFloat
Definition: kaldi-types.h:29
static void AddSelfLoopsNoReorder(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
Definition: hmm-utils.cc:555
void GetPdfToPhonesMap(const TransitionModel &trans_model, std::vector< std::set< int32 > > *pdf2phones)
Definition: hmm-utils.cc:1292
virtual int CentralPosition() const =0
Central position P of the phone context, in 0-based numbering, e.g.
double Log(double x)
Definition: kaldi-math.h:100
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
static void AddSelfLoopsReorder(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
Definition: hmm-utils.cc:472
int32 TransitionIdToHmmState(int32 trans_id) const
bool IsSelfLoop(int32 trans_id) const
const TopologyEntry & TopologyForPhone(int32 phone) const
Returns the topology entry (i.e.
const HmmTopology & GetTopo() const
return reference to HMM-topology object.
BaseFloat GetTransitionLogProb(int32 trans_id) const
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 TransitionIdToTransitionState(int32 trans_id) const
#define KALDI_WARN
Definition: kaldi-error.h:150
VectorFst< Arc > * MakeLoopFst(const std::vector< const ExpandedFst< Arc > *> &fsts)
MakeLoopFst creates an FST that has a state that is both initial and final (weight == Weight::One())...
fst::StdArc::Label Label
int32 TransitionStateToPhone(int32 trans_state) const
void ApplyProbabilityScale(float scale, MutableFst< Arc > *fst)
ApplyProbabilityScale is applicable to FSTs in the log or tropical semiring.
TidToTstateMapper(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, bool check_no_self_loops)
Definition: hmm-utils.cc:440
void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst< Arc > *fst, const F &f)
As MakePrecedingInputSymbolsSame, but takes a functor object that maps labels to classes.
static void ConvertAlignmentForPhone(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_phone_alignment, const std::vector< int32 > &new_phone_window, bool old_is_reordered, bool new_is_reordered, std::vector< int32 > *new_phone_alignment)
This function is used internally inside ConvertAlignment; it converts the alignment for a single phon...
Definition: hmm-utils.cc:742
fst::VectorFst< fst::StdArc > * GetHTransducer(const std::vector< std::vector< int32 > > &ilabel_info, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, std::vector< int32 > *disambig_syms_left)
Returns the H tranducer; result owned by caller.
Definition: hmm-utils.cc:254
fst::StdArc::Weight Weight
static BaseFloat GetScaledTransitionLogProb(const TransitionModel &trans_model, int32 trans_id, BaseFloat transition_scale, BaseFloat self_loop_scale)
Definition: hmm-utils.cc:1065
context-dep-itf.h provides a link between the tree-building code in ../tree/, and the FST code in ...
virtual int ContextWidth() const =0
ContextWidth() returns the value N (e.g.
BaseFloat transition_scale
Transition log-prob scale, see Scaling of transition and acoustic probabilities.
Definition: hmm-utils.h:40
fst::VectorFst< fst::StdArc > * GetHmmAsFsa(std::vector< int32 > phone_window, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, HmmCacheType *cache)
Called by GetHTransducer() and probably will not need to be called directly; it creates and returns t...
Definition: hmm-utils.cc:32
BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const
Returns the log-probability of a particular non-self-loop transition after subtracting the probabilit...
Arc::Weight Weight
Definition: kws-search.cc:31
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
void ChangeReorderingOfAlignment(const TransitionModel &trans_model, std::vector< int32 > *alignment)
Definition: hmm-utils.cc:1260
int32 MinLength(int32 phone) const
static bool SplitToPhonesInternal(const TransitionModel &trans_model, const std::vector< int32 > &alignment, bool reordered, std::vector< std::vector< int32 > > *split_output)
Definition: hmm-utils.cc:659
void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon, MutableFst< Arc > *fst, const F &f)
As MakeFollowingInputSymbolsSame, but takes a functor object that maps labels to classes.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
int32 GetEncodingMultiple(int32 nonterm_phones_offset)
bool IsFinal(int32 trans_id) const
int32 TransitionIdToPhone(int32 trans_id) const
bool ConvertAlignment(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_alignment, int32 subsample_factor, bool repeat_frames, bool new_is_reordered, const std::vector< int32 > *phone_map, std::vector< int32 > *new_alignment)
ConvertAlignment converts an alignment that was created using one model, to another model...
Definition: hmm-utils.cc:1013
fst::VectorFst< fst::StdArc > * GetHmmAsFsaSimple(std::vector< int32 > phone_window, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, BaseFloat prob_scale)
Included mainly as a form of documentation, not used in any other code currently. ...
Definition: hmm-utils.cc:155
const std::vector< int32 > & disambig_syms_
Definition: hmm-utils.cc:465
BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const
Returns the log-prob of the non-self-loop probability mass for this transition state.
int32 TransitionIdToTransitionIndex(int32 trans_id) const
fst::VectorFst< fst::StdArc > * GetPdfToTransitionIdTransducer(const TransitionModel &trans_model)
Returns a transducer from pdfs plus one (input) to transition-ids (output).
Definition: hmm-utils.cc:407