// decoder/lattice-incremental-decoder.cc

// Copyright 2019 Zhehuai Chen, Daniel Povey

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "decoder/lattice-incremental-decoder.h"
#include "lat/lattice-functions.h"
#include "base/timer.h"
namespace kaldi {

// instantiate this class once for each thing you have to decode.
template <typename FST, typename Token>
LatticeIncrementalDecoderTpl<FST, Token>::LatticeIncrementalDecoderTpl(
    const FST &fst, const TransitionModel &trans_model,
    const LatticeIncrementalDecoderConfig &config)
    : fst_(&fst),
      delete_fst_(false),
      num_toks_(0),
      config_(config),
      determinizer_(trans_model, config) {
  config.Check();
  toks_.SetSize(1000); // just so on the first frame we do something reasonable.
}

template <typename FST, typename Token>
LatticeIncrementalDecoderTpl<FST, Token>::LatticeIncrementalDecoderTpl(
    const LatticeIncrementalDecoderConfig &config, FST *fst,
    const TransitionModel &trans_model)
    : fst_(fst),
      delete_fst_(true),
      num_toks_(0),
      config_(config),
      determinizer_(trans_model, config) {
  config.Check();
  toks_.SetSize(1000); // just so on the first frame we do something reasonable.
}
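
/* Illustrative usage sketch (not part of the library; assumes `fst`,
   `trans_model` and `config` have been set up elsewhere, and that `decodable`
   implements DecodableInterface for the current utterance):

     LatticeIncrementalDecoderTpl<fst::StdFst, decoder::StdToken>
         decoder(fst, trans_model, config);
     decoder.InitDecoding();
     while (/* more features are becoming available */)
       decoder.AdvanceDecoding(&decodable);  // also determinizes in chunks.
     decoder.FinalizeDecoding();
     const CompactLattice &clat =
         decoder.GetLattice(decoder.NumFramesDecoded(),
                            true);  // use_final_probs == true

   Intermediate results can be obtained mid-utterance by calling GetLattice()
   with a smaller number of frames and use_final_probs == false. */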

template <typename FST, typename Token>
LatticeIncrementalDecoderTpl<FST, Token>::~LatticeIncrementalDecoderTpl() {
  DeleteElems(toks_.Clear());
  ClearActiveTokens();
  if (delete_fst_) delete fst_;
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::InitDecoding() {
  // clean up from last time:
  DeleteElems(toks_.Clear());
  cost_offsets_.clear();
  ClearActiveTokens();
  warned_ = false;
  num_toks_ = 0;
  decoding_finalized_ = false;
  final_costs_.clear();
  StateId start_state = fst_->Start();
  KALDI_ASSERT(start_state != fst::kNoStateId);
  active_toks_.resize(1);
  Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL);
  active_toks_[0].toks = start_tok;
  toks_.Insert(start_state, start_tok);
  num_toks_++;

  determinizer_.Init();
  num_frames_in_lattice_ = 0;
  token2label_map_.clear();
  next_token_label_ = LatticeIncrementalDeterminizer::kTokenLabelOffset;
  ProcessNonemitting(config_.beam);
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::UpdateLatticeDeterminization() {
  if (NumFramesDecoded() - num_frames_in_lattice_ <
      config_.determinize_max_delay)
    return;

  /* Make sure the token-pruning is active.  Note: PruneActiveTokens() has
     internal logic that prevents it from doing unnecessary work if you
     call it and then immediately call it again. */
  PruneActiveTokens(config_.lattice_beam * config_.prune_scale);

  int32 first = num_frames_in_lattice_ + config_.determinize_min_chunk_size,
      last = NumFramesDecoded(),
      fewest_tokens = std::numeric_limits<int32>::max(),
      best_frame = -1;
  for (int32 t = last; t >= first; t--) {
    /* Make sure PruneActiveTokens() has computed num_toks for all these
       frames... */
    KALDI_ASSERT(active_toks_[t].num_toks != -1);
    if (active_toks_[t].num_toks < fewest_tokens) {
      // Strict "<" suffices to keep the latest frame in case of ties,
      // because we iterate from the latest frame backward.
      fewest_tokens = active_toks_[t].num_toks;
      best_frame = t;
    }
  }
  /* OK, determinize the chunk that spans from num_frames_in_lattice_ to
     best_frame. */
  bool use_final_probs = false;
  GetLattice(best_frame, use_final_probs);
  return;
}
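
/* Example of the scheduling above (hypothetical config values): with
   determinize_max_delay = 60 and determinize_min_chunk_size = 20, if
   num_frames_in_lattice_ is 100 then nothing happens until frame 160 has been
   decoded; at that point we scan frames 120..160 for the frame with the
   fewest surviving tokens (a natural "pinch point", where determinization is
   cheapest) and determinize up to that frame. */
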
// Returns true if any kind of traceback is available (not necessarily from
// a final state). It should only very rarely return false; this indicates
// an unusual search error.
template <typename FST, typename Token>
bool LatticeIncrementalDecoderTpl<FST, Token>::Decode(DecodableInterface *decodable) {
  InitDecoding();

  // We use 1-based indexing for frames in this decoder (if you view it in
  // terms of features), but note that the decodable object uses zero-based
  // numbering, which we have to correct for when we call it.

  while (!decodable->IsLastFrame(NumFramesDecoded() - 1)) {
    if (NumFramesDecoded() % config_.prune_interval == 0) {
      PruneActiveTokens(config_.lattice_beam * config_.prune_scale);
    }
    UpdateLatticeDeterminization();

    BaseFloat cost_cutoff = ProcessEmitting(decodable);
    ProcessNonemitting(cost_cutoff);
  }
  Timer timer;
  FinalizeDecoding();
  bool use_final_probs = true;
  GetLattice(NumFramesDecoded(), use_final_probs);
  KALDI_VLOG(2) << "Delay time during and after FinalizeDecoding() "
                << "(secs): " << timer.Elapsed();

  // Returns true if we have any kind of traceback available (not necessarily
  // to the end state; query ReachedFinal() for that).
  return !active_toks_.empty() && active_toks_.back().toks != NULL;
}


template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::PossiblyResizeHash(size_t num_toks) {
  size_t new_sz =
      static_cast<size_t>(static_cast<BaseFloat>(num_toks) * config_.hash_ratio);
  if (new_sz > toks_.Size()) {
    toks_.SetSize(new_sz);
  }
}

/*
  A note on the definition of extra_cost.

  extra_cost is used in pruning tokens, to save memory.

  extra_cost can be thought of as a beta (backward) cost assuming
  we had set the betas on currently-active tokens to all be the negative
  of the alphas for those tokens.  (So all currently active tokens would
  be on (tied) best paths).

  Define the 'forward cost' of a token as zero for any token on the frame
  we're currently decoding; and for other frames, as the shortest-path cost
  between that token and a token on the frame we're currently decoding.
  (By "currently decoding" I mean the most recently processed frame).

  Then define the extra_cost of a token (always >= 0) as the forward-cost of
  the token minus the smallest forward-cost of any token on the same frame.

  We can use the extra_cost to accurately prune away tokens that we know will
  never appear in the lattice.  If the extra_cost is greater than the desired
  lattice beam, the token would provably never appear in the lattice, so we can
  prune away the token.

  The advantage of storing the extra_cost rather than the forward-cost is that
  it is less costly to keep the extra_cost up-to-date when we process new
  frames.  When we process a new frame, *all* the previous frames' forward-costs
  would change; but in general the extra_cost will change only for a finite
  number of frames.  (Actually we don't update all the extra_costs every time
  we update a frame; we only do it every 'config_.prune_interval' frames).
 */
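
/* Worked example (hypothetical numbers): say the most recently processed
   frame is t = 100, and on frame 90 token A lies on a best path to frame 100
   (forward-cost 0.0) while token B's cheapest continuation to frame 100 costs
   7.5 more.  Then extra_cost(A) = 0.0 and extra_cost(B) = 7.5; with
   config_.lattice_beam = 6.0, B can safely be pruned, since no path through
   it can appear within the lattice beam. */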

// FindOrAddToken either locates a token in hash of toks_,
// or if necessary inserts a new, empty token (i.e. with no forward links)
// for the current frame.  [note: it's inserted if necessary into hash toks_
// and also into the singly linked list of tokens active on this frame
// (whose head is at active_toks_[frame])].
template <typename FST, typename Token>
inline Token *LatticeIncrementalDecoderTpl<FST, Token>::FindOrAddToken(
    StateId state, int32 frame_plus_one, BaseFloat tot_cost, Token *backpointer,
    bool *changed) {
  // Returns the Token pointer.  Sets "changed" (if non-NULL) to true
  // if the token was newly created or the cost changed.
  KALDI_ASSERT(frame_plus_one < active_toks_.size());
  Token *&toks = active_toks_[frame_plus_one].toks;
  Elem *e_found = toks_.Find(state);
  if (e_found == NULL) { // no such token presently.
    const BaseFloat extra_cost = 0.0;
    // tokens on the currently final frame have zero extra_cost
    // as any of them could end up
    // on the winning path.
    Token *new_tok = new Token(tot_cost, extra_cost, NULL, toks, backpointer);
    // NULL: no forward links yet
    toks = new_tok;
    num_toks_++;
    toks_.Insert(state, new_tok);
    if (changed) *changed = true;
    return new_tok;
  } else {
    Token *tok = e_found->val; // There is an existing Token for this state.
    if (tok->tot_cost > tot_cost) { // replace old token
      tok->tot_cost = tot_cost;
      // SetBackpointer() just does tok->backpointer = backpointer in
      // the case where Token == BackpointerToken, else nothing.
      tok->SetBackpointer(backpointer);
      // we don't allocate a new token, the old stays linked in active_toks_
      // we only replace the tot_cost
      // in the current frame, there are no forward links (and no extra_cost)
      // only in ProcessNonemitting we have to delete forward links
      // in case we visit a state for the second time
      // those forward links, that lead to this replaced token before:
      // they remain and will hopefully be pruned later (PruneForwardLinks...)
      if (changed) *changed = true;
    } else {
      if (changed) *changed = false;
    }
    return tok;
  }
}

// prunes outgoing links for all tokens in active_toks_[frame]
// it's called by PruneActiveTokens
// all links, that have link_extra_cost > lattice_beam are pruned
template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::PruneForwardLinks(
    int32 frame_plus_one, bool *extra_costs_changed, bool *links_pruned,
    BaseFloat delta) {
  // delta is the amount by which the extra_costs must change
  // If delta is larger, we'll tend to go back less far
  // toward the beginning of the file.
  // extra_costs_changed is set to true if extra_cost was changed for any token
  // links_pruned is set to true if any link in any token was pruned

  *extra_costs_changed = false;
  *links_pruned = false;
  KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size());
  if (active_toks_[frame_plus_one].toks == NULL) { // empty list; should not happen.
    if (!warned_) {
      KALDI_WARN << "No tokens alive [doing pruning].. warning first "
                    "time only for each utterance\n";
      warned_ = true;
    }
  }

  // We have to iterate until there is no more change, because the links
  // are not guaranteed to be in topological order.
  bool changed = true; // difference new minus old extra cost >= delta ?
  while (changed) {
    changed = false;
    for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL;
         tok = tok->next) {
      ForwardLinkT *link, *prev_link = NULL;
      // will recompute tok_extra_cost for tok.
      BaseFloat tok_extra_cost = std::numeric_limits<BaseFloat>::infinity();
      // tok_extra_cost is the best (min) of link_extra_cost of outgoing links
      for (link = tok->links; link != NULL;) {
        // See if we need to excise this link...
        Token *next_tok = link->next_tok;
        BaseFloat link_extra_cost =
            next_tok->extra_cost +
            ((tok->tot_cost + link->acoustic_cost + link->graph_cost) -
             next_tok->tot_cost); // difference in brackets is >= 0
        // link_extra_cost is the difference in score between the best paths
        // through link source state and through link destination state
        KALDI_ASSERT(link_extra_cost == link_extra_cost); // check for NaN
        if (link_extra_cost > config_.lattice_beam) { // excise link
          ForwardLinkT *next_link = link->next;
          if (prev_link != NULL)
            prev_link->next = next_link;
          else
            tok->links = next_link;
          delete link;
          link = next_link; // advance link but leave prev_link the same.
          *links_pruned = true;
        } else { // keep the link and update the tok_extra_cost if needed.
          if (link_extra_cost < 0.0) { // this is just a precaution.
            if (link_extra_cost < -0.01)
              KALDI_WARN << "Negative extra_cost: " << link_extra_cost;
            link_extra_cost = 0.0;
          }
          if (link_extra_cost < tok_extra_cost) tok_extra_cost = link_extra_cost;
          prev_link = link; // move to next link
          link = link->next;
        }
      } // for all outgoing links
      if (fabs(tok_extra_cost - tok->extra_cost) > delta)
        changed = true; // difference new minus old is bigger than delta
      tok->extra_cost = tok_extra_cost;
      // will be +infinity or <= lattice_beam_.
      // infinity indicates that no forward link survived pruning
    } // for all Token on active_toks_[frame]
    if (changed) *extra_costs_changed = true;

    // Note: it's theoretically possible that aggressive compiler
    // optimizations could cause an infinite loop here for small delta and
    // high-dynamic-range scores.
  } // while changed
}

// PruneForwardLinksFinal is a version of PruneForwardLinks that we call
// on the final frame.  If there are final tokens active, it uses
// the final-probs for pruning, otherwise it treats all tokens as final.
template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::PruneForwardLinksFinal() {
  KALDI_ASSERT(!active_toks_.empty());
  int32 frame_plus_one = active_toks_.size() - 1;

  if (active_toks_[frame_plus_one].toks == NULL) // empty list; should not happen.
    KALDI_WARN << "No tokens alive at end of file";

  typedef typename unordered_map<Token *, BaseFloat>::const_iterator IterType;
  ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_);
  decoding_finalized_ = true;
  // We call DeleteElems() as a nicety, not because it's really necessary;
  // otherwise there would be a time, after calling PruneTokensForFrame() on the
  // final frame, when toks_.GetList() or toks_.Clear() would contain pointers
  // to nonexistent tokens.
  DeleteElems(toks_.Clear());

  // Now go through tokens on this frame, pruning forward links...  may have to
  // iterate a few times until there is no more change, because the list is not
  // in topological order.  This is a modified version of the code in
  // PruneForwardLinks, but here we also take account of the final-probs.
  bool changed = true;
  BaseFloat delta = 1.0e-05;
  while (changed) {
    changed = false;
    for (Token *tok = active_toks_[frame_plus_one].toks; tok != NULL;
         tok = tok->next) {
      ForwardLinkT *link, *prev_link = NULL;
      // will recompute tok_extra_cost.  It has a term in it that corresponds
      // to the "final-prob", so instead of initializing tok_extra_cost to
      // infinity below we set it to the difference between the
      // (score+final_prob) of this token and the best such (score+final_prob).
      BaseFloat final_cost;
      if (final_costs_.empty()) {
        final_cost = 0.0;
      } else {
        IterType iter = final_costs_.find(tok);
        if (iter != final_costs_.end())
          final_cost = iter->second;
        else
          final_cost = std::numeric_limits<BaseFloat>::infinity();
      }
      BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_;
      // tok_extra_cost will be a "min" over either directly being final, or
      // being indirectly final through other links, and the loop below may
      // decrease its value:
      for (link = tok->links; link != NULL;) {
        // See if we need to excise this link...
        Token *next_tok = link->next_tok;
        BaseFloat link_extra_cost =
            next_tok->extra_cost +
            ((tok->tot_cost + link->acoustic_cost + link->graph_cost) -
             next_tok->tot_cost);
        if (link_extra_cost > config_.lattice_beam) { // excise link
          ForwardLinkT *next_link = link->next;
          if (prev_link != NULL)
            prev_link->next = next_link;
          else
            tok->links = next_link;
          delete link;
          link = next_link; // advance link but leave prev_link the same.
        } else { // keep the link and update the tok_extra_cost if needed.
          if (link_extra_cost < 0.0) { // this is just a precaution.
            if (link_extra_cost < -0.01)
              KALDI_WARN << "Negative extra_cost: " << link_extra_cost;
            link_extra_cost = 0.0;
          }
          if (link_extra_cost < tok_extra_cost) tok_extra_cost = link_extra_cost;
          prev_link = link;
          link = link->next;
        }
      }
      // prune away tokens worse than lattice_beam above best path.  This step
      // was not necessary in the non-final case because then, this case
      // showed up as having no forward links.  Here, the tok_extra_cost has
      // an extra component relating to the final-prob.
      if (tok_extra_cost > config_.lattice_beam)
        tok_extra_cost = std::numeric_limits<BaseFloat>::infinity();
      // to be pruned in PruneTokensForFrame

      if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta)) changed = true;
      tok->extra_cost = tok_extra_cost; // will be +infinity or <= lattice_beam_.
    }
  } // while changed
}

template <typename FST, typename Token>
BaseFloat LatticeIncrementalDecoderTpl<FST, Token>::FinalRelativeCost() const {
  BaseFloat relative_cost;
  ComputeFinalCosts(NULL, &relative_cost, NULL);
  return relative_cost;
}

// Prune away any tokens on this frame that have no forward links.
// [we don't do this in PruneForwardLinks because it would give us
// a problem with dangling pointers].
// It's called by PruneActiveTokens if any forward links have been pruned
template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::PruneTokensForFrame(
    int32 frame_plus_one) {
  KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size());
  Token *&toks = active_toks_[frame_plus_one].toks;
  if (toks == NULL) KALDI_WARN << "No tokens alive [doing pruning]";
  Token *tok, *next_tok, *prev_tok = NULL;
  int32 num_toks = 0;
  for (tok = toks; tok != NULL; tok = next_tok) {
    next_tok = tok->next;
    if (tok->extra_cost == std::numeric_limits<BaseFloat>::infinity()) {
      // token is unreachable from end of graph; (no forward links survived)
      // excise tok from list and delete tok.
      if (prev_tok != NULL)
        prev_tok->next = tok->next;
      else
        toks = tok->next;
      delete tok;
      num_toks_--;
    } else { // fetch next Token
      prev_tok = tok;
      num_toks++; // count only the tokens that survive pruning.
    }
  }
  active_toks_[frame_plus_one].num_toks = num_toks;
}

// Go backwards through still-alive tokens, pruning them, starting not from
// the current frame (where we want to keep all tokens) but from the frame before
// that.  We go backwards through the frames and stop when we reach a point
// where the delta-costs are not changing (and the delta controls when we consider
// a cost to have "not changed").
template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::PruneActiveTokens(BaseFloat delta) {
  int32 cur_frame_plus_one = NumFramesDecoded();
  int32 num_toks_begin = num_toks_;

  if (active_toks_[cur_frame_plus_one].num_toks == -1) {
    // The current frame's tokens don't get pruned so they don't get counted
    // (the count is needed by the incremental determinization code).
    // Fix this.
    int32 this_frame_num_toks = 0;
    for (Token *t = active_toks_[cur_frame_plus_one].toks; t != NULL; t = t->next)
      this_frame_num_toks++;
    active_toks_[cur_frame_plus_one].num_toks = this_frame_num_toks;
  }

  // The index "f" below represents a "frame plus one", i.e. you'd have to subtract
  // one to get the corresponding index for the decodable object.
  for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) {
    // Reason why we need to prune forward links in this situation:
    // (1) we have never pruned them (new TokenList)
    // (2) we have not yet pruned the forward links to the next f,
    //     after any of those tokens have changed their extra_cost.
    if (active_toks_[f].must_prune_forward_links) {
      bool extra_costs_changed = false, links_pruned = false;
      PruneForwardLinks(f, &extra_costs_changed, &links_pruned, delta);
      if (extra_costs_changed && f > 0) // any token has changed extra_cost
        active_toks_[f - 1].must_prune_forward_links = true;
      if (links_pruned) // any link was pruned
        active_toks_[f].must_prune_tokens = true;
      active_toks_[f].must_prune_forward_links = false; // job done
    }
    if (f + 1 < cur_frame_plus_one && // except for last f (no forward links)
        active_toks_[f + 1].must_prune_tokens) {
      PruneTokensForFrame(f + 1);
      active_toks_[f + 1].must_prune_tokens = false;
    }
  }
  KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin
                << " to " << num_toks_;
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::ComputeFinalCosts(
    unordered_map<Token *, BaseFloat> *final_costs, BaseFloat *final_relative_cost,
    BaseFloat *final_best_cost) const {
  if (decoding_finalized_) {
    // If we finalized decoding, the list toks_ will no longer exist, so return
    // something we already computed.
    if (final_costs) *final_costs = final_costs_;
    if (final_relative_cost) *final_relative_cost = final_relative_cost_;
    if (final_best_cost) *final_best_cost = final_best_cost_;
    return;
  }
  if (final_costs != NULL) final_costs->clear();
  const Elem *final_toks = toks_.GetList();
  BaseFloat infinity = std::numeric_limits<BaseFloat>::infinity();
  BaseFloat best_cost = infinity, best_cost_with_final = infinity;

  while (final_toks != NULL) {
    StateId state = final_toks->key;
    Token *tok = final_toks->val;
    const Elem *next = final_toks->tail;
    BaseFloat final_cost = fst_->Final(state).Value();
    BaseFloat cost = tok->tot_cost, cost_with_final = cost + final_cost;
    best_cost = std::min(cost, best_cost);
    best_cost_with_final = std::min(cost_with_final, best_cost_with_final);
    if (final_costs != NULL && final_cost != infinity)
      (*final_costs)[tok] = final_cost;
    final_toks = next;
  }
  if (final_relative_cost != NULL) {
    if (best_cost == infinity && best_cost_with_final == infinity) {
      // Likely this will only happen if there are no tokens surviving.
      // This seems the least bad way to handle it.
      *final_relative_cost = infinity;
    } else {
      *final_relative_cost = best_cost_with_final - best_cost;
    }
  }
  if (final_best_cost != NULL) {
    if (best_cost_with_final != infinity) { // final-state exists.
      *final_best_cost = best_cost_with_final;
    } else { // no final-state exists.
      *final_best_cost = best_cost;
    }
  }
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::AdvanceDecoding(
    DecodableInterface *decodable, int32 max_num_frames) {
  if (std::is_same<FST, fst::Fst<fst::StdArc> >::value) {
    // if the type 'FST' is the FST base-class, then see if the FST type of fst_
    // is actually VectorFst or ConstFst.  If so, call the AdvanceDecoding()
    // function after casting *this to the more specific type.
    if (fst_->Type() == "const") {
      LatticeIncrementalDecoderTpl<fst::ConstFst<fst::StdArc>, Token> *this_cast =
          reinterpret_cast<
              LatticeIncrementalDecoderTpl<fst::ConstFst<fst::StdArc>, Token> *>(
              this);
      this_cast->AdvanceDecoding(decodable, max_num_frames);
      return;
    } else if (fst_->Type() == "vector") {
      LatticeIncrementalDecoderTpl<fst::VectorFst<fst::StdArc>, Token> *this_cast =
          reinterpret_cast<
              LatticeIncrementalDecoderTpl<fst::VectorFst<fst::StdArc>, Token> *>(
              this);
      this_cast->AdvanceDecoding(decodable, max_num_frames);
      return;
    }
  }

  KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ &&
               "You must call InitDecoding() before AdvanceDecoding");
  int32 num_frames_ready = decodable->NumFramesReady();
  // num_frames_ready must be >= num_frames_decoded, or else
  // the number of frames ready must have decreased (which doesn't
  // make sense) or the decodable object changed between calls
  // (which isn't allowed).
  KALDI_ASSERT(num_frames_ready >= NumFramesDecoded());
  int32 target_frames_decoded = num_frames_ready;
  if (max_num_frames >= 0)
    target_frames_decoded =
        std::min(target_frames_decoded, NumFramesDecoded() + max_num_frames);
  while (NumFramesDecoded() < target_frames_decoded) {
    if (NumFramesDecoded() % config_.prune_interval == 0) {
      PruneActiveTokens(config_.lattice_beam * config_.prune_scale);
    }
    BaseFloat cost_cutoff = ProcessEmitting(decodable);
    ProcessNonemitting(cost_cutoff);
  }
  UpdateLatticeDeterminization();
}

// FinalizeDecoding() is a version of PruneActiveTokens that we call
// (optionally) on the final frame.  Takes into account the final-prob of
// tokens.  This function used to be called PruneActiveTokensFinal().
template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::FinalizeDecoding() {
  int32 final_frame_plus_one = NumFramesDecoded();
  int32 num_toks_begin = num_toks_;
  // PruneForwardLinksFinal() prunes the final frame (with final-probs), and
  // sets decoding_finalized_.
  PruneForwardLinksFinal();
  for (int32 f = final_frame_plus_one - 1; f >= 0; f--) {
    bool b1, b2; // values not used.
    BaseFloat dontcare = 0.0; // delta of zero means we must always update
    PruneForwardLinks(f, &b1, &b2, dontcare);
    PruneTokensForFrame(f + 1);
  }
  PruneTokensForFrame(0);
  KALDI_VLOG(4) << "pruned tokens from " << num_toks_begin << " to " << num_toks_;
}

/// Gets the weight cutoff.  Also counts the active tokens.
template <typename FST, typename Token>
BaseFloat LatticeIncrementalDecoderTpl<FST, Token>::GetCutoff(
    Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, Elem **best_elem) {
  BaseFloat best_weight = std::numeric_limits<BaseFloat>::infinity();
  // positive == high cost == bad.
  size_t count = 0;
  if (config_.max_active == std::numeric_limits<int32>::max() &&
      config_.min_active == 0) {
    for (Elem *e = list_head; e != NULL; e = e->tail, count++) {
      BaseFloat w = static_cast<BaseFloat>(e->val->tot_cost);
      if (w < best_weight) {
        best_weight = w;
        if (best_elem) *best_elem = e;
      }
    }
    if (tok_count != NULL) *tok_count = count;
    if (adaptive_beam != NULL) *adaptive_beam = config_.beam;
    return best_weight + config_.beam;
  } else {
    tmp_array_.clear();
    for (Elem *e = list_head; e != NULL; e = e->tail, count++) {
      BaseFloat w = e->val->tot_cost;
      tmp_array_.push_back(w);
      if (w < best_weight) {
        best_weight = w;
        if (best_elem) *best_elem = e;
      }
    }
    if (tok_count != NULL) *tok_count = count;

    BaseFloat beam_cutoff = best_weight + config_.beam,
        min_active_cutoff = std::numeric_limits<BaseFloat>::infinity(),
        max_active_cutoff = std::numeric_limits<BaseFloat>::infinity();

    KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded()
                  << " is " << tmp_array_.size();

    if (tmp_array_.size() > static_cast<size_t>(config_.max_active)) {
      std::nth_element(tmp_array_.begin(), tmp_array_.begin() + config_.max_active,
                       tmp_array_.end());
      max_active_cutoff = tmp_array_[config_.max_active];
    }
    if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam.
      if (adaptive_beam)
        *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta;
      return max_active_cutoff;
    }
    if (tmp_array_.size() > static_cast<size_t>(config_.min_active)) {
      if (config_.min_active == 0)
        min_active_cutoff = best_weight;
      else {
        std::nth_element(tmp_array_.begin(), tmp_array_.begin() + config_.min_active,
                         tmp_array_.size() > static_cast<size_t>(config_.max_active)
                             ? tmp_array_.begin() + config_.max_active
                             : tmp_array_.end());
        min_active_cutoff = tmp_array_[config_.min_active];
      }
    }
    if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam.
      if (adaptive_beam)
        *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta;
      return min_active_cutoff;
    } else {
      if (adaptive_beam) *adaptive_beam = config_.beam;
      return beam_cutoff;
    }
  }
}
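
/* Numeric example (hypothetical values): with config_.beam = 16.0 and
   config_.max_active = 7000, suppose 20000 tokens are active, the best has
   tot_cost 100.0 and the 7000th-best has tot_cost 108.0.  Then beam_cutoff =
   116.0 but max_active_cutoff = 108.0 is tighter, so GetCutoff() returns
   108.0 and sets *adaptive_beam = 8.0 + config_.beam_delta: slightly looser
   than the effective beam, so ProcessEmitting() can still tighten
   next_cutoff as it finds better tokens. */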

template <typename FST, typename Token>
BaseFloat LatticeIncrementalDecoderTpl<FST, Token>::ProcessEmitting(
    DecodableInterface *decodable) {
  KALDI_ASSERT(active_toks_.size() > 0);
  int32 frame = active_toks_.size() - 1; // frame is the frame-index
                                         // (zero-based) used to get likelihoods
                                         // from the decodable object.
  active_toks_.resize(active_toks_.size() + 1);

  Elem *final_toks = toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_
                                    // in simple-decoder.h.  Removes the Elems from
                                    // being indexed in the hash in toks_.
  Elem *best_elem = NULL;
  BaseFloat adaptive_beam;
  size_t tok_cnt;
  BaseFloat cur_cutoff = GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem);
  KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is "
                << adaptive_beam;

  PossiblyResizeHash(tok_cnt); // This makes sure the hash is always big enough.

  BaseFloat next_cutoff = std::numeric_limits<BaseFloat>::infinity();
  // pruning "online" before having seen all tokens

  BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good
                               // dynamic range.

  // First process the best token to get a hopefully
  // reasonably tight bound on the next cutoff.  The only
  // products of the next block are "next_cutoff" and "cost_offset".
  if (best_elem) {
    StateId state = best_elem->key;
    Token *tok = best_elem->val;
    cost_offset = -tok->tot_cost;
    for (fst::ArcIterator<FST> aiter(*fst_, state); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel != 0) { // propagate..
        BaseFloat new_weight = arc.weight.Value() + cost_offset -
                               decodable->LogLikelihood(frame, arc.ilabel) +
                               tok->tot_cost;
        if (new_weight + adaptive_beam < next_cutoff)
          next_cutoff = new_weight + adaptive_beam;
      }
    }
  }

  // Store the offset on the acoustic likelihoods that we're applying.
  // Could just do cost_offsets_.push_back(cost_offset), but we
  // do it this way as it's more robust to future code changes.
  cost_offsets_.resize(frame + 1, 0.0);
  cost_offsets_[frame] = cost_offset;

  // the tokens are now owned here, in final_toks, and the hash is empty.
  // 'owned' is a complex thing here; the point is we need to call DeleteElem
  // on each elem 'e' to let toks_ know we're done with them.
  for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) {
    // loop this way because we delete "e" as we go.
    StateId state = e->key;
    Token *tok = e->val;
    if (tok->tot_cost <= cur_cutoff) {
      for (fst::ArcIterator<FST> aiter(*fst_, state); !aiter.Done(); aiter.Next()) {
        const Arc &arc = aiter.Value();
        if (arc.ilabel != 0) { // propagate..
          BaseFloat ac_cost =
                        cost_offset - decodable->LogLikelihood(frame, arc.ilabel),
                    graph_cost = arc.weight.Value(), cur_cost = tok->tot_cost,
                    tot_cost = cur_cost + ac_cost + graph_cost;
          if (tot_cost >= next_cutoff)
            continue;
          else if (tot_cost + adaptive_beam < next_cutoff)
            next_cutoff = tot_cost + adaptive_beam; // prune by best current token
          // Note: the frame indexes into active_toks_ are one-based,
          // hence the + 1.
          Token *next_tok =
              FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, NULL);
          // NULL: no change indicator needed

          // Add ForwardLink from tok to next_tok (put on head of list tok->links)
          tok->links = new ForwardLinkT(next_tok, arc.ilabel, arc.olabel, graph_cost,
                                        ac_cost, tok->links);
        }
      } // for all arcs
    }
    e_tail = e->tail;
    toks_.Delete(e); // delete Elem
  }
  return next_cutoff;
}

// static inline
template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::DeleteForwardLinks(Token *tok) {
  ForwardLinkT *l = tok->links, *m;
  while (l != NULL) {
    m = l->next;
    delete l;
    l = m;
  }
  tok->links = NULL;
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::ProcessNonemitting(BaseFloat cutoff) {
  KALDI_ASSERT(!active_toks_.empty());
  int32 frame = static_cast<int32>(active_toks_.size()) - 2;
  // Note: "frame" is the time-index we just processed, or -1 if
  // we are processing the nonemitting transitions before the
  // first frame (called from InitDecoding()).

  // Processes nonemitting arcs for one frame.  Propagates within toks_.
  // Note-- this queue structure is not very optimal as
  // it may cause us to process states unnecessarily (e.g. more than once),
  // but in the baseline code, turning this vector into a set to fix this
  // problem did not improve overall speed.

  KALDI_ASSERT(queue_.empty());

  if (toks_.GetList() == NULL) {
    if (!warned_) {
      KALDI_WARN << "Error, no surviving tokens: frame is " << frame;
      warned_ = true;
    }
  }

  for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) {
    StateId state = e->key;
    if (fst_->NumInputEpsilons(state) != 0) queue_.push_back(state);
  }

  while (!queue_.empty()) {
    StateId state = queue_.back();
    queue_.pop_back();

    Token *tok =
        toks_.Find(state)
            ->val; // would segfault if state not in toks_ but this can't happen.
    BaseFloat cur_cost = tok->tot_cost;
    if (cur_cost >= cutoff) // Don't bother processing successors.
      continue;
    // If "tok" has any existing forward links, delete them,
    // because we're about to regenerate them.  This is a kind
    // of non-optimality (remember, this is the simple decoder),
    // but since most states are emitting it's not a huge issue.
    DeleteForwardLinks(tok); // necessary when re-visiting
    tok->links = NULL;
    for (fst::ArcIterator<FST> aiter(*fst_, state); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel == 0) { // propagate nonemitting only...
        BaseFloat graph_cost = arc.weight.Value(), tot_cost = cur_cost + graph_cost;
        if (tot_cost < cutoff) {
          bool changed;

          Token *new_tok =
              FindOrAddToken(arc.nextstate, frame + 1, tot_cost, tok, &changed);

          tok->links =
              new ForwardLinkT(new_tok, 0, arc.olabel, graph_cost, 0, tok->links);

          // "changed" tells us whether the new token has a different
          // cost from before, or is new [if so, add into queue].
          if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0)
            queue_.push_back(arc.nextstate);
        }
      }
    } // for all arcs
  } // while queue not empty
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<FST, Token>::DeleteElems(Elem *list) {
  for (Elem *e = list, *e_tail; e != NULL; e = e_tail) {
    e_tail = e->tail;
    toks_.Delete(e);
  }
}

template <typename FST, typename Token>
void LatticeIncrementalDecoderTpl<
    FST, Token>::ClearActiveTokens() { // a cleanup routine, at utt end/begin
  for (size_t i = 0; i < active_toks_.size(); i++) {
    // Delete all tokens alive on this frame, and any forward
    // links they may have.
    for (Token *tok = active_toks_[i].toks; tok != NULL;) {
      DeleteForwardLinks(tok);
      Token *next_tok = tok->next;
      delete tok;
      num_toks_--;
      tok = next_tok;
    }
  }
  active_toks_.clear();
  KALDI_ASSERT(num_toks_ == 0);
}


template <typename FST, typename Token>
const CompactLattice& LatticeIncrementalDecoderTpl<FST, Token>::GetLattice(
    int32 num_frames_to_include,
    bool use_final_probs) {
  KALDI_ASSERT(num_frames_to_include >= num_frames_in_lattice_ &&
               num_frames_to_include <= NumFramesDecoded());

  if (num_frames_in_lattice_ > 0 &&
      determinizer_.GetLattice().NumStates() == 0) {
    /* Something went wrong, lattice is empty and will continue to be empty.
       User-level code should detect and deal with this.
     */
    num_frames_in_lattice_ = num_frames_to_include;
    return determinizer_.GetLattice();
  }

  if (decoding_finalized_ && !use_final_probs) {
    // This is not supported
    KALDI_ERR << "You cannot get the lattice without final-probs after "
                 "calling FinalizeDecoding().";
  }
  if (use_final_probs && num_frames_to_include != NumFramesDecoded()) {
    /* This is because we only remember the relation between HCLG states and
       Tokens for the current frame; the Token does not have a `state` field. */
    KALDI_ERR << "use-final-probs may not be true if you are not "
                 "getting a lattice for all frames decoded so far.";
  }


  if (num_frames_to_include > num_frames_in_lattice_) {
    /* Make sure the token-pruning is up to date.  If we just pruned the tokens,
       this will do very little work. */
    PruneActiveTokens(config_.lattice_beam * config_.prune_scale);

    if (determinizer_.GetLattice().NumStates() == 0 ||
        determinizer_.GetLattice().Final(0) != CompactLatticeWeight::Zero()) {
      num_frames_in_lattice_ = 0;
      determinizer_.Init();
    }

    Lattice chunk_lat;

    unordered_map<Label, LatticeArc::StateId> token_label2state;
    if (num_frames_in_lattice_ != 0) {
      determinizer_.InitializeRawLatticeChunk(&chunk_lat,
                                              &token_label2state);
    }

    // tok2state_map will map from Token* to state-id in chunk_lat.
    // The cur and prev versions alternate on different frames.
    unordered_map<Token*, StateId> &tok2state_map(temp_token_map_);
    tok2state_map.clear();

    unordered_map<Token*, Label> &next_token2label_map(token2label_map_temp_);
    next_token2label_map.clear();

    { // Deal with the last frame in the chunk, the one numbered
      // `num_frames_to_include`.  (Yes, this is backwards).  We allocate token
      // labels, and set tokens as final, but don't add any transitions.  This
      // may leave some states disconnected (e.g. due to chains of nonemitting
      // arcs), but it's OK; we'll fix it when we generate the next chunk of
      // lattice.
      int32 frame = num_frames_to_include;
      // Allocate state-ids for all tokens on this frame.

      for (Token *tok = active_toks_[frame].toks; tok != NULL; tok = tok->next) {
        /* If we included the final-costs at this stage, they would cause
           non-final states to be pruned out from the end of the lattice. */
        BaseFloat final_cost;
        { // This block computes final_cost
          if (decoding_finalized_) {
            if (final_costs_.empty()) {
              final_cost = 0.0; /* No final-state survived, so treat all as final
                                 * with probability One(). */
            } else {
              auto iter = final_costs_.find(tok);
              if (iter == final_costs_.end())
                final_cost = std::numeric_limits<BaseFloat>::infinity();
              else
                final_cost = iter->second;
            }
          } else {
            /* this is a `fake` final-cost used to guide pruning.  It's as if we
               set the betas (backward-probs) on the final frame to the
               negatives of the corresponding alphas, so all tokens on the last
               frame will be on a best path.  The extra_cost for each token
               always corresponds to its alpha+beta on this assumption.  We want
               the final_cost here to correspond to the beta (backward-prob), so
               we get that by final_cost = extra_cost - tot_cost.
               [The tot_cost is the forward/alpha cost.]
            */
            final_cost = tok->extra_cost - tok->tot_cost;
          }
        }

        StateId state = chunk_lat.AddState();
        tok2state_map[tok] = state;
        if (final_cost < std::numeric_limits<BaseFloat>::infinity()) {
          next_token2label_map[tok] = AllocateNewTokenLabel();
          StateId token_final_state = chunk_lat.AddState();
          LatticeArc::Label ilabel = 0,
              olabel = next_token2label_map[tok];
          chunk_lat.AddArc(state,
                           LatticeArc(ilabel, olabel,
                                      LatticeWeight::One(),
                                      token_final_state));
          chunk_lat.SetFinal(token_final_state, LatticeWeight(final_cost, 0.0));
        }
      }
    }

    // Go in reverse order over the remaining frames so we can create arcs as we
    // go, and their destination-states will already be in the map.
    for (int32 frame = num_frames_to_include;
         frame >= num_frames_in_lattice_; frame--) {
      // The conditional below is needed for the last frame of the utterance.
      BaseFloat cost_offset = (frame < cost_offsets_.size() ?
                               cost_offsets_[frame] : 0.0);

      // For the first frame of the chunk, we need to make sure the states are
      // the ones created by InitializeRawLatticeChunk() (where not pruned away).
      if (frame == num_frames_in_lattice_ && num_frames_in_lattice_ != 0) {
        for (Token *tok = active_toks_[frame].toks; tok != NULL; tok = tok->next) {
          auto iter = token2label_map_.find(tok);
          KALDI_ASSERT(iter != token2label_map_.end());
          Label token_label = iter->second;
          auto iter2 = token_label2state.find(token_label);
          if (iter2 != token_label2state.end()) {
            StateId state = iter2->second;
            tok2state_map[tok] = state;
          } else {
            // Some states may have been pruned out, but we should still allocate
            // them.  They might have been part of chains of nonemitting arcs
            // where the state became disconnected because the last chunk didn't
            // include arcs starting at this frame.
            StateId state = chunk_lat.AddState();
            tok2state_map[tok] = state;
          }
        }
      } else if (frame != num_frames_to_include) { // We already created states
                                                   // for the last frame.
        for (Token *tok = active_toks_[frame].toks; tok != NULL; tok = tok->next) {
          StateId state = chunk_lat.AddState();
          tok2state_map[tok] = state;
        }
      }
      for (Token *tok = active_toks_[frame].toks; tok != NULL; tok = tok->next) {
        auto iter = tok2state_map.find(tok);
        KALDI_ASSERT(iter != tok2state_map.end());
        StateId cur_state = iter->second;
        for (ForwardLinkT *l = tok->links; l != NULL; l = l->next) {
          auto next_iter = tok2state_map.find(l->next_tok);
          if (next_iter == tok2state_map.end()) {
            // Emitting arcs from the last frame we're including -- ignore
            // these.
            KALDI_ASSERT(frame == num_frames_to_include);
            continue;
          }
          StateId next_state = next_iter->second;
          BaseFloat this_offset = (l->ilabel != 0 ? cost_offset : 0);
          LatticeArc arc(l->ilabel, l->olabel,
                         LatticeWeight(l->graph_cost, l->acoustic_cost - this_offset),
                         next_state);
          // Note: the epsilons get redundantly included at the end and beginning
          // of successive chunks.  These will get removed in the determinization.
          chunk_lat.AddArc(cur_state, arc);
        }
      }
    }
    if (num_frames_in_lattice_ == 0) {
      // This block locates the start token.  NOTE: we use the fact that in the
      // linked list of tokens, things are added at the head, so the start state
      // must be at the tail.  If this data structure is changed in future, we
      // might need to explicitly store the start token as a class member.
      Token *tok = active_toks_[0].toks;
      if (tok == NULL) {
        KALDI_WARN << "No tokens exist on start frame";
        return determinizer_.GetLattice(); // will be empty.
      }
      while (tok->next != NULL)
        tok = tok->next;
      Token *start_token = tok;
      auto iter = tok2state_map.find(start_token);
      KALDI_ASSERT(iter != tok2state_map.end());
      StateId start_state = iter->second;
      chunk_lat.SetStart(start_state);
    }
    token2label_map_.swap(next_token2label_map);

    // bool finished_before_beam =
    determinizer_.AcceptRawLatticeChunk(&chunk_lat);
    // We are ignoring the return status, which says whether it finished before
    // the beam.

    num_frames_in_lattice_ = num_frames_to_include;

    if (determinizer_.GetLattice().NumStates() == 0)
      return determinizer_.GetLattice(); // Something went wrong, lattice is empty.
  }

  unordered_map<Token*, BaseFloat> token2final_cost;
  unordered_map<Label, BaseFloat> token_label2final_cost;
  if (use_final_probs) {
    ComputeFinalCosts(&token2final_cost, NULL, NULL);
    for (const auto &p: token2final_cost) {
      Token *tok = p.first;
      BaseFloat cost = p.second;
      auto iter = token2label_map_.find(tok);
      if (iter != token2label_map_.end()) {
        /* Some tokens may not have survived the pruned determinization. */
        Label token_label = iter->second;
        bool ret = token_label2final_cost.insert({token_label, cost}).second;
        KALDI_ASSERT(ret); /* Make sure it was inserted. */
      }
    }
  }
  /* Note: these final-probs won't affect the next chunk, only the lattice
     returned from GetLattice().  They are kind of temporaries. */
  determinizer_.SetFinalCosts(token_label2final_cost.empty() ? NULL :
                              &token_label2final_cost);

  return determinizer_.GetLattice();
}


template <typename FST, typename Token>
int32 LatticeIncrementalDecoderTpl<FST, Token>::GetNumToksForFrame(int32 frame) {
  int32 r = 0;
  for (Token *tok = active_toks_[frame].toks; tok; tok = tok->next) r++;
  return r;
}


/* This utility function adds an arc to a Lattice, but where the source is a
   CompactLatticeArc.  If the CompactLatticeArc has a string with length greater
   than 1, this will require adding extra states to `lat`.
 */
static void AddCompactLatticeArcToLattice(
    const CompactLatticeArc &clat_arc,
    LatticeArc::StateId src_state,
    Lattice *lat) {
  const std::vector<int32> &string = clat_arc.weight.String();
  size_t N = string.size();
  if (N == 0) {
    LatticeArc arc;
    arc.ilabel = 0;
    arc.olabel = clat_arc.ilabel;
    arc.nextstate = clat_arc.nextstate;
    arc.weight = clat_arc.weight.Weight();
    lat->AddArc(src_state, arc);
  } else {
    LatticeArc::StateId cur_state = src_state;
    for (size_t i = 0; i < N; i++) {
      LatticeArc arc;
      arc.ilabel = string[i];
      arc.olabel = (i == 0 ? clat_arc.ilabel : 0);
      arc.nextstate = (i + 1 == N ? clat_arc.nextstate : lat->AddState());
      arc.weight = (i == 0 ? clat_arc.weight.Weight() : LatticeWeight::One());
      lat->AddArc(cur_state, arc);
      cur_state = arc.nextstate;
    }
  }
}
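
/* Example of the expansion above (hypothetical labels): a CompactLatticeArc
   with ilabel == olabel == 42 (a word label), string {3, 4, 5} (e.g.
   transition-ids) and weight w is expanded into three Lattice arcs:

     src --(3:42 / w)--> s1 --(4:0 / One)--> s2 --(5:0 / One)--> clat_arc.nextstate

   i.e. the word label and the weight are placed on the first arc only. */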


void LatticeIncrementalDeterminizer::Init() {
  non_final_redet_states_.clear();
  clat_.DeleteStates();
  final_arcs_.clear();
  forward_costs_.clear();
  arcs_in_.clear();
}

CompactLattice::StateId LatticeIncrementalDeterminizer::AddStateToClat() {
  CompactLattice::StateId ans = clat_.AddState();
  forward_costs_.push_back(std::numeric_limits<BaseFloat>::infinity());
  KALDI_ASSERT(forward_costs_.size() == ans + 1);
  arcs_in_.resize(ans + 1);
  return ans;
}

void LatticeIncrementalDeterminizer::AddArcToClat(
    CompactLattice::StateId state,
    const CompactLatticeArc &arc) {
  BaseFloat forward_cost = forward_costs_[state] +
                           ConvertToCost(arc.weight);
  if (forward_cost == std::numeric_limits<BaseFloat>::infinity())
    return;
  int32 arc_idx = clat_.NumArcs(state);
  clat_.AddArc(state, arc);
  arcs_in_[arc.nextstate].push_back({state, arc_idx});
  if (forward_cost < forward_costs_[arc.nextstate])
    forward_costs_[arc.nextstate] = forward_cost;
}

// See documentation in header
void LatticeIncrementalDeterminizer::IdentifyTokenFinalStates(
    const CompactLattice &chunk_clat,
    std::unordered_map<CompactLattice::StateId, CompactLatticeArc::Label> *token_map) const {
  token_map->clear();
  using StateId = CompactLattice::StateId;
  using Label = CompactLatticeArc::Label;

  StateId num_states = chunk_clat.NumStates();
  for (StateId state = 0; state < num_states; state++) {
    for (fst::ArcIterator<CompactLattice> aiter(chunk_clat, state);
         !aiter.Done(); aiter.Next()) {
      const CompactLatticeArc &arc = aiter.Value();
      if (arc.olabel >= kTokenLabelOffset && arc.olabel < kMaxTokenLabel) {
        StateId nextstate = arc.nextstate;
        auto r = token_map->insert({nextstate, arc.olabel});
        // Check consistency of labels on incoming arcs
        KALDI_ASSERT(r.first->second == arc.olabel);
      }
    }
  }
}


void LatticeIncrementalDeterminizer::GetNonFinalRedetStates() {
  using StateId = CompactLattice::StateId;
  non_final_redet_states_.clear();
  non_final_redet_states_.reserve(final_arcs_.size());

  std::vector<StateId> state_queue;
  for (const CompactLatticeArc &arc: final_arcs_) {
    // Note: we abuse the .nextstate field to store the state which is really
    // the source of that arc.
    StateId redet_state = arc.nextstate;
    if (forward_costs_[redet_state] != std::numeric_limits<BaseFloat>::infinity()) {
      // if it is accessible..
      if (non_final_redet_states_.insert(redet_state).second) {
        // it was not already there
        state_queue.push_back(redet_state);
      }
    }
  }
  // Add any states that are reachable from the states above.
  while (!state_queue.empty()) {
    StateId s = state_queue.back();
    state_queue.pop_back();
    for (fst::ArcIterator<CompactLattice> aiter(clat_, s); !aiter.Done();
         aiter.Next()) {
      const CompactLatticeArc &arc = aiter.Value();
      StateId nextstate = arc.nextstate;
      if (non_final_redet_states_.insert(nextstate).second)
        state_queue.push_back(nextstate); // it was not already there
    }
  }
}


void LatticeIncrementalDeterminizer::InitializeRawLatticeChunk(
    Lattice *olat,
    unordered_map<Label, LatticeArc::StateId> *token_label2state) {
  using namespace fst;

  olat->DeleteStates();
  LatticeArc::StateId start_state = olat->AddState();
  olat->SetStart(start_state);
  token_label2state->clear();

  // redet_state_map maps from state-ids in clat_ to state-ids in olat.  This
  // will be the set of states from which the arcs to final-states in the
  // canonical appended lattice leave (physically, these are in the .nextstate
  // elements of final_arcs_, since we use that field for the source state),
  // plus any states reachable from those states.
  unordered_map<CompactLattice::StateId, LatticeArc::StateId> redet_state_map;

  for (CompactLattice::StateId redet_state: non_final_redet_states_)
    redet_state_map[redet_state] = olat->AddState();

  // First, process any arcs leaving the non-final redeterminized states that
  // are not to final-states.  (What we mean by "not to final-states" is, not
  // to states that are final in the `canonical appended lattice`; they may
  // actually be physically final in clat_, because we make clat_ what we want
  // to return to the user.)
  for (CompactLattice::StateId redet_state: non_final_redet_states_) {
    LatticeArc::StateId lat_state = redet_state_map[redet_state];

    for (ArcIterator<CompactLattice> aiter(clat_, redet_state);
         !aiter.Done(); aiter.Next()) {
      const CompactLatticeArc &arc = aiter.Value();
      CompactLattice::StateId nextstate = arc.nextstate;
      LatticeArc::StateId lat_nextstate = olat->NumStates();
      auto r = redet_state_map.insert({nextstate, lat_nextstate});
      if (r.second) { // Was inserted.
        LatticeArc::StateId s = olat->AddState();
        KALDI_ASSERT(s == lat_nextstate);
      } else {
        // was not inserted -> was already there.
        lat_nextstate = r.first->second;
      }
      CompactLatticeArc clat_arc(arc);
      clat_arc.nextstate = lat_nextstate;
      AddCompactLatticeArcToLattice(clat_arc, lat_state, olat);
    }
    clat_.DeleteArcs(redet_state);
    clat_.SetFinal(redet_state, CompactLatticeWeight::Zero());
  }

  for (const CompactLatticeArc &arc: final_arcs_) {
    // We abuse the `nextstate` field to store the source state.
    CompactLattice::StateId src_state = arc.nextstate;
    auto iter = redet_state_map.find(src_state);
    if (forward_costs_[src_state] == std::numeric_limits<BaseFloat>::infinity())
      continue; /* Unreachable state */
    KALDI_ASSERT(iter != redet_state_map.end());
    LatticeArc::StateId src_lat_state = iter->second;
    Label token_label = arc.ilabel; // will be == arc.olabel.
    KALDI_ASSERT(token_label >= kTokenLabelOffset &&
                 token_label < kMaxTokenLabel);
    auto r = token_label2state->insert({token_label,
                                        olat->NumStates()});
    LatticeArc::StateId dest_lat_state = r.first->second;
    if (r.second) { // was inserted
      LatticeArc::StateId new_state = olat->AddState();
      KALDI_ASSERT(new_state == dest_lat_state);
    }
    CompactLatticeArc new_arc;
    new_arc.nextstate = dest_lat_state;
    /* We convert the token-label to epsilon; it's not needed anymore. */
    new_arc.ilabel = new_arc.olabel = 0;
    new_arc.weight = arc.weight;
    AddCompactLatticeArcToLattice(new_arc, src_lat_state, olat);
  }

  // Now deal with the initial-probs.  Arcs from initial-states to
  // redeterminized-states in the raw lattice have an olabel that identifies the
  // id of that redeterminized-state in clat_, and a cost that is derived from
  // its entry in forward_costs_.  These forward-probs are used to get the
  // pruned lattice determinization to behave correctly, and will be canceled
  // out later on.
  //
  // In the paper this is the second-from-last bullet in Sec. 5.2.  NOTE: in the
  // paper we state that we only include such arcs for "each redeterminized
  // state that is either initial in det(A) or that has an arc entering it from
  // a state that is not a redeterminized state."  In fact, we include these
  // arcs for all redeterminized states.  I realized that it won't make a
  // difference to the outcome, and it's easier to do it this way.
  for (CompactLattice::StateId state_id: non_final_redet_states_) {
    BaseFloat forward_cost = forward_costs_[state_id];
    LatticeArc arc;
    arc.ilabel = 0;
    // The olabel (which appears where the word-id would) is what
    // we call a 'state-label'.  It identifies a state in clat_.
    arc.olabel = state_id + kStateLabelOffset;
    // It doesn't matter what field we put forward_cost in (or whether we
    // divide it among them both); the effect on pruning is the same, and
    // we will cancel it out later anyway.
    arc.weight = LatticeWeight(forward_cost, 0);
    auto iter = redet_state_map.find(state_id);
    KALDI_ASSERT(iter != redet_state_map.end());
    arc.nextstate = iter->second;
    olat->AddArc(start_state, arc);
  }
}
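
/* Illustrative example (hypothetical numbers): suppose clat_ state 104 is a
   non-final redeterminized state with forward_costs_[104] = 12.5.  The raw
   chunk then gets an initial arc

     start_state --(0 : 104 + kStateLabelOffset / (12.5, 0))--> lat-state-of-104

   so that pruned determinization sees this entry point with the correct
   relative weight; ProcessArcsFromChunkStartState() later cancels the 12.5
   via Times(..., LatticeWeight(-forward_costs_[clat_state], 0.0)). */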

void LatticeIncrementalDeterminizer::GetRawLatticeFinalCosts(
    const Lattice &raw_fst,
    std::unordered_map<Label, BaseFloat> *old_final_costs) {
  LatticeArc::StateId raw_fst_num_states = raw_fst.NumStates();
  for (LatticeArc::StateId s = 0; s < raw_fst_num_states; s++) {
    for (fst::ArcIterator<Lattice> aiter(raw_fst, s); !aiter.Done();
         aiter.Next()) {
      const LatticeArc &value = aiter.Value();
      if (value.olabel >= (Label)kTokenLabelOffset &&
          value.olabel < (Label)kMaxTokenLabel) {
        LatticeWeight final_weight = raw_fst.Final(value.nextstate);
        if (final_weight != LatticeWeight::Zero() &&
            final_weight.Value2() != 0) {
          KALDI_ERR << "Label " << value.olabel << " from state " << s
                    << " looks like a token-label but its next-state "
                    << value.nextstate << " has unexpected final-weight "
                    << final_weight.Value1() << ',' << final_weight.Value2();
        }
        auto r = old_final_costs->insert({value.olabel,
                                          final_weight.Value1()});
        if (!r.second && r.first->second != final_weight.Value1()) {
          // For any given token-label, all arcs in raw_fst with that
          // olabel should go to the same state, so this should be
          // impossible.
          KALDI_ERR << "Unexpected mismatch in final-costs for tokens, "
                    << r.first->second << " vs " << final_weight.Value1();
        }
      }
    }
  }
}
1361 
1362 
1363 bool LatticeIncrementalDeterminizer::ProcessArcsFromChunkStartState(
1364  const CompactLattice &chunk_clat,
1365  std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> *state_map) {
1367  StateId clat_num_states = clat_.NumStates();
1368 
1369  // Process arcs leaving the start state of chunk_clat. These arcs will have
1370  // state-labels on them (unless this is the first chunk).
1371  // For destination-states of those arcs, work out which states in
1372  // clat_ they correspond to and update their forward_costs.
1373  for (fst::ArcIterator<CompactLattice> aiter(chunk_clat, chunk_clat.Start());
1374  !aiter.Done(); aiter.Next()) {
1375  const CompactLatticeArc &arc = aiter.Value();
1376  Label label = arc.ilabel; // ilabel == olabel; would be the olabel
1377  // in a Lattice.
1378  if (!(label >= kStateLabelOffset &&
1379  label - kStateLabelOffset < clat_num_states)) {
1380  // The label was not a state-label. This should only be possible on the
1381  // first chunk.
1382  KALDI_ASSERT(state_map->empty());
1383  return true; // this is the first chunk.
1384  }
1385  StateId clat_state = label - kStateLabelOffset;
1386  StateId chunk_state = arc.nextstate;
1387  auto p = state_map->insert({chunk_state, clat_state});
1388  StateId dest_clat_state = p.first->second;
1389  // We deleted all its arcs in InitializeRawLatticeChunk
1390  KALDI_ASSERT(clat_.NumArcs(clat_state) == 0);
1391  /*
1392  In almost all cases, dest_clat_state and clat_state will be the same state;
1393  but there may be situations where two arcs with different state-labels
1394  left the start state and entered the same next-state in chunk_clat; and in
1395  these cases, they will be different.
1396 
1397  We didn't address this issue in the paper (or actually realize it could be
1398  a problem). What we do is pick one of the clat_states as the "canonical"
1399  one, and redirect all incoming transitions of the others to enter the
1400  "canonical" one. (Search below for new_in_arc.nextstate =
1401  dest_clat_state).
1402  */
1403  if (clat_state != dest_clat_state) {
1404  // Check that the start state isn't getting merged with any other state.
1405  // If this were possible, we'd need to deal with it specially, but it
1406  // can't be, because to be merged, 2 states must have identical arcs
1407  // leaving them with identical weights, so we'd need to have another state
1408  // on frame 0 identical to the start state, which is not possible if the
1409  // lattice is deterministic and epsilon-free.
1410  KALDI_ASSERT(clat_state != 0 && dest_clat_state != 0);
1411  }
1412 
1413  // extra_weight_in is an extra weight that we'll include on arcs entering
1414  // this state from the previous chunk. We need to cancel out
1415  // `forward_costs[clat_state]`, which was included in the corresponding arc
1416  // in the raw lattice for pruning purposes; and we need to include the
1417  // weight on the arc from the start-state of `chunk_clat` to this state.
1418  CompactLatticeWeight extra_weight_in = arc.weight;
1419  extra_weight_in.SetWeight(
1420  fst::Times(extra_weight_in.Weight(),
1421  LatticeWeight(-forward_costs_[clat_state], 0.0)));
1422 
1423  // We don't allow state 0 to be a redeterminized-state; calling code assures
1424  // this. Search for `determinizer_.GetLattice().Final(0) !=
1425  // CompactLatticeWeight::Zero())` to find that calling code.
1426  KALDI_ASSERT(clat_state != 0);
1427 
1428  // Note: 0 is the start state of clat_. This was checked.
1429  forward_costs_[clat_state] = (clat_state == 0 ? 0 :
1430  std::numeric_limits<BaseFloat>::infinity());
1431  std::vector<std::pair<StateId, int32> > arcs_in;
1432  arcs_in.swap(arcs_in_[clat_state]);
1433  for (auto p: arcs_in) {
1434  // Note: we'll be doing `continue` below if this input arc came from
1435  // another redeterminized-state, because we did DeleteArcs() for them in
1436  // InitializeRawLatticeChunk(). Those arcs will be transferred
1437  // from chunk_clat later on.
1438  CompactLattice::StateId src_state = p.first;
1439  int32 arc_pos = p.second;
1440 
1441  if (arc_pos >= (int32)clat_.NumArcs(src_state))
1442  continue;
1443  fst::MutableArcIterator<CompactLattice> aiter(&clat_, src_state);
1444  aiter.Seek(arc_pos);
1445  if (aiter.Value().nextstate != clat_state)
1446  continue; // This arc record has become invalidated.
1447  CompactLatticeArc new_in_arc(aiter.Value());
1448  // In most cases we will have dest_clat_state == clat_state, so the next
1449  // line won't change the value of .nextstate
1450  new_in_arc.nextstate = dest_clat_state;
1451  new_in_arc.weight = fst::Times(new_in_arc.weight, extra_weight_in);
1452  aiter.SetValue(new_in_arc);
1453 
1454  BaseFloat new_forward_cost = forward_costs_[src_state] +
1455  ConvertToCost(new_in_arc.weight);
1456  if (new_forward_cost < forward_costs_[dest_clat_state])
1457  forward_costs_[dest_clat_state] = new_forward_cost;
1458  arcs_in_[dest_clat_state].push_back(p);
1459  }
1460  }
1461  return false; // this is not the first chunk.
1462 }
1463 
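The inner loop above relies on the arcs_in_ records being lazily invalidated: a stored (source-state, arc-index) pair can go stale once arcs are deleted or rewritten, so each record is validated at the point of use (the two `continue` statements) rather than being removed eagerly. The same pattern in isolation, with hypothetical names; arcs[s] stands in for the arcs leaving state s of clat_, reduced to just their nextstates:

#include <cstddef>
#include <vector>

struct ArcRecord { size_t src_state; size_t arc_pos; };

bool RecordStillValid(const ArcRecord &rec, size_t dest_state,
                      const std::vector<std::vector<size_t> > &arcs) {
  if (rec.arc_pos >= arcs[rec.src_state].size())
    return false;  // the arc was deleted; the stored index is out of range.
  if (arcs[rec.src_state][rec.arc_pos] != dest_state)
    return false;  // the position now holds some other arc.
  return true;     // safe to update this arc in place.
}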
1464 void LatticeIncrementalDeterminizer::TransferArcsToClat(
1465  const CompactLattice &chunk_clat,
1466  bool is_first_chunk,
1467  const std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> &state_map,
1468  const std::unordered_map<CompactLattice::StateId, Label> &chunk_state_to_token,
1469  const std::unordered_map<Label, BaseFloat> &old_final_costs) {
1471  StateId chunk_num_states = chunk_clat.NumStates();
1472 
1473  // Now transfer arcs from chunk_clat to clat_.
1474  for (StateId chunk_state = (is_first_chunk ? 0 : 1);
1475  chunk_state < chunk_num_states; chunk_state++) {
1476  auto iter = state_map.find(chunk_state);
1477  if (iter == state_map.end()) {
1478  KALDI_ASSERT(chunk_state_to_token.count(chunk_state) != 0);
1479  // Don't process token-final states. Anyway they have no arcs leaving
1480  // them.
1481  continue;
1482  }
1483  StateId clat_state = iter->second;
1484 
1485  // We know at this point that `clat_state` is not a token-final state
1486  // (see glossary for definition); if it were, we would have done
1487  // `continue` above.
1488  //
1489  // Only in the last chunk of the lattice would there be a final-prob on
1490  // states that are not `token-final states`; these final-probs would
1491  // normally all be Zero() at this point. So in almost all cases the following
1492  // call will do nothing.
1493  clat_.SetFinal(clat_state, chunk_clat.Final(chunk_state));
1494 
1495  // Process arcs leaving this state.
1496  for (fst::ArcIterator<CompactLattice> aiter(chunk_clat, chunk_state);
1497  !aiter.Done(); aiter.Next()) {
1498  CompactLatticeArc arc(aiter.Value());
1499 
1500  auto next_iter = state_map.find(arc.nextstate);
1501  if (next_iter != state_map.end()) {
1502  // The normal case (when the .nextstate has a corresponding
1503  // state in clat_) is very simple. Just copy the arc over.
1504  arc.nextstate = next_iter->second;
1505  KALDI_ASSERT(arc.ilabel < kTokenLabelOffset ||
1506  arc.ilabel > kMaxTokenLabel);
1507  AddArcToClat(clat_state, arc);
1508  } else {
1509  // This is the case when the arc is to a `token-final` state (see
1510  // glossary.)
1511 
1512  // TODO: remove the following slightly excessive assertion?
1513  KALDI_ASSERT(chunk_clat.Final(arc.nextstate) != CompactLatticeWeight::Zero() &&
1514  arc.olabel >= (Label)kTokenLabelOffset &&
1515  arc.olabel < (Label)kMaxTokenLabel &&
1516  chunk_state_to_token.count(arc.nextstate) != 0 &&
1517  old_final_costs.count(arc.olabel) != 0);
1518 
1519  // Include the final-cost of the next state (which should be final)
1520  // in arc.weight.
1521  arc.weight = fst::Times(arc.weight,
1522  chunk_clat.Final(arc.nextstate));
1523 
1524  auto cost_iter = old_final_costs.find(arc.olabel);
1525  KALDI_ASSERT(cost_iter != old_final_costs.end());
1526  BaseFloat old_final_cost = cost_iter->second;
1527 
1528  // `arc` is going to become an element of final_arcs_. These
1529  // contain information about transitions from states in clat_ to
1530  // `token-final` states (i.e. states that have a token-label on the arc
1531  // to them and that are final in the canonical compact lattice).
1532  // We subtract the old_final_cost as it was just a temporary cost
1533  // introduced for pruning purposes.
1534  arc.weight.SetWeight(fst::Times(arc.weight.Weight(),
1535  LatticeWeight{-old_final_cost, 0.0}));
1536  // In a slight abuse of the Arc data structure, the nextstate is set to
1537  // the source state. The label (ilabel == olabel) indicates the
1538  // token it is associated with.
1539  arc.nextstate = clat_state;
1540  final_arcs_.push_back(arc);
1541  }
1542  }
1543  }
1544 
1545 }
1546 
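Because of the field re-use described in the comment above, entries of final_arcs_ must not be read as ordinary arcs. A hypothetical inspection helper making the intended interpretation explicit (SetFinalCosts() below does the real reading):

void PrintFinalArc(const CompactLatticeArc &arc) {
  // .ilabel (== .olabel) is the token-label identifying the Token;
  // .nextstate is actually the *source* state in clat_;
  // .weight has the temporary old_final_cost already cancelled out.
  KALDI_LOG << "token-label " << arc.ilabel << ", source-state "
            << arc.nextstate << ", cost "
            << ConvertToCost(arc.weight.Weight());
}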
1547 bool LatticeIncrementalDeterminizer::AcceptRawLatticeChunk(
1548  Lattice *raw_fst) {
1551 
1552  // old_final_costs is a map from a `token-label` (see glossary) to the
1553  // associated final-prob in a final-state of `raw_fst`, that is associated
1554  // with that Token. These are Tokens that were active at the end of the
1555  // chunk. The final-probs may arise from beta (backward) costs, introduced
1556  // for pruning purposes, and/or from final-probs in HCLG. Those costs will
1557  // not be included in anything we store permanently in this class; they are
1558  // used only to guide pruned determinization, and we will use `old_final_costs`
1559  // later to cancel them out.
1560  std::unordered_map<Label, BaseFloat> old_final_costs;
1561  GetRawLatticeFinalCosts(*raw_fst, &old_final_costs);
1562 
1563  CompactLattice chunk_clat;
1564  bool determinized_till_beam = DeterminizeLatticePhonePrunedWrapper(
1565  trans_model_, raw_fst, config_.lattice_beam, &chunk_clat,
1566  config_.det_opts);
1567 
1568  TopSortCompactLatticeIfNeeded(&chunk_clat);
1569 
1570  std::unordered_map<StateId, Label> chunk_state_to_token;
1571  IdentifyTokenFinalStates(chunk_clat,
1572  &chunk_state_to_token);
1573 
1574  StateId chunk_num_states = chunk_clat.NumStates();
1575  if (chunk_num_states == 0) {
1576  // This will be an error but user-level calling code can detect it from the
1577  // lattice being empty.
1578  KALDI_WARN << "Empty lattice, something went wrong.";
1579  clat_.DeleteStates();
1580  return false;
1581  }
1582 
1583  StateId start_state = chunk_clat.Start(); // would be 0.
1584  KALDI_ASSERT(start_state == 0);
1585 
1586  // Process arcs leaving the start state of chunk_clat. Unless this is the
1587  // first chunk in the lattice, all arcs leaving the start state of chunk_clat
1588  // will have `state labels` on them (identifying redeterminized-states in
1589  // clat_), and will transition to a state in `chunk_clat` that we can identify
1590  // with that redeterminized-state.
1591 
1592  // state_map maps from (non-initial, non-token-final state s in chunk_clat) to
1593  // a state in clat_.
1594  std::unordered_map<StateId, StateId> state_map;
1595 
1596 
1597  bool is_first_chunk = ProcessArcsFromChunkStartState(chunk_clat, &state_map);
1598 
1599  // Remove any existing arcs in clat_ that leave redeterminized-states, and
1600  // make those states non-final. Below, we'll add arcs leaving those states
1601  // (and possibly new final-probs.)
1602  for (StateId clat_state: non_final_redet_states_) {
1603  clat_.DeleteArcs(clat_state);
1604  clat_.SetFinal(clat_state, CompactLatticeWeight::Zero());
1605  }
1606 
1607  // The previous final-arc info is no longer relevant; we'll recreate it below.
1608  final_arcs_.clear();
1609 
1610  // assume chunk_clat.Start() == 0; we asserted it above. Allocate state-ids
1611  // for all remaining states in chunk_clat, except for token-final states.
1612  for (StateId state = (is_first_chunk ? 0 : 1);
1613  state < chunk_num_states; state++) {
1614  if (chunk_state_to_token.count(state) != 0)
1615  continue; // these `token-final` states don't get a state allocated.
1616 
1617  StateId new_clat_state = clat_.NumStates();
1618  if (state_map.insert({state, new_clat_state}).second) {
1619  // If it was inserted then we need to actually allocate that state
1620  StateId s = AddStateToClat();
1621  KALDI_ASSERT(s == new_clat_state);
1622  } // else do nothing; it would have been a redeterminized-state, and no
1623  } // allocation is needed since it already exists in clat_ and
1624  // in state_map.
1625 
1626  if (is_first_chunk) {
1627  auto iter = state_map.find(start_state);
1628  KALDI_ASSERT(iter != state_map.end());
1629  CompactLattice::StateId clat_start_state = iter->second;
1630  KALDI_ASSERT(clat_start_state == 0); // topological order.
1631  clat_.SetStart(clat_start_state);
1632  forward_costs_[clat_start_state] = 0.0;
1633  }
1634 
1635  TransferArcsToClat(chunk_clat, is_first_chunk,
1636  state_map, chunk_state_to_token, old_final_costs);
1637 
1638  GetNonFinalRedetStates();
1639 
1640  return determinized_till_beam;
1641 }
1642 
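Calling code does not normally invoke AcceptRawLatticeChunk() directly; it is driven through the decoder's GetLattice(), which builds each raw-lattice chunk and passes it to the determinizer. A condensed usage sketch; the driver function is hypothetical and assumes GetLattice(num_frames_to_include, use_final_probs) as declared in the header:

void DecodeUtterance(LatticeIncrementalDecoder *decoder,
                     DecodableInterface *decodable) {
  decoder->InitDecoding();
  while (!decodable->IsLastFrame(decoder->NumFramesDecoded() - 1)) {
    decoder->AdvanceDecoding(decodable);
    // Partial lattice; final-probs are not requested mid-utterance.
    const CompactLattice &partial =
        decoder->GetLattice(decoder->NumFramesDecoded(), false);
    KALDI_VLOG(2) << "Partial lattice has " << partial.NumStates()
                  << " states.";
  }
  decoder->FinalizeDecoding();
  const CompactLattice &lat =
      decoder->GetLattice(decoder->NumFramesDecoded(), true);
  KALDI_LOG << "Final lattice has " << lat.NumStates() << " states.";
}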
1643 
1644 
1645 void LatticeIncrementalDeterminizer::SetFinalCosts(
1646  const unordered_map<Label, BaseFloat> *token_label2final_cost) {
1647  if (final_arcs_.empty()) {
1648  KALDI_WARN << "SetFinalCosts() called when final_arcs_.empty()... possibly "
1649  "means you are calling this after Finalize()? Not allowed: could "
1650  "indicate a code error. Or possibly decoding failed somehow.";
1651  }
1652 
1653  /*
1654  'prefinal states' is terminology that does not appear in the paper. What it
1655  means is: the set of states in the canonical appended lattice that have an
1656  arc with a token-label leaving them.
1657  */
1658  std::unordered_set<int32> &prefinal_states(temp_);
1659  prefinal_states.clear();
1660  for (const auto &arc: final_arcs_) {
1661  /* Caution: `state` is actually the state the arc would
1662  leave from in the canonical appended lattice; we just store
1663  that in the .nextstate field. */
1664  CompactLattice::StateId state = arc.nextstate;
1665  prefinal_states.insert(state);
1666  }
1667 
1668  for (int32 state: prefinal_states)
1669  clat_.SetFinal(state, CompactLatticeWeight::Zero());
1670 
1671 
1672  for (const CompactLatticeArc &arc: final_arcs_) {
1673  Label token_label = arc.ilabel;
1674  /* Note: we store the source state in the .nextstate field. */
1675  CompactLattice::StateId src_state = arc.nextstate;
1676  BaseFloat graph_final_cost;
1677  if (token_label2final_cost == NULL) {
1678  graph_final_cost = 0.0;
1679  } else {
1680  auto iter = token_label2final_cost->find(token_label);
1681  if (iter == token_label2final_cost->end())
1682  continue;
1683  else
1684  graph_final_cost = iter->second;
1685  }
1686  /* It might seem odd to set a final-prob on the src-state of the arc;
1687  the point is that the symbol on the arc is a token-label, which should not
1688  appear in the lattice the user sees, so after that token-label is removed
1689  the arc would just become a final-prob.
1690  */
1691  clat_.SetFinal(src_state,
1692  fst::Plus(clat_.Final(src_state),
1693  fst::Times(arc.weight,
1694  CompactLatticeWeight(
1695  LatticeWeight(graph_final_cost, 0), {}))));
1696  }
1697 }
1698 
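The fst::Plus() and fst::Times() calls above are the usual semiring operations on lattice weights: Times() accumulates cost along a path (the components add), and Plus() keeps the alternative with the lower total cost, so the SetFinal() call above merges a new final-prob with any existing one by taking the better of the two. A small numeric illustration; the function name is hypothetical, and ConvertToCost() returns Value1() + Value2():

void WeightSemanticsExample() {
  LatticeWeight a(1.0, 2.0), b(2.0, 2.5);
  LatticeWeight prod = fst::Times(a, b);  // (3.0, 4.5): components add.
  LatticeWeight sum = fst::Plus(a, b);    // equals a: total 3.0 beats 4.5.
  KALDI_ASSERT(ConvertToCost(prod) == 7.5);
  KALDI_ASSERT(ConvertToCost(sum) == 3.0);
}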
1699 
1700 
1701 
1702 // Instantiate the template for the combination of token types and FST types
1703 // that we'll need.
1705 template class LatticeIncrementalDecoderTpl<fst::Fst<fst::StdArc>,
1706  decoder::StdToken>;
1707 template class LatticeIncrementalDecoderTpl<fst::VectorFst<fst::StdArc>,
1708  decoder::StdToken>;
1709 template class LatticeIncrementalDecoderTpl<fst::GrammarFst,
1710  decoder::StdToken>;
1712 
1713 template class LatticeIncrementalDecoderTpl<fst::Fst<fst::StdArc>,
1714  decoder::BackpointerToken>;
1715 template class LatticeIncrementalDecoderTpl<fst::VectorFst<fst::StdArc>,
1716  decoder::BackpointerToken>;
1717 template class LatticeIncrementalDecoderTpl<fst::GrammarFst,
1718  decoder::BackpointerToken>;
1719 
1720 } // end namespace kaldi.