lattice-simple-decoder.h
Go to the documentation of this file.
1 // decoder/lattice-simple-decoder.h
2 
3 // Copyright 2009-2012 Microsoft Corporation
4 // 2012-2014 Johns Hopkins University (Author: Daniel Povey)
5 // 2014 Guoguo Chen
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_DECODER_LATTICE_SIMPLE_DECODER_H_
23 #define KALDI_DECODER_LATTICE_SIMPLE_DECODER_H_
24 
25 
26 #include "util/stl-utils.h"
27 #include "fst/fstlib.h"
28 #include "itf/decodable-itf.h"
29 #include "fstext/fstext-lib.h"
31 #include "lat/kaldi-lattice.h"
32 
33 #include <algorithm>
34 
35 namespace kaldi {
36 
41  bool determinize_lattice; // not inspected by this class... used in
42  // command-line program.
45  BaseFloat prune_scale; // Note: we don't make this configurable on the command line,
46  // it's not a very important parameter. It affects the
47  // algorithm that prunes the tokens as we go.
49 
51  lattice_beam(10.0),
52  prune_interval(25),
53  determinize_lattice(true),
54  beam_ratio(0.9),
55  prune_scale(0.1) { }
56  void Register(OptionsItf *opts) {
57  det_opts.Register(opts);
58  opts->Register("beam", &beam, "Decoding beam.");
59  opts->Register("lattice-beam", &lattice_beam, "Lattice generation beam");
60  opts->Register("prune-interval", &prune_interval, "Interval (in frames) at "
61  "which to prune tokens");
62  opts->Register("determinize-lattice", &determinize_lattice, "If true, "
63  "determinize the lattice (in a special sense, keeping only "
64  "best pdf-sequence for each word-sequence).");
65  }
66  void Check() const {
67  KALDI_ASSERT(beam > 0.0 && lattice_beam > 0.0 && prune_interval > 0);
68  }
69 };
70 
71 
77  public:
78  typedef fst::StdArc Arc;
79  typedef Arc::Label Label;
82  // instantiate this class once for each thing you have to decode.
83  LatticeSimpleDecoder(const fst::Fst<fst::StdArc> &fst,
84  const LatticeSimpleDecoderConfig &config):
85  fst_(fst), config_(config), num_toks_(0) { config.Check(); }
86 
87  ~LatticeSimpleDecoder() { ClearActiveTokens(); }
88 
90  return config_;
91  }
92 
93  // Returns true if any kind of traceback is available (not necessarily from
94  // a final state).
95  bool Decode(DecodableInterface *decodable);
96 
97 
100  bool ReachedFinal() const {
101  return FinalRelativeCost() != std::numeric_limits<BaseFloat>::infinity();
102  }
103 
108  void InitDecoding();
109 
122  void FinalizeDecoding();
123 
132  BaseFloat FinalRelativeCost() const;
133 
134  // Outputs an FST corresponding to the single best path
135  // through the lattice. Returns true if result is nonempty
136  // (using the return status is deprecated, it will become void).
137  // If "use_final_probs" is true AND we reached the final-state
138  // of the graph then it will include those as final-probs, else
139  // it will treat all final-probs as one.
140  bool GetBestPath(Lattice *lat,
141  bool use_final_probs = true) const;
142 
143  // Outputs an FST corresponding to the raw, state-level
144  // tracebacks. Returns true if result is nonempty
145  // (using the return status is deprecated, it will become void).
146  // If "use_final_probs" is true AND we reached the final-state
147  // of the graph then it will include those as final-probs, else
148  // it will treat all final-probs as one.
149  bool GetRawLattice(Lattice *lat,
150  bool use_final_probs = true) const;
151 
152  // This function is now deprecated, since now we do determinization from
153  // outside the LatticeTrackingDecoder class.
154  // Outputs an FST corresponding to the lattice-determinized
155  // lattice (one path per word sequence). [will become deprecated,
156  // users should determinize themselves.]
157  bool GetLattice(CompactLattice *clat,
158  bool use_final_probs = true) const;
159 
160  inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; }
161  private:
162  struct Token;
163  // ForwardLinks are the links from a token to a token on the next frame.
164  // or sometimes on the current frame (for input-epsilon links).
165  struct ForwardLink {
166  Token *next_tok; // the next token [or NULL if represents final-state]
167  Label ilabel; // ilabel on link.
168  Label olabel; // olabel on link.
169  BaseFloat graph_cost; // graph cost of traversing link (contains LM, etc.)
170  BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing link
171  ForwardLink *next; // next in singly-linked list of forward links from a
172  // token.
173  ForwardLink(Token *next_tok, Label ilabel, Label olabel,
174  BaseFloat graph_cost, BaseFloat acoustic_cost,
175  ForwardLink *next):
176  next_tok(next_tok), ilabel(ilabel), olabel(olabel),
177  graph_cost(graph_cost), acoustic_cost(acoustic_cost),
178  next(next) { }
179  };
180 
181  // Token is what's resident in a particular state at a particular time.
182  // In this decoder a Token actually contains *forward* links.
183  // When first created, a Token just has the (total) cost. We add forward
184  // links from it when we process the next frame.
185  struct Token {
186  BaseFloat tot_cost; // would equal weight.Value()... cost up to this point.
187  BaseFloat extra_cost; // >= 0. After calling PruneForwardLinks, this equals
188  // the minimum difference between the cost of the best path this is on,
189  // and the cost of the absolute best path, under the assumption
190  // that any of the currently active states at the decoding front may
191  // eventually succeed (e.g. if you were to take the currently active states
192  // one by one and compute this difference, and then take the minimum).
193 
194  ForwardLink *links; // Head of singly linked list of ForwardLinks
195 
196  Token *next; // Next in list of tokens for this frame.
197 
198  Token(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLink *links,
199  Token *next): tot_cost(tot_cost), extra_cost(extra_cost), links(links),
200  next(next) { }
201  Token() {}
203  ForwardLink *l = links, *m;
204  while (l != NULL) {
205  m = l->next;
206  delete l;
207  l = m;
208  }
209  links = NULL;
210  }
211  };
212 
213  // head and tail of per-frame list of Tokens (list is in topological order),
214  // and something saying whether we ever pruned it using PruneForwardLinks.
215  struct TokenList {
219  TokenList(): toks(NULL), must_prune_forward_links(true),
220  must_prune_tokens(true) { }
221  };
222 
223 
224  // FindOrAddToken either locates a token in cur_toks_, or if necessary inserts a new,
225  // empty token (i.e. with no forward links) for the current frame. [note: it's
226  // inserted if necessary into cur_toks_ and also into the singly linked list
227  // of tokens active on this frame (whose head is at active_toks_[frame]).
228  //
229  // Returns the Token pointer. Sets "changed" (if non-NULL) to true
230  // if the token was newly created or the cost changed.
231  inline Token *FindOrAddToken(StateId state, int32 frame_plus_one,
232  BaseFloat tot_cost, bool emitting, bool *changed);
233 
234  // delta is the amount by which the extra_costs must
235  // change before it sets "extra_costs_changed" to true. If delta is larger,
236  // we'll tend to go back less far toward the beginning of the file.
237  void PruneForwardLinks(int32 frame, bool *extra_costs_changed,
238  bool *links_pruned,
239  BaseFloat delta);
240 
241  // PruneForwardLinksFinal is a version of PruneForwardLinks that we call
242  // on the final frame. If there are final tokens active, it uses the final-probs
243  // for pruning, otherwise it treats all tokens as final.
244  void PruneForwardLinksFinal();
245 
246  // Prune away any tokens on this frame that have no forward links. [we don't do
247  // this in PruneForwardLinks because it would give us a problem with dangling
248  // pointers].
249  void PruneTokensForFrame(int32 frame);
250 
251  // Go backwards through still-alive tokens, pruning them if the
252  // forward+backward cost is more than lat_beam away from the best path. It's
253  // possible to prove that this is "correct" in the sense that we won't lose
254  // anything outside of lat_beam, regardless of what happens in the future.
255  // delta controls when it considers a cost to have changed enough to continue
256  // going backward and propagating the change. larger delta -> will recurse
257  // less far.
258  void PruneActiveTokens(BaseFloat delta);
259 
260  void ProcessEmitting(DecodableInterface *decodable);
261 
262  void ProcessNonemitting();
263 
264  void ClearActiveTokens(); // a cleanup routine, at utt end/begin
265 
266  // This function computes the final-costs for tokens active on the final
267  // frame. It outputs to final-costs, if non-NULL, a map from the Token*
268  // pointer to the final-prob of the corresponding state, or zero for all states if
269  // none were final. It outputs to final_relative_cost, if non-NULL, the
270  // difference between the best forward-cost including the final-prob cost, and
271  // the best forward-cost without including the final-prob cost (this will
272  // usually be positive), or infinity if there were no final-probs. It outputs
273  // to final_best_cost, if non-NULL, the lowest for any token t active on the
274  // final frame, of t + final-cost[t], where final-cost[t] is the final-cost
275  // in the graph of the state corresponding to token t, or zero if there
276  // were no final-probs active on the final frame.
277  // You cannot call this after FinalizeDecoding() has been called; in that
278  // case you should get the answer from class-member variables.
279  void ComputeFinalCosts(unordered_map<Token*, BaseFloat> *final_costs,
280  BaseFloat *final_relative_cost,
281  BaseFloat *final_best_cost) const;
282 
283 
284  // PruneCurrentTokens deletes the tokens from the "toks" map, but not
285  // from the active_toks_ list, which could cause dangling forward pointers
286  // (will delete it during regular pruning operation).
287  void PruneCurrentTokens(BaseFloat beam, unordered_map<StateId, Token*> *toks);
288 
289  unordered_map<StateId, Token*> cur_toks_;
290  unordered_map<StateId, Token*> prev_toks_;
291  std::vector<TokenList> active_toks_; // Lists of tokens, indexed by
292  // frame_plus_one
293  const fst::Fst<fst::StdArc> &fst_;
295  int32 num_toks_; // current total #toks allocated...
296  bool warned_;
297 
298 
309  unordered_map<Token*, BaseFloat> final_costs_;
312 };
313 
314 
315 } // end namespace kaldi.
316 
317 
318 #endif
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
std::vector< TokenList > active_toks_
LatticeSimpleDecoderConfig config_
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
fst::StdArc StdArc
LatticeSimpleDecoder(const fst::Fst< fst::StdArc > &fst, const LatticeSimpleDecoderConfig &config)
kaldi::int32 int32
bool decoding_finalized_
decoding_finalized_ is true if someone called FinalizeDecoding().
const LatticeSimpleDecoderConfig & GetOptions() const
unordered_map< Token *, BaseFloat > final_costs_
For the meaning of the next 3 variables, see the comment for decoding_finalized_ above., and ComputeFinalCosts().
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
unordered_map< StateId, Token * > prev_toks_
const fst::Fst< fst::StdArc > & fst_
Token(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLink *links, Token *next)
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
fst::StdArc::Label Label
Simplest possible decoder, included largely for didactic purposes and as a means to debug more highly...
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
fst::StdArc::Weight Weight
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool ReachedFinal() const
says whether a final-state was active on the last frame.
unordered_map< StateId, Token * > cur_toks_
fst::DeterminizeLatticePhonePrunedOptions det_opts