faster-decoder.h
Go to the documentation of this file.
1 // decoder/faster-decoder.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_DECODER_FASTER_DECODER_H_
22 #define KALDI_DECODER_FASTER_DECODER_H_
23 
24 #include "util/stl-utils.h"
25 #include "itf/options-itf.h"
26 #include "util/hash-list.h"
27 #include "fst/fstlib.h"
28 #include "itf/decodable-itf.h"
29 #include "lat/kaldi-lattice.h" // for CompactLatticeArc
30 
31 namespace kaldi {
32 
39  FasterDecoderOptions(): beam(16.0),
40  max_active(std::numeric_limits<int32>::max()),
41  min_active(20), // This decoder mostly used for
42  // alignment, use small default.
43  beam_delta(0.5),
44  hash_ratio(2.0) { }
45  void Register(OptionsItf *opts, bool full) {
46  opts->Register("beam", &beam, "Decoding beam. Larger->slower, more accurate.");
49  opts->Register("max-active", &max_active, "Decoder max active states. Larger->slower; "
50  "more accurate");
51  opts->Register("min-active", &min_active,
52  "Decoder min active states (don't prune if #active less than this).");
53  if (full) {
54  opts->Register("beam-delta", &beam_delta,
55  "Increment used in decoder [obscure setting]");
56  opts->Register("hash-ratio", &hash_ratio,
57  "Setting used in decoder to control hash behavior");
58  }
59  }
60 };
61 
63  public:
64  typedef fst::StdArc Arc;
65  typedef Arc::Label Label;
68 
69  FasterDecoder(const fst::Fst<fst::StdArc> &fst,
70  const FasterDecoderOptions &config);
71 
72  void SetOptions(const FasterDecoderOptions &config) { config_ = config; }
73 
74  ~FasterDecoder() { ClearToks(toks_.Clear()); }
75 
76  void Decode(DecodableInterface *decodable);
77 
79  bool ReachedFinal() const;
80 
87  bool GetBestPath(fst::MutableFst<LatticeArc> *fst_out,
88  bool use_final_probs = true);
89 
92  void InitDecoding();
93 
94 
98  void AdvanceDecoding(DecodableInterface *decodable,
99  int32 max_num_frames = -1);
100 
102  int32 NumFramesDecoded() const { return num_frames_decoded_; }
103 
104  protected:
105 
106  class Token {
107  public:
108  Arc arc_; // contains only the graph part of the cost;
109  // we can work out the acoustic part from difference between
110  // "cost_" and prev->cost_.
113  // if you are looking for weight_ here, it was removed and now we just have
114  // cost_, which corresponds to ConvertToCost(weight_).
115  double cost_;
116  inline Token(const Arc &arc, BaseFloat ac_cost, Token *prev):
117  arc_(arc), prev_(prev), ref_count_(1) {
118  if (prev) {
119  prev->ref_count_++;
120  cost_ = prev->cost_ + arc.weight.Value() + ac_cost;
121  } else {
122  cost_ = arc.weight.Value() + ac_cost;
123  }
124  }
125  inline Token(const Arc &arc, Token *prev):
126  arc_(arc), prev_(prev), ref_count_(1) {
127  if (prev) {
128  prev->ref_count_++;
129  cost_ = prev->cost_ + arc.weight.Value();
130  } else {
131  cost_ = arc.weight.Value();
132  }
133  }
134  inline bool operator < (const Token &other) {
135  return cost_ > other.cost_;
136  }
137 
138  inline static void TokenDelete(Token *tok) {
139  while (--tok->ref_count_ == 0) {
140  Token *prev = tok->prev_;
141  delete tok;
142  if (prev == NULL) return;
143  else tok = prev;
144  }
145 #ifdef KALDI_PARANOID
146  KALDI_ASSERT(tok->ref_count_ > 0);
147 #endif
148  }
149  };
151 
152 
154  double GetCutoff(Elem *list_head, size_t *tok_count,
155  BaseFloat *adaptive_beam, Elem **best_elem);
156 
157  void PossiblyResizeHash(size_t num_toks);
158 
159  // ProcessEmitting returns the likelihood cutoff used.
160  // It decodes the frame num_frames_decoded_ of the decodable object
161  // and then increments num_frames_decoded_
162  double ProcessEmitting(DecodableInterface *decodable);
163 
164  // TODO: first time we go through this, could avoid using the queue.
165  void ProcessNonemitting(double cutoff);
166 
167  // HashList defined in ../util/hash-list.h. It actually allows us to maintain
168  // more than one list (e.g. for current and previous frames), but only one of
169  // them at a time can be indexed by StateId.
171  const fst::Fst<fst::StdArc> &fst_;
173  std::vector<const Elem* > queue_; // temp variable used in ProcessNonemitting,
174  std::vector<BaseFloat> tmp_array_; // used in GetCutoff.
175  // make it class member to avoid internal new/delete.
176 
177  // Keep track of the number of frames decoded in the current file.
179 
180 // It might seem unclear why we call ClearToks(toks_.Clear()).
181 // There are two separate cleanup tasks we need to do when we start a new file:
182 // one is to delete the Token objects in the list; the other is to delete
183 // the Elem objects. toks_.Clear() just clears them from the hash and gives ownership
184 // to the caller, who then has to call toks_.Delete(e) for each one. It was designed
185 // this way for convenience in propagating tokens from one frame to the next.
186  void ClearToks(Elem *list);
187 
189 };
190 
191 
192 } // end namespace kaldi.
193 
194 
195 #endif
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Token(const Arc &arc, BaseFloat ac_cost, Token *prev)
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82
void SetOptions(const FasterDecoderOptions &config)
FasterDecoderOptions config_
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
fst::StdArc StdArc
Token(const Arc &arc, Token *prev)
kaldi::int32 int32
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
static void TokenDelete(Token *tok)
const fst::Fst< fst::StdArc > & fst_
void Register(OptionsItf *opts, bool full)
HashList< StateId, Token * > toks_
fst::StdArc::Label Label
fst::StdArc::Weight Weight
int32 NumFramesDecoded() const
Returns the number of frames already decoded.
bool operator<(const Int32Pair &a, const Int32Pair &b)
Definition: cu-matrixdim.h:83
HashList< StateId, Token * >::Elem Elem
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::vector< BaseFloat > tmp_array_
Arc::StateId StateId
std::vector< const Elem *> queue_