simple-decoder.h
Go to the documentation of this file.
1 // decoder/simple-decoder.h
2 
3 // Copyright 2009-2013 Microsoft Corporation; Lukas Burget;
4 // Saarland University (author: Arnab Ghoshal);
5 // Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_DECODER_SIMPLE_DECODER_H_
23 #define KALDI_DECODER_SIMPLE_DECODER_H_
24 
25 
26 #include "util/stl-utils.h"
27 #include "fst/fstlib.h"
28 #include "lat/kaldi-lattice.h"
29 #include "itf/decodable-itf.h"
30 
31 namespace kaldi {
32 
38  public:
43 
44  SimpleDecoder(const fst::Fst<fst::StdArc> &fst, BaseFloat beam): fst_(fst), beam_(beam) { }
45 
47 
// Decode this utterance, reading frames from `decodable`.
// Per the GetBestPath() notes in this header: if Decode() returned true, it
// is safe to assume GetBestPath() will return true as well.
52  bool Decode(DecodableInterface *decodable);
53 
54  bool ReachedFinal() const;
55 
56  // GetBestPath gets the decoding traceback. If "use_final_probs" is true
57  // AND we reached a final state, it limits itself to final states;
58  // otherwise it gets the most likely token not taking into account final-probs.
59  // fst_out will be empty (Start() == kNoStateId) if nothing was available due to
60  // search error.
61  // If Decode() returned true, it is safe to assume GetBestPath will return true.
62  // It returns true if the output lattice was nonempty (i.e. had states in it);
63  // using the return value is deprecated.
64  bool GetBestPath(Lattice *fst_out, bool use_final_probs = true) const;
65 
67 
74 
// InitDecoding initializes the decoding, and should only be used if you
// intend to call AdvanceDecoding() yourself.
// NOTE(review): presumably Decode() performs this initialization on its own;
// confirm against the implementation file.
79  void InitDecoding();
80 
// This will decode until there are no more frames ready in the decodable
// object.
// NOTE(review): max_num_frames defaults to -1; presumably a non-negative
// value caps how many frames a single call may decode -- confirm against the
// implementation file.
85  void AdvanceDecoding(DecodableInterface *decodable,
86  int32 max_num_frames = -1);
87 
90 
91  private:
92 
93  class Token {
94  public:
95  LatticeArc arc_; // We use LatticeArc so that we can separately
96  // store the acoustic and graph cost, in case
97  // we need to produce lattice-formatted output.
100  double cost_; // accumulated total cost up to this point.
101  Token(const StdArc &arc,
102  BaseFloat acoustic_cost,
103  Token *prev): prev_(prev), ref_count_(1) {
104  arc_.ilabel = arc.ilabel;
105  arc_.olabel = arc.olabel;
106  arc_.weight = LatticeWeight(arc.weight.Value(), acoustic_cost);
107  arc_.nextstate = arc.nextstate;
108  if (prev) {
109  prev->ref_count_++;
110  cost_ = prev->cost_ + (arc.weight.Value() + acoustic_cost);
111  } else {
112  cost_ = arc.weight.Value() + acoustic_cost;
113  }
114  }
115  bool operator < (const Token &other) {
116  return cost_ > other.cost_;
117  }
118 
119  static void TokenDelete(Token *tok) {
120  while (--tok->ref_count_ == 0) {
121  Token *prev = tok->prev_;
122  delete tok;
123  if (prev == NULL) return;
124  else tok = prev;
125  }
126 #ifdef KALDI_PARANOID
127  KALDI_ASSERT(tok->ref_count_ > 0);
128 #endif
129  }
130  };
131 
132  // ProcessEmitting decodes the frame num_frames_decoded_ of the
133  // decodable object, then increments num_frames_decoded_.
134  void ProcessEmitting(DecodableInterface *decodable);
135 
136  void ProcessNonemitting();
137 
138  unordered_map<StateId, Token*> cur_toks_;
139  unordered_map<StateId, Token*> prev_toks_;
140  const fst::Fst<fst::StdArc> &fst_;
142  // Keep track of the number of frames decoded in the current file.
144 
145  static void ClearToks(unordered_map<StateId, Token*> &toks);
146 
147  static void PruneToks(BaseFloat beam, unordered_map<StateId, Token*> *toks);
148 
150 };
151 
152 
153 } // end namespace kaldi.
154 
155 
156 #endif
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
fst::ArcTpl< LatticeWeight > LatticeArc
Definition: kaldi-lattice.h:40
void ProcessEmitting(DecodableInterface *decodable)
StdArc::Label Label
unordered_map< StateId, Token * > cur_toks_
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82
StdArc::StateId StateId
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
unordered_map< StateId, Token * > prev_toks_
StdArc::Weight StdWeight
fst::StdArc StdArc
static void ClearToks(unordered_map< StateId, Token *> &toks)
bool ReachedFinal() const
kaldi::int32 int32
KALDI_DISALLOW_COPY_AND_ASSIGN(SimpleDecoder)
int32 NumFramesDecoded() const
Returns the number of frames already decoded.
bool operator<(const Token &other)
BaseFloat FinalRelativeCost() const
*** The next functions are from the "new interface". ***
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
Definition: kaldi-lattice.h:32
const fst::Fst< fst::StdArc > & fst_
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
static void PruneToks(BaseFloat beam, unordered_map< StateId, Token *> *toks)
fst::StdArc::Label Label
fst::StdArc::Weight Weight
SimpleDecoder(const fst::Fst< fst::StdArc > &fst, BaseFloat beam)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool Decode(DecodableInterface *decodable)
Decode this utterance.
static void TokenDelete(Token *tok)
Simplest possible decoder, included largely for didactic purposes and as a means to debug more highly...
Token(const StdArc &arc, BaseFloat acoustic_cost, Token *prev)
void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames=-1)
This will decode until there are no more frames ready in the decodable object, but if max_num_frames ...
bool GetBestPath(Lattice *fst_out, bool use_final_probs=true) const
void InitDecoding()
InitDecoding initializes the decoding, and should only be used if you intend to call AdvanceDecoding(...