28 fst_(fst), config_(opts), num_frames_decoded_(-1) {
41 Arc dummy_arc(0, 0, Weight::One(), start_state);
54 int32 max_num_frames) {
56 "You must call InitDecoding() before AdvanceDecoding()");
63 int32 target_frames_decoded = num_frames_ready;
64 if (max_num_frames >= 0)
65 target_frames_decoded = std::min(target_frames_decoded,
77 if (e->val->cost_ != std::numeric_limits<double>::infinity() &&
78 fst_.Final(e->key) != Weight::Zero())
85 bool use_final_probs) {
92 fst_out->DeleteStates();
93 Token *best_tok = NULL;
97 if (best_tok == NULL || *best_tok < *(e->val) )
100 double infinity = std::numeric_limits<double>::infinity(),
101 best_cost = infinity;
103 double this_cost = e->val->
cost_ +
fst_.Final(e->key).Value();
104 if (this_cost < best_cost && this_cost != infinity) {
105 best_cost = this_cost;
110 if (best_tok == NULL)
return false;
112 std::vector<LatticeArc> arcs_reverse;
114 for (
Token *tok = best_tok; tok != NULL; tok = tok->
prev_) {
116 (tok->prev_ ? tok->prev_->cost_ : 0.0),
117 graph_cost = tok->arc_.weight.Value(),
118 ac_cost = tot_cost - graph_cost;
122 tok->arc_.nextstate);
123 arcs_reverse.push_back(l_arc);
126 arcs_reverse.pop_back();
128 StateId cur_state = fst_out->AddState();
129 fst_out->SetStart(cur_state);
130 for (ssize_t
i = static_cast<ssize_t>(arcs_reverse.size())-1;
i >= 0;
i--) {
132 arc.nextstate = fst_out->AddState();
133 fst_out->AddArc(cur_state, arc);
134 cur_state = arc.nextstate;
136 if (is_final && use_final_probs) {
138 fst_out->SetFinal(cur_state,
LatticeWeight(final_weight.Value(), 0.0));
150 double best_cost = std::numeric_limits<double>::infinity();
154 for (
Elem *e = list_head; e != NULL; e = e->
tail, count++) {
155 double w = e->val->cost_;
158 if (best_elem) *best_elem = e;
161 if (tok_count != NULL) *tok_count =
count;
162 if (adaptive_beam != NULL) *adaptive_beam =
config_.
beam;
166 for (
Elem *e = list_head; e != NULL; e = e->
tail, count++) {
167 double w = e->val->cost_;
171 if (best_elem) *best_elem = e;
174 if (tok_count != NULL) *tok_count =
count;
176 min_active_cutoff = std::numeric_limits<double>::infinity(),
177 max_active_cutoff = std::numeric_limits<double>::infinity();
185 if (max_active_cutoff < beam_cutoff) {
188 return max_active_cutoff;
201 if (min_active_cutoff > beam_cutoff) {
204 return min_active_cutoff;
213 size_t new_sz =
static_cast<size_t>(
static_cast<BaseFloat>(num_toks)
226 Elem *best_elem = NULL;
227 double weight_cutoff =
GetCutoff(last_toks, &tok_cnt,
228 &adaptive_beam, &best_elem);
229 KALDI_VLOG(3) << tok_cnt <<
" tokens active.";
235 double next_weight_cutoff = std::numeric_limits<double>::infinity();
242 for (fst::ArcIterator<fst::Fst<Arc> > aiter(
fst_, state);
245 const Arc &arc = aiter.Value();
246 if (arc.ilabel != 0) {
248 double new_weight = arc.weight.Value() + tok->
cost_ + ac_cost;
249 if (new_weight + adaptive_beam < next_weight_cutoff)
250 next_weight_cutoff = new_weight + adaptive_beam;
260 for (
Elem *e = last_toks, *e_tail; e != NULL; e = e_tail) {
265 if (tok->
cost_ < weight_cutoff) {
268 for (fst::ArcIterator<fst::Fst<Arc> > aiter(
fst_, state);
271 Arc arc = aiter.Value();
272 if (arc.ilabel != 0) {
274 double new_weight = arc.weight.Value() + tok->
cost_ + ac_cost;
275 if (new_weight < next_weight_cutoff) {
276 Token *new_tok =
new Token(arc, ac_cost, tok);
278 if (new_weight + adaptive_beam < next_weight_cutoff)
279 next_weight_cutoff = new_weight + adaptive_beam;
280 if (e_found->
val != new_tok) {
281 if (*(e_found->
val) < *new_tok) {
283 e_found->
val = new_tok;
297 return next_weight_cutoff;
312 if (tok->
cost_ > cutoff) {
316 for (fst::ArcIterator<fst::Fst<Arc> > aiter(
fst_, state);
319 const Arc &arc = aiter.Value();
320 if (arc.ilabel == 0) {
322 if (new_tok->
cost_ > cutoff) {
326 if (e_found->
val == new_tok) {
327 queue_.push_back(e_found);
329 if (*(e_found->
val) < *new_tok) {
331 e_found->
val = new_tok;
332 queue_.push_back(e_found);
344 for (
Elem *e = list, *e_tail; e != NULL; e = e_tail) {
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
fst::ArcTpl< LatticeWeight > LatticeArc
Elem * Insert(I key, T val)
Insert inserts a new element into the hashtable/stored list.
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
void ClearToks(Elem *list)
FasterDecoderOptions config_
static const LatticeWeightTpl One()
void RemoveEpsLocal(MutableFst< Arc > *fst)
RemoveEpsLocal remove some (but not necessarily all) epsilons in an FST, using an algorithm that is g...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void InitDecoding()
As a new alternative to Decode(), you can call InitDecoding and then (possibly multiple times) Advanc...
void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames=-1)
This will decode until there are no more frames ready in the decodable object, but if max_num_frames ...
void Decode(DecodableInterface *decodable)
void PossiblyResizeHash(size_t num_toks)
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
bool GetBestPath(fst::MutableFst< LatticeArc > *fst_out, bool use_final_probs=true)
GetBestPath gets the decoding traceback.
static void TokenDelete(Token *tok)
const fst::Fst< fst::StdArc > & fst_
double GetCutoff(Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, Elem **best_elem)
Gets the weight cutoff. Also counts the active tokens.
void SetSize(size_t sz)
SetSize tells the object how many hash buckets to allocate (should typically be at least twice the nu...
void ProcessNonemitting(double cutoff)
const Elem * GetList() const
Gives the head of the current list to the user.
Elem * Clear()
Clears the hash and gives the head of the current list to the user; ownership is transferred to the u...
HashList< StateId, Token * > toks_
#define KALDI_ASSERT(cond)
int32 num_frames_decoded_
std::vector< BaseFloat > tmp_array_
virtual BaseFloat LogLikelihood(int32 frame, int32 index)=0
Returns the log likelihood, which will be negated in the decoder.
FasterDecoder(const fst::Fst< fst::StdArc > &fst, const FasterDecoderOptions &config)
double ProcessEmitting(DecodableInterface *decodable)
void Delete(Elem *e)
Think of this like delete().
std::vector< const Elem *> queue_
bool ReachedFinal() const
Returns true if a final state was active on the last frame.
size_t Size()
Returns current number of hash buckets.