doc/faster-decoder_8cc_source.html

 // decoder/faster-decoder.cc

 // Copyright 2009-2011 Microsoft Corporation
 //           2012-2013 Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABILITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include "decoder/faster-decoder.h"

 namespace kaldi {


 // Builds a decoder over the graph `fst` using the search options in `opts`.
 // The options are sanity-checked up front.  num_frames_decoded_ starts at -1
 // so that AdvanceDecoding() can detect that InitDecoding() was never called.
 FasterDecoder::FasterDecoder(const fst::Fst<fst::StdArc> &fst,
                              const FasterDecoderOptions &opts)
     : fst_(fst),
       config_(opts),
       num_frames_decoded_(-1) {
   // A hash ratio below one doesn't make much sense.
   KALDI_ASSERT(config_.hash_ratio >= 1.0);
   KALDI_ASSERT(config_.max_active > 1);
   KALDI_ASSERT(config_.min_active >= 0 && config_.min_active < config_.max_active);

   // Initial bucket count, just so the first frame does something reasonable.
   toks_.SetSize(1000);
 }


 void FasterDecoder::InitDecoding() {
   // clean up from last time:
   ClearToks(toks_.Clear());
   StateId start_state = fst_.Start();
   KALDI_ASSERT(start_state != fst::kNoStateId);
   Arc dummy_arc(0, 0, Weight::One(), start_state);
   toks_.Insert(start_state, new Token(dummy_arc, NULL));
   ProcessNonemitting(std::numeric_limits<float>::max());
   num_frames_decoded_ = 0;
 }


 // Convenience entry point: resets the decoder state with InitDecoding() and
 // then calls AdvanceDecoding() on `decodable` without passing a frame limit.
 void FasterDecoder::Decode(DecodableInterface *decodable) {
   InitDecoding();
   AdvanceDecoding(decodable);
 }

 // Decodes the frames that `decodable` reports as ready and that have not been
 // decoded yet.  If max_num_frames >= 0, at most that many additional frames
 // are decoded by this call; a negative value means "no limit".
 // InitDecoding() must have been called beforehand.
 void FasterDecoder::AdvanceDecoding(DecodableInterface *decodable,
                                       int32 max_num_frames) {
   KALDI_ASSERT(num_frames_decoded_ >= 0 &&
                "You must call InitDecoding() before AdvanceDecoding()");
   int32 num_frames_ready = decodable->NumFramesReady();
   // num_frames_ready must be >= num_frames_decoded, or else
   // the number of frames ready must have decreased (which doesn't
   // make sense) or the decodable object changed between calls
   // (which isn't allowed).
   KALDI_ASSERT(num_frames_ready >= num_frames_decoded_);
   int32 target_frames_decoded = num_frames_ready;
   // Compare the limit against the number of frames still pending instead of
   // unconditionally forming num_frames_decoded_ + max_num_frames: that sum
   // overflows int32 when a caller passes a very large limit.  The difference
   // below cannot overflow because both operands are non-negative and
   // num_frames_ready >= num_frames_decoded_.
   if (max_num_frames >= 0 &&
       max_num_frames < num_frames_ready - num_frames_decoded_)
     target_frames_decoded = num_frames_decoded_ + max_num_frames;
   while (num_frames_decoded_ < target_frames_decoded) {
     // note: ProcessEmitting() increments num_frames_decoded_
     double weight_cutoff = ProcessEmitting(decodable);
     ProcessNonemitting(weight_cutoff);
   }
 }


 bool FasterDecoder::ReachedFinal() const {
   for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) {
     if (e->val->cost_ != std::numeric_limits<double>::infinity() &&
         fst_.Final(e->key) != Weight::Zero())
       return true;
   }
   return false;
 }

 // GetBestPath gets the decoding output as a linear lattice in `fst_out`.
 //
 // If "use_final_probs" is true AND we reached a final state, it limits itself
 // to final states (choosing the token that minimizes cost plus final cost and
 // storing that final cost on the last state); otherwise it gets the most
 // likely token not taking into account final-probs, and the last state gets
 // final weight One.
 //
 // fst_out will be empty (Start() == kNoStateId) if nothing was available.
 // Returns true if it got output (thus, fst_out will be nonempty).
 bool FasterDecoder::GetBestPath(fst::MutableFst<LatticeArc> *fst_out,
                                 bool use_final_probs) {
   fst_out->DeleteStates();
   Token *best_tok = NULL;
   // Final-probs take part in the search only when the caller asked for them
   // and some final state is actually active.  Previously the selection below
   // looked only at ReachedFinal(), so use_final_probs == false still
   // restricted the search to final states, contrary to the documented
   // contract.
   const bool apply_final_probs = use_final_probs && ReachedFinal();
   if (!apply_final_probs) {
     // Pick the best token over all active states.
     // NOTE(review): relies on Token::operator< ranking a worse token below a
     // better one -- confirm against faster-decoder.h.
     for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail)
       if (best_tok == NULL || *best_tok < *(e->val) )
         best_tok = e->val;
   } else {
     // Pick the token minimizing (token cost + final cost); states with an
     // infinite total are skipped.
     double infinity =  std::numeric_limits<double>::infinity(),
         best_cost = infinity;
     for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) {
       double this_cost = e->val->cost_ + fst_.Final(e->key).Value();
       if (this_cost < best_cost && this_cost != infinity) {
         best_cost = this_cost;
         best_tok = e->val;
       }
     }
   }
   if (best_tok == NULL) return false;  // No output.

   std::vector<LatticeArc> arcs_reverse;  // arcs in reverse order.

   // Walk the back-pointers from the best token to the initial token.  Each
   // token's incremental cost is split into the graph cost stored on its arc
   // and the remainder, which is recorded as the acoustic cost.
   for (Token *tok = best_tok; tok != NULL; tok = tok->prev_) {
     BaseFloat tot_cost = tok->cost_ -
         (tok->prev_ ? tok->prev_->cost_ : 0.0),
         graph_cost = tok->arc_.weight.Value(),
         ac_cost = tot_cost - graph_cost;
     LatticeArc l_arc(tok->arc_.ilabel,
                      tok->arc_.olabel,
                      LatticeWeight(graph_cost, ac_cost),
                      tok->arc_.nextstate);
     arcs_reverse.push_back(l_arc);
   }
   KALDI_ASSERT(arcs_reverse.back().nextstate == fst_.Start());
   arcs_reverse.pop_back();  // that was a "fake" token... gives no info.

   // Emit the arcs in forward order as a chain of freshly created states.
   StateId cur_state = fst_out->AddState();
   fst_out->SetStart(cur_state);
   for (ssize_t i = static_cast<ssize_t>(arcs_reverse.size())-1; i >= 0; i--) {
     LatticeArc arc = arcs_reverse[i];
     arc.nextstate = fst_out->AddState();
     fst_out->AddArc(cur_state, arc);
     cur_state = arc.nextstate;
   }
   if (apply_final_probs) {
     Weight final_weight = fst_.Final(best_tok->arc_.nextstate);
     fst_out->SetFinal(cur_state, LatticeWeight(final_weight.Value(), 0.0));
   } else {
     fst_out->SetFinal(cur_state, LatticeWeight::One());
   }
   RemoveEpsLocal(fst_out);
   return true;
 }


 // Gets the weight cutoff.  Also counts the active tokens.
 //
 // Scans the token list starting at `list_head` and returns the cost above
 // which tokens are to be pruned, combining config_.beam with the
 // config_.max_active / config_.min_active limits.
 //
 // Outputs (each pointer may be NULL, in which case it is not written):
 //   tok_count      number of elements in the list.
 //   adaptive_beam  beam width matching the returned cutoff: config_.beam when
 //                  the plain beam decides, otherwise the distance from the
 //                  best cost to the chosen cutoff plus config_.beam_delta.
 //   best_elem      element with the lowest cost; left unmodified if no
 //                  element has a cost below infinity (e.g. empty list).
 double FasterDecoder::GetCutoff(Elem *list_head, size_t *tok_count,
                                 BaseFloat *adaptive_beam, Elem **best_elem) {
   double best_cost = std::numeric_limits<double>::infinity();
   size_t count = 0;
   if (config_.max_active == std::numeric_limits<int32>::max() &&
       config_.min_active == 0) {
     // No active-token limits configured: a single pass finding the best cost
     // is enough, and the plain beam is the cutoff.
     for (Elem *e = list_head; e != NULL; e = e->tail, count++) {
       double w = e->val->cost_;
       if (w < best_cost) {
         best_cost = w;
         if (best_elem) *best_elem = e;
       }
     }
     if (tok_count != NULL) *tok_count = count;
     if (adaptive_beam != NULL) *adaptive_beam = config_.beam;
     return best_cost + config_.beam;
   } else {
     // Collect all costs so that the max_active-th / min_active-th smallest
     // can be located with nth_element.
     tmp_array_.clear();
     for (Elem *e = list_head; e != NULL; e = e->tail, count++) {
       double w = e->val->cost_;
       tmp_array_.push_back(w);
       if (w < best_cost) {
         best_cost = w;
         if (best_elem) *best_elem = e;
       }
     }
     if (tok_count != NULL) *tok_count = count;
     double beam_cutoff = best_cost + config_.beam,
         min_active_cutoff = std::numeric_limits<double>::infinity(),
         max_active_cutoff = std::numeric_limits<double>::infinity();

     if (tmp_array_.size() > static_cast<size_t>(config_.max_active)) {
       std::nth_element(tmp_array_.begin(),
                        tmp_array_.begin() + config_.max_active,
                        tmp_array_.end());
       max_active_cutoff = tmp_array_[config_.max_active];
     }
     if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam.
       if (adaptive_beam)
         *adaptive_beam = max_active_cutoff - best_cost + config_.beam_delta;
       return max_active_cutoff;
     }
     if (tmp_array_.size() > static_cast<size_t>(config_.min_active)) {
       if (config_.min_active == 0) min_active_cutoff = best_cost;
       else {
         // If the max_active partition above ran, the min_active-th element
         // lies among the first max_active entries, so only that prefix needs
         // to be searched.
         std::nth_element(tmp_array_.begin(),
                          tmp_array_.begin() + config_.min_active,
                          tmp_array_.size() > static_cast<size_t>(config_.max_active) ?
                          tmp_array_.begin() + config_.max_active :
                          tmp_array_.end());
         min_active_cutoff = tmp_array_[config_.min_active];
       }
     }
     if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam.
       if (adaptive_beam)
         *adaptive_beam = min_active_cutoff - best_cost + config_.beam_delta;
       return min_active_cutoff;
     } else {
       // Guard the write like every other branch does; this one used to
       // dereference adaptive_beam unconditionally.
       if (adaptive_beam)
         *adaptive_beam = config_.beam;
       return beam_cutoff;
     }
   }
 }

 // Makes sure the token hash has at least num_toks * config_.hash_ratio
 // buckets.  The hash is only ever grown here, never shrunk.
 void FasterDecoder::PossiblyResizeHash(size_t num_toks) {
   const size_t wanted_buckets = static_cast<size_t>(
       static_cast<BaseFloat>(num_toks) * config_.hash_ratio);
   if (wanted_buckets <= toks_.Size())
     return;  // already big enough.
   toks_.SetSize(wanted_buckets);
 }

 // ProcessEmitting returns the likelihood cutoff used.
 //
 // Processes the emitting arcs (ilabel != 0) for frame num_frames_decoded_:
 // takes the current token list out of toks_, prunes it with the cutoff from
 // GetCutoff(), and for every surviving token creates successor tokens whose
 // cost adds the arc weight and the negated log-likelihood obtained from
 // `decodable`.  The old tokens are released as the loop goes, and
 // num_frames_decoded_ is incremented at the end.  The returned value is the
 // bound (best new cost seen + adaptive beam) used to prune the new tokens;
 // AdvanceDecoding() hands it on to ProcessNonemitting().
 double FasterDecoder::ProcessEmitting(DecodableInterface *decodable) {
   int32 frame = num_frames_decoded_;
   // Take the previous frame's tokens out of the hash; toks_ is refilled with
   // this frame's tokens below.
   Elem *last_toks = toks_.Clear();
   size_t tok_cnt;
   BaseFloat adaptive_beam;
   Elem *best_elem = NULL;
   double weight_cutoff = GetCutoff(last_toks, &tok_cnt,
                                    &adaptive_beam, &best_elem);
   KALDI_VLOG(3) << tok_cnt << " tokens active.";
   PossiblyResizeHash(tok_cnt);  // This makes sure the hash is always big enough.

   // This is the cutoff we use after adding in the log-likes (i.e.
   // for the next frame).  This is a bound on the cutoff we will use
   // on the next frame.
   double next_weight_cutoff = std::numeric_limits<double>::infinity();

   // First process the best token to get a hopefully
   // reasonably tight bound on the next cutoff.
   // (No tokens are created in this pass; it only lowers next_weight_cutoff.)
   if (best_elem) {
     StateId state = best_elem->key;
     Token *tok = best_elem->val;
     for (fst::ArcIterator<fst::Fst<Arc> > aiter(fst_, state);
          !aiter.Done();
          aiter.Next()) {
       const Arc &arc = aiter.Value();
       if (arc.ilabel != 0) {  // we'd propagate..
         BaseFloat ac_cost = - decodable->LogLikelihood(frame, arc.ilabel);
         double new_weight = arc.weight.Value() + tok->cost_ + ac_cost;
         if (new_weight + adaptive_beam < next_weight_cutoff)
           next_weight_cutoff = new_weight + adaptive_beam;
       }
     }
   }

   // int32 n = 0, np = 0;

   // the tokens are now owned here, in last_toks, and the hash is empty.
   // 'owned' is a complex thing here; the point is we need to call TokenDelete
   // on each elem 'e' to let toks_ know we're done with them.
   for (Elem *e = last_toks, *e_tail; e != NULL; e = e_tail) {  // loop this way
     // n++;
     // because we delete "e" as we go.
     StateId state = e->key;
     Token *tok = e->val;
     if (tok->cost_ < weight_cutoff) {  // not pruned.
       // np++;
       KALDI_ASSERT(state == tok->arc_.nextstate);
       for (fst::ArcIterator<fst::Fst<Arc> > aiter(fst_, state);
            !aiter.Done();
            aiter.Next()) {
         // NOTE(review): the arc is copied by value here, whereas the
         // best-token pass above binds a const reference -- consider aligning.
         Arc arc = aiter.Value();
         if (arc.ilabel != 0) {  // propagate..
           BaseFloat ac_cost =  - decodable->LogLikelihood(frame, arc.ilabel);
           double new_weight = arc.weight.Value() + tok->cost_ + ac_cost;
           if (new_weight < next_weight_cutoff) {  // not pruned..
             Token *new_tok = new Token(arc, ac_cost, tok);
             Elem *e_found = toks_.Insert(arc.nextstate, new_tok);
             // Tighten the bound whenever a better path is found.
             if (new_weight + adaptive_beam < next_weight_cutoff)
               next_weight_cutoff = new_weight + adaptive_beam;
             // If the destination state already held a token, keep whichever
             // of the two compares as greater under Token::operator< and
             // release the other.
             if (e_found->val != new_tok) {
               if (*(e_found->val) < *new_tok) {
                 Token::TokenDelete(e_found->val);
                 e_found->val = new_tok;
               } else {
                 Token::TokenDelete(new_tok);
               }
             }
           }
         }
       }
     }
     // Save the successor before releasing this element and its token.
     e_tail = e->tail;
     Token::TokenDelete(e->val);
     toks_.Delete(e);
   }
   num_frames_decoded_++;
   return next_weight_cutoff;
 }

 // Processes nonemitting (epsilon, ilabel == 0) arcs for one frame.
 //
 // Starting from every token currently in toks_, follows epsilon arcs and
 // inserts the resulting tokens into toks_, replacing an existing token on the
 // destination state when the new one compares as greater under
 // Token::operator<.  Tokens whose cost exceeds `cutoff` are neither expanded
 // nor created.  Uses queue_ as a LIFO work list; it must be empty on entry and
 // is empty again on return.
 //
 // TODO: first time we go through this, could avoid using the queue.
 void FasterDecoder::ProcessNonemitting(double cutoff) {
   KALDI_ASSERT(queue_.empty());
   for (const Elem *e = toks_.GetList(); e != NULL;  e = e->tail)
     queue_.push_back(e);
   while (!queue_.empty()) {
     const Elem* e = queue_.back();
     queue_.pop_back();
     StateId state = e->key;
     Token *tok = e->val;
     // Validate the token before its first dereference; this check used to sit
     // after the tok->cost_ access below, where the NULL test came too late.
     KALDI_ASSERT(tok != NULL && state == tok->arc_.nextstate);
     if (tok->cost_ > cutoff) { // Don't bother processing successors.
       continue;
     }
     for (fst::ArcIterator<fst::Fst<Arc> > aiter(fst_, state);
          !aiter.Done();
          aiter.Next()) {
       const Arc &arc = aiter.Value();
       if (arc.ilabel == 0) {  // propagate nonemitting only...
         Token *new_tok = new Token(arc, tok);
         if (new_tok->cost_ > cutoff) {  // prune
           Token::TokenDelete(new_tok);
         } else {
           Elem *e_found = toks_.Insert(arc.nextstate, new_tok);
           if (e_found->val == new_tok) {
             // First token on this state: it needs expanding too.
             queue_.push_back(e_found);
           } else {
             if (*(e_found->val) < *new_tok) {
               // The new token wins: swap it in and re-expand the state.
               Token::TokenDelete(e_found->val);
               e_found->val = new_tok;
               queue_.push_back(e_found);
             } else {
               Token::TokenDelete(new_tok);
             }
           }
         }
       }
     }
   }
 }

 // Releases every element of the linked list starting at `list`: the token each
 // element holds is passed to Token::TokenDelete() and the element itself is
 // handed back to toks_ via Delete().
 void FasterDecoder::ClearToks(Elem *list) {
   Elem *cur = list;
   while (cur != NULL) {
     // Remember the successor first; `cur` is given back to toks_ below.
     Elem *next = cur->tail;
     Token::TokenDelete(cur->val);
     toks_.Delete(cur);
     cur = next;
   }
 }

 } // end namespace kaldi.
kaldi::FasterDecoderOptions::beam
BaseFloat beam
Definition: faster-decoder.h:34

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::LatticeArc
fst::ArcTpl< LatticeWeight > LatticeArc
Definition: kaldi-lattice.h:40

kaldi::FasterDecoderOptions::min_active
int32 min_active
Definition: faster-decoder.h:36

kaldi::HashList::Insert
Elem * Insert(I key, T val)
Insert inserts a new element into the hashtable/stored list.
Definition: hash-list-inl.h:125

kaldi::DecodableInterface::NumFramesReady
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
Definition: decodable-itf.h:107

kaldi::DecodableInterface
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82

kaldi::FasterDecoder::ClearToks
void ClearToks(Elem *list)
Definition: faster-decoder.cc:343

kaldi::FasterDecoder::config_
FasterDecoderOptions config_
Definition: faster-decoder.h:172

fst::LatticeWeightTpl< BaseFloat >::One
static const LatticeWeightTpl One()
Definition: lattice-weight.h:81

fst::RemoveEpsLocal
void RemoveEpsLocal(MutableFst< Arc > *fst)
RemoveEpsLocal remove some (but not necessarily all) epsilons in an FST, using an algorithm that is g...
Definition: remove-eps-local-inl.h:309

fst
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21

kaldi::FasterDecoderOptions::max_active
int32 max_active
Definition: faster-decoder.h:35

kaldi::FasterDecoder::InitDecoding
void InitDecoding()
As a new alternative to Decode(), you can call InitDecoding and then (possibly multiple times) Advanc...
Definition: faster-decoder.cc:36

kaldi::HashList::Elem::val
T val
Definition: hash-list.h:54

kaldi::FasterDecoderOptions::beam_delta
BaseFloat beam_delta
Definition: faster-decoder.h:37

faster-decoder.h

kaldi::FasterDecoder::AdvanceDecoding
void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames=-1)
This will decode until there are no more frames ready in the decodable object, but if max_num_frames ...
Definition: faster-decoder.cc:53

kaldi::FasterDecoderOptions::hash_ratio
BaseFloat hash_ratio
Definition: faster-decoder.h:38

kaldi::FasterDecoder::Decode
void Decode(DecodableInterface *decodable)
Definition: faster-decoder.cc:48

kaldi::FasterDecoder::Arc
fst::StdArc Arc
Definition: faster-decoder.h:64

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

fst::LatticeWeightTpl< BaseFloat >

kaldi::FasterDecoder::PossiblyResizeHash
void PossiblyResizeHash(size_t num_toks)
Definition: faster-decoder.cc:212

kaldi::LatticeWeight
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
Definition: kaldi-lattice.h:32

kaldi::FasterDecoder::GetBestPath
bool GetBestPath(fst::MutableFst< LatticeArc > *fst_out, bool use_final_probs=true)
GetBestPath gets the decoding traceback.
Definition: faster-decoder.cc:84

kaldi::FasterDecoder::Token::TokenDelete
static void TokenDelete(Token *tok)
Definition: faster-decoder.h:138

kaldi::FasterDecoder::fst_
const fst::Fst< fst::StdArc > & fst_
Definition: faster-decoder.h:171

count
const size_t count
Definition: arpa-file-parser-test.cc:66

kaldi::FasterDecoder::GetCutoff
double GetCutoff(Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam, Elem **best_elem)
Gets the weight cutoff. Also counts the active tokens.
Definition: faster-decoder.cc:148

float

kaldi::HashList::SetSize
void SetSize(size_t sz)
SetSize tells the object how many hash buckets to allocate (should typically be at least twice the nu...
Definition: hash-list-inl.h:37

kaldi::FasterDecoder::ProcessNonemitting
void ProcessNonemitting(double cutoff)
Definition: faster-decoder.cc:301

kaldi::HashList::GetList
const Elem * GetList() const
Gives the head of the current list to the user.
Definition: hash-list-inl.h:61

kaldi::FasterDecoder::Token
Definition: faster-decoder.h:106

kaldi::FasterDecoder::Token::prev_
Token * prev_
Definition: faster-decoder.h:111

kaldi::HashList::Elem::tail
Elem * tail
Definition: hash-list.h:55

kaldi::HashList::Clear
Elem * Clear()
Clears the hash and gives the head of the current list to the user; ownership is transferred to the u...
Definition: hash-list-inl.h:46

kaldi::FasterDecoder::toks_
HashList< StateId, Token * > toks_
Definition: faster-decoder.h:170

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::FasterDecoder::Token::cost_
double cost_
Definition: faster-decoder.h:115

kaldi::FasterDecoder::Token::arc_
Arc arc_
Definition: faster-decoder.h:108

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::FasterDecoderOptions
Definition: faster-decoder.h:33

kaldi::FasterDecoder::num_frames_decoded_
int32 num_frames_decoded_
Definition: faster-decoder.h:178

kaldi::HashList::Elem
Definition: hash-list.h:52

KALDI_VLOG
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156

kaldi::FasterDecoder::tmp_array_
std::vector< BaseFloat > tmp_array_
Definition: faster-decoder.h:174

kaldi::FasterDecoder::StateId
Arc::StateId StateId
Definition: faster-decoder.h:66

kaldi::FasterDecoder::Weight
Arc::Weight Weight
Definition: faster-decoder.h:67

kaldi::DecodableInterface::LogLikelihood
virtual BaseFloat LogLikelihood(int32 frame, int32 index)=0
Returns the log likelihood, which will be negated in the decoder.

kaldi::FasterDecoder::FasterDecoder
FasterDecoder(const fst::Fst< fst::StdArc > &fst, const FasterDecoderOptions &config)
Definition: faster-decoder.cc:26

kaldi::FasterDecoder::ProcessEmitting
double ProcessEmitting(DecodableInterface *decodable)
Definition: faster-decoder.cc:221

kaldi::HashList::Delete
void Delete(Elem *e)
Think of this like delete().
Definition: hash-list-inl.h:66

kaldi::FasterDecoder::queue_
std::vector< const Elem *> queue_
Definition: faster-decoder.h:173

kaldi::FasterDecoder::ReachedFinal
bool ReachedFinal() const
Returns true if a final state was active on the last frame.
Definition: faster-decoder.cc:75

kaldi::HashList::Size
size_t Size()
Returns current number of hash buckets.
Definition: hash-list.h:113

kaldi::HashList::Elem::key
I key
Definition: hash-list.h:53