doc/sausages_8h_source.html

 // lat/sausages.h

 // Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 //           2015  Guoguo Chen
 //           2019  Dogan Can

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_LAT_SAUSAGES_H_
 #define KALDI_LAT_SAUSAGES_H_

 #include <vector>
 #include <map>

 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "fstext/fstext-lib.h"
 #include "lat/kaldi-lattice.h"

 namespace kaldi {


 // Configuration for MinimumBayesRisk.
 //  - decode_mbr:    if true (the default), do Minimum Bayes Risk decoding;
 //                   otherwise Maximum a Posteriori.
 //  - print_silence: if true, the inter-word '<eps>' bins are kept in the
 //                   1-best output (default: false).
 struct MinimumBayesRiskOptions {
   bool decode_mbr;
   bool print_silence;

   MinimumBayesRiskOptions()
       : decode_mbr(true),
         print_silence(false) {}

   // Registers both options (in declaration order) with the supplied
   // options interface, so they can be set by name.
   void Register(OptionsItf *opts) {
     const char *decode_mbr_doc =
         "If true, do Minimum Bayes Risk decoding "
         "(else, Maximum a Posteriori)";
     const char *print_silence_doc =
         "Keep the inter-word '<eps>' bins in the 1-best output "
         "(ctm, <eps> can be a 'silence' or a 'deleted' word)";

     opts->Register("decode-mbr", &decode_mbr, decode_mbr_doc);
     opts->Register("print-silence", &print_silence, print_silence_doc);
   }
 };

 class MinimumBayesRisk {
  public:
   MinimumBayesRisk(const CompactLattice &clat,
                    MinimumBayesRiskOptions opts = MinimumBayesRiskOptions());

   // Uses the provided <words> as <R_> instead of using the lattice best path.
   // Note that the default value of opts.decode_mbr is true. If you provide 1-best
   // hypothesis from MAP decoding, the output ctm from MBR decoding may be
   // mismatched with the provided <words> (<words> would be used as the starting
   // point of optimization).
   MinimumBayesRisk(const CompactLattice &clat,
                    const std::vector<int32> &words,
                    MinimumBayesRiskOptions opts = MinimumBayesRiskOptions());
   // Uses the provided <words> as <R_> and <times> of bins instead of using the lattice best path.
   // Note that the default value of opts.decode_mbr is true. If you provide 1-best
   // hypothesis from MAP decoding, the output ctm from MBR decoding may be
   // mismatched with the provided <words> (<words> would be used as the starting
   // point of optimization).
   MinimumBayesRisk(const CompactLattice &clat,
                    const std::vector<int32> &words,
                    const std::vector<std::pair<BaseFloat,BaseFloat> > &times,
                    MinimumBayesRiskOptions opts = MinimumBayesRiskOptions());

   const std::vector<int32> &GetOneBest() const { // gets one-best (with no epsilons)
     return R_;
   }

   const std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > GetTimes() const {
     return times_; // returns average (start,end) times for each word in each
     // bin. These are raw averages without any processing, i.e. time intervals
     // from different bins can overlap.
   }

   const std::vector<std::pair<BaseFloat, BaseFloat> > GetSausageTimes() const {
     return sausage_times_; // returns average (start,end) times for each bin.
     // This is typically the weighted average of the times in GetTimes() but can
     // be slightly different if the times for the bins overlap, in which case
     // the times returned by this method do not overlap unlike the times
     // returned by GetTimes().
   }

   const std::vector<std::pair<BaseFloat, BaseFloat> > &GetOneBestTimes() const {
     return one_best_times_; // returns average (start,end) times for each word
     // corresponding to an entry in the one-best output.  This is typically the
     // appropriate subset of the times in GetTimes() but can be slightly
     // different if the times for the one-best words overlap, in which case
     // the times returned by this method do not overlap unlike the times
     // returned by GetTimes().
   }

   const std::vector<BaseFloat> &GetOneBestConfidences() const {
     return one_best_confidences_;
   }

   BaseFloat GetBayesRisk() const { return L_; }

   const std::vector<std::vector<std::pair<int32, BaseFloat> > > &GetSausageStats() const {
     return gamma_;
   }

  private:
   void PrepareLatticeAndInitStats(CompactLattice *clat);

   void MbrDecode();

   inline double l(int32 a, int32 b, bool penalize = false) {
     if (a == b) return 0.0;
     else return (penalize ? 1.0 + delta() : 1.0);
   }

   inline int32 r(int32 q) { return R_[q-1]; }


   double EditDistance(int32 N, int32 Q,
                       Vector<double> &alpha,
                       Matrix<double> &alpha_dash,
                       Vector<double> &alpha_dash_arc);

   void AccStats();

   static void RemoveEps(std::vector<int32> *vec);

   // Ensures that between each word in "vec" and at the beginning and end, is
   // epsilon (0).  (But if no words in vec, just one epsilon)
   static void NormalizeEps(std::vector<int32> *vec);

   // delta() is a constant used in the algorithm, which penalizes
   // the use of certain epsilon transitions in the edit-distance which would cause
   // words not to show up in the accumulated edit-distance statistics.
   // There has been a conceptual bug-fix versus the way it was presented in
   // the paper: we now add delta only if the edit-distance was not already
   // zero.
   static inline BaseFloat delta() { return 1.0e-05; }


   static inline void AddToMap(int32 i, double d, std::map<int32, double> *gamma) {
     if (d == 0) return;
     std::pair<const int32, double> pr(i, d);
     std::pair<std::map<int32, double>::iterator, bool> ret = gamma->insert(pr);
     if (!ret.second) // not inserted, so add to contents.
       ret.first->second += d;
   }

   struct Arc {
     int32 word;
     int32 start_node;
     int32 end_node;
     BaseFloat loglike;
   };

   MinimumBayesRiskOptions opts_;


   std::vector<Arc> arcs_;

   std::vector<std::vector<int32> > pre_;

   std::vector<int32> state_times_; // time of each state in the word lattice,
   // indexed from 1 (same index as into pre_)

   std::vector<int32> R_; // current 1-best word sequence, normalized to have
   // epsilons between each word and at the beginning and end.  R in paper...
   // caution: indexed from zero, not from 1 as in paper.

   double L_; // current averaged edit-distance between lattice and R_.
   // \hat{L} in paper.

   std::vector<std::vector<std::pair<int32, BaseFloat> > > gamma_;
   // The stats we accumulate; these are pairs of (posterior, word-id), and note
   // that word-id may be epsilon.  Caution: indexed from zero, not from 1 as in
   // paper.  We sort in reverse order on the second member (posterior), so more
   // likely word is first.

   std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > times_;
   // The average start and end times for words in each confusion-network bin.
   // This is like an average over arcs, of the tau_b and tau_e quantities in
   // Appendix C of the paper.  Indexed from zero, like gamma_ and R_.

   std::vector<std::pair<BaseFloat, BaseFloat> > sausage_times_;
   // The average start and end times for each confusion-network bin.  This
   // is like an average over words, of the tau_b and tau_e quantities in
   // Appendix C of the paper.  Indexed from zero, like gamma_ and R_.

   std::vector<std::pair<BaseFloat, BaseFloat> > one_best_times_;
   // The average start and end times for words in the one best output.  This
   // is like an average over the arcs, of the tau_b and tau_e quantities in
   // Appendix C of the paper. Indexed from zero, like gamma_ and R_.

   std::vector<BaseFloat> one_best_confidences_;
   // vector of confidences for the 1-best output (which could be
   // the MAP output if opts_.decode_mbr == false, or the MBR output otherwise).
   // Indexed by the same index as one_best_times_.

   struct GammaCompare{
     // should be like operator <.  But we want reverse order
     // on the 2nd element (posterior), so it'll be like operator
     // > that looks first at the posterior.
     bool operator () (const std::pair<int32, BaseFloat> &a,
                       const std::pair<int32, BaseFloat> &b) const {
       if (a.second > b.second) return true;
       else if (a.second < b.second) return false;
       else return a.first > b.first;
     }
   };
 };

 }  // namespace kaldi

 #endif  // KALDI_LAT_SAUSAGES_H_
words
int32 words[kMaxOrder]
Definition: arpa-file-parser-test.cc:43

fstext-lib.h

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::MinimumBayesRisk::Arc::loglike
BaseFloat loglike
Definition: sausages.h:204

kaldi::MinimumBayesRisk::AddToMap
static void AddToMap(int32 i, double d, std::map< int32, double > *gamma)
Function used to increment map.
Definition: sausages.h:192

kaldi::MinimumBayesRisk::l
double l(int32 a, int32 b, bool penalize=false)
Without the 'penalize' argument this gives us the basic edit-distance function l(a,b), as in the paper.
Definition: sausages.h:157

kaldi::MinimumBayesRisk::L_
double L_
Definition: sausages.h:226

kaldi::MinimumBayesRisk::delta
static BaseFloat delta()
Definition: sausages.h:188

kaldi::MinimumBayesRisk::GetOneBestConfidences
const std::vector< BaseFloat > & GetOneBestConfidences() const
Outputs the confidences for the one-best transcript.
Definition: sausages.h:132

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::MinimumBayesRisk::GetTimes
const std::vector< std::vector< std::pair< BaseFloat, BaseFloat > > > GetTimes() const
Definition: sausages.h:108

kaldi::Matrix< double >

kaldi::MinimumBayesRiskOptions
The implementation of the Minimum Bayes Risk decoding method described in "Minimum Bayes Risk decodin...
Definition: sausages.h:56

kaldi::MinimumBayesRisk::times_
std::vector< std::vector< std::pair< BaseFloat, BaseFloat > > > times_
Definition: sausages.h:235

kaldi::MinimumBayesRisk::state_times_
std::vector< int32 > state_times_
Definition: sausages.h:219

kaldi::MinimumBayesRisk::opts_
MinimumBayesRiskOptions opts_
Definition: sausages.h:207

kaldi::MinimumBayesRiskOptions::print_silence
bool print_silence
Boolean configuration parameter: if true, the 1-best path will 'keep' the <eps> bins.
Definition: sausages.h:62

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::MinimumBayesRisk::Arc::end_node
int32 end_node
Definition: sausages.h:203

kaldi-lattice.h

kaldi::MinimumBayesRisk::one_best_confidences_
std::vector< BaseFloat > one_best_confidences_
Definition: sausages.h:250

kaldi::MinimumBayesRiskOptions::MinimumBayesRiskOptions
MinimumBayesRiskOptions()
Definition: sausages.h:64

kaldi::MinimumBayesRisk::gamma_
std::vector< std::vector< std::pair< int32, BaseFloat > > > gamma_
Definition: sausages.h:229

float

kaldi::MinimumBayesRisk::Arc::word
int32 word
Definition: sausages.h:201

kaldi::MinimumBayesRisk::GetSausageTimes
const std::vector< std::pair< BaseFloat, BaseFloat > > GetSausageTimes() const
Definition: sausages.h:114

kaldi::MinimumBayesRisk::r
int32 r(int32 q)
returns r_q, in one-based indexing, as in the paper.
Definition: sausages.h:163

kaldi::MinimumBayesRisk::GetOneBestTimes
const std::vector< std::pair< BaseFloat, BaseFloat > > & GetOneBestTimes() const
Definition: sausages.h:122

kaldi::MinimumBayesRisk::GetOneBest
const std::vector< int32 > & GetOneBest() const
Definition: sausages.h:104

kaldi::MinimumBayesRisk
This class does the word-level Minimum Bayes Risk computation, and gives you either the 1-best MBR ou...
Definition: sausages.h:77

kaldi::MinimumBayesRisk::GetSausageStats
const std::vector< std::vector< std::pair< int32, BaseFloat > > > & GetSausageStats() const
Definition: sausages.h:139

kaldi::MinimumBayesRisk::R_
std::vector< int32 > R_
Definition: sausages.h:222

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::CompactLattice
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46

kaldi::MinimumBayesRisk::sausage_times_
std::vector< std::pair< BaseFloat, BaseFloat > > sausage_times_
Definition: sausages.h:240

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::Vector< double >

kaldi::MinimumBayesRiskOptions::Register
void Register(OptionsItf *opts)
Definition: sausages.h:66

kaldi::MinimumBayesRisk::pre_
std::vector< std::vector< int32 > > pre_
For each node in the lattice, a list of arcs entering that node.
Definition: sausages.h:217

kaldi::MinimumBayesRisk::GammaCompare
Definition: sausages.h:255

kaldi::MinimumBayesRisk::GetBayesRisk
BaseFloat GetBayesRisk() const
Returns the expected WER over this sentence (assuming model correctness).
Definition: sausages.h:137

kaldi::MinimumBayesRisk::one_best_times_
std::vector< std::pair< BaseFloat, BaseFloat > > one_best_times_
Definition: sausages.h:245

kaldi::MinimumBayesRisk::Arc::start_node
int32 start_node
Definition: sausages.h:202

kaldi::MinimumBayesRisk::arcs_
std::vector< Arc > arcs_
Arcs in the topologically sorted acceptor form of the word-level lattice, with one final-state...
Definition: sausages.h:213

kaldi-common.h

kaldi::MinimumBayesRisk::Arc
Definition: sausages.h:200

rnnlm::d
double d
Definition: mikolov-rnnlm-lib.cc:64

kaldi::MinimumBayesRiskOptions::decode_mbr
bool decode_mbr
Boolean configuration parameter: if true, we actually update the hypothesis to do MBR decoding (if fa...
Definition: sausages.h:60