sausages.h
Go to the documentation of this file.
1 // lat/sausages.h
2 
3 // Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
4 // 2015 Guoguo Chen
5 // 2019 Dogan Can
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #ifndef KALDI_LAT_SAUSAGES_H_
24 #define KALDI_LAT_SAUSAGES_H_
25 
26 #include <vector>
27 #include <map>
28 
29 #include "base/kaldi-common.h"
30 #include "util/common-utils.h"
31 #include "fstext/fstext-lib.h"
32 #include "lat/kaldi-lattice.h"
33 
34 namespace kaldi {
35 
55 
60  bool decode_mbr;
63 
64  MinimumBayesRiskOptions() : decode_mbr(true), print_silence(false)
65  { }
66  void Register(OptionsItf *opts) {
67  opts->Register("decode-mbr", &decode_mbr, "If true, do Minimum Bayes Risk "
68  "decoding (else, Maximum a Posteriori)");
69  opts->Register("print-silence", &print_silence, "Keep the inter-word '<eps>' "
70  "bins in the 1-best output (ctm, <eps> can be a 'silence' or a 'deleted' word)");
71  }
72 };
73 
78  public:
83  MinimumBayesRisk(const CompactLattice &clat,
85 
86  // Uses the provided <words> as <R_> instead of using the lattice best path.
87  // Note that the default value of opts.decode_mbr is true. If you provide 1-best
88  // hypothesis from MAP decoding, the output ctm from MBR decoding may be
89  // mismatched with the provided <words> (<words> would be used as the starting
90  // point of optimization).
91  MinimumBayesRisk(const CompactLattice &clat,
92  const std::vector<int32> &words,
94  // Uses the provided <words> as <R_> and <times> of bins instead of using the lattice best path.
95  // Note that the default value of opts.decode_mbr is true. If you provide 1-best
96  // hypothesis from MAP decoding, the output ctm from MBR decoding may be
97  // mismatched with the provided <words> (<words> would be used as the starting
98  // point of optimization).
99  MinimumBayesRisk(const CompactLattice &clat,
100  const std::vector<int32> &words,
101  const std::vector<std::pair<BaseFloat,BaseFloat> > &times,
103 
104  const std::vector<int32> &GetOneBest() const { // gets one-best (with no epsilons)
105  return R_;
106  }
107 
108  const std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > GetTimes() const {
109  return times_; // returns average (start,end) times for each word in each
110  // bin. These are raw averages without any processing, i.e. time intervals
111  // from different bins can overlap.
112  }
113 
114  const std::vector<std::pair<BaseFloat, BaseFloat> > GetSausageTimes() const {
115  return sausage_times_; // returns average (start,end) times for each bin.
116  // This is typically the weighted average of the times in GetTimes() but can
117  // be slightly different if the times for the bins overlap, in which case
118  // the times returned by this method do not overlap unlike the times
119  // returned by GetTimes().
120  }
121 
122  const std::vector<std::pair<BaseFloat, BaseFloat> > &GetOneBestTimes() const {
123  return one_best_times_; // returns average (start,end) times for each word
124  // corresponding to an entry in the one-best output. This is typically the
125  // appropriate subset of the times in GetTimes() but can be slightly
126  // different if the times for the one-best words overlap, in which case
127  // the times returned by this method do not overlap unlike the times
128  // returned by GetTimes().
129  }
130 
132  const std::vector<BaseFloat> &GetOneBestConfidences() const {
133  return one_best_confidences_;
134  }
135 
137  BaseFloat GetBayesRisk() const { return L_; }
138 
139  const std::vector<std::vector<std::pair<int32, BaseFloat> > > &GetSausageStats() const {
140  return gamma_;
141  }
142 
143  private:
144  void PrepareLatticeAndInitStats(CompactLattice *clat);
145 
147  void MbrDecode();
148 
157  inline double l(int32 a, int32 b, bool penalize = false) {
158  if (a == b) return 0.0;
159  else return (penalize ? 1.0 + delta() : 1.0);
160  }
161 
163  inline int32 r(int32 q) { return R_[q-1]; }
164 
165 
167  double EditDistance(int32 N, int32 Q,
168  Vector<double> &alpha,
169  Matrix<double> &alpha_dash,
170  Vector<double> &alpha_dash_arc);
171 
173  void AccStats();
174 
176  static void RemoveEps(std::vector<int32> *vec);
177 
178  // Ensures that between each word in "vec" and at the beginning and end, is
179  // epsilon (0). (But if no words in vec, just one epsilon)
180  static void NormalizeEps(std::vector<int32> *vec);
181 
182  // delta() is a constant used in the algorithm, which penalizes
183  // the use of certain epsilon transitions in the edit-distance which would cause
184  // words not to show up in the accumulated edit-distance statistics.
185  // There has been a conceptual bug-fix versus the way it was presented in
186  // the paper: we now add delta only if the edit-distance was not already
187  // zero.
188  static inline BaseFloat delta() { return 1.0e-05; }
189 
190 
/// Adds d to the value stored under key i in *gamma, creating the entry when
/// the key is not present yet.  A d equal to zero leaves the map untouched,
/// so no entry is created for it.
static inline void AddToMap(int32 i, double d, std::map<int32, double> *gamma) {
  if (d != 0) {
    // operator[] value-initializes a missing entry to 0.0, so a freshly
    // created key ends up holding exactly d.
    (*gamma)[i] += d;
  }
}
199 
200  struct Arc {
205  };
206 
208 
209 
213  std::vector<Arc> arcs_;
214 
217  std::vector<std::vector<int32> > pre_;
218 
219  std::vector<int32> state_times_; // time of each state in the word lattice,
220  // indexed from 1 (same index as into pre_)
221 
222  std::vector<int32> R_; // current 1-best word sequence, normalized to have
223  // epsilons between each word and at the beginning and end. R in paper...
224  // caution: indexed from zero, not from 1 as in paper.
225 
226  double L_; // current averaged edit-distance between lattice and R_.
227  // \hat{L} in paper.
228 
229  std::vector<std::vector<std::pair<int32, BaseFloat> > > gamma_;
230  // The stats we accumulate; these are pairs of (word-id, posterior), and note
231  // that word-id may be epsilon. Caution: indexed from zero, not from 1 as in
232  // paper. We sort in reverse order on the second member (posterior), so more
233  // likely word is first.
234 
235  std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > times_;
236  // The average start and end times for words in each confusion-network bin.
237  // This is like an average over arcs, of the tau_b and tau_e quantities in
238  // Appendix C of the paper. Indexed from zero, like gamma_ and R_.
239 
240  std::vector<std::pair<BaseFloat, BaseFloat> > sausage_times_;
241  // The average start and end times for each confusion-network bin. This
242  // is like an average over words, of the tau_b and tau_e quantities in
243  // Appendix C of the paper. Indexed from zero, like gamma_ and R_.
244 
245  std::vector<std::pair<BaseFloat, BaseFloat> > one_best_times_;
246  // The average start and end times for words in the one best output. This
247  // is like an average over the arcs, of the tau_b and tau_e quantities in
248  // Appendix C of the paper. Indexed from zero, like gamma_ and R_.
249 
250  std::vector<BaseFloat> one_best_confidences_;
251  // vector of confidences for the 1-best output (which could be
252  // the MAP output if opts_.decode_mbr == false, or the MBR output otherwise).
253  // Indexed by the same index as one_best_times_.
254 
/// Comparison functor for std::pair<int32, BaseFloat> entries.  It plays the
/// role of operator <, but orders in reverse on the second member (the
/// posterior), so an entry with a larger posterior comes first; entries whose
/// posteriors are neither greater nor smaller than each other are ordered by
/// descending first member.
struct GammaCompare {
  bool operator () (const std::pair<int32, BaseFloat> &a,
                    const std::pair<int32, BaseFloat> &b) const {
    // "Tied" means neither posterior is strictly greater than the other.
    const bool posterior_tied =
        !(a.second > b.second) && !(a.second < b.second);
    return posterior_tied ? (a.first > b.first) : (a.second > b.second);
  }
};
266 };
267 
268 } // namespace kaldi
269 
270 #endif // KALDI_LAT_SAUSAGES_H_
int32 words[kMaxOrder]
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
static void AddToMap(int32 i, double d, std::map< int32, double > *gamma)
Function used to increment map.
Definition: sausages.h:192
double l(int32 a, int32 b, bool penalize=false)
Without the 'penalize' argument this gives us the basic edit-distance function l(a,b), as in the paper.
Definition: sausages.h:157
static BaseFloat delta()
Definition: sausages.h:188
const std::vector< BaseFloat > & GetOneBestConfidences() const
Outputs the confidences for the one-best transcript.
Definition: sausages.h:132
kaldi::int32 int32
const std::vector< std::vector< std::pair< BaseFloat, BaseFloat > > > GetTimes() const
Definition: sausages.h:108
The implementation of the Minimum Bayes Risk decoding method described in "Minimum Bayes Risk decodin...
Definition: sausages.h:56
std::vector< std::vector< std::pair< BaseFloat, BaseFloat > > > times_
Definition: sausages.h:235
std::vector< int32 > state_times_
Definition: sausages.h:219
MinimumBayesRiskOptions opts_
Definition: sausages.h:207
bool print_silence
Boolean configuration parameter: if true, the 1-best path will 'keep' the <eps> bins, ...
Definition: sausages.h:62
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
std::vector< BaseFloat > one_best_confidences_
Definition: sausages.h:250
std::vector< std::vector< std::pair< int32, BaseFloat > > > gamma_
Definition: sausages.h:229
const std::vector< std::pair< BaseFloat, BaseFloat > > GetSausageTimes() const
Definition: sausages.h:114
int32 r(int32 q)
returns r_q, in one-based indexing, as in the paper.
Definition: sausages.h:163
const std::vector< std::pair< BaseFloat, BaseFloat > > & GetOneBestTimes() const
Definition: sausages.h:122
const std::vector< int32 > & GetOneBest() const
Definition: sausages.h:104
This class does the word-level Minimum Bayes Risk computation, and gives you either the 1-best MBR ou...
Definition: sausages.h:77
const std::vector< std::vector< std::pair< int32, BaseFloat > > > & GetSausageStats() const
Definition: sausages.h:139
std::vector< int32 > R_
Definition: sausages.h:222
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
std::vector< std::pair< BaseFloat, BaseFloat > > sausage_times_
Definition: sausages.h:240
void Register(OptionsItf *opts)
Definition: sausages.h:66
std::vector< std::vector< int32 > > pre_
For each node in the lattice, a list of arcs entering that node.
Definition: sausages.h:217
BaseFloat GetBayesRisk() const
Returns the expected WER over this sentence (assuming model correctness).
Definition: sausages.h:137
std::vector< std::pair< BaseFloat, BaseFloat > > one_best_times_
Definition: sausages.h:245
std::vector< Arc > arcs_
Arcs in the topologically sorted acceptor form of the word-level lattice, with one final-state...
Definition: sausages.h:213
bool decode_mbr
Boolean configuration parameter: if true, we actually update the hypothesis to do MBR decoding (if fa...
Definition: sausages.h:60