discriminative-training.cc
Go to the documentation of this file.
1 // nnet3/discriminative-training.cc
2 
3 // Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey)
4 // Copyright 2014-2015 Vimal Manohar
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
22 #include "lat/lattice-functions.h"
23 #include "cudamatrix/cu-matrix.h"
24 
25 namespace kaldi {
26 namespace discriminative {
27 
29  std::memset((void *)this, 0, sizeof(*this));
30 }
31 
33  accumulate_gradients(false),
34  accumulate_output(false),
35  num_pdfs(num_pdfs) {
36  gradients.Resize(num_pdfs);
37  output.Resize(num_pdfs);
38  Reset();
39 }
40 
41 // Constructor from config structure
43  const DiscriminativeOptions &opts) :
46  num_pdfs(opts.num_pdfs) {
48  output.Resize(opts.num_pdfs);
49  Reset();
50 }
51 
52 // Reset statistics
55  output.SetZero();
56 
57  tot_t = 0.0;
58  tot_t_weighted = 0.0;
59  tot_objf = 0.0;
60  tot_num_count = 0.0;
61  tot_den_count = 0.0;
62  tot_num_objf = 0.0;
63  tot_l2_term = 0.0;
64 }
65 
69  num_pdfs = opts.num_pdfs;
71  output.Resize(opts.num_pdfs);
72 }
73 
74 // This class is responsible for the forward-backward of the
75 // 'supervision' lattices and computation of the objective function
76 // and gradients.
77 //
78 // note: the supervision.weight is ignored by this class, you have to apply
79 // it externally.
81  typedef Lattice::Arc Arc;
83 
84  public:
85 // Initialize the object. Note: we expect the 'nnet_output' to have the
86  // same number of rows as supervision.num_frames * supervision.num_sequences,
87  // and the same number of columns as tmodel.NumPdfs(); but the
88  // ordering of the rows of 'nnet_output' is not the same as the ordering of
89  // frames in paths in the 'supervision' object (which has all frames of the
90  // 1st sequence first, then the 2nd sequence, and so on). Instead, the
91  // frames in 'nnet_output' are ordered as: first the first frame of each
92  // sequence, then the second frame of each sequence, and so on.
93  // This is done to be similar to the setup in 'chain' training
94  // even though this does not offer any computational advantages here
95  // as in the 'chain' case.
97  const TransitionModel &tmodel,
98  const CuVectorBase<BaseFloat> &log_priors,
99  const DiscriminativeSupervision &supervision,
100  const CuMatrixBase<BaseFloat> &nnet_output,
102  CuMatrixBase<BaseFloat> *nnet_output_deriv,
103  CuMatrixBase<BaseFloat> *xent_output_deriv);
104 
105 // Does the forward-backward computation and adds the derivative of the
106 // objective function w.r.t. the nnet output (log-prob) times
107 // supervision_.weight times deriv_weight to 'nnet_output_deriv'.
108  void Compute();
109 
110  private:
113 
114  // Vector of log-priors of pdfs.
115  // This can be a size zero vector e.g. for 'chain' model
117 
119 
120  // The neural net output.
122 
123  // Training stats including accumulated objective function, gradient
124  // and total weight. Optionally the nnet_output and gradients per pdf can be
125  // accumulated for debugging purposes.
127 
128 // If non-NULL, the derivative w.r.t. nnet_output is written here.
130 
131  // If non-NULL, then the xent objective derivative
132  // (which equals a posterior from the numerator forward-backward, scaled by
133  // the supervision weight) is written to here.
134  // This will be used in the cross-entropy regularization code.
136 
137  // Denominator lattice.
139 
140  // List of silence phones. Useful to treat silence phones
141  // differently in computing SMBR / MPFE objectives.
142  std::vector<int32> silence_phones_;
143 
144  // The function that actually computes the objective and gradients
145  double ComputeObjfAndDeriv(Posterior *post, Posterior *xent_post);
146 
147 // This function looks up the nnet output for the pdf-ids in the
148 // denominator lattice (and, for the "mmi" objective, in the alignment)
149 // using CuMatrix::Lookup() and stores them in "answers".
150  void LookupNnetOutput(std::vector<Int32Pair> *requested_indexes,
151  std::vector<BaseFloat> *answers) const ;
152 
153  // Converts the answers looked up by LookupNnetOutput function into
154  // log-likelihoods scaled by acoustic scale.
155  void ConvertAnswersToLogLike(
156  const std::vector<Int32Pair>& requested_indexes,
157  std::vector<BaseFloat> *answers) const;
158 
159  // Does acoustic rescoring of lattice to put the negative (scaled) acoustic
160  // log-likelihoods in the arcs of the lattice. Returns the number of
161  // indexes of log-likelihoods read from the "answers" vector.
162  static size_t LatticeAcousticRescore(const std::vector<BaseFloat> &answers,
163  size_t index,
164  Lattice *lat);
165 
166  // Process the derivative stored as posteriors into CuMatrix.
167  // Optionally accumulate numerator and denominator posteriors.
168  void ProcessPosteriors(const Posterior &post,
169  CuMatrixBase<BaseFloat> *output_deriv_temp,
170  double *tot_num_post = NULL,
171  double *tot_den_post = NULL) const;
172 
173  static inline Int32Pair MakePair(int32 first, int32 second) {
174  Int32Pair ans;
175  ans.first = first;
176  ans.second = second;
177  return ans;
178  }
179 };
180 
182  const DiscriminativeOptions &opts,
183  const TransitionModel &tmodel,
184  const CuVectorBase<BaseFloat> &log_priors,
185  const DiscriminativeSupervision &supervision,
186  const CuMatrixBase<BaseFloat> &nnet_output,
188  CuMatrixBase<BaseFloat> *nnet_output_deriv,
189  CuMatrixBase<BaseFloat> *xent_output_deriv)
190  : opts_(opts), tmodel_(tmodel), log_priors_(log_priors),
191  supervision_(supervision), nnet_output_(nnet_output),
192  stats_(stats),
193  nnet_output_deriv_(nnet_output_deriv),
194  xent_output_deriv_(xent_output_deriv) {
195 
196  den_lat_ = supervision.den_lat;
197  TopSort(&den_lat_);
198 
200  &silence_phones_)) {
201  KALDI_ERR << "Bad value for --silence-phones option: "
203  }
204 }
205 
207  std::vector<Int32Pair> *requested_indexes,
208  std::vector<BaseFloat> *answers) const {
209  BaseFloat wiggle_room = 1.3; // value not critical.. it's just 'reserve'
210 
212  int32 num_pdfs = tmodel_.NumPdfs();
213 
214  int32 num_reserve = wiggle_room * den_lat_.NumStates();
215 
216  if (opts_.criterion == "mmi") {
217  // For looking up the posteriors corresponding to the pdfs in the alignment
218  num_reserve += num_frames;
219  }
220 
221  requested_indexes->reserve(num_reserve);
222 
223  // Denominator probabilities to look up from denominator lattice
224  std::vector<int32> state_times;
225  int32 T = LatticeStateTimes(den_lat_, &state_times);
226  KALDI_ASSERT(T == num_frames);
227 
228  StateId num_states = den_lat_.NumStates();
229  for (StateId s = 0; s < num_states; s++) {
230  int32 t = state_times[s];
233 
234  for (fst::ArcIterator<Lattice> aiter(den_lat_, s); !aiter.Done(); aiter.Next()) {
235  const Arc &arc = aiter.Value();
236  if (arc.ilabel != 0) { // input-side has transition-ids, output-side empty
237  int32 tid = arc.ilabel, pdf_id = tmodel_.TransitionIdToPdf(tid);
238  // The ordering of the indexes is similar to that in chain models
239  requested_indexes->push_back(MakePair(idx * supervision_.num_sequences + seq, pdf_id));
240  }
241  }
242  }
243 
244  if (opts_.criterion == "mmi") {
245  // Numerator probabilities to look up from alignment
246  for (int32 t = 0; t < num_frames; t++) {
249  int32 tid = supervision_.num_ali[t],
250  pdf_id = tmodel_.TransitionIdToPdf(tid);
251  KALDI_ASSERT(pdf_id >= 0 && pdf_id < num_pdfs);
252  requested_indexes->push_back(MakePair(idx * supervision_.num_sequences + seq, pdf_id));
253  }
254  }
255 
256  CuArray<Int32Pair> cu_requested_indexes(*requested_indexes);
257  answers->resize(requested_indexes->size());
258  nnet_output_.Lookup(cu_requested_indexes, &((*answers)[0]));
259  // requested_indexes now contain (t, j) pair and answers contains the
260  // neural network output, which is log p(j|x(t)) for CE models
261 }
262 
264  const std::vector<Int32Pair>& requested_indexes,
265  std::vector<BaseFloat> *answers) const {
266  int32 num_floored = 0;
267 
268  BaseFloat floor_val = -20 * kaldi::Log(10.0); // floor for posteriors.
269  size_t index;
270 
271  Vector<BaseFloat> log_priors(log_priors_);
272 
273  // Replace "answers" with the vector of scaled log-probs. If this step takes
274  // too much time, we can look at other ways to do it, using the CUDA card.
275  for (index = 0; index < answers->size(); index++) {
276  BaseFloat log_post = (*answers)[index];
277  if (log_post < floor_val) {
278  // TODO: this might not be required for 'chain' models
279  log_post = floor_val;
280  num_floored++;
281  }
282 
283  if (log_priors_.Dim() > 0) {
284  int32 pdf_id = requested_indexes[index].second;
285  KALDI_ASSERT(log_post <= 0 && log_priors(pdf_id) <= 0);
286  BaseFloat pseudo_loglike = (log_post - log_priors(pdf_id))
288  KALDI_ASSERT(!KALDI_ISINF(pseudo_loglike) && !KALDI_ISNAN(pseudo_loglike));
289  (*answers)[index] = pseudo_loglike;
290  } else {
291  (*answers)[index] = log_post * opts_.acoustic_scale;
292  }
293  }
294 
295  if (num_floored > 0) {
296  KALDI_WARN << "Floored " << num_floored << " probabilities from nnet.";
297  }
298 }
299 
301  const std::vector<BaseFloat> &answers,
302  size_t index, Lattice *lat) {
303  int32 num_states = lat->NumStates();
304 
305  for (StateId s = 0; s < num_states; s++) {
306  for (fst::MutableArcIterator<Lattice> aiter(lat, s);
307  !aiter.Done(); aiter.Next()) {
308  Arc arc = aiter.Value();
309  if (arc.ilabel != 0) { // input-side has transition-ids, output-side empty
310  arc.weight.SetValue2(-answers[index]);
311  index++;
312  aiter.SetValue(arc);
313  }
314  }
315  LatticeWeight final = lat->Final(s);
316  if (final != LatticeWeight::Zero()) {
317  final.SetValue2(0.0); // make sure no acoustic term in final-prob.
318  lat->SetFinal(s, final);
319  }
320  }
321 
322  // Number of indexes of log-likes used to rescore lattice
323  return index;
324 }
325 
327  const Posterior &post,
328  CuMatrixBase<BaseFloat> *output_deriv_temp,
329  double *tot_num_post,
330  double *tot_den_post) const {
331  std::vector<Int32Pair> deriv_indexes;
332  std::vector<BaseFloat> deriv_data;
333  for (size_t t = 0; t < post.size(); t++) {
334  for (size_t j = 0; j < post[t].size(); j++) {
337  int32 pdf_id = post[t][j].first;
338 
339  // Same ordering as for 'chain' models
340  deriv_indexes.push_back(MakePair(idx * supervision_.num_sequences + seq, pdf_id));
341 
342  BaseFloat weight = post[t][j].second;
343  if (tot_num_post && weight > 0.0) *tot_num_post += weight;
344  if (tot_den_post && weight < 0.0) *tot_den_post -= weight;
345  deriv_data.push_back(weight);
346  }
347  }
348  CuArray<Int32Pair> cu_deriv_indexes(deriv_indexes);
349  output_deriv_temp->AddElements(supervision_.weight, cu_deriv_indexes,
350  deriv_data.data());
351 }
352 
354  if (opts_.criterion == "mmi" && opts_.boost != 0.0) {
355  BaseFloat max_silence_error = 0.0;
357  opts_.boost, max_silence_error, &den_lat_);
358  }
359 
361 
362  int32 num_pdfs = nnet_output_.NumCols();
363  KALDI_ASSERT(log_priors_.Dim() == 0 || num_pdfs == log_priors_.Dim());
364 
365  // We need to look up the nnet output for some pdf-ids.
366  // Rather than looking them all up using operator (), which is
367  // very slow because each lookup involves a separate CUDA call with
368  // communication over PciExpress, we look them up all at once using
369  // CuMatrix::Lookup().
370  std::vector<BaseFloat> answers;
371  std::vector<Int32Pair> requested_indexes;
372 
373  LookupNnetOutput(&requested_indexes, &answers);
374 
375  ConvertAnswersToLogLike(requested_indexes, &answers);
376 
377  size_t index = 0;
378 
379  // Now put the negative (scaled) acoustic log-likelihoods in the lattice.
380  index = LatticeAcousticRescore(answers, index, &den_lat_);
381  // index is now the number of indexes of log-likes used to rescore lattice.
382  // This is required to further lookup answers for computing "mmi"
383  // numerator score.
384 
385  // Get statistics for this minibatch
386  DiscriminativeObjectiveInfo this_stats;
387  if (stats_) {
388  this_stats = *stats_;
389  this_stats.Reset();
390  }
391 
392  // Look up numerator probabilities corresponding to alignment
393  if (opts_.criterion == "mmi") {
394  double tot_num_like = 0.0;
395  KALDI_ASSERT(index + supervision_.num_ali.size() == answers.size());
396  for (size_t this_index = 0; this_index < supervision_.num_ali.size(); this_index++) {
397  tot_num_like += answers[index + this_index];
398  }
399  this_stats.tot_num_objf += supervision_.weight * tot_num_like;
400  index += supervision_.num_ali.size();
401  }
402 
403  KALDI_ASSERT(index == answers.size());
404 
405  if (nnet_output_deriv_) {
406  nnet_output_deriv_->SetZero();
407  KALDI_ASSERT(nnet_output_deriv_->NumRows() == nnet_output_.NumRows() &&
408  nnet_output_deriv_->NumCols() == nnet_output_.NumCols());
409  }
410 
411  if (xent_output_deriv_) {
412  xent_output_deriv_->SetZero();
413  KALDI_ASSERT(xent_output_deriv_->NumRows() == nnet_output_.NumRows() &&
414  xent_output_deriv_->NumCols() == nnet_output_.NumCols());
415  }
416 
417  Posterior post;
418  Posterior xent_post;
419  double objf = ComputeObjfAndDeriv(&post,
420  (xent_output_deriv_ ? &xent_post : NULL));
421 
422  this_stats.tot_objf += supervision_.weight * objf;
423 
424  KALDI_ASSERT(nnet_output_.NumRows() == post.size());
425 
426  CuMatrix<BaseFloat> output_deriv;
427 
428  CuMatrixBase<BaseFloat> *output_deriv_temp;
429 
430  if (nnet_output_deriv_)
431  output_deriv_temp = nnet_output_deriv_;
432  else {
433  // This is for accumulating the statistics
434  output_deriv.Resize(nnet_output_.NumRows(), nnet_output_.NumCols());
435  output_deriv_temp = &output_deriv;
436  }
437 
438  double tot_num_post = 0.0, tot_den_post = 0.0;
439  {
440  ProcessPosteriors(post, output_deriv_temp,
441  &tot_num_post, &tot_den_post);
442  }
443 
444  if (xent_output_deriv_) {
445  ProcessPosteriors(xent_post, xent_output_deriv_, NULL, NULL);
446  }
447 
448  this_stats.tot_den_count += tot_den_post;
449  this_stats.tot_num_count += tot_num_post;
450 
451  if (this_stats.AccumulateGradients())
452  (this_stats.gradients).AddRowSumMat(1.0, CuMatrix<double>(*output_deriv_temp));
453 
454  if (this_stats.AccumulateOutput()) {
456  temp.ApplyExp();
457  (this_stats.output).AddRowSumMat(1.0, temp);
458  }
459 
460  this_stats.tot_t = num_frames;
461  this_stats.tot_t_weighted = num_frames * supervision_.weight;
462 
463  if (!(this_stats.TotalObjf(opts_.criterion) ==
464  this_stats.TotalObjf(opts_.criterion))) {
465  // inf or NaN detected
466  if (nnet_output_deriv_)
467  nnet_output_deriv_->SetZero();
468  BaseFloat default_objf = -10;
469  KALDI_WARN << "Objective function is "
470  << this_stats.TotalObjf(opts_.criterion)
471  << ", setting to " << default_objf << " per frame.";
472  this_stats.tot_objf = default_objf * this_stats.tot_t_weighted;
473  }
474 
475  if (GetVerboseLevel() >= 2) {
476  if (GetVerboseLevel() >= 3) {
477  this_stats.PrintAll(opts_.criterion);
478  } else
479  this_stats.Print(opts_.criterion);
480  }
481 
482  // This code helps us see how big the derivatives are, on average,
483  // for different frames of the sequences. As expected, they are
484  // smaller towards the edges of the sequences (due to the penalization
485 // of 'incorrect' pdf-ids).
486  if (nnet_output_deriv_ && GetVerboseLevel() >= 1) {
487  int32 tot_frames = nnet_output_deriv_->NumRows(),
488  frames_per_sequence = supervision_.frames_per_sequence,
489  num_sequences = supervision_.num_sequences;
490  CuVector<BaseFloat> row_products(tot_frames);
491  row_products.AddDiagMat2(1.0, *nnet_output_deriv_, kNoTrans, 0.0);
492  Vector<BaseFloat> row_products_cpu(row_products);
493  Vector<BaseFloat> row_products_per_frame(frames_per_sequence);
494  for (int32 i = 0; i < tot_frames; i++)
495  row_products_per_frame(i / num_sequences) += row_products_cpu(i);
496  KALDI_LOG << "Derivs per frame are " << row_products_per_frame;
497  }
498 
499  if (opts_.l2_regularize != 0.0) {
500  // compute the l2 penalty term and its derivative
502  this_stats.tot_l2_term += -0.5 * scale * TraceMatMat(nnet_output_, nnet_output_, kTrans);
503  if (nnet_output_deriv_)
504  nnet_output_deriv_->AddMat(-1.0 * scale, nnet_output_);
505  }
506 
507  if (stats_)
508  stats_->Add(this_stats);
509 
510 }
511 
513  Posterior *xent_post) {
514 
515  if (xent_post) {
516  Posterior tid_post;
517  // Compute posterior from the numerator alignment
519  ConvertPosteriorToPdfs(tmodel_, tid_post, xent_post);
520  }
521 
522  if (opts_.criterion == "mpfe" || opts_.criterion == "smbr") {
523  Posterior tid_post;
525  den_lat_,
528  &tid_post);
529  ConvertPosteriorToPdfs(tmodel_, tid_post, post);
530  return ans;
531  } else if (opts_.criterion == "mmi") {
532  bool convert_to_pdfs = true, cancel = true;
533  // we'll return the denominator-lattice forward backward likelihood,
534  // which is one term in the objective function.
536  opts_.drop_frames, convert_to_pdfs,
537  cancel, post));
538  } else {
539  KALDI_ERR << "Unknown criterion " << opts_.criterion;
540  }
541 
542  return 0;
543 }
544 
545 
547  const TransitionModel &tmodel,
548  const CuVectorBase<BaseFloat> &log_priors,
549  const DiscriminativeSupervision &supervision,
550  const CuMatrixBase<BaseFloat> &nnet_output,
552  CuMatrixBase<BaseFloat> *nnet_output_deriv,
553  CuMatrixBase<BaseFloat> *xent_output_deriv) {
554  DiscriminativeComputation computation(opts, tmodel, log_priors, supervision,
555  nnet_output, stats,
556  nnet_output_deriv, xent_output_deriv);
557  computation.Compute();
558 }
559 
561  tot_t += other.tot_t;
562  tot_t_weighted += other.tot_t_weighted;
563  tot_objf += other.tot_objf; // Actually tot_den_objf for mmi
564  tot_num_count += other.tot_num_count;
565  tot_den_count += other.tot_den_count;
566  tot_num_objf += other.tot_num_objf; // Only for mmi
567  tot_l2_term += other.tot_l2_term;
568 
569  if (AccumulateGradients()) {
570  gradients.AddVec(1.0, other.gradients);
571  }
572  if (AccumulateOutput()) {
573  output.AddVec(1.0, other.output);
574  }
575 }
576 
577 void DiscriminativeObjectiveInfo::Print(const std::string &criterion,
578  bool print_avg_gradients,
579  bool print_avg_output) const {
580  if (criterion == "mmi") {
581  double num_objf = tot_num_objf / tot_t_weighted,
582  den_objf = tot_objf / tot_t_weighted;
583  double objf = num_objf - den_objf;
584 
585  double avg_post_per_frame = tot_num_count / tot_t_weighted;
586 
587  KALDI_LOG << "Number of frames is " << tot_t
588  << " (weighted: " << tot_t_weighted
589  << "), average (num or den) posterior per frame is "
590  << avg_post_per_frame;
591  KALDI_LOG << "MMI objective function is " << num_objf << " - "
592  << den_objf << " = " << objf << " per frame, over "
593  << tot_t_weighted << " frames.";
594  } else if (criterion == "mpfe") {
595  double avg_gradients = (tot_num_count + tot_den_count) / tot_t_weighted;
596  double objf = tot_objf / tot_t_weighted;
597  KALDI_LOG << "Average num+den count of MPFE stats is " << avg_gradients
598  << " per frame, over "
599  << tot_t_weighted << " frames";
600  KALDI_LOG << "MPFE objective function is " << objf
601  << " per frame, over " << tot_t_weighted << " frames.";
602  } else if (criterion == "smbr") {
603  double avg_gradients = (tot_num_count + tot_den_count) / tot_t_weighted;
604  double objf = tot_objf / tot_t_weighted;
605  KALDI_LOG << "Average num+den count of SMBR stats is " << avg_gradients
606  << " per frame, over "
607  << tot_t_weighted << " frames";
608  KALDI_LOG << "SMBR objective function is " << objf
609  << " per frame, over " << tot_t_weighted << " frames.";
610  }
611 
612  if (AccumulateGradients()) {
613  Vector<double> temp(gradients);
614  temp.Scale(1.0/tot_t_weighted);
615  if (print_avg_gradients) {
616  KALDI_LOG << "Vector of average gradients wrt output activations is: \n" << temp;
617  } else {
618  KALDI_VLOG(4) << "Vector of average gradients wrt output activations is: \n" << temp;
619  }
620  }
621  if (AccumulateOutput()) {
622  Vector<double> temp(output);
623  temp.Scale(1.0/tot_t_weighted);
624  if (print_avg_output) {
625  KALDI_LOG << "Average DNN output is: \n" << temp;
626  } else {
627  KALDI_VLOG(4) << "Average DNN output is: \n" << temp;
628  }
629  }
630 }
631 
633  if (pdf_id < gradients.Dim() && pdf_id >= 0) {
634  KALDI_LOG << "Average gradient wrt output activations of pdf " << pdf_id
635  << " is " << gradients(pdf_id) / tot_t_weighted
636  << " per frame, over "
637  << tot_t_weighted << " frames";
638  }
639 }
640 
641 
642 
643 } // namespace discriminative
644 } // namespace kaldi
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void LookupNnetOutput(std::vector< Int32Pair > *requested_indexes, std::vector< BaseFloat > *answers) const
void SetZero()
Math operations.
Definition: cu-vector.cc:1098
void ConvertAnswersToLogLike(const std::vector< Int32Pair > &requested_indexes, std::vector< BaseFloat > *answers) const
int32 LatticeStateTimes(const Lattice &lat, vector< int32 > *times)
This function iterates over the states of a topologically sorted lattice and counts the time instance...
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
Definition: kaldi-error.h:60
#define KALDI_ISINF
Definition: kaldi-math.h:73
void AddElements(Real alpha, const std::vector< MatrixElement< Real > > &input)
Definition: cu-matrix.cc:3277
void PrintAll(const std::string &criterion) const
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595
DiscriminativeComputation(const DiscriminativeOptions &opts, const TransitionModel &tmodel, const CuVectorBase< BaseFloat > &log_priors, const DiscriminativeSupervision &supervision, const CuMatrixBase< BaseFloat > &nnet_output, DiscriminativeObjectiveInfo *stats, CuMatrixBase< BaseFloat > *nnet_output_deriv, CuMatrixBase< BaseFloat > *xent_output_deriv)
kaldi::int32 int32
void LatticeAcousticRescore(const TransitionModel &trans_model, const Matrix< BaseFloat > &log_likes, const std::vector< int32 > &state_times, Lattice *lat)
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
double ComputeObjfAndDeriv(Posterior *post, Posterior *xent_post)
int32 TransitionIdToPdf(int32 trans_id) const
static Int32Pair MakePair(int32 first, int32 second)
void Add(const DiscriminativeObjectiveInfo &other)
double TotalObjf(const std::string &criterion) const
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
BaseFloat LatticeForwardBackwardMmi(const TransitionModel &tmodel, const Lattice &lat, const std::vector< int32 > &num_ali, bool drop_frames, bool convert_to_pdf_ids, bool cancel, Posterior *post)
This function can be used to compute posteriors for MMI, with a positive contribution for the numerat...
double Log(double x)
Definition: kaldi-math.h:100
static size_t LatticeAcousticRescore(const std::vector< BaseFloat > &answers, size_t index, Lattice *lat)
void ComputeDiscriminativeObjfAndDeriv(const DiscriminativeOptions &opts, const TransitionModel &tmodel, const CuVectorBase< BaseFloat > &log_priors, const DiscriminativeSupervision &supervision, const CuMatrixBase< BaseFloat > &nnet_output, DiscriminativeObjectiveInfo *stats, CuMatrixBase< BaseFloat > *nnet_output_deriv, CuMatrixBase< BaseFloat > *xent_output_deriv)
This function does forward-backward on the numerator and denominator lattices and computes derivatives ...
static const LatticeWeightTpl Zero()
void AlignmentToPosterior(const std::vector< int32 > &ali, Posterior *post)
Convert an alignment to a posterior (with a scale of 1.0 on each entry).
Definition: posterior.cc:290
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
void Resize(MatrixIndexT dim, MatrixResizeType t=kSetZero)
Allocate the memory.
Definition: cu-vector.cc:993
#define KALDI_ERR
Definition: kaldi-error.h:147
BaseFloat LatticeForwardBackwardMpeVariants(const TransitionModel &trans, const std::vector< int32 > &silence_phones, const Lattice &lat, const std::vector< int32 > &num_ali, std::string criterion, bool one_silence_class, Posterior *post)
This function implements either the MPFE (minimum phone frame error) or SMBR (state-level minimum bay...
#define KALDI_WARN
Definition: kaldi-error.h:150
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void Configure(const DiscriminativeOptions &opts)
void Scale(Real alpha)
Multiplies all elements by this constant.
void Print(const std::string &criterion, bool print_avg_gradients=false, bool print_avg_output=false) const
Matrix for CUDA computing.
Definition: matrix-common.h:69
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ISNAN
Definition: kaldi-math.h:72
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
bool LatticeBoost(const TransitionModel &trans, const std::vector< int32 > &alignment, const std::vector< int32 > &silence_phones, BaseFloat b, BaseFloat max_silence_error, Lattice *lat)
Boosts LM probabilities by b * [number of frame errors]; equivalently, adds -b*[number of frame error...
int32_cuda second
Definition: cu-matrixdim.h:80
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
Definition: posterior.cc:322
#define KALDI_LOG
Definition: kaldi-error.h:153
Vector for CUDA computing.
Definition: matrix-common.h:72
int32_cuda first
Definition: cu-matrixdim.h:79
void ProcessPosteriors(const Posterior &post, CuMatrixBase< BaseFloat > *output_deriv_temp, double *tot_num_post=NULL, double *tot_den_post=NULL) const