nnet-example-functions.h
Go to the documentation of this file.
1 // nnet2/nnet-example-functions.h
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_NNET2_NNET_EXAMPLE_FUNCTIONS_H_
21 #define KALDI_NNET2_NNET_EXAMPLE_FUNCTIONS_H_
22 
29 #include "nnet2/nnet-nnet.h"
30 #include "util/table-types.h"
31 #include "lat/kaldi-lattice.h"
32 #include "nnet2/nnet-example.h"
33 #include "hmm/transition-model.h"
34 #include "hmm/posterior.h"
35 
36 namespace kaldi {
37 namespace nnet2 {
38 
39 // Glossary: mmi = Maximum Mutual Information,
40 // mpfe = Minimum Phone Frame Error
41 // smbr = State-level Minimum Bayes Risk
42 
43 
44 // This file relates to the creation of examples for discriminative training
45 // (see struct DiscriminativeNnetExample, in ./nnet-example.h).
46 
47 
52  // This is the maximum length in frames that any example is allowed to have.
53  // We will split training examples to ensure that they are no longer than
54  // this. Note: if you make this too short it may have bad effects because
55  // the posteriors start to become inaccurate at the edges of the training
56  // example (since they will be based on the acoustic model that was used to
57  // generate the lattices, not the current one).
59 
60  // criterion can be "smbr" or "mpfe" or "mmi". This info is only needed to
61  // determine which parts of the lattices will not contribute to training and
62  // can be discarded (for mpfe/smbr, any part where the den-lat has only one
63  // path or all den-lat paths map to the same pdf can be discarded; for mmi,
64  // any part where the den-lat's pdfs all have the same value as the num-lat
65  // pdf for that frame can be discarded).
66  std::string criterion;
67 
69 
71 
72  bool minimize; // we'll push and minimize if this is true.
73 
74  bool test;
75 
76  bool drop_frames; // For MMI, true if we will eventually drop frames in which
77  // the numerator does not appear in the denominator lattice.
78  // (i.e. we won't backpropagate any derivatives on those
79  // frames). We may still need to include those frames in
80  // the computation in order to get correct posteriors for
81  // other parts of the lattice.
82 
83  bool split; // if false, we won't split at all.
84 
85  bool excise; // if false, we will skip the "excise" step.
86 
88  max_length(1024), criterion("smbr"), collapse_transition_ids(true),
89  determinize(true), minimize(true), test(false), drop_frames(false),
90  split(true), excise(true) { }
91 
// Registers each configurable member of this config with `opts`. Every call
// passes the option name, a pointer to the member that the option sets, and
// the help string shown for that option.
// The "target-length" registration below is deliberately left commented out.
92  void Register(OptionsItf *opts) {
93 
94  opts->Register("max-length", &max_length, "Maximum length allowed for any "
95  "segment (i.e. max #frames for any example)");
96  //opts->Register("target-length", &target_length, "Target length for a "
97  // "segment");
98  opts->Register("criterion", &criterion, "Criterion, 'mmi'|'mpfe'|'smbr'. "
99  "Determines which frames may be dropped from lattices.");
100  opts->Register("collapse-transition-ids", &collapse_transition_ids,
101  "This option included for debugging purposes");
102  opts->Register("determinize", &determinize, "If true, we determinize "
103  "lattices (as Lattice) before splitting and possibly minimize");
104  opts->Register("minimize", &minimize, "If true, we push and "
105  "minimize lattices (as Lattice) before splitting");
106  opts->Register("test", &test, "If true, activate self-testing code.");
107  // See "Sequence-discriminative training of deep neural networks", Vesely et al,
108  // ICASSP 2013 for explanation of frame dropping.
109  opts->Register("drop-frames", &drop_frames, "For MMI, if true we drop frames "
110  "with no overlap of num and den pdf-ids");
111  opts->Register("split", &split, "Set to false to disable lattice-splitting.");
112  opts->Register("excise", &excise, "Set to false to disable excising un-needed "
113  "frames (option included for debug purposes)");
114  }
115 };
116 
132 
// Constructor: zero-fills the entire object with memset, so every member
// starts out as all-zero bytes.
// NOTE(review): this is only well-defined while the struct contains nothing
// but plain (trivially-copyable) members; the member list is not visible in
// this listing -- confirm before adding non-trivial members.
133  SplitExampleStats() { memset(this, 0, sizeof(*this)); }
// NOTE(review): presumably prints the statistics held in this struct; the
// definition is not visible in this listing -- confirm against the .cc file.
134  void Print();
135 };
136 
141  const std::vector<int32> &alignment,
142  const Matrix<BaseFloat> &feats,
143  const CompactLattice &clat,
144  BaseFloat weight,
145  int32 left_context,
146  int32 right_context,
148 
149 
154  const SplitDiscriminativeExampleConfig &config,
155  const TransitionModel &tmodel,
156  const DiscriminativeNnetExample &eg,
157  std::vector<DiscriminativeNnetExample> *egs_out,
158  SplitExampleStats *stats_out);
159 
164  const SplitDiscriminativeExampleConfig &config,
165  const TransitionModel &tmodel,
166  const DiscriminativeNnetExample &eg,
167  std::vector<DiscriminativeNnetExample> *egs_out,
168  SplitExampleStats *stats_out);
169 
170 
186  const std::vector<const DiscriminativeNnetExample*> &input,
187  DiscriminativeNnetExample *output);
188 
211  const std::vector<DiscriminativeNnetExample> &input,
212  std::vector<DiscriminativeNnetExample> *output);
213 
// Solves the "packing problem" using the "first fit" algorithm.
// NOTE(review): parameter roles are not documented in this listing.
// Presumably `costs` holds one cost per item, `max_cost` bounds the total
// cost of a group, and `*groups` receives the item indices assigned to each
// group -- confirm against the definition.
221 void SolvePackingProblem(BaseFloat max_cost,
222  const std::vector<BaseFloat> &costs,
223  std::vector<std::vector<size_t> > *groups);
224 
225 
226 
// Given a discriminative training example `eg`, works out posteriors at the
// pdf level.
// NOTE(review): presumably the result is written to `*post`, and `criterion`
// takes the values 'mmi'|'mpfe'|'smbr' -- confirm against the definition.
245 void ExampleToPdfPost(
246  const TransitionModel &tmodel,
247  const std::vector<int32> &silence_phones,
248  std::string criterion,
249  bool drop_frames,
250  bool one_silence_class,
251  const DiscriminativeNnetExample &eg,
252  Posterior *post);
253 
// Used in code that tests the functionality provided by this header.
// NOTE(review): the pointer arguments (`hash`, `num_weight`, `den_weight`,
// `tot_t`) are presumably outputs that this call updates -- confirm against
// the definition.
284 void UpdateHash(
285  const TransitionModel &tmodel,
286  const DiscriminativeNnetExample &eg,
287  std::string criterion,
288  bool drop_frames,
289  bool one_silence_class,
290  Matrix<double> *hash,
291  double *num_weight,
292  double *den_weight,
293  double *tot_t);
294 
295 
296 
297 } // namespace nnet2
298 } // namespace kaldi
299 
300 #endif // KALDI_NNET2_NNET_EXAMPLE_FUNCTIONS_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void SplitDiscriminativeExample(const SplitDiscriminativeExampleConfig &config, const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::vector< DiscriminativeNnetExample > *egs_out, SplitExampleStats *stats_out)
Split a "discriminative example" into multiple pieces, splitting where the lattice has "pinch points"...
void ExciseDiscriminativeExample(const SplitDiscriminativeExampleConfig &config, const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::vector< DiscriminativeNnetExample > *egs_out, SplitExampleStats *stats_out)
Remove unnecessary frames from discriminative training example.
void UpdateHash(const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::string criterion, bool drop_frames, bool one_silence_class, Matrix< double > *hash, double *num_weight, double *den_weight, double *tot_t)
This function is used in code that tests the functionality that we provide here, about splitting and ...
kaldi::int32 int32
void CombineDiscriminativeExamples(int32 max_length, const std::vector< DiscriminativeNnetExample > &input, std::vector< DiscriminativeNnetExample > *output)
This function is used to combine multiple discriminative-training examples (each corresponding to a s...
This struct exists only for diagnostic purposes.
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
void ExampleToPdfPost(const TransitionModel &tmodel, const std::vector< int32 > &silence_phones, std::string criterion, bool drop_frames, bool one_silence_class, const DiscriminativeNnetExample &eg, Posterior *post)
Given a discriminative training example, this function works out posteriors at the pdf level (note: t...
bool LatticeToDiscriminativeExample(const std::vector< int32 > &alignment, const Matrix< BaseFloat > &feats, const CompactLattice &clat, BaseFloat weight, int32 left_context, int32 right_context, DiscriminativeNnetExample *eg)
Converts lattice to discriminative training example.
void AppendDiscriminativeExamples(const std::vector< const DiscriminativeNnetExample *> &input, DiscriminativeNnetExample *output)
Appends the given vector of examples (which must be non-empty) into a single output example (called b...
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
This struct is used to store the information we need for discriminative training (MMI or MPE)...
Definition: nnet-example.h:136
void Print(const Fst< Arc > &fst, std::string message)
void SolvePackingProblem(BaseFloat max_cost, const std::vector< BaseFloat > &costs, std::vector< std::vector< size_t > > *groups)
This function solves the "packing problem" using the "first fit" algorithm.
Config structure for SplitExample, for splitting discriminative training examples.