train-nnet-ensemble.cc
Go to the documentation of this file.
1 // nnet2/train-nnet-ensemble.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 // 2014 Xiaohui Zhang
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
22 #include <numeric> // for std::accumulate
23 
24 namespace kaldi {
25 namespace nnet2 {
26 
27 static inline Int32Pair MakePair(int32 first, int32 second) {
28  Int32Pair ans;
29  ans.first = first;
30  ans.second = second;
31  return ans;
32 }
33 
35  const NnetEnsembleTrainerConfig &config,
36  std::vector<Nnet*> nnet_ensemble):
37  config_(config), nnet_ensemble_(nnet_ensemble) {
38  beta_ = config_.beta;
39  num_phases_ = 0;
40  bool first_time = true;
41  BeginNewPhase(first_time);
42 }
43 
// NOTE(review): the listing skips its lines 44 and 47 here, so the function
// header and the statement governed by the `if` below are absent from this
// copy. The symbol index at the end of this file lists
// `void TrainOnExample(const NnetExample &value)`, which is presumably the
// missing header -- confirm against the upstream source.
//
// Appends the incoming example to buffer_.
45  buffer_.push_back(value);
// True once buffer_ holds exactly config_.minibatch_size examples. The action
// taken is on the missing listing line 47 (presumably it trains on the
// buffered minibatch -- TODO confirm).
46  if (static_cast<int32>(buffer_.size()) == config_.minibatch_size)
48 }
49 
// NOTE(review): this listing omits its lines 50, 60, 80, 107 and 108, so the
// function header and several statements are absent from this copy. Each gap
// is flagged where it occurs; restore the lines from the upstream source
// before compiling.
//
// What the visible lines do: for every net in nnet_ensemble_, format buffer_
// as input, propagate, and collect the output; build a target matrix from the
// beta_-scaled mean of those outputs plus the supervision label weights; then,
// per net, back-propagate (target ./ output) and accumulate the log of the
// output at the labelled entries into avg_logprob_this_phase_.
51  KALDI_ASSERT(!buffer_.empty());
52 
// Column count for post_avg: the output dimension of the last component of
// the first net (all nets are presumably expected to agree -- TODO confirm).
53  int32 num_states = nnet_ensemble_[0]->GetComponent(nnet_ensemble_[0]->NumComponents() - 1).OutputDim();
54  // average of posteriors matrix, storing averaged outputs of net ensemble.
55  CuMatrix<BaseFloat> post_avg(buffer_.size(), num_states);
// reserve() only sets capacity; the elements indexed inside the loop below
// must be created elsewhere -- presumably on the missing listing line 60.
56  updater_ensemble_.reserve(nnet_ensemble_.size());
// One output matrix per net; kept because the second loop reuses them.
57  std::vector<CuMatrix<BaseFloat> > post_mat;
58  post_mat.resize(nnet_ensemble_.size());
59  for (int32 i = 0; i < nnet_ensemble_.size(); i++) {
// (listing line 60 missing here)
61  updater_ensemble_[i]->FormatInput(buffer_);
62  updater_ensemble_[i]->Propagate();
63  // posterior matrix, storing output of one net.
64  updater_ensemble_[i]->GetOutput(&post_mat[i]);
// NOTE(review): row_sum is not referenced by any visible line; it looks
// unused -- confirm and remove.
65  CuVector<BaseFloat> row_sum(post_mat[i].NumRows());
// Accumulate this net's output; the sum is turned into a mean further down.
66  post_avg.AddMat(1.0, post_mat[i]);
67  }
68 
69  // calculate the interpolated posteriors as new supervision labels, and also
70  // collect the indices of the original supervision labels for later use (calc. objf.).
71  std::vector<MatrixElement<BaseFloat> > sv_labels;
72  std::vector<Int32Pair > sv_labels_ind;
73  sv_labels.reserve(buffer_.size()); // We must have at least this many labels.
74  sv_labels_ind.reserve(buffer_.size()); // We must have at least this many labels.
// m is the example's position in buffer_ and is used as the row index of
// every entry generated for that example.
75  for (int32 m = 0; m < buffer_.size(); m++) {
76  KALDI_ASSERT(buffer_[m].labels.size() == 1 &&
77  "Currently this code only supports single-frame egs.");
78  const std::vector<std::pair<int32,BaseFloat> > &labels = buffer_[m].labels[0];
79  for (size_t i = 0; i < labels.size(); i++) {
// (listing line 80 missing here -- presumably the declaration of `tmp`, whose
// element type would match sv_labels; TODO confirm)
81  tmp = {m, labels[i].first, labels[i].second};
82  sv_labels.push_back(tmp);
// Same (row, label) position without the weight, for the objective lookup.
83  sv_labels_ind.push_back(MakePair(m, labels[i].first));
84  }
85  }
// Sum over nets -> mean over nets, then weight the mean by beta_ and add the
// supervision label weights at their (row, label) positions.
86  post_avg.Scale(1.0 / nnet_ensemble_.size());
87  post_avg.Scale(beta_);
88  post_avg.AddElements(1.0, sv_labels);
89 
90  // calculate the deriv, do backprop, and calculate the objf.
91  for (int32 i = 0; i < nnet_ensemble_.size(); i++) {
// Copy the net's output before ApplyLog() overwrites post_mat[i] in place.
92  CuMatrix<BaseFloat> tmp_deriv(post_mat[i]);
93  post_mat[i].ApplyLog();
94  std::vector<BaseFloat> log_post_correct;
95  log_post_correct.resize(sv_labels_ind.size());
// NOTE(review): indexing [0] assumes sv_labels_ind is non-empty, i.e. that at
// least one example carried a label -- confirm the callers guarantee this.
96  post_mat[i].Lookup(sv_labels_ind, &(log_post_correct[0]));
// Sum of the looked-up log values for this net.
97  BaseFloat log_prob_this_net = std::accumulate(log_post_correct.begin(),
98  log_post_correct.end(),
99  static_cast<BaseFloat>(0));
100  avg_logprob_this_phase_ += log_prob_this_net;
// Elementwise reciprocal of the output, times the target: target ./ output.
101  tmp_deriv.InvertElements();
102  tmp_deriv.MulElements(post_avg);
103  updater_ensemble_[i]->Backprop(&tmp_deriv);
104  }
105  count_this_phase_ += buffer_.size();
106  buffer_.clear();
// (listing lines 107-108 missing here: they must open the block that is
// closed at listing line 112 -- presumably a check on a per-phase minibatch
// counter; TODO confirm)
// The accumulator holds a sum over all nets, so divide by the ensemble size
// before the phase summary is produced.
109  avg_logprob_this_phase_ /= static_cast<BaseFloat>(nnet_ensemble_.size());
110  bool first_time = false;
111  BeginNewPhase(first_time);
112  }
113 }
114 
// Closes the current training phase and starts the next one.
//
// first_time: when false, a summary of the phase that just ended is logged
// first; when true (no phase has run yet) the log message is skipped.
// In both cases count_this_phase_ is reset and num_phases_ is incremented.
//
// NOTE(review): this listing omits its lines 118, 120 and 122, so part of the
// log statement and two statements of the reset sequence are absent from this
// copy; restore them from the upstream source before compiling.
115 void NnetEnsembleTrainer::BeginNewPhase(bool first_time) {
116  if (!first_time)
117  KALDI_LOG << "Averaged cross-entropy between the supervision labels and the output is "
// (listing line 118 missing here: whatever is streamed between the text above
// and the frame count below)
119  << count_this_phase_ << " frames, during this phase";
// (listing line 120 missing here -- presumably resets another per-phase
// accumulator; TODO confirm)
121  count_this_phase_ = 0.0;
// (listing line 122 missing here -- presumably resets
// minibatches_seen_this_phase_, which is tested elsewhere in this file; TODO
// confirm)
123  num_phases_++;
124 }
125 
126 
// NOTE(review): the listing skips its lines 127 and 131 here, so this block's
// header and one statement are absent from this copy. Presumably this is the
// class destructor -- TODO confirm against the upstream source.
//
// If examples are still waiting in buffer_, log the size of the leftover
// (smaller-than-configured) minibatch, then close the phase if any minibatch
// was seen in it.
128  if (!buffer_.empty()) {
129  KALDI_LOG << "Doing partial minibatch of size "
130  << buffer_.size();
// (listing line 131 missing here -- presumably the call that trains on the
// leftover examples; TODO confirm)
// Only emit a phase summary when at least one minibatch was processed.
132  if (minibatches_seen_this_phase_ != 0) {
133  bool first_time = false;
134  BeginNewPhase(first_time);
135  }
136  }
137 }
138 
139 
140 } // namespace nnet2
141 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:36
static Int32Pair MakePair(int32 first, int32 second)
std::vector< NnetExample > buffer_
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
NnetEnsembleTrainer(const NnetEnsembleTrainerConfig &config, std::vector< Nnet *> nnet_ensemble)
void InvertElements()
invert the matrix by elements.
Definition: cu-matrix.cc:932
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
void TrainOnExample(const NnetExample &value)
TrainOnExample will take the example and add it to a buffer; if we've reached the minibatch size it w...
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::vector< NnetUpdater * > updater_ensemble_
int32_cuda second
Definition: cu-matrixdim.h:80
#define KALDI_LOG
Definition: kaldi-error.h:153
NnetEnsembleTrainerConfig config_
int32_cuda first
Definition: cu-matrixdim.h:79