combine-nnet.cc
Go to the documentation of this file.
1 // nnet2/combine-nnet.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "nnet2/combine-nnet.h"
21 
22 namespace kaldi {
23 namespace nnet2 {
24 
25 
26 // Here, "scale_params" is in blocks, with the first block
27 // corresponding to nnets[0].
28 static void CombineNnets(const Vector<BaseFloat> &scale_params,
29  const std::vector<Nnet> &nnets,
30  Nnet *dest) {
31  int32 num_nnets = nnets.size();
32  KALDI_ASSERT(num_nnets >= 1);
33  int32 num_uc = nnets[0].NumUpdatableComponents();
34  KALDI_ASSERT(nnets[0].NumUpdatableComponents() >= 1);
35 
36 
37  *dest = nnets[0];
38  SubVector<BaseFloat> scale_params0(scale_params, 0, num_uc);
39  dest->ScaleComponents(scale_params0);
40  for (int32 n = 1; n < num_nnets; n++) {
41  SubVector<BaseFloat> scale_params_n(scale_params, n * num_uc, num_uc);
42  dest->AddNnet(scale_params_n, nnets[n]);
43  }
44 }
45 
    const std::vector<NnetExample> &validation_set,
    const std::vector<Nnet> &nnets) {
  // Decides which model to start the optimization from: returns the index of
  // the single source nnet with the best per-frame validation objective, or
  // nnets.size() if the uniform average of all the nnets beats every
  // individual one.
  int32 minibatch_size = 1024;
  int32 num_nnets = static_cast<int32>(nnets.size());
  KALDI_ASSERT(!nnets.empty());
  // NOTE(review): if validation_set is empty, the divisions by tot_frames
  // below are divisions by zero — presumably callers always pass a nonempty
  // validation set; confirm.
  BaseFloat tot_frames = validation_set.size();
  int32 best_n = -1;
  BaseFloat best_objf = -std::numeric_limits<BaseFloat>::infinity();
  Vector<BaseFloat> objfs(nnets.size());
  for (int32 n = 0; n < num_nnets; n++) {
    // Per-frame validation objective of the n'th source nnet.
    BaseFloat objf = ComputeNnetObjf(nnets[n], validation_set,
                                     minibatch_size) / tot_frames;

    if (n == 0 || objf > best_objf) {
      best_objf = objf;
      best_n = n;
    }
    objfs(n) = objf;
  }
  KALDI_LOG << "Objective functions for the source neural nets are " << objfs;

  int32 num_uc = nnets[0].NumUpdatableComponents();

  { // Now try a version where all the neural nets have the same weight.
    Vector<BaseFloat> scale_params(num_uc * num_nnets);
    scale_params.Set(1.0 / num_nnets);
    Nnet average_nnet;
    CombineNnets(scale_params, nnets, &average_nnet);
    BaseFloat objf = ComputeNnetObjf(average_nnet, validation_set,
                                     minibatch_size) / tot_frames;
    KALDI_LOG << "Objf with all neural nets averaged is " << objf;
    if (objf > best_objf) {
      // num_nnets is a special return value meaning "use the average".
      return num_nnets;
    } else {
      return best_n;
    }
  }
}
88 
89 // This function chooses from among the neural nets, the one
90 // which has the best validation set objective function.
    const NnetCombineConfig &combine_config,
    const std::vector<NnetExample> &validation_set,
    const std::vector<Nnet> &nnets,
    Vector<double> *scale_params) {
  // Initializes the scale parameters (one block of NumUpdatableComponents()
  // scales per source nnet) for the subsequent optimization: either all the
  // weight on a single "initial model", or uniform weight over all nnets.

  int32 initial_model = combine_config.initial_model,
      num_nnets = static_cast<int32>(nnets.size());
  // If the configured initial model is outside [0, num_nnets], pick one from
  // the validation objective instead; the value num_nnets (from config or
  // from GetInitialModel) means "average of all the nnets".
  if (initial_model < 0 || initial_model > num_nnets)
    initial_model = GetInitialModel(validation_set, nnets);

  KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets);
  int32 num_uc = nnets[0].NumUpdatableComponents();

  scale_params->Resize(num_uc * num_nnets);
  if (initial_model < num_nnets) {
    KALDI_LOG << "Initializing with neural net with index " << initial_model;
    // At this point we're using the best of the individual neural nets.
    scale_params->Set(0.0);

    // Set the block of parameters corresponding to the "best" of the
    // source neural nets to 1.0, leaving all other blocks at zero.
    SubVector<double> best_block(*scale_params, num_uc * initial_model, num_uc);
    best_block.Set(1.0);
  } else { // initial_model == num_nnets
    KALDI_LOG << "Initializing with all neural nets averaged.";
    scale_params->Set(1.0 / num_nnets);
  }
}
120 
121 
122 
123 
    const std::vector<NnetExample> &validation_set,
    const Vector<double> &scale_params,
    const std::vector<Nnet> &nnets,
    bool debug,
    Vector<double> *gradient) {
  // Combines the nnets with the given scales, evaluates the validation
  // objective per frame, and (if gradient != NULL) computes the derivative
  // of that objective w.r.t. each scale parameter.

  Vector<BaseFloat> scale_params_float(scale_params);

  Nnet nnet_combined;
  CombineNnets(scale_params_float, nnets, &nnet_combined);

  // nnet_gradient accumulates the objective-function gradient w.r.t. the
  // combined nnet's parameters.
  Nnet nnet_gradient(nnet_combined);
  bool is_gradient = true;
  nnet_gradient.SetZero(is_gradient);

  // note: "ans" is normalized by the total weight of validation frames.
  int32 batch_size = 1024;
  double ans = ComputeNnetGradient(nnet_combined,
                                   validation_set,
                                   batch_size,
                                   &nnet_gradient);

  double tot_frames = validation_set.size();
  if (gradient != NULL) {
    int32 i = 0; // index into scale_params.
    for (int32 n = 0; n < static_cast<int32>(nnets.size()); n++) {
      for (int32 j = 0; j < nnet_combined.NumComponents(); j++) {
        // By linearity of the combination, d(objf)/d(scale(n,j)) is the dot
        // product of the n'th source nnet's j'th component with the
        // corresponding block of the parameter gradient.
        const UpdatableComponent *uc =
            dynamic_cast<const UpdatableComponent*>(&(nnets[n].GetComponent(j))),
            *uc_gradient =
            dynamic_cast<const UpdatableComponent*>(&(nnet_gradient.GetComponent(j)));
        if (uc != NULL) {
          double dotprod = uc->DotProduct(*uc_gradient) / tot_frames;
          (*gradient)(i) = dotprod;
          i++;
        }
      }
    }
    KALDI_ASSERT(i == scale_params.Dim());
  }

  if (debug) {
    // Numerically verify the analytic gradient by finite differences.
    // NOTE(review): this branch dereferences "gradient" without a NULL
    // check, so debug == true requires gradient != NULL — confirm callers
    // respect this.
    KALDI_LOG << "Double-checking gradient computation";

    Vector<BaseFloat> manual_gradient(scale_params.Dim());
    for (int32 i = 0; i < scale_params.Dim(); i++) {
      // Scale the step "delta" to the gradient magnitude so the expected
      // objective change is around 1.0e-05 (big enough to be visible above
      // numerical noise).
      double delta = 1.0e-04, fg = fabs((*gradient)(i));
      if (fg < 1.0e-07) fg = 1.0e-07;
      if (fg * delta < 1.0e-05)
        delta = 1.0e-05 / fg;

      // Recursive call with debug == false and no gradient requested: just
      // re-evaluates the objective at the perturbed point.
      Vector<double> scale_params_temp(scale_params);
      scale_params_temp(i) += delta;
      double new_ans = ComputeObjfAndGradient(validation_set,
                                              scale_params_temp,
                                              nnets,
                                              false,
                                              NULL);
      manual_gradient(i) = (new_ans - ans) / delta;
    }
    KALDI_LOG << "Manually computed gradient is " << manual_gradient;
    KALDI_LOG << "Gradient we computed is " << *gradient;
  }

  return ans;
}
191 
192 
193 void CombineNnets(const NnetCombineConfig &combine_config,
194  const std::vector<NnetExample> &validation_set,
195  const std::vector<Nnet> &nnets,
196  Nnet *nnet_out) {
197 
198  Vector<double> scale_params;
199 
200  GetInitialScaleParams(combine_config,
201  validation_set,
202  nnets,
203  &scale_params);
204 
205  int32 dim = scale_params.Dim();
206  KALDI_ASSERT(dim > 0);
207  Vector<double> gradient(dim);
208 
209  double objf, initial_objf;
210 
211  LbfgsOptions lbfgs_options;
212  lbfgs_options.minimize = false; // We're maximizing.
213  lbfgs_options.m = dim; // Store the same number of vectors as the dimension
214  // itself, so this is BFGS.
215  lbfgs_options.first_step_impr = combine_config.initial_impr;
216 
217  OptimizeLbfgs<double> lbfgs(scale_params,
218  lbfgs_options);
219 
220  for (int32 i = 0; i < combine_config.num_bfgs_iters; i++) {
221  scale_params.CopyFromVec(lbfgs.GetProposedValue());
222  objf = ComputeObjfAndGradient(validation_set,
223  scale_params,
224  nnets,
225  combine_config.test_gradient,
226  &gradient);
227 
228  KALDI_VLOG(2) << "Iteration " << i << " scale-params = " << scale_params
229  << ", objf = " << objf << ", gradient = " << gradient;
230 
231  if (i == 0) initial_objf = objf;
232 
233  lbfgs.DoStep(objf, gradient);
234  }
235 
236  scale_params.CopyFromVec(lbfgs.GetValue(&objf));
237 
238  Vector<BaseFloat> scale_params_float(scale_params);
239 
240  KALDI_LOG << "Combining nnets, validation objf per frame changed from "
241  << initial_objf << " to " << objf;
242 
243  Matrix<BaseFloat> scale_params_mat(nnets.size(),
244  nnets[0].NumUpdatableComponents());
245  scale_params_mat.CopyRowsFromVec(scale_params_float);
246  KALDI_LOG << "Final scale factors are " << scale_params_mat;
247 
248  CombineNnets(scale_params_float, nnets, nnet_out);
249 }
250 
251 
252 } // namespace nnet2
253 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
const Component & GetComponent(int32 c) const
Definition: nnet-nnet.cc:141
void DoStep(Real function_value, const VectorBase< Real > &gradient)
The user calls this function to provide the class with the function and gradient info at the point Ge...
void AddNnet(const VectorBase< BaseFloat > &scales, const Nnet &other)
For each updatable component, adds to it the corresponding element of "other" times the appropriate...
Definition: nnet-nnet.cc:576
double ComputeNnetGradient(const Nnet &nnet, const std::vector< NnetExample > &validation_set, int32 batch_size, Nnet *gradient)
ComputeNnetGradient is mostly used to compute gradients on validation sets; it divides the example in...
Definition: nnet-update.cc:302
static void GetInitialScaleParams(const NnetCombineConfig &combine_config, const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets, Vector< double > *scale_params)
Definition: combine-nnet.cc:91
kaldi::int32 int32
const VectorBase< Real > & GetValue(Real *objf_value=NULL) const
This returns the value of the variable x that has the best objective function so far, and the corresponding objective function value if requested.
static int32 GetInitialModel(const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets)
Returns an integer saying which model to use: either 0 ...
Definition: combine-nnet.cc:49
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
double ComputeNnetObjf(const Nnet &nnet, const std::vector< NnetExample > &examples, double *tot_accuracy)
Computes objective function over a minibatch.
Definition: nnet-update.cc:258
int32 NumComponents() const
Returns number of components– think of this as similar to # of layers, but e.g.
Definition: nnet-nnet.h:69
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void SetZero(bool treat_as_gradient)
Definition: nnet-nnet.cc:151
virtual BaseFloat DotProduct(const UpdatableComponent &other) const =0
Here, "other" is a component of the same specific type.
struct rnnlm::@11::@12 n
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
Configuration class that controls neural net combination, where we combine a number of neural nets...
Definition: combine-nnet.h:35
void ScaleComponents(const VectorBase< BaseFloat > &scales)
Scales the parameters of each of the updatable components.
Definition: nnet-nnet.cc:421
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Set(Real f)
Set all members of a vector to a specified value.
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
static void CombineNnets(const Vector< BaseFloat > &scale_params, const std::vector< Nnet > &nnets, Nnet *dest)
Definition: combine-nnet.cc:28
This is an implementation of L-BFGS.
Definition: optimization.h:84
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
const VectorBase< Real > & GetProposedValue() const
This returns the value at which the function wants us to compute the objective function and gradient...
Definition: optimization.h:134
int32 NumUpdatableComponents(const Nnet &dest)
Returns the number of updatable components in the nnet.
Definition: nnet-utils.cc:422
Class UpdatableComponent is a Component which has trainable parameters and contains some global param...
static BaseFloat ComputeObjfAndGradient(const std::vector< NnetExample > &validation_set, const Vector< double > &scale_params, const Nnet &orig_nnet, const Nnet &direction, Vector< double > *gradient)