// nnet3/nnet-derivative-test.cc

// Copyright 2015  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet3/nnet-nnet.h"
#include "nnet3/nnet-compile.h"
#include "nnet3/nnet-analyze.h"
#include "nnet3/nnet-test-utils.h"
#include "nnet3/nnet-optimize.h"
#include "nnet3/nnet-compute.h"
namespace kaldi {
namespace nnet3 {


void ComputeMinAndMaxTimes(const std::vector<Index> &indexes,
                           int32 *min_t,
                           int32 *max_t) {
  KALDI_ASSERT(!indexes.empty());
  *min_t = indexes[0].t;
  *max_t = *min_t;
  for (int32 n = 1; n < static_cast<int32>(indexes.size()); n++) {
    *min_t = std::min(*min_t, indexes[n].t);
    *max_t = std::max(*max_t, indexes[n].t);
  }
}


// This function is called if you want to set min_deriv_time and max_deriv_time.
// It works out some meaningful values to set, based on the computation request.
void SetDerivTimesOptions(const ComputationRequest &request,
                          NnetOptimizeOptions *opt_config) {
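  // (A note on these options: in the nnet3 optimization code,
  // min_deriv_time/max_deriv_time restrict the backward pass to Indexes whose
  // 't' value lies in [min_deriv_time, max_deriv_time]; derivative computation
  // outside that window is pruned away.)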
  int32 min_t, max_t;
  KALDI_ASSERT(request.inputs[0].name == "input");
  const std::vector<Index> &input_indexes = request.inputs[0].indexes;
  ComputeMinAndMaxTimes(input_indexes, &min_t, &max_t);

  int32 orig_min_t = min_t, orig_max_t = max_t;
  int32 t_length = max_t + 1 - min_t;
  KALDI_ASSERT(t_length > 0);
  if (t_length == 1)
    return;
  if (RandInt(0, 2) == 0) {
    // remove up to 4 frames from the left (but don't remove everything).
    min_t += std::min(4, RandInt(0, t_length - 1));
    opt_config->min_deriv_time = min_t;
    t_length = max_t + 1 - min_t;
    KALDI_ASSERT(t_length > 0);
  }
  if (RandInt(0, 2) == 0) {
    max_t -= std::min(4, RandInt(0, t_length - 1));
    opt_config->max_deriv_time = max_t;
    t_length = max_t + 1 - min_t;
    KALDI_ASSERT(t_length > 0);
  }
  if (RandInt(0, 4) == 0) {
    // ensure that all derivs will be pruned away;
    // this tests more code.
    min_t = orig_min_t - 10;
    max_t = min_t + 1;
    opt_config->min_deriv_time = min_t;
    opt_config->max_deriv_time = max_t;
  }

  int32 output_min_t, output_max_t;
  KALDI_ASSERT(request.outputs[0].name == "output");
  ComputeMinAndMaxTimes(request.outputs[0].indexes,
                        &output_min_t, &output_max_t);

  KALDI_LOG << "ComputationRequest has output (min,max) = (" << output_min_t
            << ',' << output_max_t << "), input (min,max) = (" << orig_min_t
            << ',' << orig_max_t << "), limiting deriv times to ("
            << opt_config->min_deriv_time << ','
            << opt_config->max_deriv_time << ')';
}

// This test makes sure that the model-derivatives are correct.
void UnitTestNnetModelDerivatives() {
  int32 N = 20;
  for (int32 n = 0; n < N; n++) {
    struct NnetGenerationOptions gen_config;
    //gen_config.allow_nonlinearity = false;
    //gen_config.allow_recursion = false;
    //gen_config.allow_final_nonlinearity = true;

    bool limit_deriv_times = (RandInt(0, 2) == 0);

    std::vector<std::string> configs;
    GenerateConfigSequence(gen_config, &configs);
    Nnet nnet;
    for (size_t j = 0; j < configs.size(); j++) {
      KALDI_LOG << "Input config[" << j << "] is: " << configs[j];
      std::istringstream is(configs[j]);
      nnet.ReadConfig(is);
    }

    ComputationRequest request;
    std::vector<Matrix<BaseFloat> > inputs;
    ComputeExampleComputationRequestSimple(nnet, &request, &inputs);

    // make sure that a model-derivative is requested, and an output-derivative
    // is supplied.
    request.need_model_derivative = true;
    request.outputs[0].has_deriv = true;
    // whether input-derivatives are required or not does not matter,
    // so we leave that as it is.

    NnetOptimizeOptions optimize_opts;
    CachingOptimizingCompilerOptions compiler_opts;
    if (limit_deriv_times) {
      SetDerivTimesOptions(request, &optimize_opts);
    }

    CachingOptimizingCompiler compiler(nnet, optimize_opts,
                                       compiler_opts);

    const NnetComputation &computation = *(compiler.Compile(request));

    {
      std::ostringstream os;
      computation.Print(os, nnet);
      KALDI_LOG << "Optimized computation is: " << os.str();
    }

    Nnet nnet_deriv(nnet);
    ScaleNnet(0.0, &nnet_deriv);
    SetNnetAsGradient(&nnet_deriv);  // forces "simple" update and unit
                                     // learning rate.
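    // (Because nnet_deriv starts from a zeroed copy of the model and is in
    // "gradient" mode, after the backward pass below it will hold exactly the
    // derivative of the objective function w.r.t. each parameter.)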

    int32 num_directions = 4;  // must be >= 1.  Best if it's > 1; this reduces
                               // the probability of random failures.

    // the order of these vectors is:
    // [ un-perturbed, perturbed-1, perturbed-2, perturbed-3, perturbed-4 ].
    std::vector<BaseFloat> measured_objf(num_directions + 1, 0.0),
        predicted_objf_change(num_directions + 1, 0.0);
    BaseFloat delta = 5.0e-04;

    // output_deriv is the derivative of the objective function w.r.t. the
    // (single) output.  We make the objf a linear function of the output and
    // just set output_deriv to a random matrix, which defines the
    // objective function.
    CuMatrix<BaseFloat> output_deriv;
    output_deriv.Resize(request.outputs[0].indexes.size(),
                        nnet.OutputDim("output"));
    output_deriv.SetRandn();
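    // (Concretely, objf = TraceMatMat(output, output_deriv, kTrans)
    //            = \sum_{i,j} output(i,j) * output_deriv(i,j),
    // so the derivative of the objf w.r.t. the output is just output_deriv.)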

    NnetComputeOptions compute_opts;
    if (RandInt(0, 1) == 0)
      compute_opts.debug = true;

    // pass 0 is the forward pass with the un-perturbed model.
    // Other passes are with various differently-perturbed versions of
    // the model.
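    // (This is a standard finite-difference check of the gradient: to first
    // order, objf(params + delta_params) - objf(params) is approximately
    // DotProduct(delta_params, gradient), so for each random perturbation we
    // compare the measured change in objf against this predicted change.)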
    for (int32 pass = 0; pass <= num_directions; pass++) {
      Nnet nnet_copy(nnet);
      if (pass > 0)
        PerturbParams(delta, &nnet_copy);

      NnetComputer computer(compute_opts,
                            computation,
                            nnet_copy,
                            (pass == 0 ? &nnet_deriv : &nnet_copy));

      // provide the input to the computation.
      for (size_t i = 0; i < request.inputs.size(); i++) {
        CuMatrix<BaseFloat> temp(inputs[i]);
        computer.AcceptInput(request.inputs[i].name, &temp);
      }

      KALDI_LOG << "Running forward computation";
      computer.Run();

      const CuMatrixBase<BaseFloat> &output(computer.GetOutput("output"));
      KALDI_LOG << "Output sum for pass " << pass << " is " << output.Sum();
      BaseFloat objf = TraceMatMat(output, output_deriv, kTrans);
      measured_objf[pass] = objf;

      if (pass == 0) {
        // we need to do the backward computation (to get the model derivative).
        CuMatrix<BaseFloat> temp(output_deriv);
        computer.AcceptInput("output", &temp);
        KALDI_LOG << "Running backward computation";
        computer.Run();
      } else {
        // work out the predicted objf-change as the dot-product of the deriv
        // and the parameter-change.  The expression below can be interpreted as
        // DotProduct(nnet_copy - nnet, nnet_deriv).
        predicted_objf_change[pass] = DotProduct(nnet_copy, nnet_deriv) -
                                      DotProduct(nnet, nnet_deriv);
      }
    }

    Vector<BaseFloat> predicted_objf_change_vec(num_directions),
        measured_objf_change_vec(num_directions);
    for (int32 d = 0; d < num_directions; d++) {
      BaseFloat predicted_change = predicted_objf_change[d + 1],
          measured_change = measured_objf[d + 1] - measured_objf[0];
      predicted_objf_change_vec(d) = predicted_change;
      measured_objf_change_vec(d) = measured_change;
    }
    KALDI_LOG << "Vector of predicted objf-change is: "
              << predicted_objf_change_vec;
    KALDI_LOG << "Vector of measured objf-change is: "
              << measured_objf_change_vec;
    BaseFloat delta_thresh_warn = 0.05, delta_thresh_fail = 0.25;
    if (limit_deriv_times) {
      KALDI_LOG << "Not checking that predicted/measured changes matched "
                << "because we limited the times of the derivatives.";
    } else {
      if (!ApproxEqual(predicted_objf_change_vec,
                       measured_objf_change_vec, delta_thresh_fail)) {
        if (NnetIsRecurrent(nnet)) {
          KALDI_WARN << "Predicted and measured objf-changes differ too much "
                     << "(would normally be beyond the error threshold, but "
                     << "this nnet is recurrent, so letting it pass).";
        } else {
          KALDI_ERR << "Predicted and measured objf-changes differ too much.";
        }
      }
      if (!ApproxEqual(predicted_objf_change_vec,
                       measured_objf_change_vec, delta_thresh_warn)) {
        KALDI_WARN << "Predicted and measured objf-changes differ quite a lot.";
      }
    }
  }
}


// This test makes sure that the input-derivatives are correct.
void UnitTestNnetInputDerivatives() {
  int32 N = 20;
  for (int32 n = 0; n < N; n++) {
    struct NnetGenerationOptions gen_config;
    //gen_config.allow_nonlinearity = false;
    //gen_config.allow_recursion = false;
    //gen_config.allow_final_nonlinearity = true;
    bool allow_optimization = true;

    std::vector<std::string> configs;
    GenerateConfigSequence(gen_config, &configs);
    Nnet nnet;
    for (size_t j = 0; j < configs.size(); j++) {
      KALDI_LOG << "Input config[" << j << "] is: " << configs[j];
      std::istringstream is(configs[j]);
      nnet.ReadConfig(is);
    }

    ComputationRequest request;
    std::vector<Matrix<BaseFloat> > inputs;
    ComputeExampleComputationRequestSimple(nnet, &request, &inputs);

    // make sure that all inputs and outputs have derivatives
    // requested/provided, and that the model-update
    // (need_model_derivative) is not requested.
    request.need_model_derivative = false;
    for (size_t i = 0; i < request.inputs.size(); i++)
      request.inputs[i].has_deriv = true;
    request.outputs[0].has_deriv = true;

    NnetComputation computation;
    Compiler compiler(request, nnet);

    CompilerOptions opts;
    compiler.CreateComputation(opts, &computation);
    {
      std::ostringstream os;
      computation.Print(os, nnet);
      KALDI_LOG << "Generated computation is: " << os.str();
    }
    CheckComputationOptions check_config;
    // we can do the rewrite check since it's before optimization.
    check_config.check_rewrite = true;
    ComputationChecker checker(check_config, nnet, computation);
    checker.Check();

    if (RandInt(0, 3) != 0 && allow_optimization) {
      NnetOptimizeOptions opt_config;
      // opt_config.initialize_undefined = false;  // temp
      Optimize(opt_config, nnet,
               MaxOutputTimeInRequest(request),
               &computation);
      std::ostringstream os;
      computation.Print(os, nnet);
      KALDI_LOG << "Optimized computation is: " << os.str();
    }

    NnetComputeOptions compute_opts;
    if (RandInt(0, 1) == 0)
      compute_opts.debug = true;
    computation.ComputeCudaIndexes();


    int32 num_directions = 3;  // must be >= 1.  Best if it's > 1; this reduces
                               // the probability of random failures.

    // the order of these vectors is:
    // [ un-perturbed, perturbed-1, perturbed-2, perturbed-3, un-perturbed ].
    // we compute un-perturbed twice to double-check the model did not change.
    std::vector<BaseFloat> measured_objf(num_directions + 2, 0.0),
        predicted_objf_change(num_directions + 2, 0.0);
    BaseFloat delta = 1.0e-03;

    // output_deriv is the derivative of the objective function w.r.t. the
    // (single) output.  We make the objf a linear function of the output and
    // just set output_deriv to a random matrix, which defines the
    // objective function.
    CuMatrix<BaseFloat> output_deriv;
    output_deriv.Resize(request.outputs[0].indexes.size(),
                        nnet.OutputDim("output"));
    output_deriv.SetRandn();

    std::vector<CuMatrix<BaseFloat> > delta_inputs(inputs.size());
    std::vector<CuMatrix<BaseFloat> > input_derivs(inputs.size());

    // pass 0 is the forward pass with the un-perturbed features; so is
    // pass num_directions + 1.
    // Other passes are with various differently-perturbed versions of
    // the features.
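    // (As in the model-derivative test, this is a finite-difference check:
    // pass 0 computes the input derivatives, and each perturbed pass compares
    // the measured change in objf against the predicted first-order change,
    // i.e. the sum over the inputs of
    // TraceMatMat(input_derivs[i], delta_inputs[i], kTrans).)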
    for (int32 pass = 0; pass <= num_directions + 1; pass++) {
      // the only reason we might need to provide the &nnet parameter is if the
      // StoreStats() operation had been requested.  We made sure no model
      // update is being performed.
      NnetComputer computer(compute_opts,
                            computation,
                            nnet,
                            &nnet);

      // provide the input to the computation.
      for (size_t i = 0; i < request.inputs.size(); i++) {
        CuMatrix<BaseFloat> temp(inputs[i]);
        if (pass > 0 && pass <= num_directions) {  // Perturb the input randomly.
          delta_inputs[i].Resize(inputs[i].NumRows(), inputs[i].NumCols());
          delta_inputs[i].SetRandn();
          delta_inputs[i].Scale(delta);
          // if there are > 1 inputs, sometimes set the delta for input 0 to
          // zero; this may give a more accurate test of errors in the iVector
          // derivative computation.
          if (i == 0 && request.inputs.size() > 1 && RandInt(0, 1) == 0)
            delta_inputs[i].SetZero();
          temp.AddMat(1.0, delta_inputs[i]);
          predicted_objf_change[pass] += TraceMatMat(input_derivs[i],
                                                     delta_inputs[i], kTrans);
        }
        computer.AcceptInput(request.inputs[i].name, &temp);
      }

      KALDI_LOG << "Running forward computation";
      computer.Run();

      const CuMatrixBase<BaseFloat> &output(computer.GetOutput("output"));
      KALDI_LOG << "Output sum for pass " << pass << " is " << output.Sum();
      BaseFloat objf = TraceMatMat(output, output_deriv, kTrans);
      measured_objf[pass] = objf;

      if (pass == 0) {
        // We need to compute the input derivatives.
        CuMatrix<BaseFloat> temp(output_deriv);
        computer.AcceptInput("output", &temp);
        KALDI_LOG << "Running backward computation";
        computer.Run();
        for (size_t i = 0; i < request.inputs.size(); i++) {
          input_derivs[i] = computer.GetOutput(request.inputs[i].name);
          KALDI_LOG << "Input-deriv norm for '" << request.inputs[i].name
                    << "' is " << input_derivs[i].FrobeniusNorm();
        }
      }
    }
    KALDI_ASSERT(ApproxEqual(measured_objf[0],
                             measured_objf[num_directions + 1]));

    Vector<BaseFloat> predicted_objf_change_vec(num_directions),
        measured_objf_change_vec(num_directions);
    for (int32 d = 0; d < num_directions; d++) {
      BaseFloat predicted_change = predicted_objf_change[d + 1],
          measured_change = measured_objf[d + 1] - measured_objf[0];
      predicted_objf_change_vec(d) = predicted_change;
      measured_objf_change_vec(d) = measured_change;
    }
    KALDI_LOG << "Vector of predicted objf-change is: "
              << predicted_objf_change_vec;
    KALDI_LOG << "Vector of measured objf-change is: "
              << measured_objf_change_vec;
    BaseFloat delta_thresh_warn = 0.05, delta_thresh_fail = 0.25;
    if (!ApproxEqual(predicted_objf_change_vec,
                     measured_objf_change_vec, delta_thresh_fail)) {
      if (NnetIsRecurrent(nnet)) {
        KALDI_WARN << "Predicted and measured objf-changes differ too much "
                   << "(would normally be beyond the error threshold, but "
                   << "this nnet is recurrent, so letting it pass).";
      } else {
        KALDI_ERR << "Predicted and measured objf-changes differ too much.";
      }
    } else if (!ApproxEqual(predicted_objf_change_vec,
                            measured_objf_change_vec, delta_thresh_warn)) {
      KALDI_WARN << "Predicted and measured objf-changes differ quite a lot.";
    }
  }
}


} // namespace nnet3
} // namespace kaldi

int main() {
  using namespace kaldi;
  using namespace kaldi::nnet3;
  SetVerboseLevel(3);
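  // If compiled with CUDA support, run the whole test suite twice: first with
  // the GPU disabled ("no"), then with it enabled ("yes"), so that both the
  // CPU and GPU code paths get exercised.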
#if HAVE_CUDA == 1
  kaldi::int32 loop = 0;
  for (loop = 0; loop < 2; loop++) {
    CuDevice::Instantiate().SetDebugStrideMode(true);
    if (loop == 0)
      CuDevice::Instantiate().SelectGpuId("no");
    else
      CuDevice::Instantiate().SelectGpuId("yes");
#endif
    UnitTestNnetModelDerivatives();
    UnitTestNnetInputDerivatives();
#if HAVE_CUDA == 1
  }  // No for loop if 'HAVE_CUDA != 1'.
  CuDevice::Instantiate().PrintProfile();
#endif
  KALDI_LOG << "Nnet derivative tests succeeded.";

  return 0;
}