rescale-nnet.h
Go to the documentation of this file.
1 // nnet2/rescale-nnet.h
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_NNET2_RESCALE_NNET_H_
21 #define KALDI_NNET2_RESCALE_NNET_H_
22 
23 #include "nnet2/nnet-update.h"
24 #include "nnet2/nnet-compute.h"
25 #include "itf/options-itf.h"
26 
27 // Neural net rescaling is a rescaling of the parameters of the various layers
28 // of a neural net, done so as to match certain specified statistics on the
29 // average derivative of the sigmoid, measured on sample data. This relates to
30 // how "saturated" the sigmoid is.
31 
32 namespace kaldi {
33 namespace nnet2 {
34 
35 
40 
41  // These are relatively unimportant; for now they have no
42  // command line options.
45  BaseFloat max_change; // maximum change on any one iteration (to
46  // ensure stability).
47  BaseFloat min_change; // minimum change on any one iteration (controls
48  // termination).
49 
50  NnetRescaleConfig(): target_avg_deriv(0.2),  // default for hidden layers that are neither first nor last
51  target_first_layer_avg_deriv(0.3),  // default for the first hidden layer
52  target_last_layer_avg_deriv(0.1),  // default for the last hidden layer
53  num_iters(10),  // NOTE(review): declaration not visible in this listing -- presumably an iteration limit; confirm
54  delta(0.01),  // NOTE(review): declaration not visible in this listing -- meaning to be confirmed
55  max_change(0.2), min_change(1.0e-05) { }  // bounds on the change made on any one iteration
56 
57  void Register(OptionsItf *opts) {
    // Registers the three target-average-derivative settings with `opts` under
    // the option names "target-avg-deriv", "target-first-layer-avg-deriv" and
    // "target-last-layer-avg-deriv".  Only these three members are registered
    // here.  Each help text is built from adjacent string literals, so every
    // fragment that is followed by another one must end in a space.
58  opts->Register("target-avg-deriv", &target_avg_deriv, "Target average derivative "
59  "for hidden layers that are not the first or last hidden layer "
60  "(as fraction of maximum derivative of the nonlinearity)");
61  opts->Register("target-first-layer-avg-deriv", &target_first_layer_avg_deriv,
62  "Target average derivative for the first hidden layer "
63  "(as fraction of maximum derivative of the nonlinearity)");
64  opts->Register("target-last-layer-avg-deriv", &target_last_layer_avg_deriv,
65  "Target average derivative for the last hidden layer, if "
66  "#hid-layers > 1 "
67  "(as fraction of maximum derivative of the nonlinearity)");
68  }
69 };
70 
// Declaration only; the definition is not part of this header.
// Per the file-level comment, neural net rescaling adjusts the parameters of
// the layers of a neural net so as to match specified statistics on the
// average derivative of the sigmoid, measured on sample data.
//   rescale_config  the NnetRescaleConfig settings to use.
//   examples        presumably the sample data the statistics are measured
//                   on -- confirm against the implementation.
//   nnet            the only non-const argument, so presumably the network
//                   that is rescaled in place -- confirm against the
//                   implementation.
71 void RescaleNnet(const NnetRescaleConfig &rescale_config,
72  const std::vector<NnetExample> &examples,
73  Nnet *nnet);
74 
75 
76 
77 } // namespace nnet2
78 } // namespace kaldi
79 
80 #endif
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
void RescaleNnet(const NnetRescaleConfig &rescale_config, const std::vector< NnetExample > &examples, Nnet *nnet)
void Register(OptionsItf *opts)
Definition: rescale-nnet.h:57
This header provides functionality for sample-by-sample stochastic gradient descent and gradient comp...