rescale-nnet.cc
Go to the documentation of this file.
1 // nnet2/rescale-nnet.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "nnet2/rescale-nnet.h"
21 
22 namespace kaldi {
23 namespace nnet2 {
24 
25 
26 class NnetRescaler {
27  public:
29  const std::vector<NnetExample> &examples,
30  Nnet *nnet):
31  config_(config), examples_(examples), nnet_(nnet) {}
32 
33  void Rescale();
34 
35  private:
37  void FormatInput(const std::vector<NnetExample> &data,
38  CuMatrix<BaseFloat> *input);
39  void RescaleComponent(int32 c, int32 num_chunks,
40  CuMatrixBase<BaseFloat> *cur_data_in,
41  CuMatrix<BaseFloat> *next_data);
42 
44 
46 
48  const std::vector<NnetExample> &examples_;
50  std::vector <ChunkInfo> chunk_info_out_;
51  std::set<int32> relevant_indexes_; // values of c with AffineComponent followed
52  // by (at c+1) NonlinearComponent that is not SoftmaxComponent.
53 };
54 
55 
56 void NnetRescaler::FormatInput(const std::vector<NnetExample> &data,
57  CuMatrix<BaseFloat> *input) {
58  KALDI_ASSERT(data.size() > 0);
59  int32 num_splice = nnet_->LeftContext() + 1 + nnet_->RightContext();
60  KALDI_ASSERT(data[0].input_frames.NumRows() == num_splice);
61 
62  int32 feat_dim = data[0].input_frames.NumCols(),
63  spk_dim = data[0].spk_info.Dim(),
64  tot_dim = feat_dim + spk_dim; // we append these at the neural net
65  // input... note, spk_dim might be 0.
66  KALDI_ASSERT(tot_dim == nnet_->InputDim());
67  int32 num_chunks = data.size();
68 
69  input->Resize(num_splice * num_chunks,
70  tot_dim);
71  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
72  CuSubMatrix<BaseFloat> dest(*input,
73  chunk * num_splice, num_splice,
74  0, feat_dim);
75  Matrix<BaseFloat> src(data[chunk].input_frames);
76  dest.CopyFromMat(src);
77  if (spk_dim != 0) {
78  CuSubMatrix<BaseFloat> spk_dest(*input,
79  chunk * num_splice, num_splice,
80  feat_dim, spk_dim);
81  spk_dest.CopyRowsFromVec(data[chunk].spk_info);
82  }
83  }
84  // TODO : filter out the unnecessary rows from the input
85  nnet_->ComputeChunkInfo(num_splice, num_chunks, &chunk_info_out_);
86 
87 }
88 
90  for (int32 c = 0; c + 1 < nnet_->NumComponents(); c++)
91  if (dynamic_cast<AffineComponent*>(&nnet_->GetComponent(c)) != NULL &&
92  (dynamic_cast<NonlinearComponent*>(&nnet_->GetComponent(c+1)) != NULL &&
93  dynamic_cast<SoftmaxComponent*>(&nnet_->GetComponent(c+1)) == NULL))
94  relevant_indexes_.insert(c);
95 }
96 
97 
99  KALDI_ASSERT(relevant_indexes_.count(c) == 1);
100  BaseFloat factor;
101  if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
102  factor = 0.25;
103  else if (dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
104  factor = 1.0;
105  else
106  KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
107 
108  int32 last_c = *std::max_element(relevant_indexes_.begin(), relevant_indexes_.end()),
109  first_c = *std::min_element(relevant_indexes_.begin(), relevant_indexes_.end());
110  if (c == first_c)
111  return factor * config_.target_first_layer_avg_deriv;
112  else if (c == last_c)
113  return factor * config_.target_last_layer_avg_deriv;
114  else
115  return factor * config_.target_avg_deriv;
116 }
117 
118 // Here, c is the index of the affine component, and
119 // c + 1 is the index of the nonlinear component; *cur_data is the
120 // output of the affine component.
122  int32 c,
123  int32 num_chunks,
124  CuMatrixBase<BaseFloat> *cur_data_in,
125  CuMatrix<BaseFloat> *next_data) {
126  int32 rows = cur_data_in->NumRows(), cols = cur_data_in->NumCols();
127  // Only handle sigmoid or tanh here.
128  if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) == NULL &&
129  dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) == NULL)
130  KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
131  KALDI_ASSERT(chunk_info_out_[0].NumChunks() == num_chunks); //TODO verify how this component can be used
132  // rewrite the
133  // chunk_info_out_
134  // computation
135  // the nonlinear component:
136  NonlinearComponent &nc =
137  *(dynamic_cast<NonlinearComponent*>(&(nnet_->GetComponent(c + 1))));
138  ChunkInfo in_info, out_info;
139  in_info = chunk_info_out_[c+1];
140  out_info = chunk_info_out_[c+2];
141 
142  BaseFloat orig_avg_deriv, target_avg_deriv = GetTargetAvgDeriv(c);
143  BaseFloat cur_scaling = 1.0; // current rescaling factor (on input).
144  int32 num_iters = 10;
145 
146  CuMatrix<BaseFloat> cur_data(*cur_data_in),
147  ones(rows, cols), in_deriv(rows, cols);
148 
149  ones.Set(1.0);
150  nc.Propagate(in_info, out_info, cur_data, next_data);
151  nc.Backprop(in_info, out_info, cur_data, *next_data, ones, NULL, &in_deriv);
152  BaseFloat cur_avg_deriv;
153  cur_avg_deriv = in_deriv.Sum() / (rows * cols);
154  orig_avg_deriv = cur_avg_deriv;
155  for (int32 iter = 0; iter < num_iters; iter++) {
156  // We already have "cur_avg_deriv"; perturb the scale and compute
157  // the next avg_deriv, so we can see how it changes with the scale.
158  cur_data.CopyFromMat(*cur_data_in);
159  cur_data.Scale(cur_scaling + config_.delta);
160  nc.Propagate(in_info, out_info, cur_data, next_data);
161  nc.Backprop(in_info, out_info, cur_data, *next_data, ones, NULL, &in_deriv);
162  BaseFloat next_avg_deriv = in_deriv.Sum() / (rows * cols);
163  KALDI_ASSERT(next_avg_deriv < cur_avg_deriv);
164  // "gradient" is how avg_deriv changes as we change the scale.
165  // should be negative.
166  BaseFloat gradient = (next_avg_deriv - cur_avg_deriv) / config_.delta;
167  KALDI_ASSERT(gradient < 0.0);
168  BaseFloat proposed_change = (target_avg_deriv - cur_avg_deriv) / gradient;
169  KALDI_VLOG(2) << "cur_avg_deriv = " << cur_avg_deriv << ", target_avg_deriv = "
170  << target_avg_deriv << ", gradient = " << gradient
171  << ", proposed_change " << proposed_change;
172  // Limit size of proposed change in "cur_scaling", to ensure stability.
173  if (fabs(proposed_change / cur_scaling) > config_.max_change)
174  proposed_change = cur_scaling * config_.max_change *
175  (proposed_change > 0.0 ? 1.0 : -1.0);
176  cur_scaling += proposed_change;
177 
178  cur_data.CopyFromMat(*cur_data_in);
179  cur_data.Scale(cur_scaling);
180  nc.Propagate(in_info, out_info, cur_data, next_data);
181  nc.Backprop(in_info, out_info, cur_data, *next_data, ones, NULL, &in_deriv);
182  cur_avg_deriv = in_deriv.Sum() / (rows * cols);
183  if (fabs(proposed_change) < config_.min_change) break; // Terminate the
184  // optimization
185  }
186  UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(
187  &nnet_->GetComponent(c));
188  KALDI_ASSERT(uc != NULL);
189  uc->Scale(cur_scaling); // scale the parameters of the previous
190  // AffineComponent.
191 
192  KALDI_LOG << "For component " << c << ", scaling parameters by "
193  << cur_scaling << "; average "
194  << "derivative changed from " << orig_avg_deriv << " to "
195  << cur_avg_deriv << "; target was " << target_avg_deriv;
196 }
197 
198 
199 
201  ComputeRelevantIndexes(); // set up relevant_indexes_.
202  CuMatrix<BaseFloat> cur_data, next_data;
203  FormatInput(examples_, &cur_data);
204  int32 num_chunks = examples_.size();
205  for (int32 c = 0; c < nnet_->NumComponents(); c++) {
206  Component &component = nnet_->GetComponent(c);
207  if (relevant_indexes_.count(c - 1) == 1) {
208  // the following function call also appropriately sets "next_data"
209  // after doing the rescaling
210  RescaleComponent(c - 1, num_chunks, &cur_data, &next_data);
211  } else {
212  component.Propagate(chunk_info_out_[c], chunk_info_out_[c+1], cur_data, &next_data);
213  }
214  cur_data.Swap(&next_data);
215  }
216 }
217 
218 void RescaleNnet(const NnetRescaleConfig &rescale_config,
219  const std::vector<NnetExample> &examples,
220  Nnet *nnet) {
221  NnetRescaler rescaler(rescale_config, examples, nnet);
222  rescaler.Rescale();
223 }
224 
225 
226 } // namespace nnet2
227 } // namespace kaldi
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
const Component & GetComponent(int32 c) const
Definition: nnet-nnet.cc:141
int32 LeftContext() const
Returns the left-context summed over all the Components...
Definition: nnet-nnet.cc:42
This kind of Component is a base-class for things like sigmoid and softmax.
const std::vector< NnetExample > & examples_
Definition: rescale-nnet.cc:48
void RescaleComponent(int32 c, int32 num_chunks, CuMatrixBase< BaseFloat > *cur_data_in, CuMatrix< BaseFloat > *next_data)
virtual void Scale(BaseFloat scale)=0
This new virtual function scales the parameters by this amount.
Abstract class, basic element of the network, it is a box with defined inputs, outputs, and transformation functions interface.
void FormatInput(const std::vector< NnetExample > &data, CuMatrix< BaseFloat > *input)
takes the input and formats as a single matrix, in forward_data_[0].
Definition: rescale-nnet.cc:56
Real Sum() const
Definition: cu-matrix.cc:3012
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
void RescaleNnet(const NnetRescaleConfig &rescale_config, const std::vector< NnetExample > &examples, Nnet *nnet)
int32 NumComponents() const
Returns number of components– think of this as similar to # of layers, but e.g.
Definition: nnet-nnet.h:69
BaseFloat GetTargetAvgDeriv(int32 c)
Definition: rescale-nnet.cc:98
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
NnetRescaler(const NnetRescaleConfig &config, const std::vector< NnetExample > &examples, Nnet *nnet)
Definition: rescale-nnet.cc:28
int32 RightContext() const
Returns the right-context summed over all the Components...
Definition: nnet-nnet.cc:56
const NnetRescaleConfig & config_
Definition: rescale-nnet.cc:47
ChunkInfo is a class whose purpose is to describe the structure of matrices holding features...
std::vector< ChunkInfo > chunk_info_out_
Definition: rescale-nnet.cc:50
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const =0
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
#define KALDI_ERR
Definition: kaldi-error.h:147
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const =0
Perform forward pass propagation Input->Output.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
#define KALDI_LOG
Definition: kaldi-error.h:153
void ComputeChunkInfo(int32 input_chunk_size, int32 num_chunks, std::vector< ChunkInfo > *chunk_info_out) const
Uses the output of the Context() functions of the network, to compute a vector of size NumComponents(...
Definition: nnet-nnet.cc:65
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50
std::set< int32 > relevant_indexes_
Definition: rescale-nnet.cc:51
Class UpdatableComponent is a Component which has trainable parameters and contains some global param...
int32 InputDim() const
Dimension of the input features, e.g.
Definition: nnet-nnet.cc:36