mixup-nnet.cc
Go to the documentation of this file.
1 // nnet2/mixup-nnet.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "nnet2/mixup-nnet.h"
21 #include "gmm/model-common.h" // for GetSplitTargets()
22 #include <numeric> // for std::accumulate
23 
24 namespace kaldi {
25 namespace nnet2 {
26 
27 
37 static void GiveNnetCorrectTopology(Nnet *nnet,
38  AffineComponent **affine_component,
39  SoftmaxComponent **softmax_component,
40  SumGroupComponent **sum_group_component) {
41  int32 nc = nnet->NumComponents();
42  KALDI_ASSERT(nc > 0);
43  Component* component = &(nnet->GetComponent(nc - 1));
44  if ((*sum_group_component =
45  dynamic_cast<SumGroupComponent*>(component)) == NULL) {
46  KALDI_LOG << "Adding SumGroupComponent to neural net.";
47  int32 dim = component->OutputDim();
48  // Give it the same learning rate as the first updatable layer we have.
49  std::vector<int32> sizes(dim, 1); // a vector of all ones, of dimension "dim".
50 
51  *sum_group_component = new SumGroupComponent();
52  (*sum_group_component)->Init(sizes);
53  nnet->Append(*sum_group_component);
54  nc++;
55  }
56  component = &(nnet->GetComponent(nc - 2));
57  if ((*softmax_component = dynamic_cast<SoftmaxComponent*>(component)) == NULL)
58  KALDI_ERR << "Neural net has wrong topology: expected second-to-last "
59  << "component to be SoftmaxComponent, type is "
60  << component->Type();
61  component = &(nnet->GetComponent(nc - 3));
62  if ((*affine_component = dynamic_cast<AffineComponent*>(component)) == NULL)
63  KALDI_ERR << "Neural net has wrong topology: expected third-to-last "
64  << "component to be AffineComponent, type is "
65  << component->Type();
66 }
67 
68 
86 void MixupNnet(const NnetMixupConfig &mixup_config,
87  Nnet *nnet) {
88  AffineComponent *affine_component = NULL;
89  SoftmaxComponent *softmax_component = NULL;
90  SumGroupComponent *sum_group_component = NULL;
92  &affine_component,
93  &softmax_component,
94  &sum_group_component); // Adds a SumGroupComponent if needed.
95 
96  softmax_component->MixUp(mixup_config.num_mixtures,
97  mixup_config.power,
98  mixup_config.min_count,
99  mixup_config.perturb_stddev,
100  affine_component,
101  sum_group_component);
102  nnet->Check(); // Checks that dimensions all match up.
103 }
104 
105 
/**
   Allocate mixtures to states via a power rule, and add any new mixtures.

   Each group of the SumGroupComponent "sc" plays the role of a state; the
   per-output soft counts come from this->value_sum_.  GetSplitTargets()
   chooses a target #mixtures per state (power rule with a min-count floor);
   each state keeps at least its current size.  New mixtures are created by
   splitting the highest-count mixture within the state: its count is halved
   and shared with the new mixture, its row of the affine component's linear
   parameters is duplicated and the two copies are perturbed by
   +/- perturb_stddev * (random normal vector), and Log(0.5) is added to the
   bias so the (pre-softmax) mass is split between the two copies.
   Finally ac, sc and this softmax component are all resized consistently.

   @param num_mixtures    Overall target number of mixtures (softmax outputs).
   @param power           Exponent for the count-based power rule.
   @param min_count       Minimum count to allocate a new mixture.
   @param perturb_stddev  Stddev of the perturbation applied when splitting.
   @param ac              The preceding AffineComponent (params resized here).
   @param sc              The following SumGroupComponent (re-initialized here).
*/
void SoftmaxComponent::MixUp(int32 num_mixtures,
                             BaseFloat power,
                             BaseFloat min_count,
                             BaseFloat perturb_stddev,
                             AffineComponent *ac,
                             SumGroupComponent *sc) {
  // "counts" is derived from this->counts_ by summing.
  // counts(i) = total soft count for state i, summed over that state's
  // current mixtures in value_sum_; old_dim ends as the total old dimension.
  std::vector<int32> old_sizes;
  sc->GetSizes(&old_sizes);
  Vector<BaseFloat> counts(old_sizes.size());
  int32 old_dim = 0;
  for (size_t i = 0; i < old_sizes.size(); i++) {
    int32 this_input_dim = old_sizes[i];
    BaseFloat this_tot_count = 0.0;
    for (int32 d = 0; d < this_input_dim; d++, old_dim++)
      this_tot_count += this->value_sum_(old_dim);
    counts(i) = this_tot_count;
  }
  KALDI_ASSERT(old_dim == value_sum_.Dim());
  KALDI_ASSERT(counts.Sum() > 0 && "Cannot do mixing up without counts.");

  std::vector<int32> targets;  // #mixtures for each state.

  // Get the target number of mixtures for each state.
  GetSplitTargets(counts, num_mixtures, power, min_count, &targets);
  KALDI_ASSERT(targets.size() == old_sizes.size());
  // Never shrink a state: each new size is max(target, current size).
  std::vector<int32> new_sizes(old_sizes.size());
  for (size_t i = 0; i < targets.size(); i++)
    new_sizes[i] = std::max(targets[i], old_sizes[i]);
  int32 new_dim = std::accumulate(new_sizes.begin(), new_sizes.end(),
                                  static_cast<int32>(0)),
      affine_input_dim = ac->InputDim();
  KALDI_ASSERT(new_dim >= old_dim);
  sc->Init(new_sizes);

  // bias and linear terms from affine component:
  Vector<BaseFloat> old_bias_term(ac->bias_params_);
  Matrix<BaseFloat> old_linear_term(ac->linear_params_);

  Vector<BaseFloat> new_bias_term(new_dim);
  Matrix<BaseFloat> new_linear_term(new_dim, affine_input_dim);
  Vector<BaseFloat> new_counts(new_dim);

  // old_offset and new_offset are offsets into the dimension at the
  // input/output of the softmax component, before and after mixing up
  // respectively. They get incremented in the following loop.
  int32 old_offset = 0, new_offset = 0;
  Vector<BaseFloat> old_counts(this->value_sum_);
  for (size_t i = 0; i < old_sizes.size(); i++) {
    int32 this_old_dim = old_sizes[i],
          this_new_dim = new_sizes[i],
          this_cur_dim = this_old_dim;  // this_cur_dim is loop variable.

    // Views of this state's slice of the old and new parameters/counts.
    SubMatrix<BaseFloat> this_old_linear_term(old_linear_term,
                                              old_offset, this_old_dim,
                                              0, affine_input_dim),
        this_new_linear_term(new_linear_term,
                             new_offset, this_new_dim,
                             0, affine_input_dim);
    SubVector<BaseFloat> this_old_bias_term(old_bias_term,
                                            old_offset, this_old_dim),
        this_new_bias_term(new_bias_term, new_offset, this_new_dim),
        this_old_counts(old_counts,
                        old_offset, this_old_dim),
        this_new_counts(new_counts,
                        new_offset, this_new_dim);

    // Copy the same-dimensional part of the parameters and counts.
    this_new_linear_term.Range(0, this_old_dim, 0, affine_input_dim).
        CopyFromMat(this_old_linear_term);
    this_new_bias_term.Range(0, this_old_dim).
        CopyFromVec(this_old_bias_term);
    this_new_counts.Range(0, this_old_dim).
        CopyFromVec(this_old_counts);
    // The (pre-softmax) bias terms act like log mixture weights here.
    // Add the new components...
    // Each iteration splits the current highest-count mixture of this
    // state in two, so repeated splits spread across the big mixtures.
    // Note: the max is taken over the first this_cur_dim entries only
    // (counts already updated by earlier splits in this loop).
    for (; this_cur_dim < this_new_dim; this_cur_dim++) {
      BaseFloat *count_begin = this_new_counts.Data(),
          *count_end = count_begin + this_cur_dim,
          *count_max = std::max_element(count_begin, count_end);
      KALDI_ASSERT(*count_max > 0.0);
      *count_max *= 0.5;
      *count_end = *count_max;  // count for the element we're adding.
      int32 max_index = static_cast<int32>(count_max - count_begin),
          new_index = this_cur_dim;
      SubVector<BaseFloat> cur_vec(this_new_linear_term, max_index),
          new_vec(this_new_linear_term, new_index);
      new_vec.CopyFromVec(cur_vec);
      // Perturb the two copies in opposite directions so they diverge
      // during later training.
      Vector<BaseFloat> rand(affine_input_dim);
      rand.SetRandn();
      cur_vec.AddVec(perturb_stddev, rand);
      new_vec.AddVec(-perturb_stddev, rand);
      // Log(0.5): each copy gets half the original softmax mass.
      this_new_bias_term(max_index) += Log(0.5);
      this_new_bias_term(new_index) = this_new_bias_term(max_index);
    }
    old_offset += this_old_dim;
    new_offset += this_new_dim;
  }
  KALDI_ASSERT(old_offset == old_dim && new_offset == new_dim);
  ac->SetParams(new_bias_term, new_linear_term);
  // Keep the stored occupation stats consistent with the new dimension.
  this->value_sum_.Resize(new_counts.Dim());
  this->value_sum_.CopyFromVec(new_counts);
  this->count_ = this->value_sum_.Sum();
  this->dim_ = new_dim;
  KALDI_LOG << "Mixed up from dimension of " << old_dim << " to " << new_dim
            << " in the softmax layer.";
}
217 
218 
219 
220 
221 } // namespace nnet2
222 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
const Component & GetComponent(int32 c) const
Definition: nnet-nnet.cc:141
virtual void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &linear)
CuVector< BaseFloat > bias_params_
Abstract class, basic element of the network; it is a box with defined inputs, outputs, and transformation functions interface.
Real Sum() const
Definition: cu-vector.cc:297
virtual int32 OutputDim() const =0
Get size of output vectors.
kaldi::int32 int32
int32 NumComponents() const
Returns number of components– think of this as similar to # of layers, but e.g.
Definition: nnet-nnet.h:69
void GetSizes(std::vector< int32 > *sizes) const
void GetSplitTargets(const Vector< BaseFloat > &state_occs, int32 target_components, BaseFloat power, BaseFloat min_count, std::vector< int32 > *targets)
Get Gaussian-mixture or substate-mixture splitting targets, according to a power rule (e...
void Append(Component *new_component)
Appends this component to the components already in the neural net.
Definition: nnet-nnet.cc:643
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
Definition: cu-vector.cc:1078
double Log(double x)
Definition: kaldi-math.h:100
virtual int32 InputDim() const
Get size of input vectors.
static void GiveNnetCorrectTopology(Nnet *nnet, AffineComponent **affine_component, SoftmaxComponent **softmax_component, SumGroupComponent **sum_group_component)
This function makes sure the neural net ends with a SumGroupComponent.
Definition: mixup-nnet.cc:37
void Resize(MatrixIndexT dim, MatrixResizeType t=kSetZero)
Allocate the memory.
Definition: cu-vector.cc:993
#define KALDI_ERR
Definition: kaldi-error.h:147
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: kaldi-vector.h:70
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetRandn()
Set vector to random normally-distributed noise.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
virtual std::string Type() const =0
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void Check() const
Definition: nnet-nnet.cc:271
void Init(const std::vector< int32 > &sizes)
CuMatrix< BaseFloat > linear_params_
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void MixUp(int32 num_mixtures, BaseFloat power, BaseFloat min_count, BaseFloat perturb_stddev, AffineComponent *ac, SumGroupComponent *sc)
Allocate mixtures to states via a power rule, and add any new mixtures.
Definition: mixup-nnet.cc:107
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
void MixupNnet(const NnetMixupConfig &mixup_config, Nnet *nnet)
This function works as follows.
Definition: mixup-nnet.cc:86
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94