int32 num_new_nnets = nnets.size() - 1;
scales.Set(1.0 / num_new_nnets);
*direction = nnets[1];
for (int32 n = 2; n < 1 + num_new_nnets; n++)
  direction->AddNnet(scales, nnets[n]);
direction->AddNnet(scales, nnets[0]);
const Nnet &direction,
dest->AddNnet(scales, direction);
const std::vector<NnetExample> &validation_set,
const Nnet &orig_nnet,
const Nnet &direction,
AddDirection(orig_nnet, direction, scale_params_float, &nnet_combined);
Nnet nnet_gradient(nnet_combined);
bool is_gradient = true;
nnet_gradient.SetZero(is_gradient);
int32 batch_size = 1024;
BaseFloat tot_count = validation_set.size();
if (uc_direction != NULL) {
  (*gradient)(i) = dotprod;
const std::vector<NnetExample> &validation_set,
const std::vector<Nnet> &nnets,
double objf, initial_objf, zero_objf;
KALDI_LOG << "Objective function at old parameters is "
scale_params.Set(1.0);
lbfgs_options.m = dim;
KALDI_VLOG(2) << "Iteration " << i << " scale-params = " << scale_params
              << ", objf = " << objf << ", gradient = " << gradient;
if (i == 0) initial_objf = objf;
lbfgs.DoStep(objf, gradient);
KALDI_LOG << "Combining nnets, after BFGS, validation objf per frame changed from "
          << zero_objf << " (no change), or " << initial_objf << " (default change), "
          << " to " << objf << "; scale factors on update direction are "
BaseFloat objf_change = objf - zero_objf;
if (overshoot_max < overshoot) {
  KALDI_LOG << "Limiting overshoot from " << overshoot << " to " << overshoot_max
            << " since the objf-impr " << objf_change << " is close to "
  overshoot = overshoot_max;
KALDI_ASSERT(overshoot < 2.0 && "--valid-impr-thresh must be < 2.0 or "
             "it will lead to instability.");
scale_params.Scale(overshoot);
KALDI_LOG << "Combining nnets, after overshooting, validation objf changed "
          << "to " << objf << ". Note: (zero, start, optimized) objfs were "
          << zero_objf << ", " << initial_objf << ", " << optimized_objf;
if (objf < zero_objf) {
  KALDI_WARN << "After overshooting, objf was worse than not updating; not doing the "
  scale_params.Scale(1.0 / overshoot);
AddDirection(nnets[0], direction, scale_params_float, nnet_out);
BaseFloat step_length = scale_params(i), factor = step_length;
KALDI_LOG << "For component " << j << ", step length was " << step_length
          << ", updating learning rate by factor " << factor << ", changing "
          << "learning rate from " << uc->LearningRate() << " to "
          << new_learning_rate;
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
const Component & GetComponent(int32 c) const
void DoStep(Real function_value, const VectorBase< Real > &gradient)
The user calls this function to provide the class with the function and gradient info at the point GetProposedValue().
void CombineNnetsA(const NnetCombineAconfig &config, const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets, Nnet *nnet_out)
void AddNnet(const VectorBase< BaseFloat > &scales, const Nnet &other)
For each updatable component, adds to it the corresponding element of "other" times the appropriate...
static void AddDirection(const Nnet &orig_nnet, const Nnet &direction, const VectorBase< BaseFloat > &scales, Nnet *dest)
Sets "dest" to orig_nnet plus "direction", with each updatable component of "direction" first scaled ...
double ComputeNnetGradient(const Nnet &nnet, const std::vector< NnetExample > &validation_set, int32 batch_size, Nnet *gradient)
ComputeNnetGradient is mostly used to compute gradients on validation sets; it divides the example in...
const VectorBase< Real > & GetValue(Real *objf_value=NULL) const
This returns the value of the variable x that has the best objective function so far, and the corresponding objective function value if requested.
BaseFloat min_learning_rate
int32 NumComponents() const
Returns number of components — think of this as similar to # of layers, but e.g.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
BaseFloat min_learning_rate_factor
void SetZero(bool treat_as_gradient)
virtual BaseFloat DotProduct(const UpdatableComponent &other) const =0
Here, "other" is a component of the same specific type.
BaseFloat max_learning_rate_factor
static void GetUpdateDirection(const std::vector< Nnet > &nnets, Nnet *direction)
MatrixIndexT Dim() const
Returns the dimension of the vector.
void ScaleComponents(const VectorBase< BaseFloat > &scales)
Scales the parameters of each of the updatable components.
BaseFloat valid_impr_thresh
A class representing a vector.
#define KALDI_ASSERT(cond)
void Set(Real f)
Set all members of a vector to a specified value.
This is an implementation of L-BFGS.
void SetLearningRate(BaseFloat lrate)
Sets the learning rate of gradient descent.
Provides a vector abstraction class.
BaseFloat LearningRate() const
Gets the learning rate of gradient descent.
const VectorBase< Real > & GetProposedValue() const
This returns the value at which the function wants us to compute the objective function and gradient...
int32 NumUpdatableComponents(const Nnet &dest)
Returns the number of updatable components in the nnet.
Class UpdatableComponent is a Component which has trainable parameters and contains some global param...
static BaseFloat ComputeObjfAndGradient(const std::vector< NnetExample > &validation_set, const Vector< double > &scale_params, const Nnet &orig_nnet, const Nnet &direction, Vector< double > *gradient)