const std::vector<Nnet> &nnets,
const std::vector<NnetExample> &egs,
// ...
length = std::min(minibatch_size_,
                  static_cast<int32>(egs_.size()) - offset);
bool is_gradient = true;
nnet_gradient.SetZero(is_gradient);
std::vector<NnetExample> minibatch(egs_.begin() + offset,
                                   egs_.begin() + offset + length);
// ...
for (int32 n = 0; n < static_cast<int32>(nnets_.size()); n++) {
  // ...
}
// ...
const std::vector<NnetExample> &egs_;
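The loop over nnets_ above is where each minibatch's gradient contributes dot-product statistics to a Fisher-like scatter matrix that is later used for preconditioning. A minimal sketch of that accumulation, assuming Kaldi's SpMatrix::AddVec2 rank-one update and a hypothetical gradient_vec already flattened from the updatable components of nnet_gradient:

// Hedged sketch, not the code from this file.
Vector<double> gradient_vec(scatter_.NumRows());   // hypothetical flattened per-minibatch gradient
// ... fill gradient_vec from the updatable components of nnet_gradient ...
scatter_.AddVec2(1.0, gradient_vec);               // scatter_ += gradient_vec * gradient_vec^T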
const std::vector<NnetExample> &validation_set,
const std::vector<Nnet> &nnets_in,
// ...
    config_(combine_config), egs_(validation_set),
    nnets_(nnets_in), nnet_out_(nnet_out) {
  // ...
  ComputePreconditioner();
  // ...
  int32 dim = params_.Dim();
  // ...
  double regularizer_objf, initial_regularizer_objf;
  double objf, initial_objf;
  // ...
  lbfgs_options.m = std::min(dim, config_.max_lbfgs_dim);
  // ...
  for (int32 i = 0; i < config_.num_lbfgs_iters; i++) {
    // ...
    initial_regularizer_objf = regularizer_objf;
    // ...
    lbfgs.DoStep(objf, gradient);
  }
  // ...
  ComputeCurrentNnet(nnet_out_, true);
  // ...
  if (config_.regularizer != 0.0) {
    double initial_part = initial_objf - initial_regularizer_objf,
        part = objf - regularizer_objf;
    KALDI_LOG << "Combining nnets, objf/frame + regularizer changed from "
              << initial_part << " + " << initial_regularizer_objf
              << " = " << initial_objf << " to " << part << " + "
              << regularizer_objf << " = " << objf;
  } else {
    KALDI_LOG << "Combining nnets, objf per frame changed from "
              << initial_objf << " to " << objf;
  }
// ...
int32 GetInitialModel(const std::vector<NnetExample> &validation_set,
                      const std::vector<Nnet> &nnets) const;
void GetInitialParams();
void ComputePreconditioner();
// ...
double ComputeObjfAndGradient(Vector<double> *gradient,
                              double *regularizer_objf);
void ComputeCurrentNnet(Nnet *dest, bool debug = false);
// ...
static void CombineNnets(const Vector<double> &scale_params,
                         const std::vector<Nnet> &nnets, Nnet *dest);
// ...
const std::vector<NnetExample> &egs_;
const std::vector<Nnet> &nnets,
// ...
int32 num_nnets = nnets.size();
// ...
int32 num_uc = nnets[0].NumUpdatableComponents();
// ...
for (int32 n = 1; n < num_nnets; n++) {
  // ...
}
// ...
ComputeCurrentNnet(&nnet);
// ...
config_.fisher_minibatch_size,
// ...
int32 num_threads = config_.num_threads == 1 ? 0 : config_.num_threads;
// ...
F(i, i) = std::max<BaseFloat>(F(i, i), config_.fisher_floor);
// ...
F(i, i) *= (1.0 + config_.alpha);
// ...
params_.AddTpVec(1.0, C_, kTrans, raw_params, 0.0);
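The two diagonal updates above floor and smooth the estimated scatter matrix F before it is factored, and the AddTpVec call then moves the raw scale parameters into the preconditioned space. A hedged sketch of that change of variables, assuming Kaldi's TpMatrix::Cholesky and TpMatrix::Invert (the surrounding smoothing details in this file may differ):

// Hedged sketch of the preconditioning transform, not the exact code here.
// F is the floored, smoothed SpMatrix<double> scatter estimate.
C_.Resize(F.NumRows());
C_.Cholesky(F);                      // F = C_ * C_^T
C_inv_ = C_;
C_inv_.Invert();                     // kept so ComputeCurrentNnet can map back
params_.AddTpVec(1.0, C_, kTrans, raw_params, 0.0);  // params_ = C_^T * raw_params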
int32 initial_model = config_.initial_model,
// ...
if (initial_model > num_nnets)
  initial_model = num_nnets;
if (initial_model < 0)
  initial_model = GetInitialModel(egs_, nnets_);
// ...
KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets);
// ...
if (initial_model < num_nnets) {
  KALDI_LOG << "Initializing with neural net with index " << initial_model;
  // ...
} else {
  KALDI_LOG << "Initializing with all neural nets averaged.";
  raw_params.Set(1.0 / num_nnets);
}
// ...
params_ = raw_params;
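GetInitialParams therefore initializes one scale per (source nnet, updatable component) pair. A hedged sketch of what the two branches amount to, assuming the scales are laid out nnet-major (that layout, and the use of Vector::Range, are assumptions on my part):

// Hedged sketch: initial raw scale parameters, one per
// (source nnet, updatable component) pair.
Vector<double> raw_params(num_nnets * num_uc);
if (initial_model < num_nnets) {
  // Start exactly at one of the source models: its block gets weight 1.
  raw_params.Range(initial_model * num_uc, num_uc).Set(1.0);
} else {
  // Start at the plain average of all the source models.
  raw_params.Set(1.0 / num_nnets);
}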
double *regularizer_objf_ptr) {
// ...
ComputeCurrentNnet(&nnet);
// ...
Nnet nnet_gradient(nnet);
bool is_gradient = true;
nnet_gradient.SetZero(is_gradient);
double tot_weight = 0.0;
// ...
    egs_, &tot_weight, &nnet_gradient) / egs_.size();
// ...
double regularizer_objf = 0.0;
// ...
for (int32 n = 0; n < num_nnets; n++) {
  // ...
  double gradient = uc->DotProduct(*uc_gradient) / tot_weight;
  // ...
  if (config_.regularizer != 0.0) {
    gradient -= config_.regularizer * uc->DotProduct(*uc_params);
    // ...
        -0.5 * config_.regularizer * uc_params->DotProduct(*uc_params);
  }
  // ...
  raw_gradient(i) = gradient;
}
// ...
if (config_.regularizer != 0.0) {
  KALDI_VLOG(2) << "Objf is " << objf << " + regularizer " << regularizer_objf
                << " = " << (objf + regularizer_objf) << ", raw gradient is "
                << raw_gradient;
} else {
  KALDI_VLOG(2) << "Objf is " << objf << ", raw gradient is " << raw_gradient;
}
// ...
*regularizer_objf_ptr = regularizer_objf;
return objf + regularizer_objf;
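The continuation line "egs_, &tot_weight, &nnet_gradient) / egs_.size();" above is the tail of the objective-and-gradient evaluation over the validation set. A hedged sketch of how that evaluation and the per-component gradient entries fit together (the exact argument order of the DoBackpropParallel overload, and the config_.minibatch_size field name, are assumptions):

// Hedged sketch, not the verbatim call from this file.
double tot_weight = 0.0;
double objf = DoBackpropParallel(nnet, config_.minibatch_size, config_.num_threads,
                                 egs_, &tot_weight, &nnet_gradient) / egs_.size();
// For the scale s that multiplies source component uc in the combined nnet:
//   objf_total        = objf - 0.5 * regularizer * ||combined component||^2   (summed over components)
//   d objf_total / ds = uc->DotProduct(gradient component) / tot_weight
//                       - regularizer * uc->DotProduct(combined component)
// which is what the per-component loop above writes into raw_gradient.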
Nnet *dest, bool debug) {
// ...
if (C_inv_.NumRows() > 0)
  // ... (map params_ back out of the preconditioned space)
else
  raw_params = params_;
// ...
KALDI_LOG << "Scale parameters are " << params_mat;
const std::vector<NnetExample> &validation_set,
const std::vector<Nnet> &nnets) const {
  int32 num_nnets = static_cast<int32>(nnets.size());
  // ...
  double best_objf = -std::numeric_limits<double>::infinity();
  // ...
  for (int32 n = 0; n < num_nnets; n++) {
    // ...
        config_.num_threads, validation_set,
    // ...
    if (n == 0 || objf > best_objf) {
      // ...
    }
  }
  // ...
  KALDI_LOG << "Objective functions for the source neural nets are " << objfs;
  // ...
  int32 num_uc = nnets[0].NumUpdatableComponents();
  // ...
  scale_params.Set(1.0 / num_nnets);
  // ...
      config_.num_threads, validation_set,
  // ...
  KALDI_LOG << "Objf with all neural nets averaged is " << objf;
  if (objf > best_objf) {
    // ...
  }
// ...
const std::vector<NnetExample> &validation_set,
const std::vector<Nnet> &nnets_in,
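The last two argument lines belong to the definition of the public entry point, CombineNnetsFast, which wraps everything above in a FastNnetCombiner. A hedged sketch of how a caller might use it (reading the models and examples is elided; the variable names are illustrative):

// Hedged usage sketch; not taken from this file.
NnetCombineFastConfig combine_config;      // typically filled in from command-line options
std::vector<NnetExample> validation_set;   // held-out examples, already read
std::vector<Nnet> nnets;                   // the source models to combine
Nnet nnet_out;
CombineNnetsFast(combine_config, validation_set, nnets, &nnet_out);
// nnet_out now holds the combination whose per-component scales were tuned
// with preconditioned L-BFGS on the validation objective.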