  if ((*sum_group_component =
       dynamic_cast<SumGroupComponent*>(component)) == NULL) {
    KALDI_LOG << "Adding SumGroupComponent to neural net.";
    int32 dim = component->OutputDim();
    std::vector<int32> sizes(dim, 1);  // a vector of all ones, of dimension "dim".
    *sum_group_component = new SumGroupComponent();
    (*sum_group_component)->Init(sizes);
    nnet->Append(*sum_group_component);
    nc++;
  }
  component = &(nnet->GetComponent(nc - 2));
  if ((*softmax_component = dynamic_cast<SoftmaxComponent*>(component)) == NULL)
    KALDI_ERR << "Neural net has wrong topology: expected second-to-last "
              << "component to be SoftmaxComponent, type is "
              << component->Type();
  component = &(nnet->GetComponent(nc - 3));
  if ((*affine_component = dynamic_cast<AffineComponent*>(component)) == NULL)
    KALDI_ERR << "Neural net has wrong topology: expected third-to-last "
              << "component to be AffineComponent, type is "
              << component->Type();
}

void MixupNnet(const NnetMixupConfig &mixup_config,
               Nnet *nnet) {
  AffineComponent *affine_component = NULL;
  SoftmaxComponent *softmax_component = NULL;
  SumGroupComponent *sum_group_component = NULL;
  GiveNnetCorrectTopology(nnet,
                          &affine_component,
                          &softmax_component,
                          &sum_group_component);  // Adds a SumGroupComponent if needed.

  softmax_component->MixUp(mixup_config.num_mixtures,
                           mixup_config.power,
                           mixup_config.min_count,
                           mixup_config.perturb_stddev,
                           affine_component,
                           sum_group_component);
  nnet->Check();  // Checks that dims etc. all match up.
}
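
// Illustrative only (not from this file): a minimal sketch of how a caller
// might drive MixupNnet().  The "am_nnet" object (assumed here to be an nnet2
// acoustic-model wrapper exposing GetNnet()) and the concrete option values
// are assumptions made for the example.
//
//   NnetMixupConfig mixup_config;
//   mixup_config.num_mixtures = 12000;   // total target number of mixtures across all groups
//   mixup_config.power = 0.25;           // exponent of the power rule used by GetSplitTargets()
//   mixup_config.min_count = 1000.0;     // min-count constraint used by GetSplitTargets()
//   mixup_config.perturb_stddev = 0.01;  // std-dev of the perturbation applied to split rows
//   MixupNnet(mixup_config, &(am_nnet.GetNnet()));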
/** Allocate mixtures to states via a power rule, and add any new mixtures. */
void SoftmaxComponent::MixUp(int32 num_mixtures,
                             BaseFloat power,
                             BaseFloat min_count,
                             BaseFloat perturb_stddev,
                             AffineComponent *ac,
                             SumGroupComponent *sc) {
  // "counts" sums the stored occupation counts (value_sum_) over the softmax
  // outputs belonging to each group of the SumGroupComponent.
  std::vector<int32> old_sizes;
  sc->GetSizes(&old_sizes);
  Vector<BaseFloat> counts(old_sizes.size());
  int32 old_dim = 0;
  for (size_t i = 0; i < old_sizes.size(); i++) {
    int32 this_input_dim = old_sizes[i];
    BaseFloat this_tot_count = 0.0;  // total count for this group.
    for (int32 d = 0; d < this_input_dim; d++, old_dim++)
      this_tot_count += this->value_sum_(old_dim);
    counts(i) = this_tot_count;
  }
  KALDI_ASSERT(counts.Sum() > 0 && "Cannot do mixing up without counts.");

  std::vector<int32> targets;  // target number of mixtures for each group.
  GetSplitTargets(counts, num_mixtures, power, min_count, &targets);
  KALDI_ASSERT(targets.size() == old_sizes.size());
  std::vector<int32> new_sizes(old_sizes.size());
  for (size_t i = 0; i < targets.size(); i++)
    new_sizes[i] = std::max(targets[i], old_sizes[i]);
  int32 new_dim = std::accumulate(new_sizes.begin(), new_sizes.end(),
                                  static_cast<int32>(0)),
      affine_input_dim = ac->InputDim();
  KALDI_ASSERT(new_dim >= old_dim);
  sc->Init(new_sizes);  // re-initialize the SumGroupComponent with the new sizes.

  // Old and new parameters of the preceding affine component, plus counts:
  Vector<BaseFloat> old_bias_term(ac->bias_params_);
  Matrix<BaseFloat> old_linear_term(ac->linear_params_);
  Vector<BaseFloat> new_bias_term(new_dim);
  Matrix<BaseFloat> new_linear_term(new_dim, affine_input_dim);
  Vector<BaseFloat> new_counts(new_dim);

  // old_offset and new_offset index the softmax dimensions before and after
  // mixing up; within the loop they point at the block for the current group.
  int32 old_offset = 0, new_offset = 0;
  Vector<BaseFloat> old_counts(this->value_sum_);
  for (size_t i = 0; i < old_sizes.size(); i++) {
    int32 this_old_dim = old_sizes[i],
        this_new_dim = new_sizes[i],
        this_cur_dim = this_old_dim;  // this_cur_dim is the loop variable below.

    SubMatrix<BaseFloat> this_old_linear_term(old_linear_term,
                                              old_offset, this_old_dim,
                                              0, affine_input_dim),
        this_new_linear_term(new_linear_term,
                             new_offset, this_new_dim,
                             0, affine_input_dim);
    SubVector<BaseFloat> this_old_bias_term(old_bias_term,
                                            old_offset, this_old_dim),
        this_new_bias_term(new_bias_term, new_offset, this_new_dim),
        this_old_counts(old_counts,
                        old_offset, this_old_dim),
        this_new_counts(new_counts,
                        new_offset, this_new_dim);

    this_new_linear_term.Range(0, this_old_dim, 0, affine_input_dim).
        CopyFromMat(this_old_linear_term);
    this_new_bias_term.Range(0, this_old_dim).
        CopyFromVec(this_old_bias_term);
    this_new_counts.Range(0, this_old_dim).
        CopyFromVec(this_old_counts);
    // Split rows until this group reaches its new size: take the row with the
    // largest count, halve that count, and perturb the two copies of its
    // parameters in opposite directions.
    for (; this_cur_dim < this_new_dim; this_cur_dim++) {
      BaseFloat *count_begin = this_new_counts.Data(),
          *count_end = count_begin + this_cur_dim,
          *count_max = std::max_element(count_begin, count_end);
      KALDI_ASSERT(*count_max > 0.0);
      *count_max *= 0.5;
      *count_end = *count_max;  // count for the row we are adding.
      int32 max_index = static_cast<int32>(count_max - count_begin),
          new_index = this_cur_dim;
      SubVector<BaseFloat> cur_vec(this_new_linear_term, max_index),
          new_vec(this_new_linear_term, new_index);
      new_vec.CopyFromVec(cur_vec);
      Vector<BaseFloat> rand(affine_input_dim);
      rand.SetRandn();
      cur_vec.AddVec(perturb_stddev, rand);
      new_vec.AddVec(-perturb_stddev, rand);
      this_new_bias_term(max_index) += Log(0.5);
      this_new_bias_term(new_index) = this_new_bias_term(max_index);
    }
    old_offset += this_old_dim;
    new_offset += this_new_dim;
  }
  KALDI_ASSERT(old_offset == old_dim && new_offset == new_dim);
  ac->SetParams(new_bias_term, new_linear_term);
  this->value_sum_.Resize(new_counts.Dim());
  this->value_sum_.CopyFromVec(new_counts);
  this->dim_ = new_dim;
  KALDI_LOG << "Mixed up from dimension of " << old_dim << " to " << new_dim
            << " in the softmax layer.";
}