doc/rescale-nnet_8cc_source.html

 // nnet2/rescale-nnet.cc

 // Copyright 2012   Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include "nnet2/rescale-nnet.h"

 namespace kaldi {
 namespace nnet2 {


 class NnetRescaler {
  public:
   NnetRescaler(const NnetRescaleConfig &config,
                const std::vector<NnetExample> &examples,
                Nnet *nnet):
       config_(config), examples_(examples), nnet_(nnet) {}

   void Rescale();

  private:
   void FormatInput(const std::vector<NnetExample> &data,
                    CuMatrix<BaseFloat> *input);
   void RescaleComponent(int32 c, int32 num_chunks,
                         CuMatrixBase<BaseFloat> *cur_data_in,
                         CuMatrix<BaseFloat> *next_data);

   void ComputeRelevantIndexes();

   BaseFloat GetTargetAvgDeriv(int32 c);

   const NnetRescaleConfig &config_;
   const std::vector<NnetExample> &examples_;
   Nnet *nnet_;
   std::vector <ChunkInfo> chunk_info_out_;
   std::set<int32> relevant_indexes_; // values of c with AffineComponent followed
   // by (at c+1) NonlinearComponent that is not SoftmaxComponent.
 };


 void NnetRescaler::FormatInput(const std::vector<NnetExample> &data,
                                CuMatrix<BaseFloat> *input) {
   KALDI_ASSERT(data.size() > 0);
   int32 num_splice = nnet_->LeftContext() + 1 + nnet_->RightContext();
   KALDI_ASSERT(data[0].input_frames.NumRows() == num_splice);

   int32 feat_dim = data[0].input_frames.NumCols(),
          spk_dim = data[0].spk_info.Dim(),
          tot_dim = feat_dim + spk_dim; // we append these at the neural net
                                        // input... note, spk_dim might be 0.
   KALDI_ASSERT(tot_dim == nnet_->InputDim());
   int32 num_chunks = data.size();

   input->Resize(num_splice * num_chunks,
                 tot_dim);
   for (int32 chunk = 0; chunk < num_chunks; chunk++) {
     CuSubMatrix<BaseFloat> dest(*input,
                                 chunk * num_splice, num_splice,
                                 0, feat_dim);
     Matrix<BaseFloat> src(data[chunk].input_frames);
     dest.CopyFromMat(src);
     if (spk_dim != 0) {
       CuSubMatrix<BaseFloat> spk_dest(*input,
                                       chunk * num_splice, num_splice,
                                       feat_dim, spk_dim);
       spk_dest.CopyRowsFromVec(data[chunk].spk_info);
     }
   }
   // TODO : filter out the unnecessary rows from the input
   nnet_->ComputeChunkInfo(num_splice, num_chunks, &chunk_info_out_);

 }

 void NnetRescaler::ComputeRelevantIndexes() {
   for (int32 c = 0; c + 1 < nnet_->NumComponents(); c++)
     if (dynamic_cast<AffineComponent*>(&nnet_->GetComponent(c)) != NULL &&
         (dynamic_cast<NonlinearComponent*>(&nnet_->GetComponent(c+1)) != NULL &&
          dynamic_cast<SoftmaxComponent*>(&nnet_->GetComponent(c+1)) == NULL))
       relevant_indexes_.insert(c);
 }


 BaseFloat NnetRescaler::GetTargetAvgDeriv(int32 c) {
   KALDI_ASSERT(relevant_indexes_.count(c) == 1);
   BaseFloat factor;
   if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
     factor = 0.25;
   else if (dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
     factor = 1.0;
   else
     KALDI_ERR << "This type of nonlinear component is not handled: index  " << c;

   int32 last_c = *std::max_element(relevant_indexes_.begin(), relevant_indexes_.end()),
       first_c = *std::min_element(relevant_indexes_.begin(), relevant_indexes_.end());
   if (c == first_c)
     return factor * config_.target_first_layer_avg_deriv;
   else if (c == last_c)
     return factor * config_.target_last_layer_avg_deriv;
   else
     return factor * config_.target_avg_deriv;
 }

 // Here, c is the index of the affine component, and
 // c + 1 is the index of the nonlinear component; *cur_data is the
 // output of the affine component.
 void NnetRescaler::RescaleComponent(
     int32 c,
     int32 num_chunks,
     CuMatrixBase<BaseFloat> *cur_data_in,
     CuMatrix<BaseFloat> *next_data) {
   int32 rows = cur_data_in->NumRows(), cols = cur_data_in->NumCols();
   // Only handle sigmoid or tanh here.
   if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) == NULL &&
       dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) == NULL)
     KALDI_ERR << "This type of nonlinear component is not handled: index  " << c;
   KALDI_ASSERT(chunk_info_out_[0].NumChunks() == num_chunks); //TODO verify how this component can be used
                                                              // rewrite the
                                                              // chunk_info_out_
                                                              // computation
   // the nonlinear component:
   NonlinearComponent &nc =
       *(dynamic_cast<NonlinearComponent*>(&(nnet_->GetComponent(c + 1))));
   ChunkInfo in_info, out_info;
   in_info = chunk_info_out_[c+1];
   out_info = chunk_info_out_[c+2];

   BaseFloat orig_avg_deriv, target_avg_deriv = GetTargetAvgDeriv(c);
   BaseFloat cur_scaling = 1.0; // current rescaling factor (on input).
   int32 num_iters = 10;

   CuMatrix<BaseFloat> cur_data(*cur_data_in),
       ones(rows, cols), in_deriv(rows, cols);

   ones.Set(1.0);
   nc.Propagate(in_info, out_info, cur_data, next_data);
   nc.Backprop(in_info, out_info, cur_data, *next_data, ones, NULL, &in_deriv);
   BaseFloat cur_avg_deriv;
   cur_avg_deriv = in_deriv.Sum() / (rows * cols);
   orig_avg_deriv = cur_avg_deriv;
   for (int32 iter = 0; iter < num_iters; iter++) {
     // We already have "cur_avg_deriv"; perturb the scale and compute
     // the next avg_deriv, so we can see how it changes with the scale.
     cur_data.CopyFromMat(*cur_data_in);
     cur_data.Scale(cur_scaling + config_.delta);
     nc.Propagate(in_info, out_info, cur_data, next_data);
     nc.Backprop(in_info, out_info, cur_data, *next_data, ones, NULL, &in_deriv);
     BaseFloat next_avg_deriv = in_deriv.Sum() / (rows * cols);
     KALDI_ASSERT(next_avg_deriv < cur_avg_deriv);
     // "gradient" is how avg_deriv changes as we change the scale.
     // should be negative.
     BaseFloat gradient = (next_avg_deriv - cur_avg_deriv) / config_.delta;
     KALDI_ASSERT(gradient < 0.0);
     BaseFloat proposed_change = (target_avg_deriv - cur_avg_deriv) / gradient;
     KALDI_VLOG(2) << "cur_avg_deriv = " << cur_avg_deriv << ", target_avg_deriv = "
                   << target_avg_deriv << ", gradient = " << gradient
                   << ", proposed_change " << proposed_change;
     // Limit size of proposed change in "cur_scaling", to ensure stability.
     if (fabs(proposed_change / cur_scaling) > config_.max_change)
       proposed_change = cur_scaling * config_.max_change *
           (proposed_change > 0.0 ? 1.0 : -1.0);
     cur_scaling += proposed_change;

     cur_data.CopyFromMat(*cur_data_in);
     cur_data.Scale(cur_scaling);
     nc.Propagate(in_info, out_info, cur_data, next_data);
     nc.Backprop(in_info, out_info, cur_data, *next_data, ones, NULL, &in_deriv);
     cur_avg_deriv = in_deriv.Sum() / (rows * cols);
     if (fabs(proposed_change) < config_.min_change) break; // Terminate the
     // optimization
   }
   UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(
       &nnet_->GetComponent(c));
   KALDI_ASSERT(uc != NULL);
   uc->Scale(cur_scaling); // scale the parameters of the previous
   // AffineComponent.

   KALDI_LOG << "For component " << c << ", scaling parameters by "
             << cur_scaling << "; average "
             << "derivative changed from " << orig_avg_deriv << " to "
             << cur_avg_deriv << "; target was " << target_avg_deriv;
 }


 void NnetRescaler::Rescale() {
   ComputeRelevantIndexes(); // set up relevant_indexes_.
   CuMatrix<BaseFloat> cur_data, next_data;
   FormatInput(examples_, &cur_data);
   int32 num_chunks = examples_.size();
   for (int32 c = 0; c < nnet_->NumComponents(); c++) {
     Component &component = nnet_->GetComponent(c);
     if (relevant_indexes_.count(c - 1) == 1) {
       // the following function call also appropriately sets "next_data"
       // after doing the rescaling
       RescaleComponent(c - 1, num_chunks, &cur_data, &next_data);
     } else {
       component.Propagate(chunk_info_out_[c], chunk_info_out_[c+1], cur_data, &next_data);
     }
     cur_data.Swap(&next_data);
   }
 }

 void RescaleNnet(const NnetRescaleConfig &rescale_config,
                  const std::vector<NnetExample> &examples,
                  Nnet *nnet) {
   NnetRescaler rescaler(rescale_config, examples, nnet);
   rescaler.Rescale();
 }


 } // namespace nnet2
 } // namespace kaldi
kaldi::CuMatrixBase::CopyFromMat
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet2::Nnet::GetComponent
const Component & GetComponent(int32 c) const
Definition: nnet-nnet.cc:141

kaldi::nnet2::Nnet::LeftContext
int32 LeftContext() const
Returns the left-context summed over all the Components...
Definition: nnet-nnet.cc:42

kaldi::nnet2::NonlinearComponent
This kind of Component is a base-class for things like sigmoid and softmax.
Definition: nnet-component.h:352

kaldi::nnet2::NnetRescaler::nnet_
Nnet * nnet_
Definition: rescale-nnet.cc:49

kaldi::nnet2::NnetRescaler::examples_
const std::vector< NnetExample > & examples_
Definition: rescale-nnet.cc:48

kaldi::nnet2::NnetRescaler::RescaleComponent
void RescaleComponent(int32 c, int32 num_chunks, CuMatrixBase< BaseFloat > *cur_data_in, CuMatrix< BaseFloat > *next_data)
Definition: rescale-nnet.cc:121

kaldi::nnet2::UpdatableComponent::Scale
virtual void Scale(BaseFloat scale)=0
This new virtual function scales the parameters by this amount.

kaldi::nnet2::Component
Abstract class, basic element of the network, it is a box with defined inputs, outputs, and tranformation functions interface.
Definition: nnet-component.h:157

kaldi::nnet2::NnetRescaler::FormatInput
void FormatInput(const std::vector< NnetExample > &data, CuMatrix< BaseFloat > *input)
takes the input and formats as a single matrix, in forward_data_[0].
Definition: rescale-nnet.cc:56

kaldi::nnet2::NnetRescaler::Rescale
void Rescale()
Definition: rescale-nnet.cc:200

kaldi::CuMatrixBase::Sum
Real Sum() const
Definition: cu-matrix.cc:3012

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::nnet2::NnetRescaleConfig
Definition: rescale-nnet.h:36

kaldi::Matrix< BaseFloat >

kaldi::CuMatrix
This class represents a matrix that&#39;s stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::CuMatrixBase::CopyRowsFromVec
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301

kaldi::nnet2::NnetRescaleConfig::min_change
BaseFloat min_change
Definition: rescale-nnet.h:47

kaldi::nnet2::NnetRescaleConfig::max_change
BaseFloat max_change
Definition: rescale-nnet.h:45

kaldi::nnet2::NnetRescaler
Definition: rescale-nnet.cc:26

kaldi::nnet2::RescaleNnet
void RescaleNnet(const NnetRescaleConfig &rescale_config, const std::vector< NnetExample > &examples, Nnet *nnet)
Definition: rescale-nnet.cc:218

kaldi::nnet2::Nnet::NumComponents
int32 NumComponents() const
Returns number of components– think of this as similar to # of layers, but e.g.
Definition: nnet-nnet.h:69

kaldi::nnet2::TanhComponent
Definition: nnet-component.h:610

kaldi::nnet2::NnetRescaler::GetTargetAvgDeriv
BaseFloat GetTargetAvgDeriv(int32 c)
Definition: rescale-nnet.cc:98

float

kaldi::CuMatrix::Swap
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123

kaldi::nnet2::NnetRescaler::NnetRescaler
NnetRescaler(const NnetRescaleConfig &config, const std::vector< NnetExample > &examples, Nnet *nnet)
Definition: rescale-nnet.cc:28

kaldi::nnet2::Nnet::RightContext
int32 RightContext() const
Returns the right-context summed over all the Components...
Definition: nnet-nnet.cc:56

kaldi::nnet2::NnetRescaler::config_
const NnetRescaleConfig & config_
Definition: rescale-nnet.cc:47

kaldi::nnet2::ChunkInfo
ChunkInfo is a class whose purpose is to describe the structure of matrices holding features...
Definition: nnet-component.h:72

kaldi::nnet2::NnetRescaler::chunk_info_out_
std::vector< ChunkInfo > chunk_info_out_
Definition: rescale-nnet.cc:50

kaldi::nnet2::NnetRescaleConfig::target_last_layer_avg_deriv
BaseFloat target_last_layer_avg_deriv
Definition: rescale-nnet.h:39

kaldi::nnet2::Nnet
Definition: nnet-nnet.h:63

kaldi::nnet2::Component::Backprop
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const =0
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

rescale-nnet.h

kaldi::CuSubMatrix
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70

kaldi::nnet2::NnetRescaleConfig::delta
BaseFloat delta
Definition: rescale-nnet.h:44

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::CuMatrixBase::NumCols
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216

kaldi::nnet2::Component::Propagate
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const =0
Perform forward pass propagation Input->Output.

kaldi::nnet2::NnetRescaleConfig::target_avg_deriv
BaseFloat target_avg_deriv
Definition: rescale-nnet.h:37

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

KALDI_VLOG
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156

kaldi::nnet2::NnetRescaler::ComputeRelevantIndexes
void ComputeRelevantIndexes()
Definition: rescale-nnet.cc:89

kaldi::nnet2::NnetRescaleConfig::target_first_layer_avg_deriv
BaseFloat target_first_layer_avg_deriv
Definition: rescale-nnet.h:38

kaldi::CuMatrixBase::NumRows
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

KALDI_LOG
#define KALDI_LOG
Definition: kaldi-error.h:153

kaldi::nnet2::Nnet::ComputeChunkInfo
void ComputeChunkInfo(int32 input_chunk_size, int32 num_chunks, std::vector< ChunkInfo > *chunk_info_out) const
Uses the output of the Context() functions of the network, to compute a vector of size NumComponents(...
Definition: nnet-nnet.cc:65

kaldi::CuMatrix::Resize
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

kaldi::nnet2::NnetRescaler::relevant_indexes_
std::set< int32 > relevant_indexes_
Definition: rescale-nnet.cc:51

kaldi::nnet2::UpdatableComponent
Class UpdatableComponent is a Component which has trainable parameters and contains some global param...
Definition: nnet-component.h:279

kaldi::nnet2::Nnet::InputDim
int32 InputDim() const
Dimension of the input features, e.g.
Definition: nnet-nnet.cc:36