doc/nnet-frame-pooling-component_8h_source.html

 // nnet/nnet-frame-pooling-component.h

 // Copyright 2014  Brno University of Technology (author: Karel Vesely)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_NNET_NNET_FRAME_POOLING_COMPONENT_H_
 #define KALDI_NNET_NNET_FRAME_POOLING_COMPONENT_H_

 #include <string>
 #include <vector>
 #include <algorithm>
 #include <sstream>

 #include "nnet/nnet-component.h"
 #include "nnet/nnet-utils.h"
 #include "cudamatrix/cu-math.h"

 namespace kaldi {
 namespace nnet1 {

 class FramePoolingComponent : public UpdatableComponent {
  public:
   FramePoolingComponent(int32 dim_in, int32 dim_out):
     UpdatableComponent(dim_in, dim_out),
     feature_dim_(0),
     normalize_(false)
   { }

   ~FramePoolingComponent()
   { }

   Component* Copy() const { return new FramePoolingComponent(*this); }
   ComponentType GetType() const { return kFramePoolingComponent; }

   void InitData(std::istream &is) {
     // temporary, for initialization,
     std::vector<int32> pool_size;
     std::vector<int32> central_offset;
     Vector<BaseFloat> pool_weight;
     float learn_rate_coef = 0.01;
     // parse config
     std::string token;
     while (is >> std::ws, !is.eof()) {
       ReadToken(is, false, &token);
        if (token == "<FeatureDim>") ReadBasicType(is, false, &feature_dim_);
       else if (token == "<CentralOffset>") ReadIntegerVector(is, false, &central_offset);
       else if (token == "<PoolSize>") ReadIntegerVector(is, false, &pool_size);
       else if (token == "<PoolWeight>") pool_weight.Read(is, false);
       else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef);
       else if (token == "<Normalize>") ReadBasicType(is, false, &normalize_);
       else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                      << " (FeatureDim|CentralOffset <vec>|PoolSize <vec>|LearnRateCoef|Normalize)";
     }
     // check inputs:
     KALDI_ASSERT(feature_dim_ > 0);
     KALDI_ASSERT(central_offset.size() > 0);
     KALDI_ASSERT(central_offset.size() == pool_size.size());
     // initialize:
     int32 num_frames = InputDim() / feature_dim_;
     int32 central_frame = (num_frames -1) / 2;
     int32 num_pools = central_offset.size();
     offset_.resize(num_pools);
     weight_.resize(num_pools);
     for (int32 p = 0; p < num_pools; p++) {
       offset_[p] = central_frame + central_offset[p] + std::min(0, pool_size[p]+1);
       weight_[p].Resize(std::abs(pool_size[p]));
       weight_[p].Set(1.0/std::abs(pool_size[p]));
     }
     learn_rate_coef_ = learn_rate_coef;
     if (pool_weight.Dim() != 0) {
       KALDI_LOG << "Initializing from pool-weight vector";
       int32 num_weights = 0;
       for (int32 p = 0; p < num_pools; p++) {
         weight_[p].CopyFromVec(pool_weight.Range(num_weights, weight_[p].Dim()));
         num_weights += weight_[p].Dim();
       }
       KALDI_ASSERT(num_weights == pool_weight.Dim());
     }
     // check that offsets are within the splice we had,
     for (int32 p = 0; p < num_pools; p++) {
       KALDI_ASSERT(offset_[p] >= 0);
       KALDI_ASSERT(offset_[p] + weight_[p].Dim() <= num_frames);
     }
   }

   void ReadData(std::istream &is, bool binary) {
     // get the input dimension before splicing
     ExpectToken(is, binary, "<FeatureDim>");
     ReadBasicType(is, binary, &feature_dim_);
     ExpectToken(is, binary, "<LearnRateCoef>");
     ReadBasicType(is, binary, &learn_rate_coef_);
     ExpectToken(is, binary, "<Normalize>");
     ReadBasicType(is, binary, &normalize_);
     // read the offsets w.r.t. central frame
     ExpectToken(is, binary, "<FrameOffset>");
     ReadIntegerVector(is, binary, &offset_);
     // read the frame-weights
     ExpectToken(is, binary, "<FrameWeight>");
     int32 num_pools = offset_.size();
     weight_.resize(num_pools);
     for (int32 p = 0; p < num_pools; p++) {
       weight_[p].Read(is, binary);
     }
     //
     // Sanity checks:
     //
     KALDI_ASSERT(input_dim_ % feature_dim_ == 0);
     KALDI_ASSERT(output_dim_ % feature_dim_ == 0);
     KALDI_ASSERT(output_dim_ / feature_dim_ == num_pools);
     KALDI_ASSERT(offset_.size() == weight_.size());
     // check the shifts don't exceed the splicing
     int32 total_frame = InputDim() / feature_dim_;
     for (int32 p = 0; p < num_pools; p++) {
       KALDI_ASSERT(offset_[p] >= 0);
       KALDI_ASSERT(offset_[p] + (weight_[p].Dim()-1) < total_frame);
     }
     //
   }

   void WriteData(std::ostream &os, bool binary) const {
     WriteToken(os, binary, "<FeatureDim>");
     WriteBasicType(os, binary, feature_dim_);
     WriteToken(os, binary, "<LearnRateCoef>");
     WriteBasicType(os, binary, learn_rate_coef_);
     WriteToken(os, binary, "<Normalize>");
     WriteBasicType(os, binary, normalize_);
     WriteToken(os, binary, "<FrameOffset>");
     WriteIntegerVector(os, binary, offset_);
     // write pooling weights of individual frames
     WriteToken(os, binary, "<FrameWeight>");
     int32 num_pools = offset_.size();
     for (int32 p = 0; p < num_pools; p++) {
       weight_[p].Write(os, binary);
     }
   }

   int32 NumParams() const {
     int32 ans = 0;
     for (int32 p = 0; p < weight_.size(); p++) {
       ans += weight_[p].Dim();
     }
     return ans;
   }

   void GetGradient(VectorBase<BaseFloat> *gradient) const {
     KALDI_ERR << "Unimplemented.";
   }

   void GetParams(VectorBase<BaseFloat>* params) const {
     KALDI_ASSERT(params->Dim() == NumParams());
     int32 offset = 0;
     for (int32 p = 0; p < weight_.size(); p++) {
       params->Range(offset, weight_[p].Dim()).CopyFromVec(weight_[p]);
       offset += weight_[p].Dim();
     }
     KALDI_ASSERT(offset == params->Dim());
   }

   void SetParams(const VectorBase<BaseFloat>& params) {
     KALDI_ERR << "Unimplemented.";
   }

   std::string Info() const {
     std::ostringstream oss;
     oss << "\n  (offset,weights) : ";
     for (int32 p = 0; p < weight_.size(); p++) {
       oss << "(" << offset_[p] << "," << weight_[p] << "), ";
     }
     return oss.str();
   }

   std::string InfoGradient() const {
     std::ostringstream oss;
     oss << "\n  lr-coef " << ToString(learn_rate_coef_);
     oss << "\n  (offset,weights_grad) : ";
     for (int32 p = 0; p < weight_diff_.size(); p++) {
       oss << "(" << offset_[p] << ",";
       // pass the weight vector, remove '\n' as last char
       oss << weight_diff_[p];
       oss.seekp(-1, std::ios_base::cur);
       oss << "), ";
     }
     return oss.str();
   }

   void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                     CuMatrixBase<BaseFloat> *out) {
     // check dims
     KALDI_ASSERT(in.NumCols() % feature_dim_ == 0);
     KALDI_ASSERT(out->NumCols() % feature_dim_ == 0);
     // useful dims
     int32 num_pools = offset_.size();
     // compute the output pools
     for (int32 p = 0; p < num_pools; p++) {
       CuSubMatrix<BaseFloat> tgt(out->ColRange(p*feature_dim_, feature_dim_));
       tgt.SetZero();  // reset
       for (int32 i = 0; i < weight_[p].Dim(); i++) {
         tgt.AddMat(weight_[p](i), in.ColRange((offset_[p]+i) * feature_dim_, feature_dim_));
       }
     }
   }

   void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                         const CuMatrixBase<BaseFloat> &out,
                         const CuMatrixBase<BaseFloat> &out_diff,
                         CuMatrixBase<BaseFloat> *in_diff) {
     KALDI_ERR << "Unimplemented.";
   }


   void Update(const CuMatrixBase<BaseFloat> &input,
               const CuMatrixBase<BaseFloat> &diff) {
     // useful dims
     int32 num_pools = offset_.size();
     // lazy init
     if (weight_diff_.size() != num_pools) weight_diff_.resize(num_pools);
     // get the derivatives
     for (int32 p = 0; p < num_pools; p++) {
       weight_diff_[p].Resize(weight_[p].Dim(), kSetZero);  // reset
       for (int32 i = 0; i < weight_[p].Dim(); i++) {
         // multiply matrices element-wise, and sum to get the derivative
         CuSubMatrix<BaseFloat> in_frame(
           input.ColRange((offset_[p]+i) * feature_dim_, feature_dim_)
         );
         CuSubMatrix<BaseFloat> diff_frame(
           diff.ColRange(p * feature_dim_, feature_dim_)
         );
         CuMatrix<BaseFloat> mul_elems(in_frame);
         mul_elems.MulElements(diff_frame);
         weight_diff_[p](i) = mul_elems.Sum();
       }
     }
     // update
     for (int32 p = 0; p < num_pools; p++) {
       weight_[p].AddVec(- learn_rate_coef_ * opts_.learn_rate, weight_diff_[p]);
     }
     // force to be positive, re-normalize the sum
     if (normalize_) {
       for (int32 p = 0; p < num_pools; p++) {
         weight_[p].ApplyFloor(0.0);
         weight_[p].Scale(1.0/weight_[p].Sum());
       }
     }
   }

  private:
   int32 feature_dim_;  // feature dimension before splicing
   std::vector<int32> offset_;  // vector of pooling offsets
   std::vector<Vector<BaseFloat> > weight_;
   std::vector<Vector<BaseFloat> > weight_diff_;

   bool normalize_;  // apply normalization after each update
 };

 }  // namespace nnet1
 }  // namespace kaldi

 #endif  // KALDI_NNET_NNET_FRAME_POOLING_COMPONENT_H_
kaldi::nnet1::ToString
std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
Definition: nnet-utils.h:52

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet1::FramePoolingComponent::feature_dim_
int32 feature_dim_
Definition: nnet-frame-pooling-component.h:277

kaldi::nnet1::NnetTrainOptions::learn_rate
BaseFloat learn_rate
Definition: nnet-trnopts.h:32

kaldi::nnet1::FramePoolingComponent::GetType
ComponentType GetType() const
Get Type Identification of the component,.
Definition: nnet-frame-pooling-component.h:55

kaldi::nnet1::UpdatableComponent::opts_
NnetTrainOptions opts_
Option-class with training hyper-parameters,.
Definition: nnet-component.h:265

kaldi::nnet1::FramePoolingComponent::Copy
Component * Copy() const
Copy component (deep copy),.
Definition: nnet-frame-pooling-component.h:54

kaldi::nnet1::Component::input_dim_
int32 input_dim_
Data members,.
Definition: nnet-component.h:190

kaldi::ReadBasicType
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55

kaldi::nnet1::UpdatableComponent::learn_rate_coef_
BaseFloat learn_rate_coef_
Scalar applied to learning rate for weight matrices (to be used in ::Update method),.
Definition: nnet-component.h:269

kaldi::nnet1::UpdatableComponent
Class UpdatableComponent is a Component which has trainable parameters, it contains SGD training hype...
Definition: nnet-component.h:208

kaldi::CuMatrixBase::Sum
Real Sum() const
Definition: cu-matrix.cc:3012

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::nnet1::FramePoolingComponent::normalize_
bool normalize_
Definition: nnet-frame-pooling-component.h:284

kaldi::ReadToken
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154

kaldi::nnet1::FramePoolingComponent::~FramePoolingComponent
~FramePoolingComponent()
Definition: nnet-frame-pooling-component.h:51

kaldi::nnet1::FramePoolingComponent::Info
std::string Info() const
Print some additional info (after <ComponentName> and the dims),.
Definition: nnet-frame-pooling-component.h:193

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

nnet-component.h

kaldi::nnet1::Component::ComponentType
ComponentType
Component type identification mechanism,.
Definition: nnet-component.h:47

kaldi::nnet1::FramePoolingComponent::Update
void Update(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
Compute gradient and update parameters,.
Definition: nnet-frame-pooling-component.h:241

kaldi::nnet1::FramePoolingComponent::weight_
std::vector< Vector< BaseFloat > > weight_
Vector of pooling weight vectors,.
Definition: nnet-frame-pooling-component.h:280

kaldi::ReadIntegerVector
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232

kaldi::CuMatrixBase::SetZero
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509

kaldi::ExpectToken
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191

kaldi::CuMatrixBase::MulElements
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667

kaldi::nnet1::FramePoolingComponent::BackpropagateFnc
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
Definition: nnet-frame-pooling-component.h:233

kaldi::nnet1::Component::InputDim
int32 InputDim() const
Get the dimension of the input,.
Definition: nnet-component.h:130

kaldi::nnet1::FramePoolingComponent::FramePoolingComponent
FramePoolingComponent(int32 dim_in, int32 dim_out)
Definition: nnet-frame-pooling-component.h:45

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::CuSubMatrix
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70

kaldi::nnet1::FramePoolingComponent::offset_
std::vector< int32 > offset_
Definition: nnet-frame-pooling-component.h:278

kaldi::nnet1::FramePoolingComponent::ReadData
void ReadData(std::istream &is, bool binary)
Here the offsets are w.r.t.
Definition: nnet-frame-pooling-component.h:116

kaldi::WriteToken
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::nnet1::FramePoolingComponent::SetParams
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from, reshaped as a vector,.
Definition: nnet-frame-pooling-component.h:189

kaldi::nnet1::FramePoolingComponent::InfoGradient
std::string InfoGradient() const
Print some additional info about gradient (after <...> and dims),.
Definition: nnet-frame-pooling-component.h:202

kaldi::nnet1::Component::output_dim_
int32 output_dim_
Dimension of the output of the Component,.
Definition: nnet-component.h:191

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::CuMatrixBase::ColRange
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:665

cu-math.h

kaldi::nnet1::FramePoolingComponent::NumParams
int32 NumParams() const
Number of trainable parameters,.
Definition: nnet-frame-pooling-component.h:167

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::CuMatrixBase::NumCols
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216

kaldi::nnet1::Component::kFramePoolingComponent
Definition: nnet-component.h:82

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::kSetZero
Definition: matrix-common.h:38

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::nnet1::FramePoolingComponent::GetGradient
void GetGradient(VectorBase< BaseFloat > *gradient) const
Get gradient reshaped as a vector,.
Definition: nnet-frame-pooling-component.h:175

kaldi::nnet1::FramePoolingComponent::InitData
void InitData(std::istream &is)
Here the offsets are w.r.t.
Definition: nnet-frame-pooling-component.h:61

kaldi::WriteIntegerVector
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198

kaldi::nnet1::FramePoolingComponent::PropagateFnc
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
Definition: nnet-frame-pooling-component.h:216

kaldi::WriteBasicType
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34

kaldi::nnet1::Component
Abstract class, building block of the network.
Definition: nnet-component.cc:51

kaldi::nnet1::FramePoolingComponent::weight_diff_
std::vector< Vector< BaseFloat > > weight_diff_
Derivatives of weight vectors,
Definition: nnet-frame-pooling-component.h:282

kaldi::nnet1::FramePoolingComponent::GetParams
void GetParams(VectorBase< BaseFloat > *params) const
Get the trainable parameters reshaped as a vector,.
Definition: nnet-frame-pooling-component.h:179

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

KALDI_LOG
#define KALDI_LOG
Definition: kaldi-error.h:153

nnet-utils.h

kaldi::nnet1::FramePoolingComponent
FramePoolingComponent : The input/output matrices are split to frames of width 'feature_dim_'.
Definition: nnet-frame-pooling-component.h:43

kaldi::Vector::Read
void Read(std::istream &in, bool binary, bool add=false)
Read function using C++ streams.
Definition: kaldi-vector.cc:1109

kaldi::nnet1::FramePoolingComponent::WriteData
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
Definition: nnet-frame-pooling-component.h:150

kaldi::VectorBase::Range
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94