doc/nnet-max-pooling-component_8h_source.html

 // nnet/nnet-max-pooling-component.h

 // Copyright 2014  Brno University of Technology (author: Karel Vesely)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_NNET_NNET_MAX_POOLING_COMPONENT_H_
 #define KALDI_NNET_NNET_MAX_POOLING_COMPONENT_H_

 #include <string>
 #include <vector>

 #include "nnet/nnet-component.h"
 #include "nnet/nnet-utils.h"
 #include "cudamatrix/cu-math.h"

 namespace kaldi {
 namespace nnet1 {

 class MaxPoolingComponent : public Component {
  public:
   MaxPoolingComponent(int32 dim_in, int32 dim_out):
     Component(dim_in, dim_out),
     pool_size_(0),
     pool_step_(0),
     pool_stride_(0)
   { }

   ~MaxPoolingComponent()
   { }

   Component* Copy() const { return new MaxPoolingComponent(*this); }
   ComponentType GetType() const { return kMaxPoolingComponent; }

   void InitData(std::istream &is) {
     // parse config
     std::string token;
     while (is >> std::ws, !is.eof()) {
       ReadToken(is, false, &token);
        if (token == "<PoolSize>") ReadBasicType(is, false, &pool_size_);
       else if (token == "<PoolStep>") ReadBasicType(is, false, &pool_step_);
       else if (token == "<PoolStride>") ReadBasicType(is, false, &pool_stride_);
       else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                      << " (PoolSize|PoolStep|PoolStride)";
     }
     // check
     KALDI_ASSERT(pool_size_ != 0 && pool_step_ != 0 && pool_stride_ != 0);
   }

   void ReadData(std::istream &is, bool binary) {
     // pooling hyperparameters
     ExpectToken(is, binary, "<PoolSize>");
     ReadBasicType(is, binary, &pool_size_);
     ExpectToken(is, binary, "<PoolStep>");
     ReadBasicType(is, binary, &pool_step_);
     ExpectToken(is, binary, "<PoolStride>");
     ReadBasicType(is, binary, &pool_stride_);

     //
     // Sanity checks:
     //
     // number of patches:
     KALDI_ASSERT(input_dim_ % pool_stride_ == 0);
     int32 num_patches = input_dim_ / pool_stride_;
     // number of pools:
     KALDI_ASSERT((num_patches - pool_size_) % pool_step_ == 0);
     int32 num_pools = 1 + (num_patches - pool_size_) / pool_step_;
     // check output dim:
     KALDI_ASSERT(output_dim_ == num_pools * pool_stride_);
     //
   }

   void WriteData(std::ostream &os, bool binary) const {
     // pooling hyperparameters
     WriteToken(os, binary, "<PoolSize>");
     WriteBasicType(os, binary, pool_size_);
     WriteToken(os, binary, "<PoolStep>");
     WriteBasicType(os, binary, pool_step_);
     WriteToken(os, binary, "<PoolStride>");
     WriteBasicType(os, binary, pool_stride_);
   }

   void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                     CuMatrixBase<BaseFloat> *out) {
     // useful dims
     int32 num_patches = input_dim_ / pool_stride_;
     int32 num_pools = 1 + (num_patches - pool_size_) / pool_step_;

     // do the max-pooling (pools indexed by q)
     for (int32 q = 0; q < num_pools; q++) {
       // get output buffer of the pool
       CuSubMatrix<BaseFloat> pool(out->ColRange(q*pool_stride_, pool_stride_));
       pool.Set(-1e20);  // reset (large negative value)
       for (int32 r = 0; r < pool_size_; r++) {  // max
         int32 p = r + q * pool_step_;  // p = input patch
         pool.Max(in.ColRange(p*pool_stride_, pool_stride_));
       }
     }
   }

   void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                         const CuMatrixBase<BaseFloat> &out,
                         const CuMatrixBase<BaseFloat> &out_diff,
                         CuMatrixBase<BaseFloat> *in_diff) {
     // useful dims
     int32 num_patches = input_dim_ / pool_stride_;
     int32 num_pools = 1 + (num_patches - pool_size_) / pool_step_;

     //
     // here we note how many diff matrices are summed for each input patch,
     std::vector<int32> patch_summands(num_patches, 0);
     // this metainfo will be used to divide diff of patches
     // used in more than one pool.
     //

     in_diff->SetZero();  // reset

     for (int32 q = 0; q<num_pools; q++) {  // sum
       for (int32 r = 0; r<pool_size_; r++) {
         int32 p = r + q * pool_step_;  // patch number
         //
         CuSubMatrix<BaseFloat> in_p(in.ColRange(p*pool_stride_, pool_stride_));
         CuSubMatrix<BaseFloat> out_q(out.ColRange(q*pool_stride_, pool_stride_));
         //
         CuSubMatrix<BaseFloat> tgt(in_diff->ColRange(p*pool_stride_, pool_stride_));
         CuMatrix<BaseFloat> src(out_diff.ColRange(q*pool_stride_, pool_stride_));

         // Only the pool-inputs with 'max-values' are used to back-propagate into,
         // the rest of derivatives is zeroed-out by a mask.
         CuMatrix<BaseFloat> mask;
         in_p.EqualElementMask(out_q, &mask);
         src.MulElements(mask);
         tgt.AddMat(1.0, src);

         patch_summands[p] += 1;
       }
     }

     // divide diff by #summands (compensate for patches used in more pools)
     for (int32 p = 0; p < num_patches; p++) {
       CuSubMatrix<BaseFloat> tgt(in_diff->ColRange(p*pool_stride_, pool_stride_));
       KALDI_ASSERT(patch_summands[p] > 0);  // patch at least in one pool
       tgt.Scale(1.0/patch_summands[p]);
     }
   }

  private:
   int32 pool_size_,    // input patches used for pooling
         pool_step_,    // shift used for pooling (allow overlapping pools)
         pool_stride_;  // stride used to slice input to a vector of matrices
 };

 }  // namespace nnet1
 }  // namespace kaldi

 #endif  // KALDI_NNET_NNET_MAX_POOLING_COMPONENT_H_
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet1::MaxPoolingComponent::WriteData
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
Definition: nnet-max-pooling-component.h:93

kaldi::nnet1::MaxPoolingComponent::ReadData
void ReadData(std::istream &is, bool binary)
Reads the component content.
Definition: nnet-max-pooling-component.h:70

kaldi::nnet1::MaxPoolingComponent::InitData
void InitData(std::istream &is)
Virtual interface for initialization and I/O.
Definition: nnet-max-pooling-component.h:55

kaldi::nnet1::MaxPoolingComponent
MaxPoolingComponent : The input/output matrices are split to submatrices with width 'pool_stride_'...
Definition: nnet-max-pooling-component.h:40

kaldi::nnet1::Component::input_dim_
int32 input_dim_
Data members.
Definition: nnet-component.h:190

kaldi::nnet1::MaxPoolingComponent::BackpropagateFnc
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
Definition: nnet-max-pooling-component.h:121

kaldi::ReadBasicType
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55

kaldi::nnet1::MaxPoolingComponent::Copy
Component * Copy() const
Copy component (deep copy).
Definition: nnet-max-pooling-component.h:52

kaldi::nnet1::MaxPoolingComponent::GetType
ComponentType GetType() const
Get Type Identification of the component.
Definition: nnet-max-pooling-component.h:53

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::ReadToken
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

nnet-component.h

kaldi::nnet1::Component::ComponentType
ComponentType
Component type identification mechanism.
Definition: nnet-component.h:47

kaldi::nnet1::MaxPoolingComponent::PropagateFnc
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
Definition: nnet-max-pooling-component.h:103

kaldi::nnet1::MaxPoolingComponent::pool_size_
int32 pool_size_
Definition: nnet-max-pooling-component.h:168

kaldi::nnet1::MaxPoolingComponent::~MaxPoolingComponent
~MaxPoolingComponent()
Definition: nnet-max-pooling-component.h:49

kaldi::CuMatrixBase::SetZero
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509

kaldi::ExpectToken
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::CuSubMatrix
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70

kaldi::WriteToken
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134

kaldi::nnet1::Component::output_dim_
int32 output_dim_
Dimension of the output of the Component.
Definition: nnet-component.h:191

kaldi::CuMatrixBase::ColRange
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:665

cu-math.h

kaldi::nnet1::MaxPoolingComponent::MaxPoolingComponent
MaxPoolingComponent(int32 dim_in, int32 dim_out)
Definition: nnet-max-pooling-component.h:42

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::nnet1::Component::kMaxPoolingComponent
Definition: nnet-component.h:81

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::WriteBasicType
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34

kaldi::nnet1::Component
Abstract class, building block of the network.
Definition: nnet-component.cc:51

nnet-utils.h

kaldi::CuMatrixBase::Set
void Set(Real value)
Definition: cu-matrix.cc:531

kaldi::nnet1::MaxPoolingComponent::pool_stride_
int32 pool_stride_
Definition: nnet-max-pooling-component.h:168

kaldi::CuMatrixBase::EqualElementMask
void EqualElementMask(const CuMatrixBase< Real > &mat, CuMatrix< Real > *mask) const
Definition: cu-matrix.cc:3429

kaldi::nnet1::MaxPoolingComponent::pool_step_
int32 pool_step_
Definition: nnet-max-pooling-component.h:168