MaxPoolingComponent : The input/output matrices are split to submatrices with width 'pool_stride_'. More...

#include <nnet-max-pooling-component.h>

Inheritance diagram for MaxPoolingComponent:

Collaboration diagram for MaxPoolingComponent:

Public Member Functions
	MaxPoolingComponent (int32 dim_in, int32 dim_out)

	~MaxPoolingComponent ()

Component *	Copy () const
	Copy component (deep copy),. More...

ComponentType	GetType () const
	Get Type Identification of the component,. More...

void	InitData (std::istream &is)
	Virtual interface for initialization and I/O,. More...

void	ReadData (std::istream &is, bool binary)
	Reads the component content. More...

void	WriteData (std::ostream &os, bool binary) const
	Writes the component content. More...

void	PropagateFnc (const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
	Abstract interface for propagation/backpropagation. More...

void	BackpropagateFnc (const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
	Backward pass transformation (to be implemented by descending class...) More...

Public Member Functions inherited from Component
	Component (int32 input_dim, int32 output_dim)
	Generic interface of a component,. More...

virtual	~Component ()

virtual bool	IsUpdatable () const
	Check if component has 'Updatable' interface (trainable components). More...

virtual bool	IsMultistream () const
	Check if component has 'Recurrent' interface (trainable and recurrent),. More...

int32	InputDim () const
	Get the dimension of the input,. More...

int32	OutputDim () const
	Get the dimension of the output,. More...

void	Propagate (const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *out)
	Perform forward-pass propagation 'in' -> 'out',. More...

void	Backpropagate (const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrix< BaseFloat > *in_diff)
	Perform backward-pass propagation 'out_diff' -> 'in_diff'. More...

void	Write (std::ostream &os, bool binary) const
	Write the component to a stream,. More...

virtual std::string	Info () const
	Print some additional info (after <ComponentName> and the dims),. More...

virtual std::string	InfoGradient () const
	Print some additional info about gradient (after <...> and dims),. More...

Private Attributes
int32	pool_size_

int32	pool_step_

int32	pool_stride_

Additional Inherited Members
Public Types inherited from Component
enum	ComponentType { kUnknown = 0x0, kUpdatableComponent = 0x0100, kAffineTransform, kLinearTransform, kConvolutionalComponent, kLstmProjected, kBlstmProjected, kRecurrentComponent, kActivationFunction = 0x0200, kSoftmax, kHiddenSoftmax, kBlockSoftmax, kSigmoid, kTanh, kParametricRelu, kDropout, kLengthNormComponent, kTranform = 0x0400, kRbm, kSplice, kCopy, kTranspose, kBlockLinearity, kAddShift, kRescale, kKlHmm = 0x0800, kSentenceAveragingComponent, kSimpleSentenceAveragingComponent, kAveragePoolingComponent, kMaxPoolingComponent, kFramePoolingComponent, kParallelComponent, kMultiBasisComponent }
	Component type identification mechanism,. More...

Static Public Member Functions inherited from Component
static const char *	TypeToMarker (ComponentType t)
	Converts component type to marker,. More...

static ComponentType	MarkerToType (const std::string &s)
	Converts marker to component type (case insensitive),. More...

static Component *	Init (const std::string &conf_line)
	Initialize component from a line in config file,. More...

static Component *	Read (std::istream &is, bool binary)
	Read the component from a stream (static method),. More...

Static Public Attributes inherited from Component
static const struct key_value	kMarkerMap []
	The table with pairs of Component types and markers (defined in nnet-component.cc),. More...

Protected Attributes inherited from Component
int32	input_dim_
	Data members,. More...

int32	output_dim_
	Dimension of the output of the Component,. More...

Detailed Description

MaxPoolingComponent : The input/output matrices are split into submatrices of width 'pool_stride_'.

The pooling is done over the 3rd axis of the set of 2D matrices. Our pooling supports overlapping pools; overlaps occur when pool_step_ < pool_size_.

Definition at line 40 of file nnet-max-pooling-component.h.

Constructor & Destructor Documentation

◆ MaxPoolingComponent()

MaxPoolingComponent	(	int32	dim_in,
		int32	dim_out
	)

inline

Definition at line 42 of file nnet-max-pooling-component.h.

Referenced by MaxPoolingComponent::Copy().

                                                   :
     Component(dim_in, dim_out),
     // Pooling hyper-parameters start at 0; InitData() asserts they are
     // non-zero after parsing the config, and ReadData() overwrites them
     // with the values stored in the stream.
     pool_size_(0),
     pool_step_(0),
     pool_stride_(0)
   { }

◆ ~MaxPoolingComponent()

~MaxPoolingComponent ( )

inline

Definition at line 49 of file nnet-max-pooling-component.h.

50 { }

Member Function Documentation

◆ BackpropagateFnc()

void BackpropagateFnc	(	const CuMatrixBase< BaseFloat > &	in,
		const CuMatrixBase< BaseFloat > &	out,
		const CuMatrixBase< BaseFloat > &	out_diff,
		CuMatrixBase< BaseFloat > *	in_diff
	)

inlinevirtual

Backward pass transformation (to be implemented by descending class...)

Implements Component.

Definition at line 121 of file nnet-max-pooling-component.h.

References CuMatrixBase< Real >::ColRange(), CuMatrixBase< Real >::EqualElementMask(), Component::input_dim_, KALDI_ASSERT, MaxPoolingComponent::pool_size_, MaxPoolingComponent::pool_step_, MaxPoolingComponent::pool_stride_, and CuMatrixBase< Real >::SetZero().

                                                           {
     // useful dims
     int32 num_patches = input_dim_ / pool_stride_;
     int32 num_pools = 1 + (num_patches - pool_size_) / pool_step_;
 
     //
     // here we note how many diff matrices are summed for each input patch,
     std::vector<int32> patch_summands(num_patches, 0);
     // this metainfo will be used to divide diff of patches
     // used in more than one pool.
     //
 
     in_diff->SetZero();  // reset
 
     for (int32 q = 0; q<num_pools; q++) {  // sum
       for (int32 r = 0; r<pool_size_; r++) {
         int32 p = r + q * pool_step_;  // patch number
         //
         CuSubMatrix<BaseFloat> in_p(in.ColRange(p*pool_stride_, pool_stride_));
         CuSubMatrix<BaseFloat> out_q(out.ColRange(q*pool_stride_, pool_stride_));
         //
         CuSubMatrix<BaseFloat> tgt(in_diff->ColRange(p*pool_stride_, pool_stride_));
         CuMatrix<BaseFloat> src(out_diff.ColRange(q*pool_stride_, pool_stride_));
 
         // Only the pool-inputs with 'max-values' are used to back-propagate into,
         // the rest of derivatives is zeroed-out by a mask.
         CuMatrix<BaseFloat> mask;
         in_p.EqualElementMask(out_q, &mask);
         src.MulElements(mask);
         tgt.AddMat(1.0, src);
 
         patch_summands[p] += 1;
       }
     }
 
     // divide diff by #summands (compensate for patches used in more pools)
     for (int32 p = 0; p < num_patches; p++) {
       CuSubMatrix<BaseFloat> tgt(in_diff->ColRange(p*pool_stride_, pool_stride_));
       KALDI_ASSERT(patch_summands[p] > 0);  // patch at least in one pool
       tgt.Scale(1.0/patch_summands[p]);
     }
   }

◆ Copy()

Component* Copy ( ) const

inlinevirtual

Copy component (deep copy).

Implements Component.

Definition at line 52 of file nnet-max-pooling-component.h.

References MaxPoolingComponent::MaxPoolingComponent().

52 { return new MaxPoolingComponent(*this); }

kaldi::nnet1::MaxPoolingComponent::MaxPoolingComponent

MaxPoolingComponent(int32 dim_in, int32 dim_out)

Definition: nnet-max-pooling-component.h:42

◆ GetType()

ComponentType GetType ( ) const

inlinevirtual

Get Type Identification of the component.

Implements Component.

Definition at line 53 of file nnet-max-pooling-component.h.

References Component::kMaxPoolingComponent.

53 { return kMaxPoolingComponent; }

kaldi::nnet1::Component::kMaxPoolingComponent

Definition: nnet-component.h:81

◆ InitData()

void InitData ( std::istream & is )

inlinevirtual

Virtual interface for initialization and I/O.

Initialize internal data of a component

Reimplemented from Component.

Definition at line 55 of file nnet-max-pooling-component.h.

References KALDI_ASSERT, KALDI_ERR, MaxPoolingComponent::pool_size_, MaxPoolingComponent::pool_step_, MaxPoolingComponent::pool_stride_, kaldi::ReadBasicType(), and kaldi::ReadToken().

                                 {
     // parse config
     std::string token;
     while (is >> std::ws, !is.eof()) {
       ReadToken(is, false, &token);
        if (token == "<PoolSize>") ReadBasicType(is, false, &pool_size_);
       else if (token == "<PoolStep>") ReadBasicType(is, false, &pool_step_);
       else if (token == "<PoolStride>") ReadBasicType(is, false, &pool_stride_);
       else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                      << " (PoolSize|PoolStep|PoolStride)";
     }
     // check
     KALDI_ASSERT(pool_size_ != 0 && pool_step_ != 0 && pool_stride_ != 0);
   }

◆ PropagateFnc()

void PropagateFnc	(	const CuMatrixBase< BaseFloat > &	in,
		CuMatrixBase< BaseFloat > *	out
	)

inlinevirtual

Abstract interface for propagation/backpropagation.

Forward pass transformation (to be implemented by descending class...)

Implements Component.

Definition at line 103 of file nnet-max-pooling-component.h.

References CuMatrixBase< Real >::ColRange(), Component::input_dim_, MaxPoolingComponent::pool_size_, MaxPoolingComponent::pool_step_, MaxPoolingComponent::pool_stride_, and CuMatrixBase< Real >::Set().

                                                   {
     // useful dims
     int32 num_patches = input_dim_ / pool_stride_;
     int32 num_pools = 1 + (num_patches - pool_size_) / pool_step_;
 
     // do the max-pooling (pools indexed by q)
     for (int32 q = 0; q < num_pools; q++) {
       // get output buffer of the pool
       CuSubMatrix<BaseFloat> pool(out->ColRange(q*pool_stride_, pool_stride_));
       pool.Set(-1e20);  // reset (large negative value)
       for (int32 r = 0; r < pool_size_; r++) {  // max
         int32 p = r + q * pool_step_;  // p = input patch
         pool.Max(in.ColRange(p*pool_stride_, pool_stride_));
       }
     }
   }

◆ ReadData()

void ReadData	(	std::istream &	is,
		bool	binary
	)

inlinevirtual

Reads the component content.

Reimplemented from Component.

Definition at line 70 of file nnet-max-pooling-component.h.

References kaldi::ExpectToken(), Component::input_dim_, KALDI_ASSERT, Component::output_dim_, MaxPoolingComponent::pool_size_, MaxPoolingComponent::pool_step_, MaxPoolingComponent::pool_stride_, and kaldi::ReadBasicType().

                                              {
     // pooling hyperparameters
     ExpectToken(is, binary, "<PoolSize>");
     ReadBasicType(is, binary, &pool_size_);
     ExpectToken(is, binary, "<PoolStep>");
     ReadBasicType(is, binary, &pool_step_);
     ExpectToken(is, binary, "<PoolStride>");
     ReadBasicType(is, binary, &pool_stride_);
 
     //
     // Sanity checks:
     //
     // number of patches:
     KALDI_ASSERT(input_dim_ % pool_stride_ == 0);
     int32 num_patches = input_dim_ / pool_stride_;
     // number of pools:
     KALDI_ASSERT((num_patches - pool_size_) % pool_step_ == 0);
     int32 num_pools = 1 + (num_patches - pool_size_) / pool_step_;
     // check output dim:
     KALDI_ASSERT(output_dim_ == num_pools * pool_stride_);
     //
   }

◆ WriteData()

void WriteData	(	std::ostream &	os,
		bool	binary
	)		const

inlinevirtual

Writes the component content.

Reimplemented from Component.

Definition at line 93 of file nnet-max-pooling-component.h.

References MaxPoolingComponent::pool_size_, MaxPoolingComponent::pool_step_, MaxPoolingComponent::pool_stride_, kaldi::WriteBasicType(), and kaldi::WriteToken().

                                                       {
     // Serializes the pooling hyper-parameters as '<Token> value' pairs.
     // The order <PoolSize>, <PoolStep>, <PoolStride> is the order in which
     // ReadData() expects the tokens, so it must not be changed independently.
     WriteToken(os, binary, "<PoolSize>");
     WriteBasicType(os, binary, pool_size_);
     WriteToken(os, binary, "<PoolStep>");
     WriteBasicType(os, binary, pool_step_);
     WriteToken(os, binary, "<PoolStride>");
     WriteBasicType(os, binary, pool_stride_);
   }