FramePoolingComponent : The input/output matrices are split to frames of width 'feature_dim_'. More...

#include <nnet-frame-pooling-component.h>

Inheritance diagram for FramePoolingComponent:

Collaboration diagram for FramePoolingComponent:

[legend]

Public Member Functions
	FramePoolingComponent (int32 dim_in, int32 dim_out)

	~FramePoolingComponent ()

Component *	Copy () const
	Copy component (deep copy),. More...

ComponentType	GetType () const
	Get Type Identification of the component,. More...

void	InitData (std::istream &is)
	Here the offsets are w.r.t. the central frame, which has offset 0. More...

void	ReadData (std::istream &is, bool binary)
	Here the offsets are w.r.t. the leftmost frame of the splice, whose offset is 0. More...

void	WriteData (std::ostream &os, bool binary) const
	Writes the component content. More...

int32	NumParams () const
	Number of trainable parameters,. More...

void	GetGradient (VectorBase< BaseFloat > *gradient) const
	Get gradient reshaped as a vector,. More...

void	GetParams (VectorBase< BaseFloat > *params) const
	Get the trainable parameters reshaped as a vector,. More...

void	SetParams (const VectorBase< BaseFloat > &params)
	Set the trainable parameters from 'params', given reshaped as a vector. More...

std::string	Info () const
	Print some additional info (after <ComponentName> and the dims),. More...

std::string	InfoGradient () const
	Print some additional info about gradient (after <...> and dims),. More...

void	PropagateFnc (const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
	Abstract interface for propagation/backpropagation. More...

void	BackpropagateFnc (const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
	Backward pass transformation (to be implemented by descending class...) More...

void	Update (const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
	Compute gradient and update parameters,. More...

Public Member Functions inherited from UpdatableComponent
	UpdatableComponent (int32 input_dim, int32 output_dim)

virtual	~UpdatableComponent ()

bool	IsUpdatable () const
	Check if contains trainable parameters,. More...

virtual void	SetTrainOptions (const NnetTrainOptions &opts)
	Set the training options to the component,. More...

const NnetTrainOptions &	GetTrainOptions () const
	Get the training options from the component,. More...

virtual void	SetLearnRateCoef (BaseFloat val)
	Set the learn-rate coefficient,. More...

virtual void	SetBiasLearnRateCoef (BaseFloat val)
	Set the learn-rate coefficient for bias,. More...

Public Member Functions inherited from Component
	Component (int32 input_dim, int32 output_dim)
	Generic interface of a component,. More...

virtual	~Component ()

virtual bool	IsMultistream () const
	Check if component has 'Recurrent' interface (trainable and recurrent),. More...

int32	InputDim () const
	Get the dimension of the input,. More...

int32	OutputDim () const
	Get the dimension of the output,. More...

void	Propagate (const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *out)
	Perform forward-pass propagation 'in' -> 'out',. More...

void	Backpropagate (const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrix< BaseFloat > *in_diff)
	Perform backward-pass propagation 'out_diff' -> 'in_diff'. More...

void	Write (std::ostream &os, bool binary) const
	Write the component to a stream,. More...

Private Attributes
int32	feature_dim_

std::vector< int32 >	offset_

std::vector< Vector< BaseFloat > >	weight_
	Vector of pooling weight vectors,. More...

std::vector< Vector< BaseFloat > >	weight_diff_
	Derivatives of weight vectors. More...

bool	normalize_

Additional Inherited Members
Public Types inherited from Component
enum	ComponentType { kUnknown = 0x0, kUpdatableComponent = 0x0100, kAffineTransform, kLinearTransform, kConvolutionalComponent, kLstmProjected, kBlstmProjected, kRecurrentComponent, kActivationFunction = 0x0200, kSoftmax, kHiddenSoftmax, kBlockSoftmax, kSigmoid, kTanh, kParametricRelu, kDropout, kLengthNormComponent, kTranform = 0x0400, kRbm, kSplice, kCopy, kTranspose, kBlockLinearity, kAddShift, kRescale, kKlHmm = 0x0800, kSentenceAveragingComponent, kSimpleSentenceAveragingComponent, kAveragePoolingComponent, kMaxPoolingComponent, kFramePoolingComponent, kParallelComponent, kMultiBasisComponent }
	Component type identification mechanism,. More...

Static Public Member Functions inherited from Component
static const char *	TypeToMarker (ComponentType t)
	Converts component type to marker,. More...

static ComponentType	MarkerToType (const std::string &s)
	Converts marker to component type (case insensitive),. More...

static Component *	Init (const std::string &conf_line)
	Initialize component from a line in config file,. More...

static Component *	Read (std::istream &is, bool binary)
	Read the component from a stream (static method),. More...

Static Public Attributes inherited from Component
static const struct key_value	kMarkerMap []
	The table with pairs of Component types and markers (defined in nnet-component.cc),. More...

Protected Attributes inherited from UpdatableComponent
NnetTrainOptions	opts_
	Option-class with training hyper-parameters,. More...

BaseFloat	learn_rate_coef_
	Scalar applied to learning rate for weight matrices (to be used in ::Update method),. More...

BaseFloat	bias_learn_rate_coef_
	Scalar applied to learning rate for bias (to be used in ::Update method),. More...

Protected Attributes inherited from Component
int32	input_dim_
	Data members,. More...

int32	output_dim_
	Dimension of the output of the Component,. More...

Detailed Description

FramePoolingComponent : The input/output matrices are split to frames of width 'feature_dim_'.

Here we do weighted pooling of frames along the temporal axis. Each pool is given by the frame-offset of its leftmost frame, and the pool-size is defined by the size of its weight-vector.

Definition at line 43 of file nnet-frame-pooling-component.h.

Constructor & Destructor Documentation

◆ FramePoolingComponent()

FramePoolingComponent	(	int32	dim_in,
		int32	dim_out
	)

inline

Definition at line 45 of file nnet-frame-pooling-component.h.

Referenced by FramePoolingComponent::Copy().

                                                     :
     UpdatableComponent(dim_in, dim_out),
     feature_dim_(0),  // no frame width yet; InitData()/ReadData() read the real value
     normalize_(false)  // Update() re-normalizes the weights only when this is true
   { }

◆ ~FramePoolingComponent()

~FramePoolingComponent ( )

inline

Definition at line 51 of file nnet-frame-pooling-component.h.

52 { }

Member Function Documentation

◆ BackpropagateFnc()

void BackpropagateFnc	(	const CuMatrixBase< BaseFloat > &	in,
		const CuMatrixBase< BaseFloat > &	out,
		const CuMatrixBase< BaseFloat > &	out_diff,
		CuMatrixBase< BaseFloat > *	in_diff
	)

inlinevirtual

Backward pass transformation (to be implemented by descending class...)

Implements Component.

Definition at line 233 of file nnet-frame-pooling-component.h.

References KALDI_ERR.

                                                           {
     // Stub: nothing here writes to 'in_diff', every call ends in KALDI_ERR.
     KALDI_ERR << "Unimplemented.";
   }

◆ Copy()

Component* Copy ( ) const

inlinevirtual

Copy component (deep copy),.

Implements Component.

Definition at line 54 of file nnet-frame-pooling-component.h.

References FramePoolingComponent::FramePoolingComponent().

54 { return new FramePoolingComponent(*this); }

kaldi::nnet1::FramePoolingComponent::FramePoolingComponent

FramePoolingComponent(int32 dim_in, int32 dim_out)

Definition: nnet-frame-pooling-component.h:45

◆ GetGradient()

void GetGradient ( VectorBase< BaseFloat > * gradient ) const

inlinevirtual

Get gradient reshaped as a vector,.

Implements UpdatableComponent.

Definition at line 175 of file nnet-frame-pooling-component.h.

References KALDI_ERR.

                                                           {
     // Stub: nothing here writes to 'gradient', every call ends in KALDI_ERR.
     KALDI_ERR << "Unimplemented.";
   }

◆ GetParams()

void GetParams ( VectorBase< BaseFloat > * params ) const

inlinevirtual

Get the trainable parameters reshaped as a vector,.

Implements UpdatableComponent.

Definition at line 179 of file nnet-frame-pooling-component.h.

References VectorBase< Real >::Dim(), KALDI_ASSERT, FramePoolingComponent::NumParams(), VectorBase< Real >::Range(), and FramePoolingComponent::weight_.

                                                       {
     KALDI_ASSERT(params->Dim() == NumParams());
     int32 offset = 0;
     for (int32 p = 0; p < weight_.size(); p++) {
       params->Range(offset, weight_[p].Dim()).CopyFromVec(weight_[p]);
       offset += weight_[p].Dim();
     }
     KALDI_ASSERT(offset == params->Dim());
   }

◆ GetType()

ComponentType GetType ( ) const

inlinevirtual

Get Type Identification of the component,.

Implements Component.

Definition at line 55 of file nnet-frame-pooling-component.h.

References Component::kFramePoolingComponent.

55 { return kFramePoolingComponent; }

kaldi::nnet1::Component::kFramePoolingComponent

Definition: nnet-component.h:82

◆ Info()

std::string Info ( ) const

inlinevirtual

Print some additional info (after <ComponentName> and the dims),.

Reimplemented from Component.

Definition at line 193 of file nnet-frame-pooling-component.h.

References FramePoolingComponent::offset_, and FramePoolingComponent::weight_.

                          {
     std::ostringstream oss;
     oss << "\n  (offset,weights) : ";
     for (int32 p = 0; p < weight_.size(); p++) {
       oss << "(" << offset_[p] << "," << weight_[p] << "), ";
     }
     return oss.str();
   }

◆ InfoGradient()

std::string InfoGradient ( ) const

inlinevirtual

Print some additional info about gradient (after <...> and dims),.

Reimplemented from Component.

Definition at line 202 of file nnet-frame-pooling-component.h.

References UpdatableComponent::learn_rate_coef_, FramePoolingComponent::offset_, kaldi::nnet1::ToString(), and FramePoolingComponent::weight_diff_.

                                  {
     std::ostringstream oss;
     oss << "\n  lr-coef " << ToString(learn_rate_coef_);
     oss << "\n  (offset,weights_grad) : ";
     for (int32 p = 0; p < weight_diff_.size(); p++) {
       oss << "(" << offset_[p] << ",";
       // pass the weight vector, remove '\n' as last char
       oss << weight_diff_[p];
       oss.seekp(-1, std::ios_base::cur);
       oss << "), ";
     }
     return oss.str();
   }

◆ InitData()

void InitData ( std::istream & is )

inlinevirtual

Here the offsets are w.r.t. the central frame, which has offset 0. Note: both the offsets and pool sizes can be negative.

Implements UpdatableComponent.

Definition at line 61 of file nnet-frame-pooling-component.h.

References VectorBase< Real >::Dim(), FramePoolingComponent::feature_dim_, Component::InputDim(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, UpdatableComponent::learn_rate_coef_, FramePoolingComponent::normalize_, FramePoolingComponent::offset_, VectorBase< Real >::Range(), Vector< Real >::Read(), kaldi::ReadBasicType(), kaldi::ReadIntegerVector(), kaldi::ReadToken(), and FramePoolingComponent::weight_.

                                 {
     // temporary, for initialization,
     std::vector<int32> pool_size;
     std::vector<int32> central_offset;
     Vector<BaseFloat> pool_weight;
     float learn_rate_coef = 0.01;
     // parse config
     std::string token;
     while (is >> std::ws, !is.eof()) {
       ReadToken(is, false, &token);
        if (token == "<FeatureDim>") ReadBasicType(is, false, &feature_dim_);
       else if (token == "<CentralOffset>") ReadIntegerVector(is, false, &central_offset);
       else if (token == "<PoolSize>") ReadIntegerVector(is, false, &pool_size);
       else if (token == "<PoolWeight>") pool_weight.Read(is, false);
       else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef);
       else if (token == "<Normalize>") ReadBasicType(is, false, &normalize_);
       else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                      << " (FeatureDim|CentralOffset <vec>|PoolSize <vec>|LearnRateCoef|Normalize)";
     }
     // check inputs:
     KALDI_ASSERT(feature_dim_ > 0);
     KALDI_ASSERT(central_offset.size() > 0);
     KALDI_ASSERT(central_offset.size() == pool_size.size());
     // initialize:
     int32 num_frames = InputDim() / feature_dim_;
     int32 central_frame = (num_frames -1) / 2;
     int32 num_pools = central_offset.size();
     offset_.resize(num_pools);
     weight_.resize(num_pools);
     for (int32 p = 0; p < num_pools; p++) {
       offset_[p] = central_frame + central_offset[p] + std::min(0, pool_size[p]+1);
       weight_[p].Resize(std::abs(pool_size[p]));
       weight_[p].Set(1.0/std::abs(pool_size[p]));
     }
     learn_rate_coef_ = learn_rate_coef;
     if (pool_weight.Dim() != 0) {
       KALDI_LOG << "Initializing from pool-weight vector";
       int32 num_weights = 0;
       for (int32 p = 0; p < num_pools; p++) {
         weight_[p].CopyFromVec(pool_weight.Range(num_weights, weight_[p].Dim()));
         num_weights += weight_[p].Dim();
       }
       KALDI_ASSERT(num_weights == pool_weight.Dim());
     }
     // check that offsets are within the splice we had,
     for (int32 p = 0; p < num_pools; p++) {
       KALDI_ASSERT(offset_[p] >= 0);
       KALDI_ASSERT(offset_[p] + weight_[p].Dim() <= num_frames);
     }
   }

◆ NumParams()

int32 NumParams ( ) const

inlinevirtual

Number of trainable parameters,.

Implements UpdatableComponent.

Definition at line 167 of file nnet-frame-pooling-component.h.

References FramePoolingComponent::weight_.

Referenced by FramePoolingComponent::GetParams().

                           {
     int32 ans = 0;
     for (int32 p = 0; p < weight_.size(); p++) {
       ans += weight_[p].Dim();
     }
     return ans;
   }

◆ PropagateFnc()

void PropagateFnc	(	const CuMatrixBase< BaseFloat > &	in,
		CuMatrixBase< BaseFloat > *	out
	)

inlinevirtual

Abstract interface for propagation/backpropagation.

Forward pass transformation (to be implemented by descending class...)

Implements Component.

Definition at line 216 of file nnet-frame-pooling-component.h.

References CuMatrixBase< Real >::ColRange(), FramePoolingComponent::feature_dim_, rnnlm::i, KALDI_ASSERT, CuMatrixBase< Real >::NumCols(), FramePoolingComponent::offset_, CuMatrixBase< Real >::SetZero(), and FramePoolingComponent::weight_.

                                                   {
     // check dims
     KALDI_ASSERT(in.NumCols() % feature_dim_ == 0);
     KALDI_ASSERT(out->NumCols() % feature_dim_ == 0);
     // useful dims
     int32 num_pools = offset_.size();
     // compute the output pools
     for (int32 p = 0; p < num_pools; p++) {
       CuSubMatrix<BaseFloat> tgt(out->ColRange(p*feature_dim_, feature_dim_));
       tgt.SetZero();  // reset
       for (int32 i = 0; i < weight_[p].Dim(); i++) {
         tgt.AddMat(weight_[p](i), in.ColRange((offset_[p]+i) * feature_dim_, feature_dim_));
       }
     }
   }

◆ ReadData()

void ReadData	(	std::istream &	is,
		bool	binary
	)

inlinevirtual

Here the offsets are w.r.t. the leftmost frame of the splice, whose offset is 0. If we spliced +/- 15 frames, the central frame has index '15'.

Reimplemented from Component.

Definition at line 116 of file nnet-frame-pooling-component.h.

References kaldi::ExpectToken(), FramePoolingComponent::feature_dim_, Component::input_dim_, Component::InputDim(), KALDI_ASSERT, UpdatableComponent::learn_rate_coef_, FramePoolingComponent::normalize_, FramePoolingComponent::offset_, Component::output_dim_, kaldi::ReadBasicType(), kaldi::ReadIntegerVector(), and FramePoolingComponent::weight_.

                                              {
     // get the input dimension before splicing
     ExpectToken(is, binary, "<FeatureDim>");
     ReadBasicType(is, binary, &feature_dim_);
     ExpectToken(is, binary, "<LearnRateCoef>");
     ReadBasicType(is, binary, &learn_rate_coef_);
     ExpectToken(is, binary, "<Normalize>");
     ReadBasicType(is, binary, &normalize_);
     // read the offsets w.r.t. central frame
     ExpectToken(is, binary, "<FrameOffset>");
     ReadIntegerVector(is, binary, &offset_);
     // read the frame-weights
     ExpectToken(is, binary, "<FrameWeight>");
     int32 num_pools = offset_.size();
     weight_.resize(num_pools);
     for (int32 p = 0; p < num_pools; p++) {
       weight_[p].Read(is, binary);
     }
     //
     // Sanity checks:
     //
     KALDI_ASSERT(input_dim_ % feature_dim_ == 0);
     KALDI_ASSERT(output_dim_ % feature_dim_ == 0);
     KALDI_ASSERT(output_dim_ / feature_dim_ == num_pools);
     KALDI_ASSERT(offset_.size() == weight_.size());
     // check the shifts don't exceed the splicing
     int32 total_frame = InputDim() / feature_dim_;
     for (int32 p = 0; p < num_pools; p++) {
       KALDI_ASSERT(offset_[p] >= 0);
       KALDI_ASSERT(offset_[p] + (weight_[p].Dim()-1) < total_frame);
     }
     //
   }

◆ SetParams()

void SetParams ( const VectorBase< BaseFloat > & params )

inlinevirtual

Set the trainable parameters from 'params', given reshaped as a vector.

Implements UpdatableComponent.

Definition at line 189 of file nnet-frame-pooling-component.h.

References KALDI_ERR.

                                                       {
     KALDI_ERR << "Unimplemented.";
   }

◆ Update()

void Update	(	const CuMatrixBase< BaseFloat > &	input,
		const CuMatrixBase< BaseFloat > &	diff
	)

inlinevirtual

Compute gradient and update parameters,.

Implements UpdatableComponent.

Definition at line 241 of file nnet-frame-pooling-component.h.

References CuMatrixBase< Real >::ColRange(), FramePoolingComponent::feature_dim_, rnnlm::i, kaldi::kSetZero, NnetTrainOptions::learn_rate, UpdatableComponent::learn_rate_coef_, CuMatrixBase< Real >::MulElements(), FramePoolingComponent::normalize_, FramePoolingComponent::offset_, UpdatableComponent::opts_, CuMatrixBase< Real >::Sum(), FramePoolingComponent::weight_, and FramePoolingComponent::weight_diff_.

                                                    {
     // useful dims
     int32 num_pools = offset_.size();
     // lazy init
     if (weight_diff_.size() != num_pools) weight_diff_.resize(num_pools);
     // get the derivatives
     for (int32 p = 0; p < num_pools; p++) {
       weight_diff_[p].Resize(weight_[p].Dim(), kSetZero);  // reset
       for (int32 i = 0; i < weight_[p].Dim(); i++) {
         // multiply matrices element-wise, and sum to get the derivative
         CuSubMatrix<BaseFloat> in_frame(
           input.ColRange((offset_[p]+i) * feature_dim_, feature_dim_)
         );
         CuSubMatrix<BaseFloat> diff_frame(
           diff.ColRange(p * feature_dim_, feature_dim_)
         );
         CuMatrix<BaseFloat> mul_elems(in_frame);
         mul_elems.MulElements(diff_frame);
         weight_diff_[p](i) = mul_elems.Sum();
       }
     }
     // update
     for (int32 p = 0; p < num_pools; p++) {
       weight_[p].AddVec(- learn_rate_coef_ * opts_.learn_rate, weight_diff_[p]);
     }
     // force to be positive, re-normalize the sum
     if (normalize_) {
       for (int32 p = 0; p < num_pools; p++) {
         weight_[p].ApplyFloor(0.0);
         weight_[p].Scale(1.0/weight_[p].Sum());
       }
     }
   }

◆ WriteData()

void WriteData	(	std::ostream &	os,
		bool	binary
	)		const

inlinevirtual

Writes the component content.

Reimplemented from Component.

Definition at line 150 of file nnet-frame-pooling-component.h.

References FramePoolingComponent::feature_dim_, UpdatableComponent::learn_rate_coef_, FramePoolingComponent::normalize_, FramePoolingComponent::offset_, FramePoolingComponent::weight_, kaldi::WriteBasicType(), kaldi::WriteIntegerVector(), and kaldi::WriteToken().

                                                     {
     WriteToken(os, binary, "<FeatureDim>");
     WriteBasicType(os, binary, feature_dim_);
     WriteToken(os, binary, "<LearnRateCoef>");
     WriteBasicType(os, binary, learn_rate_coef_);
     WriteToken(os, binary, "<Normalize>");
     WriteBasicType(os, binary, normalize_);
     WriteToken(os, binary, "<FrameOffset>");
     WriteIntegerVector(os, binary, offset_);
     // write pooling weights of individual frames
     WriteToken(os, binary, "<FrameWeight>");
     int32 num_pools = offset_.size();
     for (int32 p = 0; p < num_pools; p++) {
       weight_[p].Write(os, binary);
     }
   }