#include <nnet-blstm-projected.h>

Inheritance diagram for BlstmProjected:

[legend]

Collaboration diagram for BlstmProjected:

[legend]

Public Member Functions
	BlstmProjected (int32 input_dim, int32 output_dim)

	~BlstmProjected ()

Component *	Copy () const
	Copy component (deep copy),. More...

ComponentType	GetType () const
	Get Type Identification of the component,. More...

void	InitData (std::istream &is)
	Initialize the content of the component by the 'line' from the prototype,. More...

void	ReadData (std::istream &is, bool binary)
	Reads the component content. More...

void	WriteData (std::ostream &os, bool binary) const
	Writes the component content. More...

int32	NumParams () const
	Number of trainable parameters,. More...

void	GetGradient (VectorBase< BaseFloat > *gradient) const
	Get gradient reshaped as a vector,. More...

void	GetParams (VectorBase< BaseFloat > *params) const
	Get the trainable parameters reshaped as a vector,. More...

void	SetParams (const VectorBase< BaseFloat > &params)
	Set the trainable parameters from 'params', reshaped as a vector. More...

std::string	Info () const
	Print some additional info (after <ComponentName> and the dims),. More...

std::string	InfoGradient () const
	Print some additional info about gradient (after <...> and dims),. More...

void	PropagateFnc (const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
	Abstract interface for propagation/backpropagation. More...

void	BackpropagateFnc (const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
	Backward pass transformation (to be implemented by descending class...) More...

void	Update (const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
	Compute gradient and update parameters,. More...

Public Member Functions inherited from MultistreamComponent
	MultistreamComponent (int32 input_dim, int32 output_dim)

bool	IsMultistream () const
	Check if component has 'Recurrent' interface (trainable and recurrent),. More...

virtual void	SetSeqLengths (const std::vector< int32 > &sequence_lengths)

int32	NumStreams () const

virtual void	ResetStreams (const std::vector< int32 > &stream_reset_flag)
	Optional function to reset the transfer of context (not used for BLSTMs). More...

Public Member Functions inherited from UpdatableComponent
	UpdatableComponent (int32 input_dim, int32 output_dim)

virtual	~UpdatableComponent ()

bool	IsUpdatable () const
	Check if contains trainable parameters,. More...

virtual void	SetTrainOptions (const NnetTrainOptions &opts)
	Set the training options to the component,. More...

const NnetTrainOptions &	GetTrainOptions () const
	Get the training options from the component,. More...

virtual void	SetLearnRateCoef (BaseFloat val)
	Set the learn-rate coefficient,. More...

virtual void	SetBiasLearnRateCoef (BaseFloat val)
	Set the learn-rate coefficient for bias,. More...

Public Member Functions inherited from Component
	Component (int32 input_dim, int32 output_dim)
	Generic interface of a component,. More...

virtual	~Component ()

int32	InputDim () const
	Get the dimension of the input,. More...

int32	OutputDim () const
	Get the dimension of the output,. More...

void	Propagate (const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *out)
	Perform forward-pass propagation 'in' -> 'out',. More...

void	Backpropagate (const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrix< BaseFloat > *in_diff)
	Perform backward-pass propagation 'out_diff' -> 'in_diff'. More...

void	Write (std::ostream &os, bool binary) const
	Write the component to a stream,. More...

Private Attributes
int32	cell_dim_
	the number of memory-cell blocks, More...

int32	proj_dim_
	recurrent projection layer dim, More...

BaseFloat	cell_clip_
	Clipping of 'cell-values' in forward pass (per-frame),. More...

BaseFloat	diff_clip_
	Clipping of 'derivatives' in backprop (per-frame),. More...

BaseFloat	cell_diff_clip_
	Clipping of 'cell-derivatives' accumulated over CEC (per-frame),. More...

BaseFloat	grad_clip_
	Clipping of the updates,. More...

CuMatrix< BaseFloat >	f_w_gifo_x_

CuMatrix< BaseFloat >	f_w_gifo_x_corr_

CuMatrix< BaseFloat >	b_w_gifo_x_

CuMatrix< BaseFloat >	b_w_gifo_x_corr_

CuMatrix< BaseFloat >	f_w_gifo_r_

CuMatrix< BaseFloat >	f_w_gifo_r_corr_

CuMatrix< BaseFloat >	b_w_gifo_r_

CuMatrix< BaseFloat >	b_w_gifo_r_corr_

CuVector< BaseFloat >	f_bias_

CuVector< BaseFloat >	f_bias_corr_

CuVector< BaseFloat >	b_bias_

CuVector< BaseFloat >	b_bias_corr_

CuVector< BaseFloat >	f_peephole_i_c_

CuVector< BaseFloat >	f_peephole_f_c_

CuVector< BaseFloat >	f_peephole_o_c_

CuVector< BaseFloat >	b_peephole_i_c_

CuVector< BaseFloat >	b_peephole_f_c_

CuVector< BaseFloat >	b_peephole_o_c_

CuVector< BaseFloat >	f_peephole_i_c_corr_

CuVector< BaseFloat >	f_peephole_f_c_corr_

CuVector< BaseFloat >	f_peephole_o_c_corr_

CuVector< BaseFloat >	b_peephole_i_c_corr_

CuVector< BaseFloat >	b_peephole_f_c_corr_

CuVector< BaseFloat >	b_peephole_o_c_corr_

CuMatrix< BaseFloat >	f_w_r_m_

CuMatrix< BaseFloat >	f_w_r_m_corr_

CuMatrix< BaseFloat >	b_w_r_m_

CuMatrix< BaseFloat >	b_w_r_m_corr_

CuMatrix< BaseFloat >	f_propagate_buf_

CuMatrix< BaseFloat >	b_propagate_buf_

CuMatrix< BaseFloat >	f_backpropagate_buf_

CuMatrix< BaseFloat >	b_backpropagate_buf_

Additional Inherited Members
Public Types inherited from Component
enum	ComponentType { kUnknown = 0x0, kUpdatableComponent = 0x0100, kAffineTransform, kLinearTransform, kConvolutionalComponent, kLstmProjected, kBlstmProjected, kRecurrentComponent, kActivationFunction = 0x0200, kSoftmax, kHiddenSoftmax, kBlockSoftmax, kSigmoid, kTanh, kParametricRelu, kDropout, kLengthNormComponent, kTranform = 0x0400, kRbm, kSplice, kCopy, kTranspose, kBlockLinearity, kAddShift, kRescale, kKlHmm = 0x0800, kSentenceAveragingComponent, kSimpleSentenceAveragingComponent, kAveragePoolingComponent, kMaxPoolingComponent, kFramePoolingComponent, kParallelComponent, kMultiBasisComponent }
	Component type identification mechanism,. More...

Static Public Member Functions inherited from Component
static const char *	TypeToMarker (ComponentType t)
	Converts component type to marker,. More...

static ComponentType	MarkerToType (const std::string &s)
	Converts marker to component type (case insensitive),. More...

static Component *	Init (const std::string &conf_line)
	Initialize component from a line in config file,. More...

static Component *	Read (std::istream &is, bool binary)
	Read the component from a stream (static method),. More...

Static Public Attributes inherited from Component
static const struct key_value	kMarkerMap []
	The table with pairs of Component types and markers (defined in nnet-component.cc),. More...

Protected Attributes inherited from MultistreamComponent
std::vector< int32 >	sequence_lengths_

Protected Attributes inherited from UpdatableComponent
NnetTrainOptions	opts_
	Option-class with training hyper-parameters,. More...

BaseFloat	learn_rate_coef_
	Scalar applied to learning rate for weight matrices (to be used in ::Update method),. More...

BaseFloat	bias_learn_rate_coef_
	Scalar applied to learning rate for bias (to be used in ::Update method),. More...

Protected Attributes inherited from Component
int32	input_dim_
	Data members,. More...

int32	output_dim_
	Dimension of the output of the Component,. More...

Detailed Description

Definition at line 50 of file nnet-blstm-projected.h.

Constructor & Destructor Documentation

◆ BlstmProjected()

BlstmProjected	(	int32	input_dim,
		int32	output_dim
	)

inline

Definition at line 52 of file nnet-blstm-projected.h.

Referenced by BlstmProjected::Copy().

                                                    :
     // Construct with default hyper-parameters; no weights are allocated here.
     MultistreamComponent(input_dim, output_dim),
     // NOTE(review): presumably overwritten later by InitData()/ReadData(),
     // which are not visible in this excerpt - confirm.
     cell_dim_(0),
     // Each direction contributes 'proj_dim_' output columns (BackpropagateFnc
     // splits 'out_diff' into two 'proj_dim_'-wide halves), hence half of
     // 'output_dim'. Integer division: an odd 'output_dim' is rounded down.
     proj_dim_(static_cast<int32>(output_dim/2)),
     // Clipping of the cell values in the forward pass (per-frame).
     cell_clip_(50.0),
     // Clipping of the g,i,f,o derivatives in backprop; applied only if > 0.
     diff_clip_(1.0),
     // Clipping of the cell derivatives; applied only if > 0, so the
     // default of 0.0 leaves them unclipped.
     cell_diff_clip_(0.0),
     // Clipping of the updates.
     grad_clip_(250.0)
   { }

◆ ~BlstmProjected()

~BlstmProjected ( )

inline

Definition at line 62 of file nnet-blstm-projected.h.

63 { }  // Empty body: the destructor performs no explicit cleanup of its own.

Member Function Documentation

◆ BackpropagateFnc()

void BackpropagateFnc	(	const CuMatrixBase< BaseFloat > &	in,
		const CuMatrixBase< BaseFloat > &	out,
		const CuMatrixBase< BaseFloat > &	out_diff,
		CuMatrixBase< BaseFloat > *	in_diff
	)

inlinevirtual

Backward pass transformation (to be implemented by descending class...)

Implements Component.

Definition at line 720 of file nnet-blstm-projected.h.

                                                           {
 
     // the number of sequences to be processed in parallel
     int32 T = in.NumRows() / NumStreams();
     int32 S = NumStreams();
 
     // buffers,
     f_backpropagate_buf_.Resize((T+2)*S, 7 * cell_dim_ + proj_dim_, kSetZero);
     b_backpropagate_buf_.Resize((T+2)*S, 7 * cell_dim_ + proj_dim_, kSetZero);
 
     // FORWARD DIRECTION,
     // forward-direction activations,
     CuSubMatrix<BaseFloat> F_YG(f_propagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YI(f_propagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YF(f_propagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YO(f_propagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YC(f_propagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YH(f_propagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YM(f_propagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YR(f_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
 
     // forward-direction derivatives,
     CuSubMatrix<BaseFloat> F_DG(f_backpropagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DI(f_backpropagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DF(f_backpropagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DO(f_backpropagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DC(f_backpropagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DH(f_backpropagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DM(f_backpropagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_DR(f_backpropagate_buf_.ColRange(7*cell_dim_, proj_dim_));
     CuSubMatrix<BaseFloat> F_DGIFO(f_backpropagate_buf_.ColRange(0, 4*cell_dim_));
 
     // pre-copy partial derivatives from the BLSTM output,
     F_DR.RowRange(1*S, T*S).CopyFromMat(out_diff.ColRange(0, proj_dim_));
 
     // BufferPadding [T0]:dummy, [1,T]:current sequence, [T+1]: dummy,
     for (int t = T; t >= 1; t--) {
       CuSubMatrix<BaseFloat> y_g(F_YG.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_i(F_YI.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_f(F_YF.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_o(F_YO.RowRange(t*S, S));
       // CuSubMatrix<BaseFloat> y_c(F_YC.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_h(F_YH.RowRange(t*S, S));
       // CuSubMatrix<BaseFloat> y_m(F_YM.RowRange(t*S, S));
       // CuSubMatrix<BaseFloat> y_r(F_YR.RowRange(t*S, S));
 
       CuSubMatrix<BaseFloat> d_all(f_backpropagate_buf_.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_g(F_DG.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_i(F_DI.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_f(F_DF.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_o(F_DO.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_c(F_DC.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_h(F_DH.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_m(F_DM.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_r(F_DR.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_gifo(F_DGIFO.RowRange(t*S, S));
 
       // r
       //   Version 1 (precise gradients):
       //   backprop error from g(t+1), i(t+1), f(t+1), o(t+1) to r(t)
       d_r.AddMatMat(1.0, F_DGIFO.RowRange((t+1)*S, S), kNoTrans, f_w_gifo_r_, kNoTrans, 1.0);
 
       /*
       //   Version 2 (Alex Graves' PhD dissertation):
       //   only backprop g(t+1) to r(t)
       CuSubMatrix<BaseFloat> w_g_r_(w_gifo_r_.RowRange(0, cell_dim_));
       d_r.AddMatMat(1.0, DG.RowRange((t+1)*S,S), kNoTrans, w_g_r_, kNoTrans, 1.0);
       */
 
       /*
       //   Version 3 (Felix Gers' PhD dissertation):
       //   truncate gradients of g(t+1), i(t+1), f(t+1), o(t+1) once they leak out memory block
       //   CEC(with forget connection) is the only "error-bridge" through time
       ;
       */
 
       // r -> m
       d_m.AddMatMat(1.0, d_r, kNoTrans, f_w_r_m_, kNoTrans, 0.0);
 
       // m -> h, via output gate
       d_h.AddMatMatElements(1.0, d_m, y_o, 0.0);
       d_h.DiffTanh(y_h, d_h);
 
       // o
       d_o.AddMatMatElements(1.0, d_m, y_h, 0.0);
       d_o.DiffSigmoid(y_o, d_o);
 
       // c
       // 1. diff from h(t)
       // 2. diff from c(t+1) (via forget-gate between CEC)
       // 3. diff from i(t+1) (via peephole)
       // 4. diff from f(t+1) (via peephole)
       // 5. diff from o(t)   (via peephole, not recurrent)
       d_c.AddMat(1.0, d_h);
       d_c.AddMatMatElements(1.0, F_DC.RowRange((t+1)*S, S), F_YF.RowRange((t+1)*S, S), 1.0);
       d_c.AddMatDiagVec(1.0, F_DI.RowRange((t+1)*S, S), kNoTrans, f_peephole_i_c_, 1.0);
       d_c.AddMatDiagVec(1.0, F_DF.RowRange((t+1)*S, S), kNoTrans, f_peephole_f_c_, 1.0);
       d_c.AddMatDiagVec(1.0, d_o                      , kNoTrans, f_peephole_o_c_, 1.0);
       // optionally clip the cell_derivative,
       if (cell_diff_clip_ > 0.0) {
         d_c.ApplyFloor(-cell_diff_clip_);
         d_c.ApplyCeiling(cell_diff_clip_);
       }
 
       // f
       d_f.AddMatMatElements(1.0, d_c, F_YC.RowRange((t-1)*S, S), 0.0);
       d_f.DiffSigmoid(y_f, d_f);
 
       // i
       d_i.AddMatMatElements(1.0, d_c, y_g, 0.0);
       d_i.DiffSigmoid(y_i, d_i);
 
       // c -> g, via input gate
       d_g.AddMatMatElements(1.0, d_c, y_i, 0.0);
       d_g.DiffTanh(y_g, d_g);
 
       // Clipping per-frame derivatives for the next `t'.
       // Clipping applied to gates and input gate (as done in Google).
       // [ICASSP2015, Sak, Learning acoustic frame labelling...],
       //
       // The path from 'out_diff' to 'd_c' via 'd_h' is unclipped,
       // which is probably important for the 'Constant Error Carousel'
       // to work well.
       //
       if (diff_clip_ > 0.0) {
         d_gifo.ApplyFloor(-diff_clip_);
         d_gifo.ApplyCeiling(diff_clip_);
       }
 
       // set zeros to padded frames,
       if (sequence_lengths_.size() > 0) {
         for (int s = 0; s < S; s++) {
           if (t > sequence_lengths_[s]) {
             d_all.Row(s).SetZero();
           }
         }
       }
     }
 
     // BACKWARD DIRECTION,
     // backward-direction activations,
     CuSubMatrix<BaseFloat> B_YG(b_propagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YI(b_propagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YF(b_propagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YO(b_propagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YC(b_propagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YH(b_propagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YM(b_propagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YR(b_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
 
     // backward-direction derivatives,
     CuSubMatrix<BaseFloat> B_DG(b_backpropagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DI(b_backpropagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DF(b_backpropagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DO(b_backpropagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DC(b_backpropagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DH(b_backpropagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DM(b_backpropagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_DR(b_backpropagate_buf_.ColRange(7*cell_dim_, proj_dim_));
     CuSubMatrix<BaseFloat> B_DGIFO(b_backpropagate_buf_.ColRange(0, 4*cell_dim_));
 
     // pre-copy partial derivatives from the BLSTM output,
     B_DR.RowRange(1*S, T*S).CopyFromMat(out_diff.ColRange(proj_dim_, proj_dim_));
 
     // BufferPadding [T0]:dummy, [1,T]:current sequence, [T+1]: dummy,
     for (int t = 1; t <= T; t++) {
       CuSubMatrix<BaseFloat> y_g(B_YG.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_i(B_YI.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_f(B_YF.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_o(B_YO.RowRange(t*S, S));
       // CuSubMatrix<BaseFloat> y_c(B_YC.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_h(B_YH.RowRange(t*S, S));
       // CuSubMatrix<BaseFloat> y_m(B_YM.RowRange(t*S, S));
       // CuSubMatrix<BaseFloat> y_r(B_YR.RowRange(t*S, S));
 
       CuSubMatrix<BaseFloat> d_all(b_backpropagate_buf_.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_g(B_DG.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_i(B_DI.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_f(B_DF.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_o(B_DO.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_c(B_DC.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_h(B_DH.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_m(B_DM.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_r(B_DR.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> d_gifo(B_DGIFO.RowRange(t*S, S));
 
       // r
       //   Version 1 (precise gradients):
       //   backprop error from g(t-1), i(t-1), f(t-1), o(t-1) to r(t)
       d_r.AddMatMat(1.0, B_DGIFO.RowRange((t-1)*S, S), kNoTrans, b_w_gifo_r_, kNoTrans, 1.0);
 
       /*
       //   Version 2 (Alex Graves' PhD dissertation):
       //   only backprop g(t+1) to r(t)
       CuSubMatrix<BaseFloat> w_g_r_(w_gifo_r_.RowRange(0, cell_dim_));
       d_r.AddMatMat(1.0, DG.RowRange((t+1)*S,S), kNoTrans, w_g_r_, kNoTrans, 1.0);
       */
 
       /*
       //   Version 3 (Felix Gers' PhD dissertation):
       //   truncate gradients of g(t+1), i(t+1), f(t+1), o(t+1) once they leak out memory block
       //   CEC(with forget connection) is the only "error-bridge" through time
       */
 
       // r -> m
       d_m.AddMatMat(1.0, d_r, kNoTrans, b_w_r_m_, kNoTrans, 0.0);
 
       // m -> h via output gate
       d_h.AddMatMatElements(1.0, d_m, y_o, 0.0);
       d_h.DiffTanh(y_h, d_h);
 
       // o
       d_o.AddMatMatElements(1.0, d_m, y_h, 0.0);
       d_o.DiffSigmoid(y_o, d_o);
 
       // c
       // 1. diff from h(t)
       // 2. diff from c(t+1) (via forget-gate between CEC)
       // 3. diff from i(t+1) (via peephole)
       // 4. diff from f(t+1) (via peephole)
       // 5. diff from o(t)   (via peephole, not recurrent)
       d_c.AddMat(1.0, d_h);
       d_c.AddMatMatElements(1.0, B_DC.RowRange((t-1)*S, S), B_YF.RowRange((t-1)*S, S), 1.0);
       d_c.AddMatDiagVec(1.0, B_DI.RowRange((t-1)*S, S), kNoTrans, b_peephole_i_c_, 1.0);
       d_c.AddMatDiagVec(1.0, B_DF.RowRange((t-1)*S, S), kNoTrans, b_peephole_f_c_, 1.0);
       d_c.AddMatDiagVec(1.0, d_o                      , kNoTrans, b_peephole_o_c_, 1.0);
       // optionally clip the cell_derivative,
       if (cell_diff_clip_ > 0.0) {
         d_c.ApplyFloor(-cell_diff_clip_);
         d_c.ApplyCeiling(cell_diff_clip_);
       }
 
       // f
       d_f.AddMatMatElements(1.0, d_c, B_YC.RowRange((t-1)*S, S), 0.0);
       d_f.DiffSigmoid(y_f, d_f);
 
       // i
       d_i.AddMatMatElements(1.0, d_c, y_g, 0.0);
       d_i.DiffSigmoid(y_i, d_i);
 
       // c -> g, via input gate,
       d_g.AddMatMatElements(1.0, d_c, y_i, 0.0);
       d_g.DiffTanh(y_g, d_g);
 
       // Clipping per-frame derivatives for the next `t'.
       // Clipping applied to gates and input gate (as done in Google).
       // [ICASSP2015, Sak, Learning acoustic frame labelling...],
       //
       // The path from 'out_diff' to 'd_c' via 'd_h' is unclipped,
       // which is probably important for the 'Constant Error Carousel'
       // to work well.
       //
       if (diff_clip_ > 0.0) {
         d_gifo.ApplyFloor(-diff_clip_);
         d_gifo.ApplyCeiling(diff_clip_);
       }
 
       // set zeros to padded frames,
       if (sequence_lengths_.size() > 0) {
         for (int s = 0; s < S; s++) {
           if (t > sequence_lengths_[s]) {
             d_all.Row(s).SetZero();
           }
         }
       }
     }
 
     // g,i,f,o -> x, calculating input derivatives,
     // forward direction difference
     in_diff->AddMatMat(1.0, F_DGIFO.RowRange(1*S, T*S), kNoTrans, f_w_gifo_x_, kNoTrans, 0.0);
     // backward direction difference
     in_diff->AddMatMat(1.0, B_DGIFO.RowRange(1*S, T*S), kNoTrans, b_w_gifo_x_, kNoTrans, 1.0);
 
     // lazy initialization of udpate buffers,
     if (f_w_gifo_x_corr_.NumRows() == 0) {
       // init delta buffers,
       // forward direction,
       f_w_gifo_x_corr_.Resize(4*cell_dim_, input_dim_, kSetZero);
       f_w_gifo_r_corr_.Resize(4*cell_dim_, proj_dim_, kSetZero);
       f_bias_corr_.Resize(4*cell_dim_, kSetZero);
       f_peephole_i_c_corr_.Resize(cell_dim_, kSetZero);
       f_peephole_f_c_corr_.Resize(cell_dim_, kSetZero);
       f_peephole_o_c_corr_.Resize(cell_dim_, kSetZero);
       f_w_r_m_corr_.Resize(proj_dim_, cell_dim_, kSetZero);
 
       // backward direction,
       b_w_gifo_x_corr_.Resize(4*cell_dim_, input_dim_, kSetZero);
       b_w_gifo_r_corr_.Resize(4*cell_dim_, proj_dim_, kSetZero);
       b_bias_corr_.Resize(4*cell_dim_, kSetZero);
       b_peephole_i_c_corr_.Resize(cell_dim_, kSetZero);
       b_peephole_f_c_corr_.Resize(cell_dim_, kSetZero);
       b_peephole_o_c_corr_.Resize(cell_dim_, kSetZero);
       b_w_r_m_corr_.Resize(proj_dim_, cell_dim_, kSetZero);
     }
 
     // calculate delta
     const BaseFloat mmt = opts_.momentum;
 
     // forward direction
     // weight x -> g, i, f, o
     f_w_gifo_x_corr_.AddMatMat(1.0, F_DGIFO.RowRange(1*S, T*S), kTrans,
                                     in,                        kNoTrans, mmt);
     // recurrent weight r -> g, i, f, o
     f_w_gifo_r_corr_.AddMatMat(1.0, F_DGIFO.RowRange(1*S, T*S), kTrans,
                                     F_YR.RowRange(0*S, T*S),    kNoTrans, mmt);
     // bias of g, i, f, o
     f_bias_corr_.AddRowSumMat(1.0, F_DGIFO.RowRange(1*S, T*S), mmt);
 
     // recurrent peephole c -> i
     f_peephole_i_c_corr_.AddDiagMatMat(1.0, F_DI.RowRange(1*S, T*S), kTrans,
                                             F_YC.RowRange(0*S, T*S), kNoTrans, mmt);
     // recurrent peephole c -> f
     f_peephole_f_c_corr_.AddDiagMatMat(1.0, F_DF.RowRange(1*S, T*S), kTrans,
                                             F_YC.RowRange(0*S, T*S), kNoTrans, mmt);
     // peephole c -> o
     f_peephole_o_c_corr_.AddDiagMatMat(1.0, F_DO.RowRange(1*S, T*S), kTrans,
                                             F_YC.RowRange(1*S, T*S), kNoTrans, mmt);
 
     f_w_r_m_corr_.AddMatMat(1.0, F_DR.RowRange(1*S, T*S), kTrans,
                                  F_YM.RowRange(1*S, T*S), kNoTrans, mmt);
 
     // backward direction backpropagate
     // weight x -> g, i, f, o
     b_w_gifo_x_corr_.AddMatMat(1.0, B_DGIFO.RowRange(1*S, T*S), kTrans, in, kNoTrans, mmt);
     // recurrent weight r -> g, i, f, o
     b_w_gifo_r_corr_.AddMatMat(1.0, B_DGIFO.RowRange(1*S, T*S), kTrans,
                                     B_YR.RowRange(0*S, T*S)   , kNoTrans, mmt);
     // bias of g, i, f, o
     b_bias_corr_.AddRowSumMat(1.0, B_DGIFO.RowRange(1*S, T*S), mmt);
 
     // recurrent peephole c -> i, c(t+1) --> i
     b_peephole_i_c_corr_.AddDiagMatMat(1.0, B_DI.RowRange(1*S, T*S), kTrans,
                                             B_YC.RowRange(2*S, T*S), kNoTrans, mmt);
     // recurrent peephole c -> f, c(t+1) --> f
     b_peephole_f_c_corr_.AddDiagMatMat(1.0, B_DF.RowRange(1*S, T*S), kTrans,
                                             B_YC.RowRange(2*S, T*S), kNoTrans, mmt);
     // peephole c -> o
     b_peephole_o_c_corr_.AddDiagMatMat(1.0, B_DO.RowRange(1*S, T*S), kTrans,
                                             B_YC.RowRange(1*S, T*S), kNoTrans, mmt);
 
     b_w_r_m_corr_.AddMatMat(1.0, B_DR.RowRange(1*S, T*S), kTrans,
                                  B_YM.RowRange(1*S, T*S), kNoTrans, mmt);
   }

◆ Copy()

Component* Copy ( ) const

inlinevirtual

Copy component (deep copy),.

Implements Component.

Definition at line 65 of file nnet-blstm-projected.h.

References BlstmProjected::BlstmProjected().

65 { return new BlstmProjected(*this); }  // Heap-allocates a copy-constructed clone of this component. NOTE(review): presumably the caller owns the returned raw pointer - confirm.

kaldi::nnet1::BlstmProjected::BlstmProjected

BlstmProjected(int32 input_dim, int32 output_dim)

Definition: nnet-blstm-projected.h:52

◆ GetGradient()

void GetGradient ( VectorBase< BaseFloat > * gradient ) const

inlinevirtual

Get gradient reshaped as a vector,.

Implements UpdatableComponent.

Definition at line 243 of file nnet-blstm-projected.h.

                                                           {
     KALDI_ASSERT(gradient->Dim() == NumParams());
     int32 offset, len;
 
     // Copying parameters corresponding to forward direction
     offset = 0;    len = f_w_gifo_x_.NumRows() * f_w_gifo_x_.NumCols();
     gradient->Range(offset, len).CopyRowsFromMat(f_w_gifo_x_corr_);
 
     offset += len; len = f_w_gifo_r_.NumRows() * f_w_gifo_r_.NumCols();
     gradient->Range(offset, len).CopyRowsFromMat(f_w_gifo_r_corr_);
 
     offset += len; len = f_bias_.Dim();
     gradient->Range(offset, len).CopyFromVec(f_bias_corr_);
 
     offset += len; len = f_peephole_i_c_.Dim();
     gradient->Range(offset, len).CopyFromVec(f_peephole_i_c_corr_);
 
     offset += len; len = f_peephole_f_c_.Dim();
     gradient->Range(offset, len).CopyFromVec(f_peephole_f_c_corr_);
 
     offset += len; len = f_peephole_o_c_.Dim();
     gradient->Range(offset, len).CopyFromVec(f_peephole_o_c_corr_);
 
     offset += len; len = f_w_r_m_.NumRows() * f_w_r_m_.NumCols();
     gradient->Range(offset, len).CopyRowsFromMat(f_w_r_m_corr_);
 
     // Copying parameters corresponding to backward direction
     offset += len; len = b_w_gifo_x_.NumRows() * b_w_gifo_x_.NumCols();
     gradient->Range(offset, len).CopyRowsFromMat(b_w_gifo_x_corr_);
 
     offset += len; len = b_w_gifo_r_.NumRows() * b_w_gifo_r_.NumCols();
     gradient->Range(offset, len).CopyRowsFromMat(b_w_gifo_r_corr_);
 
     offset += len; len = b_bias_.Dim();
     gradient->Range(offset, len).CopyFromVec(b_bias_corr_);
 
     offset += len; len = b_peephole_i_c_.Dim();
     gradient->Range(offset, len).CopyFromVec(b_peephole_i_c_corr_);
 
     offset += len; len = b_peephole_f_c_.Dim();
     gradient->Range(offset, len).CopyFromVec(b_peephole_f_c_corr_);
 
     offset += len; len = b_peephole_o_c_.Dim();
     gradient->Range(offset, len).CopyFromVec(b_peephole_o_c_corr_);
 
     offset += len; len = b_w_r_m_.NumRows() * b_w_r_m_.NumCols();
     gradient->Range(offset, len).CopyRowsFromMat(b_w_r_m_corr_);
 
     // check the dim,
     offset += len;
     KALDI_ASSERT(offset == NumParams());
   }

◆ GetParams()

void GetParams ( VectorBase< BaseFloat > * params ) const

inline virtual

Get the trainable parameters reshaped as a vector.

Implements UpdatableComponent.

Definition at line 296 of file nnet-blstm-projected.h.

References BlstmProjected::b_bias_, BlstmProjected::b_peephole_f_c_, BlstmProjected::b_peephole_i_c_, BlstmProjected::b_peephole_o_c_, BlstmProjected::b_w_gifo_r_, BlstmProjected::b_w_gifo_x_, BlstmProjected::b_w_r_m_, VectorBase< Real >::Dim(), BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, BlstmProjected::f_w_r_m_, KALDI_ASSERT, BlstmProjected::NumParams(), and VectorBase< Real >::Range().

                                                       {
     KALDI_ASSERT(params->Dim() == NumParams());
     int32 offset, len;
 
     // Copying parameters corresponding to forward direction
     offset = 0;    len = f_w_gifo_x_.NumRows() * f_w_gifo_x_.NumCols();
     params->Range(offset, len).CopyRowsFromMat(f_w_gifo_x_);
 
     offset += len; len = f_w_gifo_r_.NumRows() * f_w_gifo_r_.NumCols();
     params->Range(offset, len).CopyRowsFromMat(f_w_gifo_r_);
 
     offset += len; len = f_bias_.Dim();
     params->Range(offset, len).CopyFromVec(f_bias_);
 
     offset += len; len = f_peephole_i_c_.Dim();
     params->Range(offset, len).CopyFromVec(f_peephole_i_c_);
 
     offset += len; len = f_peephole_f_c_.Dim();
     params->Range(offset, len).CopyFromVec(f_peephole_f_c_);
 
     offset += len; len = f_peephole_o_c_.Dim();
     params->Range(offset, len).CopyFromVec(f_peephole_o_c_);
 
     offset += len; len = f_w_r_m_.NumRows() * f_w_r_m_.NumCols();
     params->Range(offset, len).CopyRowsFromMat(f_w_r_m_);
 
     // Copying parameters corresponding to backward direction
     offset += len; len = b_w_gifo_x_.NumRows() * b_w_gifo_x_.NumCols();
     params->Range(offset, len).CopyRowsFromMat(b_w_gifo_x_);
 
     offset += len; len = b_w_gifo_r_.NumRows() * b_w_gifo_r_.NumCols();
     params->Range(offset, len).CopyRowsFromMat(b_w_gifo_r_);
 
     offset += len; len = b_bias_.Dim();
     params->Range(offset, len).CopyFromVec(b_bias_);
 
     offset += len; len = b_peephole_i_c_.Dim();
     params->Range(offset, len).CopyFromVec(b_peephole_i_c_);
 
     offset += len; len = b_peephole_f_c_.Dim();
     params->Range(offset, len).CopyFromVec(b_peephole_f_c_);
 
     offset += len; len = b_peephole_o_c_.Dim();
     params->Range(offset, len).CopyFromVec(b_peephole_o_c_);
 
     offset += len; len = b_w_r_m_.NumRows() * b_w_r_m_.NumCols();
     params->Range(offset, len).CopyRowsFromMat(b_w_r_m_);
 
     // check the dim,
     offset += len;
     KALDI_ASSERT(offset == NumParams());
   }

◆ GetType()

ComponentType GetType ( ) const

inline virtual

Get the type identification of the component.

Implements Component.

Definition at line 66 of file nnet-blstm-projected.h.

References Component::kBlstmProjected.

66 { return kBlstmProjected; }  // the ComponentType tag of this component (Component::kBlstmProjected)

kaldi::nnet1::Component::kBlstmProjected

Definition: nnet-component.h:55

◆ Info()

std::string Info ( ) const

inline virtual

Print some additional info (after <ComponentName> and the dims).

Reimplemented from Component.

Definition at line 403 of file nnet-blstm-projected.h.

References BlstmProjected::b_bias_, BlstmProjected::b_peephole_f_c_, BlstmProjected::b_peephole_i_c_, BlstmProjected::b_peephole_o_c_, BlstmProjected::b_w_gifo_r_, BlstmProjected::b_w_gifo_x_, BlstmProjected::b_w_r_m_, UpdatableComponent::bias_learn_rate_coef_, BlstmProjected::cell_clip_, BlstmProjected::cell_dim_, BlstmProjected::diff_clip_, BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, BlstmProjected::f_w_r_m_, BlstmProjected::grad_clip_, UpdatableComponent::learn_rate_coef_, kaldi::nnet1::MomentStatistics(), and kaldi::nnet1::ToString().

                          {
     return std::string("cell-dim 2x") + ToString(cell_dim_) + " " +
       "( learn_rate_coef_ " + ToString(learn_rate_coef_) +
       ", bias_learn_rate_coef_ " + ToString(bias_learn_rate_coef_) +
       ", cell_clip_ " + ToString(cell_clip_) +
       ", diff_clip_ " + ToString(diff_clip_) +
       ", grad_clip_ " + ToString(grad_clip_) + " )" +
       "\n  Forward Direction weights:" +
       "\n  f_w_gifo_x_  "     + MomentStatistics(f_w_gifo_x_) +
       "\n  f_w_gifo_r_  "     + MomentStatistics(f_w_gifo_r_) +
       "\n  f_bias_  "         + MomentStatistics(f_bias_) +
       "\n  f_peephole_i_c_  " + MomentStatistics(f_peephole_i_c_) +
       "\n  f_peephole_f_c_  " + MomentStatistics(f_peephole_f_c_) +
       "\n  f_peephole_o_c_  " + MomentStatistics(f_peephole_o_c_) +
       "\n  f_w_r_m_  "        + MomentStatistics(f_w_r_m_) +
       "\n  Backward Direction weights:" +
       "\n  b_w_gifo_x_  "     + MomentStatistics(b_w_gifo_x_) +
       "\n  b_w_gifo_r_  "     + MomentStatistics(b_w_gifo_r_) +
       "\n  b_bias_  "         + MomentStatistics(b_bias_) +
       "\n  b_peephole_i_c_  " + MomentStatistics(b_peephole_i_c_) +
       "\n  b_peephole_f_c_  " + MomentStatistics(b_peephole_f_c_) +
       "\n  b_peephole_o_c_  " + MomentStatistics(b_peephole_o_c_) +
       "\n  b_w_r_m_  "        + MomentStatistics(b_w_r_m_);
   }

◆ InfoGradient()

std::string InfoGradient ( ) const

inline virtual

Print some additional info about the gradient (after <...> and dims).

Reimplemented from Component.

Definition at line 429 of file nnet-blstm-projected.h.

                                  {
     // forward-direction activations,
     const CuSubMatrix<BaseFloat> YG_FW(f_propagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YI_FW(f_propagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YF_FW(f_propagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YO_FW(f_propagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YC_FW(f_propagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YH_FW(f_propagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YM_FW(f_propagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YR_FW(f_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
 
     // forward-direction derivatives,
     const CuSubMatrix<BaseFloat> DG_FW(f_backpropagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DI_FW(f_backpropagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DF_FW(f_backpropagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DO_FW(f_backpropagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DC_FW(f_backpropagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DH_FW(f_backpropagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DM_FW(f_backpropagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DR_FW(f_backpropagate_buf_.ColRange(7*cell_dim_, proj_dim_));
 
     // backward-direction activations,
     const CuSubMatrix<BaseFloat> YG_BW(b_propagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YI_BW(b_propagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YF_BW(b_propagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YO_BW(b_propagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YC_BW(b_propagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YH_BW(b_propagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YM_BW(b_propagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> YR_BW(b_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
 
     // backward-direction derivatives,
     const CuSubMatrix<BaseFloat> DG_BW(b_backpropagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DI_BW(b_backpropagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DF_BW(b_backpropagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DO_BW(b_backpropagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DC_BW(b_backpropagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DH_BW(b_backpropagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DM_BW(b_backpropagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     const CuSubMatrix<BaseFloat> DR_BW(b_backpropagate_buf_.ColRange(7*cell_dim_, proj_dim_));
 
     return std::string("") +
       "( learn_rate_coef_ " + ToString(learn_rate_coef_) +
       ", bias_learn_rate_coef_ " + ToString(bias_learn_rate_coef_) +
       ", cell_clip_ " + ToString(cell_clip_) +
       ", diff_clip_ " + ToString(diff_clip_) +
       ", grad_clip_ " + ToString(grad_clip_) + " )" +
       "\n  ### Gradients " +
       "\n  f_w_gifo_x_corr_  "     + MomentStatistics(f_w_gifo_x_corr_) +
       "\n  f_w_gifo_r_corr_  "     + MomentStatistics(f_w_gifo_r_corr_) +
       "\n  f_bias_corr_  "         + MomentStatistics(f_bias_corr_) +
       "\n  f_peephole_i_c_corr_  " + MomentStatistics(f_peephole_i_c_corr_) +
       "\n  f_peephole_f_c_corr_  " + MomentStatistics(f_peephole_f_c_corr_) +
       "\n  f_peephole_o_c_corr_  " + MomentStatistics(f_peephole_o_c_corr_) +
       "\n  f_w_r_m_corr_  "        + MomentStatistics(f_w_r_m_corr_) +
       "\n  ---" +
       "\n  b_w_gifo_x_corr_  "     + MomentStatistics(b_w_gifo_x_corr_) +
       "\n  b_w_gifo_r_corr_  "     + MomentStatistics(b_w_gifo_r_corr_) +
       "\n  b_bias_corr_  "         + MomentStatistics(b_bias_corr_) +
       "\n  b_peephole_i_c_corr_  " + MomentStatistics(b_peephole_i_c_corr_) +
       "\n  b_peephole_f_c_corr_  " + MomentStatistics(b_peephole_f_c_corr_) +
       "\n  b_peephole_o_c_corr_  " + MomentStatistics(b_peephole_o_c_corr_) +
       "\n  b_w_r_m_corr_  "        + MomentStatistics(b_w_r_m_corr_) +
       "\n" +
       "\n  ### Activations (mostly after non-linearities)" +
       "\n  YI_FW(0..1)^  " + MomentStatistics(YI_FW) +
       "\n  YF_FW(0..1)^  " + MomentStatistics(YF_FW) +
       "\n  YO_FW(0..1)^  " + MomentStatistics(YO_FW) +
       "\n  YG_FW(-1..1)  " + MomentStatistics(YG_FW) +
       "\n  YC_FW(-R..R)* " + MomentStatistics(YC_FW) +
       "\n  YH_FW(-1..1)  " + MomentStatistics(YH_FW) +
       "\n  YM_FW(-1..1)  " + MomentStatistics(YM_FW) +
       "\n  YR_FW(-R..R)  " + MomentStatistics(YR_FW) +
       "\n  ---" +
       "\n  YI_BW(0..1)^  " + MomentStatistics(YI_BW) +
       "\n  YF_BW(0..1)^  " + MomentStatistics(YF_BW) +
       "\n  YO_BW(0..1)^  " + MomentStatistics(YO_BW) +
       "\n  YG_BW(-1..1)  " + MomentStatistics(YG_BW) +
       "\n  YC_BW(-R..R)* " + MomentStatistics(YC_BW) +
       "\n  YH_BW(-1..1)  " + MomentStatistics(YH_BW) +
       "\n  YM_BW(-1..1)  " + MomentStatistics(YM_BW) +
       "\n  YR_BW(-R..R)  " + MomentStatistics(YR_BW) +
       "\n" +
       "\n  ### Derivatives (w.r.t. inputs of non-linearities)" +
       "\n  DI_FW^ " + MomentStatistics(DI_FW) +
       "\n  DF_FW^ " + MomentStatistics(DF_FW) +
       "\n  DO_FW^ " + MomentStatistics(DO_FW) +
       "\n  DG_FW  " + MomentStatistics(DG_FW) +
       "\n  DC_FW* " + MomentStatistics(DC_FW) +
       "\n  DH_FW  " + MomentStatistics(DH_FW) +
       "\n  DM_FW  " + MomentStatistics(DM_FW) +
       "\n  DR_FW  " + MomentStatistics(DR_FW) +
       "\n  ---" +
       "\n  DI_BW^ " + MomentStatistics(DI_BW) +
       "\n  DF_BW^ " + MomentStatistics(DF_BW) +
       "\n  DO_BW^ " + MomentStatistics(DO_BW) +
       "\n  DG_BW  " + MomentStatistics(DG_BW) +
       "\n  DC_BW* " + MomentStatistics(DC_BW) +
       "\n  DH_BW  " + MomentStatistics(DH_BW) +
       "\n  DM_BW  " + MomentStatistics(DM_BW) +
       "\n  DR_BW  " + MomentStatistics(DR_BW);
   }

◆ InitData()

void InitData ( std::istream & is )

inline virtual

Initialize the content of the component from the 'line' of the prototype.

Implements UpdatableComponent.

Definition at line 68 of file nnet-blstm-projected.h.

References BlstmProjected::b_bias_, BlstmProjected::b_peephole_f_c_, BlstmProjected::b_peephole_i_c_, BlstmProjected::b_peephole_o_c_, BlstmProjected::b_w_gifo_r_, BlstmProjected::b_w_gifo_x_, BlstmProjected::b_w_r_m_, UpdatableComponent::bias_learn_rate_coef_, BlstmProjected::cell_clip_, BlstmProjected::cell_diff_clip_, BlstmProjected::cell_dim_, BlstmProjected::diff_clip_, BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, BlstmProjected::f_w_r_m_, BlstmProjected::grad_clip_, Component::input_dim_, KALDI_ASSERT, KALDI_ERR, kaldi::kUndefined, UpdatableComponent::learn_rate_coef_, BlstmProjected::proj_dim_, kaldi::nnet1::RandUniform(), kaldi::ReadBasicType(), and kaldi::ReadToken().

                                 {
     // define options,
     float param_range = 0.1;
     // parse the line from prototype,
     std::string token;
     while (is >> std::ws, !is.eof()) {
       ReadToken(is, false, &token);
        if (token == "<ParamRange>") ReadBasicType(is, false, &param_range);
       else if (token == "<CellDim>") ReadBasicType(is, false, &cell_dim_);
       else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef_);
       else if (token == "<BiasLearnRateCoef>") ReadBasicType(is, false, &bias_learn_rate_coef_);
       else if (token == "<CellClip>") ReadBasicType(is, false, &cell_clip_);
       else if (token == "<DiffClip>") ReadBasicType(is, false, &diff_clip_);
       else if (token == "<CellDiffClip>") ReadBasicType(is, false, &cell_diff_clip_);
       else if (token == "<GradClip>") ReadBasicType(is, false, &grad_clip_);
       else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                      << " (ParamRange|CellDim|LearnRateCoef|BiasLearnRateCoef|CellClip|DiffClip|GradClip)";
     }
 
     // init the weights and biases (from uniform dist.),
     // forward direction,
     f_w_gifo_x_.Resize(4*cell_dim_, input_dim_, kUndefined);
     f_w_gifo_r_.Resize(4*cell_dim_, proj_dim_, kUndefined);
     f_bias_.Resize(4*cell_dim_, kUndefined);
     f_peephole_i_c_.Resize(cell_dim_, kUndefined);
     f_peephole_f_c_.Resize(cell_dim_, kUndefined);
     f_peephole_o_c_.Resize(cell_dim_, kUndefined);
     f_w_r_m_.Resize(proj_dim_, cell_dim_, kUndefined);
     //       (mean), (range)
     RandUniform(0.0, 2.0 * param_range, &f_w_gifo_x_);
     RandUniform(0.0, 2.0 * param_range, &f_w_gifo_r_);
     RandUniform(0.0, 2.0 * param_range, &f_bias_);
     RandUniform(0.0, 2.0 * param_range, &f_peephole_i_c_);
     RandUniform(0.0, 2.0 * param_range, &f_peephole_f_c_);
     RandUniform(0.0, 2.0 * param_range, &f_peephole_o_c_);
     RandUniform(0.0, 2.0 * param_range, &f_w_r_m_);
 
     // Add 1.0 to forget-gate bias
     // [Miao IS16: AN EMPIRICAL EXPLORATION...]
     f_bias_.Range(2*cell_dim_, cell_dim_).Add(1.0);
 
     // backward direction,
     b_w_gifo_x_.Resize(4*cell_dim_, input_dim_, kUndefined);
     b_w_gifo_r_.Resize(4*cell_dim_, proj_dim_, kUndefined);
     b_bias_.Resize(4*cell_dim_, kUndefined);
     b_peephole_i_c_.Resize(cell_dim_, kUndefined);
     b_peephole_f_c_.Resize(cell_dim_, kUndefined);
     b_peephole_o_c_.Resize(cell_dim_, kUndefined);
     b_w_r_m_.Resize(proj_dim_, cell_dim_, kUndefined);
 
     RandUniform(0.0, 2.0 * param_range, &b_w_gifo_x_);
     RandUniform(0.0, 2.0 * param_range, &b_w_gifo_r_);
     RandUniform(0.0, 2.0 * param_range, &b_bias_);
     RandUniform(0.0, 2.0 * param_range, &b_peephole_i_c_);
     RandUniform(0.0, 2.0 * param_range, &b_peephole_f_c_);
     RandUniform(0.0, 2.0 * param_range, &b_peephole_o_c_);
     RandUniform(0.0, 2.0 * param_range, &b_w_r_m_);
 
     // Add 1.0 to forget-gate bias,
     // [Miao IS16: AN EMPIRICAL EXPLORATION...]
     b_bias_.Range(2*cell_dim_, cell_dim_).Add(1.0);
 
     KALDI_ASSERT(cell_dim_ > 0);
     KALDI_ASSERT(learn_rate_coef_ >= 0.0);
     KALDI_ASSERT(bias_learn_rate_coef_ >= 0.0);
   }

◆ NumParams()

int32 NumParams ( ) const

inline virtual

Number of trainable parameters.

Implements UpdatableComponent.

Definition at line 233 of file nnet-blstm-projected.h.

References BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, and BlstmProjected::f_w_r_m_.

Referenced by BlstmProjected::GetGradient(), BlstmProjected::GetParams(), and BlstmProjected::SetParams().

                           {
     return 2 * ( f_w_gifo_x_.NumRows() * f_w_gifo_x_.NumCols() +
       f_w_gifo_r_.NumRows() * f_w_gifo_r_.NumCols() +
       f_bias_.Dim() +
       f_peephole_i_c_.Dim() +
       f_peephole_f_c_.Dim() +
       f_peephole_o_c_.Dim() +
       f_w_r_m_.NumRows() * f_w_r_m_.NumCols() );
   }

◆ PropagateFnc()

void PropagateFnc	(	const CuMatrixBase< BaseFloat > &	in,
		CuMatrixBase< BaseFloat > *	out
	)

inline virtual

Abstract interface for propagation/backpropagation.

Forward pass transformation (to be implemented by descending class...)

Implements Component.

Definition at line 532 of file nnet-blstm-projected.h.

                                                   {
     // Forward pass of the bidirectional projected LSTM.
     //
     // 'in'  : input with T*S rows, where S = NumStreams() and T is the number
     //         of time-steps; the rows of time-step t are a block of S rows
     //         (one row per stream).
     // 'out' : T*S rows, the first proj_dim_ columns get the forward-direction
     //         projection 'r', the next proj_dim_ columns the backward one.
     //
     // Both propagate buffers have (T+2)*S rows: the block of rows for t=0 and
     // the block for t=T+1 are zero padding, so the recurrences can read
     // r(t-1), c(t-1) (forward) or r(t+1), c(t+1) (backward) at the sequence
     // boundary without a special case.
     // Buffer columns: g, i, f, o, c, h, m (cell_dim_ each), then r (proj_dim_).
 
     KALDI_ASSERT(in.NumRows() % NumStreams() == 0);
     int32 S = NumStreams();
     int32 T = in.NumRows() / NumStreams();
 
     // buffers (zeroed, including the padding rows),
     f_propagate_buf_.Resize((T+2)*S, 7 * cell_dim_ + proj_dim_, kSetZero);
     b_propagate_buf_.Resize((T+2)*S, 7 * cell_dim_ + proj_dim_, kSetZero);
 
     // forward-direction activations (column views into f_propagate_buf_),
     CuSubMatrix<BaseFloat> F_YG(f_propagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YI(f_propagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YF(f_propagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YO(f_propagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YC(f_propagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YH(f_propagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YM(f_propagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> F_YR(f_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
     // g, i, f, o together (the first 4*cell_dim_ columns),
     CuSubMatrix<BaseFloat> F_YGIFO(f_propagate_buf_.ColRange(0, 4*cell_dim_));
 
     // backward-direction activations (column views into b_propagate_buf_),
     CuSubMatrix<BaseFloat> B_YG(b_propagate_buf_.ColRange(0*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YI(b_propagate_buf_.ColRange(1*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YF(b_propagate_buf_.ColRange(2*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YO(b_propagate_buf_.ColRange(3*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YC(b_propagate_buf_.ColRange(4*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YH(b_propagate_buf_.ColRange(5*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YM(b_propagate_buf_.ColRange(6*cell_dim_, cell_dim_));
     CuSubMatrix<BaseFloat> B_YR(b_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
     // g, i, f, o together (the first 4*cell_dim_ columns),
     CuSubMatrix<BaseFloat> B_YGIFO(b_propagate_buf_.ColRange(0, 4*cell_dim_));
 
     // FORWARD DIRECTION,
     // x -> g, i, f, o, not recurrent, do it all at once
     // (written to the rows of time-steps 1..T, the padding rows are skipped),
     F_YGIFO.RowRange(1*S, T*S).AddMatMat(1.0, in, kNoTrans, f_w_gifo_x_, kTrans, 0.0);
 
     // bias -> g, i, f, o
     F_YGIFO.RowRange(1*S, T*S).AddVecToRows(1.0, f_bias_);
 
     // BufferPadding [0]:dummy, [1, T]:current sequence, [T+1]:dummy
     // time runs forward here: step t reads the values of step t-1,
     for (int t = 1; t <= T; t++) {
       // multistream buffers for current time-step,
       CuSubMatrix<BaseFloat> y_all(f_propagate_buf_.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_g(F_YG.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_i(F_YI.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_f(F_YF.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_o(F_YO.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_c(F_YC.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_h(F_YH.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_m(F_YM.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_r(F_YR.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_gifo(F_YGIFO.RowRange(t*S, S));
 
       // r(t-1) -> g, i, f, o
       y_gifo.AddMatMat(1.0, F_YR.RowRange((t-1)*S, S), kNoTrans, f_w_gifo_r_, kTrans, 1.0);
 
       // c(t-1) -> i(t) via peephole
       y_i.AddMatDiagVec(1.0, F_YC.RowRange((t-1)*S, S), kNoTrans, f_peephole_i_c_, 1.0);
 
       // c(t-1) -> f(t) via peephole
       y_f.AddMatDiagVec(1.0, F_YC.RowRange((t-1)*S, S), kNoTrans, f_peephole_f_c_, 1.0);
 
       // i, f sigmoid squashing
       y_i.Sigmoid(y_i);
       y_f.Sigmoid(y_f);
 
       // g tanh squashing
       y_g.Tanh(y_g);
 
       // g * i -> c
       y_c.AddMatMatElements(1.0, y_g, y_i, 0.0);
       // c(t-1) * f -> c(t) via forget-gate
       y_c.AddMatMatElements(1.0, F_YC.RowRange((t-1)*S, S), y_f, 1.0);
 
       if (cell_clip_ > 0.0) {
         y_c.ApplyFloor(-cell_clip_);   // Optional clipping of cell activation,
         y_c.ApplyCeiling(cell_clip_);  // Google paper Interspeech2014: LSTM for LVCSR
       }
 
       // c(t) -> o(t) via peephole (not recurrent, using c(t))
       y_o.AddMatDiagVec(1.0, y_c, kNoTrans, f_peephole_o_c_, 1.0);
 
       // o sigmoid squashing,
       y_o.Sigmoid(y_o);
 
       // c -> h, tanh squashing,
       y_h.Tanh(y_c);
 
       // h * o -> m via output gate,
       y_m.AddMatMatElements(1.0, y_h, y_o, 0.0);
 
       // m -> r
       y_r.AddMatMat(1.0, y_m, kNoTrans, f_w_r_m_, kTrans, 0.0);
 
       // set zeros to padded frames
       // (the whole buffer row of stream 's' is cleared when 't' is beyond
       // the length of that stream; skipped when no lengths are set),
       if (sequence_lengths_.size() > 0) {
         for (int s = 0; s < S; s++) {
           if (t > sequence_lengths_[s]) {
             y_all.Row(s).SetZero();
           }
         }
       }
     }
 
     // BACKWARD DIRECTION,
     // x -> g, i, f, o, not recurrent, do it all at once
     B_YGIFO.RowRange(1*S, T*S).AddMatMat(1.0, in, kNoTrans, b_w_gifo_x_, kTrans, 0.0);
 
     // bias -> g, i, f, o
     B_YGIFO.RowRange(1*S, T*S).AddVecToRows(1.0, b_bias_);
 
     // BufferPadding [0]:dummy, [1, T]:current sequence, [T+1]:dummy
     // time runs backward here: step t reads the values of step t+1,
     for (int t = T; t >= 1; t--) {
       // multistream buffers for current time-step,
       CuSubMatrix<BaseFloat> y_all(b_propagate_buf_.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_g(B_YG.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_i(B_YI.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_f(B_YF.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_o(B_YO.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_c(B_YC.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_h(B_YH.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_m(B_YM.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_r(B_YR.RowRange(t*S, S));
       CuSubMatrix<BaseFloat> y_gifo(B_YGIFO.RowRange(t*S, S));
 
       // r(t+1) -> g, i, f, o
       y_gifo.AddMatMat(1.0, B_YR.RowRange((t+1)*S, S), kNoTrans, b_w_gifo_r_, kTrans, 1.0);
 
       // c(t+1) -> i(t) via peephole
       y_i.AddMatDiagVec(1.0, B_YC.RowRange((t+1)*S, S), kNoTrans, b_peephole_i_c_, 1.0);
 
       // c(t+1) -> f(t) via peephole
       y_f.AddMatDiagVec(1.0, B_YC.RowRange((t+1)*S, S), kNoTrans, b_peephole_f_c_, 1.0);
 
       // i, f sigmoid squashing
       y_i.Sigmoid(y_i);
       y_f.Sigmoid(y_f);
 
       // g tanh squashing
       y_g.Tanh(y_g);
 
       // g * i -> c
       y_c.AddMatMatElements(1.0, y_g, y_i, 0.0);
       // c(t+1) * f -> c(t) via forget-gate
       y_c.AddMatMatElements(1.0, B_YC.RowRange((t+1)*S, S), y_f, 1.0);
 
       if (cell_clip_ > 0.0) {
         y_c.ApplyFloor(-cell_clip_);   // optional clipping of cell activation,
         y_c.ApplyCeiling(cell_clip_);  // google paper Interspeech2014: LSTM for LVCSR
       }
 
       // c(t) -> o(t) via peephole (not recurrent, using c(t))
       y_o.AddMatDiagVec(1.0, y_c, kNoTrans, b_peephole_o_c_, 1.0);
 
       // o sigmoid squashing,
       y_o.Sigmoid(y_o);
 
       // c -> h, tanh squashing,
       y_h.Tanh(y_c);
 
       // h * o -> m via output gate,
       y_m.AddMatMatElements(1.0, y_h, y_o, 0.0);
 
       // m -> r
       y_r.AddMatMat(1.0, y_m, kNoTrans, b_w_r_m_, kTrans, 0.0);
 
       // set zeros to padded frames
       // (clearing the rows beyond the length of a stream keeps the
       // recurrent inputs r(t+1), c(t+1) zero at the last valid frame),
       if (sequence_lengths_.size() > 0) {
         for (int s = 0; s < S; s++) {
           if (t > sequence_lengths_[s]) {
             y_all.Row(s).SetZero();
           }
         }
       }
     }
 
     // Concatenate the projections of both directions: [ r_forward, r_backward ],
     // the temporary still contains the two padding blocks of rows,
     CuMatrix<BaseFloat> YR_FB;
     YR_FB.Resize((T+2)*S, 2 * proj_dim_, kSetZero);
     // forward part
     YR_FB.ColRange(0, proj_dim_).CopyFromMat(f_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
     // backward part
     YR_FB.ColRange(proj_dim_, proj_dim_).CopyFromMat(b_propagate_buf_.ColRange(7*cell_dim_, proj_dim_));
     // recurrent projection layer is also feed-forward as BLSTM output
     // (only the rows of time-steps 1..T are copied, the padding is dropped),
     out->CopyFromMat(YR_FB.RowRange(1*S, T*S));
   }

◆ ReadData()

void ReadData	(	std::istream &	is,
		bool	binary
	)

inline virtual

Reads the component content.

Reimplemented from Component.

Definition at line 135 of file nnet-blstm-projected.h.

References BlstmProjected::b_bias_, BlstmProjected::b_peephole_f_c_, BlstmProjected::b_peephole_i_c_, BlstmProjected::b_peephole_o_c_, BlstmProjected::b_w_gifo_r_, BlstmProjected::b_w_gifo_x_, BlstmProjected::b_w_r_m_, UpdatableComponent::bias_learn_rate_coef_, BlstmProjected::cell_clip_, BlstmProjected::cell_diff_clip_, BlstmProjected::cell_dim_, BlstmProjected::diff_clip_, kaldi::ExpectToken(), BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, BlstmProjected::f_w_r_m_, BlstmProjected::grad_clip_, KALDI_ASSERT, KALDI_ERR, UpdatableComponent::learn_rate_coef_, kaldi::Peek(), kaldi::PeekToken(), kaldi::ReadBasicType(), and kaldi::ReadToken().

                                              {
     // Read all the '<Tokens>' in arbitrary order,
     while ('<' == Peek(is, binary)) {
       std::string token;
       int first_char = PeekToken(is, binary);
       switch (first_char) {
         case 'C': ReadToken(is, false, &token);
            if (token == "<CellDim>") ReadBasicType(is, binary, &cell_dim_);
           else if (token == "<CellClip>") ReadBasicType(is, binary, &cell_clip_);
           else if (token == "<CellDiffClip>") ReadBasicType(is, binary, &cell_diff_clip_);
           else if (token == "<ClipGradient>") ReadBasicType(is, binary, &grad_clip_); // bwd-compat.
           else KALDI_ERR << "Unknown token: " << token;
           break;
         case 'L': ExpectToken(is, binary, "<LearnRateCoef>");
           ReadBasicType(is, binary, &learn_rate_coef_);
           break;
         case 'B': ExpectToken(is, binary, "<BiasLearnRateCoef>");
           ReadBasicType(is, binary, &bias_learn_rate_coef_);
           break;
         case 'D': ExpectToken(is, binary, "<DiffClip>");
           ReadBasicType(is, binary, &diff_clip_);
           break;
         case 'G': ExpectToken(is, binary, "<GradClip>");
           ReadBasicType(is, binary, &grad_clip_);
           break;
         default: ReadToken(is, false, &token);
           KALDI_ERR << "Unknown token: " << token;
       }
     }
     KALDI_ASSERT(cell_dim_ != 0);
     // Read the data (data follow the tokens),
 
     // reading parameters corresponding to forward direction
     f_w_gifo_x_.Read(is, binary);
     f_w_gifo_r_.Read(is, binary);
     f_bias_.Read(is, binary);
 
     f_peephole_i_c_.Read(is, binary);
     f_peephole_f_c_.Read(is, binary);
     f_peephole_o_c_.Read(is, binary);
 
     f_w_r_m_.Read(is, binary);
 
     // reading parameters corresponding to backward direction
     b_w_gifo_x_.Read(is, binary);
     b_w_gifo_r_.Read(is, binary);
     b_bias_.Read(is, binary);
 
     b_peephole_i_c_.Read(is, binary);
     b_peephole_f_c_.Read(is, binary);
     b_peephole_o_c_.Read(is, binary);
 
     b_w_r_m_.Read(is, binary);
   }

◆ SetParams()

void SetParams ( const VectorBase< BaseFloat > & params )

inline virtual

Set the trainable parameters from a vector (the parameters reshaped as a vector).

Implements UpdatableComponent.

Definition at line 349 of file nnet-blstm-projected.h.

References BlstmProjected::b_bias_, BlstmProjected::b_peephole_f_c_, BlstmProjected::b_peephole_i_c_, BlstmProjected::b_peephole_o_c_, BlstmProjected::b_w_gifo_r_, BlstmProjected::b_w_gifo_x_, BlstmProjected::b_w_r_m_, VectorBase< Real >::Dim(), BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, BlstmProjected::f_w_r_m_, KALDI_ASSERT, BlstmProjected::NumParams(), and VectorBase< Real >::Range().

                                                       {
     KALDI_ASSERT(params.Dim() == NumParams());
     int32 offset, len;
 
     // Copying parameters corresponding to forward direction
     offset = 0;    len = f_w_gifo_x_.NumRows() * f_w_gifo_x_.NumCols();
     f_w_gifo_x_.CopyRowsFromVec(params.Range(offset, len));
 
     offset += len; len = f_w_gifo_r_.NumRows() * f_w_gifo_r_.NumCols();
     f_w_gifo_r_.CopyRowsFromVec(params.Range(offset, len));
 
     offset += len; len = f_bias_.Dim();
     f_bias_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = f_peephole_i_c_.Dim();
     f_peephole_i_c_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = f_peephole_f_c_.Dim();
     f_peephole_f_c_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = f_peephole_o_c_.Dim();
     f_peephole_o_c_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = f_w_r_m_.NumRows() * f_w_r_m_.NumCols();
     f_w_r_m_.CopyRowsFromVec(params.Range(offset, len));
 
     // Copying parameters corresponding to backward direction
     offset += len; len = b_w_gifo_x_.NumRows() * b_w_gifo_x_.NumCols();
     b_w_gifo_x_.CopyRowsFromVec(params.Range(offset, len));
 
     offset += len; len = b_w_gifo_r_.NumRows() * b_w_gifo_r_.NumCols();
     b_w_gifo_r_.CopyRowsFromVec(params.Range(offset, len));
 
     offset += len; len = b_bias_.Dim();
     b_bias_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = b_peephole_i_c_.Dim();
     b_peephole_i_c_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = b_peephole_f_c_.Dim();
     b_peephole_f_c_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = b_peephole_o_c_.Dim();
     b_peephole_o_c_.CopyFromVec(params.Range(offset, len));
 
     offset += len; len = b_w_r_m_.NumRows() * b_w_r_m_.NumCols();
     b_w_r_m_.CopyRowsFromVec(params.Range(offset, len));
 
     // check the dim,
     offset += len;
     KALDI_ASSERT(offset == NumParams());
   }

◆ Update()

void Update	(	const CuMatrixBase< BaseFloat > &	input,
		const CuMatrixBase< BaseFloat > &	diff
	)

inline virtual

Compute gradient and update parameters.

Implements UpdatableComponent.

Definition at line 1067 of file nnet-blstm-projected.h.

                                                    {
 
     // apply the gradient clipping,
     if (grad_clip_ > 0.0) {
       f_w_gifo_x_corr_.ApplyFloor(-grad_clip_);
       f_w_gifo_x_corr_.ApplyCeiling(grad_clip_);
       f_w_gifo_r_corr_.ApplyFloor(-grad_clip_);
       f_w_gifo_r_corr_.ApplyCeiling(grad_clip_);
       f_bias_corr_.ApplyFloor(-grad_clip_);
       f_bias_corr_.ApplyCeiling(grad_clip_);
       f_w_r_m_corr_.ApplyFloor(-grad_clip_);
       f_w_r_m_corr_.ApplyCeiling(grad_clip_);
       f_peephole_i_c_corr_.ApplyFloor(-grad_clip_);
       f_peephole_i_c_corr_.ApplyCeiling(grad_clip_);
       f_peephole_f_c_corr_.ApplyFloor(-grad_clip_);
       f_peephole_f_c_corr_.ApplyCeiling(grad_clip_);
       f_peephole_o_c_corr_.ApplyFloor(-grad_clip_);
       f_peephole_o_c_corr_.ApplyCeiling(grad_clip_);
 
       b_w_gifo_x_corr_.ApplyFloor(-grad_clip_);
       b_w_gifo_x_corr_.ApplyCeiling(grad_clip_);
       b_w_gifo_r_corr_.ApplyFloor(-grad_clip_);
       b_w_gifo_r_corr_.ApplyCeiling(grad_clip_);
       b_bias_corr_.ApplyFloor(-grad_clip_);
       b_bias_corr_.ApplyCeiling(grad_clip_);
       b_w_r_m_corr_.ApplyFloor(-grad_clip_);
       b_w_r_m_corr_.ApplyCeiling(grad_clip_);
       b_peephole_i_c_corr_.ApplyFloor(-grad_clip_);
       b_peephole_i_c_corr_.ApplyCeiling(grad_clip_);
       b_peephole_f_c_corr_.ApplyFloor(-grad_clip_);
       b_peephole_f_c_corr_.ApplyCeiling(grad_clip_);
       b_peephole_o_c_corr_.ApplyFloor(-grad_clip_);
       b_peephole_o_c_corr_.ApplyCeiling(grad_clip_);
     }
 
     const BaseFloat lr = opts_.learn_rate;
 
     // forward direction update
     f_w_gifo_x_.AddMat(-lr * learn_rate_coef_, f_w_gifo_x_corr_);
     f_w_gifo_r_.AddMat(-lr * learn_rate_coef_, f_w_gifo_r_corr_);
     f_bias_.AddVec(-lr * bias_learn_rate_coef_, f_bias_corr_, 1.0);
 
     f_peephole_i_c_.AddVec(-lr * bias_learn_rate_coef_, f_peephole_i_c_corr_, 1.0);
     f_peephole_f_c_.AddVec(-lr * bias_learn_rate_coef_, f_peephole_f_c_corr_, 1.0);
     f_peephole_o_c_.AddVec(-lr * bias_learn_rate_coef_, f_peephole_o_c_corr_, 1.0);
 
     f_w_r_m_.AddMat(-lr * learn_rate_coef_, f_w_r_m_corr_);
 
     // backward direction update
     b_w_gifo_x_.AddMat(-lr * learn_rate_coef_, b_w_gifo_x_corr_);
     b_w_gifo_r_.AddMat(-lr * learn_rate_coef_, b_w_gifo_r_corr_);
     b_bias_.AddVec(-lr * bias_learn_rate_coef_, b_bias_corr_, 1.0);
 
     b_peephole_i_c_.AddVec(-lr * bias_learn_rate_coef_, b_peephole_i_c_corr_, 1.0);
     b_peephole_f_c_.AddVec(-lr * bias_learn_rate_coef_, b_peephole_f_c_corr_, 1.0);
     b_peephole_o_c_.AddVec(-lr * bias_learn_rate_coef_, b_peephole_o_c_corr_, 1.0);
 
     b_w_r_m_.AddMat(-lr * learn_rate_coef_, b_w_r_m_corr_);
   }

◆ WriteData()

void WriteData	(	std::ostream &	os,
		bool	binary
	)		const

inline virtual

Writes the component content.

Reimplemented from Component.

Definition at line 190 of file nnet-blstm-projected.h.

References BlstmProjected::b_bias_, BlstmProjected::b_peephole_f_c_, BlstmProjected::b_peephole_i_c_, BlstmProjected::b_peephole_o_c_, BlstmProjected::b_w_gifo_r_, BlstmProjected::b_w_gifo_x_, BlstmProjected::b_w_r_m_, UpdatableComponent::bias_learn_rate_coef_, BlstmProjected::cell_clip_, BlstmProjected::cell_diff_clip_, BlstmProjected::cell_dim_, BlstmProjected::diff_clip_, BlstmProjected::f_bias_, BlstmProjected::f_peephole_f_c_, BlstmProjected::f_peephole_i_c_, BlstmProjected::f_peephole_o_c_, BlstmProjected::f_w_gifo_r_, BlstmProjected::f_w_gifo_x_, BlstmProjected::f_w_r_m_, BlstmProjected::grad_clip_, UpdatableComponent::learn_rate_coef_, kaldi::WriteBasicType(), and kaldi::WriteToken().

                                                     {
     // Serializes the component to 'os' in binary or text mode ('binary').
     // Layout: the configuration values, each preceded by its token, then
     // the forward-direction parameters, then the backward-direction
     // parameters.  The statement order below defines the stored layout.
     // NOTE(review): ReadData() is not visible here; it presumably consumes
     // the fields in this same order -- confirm before reordering anything.

     // cell dimension,
     WriteToken(os, binary, "<CellDim>");
     WriteBasicType(os, binary, cell_dim_);
 
     // learning-rate coefficients,
     WriteToken(os, binary, "<LearnRateCoef>");
     WriteBasicType(os, binary, learn_rate_coef_);
     WriteToken(os, binary, "<BiasLearnRateCoef>");
     WriteBasicType(os, binary, bias_learn_rate_coef_);
 
     // clipping constants,
     WriteToken(os, binary, "<CellClip>");
     WriteBasicType(os, binary, cell_clip_);
     WriteToken(os, binary, "<DiffClip>");
     WriteBasicType(os, binary, diff_clip_);
     WriteToken(os, binary, "<CellDiffClip>");
     WriteBasicType(os, binary, cell_diff_clip_);
     WriteToken(os, binary, "<GradClip>");
     WriteBasicType(os, binary, grad_clip_);
 
     // text mode only: line break between the header and the parameters,
     if (!binary) os << "\n";
     // writing parameters, forward direction,
     f_w_gifo_x_.Write(os, binary);
     f_w_gifo_r_.Write(os, binary);
     f_bias_.Write(os, binary);
 
     f_peephole_i_c_.Write(os, binary);
     f_peephole_f_c_.Write(os, binary);
     f_peephole_o_c_.Write(os, binary);
 
     f_w_r_m_.Write(os, binary);
 
     // text mode only: line break between the two directions,
     if (!binary) os << "\n";
     // writing parameters, backward direction,
     b_w_gifo_x_.Write(os, binary);
     b_w_gifo_r_.Write(os, binary);
     b_bias_.Write(os, binary);
 
     b_peephole_i_c_.Write(os, binary);
     b_peephole_f_c_.Write(os, binary);
     b_peephole_o_c_.Write(os, binary);
 
     b_w_r_m_.Write(os, binary);
   }