21 #ifndef KALDI_NNET_NNET_FRAME_POOLING_COMPONENT_H_ 22 #define KALDI_NNET_NNET_FRAME_POOLING_COMPONENT_H_ 63 std::vector<int32> pool_size;
64 std::vector<int32> central_offset;
66 float learn_rate_coef = 0.01;
69 while (is >> std::ws, !is.eof()) {
72 else if (token ==
"<CentralOffset>")
ReadIntegerVector(is,
false, &central_offset);
74 else if (token ==
"<PoolWeight>") pool_weight.
Read(is,
false);
75 else if (token ==
"<LearnRateCoef>")
ReadBasicType(is,
false, &learn_rate_coef);
77 else KALDI_ERR <<
"Unknown token " << token <<
", a typo in config?" 78 <<
" (FeatureDim|CentralOffset <vec>|PoolSize <vec>|LearnRateCoef|Normalize)";
86 int32 central_frame = (num_frames -1) / 2;
87 int32 num_pools = central_offset.size();
90 for (
int32 p = 0; p < num_pools; p++) {
91 offset_[p] = central_frame + central_offset[p] + std::min(0, pool_size[p]+1);
92 weight_[p].Resize(std::abs(pool_size[p]));
93 weight_[p].Set(1.0/std::abs(pool_size[p]));
96 if (pool_weight.
Dim() != 0) {
97 KALDI_LOG <<
"Initializing from pool-weight vector";
98 int32 num_weights = 0;
99 for (
int32 p = 0; p < num_pools; p++) {
101 num_weights +=
weight_[p].Dim();
106 for (
int32 p = 0; p < num_pools; p++) {
131 for (
int32 p = 0; p < num_pools; p++) {
143 for (
int32 p = 0; p < num_pools; p++) {
162 for (
int32 p = 0; p < num_pools; p++) {
194 std::ostringstream oss;
195 oss <<
"\n (offset,weights) : ";
203 std::ostringstream oss;
205 oss <<
"\n (offset,weights_grad) : ";
207 oss <<
"(" <<
offset_[p] <<
",";
210 oss.seekp(-1, std::ios_base::cur);
224 for (
int32 p = 0; p < num_pools; p++) {
248 for (
int32 p = 0; p < num_pools; p++) {
256 diff.
ColRange(p * feature_dim_, feature_dim_)
264 for (
int32 p = 0; p < num_pools; p++) {
269 for (
int32 p = 0; p < num_pools; p++) {
290 #endif // KALDI_NNET_NNET_FRAME_POOLING_COMPONENT_H_ std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
ComponentType GetType() const
Get Type Identification of the component.
NnetTrainOptions opts_
Option-class with training hyper-parameters.
Component * Copy() const
Copy component (deep copy).
int32 input_dim_
Data members.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
BaseFloat learn_rate_coef_
Scalar applied to learning rate for weight matrices (to be used in ::Update method).
Class UpdatableComponent is a Component which has trainable parameters, it contains SGD training hype...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
std::string Info() const
Print some additional info (after <ComponentName> and the dims).
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
ComponentType
Component type identification mechanism.
void Update(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
Compute gradient and update parameters.
std::vector< Vector< BaseFloat > > weight_
Vector of pooling weight vectors.
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
void SetZero()
Math operations, some calling kernels.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
int32 InputDim() const
Get the dimension of the input.
FramePoolingComponent(int32 dim_in, int32 dim_out)
This class is used for a piece of a CuMatrix.
std::vector< int32 > offset_
void ReadData(std::istream &is, bool binary)
Here the offsets are w.r.t.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from the given vector (the parameters reshaped as a vector).
std::string InfoGradient() const
Print some additional info about gradient (after <...> and dims).
int32 output_dim_
Dimension of the output of the Component.
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
int32 NumParams() const
Number of trainable parameters.
Matrix for CUDA computing.
MatrixIndexT NumCols() const
A class representing a vector.
#define KALDI_ASSERT(cond)
void GetGradient(VectorBase< BaseFloat > *gradient) const
Get gradient reshaped as a vector.
void InitData(std::istream &is)
Here the offsets are w.r.t.
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Abstract class, building block of the network.
std::vector< Vector< BaseFloat > > weight_diff_
Derivatives of the weight vectors.
void GetParams(VectorBase< BaseFloat > *params) const
Get the trainable parameters reshaped as a vector.
Provides a vector abstraction class.
FramePoolingComponent : The input/output matrices are split to frames of width 'feature_dim_'.
void Read(std::istream &in, bool binary, bool add=false)
Read function using C++ streams.
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).