21 #ifndef KALDI_NNET_NNET_CONVOLUTIONAL_COMPONENT_H_ 22 #define KALDI_NNET_NNET_CONVOLUTIONAL_COMPONENT_H_ 84 BaseFloat bias_mean = -2.0, bias_range = 2.0, param_stddev = 0.1;
87 while (is >> std::ws, !is.eof()) {
89 if (token ==
"<ParamStddev>")
ReadBasicType(is,
false, &param_stddev);
90 else if (token ==
"<BiasMean>")
ReadBasicType(is,
false, &bias_mean);
91 else if (token ==
"<BiasRange>")
ReadBasicType(is,
false, &bias_range);
98 else KALDI_ERR <<
"Unknown token " << token <<
", a typo in config?" 99 <<
" (ParamStddev|BiasMean|BiasRange|PatchDim|PatchStep|PatchStride)";
108 KALDI_LOG <<
"num_splice " << num_splice;
112 KALDI_LOG <<
"num_patches " << num_patches;
115 KALDI_LOG <<
"filter_dim " << filter_dim;
119 KALDI_LOG <<
"num_filters " << num_filters;
126 filters_.Resize(num_filters, filter_dim);
129 bias_.Resize(num_filters);
143 bool end_loop =
false;
146 switch (first_char) {
147 case 'L':
ExpectToken(is, binary,
"<LearnRateCoef>");
150 case 'B':
ExpectToken(is, binary,
"<BiasLearnRateCoef>");
156 case '!':
ExpectToken(is, binary,
"<!EndOfComponent>");
157 default: end_loop =
true;
165 bias_.Read(is, binary);
196 if (!binary) os <<
"\n";
201 WriteToken(os, binary,
"<BiasLearnRateCoef>");
206 if (!binary) os <<
"\n";
210 if (!binary) os <<
"\n";
213 if (!binary) os <<
"\n";
214 bias_.Write(os, binary);
224 gradient->
Range(0, filters_num_elem).CopyRowsFromMat(
filters_);
231 params->
Range(0, filters_num_elem).CopyRowsFromMat(
filters_);
288 for (
int32 p = 0; p < num_patches; p++) {
289 for (
int32 s = 0; s < num_splice; s++) {
301 for (
int32 p = 0; p < num_patches; p++) {
304 p * filter_dim, filter_dim));
324 std::vector<std::vector<int32> > *backward_indexes) {
326 int32 size = forward_indexes.size();
329 std::vector<std::vector<int32> >::iterator iter = backward_indexes->begin(),
330 end = backward_indexes->end();
331 for (; iter != end; ++iter)
332 iter->reserve(reserve_size);
334 i = forward_indexes[
j];
336 (*backward_indexes)[
i].push_back(
j);
352 std::vector<std::vector<int32> > *out) {
356 if (in[
i].size() > L)
360 (*out)[
i].resize(D, -1);
362 for (
int32 j = 0;
j < in[
i].size();
j++) {
363 (*out)[
j][
i] = in[
i][
j];
379 for (
int32 p = 0; p < num_patches; p++) {
381 p * filter_dim, filter_dim));
383 p * num_filters, num_filters));
389 std::vector<std::vector<int32> > reversed_column_map;
391 std::vector<std::vector<int32> > rearranged_column_map;
393 for (
int32 p = 0; p < rearranged_column_map.size(); p++) {
416 for (
int32 p = 0; p < num_patches; p++) {
420 p * filter_dim, filter_dim));
422 bias_grad_.AddRowSumMat(1.0, diff_patch, 1.0);
482 #endif // KALDI_NNET_NNET_CONVOLUTIONAL_COMPONENT_H_ std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
std::string Info() const
Print some additional info (after <ComponentName> and the dims).
int32 patch_step_
step of the convolution (i.e. ...
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from `params`, which holds them reshaped as a vector.
void ReverseIndexes(const std::vector< int32 > &forward_indexes, std::vector< std::vector< int32 > > *backward_indexes)
ConvolutionalComponent implements convolution over a single axis (i.e. ...
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
ComponentType GetType() const
Get the type identification of the component.
void GetGradient(VectorBase< BaseFloat > *gradient) const
Get the gradient reshaped as a vector.
NnetTrainOptions opts_
Option class with training hyper-parameters.
std::string MomentStatistics(const VectorBase< Real > &vec)
Get a string with statistics of the data in a vector, so we can print them easily.
int32 input_dim_
Data members.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
BaseFloat bias_learn_rate_coef_
Scalar applied to the learning rate for the bias (to be used in the ::Update method).
void ReadData(std::istream &is, bool binary)
Reads the component content.
BaseFloat learn_rate_coef_
Scalar applied to the learning rate for the weight matrices (to be used in the ::Update method).
Class UpdatableComponent is a Component which has trainable parameters, it contains SGD training hype...
CuMatrix< BaseFloat > filters_grad_
gradient of filters
void RandUniform(BaseFloat mu, BaseFloat range, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Uniform distribution): mu = the mean value, range = the 'width' of...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void RearrangeIndexes(const std::vector< std::vector< int32 > > &in, std::vector< std::vector< int32 > > *out)
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=NULL)
ComponentType
Component type identification mechanism.
void AddCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indices)
Add column indices[r] of src to column r.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
std::string InfoGradient() const
Print some additional info about the gradient (after <...> and dims).
void InitData(std::istream &is)
Initialize the content of the component from the 'line' of the prototype.
int32 patch_stride_
shift for 2nd dim of a patch
CuMatrix< BaseFloat > filters_
(i.e. frame length before splicing)
BaseFloat max_norm_
limit the L2 norm of a neuron's weights to a positive value
int32 NumParams() const
Number of trainable parameters.
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
CuMatrix< BaseFloat > feature_patch_diffs_
Buffer for backpropagation: derivatives in the domain of 'vectorized_feature_patches_', 1row = vectorized rectangular feature patches, 1col = dim over speech frames.
CuMatrix< BaseFloat > vectorized_feature_patches_
Buffer of reshaped inputs: 1row = vectorized rectangular feature patches, 1col = dim over speech fram...
std::vector< int32 > column_map_
void RandGauss(BaseFloat mu, BaseFloat sigma, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Gaussian distribution): mu = the mean value, sigma = standard devi...
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
This class is used for a piece of a CuMatrix.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
MatrixIndexT Dim() const
Returns the dimension of the vector.
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
CuVector< BaseFloat > bias_
bias for each filter
~ConvolutionalComponent()
int32 output_dim_
Dimension of the output of the Component.
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Matrix for CUDA computing.
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
#define KALDI_ASSERT(cond)
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Abstract class, building block of the network.
void GetParams(VectorBase< BaseFloat > *params) const
Get the trainable parameters reshaped as a vector.
MatrixIndexT NumRows() const
Dimensions.
Provides a vector abstraction class.
CuVector< BaseFloat > bias_grad_
gradient of biases
Component * Copy() const
Copy component (deep copy).
ConvolutionalComponent(int32 dim_in, int32 dim_out)
void Update(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
Compute the gradient and update the parameters.
int32 patch_dim_
number of consecutive inputs, 1st dim of patch
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).