37 input_dim_(other.input_dim_), block_dim_(other.block_dim_),
38 target_rms_(other.target_rms_),
39 add_log_stddev_(other.add_log_stddev_) { }
53 KALDI_ERR <<
"Invalid initializer for layer of type " 60 if (token ==
"<NormalizeComponent>") {
66 if (token ==
"<BlockDim>") {
73 if (token ==
"<TargetRms>") {
78 if (token ==
"<AddLogStddev>") {
84 if (token ==
"<ValueAvg>") {
87 temp.
Read(is, binary);
89 temp.
Read(is, binary);
99 WriteToken(os, binary,
"<NormalizeComponent>");
110 WriteToken(os, binary,
"</NormalizeComponent>");
114 std::ostringstream stream;
139 new_num_rows = in.
NumRows() * num_blocks,
144 out_reshaped(out->
Data(), new_num_rows,
145 output_block_dim, output_block_dim);
190 new_num_rows = in_value.
NumRows() * num_blocks,
197 out_deriv_reshaped(out_deriv.
Data(), new_num_rows,
198 output_block_dim, output_block_dim),
199 in_deriv_reshaped(in_deriv->
Data(), new_num_rows,
217 KALDI_WARN <<
"Test-mode is set but there is no data count. " 218 "Creating random counts. This is NOT A PROBLEM if the message " 219 "appears in unit-tests or in compute_prob_*.0.log. If you see this " 220 "elsewhere, something is very wrong.";
222 stats_sum_.SetRandn();
223 stats_sumsq_.SetRandn();
224 stats_sumsq_.AddVecVec(1.0, stats_sum_, stats_sum_, 1.0);
229 offset_.CopyFromVec(stats_sum_);
230 offset_.Scale(-1.0 / count_);
232 scale_.CopyFromVec(stats_sumsq_);
233 scale_.Scale(1.0 / count_);
234 scale_.AddVecVec(-1.0, offset_, offset_, 1.0);
238 scale_.ApplyFloor(0.0);
239 scale_.Add(epsilon_);
241 scale_.ApplyPow(power);
245 offset_.MulElements(scale_);
250 test_mode_ = test_mode;
262 test_mode_(other.test_mode_), count_(other.count_),
263 stats_sum_(other.stats_sum_), stats_sumsq_(other.stats_sumsq_) {
270 std::ostringstream stream;
274 <<
", test-mode=" << (
test_mode_ ?
"true" :
"false");
300 if (!ok ||
dim_ <= 0) {
301 KALDI_ERR <<
"BatchNormComponent must have 'dim' specified, and > 0";
307 KALDI_ERR <<
"Invalid configuration in BatchNormComponent.";
309 KALDI_ERR <<
"Could not process these elements in initializer: " 411 orig_cols = in.
NumCols(), new_rows = orig_rows * ratio,
412 new_cols = orig_cols / ratio;
414 out_reshaped(out->
Data(), new_rows, new_cols, new_cols);
415 return Propagate(indexes, in_reshaped, &out_reshaped);
433 mean.AddRowSumMat(1.0 / num_frames, in, 0.0);
434 uvar.AddDiagMat2(1.0 / num_frames, in,
kTrans, 0.0);
439 scale.
AddVecVec(-var_scale, mean, mean, var_scale);
452 return static_cast<void*
>(memo);
456 KALDI_ERR <<
"Test mode set in BatchNormComponent, but no stats.";
458 KALDI_ERR <<
"Code error in BatchNormComponent";
468 const std::string &debug_info,
479 SameDim(out_value, *in_deriv) &&
489 orig_rows = out_value.
NumRows(),
490 orig_cols = out_value.
NumCols(),
491 new_rows = orig_rows * ratio, new_cols = orig_cols / ratio;
494 out_deriv_reshaped(out_deriv.
Data(), new_rows, new_cols, new_cols),
495 in_deriv_reshaped(in_deriv->
Data(), new_rows, new_cols, new_cols);
497 Backprop(debug_info, indexes, in_value,
498 out_value_reshaped, out_deriv_reshaped,
499 memo_in, to_update, &in_deriv_reshaped);
503 Memo *memo =
static_cast<Memo*
>(memo_in);
507 KALDI_ASSERT(memo != NULL &&
"memo not passed into backprop");
522 var_deriv_mod.AddDiagMatMat(coeff, out_value,
kTrans,
524 var_deriv_mod.MulElements(scale);
526 temp.AddRowSumMat(-1.0 / num_frames, out_deriv, 0.0);
564 orig_rows = out_value.
NumRows(),
565 orig_cols = out_value.
NumCols(),
566 new_rows = orig_rows * ratio, new_cols = orig_cols / ratio;
570 StoreStats(in_value, out_value_reshaped, memo_in);
574 Memo *memo =
static_cast<Memo*
>(memo_in);
618 WriteToken(os, binary,
"<BatchNormComponent>");
638 mean.Write(os, binary);
640 var.
Write(os, binary);
641 WriteToken(os, binary,
"</BatchNormComponent>");
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
MatrixIndexT Stride() const
const std::string WholeLine()
void SetZero()
Math operations.
CuVector< double > stats_sum_
CuVector< BaseFloat > offset_
static const BaseFloat kSquaredNormFloor
void SetTestMode(bool test_mode)
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void AddMatDiagVec(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, CuVectorBase< Real > &v, Real beta=1.0)
std::string SummarizeVector(const VectorBase< float > &vec)
Returns a string that summarizes a vector fairly succinctly, for printing stats in info lines...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=NULL)
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
This file contains declarations of components that in one way or another normalize their input: Norma...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
This function is like ExpectToken but for two tokens, and it will either accept token1 and then token...
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors:
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
std::string UnusedValues() const
returns e.g.
void Write(std::ostream &is, bool binary) const
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function, when called by an UpdatableComponent, adds the parameters of another updatabl...
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
void DiffNormalizePerRow(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_deriv, const Real target_rms, const bool add_log_stddev, CuMatrixBase< Real > *in_deriv)
CuVector< BaseFloat > scale_
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
CuVector< double > stats_sumsq_
void Resize(MatrixIndexT dim, MatrixResizeType t=kSetZero)
Allocate the memory.
CuMatrix< BaseFloat > mean_uvar_scale
void ApplyPow(Real power)
This class is used for a piece of a CuMatrix.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
void Scale(Real alpha)
Multiplies all elements by this constant.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void AddVec(Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
const Real * Data() const
Return data pointer (const).
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
Matrix for CUDA computing.
MatrixIndexT NumCols() const
const CuVector< BaseFloat > & Scale() const
A class representing a vector.
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
#define KALDI_ASSERT(cond)
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void ApplyPow(Real power)
Take all elements of vector to a power.
void Read(std::istream &is, bool binary)
I/O.
void AddVecVec(Real alpha, const CuVectorBase< Real > &v, const CuVectorBase< Real > &r, Real beta)
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual int32 OutputDim() const
Returns output-dimension of this component.
bool HasUnusedValues() const
bool GetValue(const std::string &key, std::string *value)
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
virtual int32 InputDim() const
Returns input-dimension of this component.
MatrixIndexT NumRows() const
Dimensions.
void NormalizePerRow(const CuMatrixBase< Real > &in, const Real target_rms, const bool add_log_stddev, CuMatrixBase< Real > *out)
Normalize nonlinearity modifies the vector of activations by scaling it so that the root-mean-square ...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
MatrixIndexT Dim() const
Dimensions.