47 token.erase(token.length()-1);
50 KALDI_ERR <<
"Unknown ComponentPrecomputedIndexes type " << token;
51 ans->
Read(is, binary);
56 const std::string &cpi_type) {
58 if (cpi_type ==
"DistributeComponentPrecomputedIndexes") {
60 }
else if (cpi_type ==
"StatisticsExtractionComponentPrecomputedIndexes") {
62 }
else if (cpi_type ==
"StatisticsPoolingComponentPrecomputedIndexes") {
64 }
else if (cpi_type ==
"BackpropTruncationComponentPrecomputedIndexes") {
66 }
else if (cpi_type ==
"TimeHeightConvolutionComponentPrecomputedIndexes") {
68 }
else if (cpi_type ==
"RestrictedAttentionComponentPrecomputedIndexes") {
70 }
else if (cpi_type ==
"GeneralDropoutComponentPrecomputedIndexes") {
72 }
else if (cpi_type ==
"SpecAugmentTimeMaskComponentPrecomputedIndexes") {
74 }
else if (cpi_type ==
"TdnnComponentPrecomputedIndexes") {
88 token.erase(token.length()-1);
89 Component *ans = NewComponentOfType(token);
91 KALDI_ERR <<
"Unknown component type " << token;
92 ans->
Read(is, binary);
100 if (component_type ==
"SigmoidComponent") {
102 }
else if (component_type ==
"TanhComponent") {
104 }
else if (component_type ==
"SoftmaxComponent") {
106 }
else if (component_type ==
"LogSoftmaxComponent") {
108 }
else if (component_type ==
"RectifiedLinearComponent") {
110 }
else if (component_type ==
"NormalizeComponent") {
112 }
else if (component_type ==
"PnormComponent") {
114 }
else if (component_type ==
"AffineComponent") {
116 }
else if (component_type ==
"LinearComponent") {
118 }
else if (component_type ==
"NaturalGradientAffineComponent") {
120 }
else if (component_type ==
"PerElementScaleComponent") {
122 }
else if (component_type ==
"NaturalGradientPerElementScaleComponent") {
124 }
else if (component_type ==
"PerElementOffsetComponent") {
126 }
else if (component_type ==
"SumGroupComponent") {
128 }
else if (component_type ==
"FixedAffineComponent") {
130 }
else if (component_type ==
"FixedScaleComponent") {
132 }
else if (component_type ==
"FixedBiasComponent") {
134 }
else if (component_type ==
"NoOpComponent") {
136 }
else if (component_type ==
"ClipGradientComponent") {
138 }
else if (component_type ==
"ElementwiseProductComponent") {
140 }
else if (component_type ==
"ConvolutionComponent") {
142 }
else if (component_type ==
"TdnnComponent") {
144 }
else if (component_type ==
"MaxpoolingComponent") {
146 }
else if (component_type ==
"PermuteComponent") {
148 }
else if (component_type ==
"DistributeComponent") {
150 }
else if (component_type ==
"CompositeComponent") {
152 }
else if (component_type ==
"RepeatedAffineComponent") {
154 }
else if (component_type ==
"BlockAffineComponent") {
156 }
else if (component_type ==
"NaturalGradientRepeatedAffineComponent") {
158 }
else if (component_type ==
"StatisticsExtractionComponent") {
160 }
else if (component_type ==
"StatisticsPoolingComponent") {
162 }
else if (component_type ==
"ConstantFunctionComponent") {
164 }
else if (component_type ==
"ConstantComponent") {
166 }
else if (component_type ==
"DropoutComponent") {
168 }
else if (component_type ==
"DropoutMaskComponent") {
170 }
else if (component_type ==
"GeneralDropoutComponent") {
172 }
else if (component_type ==
"SpecAugmentTimeMaskComponent") {
174 }
else if (component_type ==
"BackpropTruncationComponent") {
176 }
else if (component_type ==
"LstmNonlinearityComponent") {
178 }
else if (component_type ==
"BatchNormComponent") {
180 }
else if (component_type ==
"TimeHeightConvolutionComponent") {
182 }
else if (component_type ==
"RestrictedAttentionComponent") {
184 }
else if (component_type ==
"SumBlockComponent") {
186 }
else if (component_type ==
"GruNonlinearityComponent") {
187 ans =
new GruNonlinearityComponent();
188 }
else if (component_type ==
"OutputGruNonlinearityComponent") {
189 ans =
new OutputGruNonlinearityComponent();
190 }
else if (component_type ==
"ScaleAndOffsetComponent") {
200 std::stringstream stream;
201 stream <<
Type() <<
", input-dim=" << InputDim()
202 <<
", output-dim=" << OutputDim();
207 const Index &output_index,
208 std::vector<Index> *input_indexes)
const {
209 input_indexes->resize(1);
210 (*input_indexes)[0] = output_index;
214 const Index &output_index,
216 std::vector<Index> *used_inputs)
const {
219 if (!input_index_set(output_index))
222 used_inputs->clear();
223 used_inputs->push_back(output_index);
230 learning_rate_(other.learning_rate_),
231 learning_rate_factor_(other.learning_rate_factor_),
232 l2_regularize_(other.l2_regularize_),
233 is_gradient_(other.is_gradient_),
234 max_change_(other.max_change_) { }
265 std::ostringstream opening_tag;
266 opening_tag <<
'<' << this->
Type() <<
'>';
269 if (token == opening_tag.str()) {
274 if (token ==
"<LearningRateFactor>") {
280 if (token ==
"<IsGradient>") {
286 if (token ==
"<MaxChange>") {
292 if (token ==
"<L2Regularize>") {
298 if (token ==
"<LearningRate>") {
308 std::ostringstream opening_tag;
309 opening_tag <<
'<' << this->
Type() <<
'>';
313 WriteToken(os, binary,
"<LearningRateFactor>");
334 std::stringstream stream;
336 <<
", output-dim=" <<
OutputDim() <<
", learning-rate=" 339 stream <<
", is-gradient=true";
355 if (value_sum_.Dim() != dim_ ||
356 (deriv != NULL && deriv_sum_.
Dim() != dim_)) {
357 if (value_sum_.Dim() != dim_) {
358 value_sum_.Resize(dim_);
361 if (deriv != NULL && deriv_sum_.
Dim() != dim_) {
362 deriv_sum_.Resize(dim_);
364 value_sum_.SetZero();
370 value_sum_.AddVec(1.0, temp);
373 deriv_sum_.AddVec(1.0, temp);
382 if (
RandInt(0, 3) == 0 && oderiv_count_ != 0)
388 if (oderiv_sumsq_.Dim() != dim_) {
389 oderiv_sumsq_.Resize(dim_);
394 oderiv_sumsq_.AddVec(1.0, temp);
395 oderiv_count_ += out_deriv.
NumRows();
400 value_sum_.SetZero();
401 deriv_sum_.SetZero();
402 oderiv_sumsq_.SetZero();
405 num_dims_self_repaired_ = 0.0;
406 num_dims_processed_ = 0.0;
410 std::stringstream stream;
411 stream <<
Type() <<
", dim=" << dim_;
412 if (block_dim_ != dim_)
413 stream <<
", block-dim=" << block_dim_;
414 if (self_repair_lower_threshold_ !=
BaseFloat(kUnsetThreshold))
415 stream <<
", self-repair-lower-threshold=" << self_repair_lower_threshold_;
416 if (self_repair_upper_threshold_ !=
BaseFloat(kUnsetThreshold))
417 stream <<
", self-repair-upper-threshold=" << self_repair_upper_threshold_;
418 if (self_repair_scale_ != 0.0)
419 stream <<
", self-repair-scale=" << self_repair_scale_;
420 if (count_ > 0 && value_sum_.Dim() == dim_) {
421 stream <<
", count=" << std::setprecision(3) << count_
422 << std::setprecision(6);
423 stream <<
", self-repaired-proportion=" 424 << (num_dims_processed_ > 0 ?
425 num_dims_self_repaired_ / num_dims_processed_ : 0);
428 value_avg.
Scale(1.0 / count_);
430 if (deriv_sum_.Dim() == dim_) {
432 deriv_avg.
Scale(1.0 / count_);
436 if (oderiv_count_ > 0 && oderiv_sumsq_.Dim() == dim_) {
438 oderiv_rms.
Scale(1.0 / oderiv_count_);
444 <<
", oderiv-count=" << oderiv_count_;
450 value_sum_.Scale(scale);
451 deriv_sum_.Scale(scale);
452 oderiv_sumsq_.Scale(scale);
454 oderiv_count_ *= scale;
455 num_dims_self_repaired_ *= scale;
456 num_dims_processed_ *= scale;
475 count_ += alpha * other->
count_;
482 std::ostringstream ostr_beg, ostr_end;
483 ostr_beg <<
"<" <<
Type() <<
">";
484 ostr_end <<
"</" <<
Type() <<
">";
494 value_sum_.Read(is, binary);
496 deriv_sum_.Read(is, binary);
501 oderiv_sumsq_.Read(is, binary);
502 oderiv_sumsq_.ApplyPow(2.0);
507 oderiv_sumsq_.Resize(0);
509 value_sum_.Scale(count_);
510 deriv_sum_.Scale(count_);
511 oderiv_sumsq_.Scale(oderiv_count_);
515 if (token[0] !=
'<') {
520 if (token ==
"<NumDimsSelfRepaired>") {
524 if (token ==
"<NumDimsProcessed>") {
528 if (token ==
"<SelfRepairLowerThreshold>") {
532 if (token ==
"<SelfRepairUpperThreshold>") {
536 if (token ==
"<SelfRepairScale>") {
540 if (token != ostr_end.str()) {
541 KALDI_ERR <<
"Expected token " << ostr_end.str()
542 <<
", got " << token;
547 std::ostringstream ostr_beg, ostr_end;
548 ostr_beg <<
"<" <<
Type() <<
">";
549 ostr_end <<
"</" <<
Type() <<
">";
553 if (block_dim_ != dim_) {
561 if (count_ != 0.0) temp.
Scale(1.0 / count_);
562 temp.
Write(os, binary);
565 temp.
Resize(deriv_sum_.Dim());
567 if (count_ != 0.0) temp.
Scale(1.0 / count_);
568 temp.
Write(os, binary);
574 temp.
Resize(oderiv_sumsq_.Dim());
576 if (oderiv_count_ != 0.0) temp.
Scale(1.0 / oderiv_count_);
581 temp.
Write(os, binary);
586 WriteToken(os, binary,
"<NumDimsSelfRepaired>");
590 if (self_repair_lower_threshold_ != kUnsetThreshold) {
591 WriteToken(os, binary,
"<SelfRepairLowerThreshold>");
594 if (self_repair_upper_threshold_ != kUnsetThreshold) {
595 WriteToken(os, binary,
"<SelfRepairUpperThreshold>");
598 if (self_repair_scale_ != 0.0) {
606 dim_(-1), block_dim_(-1), count_(0.0), oderiv_count_(0.0),
607 num_dims_self_repaired_(0.0), num_dims_processed_(0.0),
608 self_repair_lower_threshold_(kUnsetThreshold),
609 self_repair_upper_threshold_(kUnsetThreshold),
610 self_repair_scale_(0.0) { }
632 KALDI_ERR <<
"Invalid initializer for layer of type " virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
CuVector< double > oderiv_sumsq_
const std::string WholeLine()
BaseFloat self_repair_scale_
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function, when called by an UpdatableComponent, adds the parameters of another updatabl...
virtual void Read(std::istream &os, bool binary)=0
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
An abstract representation of a set of Indexes.
TdnnComponent is a more memory-efficient alternative to manually splicing several frames of input and...
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
void InitLearningRatesFromConfig(ConfigLine *cfl)
void SetUpdatableConfigs(const UpdatableComponent &other)
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
std::string SummarizeVector(const VectorBase< float > &vec)
Returns a string that summarizes a vector fairly succinctly, for printing stats in info lines...
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
virtual int32 OutputDim() const =0
Returns output-dimension of this component.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
BaseFloat max_change_
configuration value for imposing max-change
PermuteComponent changes the order of the columns (i.e.
CuVector< double > deriv_sum_
Contains component(s) related to attention models.
CompositeComponent is a component representing a sequence of [simple] components. ...
This file contains declarations of components that in one way or another normalize their input: Norma...
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
virtual void Read(std::istream &is, bool binary)=0
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
FixedScaleComponent applies a fixed per-element scale; it's similar to the Rescale component in the n...
This file contains declarations of components that are "simple", meaning they don't care about the in...
SpecAugmentTimeMaskComponent implements the time part of SpecAugment.
void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
This function is like ExpectToken but for two tokens, and it will either accept token1 and then token...
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but it uses a natural gradie...
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
double num_dims_self_repaired_
SumGroupComponent is used to sum up groups of posteriors.
BaseFloat learning_rate_
learning rate (typically 0.0..0.01)
You can view this as an overflow from nnet-simple-component.h.
std::string ReadUpdatableCommon(std::istream &is, bool binary)
CuVector< double > value_sum_
static Component * ReadNew(std::istream &is, bool binary)
Read component from stream (works out its type). Dies on error.
This Component takes a larger input-dim than output-dim, where the input-dim must be a multiple of th...
BaseFloat learning_rate_factor_
learning rate factor (normally 1.0, but can be set to another value so that when you call SetLear...
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
BaseFloat l2_regularize_
L2 regularization constant.
void Scale(Real alpha)
Multiplies all elements by this constant.
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
BaseFloat self_repair_lower_threshold_
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
static ComponentPrecomputedIndexes * ReadNew(std::istream &is, bool binary)
virtual std::string Type() const =0
Returns a string such as "SigmoidComponent", describing the type of the object.
GeneralDropoutComponent implements dropout, including a continuous variant where the thing we multipl...
void StoreBackpropStats(const CuMatrixBase< BaseFloat > &out_deriv)
BaseFloat self_repair_upper_threshold_
Matrix for CUDA computing.
MatrixIndexT NumCols() const
virtual std::string Type() const =0
virtual void Scale(BaseFloat scale)
This virtual function, when called on an UpdatableComponent, scales the parameters by "scale" when c...
double num_dims_processed_
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
RestrictedAttentionComponent implements an attention model with restricted temporal context...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
bool is_gradient_
True if this component is to be treated as a gradient rather than as parameters.
#define KALDI_ASSERT(cond)
PerElementScaleComponent scales each dimension of its input with a separate trainable scale; it's lik...
void WriteUpdatableCommon(std::ostream &is, bool binary) const
void ApplyPow(Real power)
Take all elements of vector to a power.
SumBlockComponent sums over blocks of its input: for instance, if you create one with the config "inp...
FixedBiasComponent applies a fixed per-element bias; it's similar to the AddShift component in the nn...
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
NoOpComponent just duplicates its input.
bool HasUnusedValues() const
bool GetValue(const std::string &key, std::string *value)
void StoreStatsInternal(const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > *deriv=NULL)
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void Read(std::istream &is, bool binary)
We implement Read at this level as it just needs the Type().
void Swap(CuVector< Real > *vec)
MatrixIndexT NumRows() const
Dimensions.
virtual int32 InputDim() const =0
Returns input-dimension of this component.
static Component * NewComponentOfType(const std::string &type)
Returns a new Component of the given type e.g.
static ComponentPrecomputedIndexes * NewComponentPrecomputedIndexesOfType(const std::string &cpi_type)
TimeHeightConvolutionComponent implements 2-dimensional convolution where one of the dimensions of co...
BaseFloat LearningRate() const
Gets the learning rate to be used in gradient descent.
This file contains declarations of components that are not "simple", meaning they care about the inde...
WARNING, this component is deprecated in favor of TimeHeightConvolutionComponent, and will be deleted...
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
MatrixIndexT Dim() const
Dimensions.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
FixedAffineComponent is an affine transform that is supplied at network initialization time and is no...
This class implements an affine transform using a block diagonal matrix e.g., one whose weight matrix...