21 #ifndef KALDI_NNET_NNET_MULTIBASIS_COMPONENT_H_ 22 #define KALDI_NNET_NNET_MULTIBASIS_COMPONENT_H_ 50 std::string selector_proto;
51 std::string selector_filename;
52 std::string basis_proto;
53 std::string basis_filename;
54 std::vector<std::string> basis_filename_vector;
58 while (is >> std::ws, !is.eof()) {
60 if (token ==
"<SelectorProto>")
ReadToken(is,
false, &selector_proto);
61 else if (token ==
"<SelectorFilename>")
ReadToken(is,
false, &selector_filename);
63 else if (token ==
"<BasisProto>")
ReadToken(is,
false, &basis_proto);
64 else if (token ==
"<BasisFilename>")
ReadToken(is,
false, &basis_filename);
65 else if (token ==
"<BasisFilenameVector>") {
66 while(is >> std::ws, !is.eof()) {
67 std::string file_or_end;
69 if (file_or_end ==
"</BasisFilenameVector>")
break;
70 basis_filename_vector.push_back(file_or_end);
72 }
else KALDI_ERR <<
"Unknown token " << token <<
", typo in config?" 73 <<
" (SelectorProto|SelectorFilename|BasisProto|BasisFilename|BasisFilenameVector)";
79 if (selector_proto !=
"") {
80 KALDI_LOG <<
"Initializing 'selector' from : " << selector_proto;
83 if (selector_filename !=
"") {
84 KALDI_LOG <<
"Reading 'selector' from : " << selector_filename;
91 if (basis_proto !=
"") {
93 KALDI_LOG <<
"Initializing 'basis' from : " << basis_proto;
97 }
else if (basis_filename !=
"") {
99 KALDI_LOG <<
"Reading 'basis' from : " << basis_filename;
103 }
else if (basis_filename_vector.size() > 0) {
105 if (basis_filename_vector.size() !=
nnet_basis_.size()) {
107 <<
"We got " << basis_filename_vector.size();
111 << basis_filename_vector[
i];
117 KALDI_LOG <<
"Initializing 'basis' to Identity <AffineTransform> " 118 <<
OutputDim() <<
"x" << basis_input_dim;
139 bool end_loop =
false;
140 while (!end_loop &&
'<' ==
Peek(is, binary)) {
143 switch (first_char) {
147 else KALDI_ERR <<
"Unknown token: " << token;
153 for (
int32 i = 0;
i < num_basis;
i++) {
178 WriteToken(os, binary,
"<SelectorLearnRateCoef>");
180 if (!binary) os <<
"\n\n";
182 if (!binary) os <<
"\n";
184 if (!binary) os <<
"\n";
187 if (!binary) os <<
"\n";
188 for (
int32 i = 0;
i < num_basis;
i++) {
191 if (!binary) os <<
"\n";
204 return num_params_sum;
208 KALDI_ERR <<
"TODO, not yet implemented!";
216 params->
Range(offset, params_tmp.
Dim()).CopyFromVec(params_tmp);
217 offset += params_tmp.
Dim();
221 params->
Range(offset, params_tmp.
Dim()).CopyFromVec(params_tmp);
222 offset += params_tmp.
Dim();
241 std::ostringstream os;
243 os <<
"basis_network #" <<
i+1 <<
" {\n" 247 os <<
"\nselector {\n" 254 std::ostringstream os;
257 os <<
"basis_gradient #" <<
i+1 <<
" {\n" 262 os <<
"selector_gradient {\n" 269 std::ostringstream os;
272 os <<
"basis_propagate #" <<
i+1 <<
" {\n" 277 os <<
"selector_propagate {\n" 284 std::ostringstream os;
287 os <<
"basis_backpropagate #" <<
i+1 <<
"{\n" 292 os <<
"selector_backpropagate {\n" 356 for (
int32 i = 0;
i < num_basis;
i++) {
365 in_diff_selector.
CopyFromMat(in_diff_selector_tmp);
370 for (
int32 i = 0;
i < num_basis;
i++) {
374 nnet_basis_[
i].Backpropagate(out_diff_scaled, &in_diff_basis_tmp);
375 in_diff_basis.
AddMat(1.0, in_diff_basis_tmp);
456 #endif // KALDI_NNET_NNET_MULTIBASIS_COMPONENT_H_ void Backpropagate(const CuMatrixBase< BaseFloat > &out_diff, CuMatrix< BaseFloat > *in_diff)
Perform backward pass through the network,.
void InitData(std::istream &is)
Initialize the content of the component by the 'line' from the prototype,.
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void SetParams(const VectorBase< BaseFloat > &params)
Set the network weights from a supervector.
Component * Copy() const
Copy component (deep copy),.
void Propagate(const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *out)
Perform forward pass through the network,.
void GetParams(Vector< BaseFloat > *params) const
Get the network weights in a supervector,.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
int32 NumParams() const
Get the number of parameters in the network,.
const CuSubVector< Real > Row(MatrixIndexT i) const
std::vector< CuMatrix< BaseFloat > > basis_out_
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
void Write(const std::string &wxfilename, bool binary) const
Write Nnet to 'wxfilename',.
void Update(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
Compute gradient and update parameters,.
int32 InputDim() const
Dimensionality on network input (input feature dim.),.
Class UpdatableComponent is a Component which has trainable parameters, it contains SGD training hype...
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
std::string Info() const
Print some additional info (after <ComponentName> and the dims),.
Nnet & GetBasis(int32 id)
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
const Nnet & GetBasis(int32 id) const
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
ComponentType GetType() const
Get Type Identification of the component,.
virtual void SetLearnRateCoef(BaseFloat val)
Set the learn-rate coefficient,.
ComponentType
Component type identification mechanism,.
void GetGradient(VectorBase< BaseFloat > *gradient) const
Get gradient reshaped as a vector,.
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from 'params', reshaped as a vector.
virtual void SetBiasLearnRateCoef(BaseFloat val)
Set the learn-rate coefficient for bias,.
Vector< BaseFloat > posterior_sum_
int32 OutputDim() const
Dimensionality of network outputs (posteriors | bn-features | etc.),.
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
std::string InfoGradient() const
Print some additional info about gradient (after <...> and dims),.
std::string InfoBackPropagate(bool header=true) const
Create string with back-propagation-buffer statistics,.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
void SetBiasLearnRateCoef(BaseFloat val)
Overriding the default, which was UpdatableComponent::SetBiasLearnRateCoef(...)
MultiBasisComponent(int32 dim_in, int32 dim_out)
std::vector< Nnet > nnet_basis_
The vector of 'basis' networks (output of basis is combined according to the posterior_ from the sele...
int32 InputDim() const
Get the dimension of the input,.
std::string InfoPropagate() const
void Read(const std::string &rxfilename)
Read Nnet from 'rxfilename',.
void SetLearnRateCoef(BaseFloat val)
Overriding the default, which was UpdatableComponent::SetLearnRateCoef(...)
void SetTrainOptions(const NnetTrainOptions &opts)
Overriding the default, which was UpdatableComponent::SetTrainOptions(...)
This class is used for a piece of a CuMatrix.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
BaseFloat threshold_
Threshold, applied to posterior_sum_, disables the unused basis,.
MatrixIndexT Dim() const
Returns the dimension of the vector.
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
BaseFloat selector_lr_coef_
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Matrix for CUDA computing.
void GetParams(VectorBase< BaseFloat > *params) const
Get the trainable parameters reshaped as a vector,.
void Init(const std::string &proto_file)
Initialize the Nnet from the prototype,.
A class representing a vector.
#define KALDI_ASSERT(cond)
std::string InfoBackPropagate() const
std::string InfoGradient(bool header=true) const
Create string with per-component gradient statistics,.
std::string InfoPropagate(bool header=true) const
Create string with propagation-buffer statistics,.
std::string Info() const
Create string with human readable description of the nnet,.
void SetTrainOptions(const NnetTrainOptions &opts)
Set hyper-parameters of the training (pushes to all UpdatableComponents),.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Abstract class, building block of the network.
int32 OutputDim() const
Get the dimension of the output,.
MatrixIndexT NumRows() const
Dimensions.
Provides a vector abstraction class.
int32 NumParams() const
Number of trainable parameters,.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
CuMatrix< BaseFloat > posterior_
The output of 'selector_',.
Nnet selector_
Selector network,.
void AddDiagVecMat(const Real alpha, const CuVectorBase< Real > &v, const CuMatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
void ReadData(std::istream &is, bool binary)
Reads the component content.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).