25 int main(
int argc,
char *argv[]) {
27 using namespace kaldi;
32 "Convert cmvn-stats into <AddShift> and <Rescale> components.\n" 33 "Usage: cmvn-to-nnet [options] <transf-in> <nnet-out>\n" 35 " cmvn-to-nnet --binary=false transf.mat nnet.mdl\n";
38 bool binary_write =
false;
40 float var_floor = 1e-10;
41 float learn_rate_coef = 0.0;
44 po.
Register(
"binary", &binary_write,
"Write output in binary mode");
45 po.
Register(
"std-dev", &std_dev,
"Standard deviation of the output.");
47 "Floor the variance, so the factors in <Rescale> are bounded.");
48 po.
Register(
"learn-rate-coef", &learn_rate_coef,
49 "Initialize learning-rate coefficient to a value.");
58 std::string cmvn_stats_rxfilename = po.
GetArg(1),
59 model_out_filename = po.
GetArg(2);
65 Input ki(cmvn_stats_rxfilename, &binary_read);
71 int32 num_dims = cmvn_stats.
NumCols() - 1;
72 double frame_count = cmvn_stats(0, cmvn_stats.
NumCols() - 1);
79 for (int32
d = 0;
d < num_dims;
d++) {
80 BaseFloat mean = cmvn_stats(0,
d) / frame_count;
81 BaseFloat var = cmvn_stats(1,
d) / frame_count - mean * mean;
82 if (var <= var_floor) {
84 <<
" flooring to " << var_floor;
88 scale(
d) = std_dev / sqrt(var);
96 AddShift shift_component(shift.Dim(), shift.Dim());
98 shift_component.SetLearnRateCoef(learn_rate_coef);
106 scale_component.SetLearnRateCoef(learn_rate_coef);
112 Output ko(model_out_filename, binary_write);
114 KALDI_LOG <<
"Written cmvn in 'nnet1' model to: " << model_out_filename;
117 }
catch(
const std::exception &e) {
118 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
void Write(const std::string &wxfilename, bool binary) const
Write Nnet to 'wxfilename'.
Rescale the data column-wise by a vector (can be used for global variance normalization) ...
void Register(const std::string &name, bool *ptr, const std::string &doc)
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from 'params', which holds them reshaped as a vector.
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
Adds shift to all the lines of the matrix (can be used for global mean normalization) ...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from 'params', which holds them reshaped as a vector.
void AppendComponent(const Component &comp)
Append Component to 'this' instance of Nnet (deep copy).