27 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
34 "Copy a (nnet2) neural net and its associated transition model,\n" 35 "possibly changing the binary mode\n" 36 "Also supports multiplying all the learning rates by a factor\n" 37 "(the --learning-rate-factor option) and setting them all to a given\n" 38 "value (the --learning-rate options)\n" 40 "Usage: nnet-am-copy [options] <nnet-in> <nnet-out>\n" 42 " nnet-am-copy --binary=false 1.mdl text.mdl\n";
45 bool binary_write =
true;
46 bool remove_dropout =
false;
48 bool remove_preconditioning =
false;
49 bool collapse =
false;
50 bool match_updatableness =
true;
51 BaseFloat learning_rate_factor = 1.0, learning_rate = -1;
52 std::string learning_rate_scales_str =
" ";
53 std::string learning_rates =
"";
54 std::string scales =
"";
55 std::string stats_from;
58 po.
Register(
"binary", &binary_write,
"Write output in binary mode");
59 po.
Register(
"learning-rate-factor", &learning_rate_factor,
60 "Before copying, multiply all the learning rates in the " 61 "model by this factor.");
62 po.
Register(
"learning-rate", &learning_rate,
63 "If supplied, all the learning rates of \"updatable\" layers" 64 "are set to this value.");
65 po.
Register(
"learning-rates", &learning_rates,
66 "If supplied (a colon-separated list of learning rates), sets " 67 "the learning rates of \"updatable\" layers to these values.");
69 "A colon-separated list of scaling factors, one for each updatable " 70 "layer: a mechanism to scale the parameters.");
71 po.
Register(
"learning-rate-scales", &learning_rate_scales_str,
72 "Colon-separated list of scaling factors for learning rates, " 73 "applied after the --learning-rate and --learning-rates options." 74 "Used to scale learning rates for particular layer types. E.g." 75 "--learning-rate-scales=AffineComponent=0.5");
76 po.
Register(
"truncate", &truncate,
"If set, will truncate the neural net " 77 "to this many components by removing the last components.");
78 po.
Register(
"remove-dropout", &remove_dropout,
"Set this to true to remove " 79 "any dropout components.");
80 po.
Register(
"dropout-scale", &dropout_scale,
"If set, set the dropout scale in any " 81 "dropout components to this value. Note: in traditional dropout, this " 82 "is always zero; you can set it to any value between zero and one.");
83 po.
Register(
"remove-preconditioning", &remove_preconditioning,
"Set this to true to replace " 84 "components of type AffineComponentPreconditioned with AffineComponent.");
85 po.
Register(
"stats-from", &stats_from,
"Before copying neural net, copy the " 86 "statistics in any layer of type NonlinearComponent, from this " 87 "neural network: provide the extended filename.");
88 po.
Register(
"collapse", &collapse,
"If true, collapse sequences of AffineComponents " 89 "and FixedAffineComponents to compactify model");
90 po.
Register(
"match-updatableness", &match_updatableness,
"Only relevant if " 91 "collapse=true; set this to false to collapse mixed types.");
100 std::string nnet_rxfilename = po.
GetArg(1),
101 nnet_wxfilename = po.
GetArg(2);
107 Input ki(nnet_rxfilename, &binary);
112 if (learning_rate_factor != 1.0)
115 if (learning_rate >= 0)
118 if (learning_rates !=
"") {
119 std::vector<BaseFloat> learning_rates_vec;
121 || static_cast<int32>(learning_rates_vec.size()) !=
123 KALDI_ERR <<
"Expected --learning-rates option to be a " 124 <<
"colon-separated string with " 126 <<
" elements, instead got \"" << learning_rates <<
'"';
129 learning_rates_vec.size());
133 if (learning_rate_scales_str !=
" ") {
135 std::map<std::string, BaseFloat> learning_rate_scales;
136 std::vector<std::string> learning_rate_scale_vec;
138 &learning_rate_scale_vec);
139 for (int32 index = 0; index < learning_rate_scale_vec.size();
141 std::vector<std::string> parts;
146 KALDI_ERR <<
"Unknown format for --learning-rate-scales option. " 147 <<
"Expected format is " 148 <<
"--learning-rate-scales=AffineComponent=0.1:AffineComponentPreconditioned=0.5 " 150 << learning_rate_scales_str;
152 learning_rate_scales.insert(std::pair<std::string, BaseFloat>(
153 parts[0], scale_factor));
160 std::vector<BaseFloat> scales_vec;
162 || static_cast<int32>(scales_vec.size()) !=
164 KALDI_ERR <<
"Expected --scales option to be a " 165 <<
"colon-separated string with " 167 <<
" elements, instead got \"" << scales <<
'"';
190 if (stats_from !=
"") {
194 Input ki(stats_from, &binary);
203 Output ko(nnet_wxfilename, binary_write);
207 KALDI_LOG <<
"Copied neural net from " << nnet_rxfilename
208 <<
" to " << nnet_wxfilename;
210 }
catch(
const std::exception &e) {
211 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< F > *out)
void CopyStatsFrom(const Nnet &nnet)
Copies only the statistics in layers of type NonlinearComponent, from this neural net...
int32 NumUpdatableComponents() const
Returns the number of updatable components.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Read(std::istream &is, bool binary)
int32 OutputDim() const
The output dimension of the network – typically the number of pdfs.
int main(int argc, char *argv[])
void Resize(int32 num_components)
Removes final components from the neural network (used for debugging).
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Read(std::istream &is, bool binary)
const VectorBase< BaseFloat > & Priors() const
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
void Write(std::ostream &os, bool binary) const
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void ScaleComponents(const VectorBase< BaseFloat > &scales)
Scales the parameters of each of the updatable components.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void Write(std::ostream &os, bool binary) const
void RemovePreconditioning()
Replace any components of type AffineComponentPreconditioned with components of type AffineComponent...
A class representing a vector.
void SetLearningRates(BaseFloat learning_rates)
Set all the learning rates in the neural net to this value.
void SetDropoutScale(BaseFloat scale)
Calls SetDropoutScale for all the dropout nodes.
void RemoveDropout()
Excise any components of type DropoutComponent or AdditiveNoiseComponent.
void ScaleLearningRates(BaseFloat factor)
Scale all the learning rates in the neural net by this factor.
void Collapse(bool match_updatableness)
Where possible, collapse multiple affine or linear components in a sequence into a single one by comp...
void SetPriors(const VectorBase< BaseFloat > &priors)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
const Nnet & GetNnet() const