28 void GetWeights(
const std::string &weights_str,
30 std::vector<BaseFloat> *weights) {
32 if (!weights_str.empty()) {
34 if (weights->size() != num_inputs) {
35 KALDI_ERR <<
"--weights option must be a colon-separated list " 36 <<
"with " << num_inputs <<
" elements, got: " 40 for (
int32 i = 0;
i < num_inputs;
i++)
41 weights->push_back(1.0 / num_inputs);
44 float weight_sum = 0.0;
45 for (
int32 i = 0;
i < num_inputs;
i++)
46 weight_sum += (*weights)[
i];
47 for (
int32 i = 0;
i < num_inputs;
i++)
48 (*weights)[
i] = (*weights)[
i] / weight_sum;
49 if (fabs(weight_sum - 1.0) > 0.01) {
50 KALDI_WARN <<
"Normalizing weights to sum to one, sum was " << weight_sum;
57 void ReadModels(std::vector<std::pair<std::string, BaseFloat> > models_and_weights,
60 using namespace nnet3;
62 int32 n = models_and_weights.size();
64 ScaleNnet(models_and_weights[0].second, output_nnet);
68 AddNnet(nnet, models_and_weights[
i].second, output_nnet);
79 int main(
int argc,
char *argv[]) {
81 using namespace kaldi;
84 typedef kaldi::int64 int64;
87 "This program averages the parameters over a number of 'raw' nnet3 neural nets.\n" 89 "Usage: nnet3-average [options] <model1> <model2> ... <modelN> <model-out>\n" 92 " nnet3-average 1.1.nnet 1.2.nnet 1.3.nnet 2.nnet\n";
94 bool binary_write =
true;
95 int32 num_threads = -1;
98 po.
Register(
"binary", &binary_write,
"Write output in binary mode");
99 std::string weights_str;
100 po.
Register(
"weights", &weights_str,
"Colon-separated list of weights, one " 101 "for each input model. These will be normalized to sum to one.");
102 po.
Register(
"num-threads", &num_threads,
"Number of threads to read the " 103 "models (will be set automatically if not set.");
113 first_nnet_rxfilename = po.
GetArg(1),
116 int32 num_inputs = po.
NumArgs() - 1;
118 if (num_threads <= 0) {
120 if (num_inputs > 10) num_threads = 3;
121 else if (num_inputs > 5) num_threads = 2;
122 else num_threads = 1;
125 if (num_threads > 1 && num_threads * 2 > num_inputs) {
126 num_threads = num_inputs / 2;
129 std::vector<BaseFloat> model_weights;
130 GetWeights(weights_str, num_inputs, &model_weights);
132 std::vector<Nnet> nnets(num_threads);
133 std::vector<int32> return_statuses(num_threads);
135 std::vector<std::thread*> threads(num_threads);
137 for (int32 thread_id = 0; thread_id < num_threads; thread_id++) {
138 std::vector<std::pair<std::string, BaseFloat> > this_models_and_weights;
139 for (int32
j = 1 + thread_id;
j < po.
NumArgs();
j += num_threads) {
140 this_models_and_weights.push_back(std::pair<std::string, BaseFloat>(
141 po.
GetArg(
j), model_weights[
j - 1]));
143 threads[thread_id] =
new std::thread(
ReadModels, this_models_and_weights,
145 &(return_statuses[thread_id]));
149 for (int32 thread_id = 0; thread_id < num_threads; thread_id++) {
150 threads[thread_id]->join();
151 delete threads[thread_id];
152 if (!return_statuses[thread_id])
154 if (success && thread_id > 0)
155 AddNnet(nnets[thread_id], 1.0, &(nnets[0]));
159 KALDI_ERR <<
"Error detected in a model-reading thread.";
164 KALDI_LOG <<
"Averaged parameters of " << num_inputs
165 <<
" neural nets, and wrote to " << nnet_wxfilename;
167 }
catch(
const std::exception &e) {
168 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void ScaleNnet(BaseFloat scale, Nnet *nnet)
Scales the nnet parameters and stats by this scale.
void GetWeights(const std::string &weights_str, int32 num_inputs, std::vector< BaseFloat > *weights)
bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< F > *out)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
This file contains some miscellaneous functions dealing with class Nnet.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
int main(int argc, char *argv[])
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void ReadModels(std::vector< std::pair< std::string, BaseFloat > > models_and_weights, nnet3::Nnet *output_nnet, int32 *success)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest)
Does *dest += alpha * src (affects nnet parameters and stored stats).