32 bool found_output =
false;
33 for (std::vector<NnetIo>::iterator it = eg->
io.begin();
34 it != eg->
io.end(); ++it) {
35 if (it->name ==
"output") {
42 KALDI_ERR <<
"No io-node with name 'output'" 48 if (weight == 1.0)
return;
50 bool found_output =
false;
51 for (std::vector<NnetIo>::iterator it = eg->
io.begin();
52 it != eg->
io.end(); ++it) {
53 if (it->name ==
"output") {
54 it->features.Scale(weight);
60 KALDI_ERR <<
"No supervision with name 'output'" 68 int32 ans = floor(expected_count);
69 expected_count -= ans;
86 int32 *max_output_t) {
87 bool done_input =
false, done_output =
false;
88 int32 num_indexes = eg.
io.size();
89 for (
int32 i = 0;
i < num_indexes;
i++) {
91 std::vector<Index>::const_iterator iter = io.
indexes.begin(),
95 if (io.
name ==
"input" || io.
name ==
"output") {
96 int32 min_t = iter->t, max_t = iter->t;
97 for (; iter != end; ++iter) {
98 int32 this_t = iter->t;
99 min_t = std::min(min_t, this_t);
100 max_t = std::max(max_t, this_t);
102 KALDI_WARN <<
"Example does not contain just a single example; " 103 <<
"too late to do frame selection or reduce context.";
107 if (io.
name ==
"input") {
109 *min_input_t = min_t;
110 *max_input_t = max_t;
114 *min_output_t = min_t;
115 *max_output_t = max_t;
118 for (; iter != end; ++iter) {
120 KALDI_WARN <<
"Example does not contain just a single example; " 121 <<
"too late to do frame selection or reduce context.";
128 KALDI_WARN <<
"Example does not have any input named 'input'";
132 KALDI_WARN <<
"Example does not have any output named 'output'";
152 eg_out->
io.resize(eg.
io.size());
153 for (
size_t i = 0;
i < eg.
io.size();
i++) {
154 bool is_input_or_output;
158 const std::string &name = io_in.
name;
160 if (name ==
"input") {
163 is_input_or_output =
true;
164 }
else if (name ==
"output") {
165 min_t = min_output_t;
166 max_t = max_output_t;
167 is_input_or_output =
true;
169 is_input_or_output =
false;
171 if (!is_input_or_output) {
175 const std::vector<Index> &indexes_in = io_in.
indexes;
176 std::vector<Index> &indexes_out = io_out.
indexes;
177 indexes_out.reserve(indexes_in.size());
178 int32 num_indexes = indexes_in.size(), num_kept = 0;
180 std::vector<bool> keep(num_indexes,
false);
181 std::vector<Index>::const_iterator iter_in = indexes_in.begin(),
182 end_in = indexes_in.end();
183 std::vector<bool>::iterator iter_out = keep.begin();
184 for (; iter_in != end_in; ++iter_in,++iter_out) {
185 int32 t = iter_in->t;
186 bool is_within_range = (t >= min_t && t <= max_t);
187 *iter_out = is_within_range;
188 if (is_within_range) {
189 indexes_out.push_back(*iter_in);
195 KALDI_ERR <<
"FilterExample removed all indexes for '" << name <<
"'";
200 indexes_out.size() ==
static_cast<size_t>(num_kept));
223 std::string frame_str,
228 static bool warned_left =
false, warned_right =
false;
229 int32 min_input_t, max_input_t,
230 min_output_t, max_output_t;
232 &min_output_t, &max_output_t))
233 KALDI_ERR <<
"Too late to perform frame selection/context reduction on " 234 <<
"these examples (already merged?)";
235 if (frame_str !=
"") {
237 if (frame_str ==
"random") {
238 min_output_t = max_output_t =
RandInt(min_output_t,
243 KALDI_ERR <<
"Invalid option --frame='" << frame_str <<
"'";
244 if (frame < min_output_t || frame > max_output_t) {
249 min_output_t = max_output_t = frame;
252 if (left_context != -1) {
253 if (!warned_left && min_input_t > min_output_t - left_context) {
255 KALDI_WARN <<
"You requested --left-context=" << left_context
256 <<
", but example only has left-context of " 257 << (min_output_t - min_input_t)
258 <<
" (will warn only once; this may be harmless if " 259 "using any --*left-context-initial options)";
261 min_input_t = std::max(min_input_t, min_output_t - left_context);
263 if (right_context != -1) {
264 if (!warned_right && max_input_t < max_output_t + right_context) {
266 KALDI_WARN <<
"You requested --right-context=" << right_context
267 <<
", but example only has right-context of " 268 << (max_input_t - max_output_t)
269 <<
" (will warn only once; this may be harmless if " 270 "using any --*right-context-final options.";
272 max_input_t = std::min(max_input_t, max_output_t + right_context);
275 min_input_t, max_input_t,
276 min_output_t, max_output_t,
278 if (frame_shift != 0) {
279 std::vector<std::string> exclude_names;
280 exclude_names.push_back(std::string(
"ivector"));
290 int main(
int argc,
char *argv[]) {
292 using namespace kaldi;
295 typedef kaldi::int64 int64;
298 "Copy examples (single frames or fixed-size groups of frames) for neural\n" 299 "network training, possibly changing the binary mode. Supports multiple wspecifiers, in\n" 300 "which case it will write the examples round-robin to the outputs.\n" 302 "Usage: nnet3-copy-egs [options] <egs-rspecifier> <egs-wspecifier1> [<egs-wspecifier2> ...]\n" 305 "nnet3-copy-egs ark:train.egs ark,t:text.egs\n" 307 "nnet3-copy-egs ark:train.egs ark:1.egs ark:2.egs\n" 308 "See also: nnet3-subset-egs, nnet3-get-egs, nnet3-merge-egs, nnet3-shuffle-egs\n";
311 int32 srand_seed = 0;
312 int32 frame_shift = 0;
318 int32 left_context = -1, right_context = -1;
322 std::string frame_str,
323 eg_weight_rspecifier, eg_output_name_rspecifier;
326 po.
Register(
"random", &random,
"If true, will write frames to output " 327 "archives randomly, not round-robin.");
328 po.
Register(
"frame-shift", &frame_shift,
"Allows you to shift time values " 329 "in the supervision data (excluding iVector data). Only really " 330 "useful in clockwork topologies (i.e. any topology for which " 331 "modulus != 1). Shifting is done after any frame selection.");
332 po.
Register(
"keep-proportion", &keep_proportion,
"If <1.0, this program will " 333 "randomly keep this proportion of the input samples. If >1.0, it will " 334 "in expectation copy a sample this many times. It will copy it a number " 335 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
336 po.
Register(
"srand", &srand_seed,
"Seed for random number generator " 337 "(only relevant if --random=true or --keep-proportion != 1.0)");
338 po.
Register(
"frame", &frame_str,
"This option can be used to select a single " 339 "frame from each multi-frame example. Set to a number 0, 1, etc. " 340 "to select a frame with a given index, or 'random' to select a " 342 po.
Register(
"left-context", &left_context,
"Can be used to truncate the " 343 "feature left-context that we output.");
344 po.
Register(
"right-context", &right_context,
"Can be used to truncate the " 345 "feature right-context that we output.");
346 po.
Register(
"weights", &eg_weight_rspecifier,
347 "Rspecifier indexed by the key of egs, providing a weight by " 348 "which we will scale the supervision matrix for that eg. " 349 "Used in multilingual training.");
350 po.
Register(
"outputs", &eg_output_name_rspecifier,
351 "Rspecifier indexed by the key of egs, providing a string-valued " 352 "output name, e.g. 'output-0'. If provided, the NnetIo with " 353 "name 'output' will be renamed to the provided name. Used in " 354 "multilingual training.");
364 std::string examples_rspecifier = po.
GetArg(1);
373 int32 num_outputs = po.
NumArgs() - 1;
374 std::vector<NnetExampleWriter*> example_writers(num_outputs);
375 for (int32
i = 0;
i < num_outputs;
i++)
379 int64 num_read = 0, num_written = 0, num_err = 0;
380 for (; !example_reader.
Done(); example_reader.
Next(), num_read++) {
381 const std::string &key = example_reader.
Key();
386 if (!eg_weight_rspecifier.empty()) {
388 if (!egs_weight_reader.
HasKey(key)) {
389 KALDI_WARN <<
"No weight for example key " << key;
393 weight = egs_weight_reader.
Value(key);
397 std::string new_output_name;
398 if (!eg_output_name_rspecifier.empty()) {
399 if (!output_name_reader.
HasKey(key)) {
400 KALDI_WARN <<
"No new output-name for example key " << key;
404 new_output_name = output_name_reader.
Value(key);
406 for (int32 c = 0; c <
count; c++) {
407 int32 index = (random ?
Rand() : num_written) % num_outputs;
408 if (frame_str ==
"" && left_context == -1 && right_context == -1 &&
410 if (!new_output_name.empty() && c == 0)
412 example_writers[index]->Write(key, eg);
417 frame_shift, &eg_modified)) {
418 if (!new_output_name.empty())
422 example_writers[index]->Write(key, eg_modified);
429 for (int32
i = 0;
i < num_outputs;
i++)
430 delete example_writers[
i];
431 KALDI_LOG <<
"Read " << num_read <<
" neural-network training examples, wrote " 432 << num_written <<
", " 433 << num_err <<
" examples had errors.";
434 return (num_written == 0 ? 1 : 0);
435 }
catch(
const std::exception &e) {
436 std::cerr << e.what() <<
'\n';
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool WithProb(BaseFloat prob, struct RandomState *state)
GeneralMatrix features
The features or labels.
void ShiftExampleTimes(int32 t_offset, const std::vector< std::string > &exclude_names, NnetExample *eg)
Shifts the time-index t of everything in the "eg" by adding "t_offset" to all "t" values...
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
bool ContainsSingleExample(const NnetExample &eg, int32 *min_input_t, int32 *max_input_t, int32 *min_output_t, int32 *max_output_t)
Returns true if the "eg" contains just a single example, meaning that all the "n" values in the index...
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void FilterGeneralMatrixRows(const GeneralMatrix &in, const std::vector< bool > &keep_rows, GeneralMatrix *out)
Outputs a GeneralMatrix containing only the rows r of "in" such that keep_rows[r] == true...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
int main(int argc, char *argv[])
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void ScaleSupervisionWeight(BaseFloat weight, NnetExample *eg)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void RenameOutputs(const std::string &new_name, NnetExample *eg)
bool HasKey(const std::string &key)
int32 GetCount(double expected_count)
int Rand(struct RandomState *state)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
TableWriter< KaldiObjectHolder< NnetExample > > NnetExampleWriter
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
bool SelectFromExample(const NnetExample &eg, std::string frame_str, int32 left_context, int32 right_context, int32 frame_shift, NnetExample *eg_out)
This function is responsible for possibly selecting one frame from multiple supervised frames...
void FilterExample(const NnetExample &eg, int32 min_input_t, int32 max_input_t, int32 min_output_t, int32 max_output_t, NnetExample *eg_out)
This function filters the indexes (and associated feature rows) in a NnetExample, removing any index/...
std::vector< NnetIo > io
"io" contains the input and output.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)