32   bool found_output = 
false;
    33   for (std::vector<NnetIo>::iterator it = eg->
io.begin();
    34        it != eg->
io.end(); ++it) {
    35     if (it->name == 
"output") {
    42     KALDI_ERR << 
"No io-node with name 'output'"    48   if (weight == 1.0) 
return;
    50   bool found_output = 
false;
    51   for (std::vector<NnetIo>::iterator it = eg->
io.begin();
    52        it != eg->
io.end(); ++it) {
    53     if (it->name == 
"output") {
    54       it->features.Scale(weight);
    60     KALDI_ERR << 
"No supervision with name 'output'"    68   int32 ans = floor(expected_count);
    69   expected_count -= ans;
    86                            int32 *max_output_t) {
    87   bool done_input = 
false, done_output = 
false;
    88   int32 num_indexes = eg.
io.size();
    89   for (
int32 i = 0; 
i < num_indexes; 
i++) {
    91     std::vector<Index>::const_iterator iter = io.
indexes.begin(),
    95     if (io.
name == 
"input" || io.
name == 
"output") {
    96       int32 min_t = iter->t, max_t = iter->t;
    97       for (; iter != end; ++iter) {
    98         int32 this_t = iter->t;
    99         min_t = std::min(min_t, this_t);
   100         max_t = std::max(max_t, this_t);
   102           KALDI_WARN << 
"Example does not contain just a single example; "   103                      << 
"too late to do frame selection or reduce context.";
   107       if (io.
name == 
"input") {
   109         *min_input_t = min_t;
   110         *max_input_t = max_t;
   114         *min_output_t = min_t;
   115         *max_output_t = max_t;
   118       for (; iter != end; ++iter) {
   120           KALDI_WARN << 
"Example does not contain just a single example; "   121                      << 
"too late to do frame selection or reduce context.";
   128     KALDI_WARN << 
"Example does not have any input named 'input'";
   132     KALDI_WARN << 
"Example does not have any output named 'output'";
   152   eg_out->
io.resize(eg.
io.size());
   153   for (
size_t i = 0; 
i < eg.
io.size(); 
i++) {
   154     bool is_input_or_output;
   158     const std::string &name = io_in.
name;
   160     if (name == 
"input") {
   163       is_input_or_output = 
true;
   164     } 
else if (name == 
"output") {
   165       min_t = min_output_t;
   166       max_t = max_output_t;
   167       is_input_or_output = 
true;
   169       is_input_or_output = 
false;
   171     if (!is_input_or_output) {  
   175       const std::vector<Index> &indexes_in = io_in.
indexes;
   176       std::vector<Index> &indexes_out = io_out.
indexes;
   177       indexes_out.reserve(indexes_in.size());
   178       int32 num_indexes = indexes_in.size(), num_kept = 0;
   180       std::vector<bool> keep(num_indexes, 
false);
   181       std::vector<Index>::const_iterator iter_in = indexes_in.begin(),
   182                                           end_in = indexes_in.end();
   183       std::vector<bool>::iterator iter_out = keep.begin();
   184       for (; iter_in != end_in; ++iter_in,++iter_out) {
   185         int32 t = iter_in->t;
   186         bool is_within_range = (t >= min_t && t <= max_t);
   187         *iter_out = is_within_range;
   188         if (is_within_range) {
   189           indexes_out.push_back(*iter_in);
   195         KALDI_ERR << 
"FilterExample removed all indexes for '" << name << 
"'";
   200                    indexes_out.size() == 
static_cast<size_t>(num_kept));
   223                        std::string frame_str,
   228   static bool warned_left = 
false, warned_right = 
false;
   229   int32 min_input_t, max_input_t,
   230       min_output_t, max_output_t;
   232                              &min_output_t, &max_output_t))
   233     KALDI_ERR << 
"Too late to perform frame selection/context reduction on "   234               << 
"these examples (already merged?)";
   235   if (frame_str != 
"") {
   237     if (frame_str == 
"random") {
   238       min_output_t = max_output_t = 
RandInt(min_output_t,
   243         KALDI_ERR << 
"Invalid option --frame='" << frame_str << 
"'";
   244       if (frame < min_output_t || frame > max_output_t) {
   249       min_output_t = max_output_t = frame;
   252   if (left_context != -1) {
   253     if (!warned_left && min_input_t > min_output_t - left_context) {
   255       KALDI_WARN << 
"You requested --left-context=" << left_context
   256                  << 
", but example only has left-context of "   257                  <<  (min_output_t - min_input_t)
   258                  << 
" (will warn only once; this may be harmless if "   259           "using any --*left-context-initial options)";
   261     min_input_t = std::max(min_input_t, min_output_t - left_context);
   263   if (right_context != -1) {
   264     if (!warned_right && max_input_t < max_output_t + right_context) {
   266       KALDI_WARN << 
"You requested --right-context=" << right_context
   267                 << 
", but example only has right-context of "   268                 <<  (max_input_t - max_output_t)
   269                  << 
" (will warn only once; this may be harmless if "   270             "using any --*right-context-final options.";
   272     max_input_t = std::min(max_input_t, max_output_t + right_context);
   275                 min_input_t, max_input_t,
   276                 min_output_t, max_output_t,
   278   if (frame_shift != 0) {
   279     std::vector<std::string> exclude_names;  
   280     exclude_names.push_back(std::string(
"ivector")); 
   290 int main(
int argc, 
char *argv[]) {
   292     using namespace kaldi;
   295     typedef kaldi::int64 int64;
   298         "Copy examples (single frames or fixed-size groups of frames) for neural\n"   299         "network training, possibly changing the binary mode.  Supports multiple wspecifiers, in\n"   300         "which case it will write the examples round-robin to the outputs.\n"   302         "Usage:  nnet3-copy-egs [options] <egs-rspecifier> <egs-wspecifier1> [<egs-wspecifier2> ...]\n"   305         "nnet3-copy-egs ark:train.egs ark,t:text.egs\n"   307         "nnet3-copy-egs ark:train.egs ark:1.egs ark:2.egs\n"   308         "See also: nnet3-subset-egs, nnet3-get-egs, nnet3-merge-egs, nnet3-shuffle-egs\n";
   311     int32 srand_seed = 0;
   312     int32 frame_shift = 0;
   318     int32 left_context = -1, right_context = -1;
   322     std::string frame_str,
   323       eg_weight_rspecifier, eg_output_name_rspecifier;
   326     po.
Register(
"random", &random, 
"If true, will write frames to output "   327                 "archives randomly, not round-robin.");
   328     po.
Register(
"frame-shift", &frame_shift, 
"Allows you to shift time values "   329                 "in the supervision data (excluding iVector data).  Only really "   330                 "useful in clockwork topologies (i.e. any topology for which "   331                 "modulus != 1).  Shifting is done after any frame selection.");
   332     po.
Register(
"keep-proportion", &keep_proportion, 
"If <1.0, this program will "   333                 "randomly keep this proportion of the input samples.  If >1.0, it will "   334                 "in expectation copy a sample this many times.  It will copy it a number "   335                 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
   336     po.
Register(
"srand", &srand_seed, 
"Seed for random number generator "   337                 "(only relevant if --random=true or --keep-proportion != 1.0)");
   338     po.
Register(
"frame", &frame_str, 
"This option can be used to select a single "   339                 "frame from each multi-frame example.  Set to a number 0, 1, etc. "   340                 "to select a frame with a given index, or 'random' to select a "   342     po.
Register(
"left-context", &left_context, 
"Can be used to truncate the "   343                 "feature left-context that we output.");
   344     po.
Register(
"right-context", &right_context, 
"Can be used to truncate the "   345                 "feature right-context that we output.");
   346     po.
Register(
"weights", &eg_weight_rspecifier,
   347                 "Rspecifier indexed by the key of egs, providing a weight by "   348                 "which we will scale the supervision matrix for that eg. "   349                 "Used in multilingual training.");
   350     po.
Register(
"outputs", &eg_output_name_rspecifier,
   351                 "Rspecifier indexed by the key of egs, providing a string-valued "   352                 "output name, e.g. 'output-0'.  If provided, the NnetIo with "   353                 "name 'output' will be renamed to the provided name. Used in "   354                 "multilingual training.");
   364     std::string examples_rspecifier = po.
GetArg(1);
   373     int32 num_outputs = po.
NumArgs() - 1;
   374     std::vector<NnetExampleWriter*> example_writers(num_outputs);
   375     for (int32 
i = 0; 
i < num_outputs; 
i++)
   379     int64 num_read = 0, num_written = 0, num_err = 0;
   380     for (; !example_reader.
Done(); example_reader.
Next(), num_read++) {
   381       const std::string &key = example_reader.
Key();
   386       if (!eg_weight_rspecifier.empty()) {
   388         if (!egs_weight_reader.
HasKey(key)) {
   389           KALDI_WARN << 
"No weight for example key " << key;
   393         weight = egs_weight_reader.
Value(key);
   397       std::string new_output_name;
   398       if (!eg_output_name_rspecifier.empty()) {
   399         if (!output_name_reader.
HasKey(key)) {
   400           KALDI_WARN << 
"No new output-name for example key " << key;
   404         new_output_name = output_name_reader.
Value(key);
   406       for (int32 c = 0; c < 
count; c++) {
   407         int32 index = (random ? 
Rand() : num_written) % num_outputs;
   408         if (frame_str == 
"" && left_context == -1 && right_context == -1 &&
   410           if (!new_output_name.empty() && c == 0)
   412           example_writers[index]->Write(key, eg);
   417                                 frame_shift, &eg_modified)) {
   418             if (!new_output_name.empty())
   422             example_writers[index]->Write(key, eg_modified);
   429     for (int32 
i = 0; 
i < num_outputs; 
i++)
   430       delete example_writers[
i];
   431     KALDI_LOG << 
"Read " << num_read << 
" neural-network training examples, wrote "   432               << num_written << 
", "   433               << num_err <<  
" examples had errors.";
   434     return (num_written == 0 ? 1 : 0);
   435   } 
catch(
const std::exception &e) {
   436     std::cerr << e.what() << 
'\n';
 NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
bool WithProb(BaseFloat prob, struct RandomState *state)
 
GeneralMatrix features
The features or labels. 
 
void ShiftExampleTimes(int32 t_offset, const std::vector< std::string > &exclude_names, NnetExample *eg)
Shifts the time-index t of everything in the "eg" by adding "t_offset" to all "t" values...
 
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of the "features" matrix.
 
bool ContainsSingleExample(const NnetExample &eg, int32 *min_input_t, int32 *max_input_t, int32 *min_output_t, int32 *max_output_t)
Returns true if the "eg" contains just a single example, meaning that all the "n" values in the index...
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
void FilterGeneralMatrixRows(const GeneralMatrix &in, const std::vector< bool > &keep_rows, GeneralMatrix *out)
Outputs a GeneralMatrix containing only the rows r of "in" such that keep_rows[r] == true...
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
int main(int argc, char *argv[])
 
const T & Value(const std::string &key)
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
void ScaleSupervisionWeight(BaseFloat weight, NnetExample *eg)
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
void RenameOutputs(const std::string &new_name, NnetExample *eg)
 
bool HasKey(const std::string &key)
 
int32 GetCount(double expected_count)
 
int Rand(struct RandomState *state)
 
int NumArgs() const
Number of positional parameters (c.f. argc-1). 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
 
TableWriter< KaldiObjectHolder< NnetExample > > NnetExampleWriter
 
std::string name
the name of the input in the neural net; in simple setups it will just be "input". 
 
bool SelectFromExample(const NnetExample &eg, std::string frame_str, int32 left_context, int32 right_context, int32 frame_shift, NnetExample *eg_out)
This function is responsible for possibly selecting one frame from multiple supervised frames...
 
void FilterExample(const NnetExample &eg, int32 min_input_t, int32 max_input_t, int32 min_output_t, int32 max_output_t, NnetExample *eg_out)
This function filters the indexes (and associated feature rows) in a NnetExample, removing any index/...
 
std::vector< NnetIo > io
"io" contains the input and output. 
 
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)