nnet-shuffle-egs.cc File Reference
Include dependency graph for nnet-shuffle-egs.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 26 of file nnet-shuffle-egs.cc.

References SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), kaldi::RandInt(), ParseOptions::Read(), ParseOptions::Register(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

26  { // main: copies examples from <egs-rspecifier> to <egs-wspecifier> in shuffled order.
27  try {
28  using namespace kaldi;
29  using namespace kaldi::nnet2;
30  typedef kaldi::int32 int32;
31  typedef kaldi::int64 int64;
32 // Help text handed to ParseOptions below; PrintUsage() is called when the argument count is wrong.
33  const char *usage =
34  "Copy examples (typically single frames) for neural network training,\n"
35  "from the input to output, but randomly shuffle the order. This program will keep\n"
36  "all of the examples in memory at once, unless you use the --buffer-size option\n"
37  "\n"
38  "Usage: nnet-shuffle-egs [options] <egs-rspecifier> <egs-wspecifier>\n"
39  "\n"
40  "nnet-shuffle-egs --srand=1 ark:train.egs ark:shuffled.egs\n";
41 // Command-line options: buffer_size == 0 selects the full in-memory shuffle, any other value the bounded-buffer branch.
42  int32 srand_seed = 0;
43  int32 buffer_size = 0;
44  ParseOptions po(usage);
45  po.Register("srand", &srand_seed, "Seed for random number generator ");
46  po.Register("buffer-size", &buffer_size, "If >0, size of a buffer we use "
47  "to do limited-memory partial randomization. Otherwise, do "
48  "full randomization.");
49 
50  po.Read(argc, argv);
51 // Seed the C library generator. NOTE(review): presumably both std::random_shuffle and RandInt draw from rand() here - confirm.
52  srand(srand_seed);
53 
54  if (po.NumArgs() != 2) {
55  po.PrintUsage();
56  exit(1);
57  }
58 
59  std::string examples_rspecifier = po.GetArg(1),
60  examples_wspecifier = po.GetArg(2);
61 // Incremented once for every example passed to example_writer.Write().
62  int64 num_done = 0;
63 // Each entry holds a key plus a heap-allocated NnetExample that is deleted in the final loop; a NULL pointer marks an empty buffer slot.
64  std::vector<std::pair<std::string, NnetExample*> > egs;
65 
66  SequentialNnetExampleReader example_reader(examples_rspecifier);
67  NnetExampleWriter example_writer(examples_wspecifier);
68  if (buffer_size == 0) { // Do full randomization
69  // The vector stores pointers rather than NnetExample objects: this extra
70  // level of indirection avoids excessive computation and memory demands when the vector has to be resized.
71 // Copy every example from the reader into memory; nothing is written until the final loop below.
72  for (; !example_reader.Done(); example_reader.Next())
73  egs.push_back(std::make_pair(example_reader.Key(),
74  new NnetExample(example_reader.Value())));
75 // NOTE(review): std::random_shuffle was removed in C++17 - confirm the language standard this file is built with.
76  std::random_shuffle(egs.begin(), egs.end());
77  } else {
78  KALDI_ASSERT(buffer_size > 0); // buffer_size is non-zero in this branch, so this fails only for negative values
79  egs.resize(buffer_size,
80  std::pair<std::string, NnetExample*>("", static_cast<NnetExample *>(NULL)));
81  for (; !example_reader.Done(); example_reader.Next()) {
82  int32 index = RandInt(0, buffer_size - 1); // random slot; NOTE(review): bounds look inclusive - confirm against RandInt
83  if (egs[index].second == NULL) { // slot still empty: store a copy of the incoming example, write nothing
84  egs[index] = std::make_pair(example_reader.Key(),
85  new NnetExample(example_reader.Value()));
86  } else { // slot occupied: write out its current example, then reuse the same allocation for the incoming one
87  example_writer.Write(egs[index].first, *(egs[index].second));
88  egs[index].first = example_reader.Key();
89  *(egs[index].second) = example_reader.Value();
90  num_done++;
91  }
92  }
93  }
94  for (size_t i = 0; i < egs.size(); i++) { // flush: write and free every example still held in egs (either branch)
95  if (egs[i].second != NULL) {
96  example_writer.Write(egs[i].first, *(egs[i].second));
97  delete egs[i].second;
98  num_done++;
99  }
100  }
101 
102  KALDI_LOG << "Shuffled order of " << num_done
103  << " neural-network training examples "
104  << (buffer_size ? "using a buffer (partial randomization)" : "");
105 // Exit status is 1 when no example was written, 0 otherwise.
106  return (num_done == 0 ? 1 : 0);
107  } catch(const std::exception &e) { // print the exception text to stderr and return -1
108  std::cerr << e.what() << '\n';
109  return -1;
110  }
111 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Definition: chain.dox:20
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:36
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95