// Entry point of nnet3-discriminative-shuffle-egs. Per the usage string below,
// it copies nnet3 discriminative training examples from the input to the
// output while randomly shuffling their order.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so several declarations used below (po, srand_seed, example_reader,
// example_writer, num_done) and the opening `try {` are not visible here.
// Comments describe only what the visible lines show.
26 int main(
int argc,
char *argv[]) {
28 using namespace kaldi;
31 typedef kaldi::int64 int64;
// Usage/help text (adjacent string literals of a single message).
34 "Copy nnet3 discriminative training examples from the input to output,\n" 35 "while randomly shuffling the order. This program will keep all of the examples\n" 36 "in memory at once, unless you use the --buffer-size option\n" 38 "Usage: nnet3-discriminative-shuffle-egs [options] <egs-rspecifier> <egs-wspecifier>\n" 40 "nnet3-discriminative-shuffle-egs --srand=1 ark:train.egs ark:shuffled.egs\n";
// 0 (the default) selects full randomization; see the option help text below.
43 int32 buffer_size = 0;
// Command-line options: --srand (RNG seed) and --buffer-size.
45 po.
Register(
"srand", &srand_seed,
"Seed for random number generator ");
46 po.
Register(
"buffer-size", &buffer_size,
"If >0, size of a buffer we use " 47 "to do limited-memory partial randomization. Otherwise, do " 48 "full randomization.");
// Positional argument 1 is the examples rspecifier, argument 2 the wspecifier.
59 std::string examples_rspecifier = po.
GetArg(1),
60 examples_wspecifier = po.
GetArg(2);
// (key, pointer-to-example) pairs; a NULL pointer marks an unused slot in the
// buffered mode below.
64 std::vector<std::pair<std::string, NnetDiscriminativeExample*> > egs;
// Full randomization: append every example read to egs, then shuffle the
// whole vector.
68 if (buffer_size == 0) {
72 for (; !example_reader.
Done(); example_reader.
Next())
73 egs.push_back(std::pair<std::string, NnetDiscriminativeExample*>(
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17; std::shuffle with an explicit engine is the replacement.
77 std::random_shuffle(egs.begin(), egs.end());
// Presumably the buffer_size > 0 path (the `else` line is not visible here):
// start with buffer_size empty slots, each ("", NULL).
80 egs.resize(buffer_size,
81 std::pair<std::string, NnetDiscriminativeExample*>(
"", NULL));
82 for (; !example_reader.
Done(); example_reader.
Next()) {
// Pick a random slot in [0, buffer_size - 1] for the example just read.
83 int32 index =
RandInt(0, buffer_size - 1);
// Empty slot: store a new pair there (its constructor arguments are not
// visible in this listing).
84 if (egs[index].second == NULL) {
85 egs[index] = std::pair<std::string, NnetDiscriminativeExample*>(
// Occupied slot (presumably the else branch -- TODO confirm): write out the
// example currently held, then overwrite its key and value in place with the
// newly read example.
89 example_writer.
Write(egs[index].first, *(egs[index].second));
90 egs[index].first = example_reader.
Key();
91 *(egs[index].second) = example_reader.
Value();
// Flush: write every example still held in egs, skipping NULL slots.
// NOTE(review): whether the pointed-to examples are freed here, and where
// num_done is incremented, is not visible in this listing -- verify.
96 for (
size_t i = 0;
i < egs.size();
i++) {
97 if (egs[
i].second != NULL) {
98 example_writer.
Write(egs[
i].first, *(egs[
i].second));
// Log how many examples were processed; the suffix is added only when a
// nonzero buffer_size was used.
104 KALDI_LOG <<
"Shuffled order of " << num_done
105 <<
" neural-network training examples " 106 << (buffer_size ?
"using a buffer (partial randomization)" :
"");
// Exit status 1 when no examples were processed, 0 otherwise.
108 return (num_done == 0 ? 1 : 0);
109 }
// Print the message of any std::exception to stderr.
catch(
const std::exception &e) {
110 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (cf. argc-1).
#define KALDI_ASSERT(cond)
int main(int argc, char *argv[])
NnetDiscriminativeExample is like NnetExample, but specialized for sequence training.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)