28 using namespace kaldi;
31 typedef kaldi::int64 int64;
34 "Creates a random subset of the input examples, of a specified size.\n" 35 "Uses no more memory than the size of the subset.\n" 37 "Usage: nnet-subset-egs [options] <egs-rspecifier> [<egs-wspecifier2> ...]\n" 40 "nnet-subset-egs [args] ark:- | nnet-subset-egs --n=1000 ark:- ark:subset.egs\n";
44 bool randomize_order =
true;
46 po.Register(
"srand", &srand_seed,
"Seed for random number generator ");
47 po.Register(
"n", &n,
"Number of examples to output");
48 po.Register(
"randomize-order", &randomize_order,
"If true, randomize the order " 55 if (po.NumArgs() != 2) {
60 std::string examples_rspecifier = po.GetArg(1),
61 examples_wspecifier = po.GetArg(2);
63 std::vector<std::pair<std::string, NnetExample> > egs;
69 for (; !example_reader.Done(); example_reader.Next()) {
72 egs.resize(egs.size() + 1);
73 egs.back().first = example_reader.Key();
74 egs.back().second = example_reader.Value();
79 egs[index].first = example_reader.Key();
80 egs[index].second = example_reader.Value();
85 std::random_shuffle(egs.begin(), egs.end());
88 for (
size_t i = 0;
i < egs.size();
i++) {
89 writer.Write(egs[
i].first, egs[
i].second);
92 KALDI_LOG <<
"Selected a subset of " << egs.size() <<
" out of " << num_read
93 <<
" neural-network training examples ";
95 return (num_read != 0 ? 0 : 1);
96 }
catch(
const std::exception &e) {
97 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
bool WithProb(BaseFloat prob, struct RandomState *state)
A templated class for writing objects to an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)