35 while (expected_count > 1.0) {
47 int main(
int argc,
char *argv[]) {
49 using namespace kaldi;
52 typedef kaldi::int64 int64;
55 "Copy examples (typically single frames) for neural network training,\n" 56 "possibly changing the binary mode. Supports multiple wspecifiers, in\n" 57 "which case it will write the examples round-robin to the outputs.\n" 59 "Usage: nnet-copy-egs [options] <egs-rspecifier> <egs-wspecifier1> [<egs-wspecifier2> ...]\n" 62 "nnet-copy-egs ark:train.egs ark,t:text.egs\n" 64 "nnet-copy-egs ark:train.egs ark:1.egs ark:2.egs\n";
73 int32 left_context = -1, right_context = -1;
76 std::string frame_str;
79 po.
Register(
"random", &random,
"If true, will write frames to output " 80 "archives randomly, not round-robin.");
81 po.
Register(
"keep-proportion", &keep_proportion,
"If <1.0, this program will " 82 "randomly keep this proportion of the input samples. If >1.0, it will " 83 "in expectation copy a sample this many times. It will copy it a number " 84 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
85 po.
Register(
"srand", &srand_seed,
"Seed for random number generator " 86 "(only relevant if --random=true or --keep-proportion != 1.0)");
87 po.
Register(
"frame", &frame_str,
"This option can be used to select a single " 88 "frame from each multi-frame example. Set to a number 0, 1, etc. " 89 "to select a frame with a given index, or 'random' to select a " 91 po.
Register(
"left-context", &left_context,
"Can be used to truncate the " 92 "feature left-context that we output.");
93 po.
Register(
"right-context", &right_context,
"Can be used to truncate the " 94 "feature right-context that we output.");
103 if (frame_str !=
"") {
105 if (frame_str ==
"random") {
108 KALDI_ERR <<
"Invalid --frame option: '" << frame_str <<
"'";
116 bool copy_eg = (frame != -1 || left_context != -1 || right_context != -1);
117 int32 start_frame = -1, num_frames = -1;
128 std::string examples_rspecifier = po.
GetArg(1);
132 int32 num_outputs = po.
NumArgs() - 1;
133 std::vector<NnetExampleWriter*> example_writers(num_outputs);
134 for (int32
i = 0;
i < num_outputs;
i++)
138 int64 num_read = 0, num_written = 0;
139 for (; !example_reader.
Done(); example_reader.
Next(), num_read++) {
142 std::string key = example_reader.
Key();
144 for (int32 c = 0; c <
count; c++) {
145 int32 index = (random ?
Rand() : num_written) % num_outputs;
147 example_writers[index]->Write(key, eg);
152 if (start_frame == -1 || start_frame < eg.
labels.size()) {
157 left_context, right_context);
158 example_writers[index]->Write(key, eg_mod);
168 for (int32
i = 0;
i < num_outputs;
i++)
169 delete example_writers[
i];
170 KALDI_LOG <<
"Read " << num_read <<
" neural-network training examples, wrote " 172 return (num_written == 0 ? 1 : 0);
173 }
catch(
const std::exception &e) {
174 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool WithProb(BaseFloat prob, struct RandomState *state)
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
TableWriter< KaldiObjectHolder< NnetExample > > NnetExampleWriter
int Rand(struct RandomState *state)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
int main(int argc, char *argv[])
#define KALDI_ASSERT(cond)
std::vector< std::vector< std::pair< int32, BaseFloat > > > labels
The label(s) for each frame in a sequence of frames; in the normal case, this will be just [ [ (pdf-i...
int32 GetCount(double expected_count)
Note on how to parse this filename: it contains functions related to neural-net training examples...
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)