25 int main(
int argc,
char *argv[]) {
27 using namespace kaldi;
30 typedef kaldi::int64 int64;
33 "Copy examples for discriminative neural network training,\n" 34 "and combine successive examples if their combined length will\n" 35 "be less than --max-length. This can help to improve efficiency\n" 36 "(--max-length corresponds to minibatch size)\n" 38 "Usage: nnet-combine-egs-discriminative [options] <egs-rspecifier> <egs-wspecifier>\n" 41 "nnet-combine-egs-discriminative --max-length=512 ark:temp.1.degs ark:1.degs\n";
43 int32 max_length = 512;
44 int32 hard_max_length = 2048;
45 int32 batch_size = 250;
47 po.
Register(
"max-length", &max_length,
"Maximum length of example that we " 48 "will create when combining");
49 po.
Register(
"batch-size", &batch_size,
"Size of batch used when combinging " 51 po.
Register(
"hard-max-length", &hard_max_length,
"Length of example beyond " 52 "which we will discard (very long examples may cause out of " 65 std::string examples_rspecifier = po.
GetArg(1),
66 examples_wspecifier = po.
GetArg(2);
73 int64 num_read = 0, num_written = 0, num_discarded = 0;
75 while (!example_reader.
Done()) {
76 std::vector<DiscriminativeNnetExample> buffer;
77 size_t size = batch_size;
80 for (; !example_reader.
Done() && buffer.size() < size;
81 example_reader.
Next()) {
82 buffer.push_back(example_reader.
Value());
86 std::vector<DiscriminativeNnetExample> combined;
89 for (
size_t i = 0;
i < combined.size();
i++) {
92 if (num_frames > hard_max_length) {
93 KALDI_WARN <<
"Discarding segment of length " << num_frames
94 <<
" because it exceeds --hard-max-length=" 98 std::ostringstream ostr;
99 ostr << (num_written++);
100 example_writer.
Write(ostr.str(), eg);
105 KALDI_LOG <<
"Read " << num_read <<
" discriminative neural-network training" 106 <<
" examples, wrote " << num_written <<
", discarded " 108 return (num_written == 0 ? 1 : 0);
109 }
catch(
const std::exception &e) {
110 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
A templated class for writing objects to an archive or script file; see The Table concept...
void CombineDiscriminativeExamples(int32 max_length, const std::vector< DiscriminativeNnetExample > &input, std::vector< DiscriminativeNnetExample > *output)
This function is used to combine multiple discriminative-training examples (each corresponding to a s...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
Matrix< BaseFloat > input_frames
The input data, typically with a number of frames [NumRows()] larger than labels.size(), because it includes features to the left and right as needed for the temporal context of the network.
int NumArgs() const
Number of positional parameters (cf. argc-1).
This struct is used to store the information we need for discriminative training (MMI or MPE)...
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Note on how to parse this filename: it contains functions related to neural-net training examples...