27 using namespace kaldi;
30 typedef kaldi::int64 int64;
33 "Copy examples for discriminative neural network training,\n" 34 "and combine successive examples if their combined length will\n" 35 "be less than --max-length. This can help to improve efficiency\n" 36 "(--max-length corresponds to minibatch size)\n" 38 "Usage: nnet-combine-egs-discriminative [options] <egs-rspecifier> <egs-wspecifier>\n" 41 "nnet-combine-egs-discriminative --max-length=512 ark:temp.1.degs ark:1.degs\n";
43 int32 max_length = 512;
44 int32 hard_max_length = 2048;
45 int32 batch_size = 250;
47 po.Register(
"max-length", &max_length,
"Maximum length of example that we " 48 "will create when combining");
49 po.Register(
"batch-size", &batch_size,
"Size of batch used when combinging " 51 po.Register(
"hard-max-length", &hard_max_length,
"Length of example beyond " 52 "which we will discard (very long examples may cause out of " 57 if (po.NumArgs() != 2) {
65 std::string examples_rspecifier = po.GetArg(1),
66 examples_wspecifier = po.GetArg(2);
73 int64 num_read = 0, num_written = 0, num_discarded = 0;
75 while (!example_reader.Done()) {
76 std::vector<DiscriminativeNnetExample> buffer;
77 size_t size = batch_size;
80 for (; !example_reader.Done() && buffer.size() < size;
81 example_reader.Next()) {
82 buffer.push_back(example_reader.Value());
86 std::vector<DiscriminativeNnetExample> combined;
89 for (
size_t i = 0;
i < combined.size();
i++) {
92 if (num_frames > hard_max_length) {
93 KALDI_WARN <<
"Discarding segment of length " << num_frames
94 <<
" because it exceeds --hard-max-length=" 98 std::ostringstream ostr;
99 ostr << (num_written++);
100 example_writer.Write(ostr.str(), eg);
105 KALDI_LOG <<
"Read " << num_read <<
" discriminative neural-network training" 106 <<
" examples, wrote " << num_written <<
", discarded " 108 return (num_written == 0 ? 1 : 0);
109 }
catch(
const std::exception &e) {
110 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
A templated class for writing objects to an archive or script file; see The Table concept...
void CombineDiscriminativeExamples(int32 max_length, const std::vector< DiscriminativeNnetExample > &input, std::vector< DiscriminativeNnetExample > *output)
This function is used to combine multiple discriminative-training examples (each corresponding to a s...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Matrix< BaseFloat > input_frames
The input data, typically with a number of frames [NumRows()] larger than labels.size(), because it includes features to the left and right as needed for the temporal context of the network.
This struct is used to store the information we need for discriminative training (MMI or MPE)...
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).