42 unordered_map<std::string, std::vector<ChunkInfo *> > *utt_to_chunks) {
43 Input range_input(range_rxfilename);
44 if (!range_rxfilename.empty()) {
46 while (std::getline(range_input.
Stream(), line)) {
48 std::vector<std::string> fields;
50 if (fields.size() != 6)
51 KALDI_ERR <<
"Expected 6 fields in line of range file, got " 52 << fields.size() <<
" instead.";
54 std::string utt = fields[0],
55 start_frame_str = fields[3],
56 num_frames_str = fields[4],
57 label_str = fields[5];
63 KALDI_ERR <<
"Expected integer for output archive in range file.";
65 chunk_info->
name = utt +
"-" + start_frame_str +
"-" + num_frames_str
67 unordered_map<std::string, std::vector<ChunkInfo*> >::iterator
68 got = utt_to_chunks->find(utt);
70 if (got == utt_to_chunks->end()) {
71 std::vector<ChunkInfo* > chunk_infos;
72 chunk_infos.push_back(chunk_info);
73 utt_to_chunks->insert(std::pair<std::string,
74 std::vector<ChunkInfo* > > (utt, chunk_infos));
76 got->second.push_back(chunk_info);
83 const std::vector<ChunkInfo *> &chunks,
const std::string &utt,
84 bool compress,
int32 num_pdfs,
int32 *num_egs_written,
85 std::vector<NnetExampleWriter *> *example_writers) {
86 for (std::vector<ChunkInfo *>::const_iterator it = chunks.begin();
87 it != chunks.end(); ++it) {
93 KALDI_WARN <<
"Unable to create examples for utterance " << utt
94 <<
". Requested chunk size of " 96 <<
" but utterance has only " << num_rows <<
" frames.";
106 for (std::vector<Index>::iterator indx_it = nnet_input.
indexes.begin();
107 indx_it != nnet_input.
indexes.end(); ++indx_it)
111 std::vector<std::pair<int32, BaseFloat> > post;
112 post.push_back(std::pair<int32, BaseFloat>(chunk->
label, 1.0));
113 label.push_back(post);
115 eg.
io.push_back(nnet_input);
121 KALDI_ERR <<
"Requested output index exceeds number of specified " 125 (*num_egs_written) += 1;
133 int main(
int argc,
char *argv[]) {
135 using namespace kaldi;
140 "Get examples for training an nnet3 neural network for the xvector\n" 141 "system. Each output example contains a chunk of features from some\n" 142 "utterance along with a speaker label. The location and length of\n" 143 "the feature chunks are specified in the 'ranges' file. Each line\n" 144 "is interpreted as follows:\n" 145 " <source-utterance> <relative-output-archive-index> " 146 "<absolute-archive-index> <start-frame-index> <num-frames> " 148 "where <relative-output-archive-index> is interpreted as a zero-based\n" 149 "index into the wspecifiers provided on the command line (<egs-0-out>\n" 150 "and so on), and <absolute-archive-index> is ignored by this program.\n" 152 " utt1 3 13 65 300 3\n" 153 " utt1 0 10 50 400 3\n" 156 "Usage: nnet3-xvector-get-egs [options] <ranges-filename> " 157 "<features-rspecifier> <egs-0-out> <egs-1-out> ... <egs-N-1-out>\n" 160 "nnet3-xvector-get-egs ranges.1 \"$feats\" ark:egs_temp.1.ark" 161 " ark:egs_temp.2.ark ark:egs_temp.3.ark\n";
163 bool compress =
true;
167 po.
Register(
"compress", &compress,
"If true, write egs in " 168 "compressed format.");
169 po.
Register(
"num-pdfs", &num_pdfs,
"Number of speakers in the training " 179 std::string range_rspecifier = po.
GetArg(1),
180 feature_rspecifier = po.
GetArg(2);
181 std::vector<NnetExampleWriter *> example_writers;
186 unordered_map<std::string, std::vector<ChunkInfo *> > utt_to_chunks;
194 for (; !feat_reader.
Done(); feat_reader.
Next()) {
195 std::string key = feat_reader.
Key();
197 unordered_map<std::string, std::vector<ChunkInfo*> >::iterator
198 got = utt_to_chunks.find(key);
199 if (got == utt_to_chunks.end()) {
200 KALDI_WARN <<
"Could not create examples from utterance " 201 << key <<
" because it has no entry in the ranges " 205 std::vector<ChunkInfo *> chunks = got->second;
207 &num_egs_written, &example_writers);
213 for (unordered_map<std::string, std::vector<ChunkInfo*> >::iterator
214 map_it = utt_to_chunks.begin();
215 map_it != utt_to_chunks.end(); ++map_it) {
220 KALDI_LOG <<
"Finished generating examples, " 221 <<
"successfully processed " << num_done
222 <<
" feature files, wrote " << num_egs_written <<
" examples; " 223 << num_err <<
" files had errors.";
224 return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
225 }
catch(
const std::exception &e) {
226 std::cerr << e.what() <<
'\n';
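NnetExample is the input data and corresponding label (or labels) for one or more frames of input, used for standard cross-entropy training of neural nets (and possibly for other objective functions).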
int main(int argc, char *argv[])
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
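Converts a string into an integer via strtoll and returns false if there was any kind of problem (i.e. the string was not an integer or contained extra non-whitespace junk, or the integer was too large to fit into the type it is being converted into).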
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept.
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of the "features" matrix.
void Register(const std::string &name, bool *ptr, const std::string &doc)
static void WriteExamples(const MatrixBase< BaseFloat > &feats, const std::vector< ChunkInfo *> &chunks, const std::string &utt, bool compress, int32 num_pdfs, int32 *num_egs_written, std::vector< NnetExampleWriter *> *example_writers)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
void Compress()
Compresses any (input) features that are not sparse.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
std::vector< NnetIo > io
"io" contains the input and output.
Sub-matrix representation.
static void ProcessRangeFile(const std::string &range_rxfilename, unordered_map< std::string, std::vector< ChunkInfo *> > *utt_to_chunks)