20 #ifndef KALDI_NNET3_NNET_EXAMPLE_UTILS_H_ 21 #define KALDI_NNET3_NNET_EXAMPLE_UTILS_H_ 50 const std::vector<std::string> &exclude_names,
61 const NnetExample &eg,
62 bool need_model_derivative,
63 bool store_component_stats,
64 ComputationRequest *computation_request);
71 const VectorBase<BaseFloat> &vec);
76 Vector<BaseFloat> *vec);
101 left_context(0), right_context(0),
102 left_context_initial(-1), right_context_final(-1),
103 num_frames_overlap(0), frame_subsampling_factor(1),
104 num_frames_str(
"1") { }
111 po->
Register(
"left-context", &left_context,
"Number of frames of left " 112 "context of input features that are added to each " 114 po->
Register(
"right-context", &right_context,
"Number of frames of right " 115 "context of input features that are added to each " 117 po->
Register(
"left-context-initial", &left_context_initial,
"Number of " 118 "frames of left context of input features that are added to " 119 "each example at the start of the utterance (if <0, this " 120 "defaults to the same as --left-context)");
121 po->
Register(
"right-context-final", &right_context_final,
"Number of " 122 "frames of right context of input features that are added " 123 "to each example at the end of the utterance (if <0, this " 124 "defaults to the same as --right-context)");
125 po->
Register(
"num-frames", &num_frames_str,
"Number of frames with labels " 126 "that each example contains (i.e. the left and right context " 127 "are to be added to this). May just be an integer (e.g. " 128 "--num-frames=8), or a principal value followed by " 129 "alternative values to be used at most once for each utterance " 130 "to deal with odd-sized input, e.g. --num-frames=40,25,50 means " 131 "that most of the time the number of frames will be 40, but to " 132 "deal with odd-sized inputs we may also generate egs with these " 133 "other sizes. All these values will be rounded up to the " 134 "closest multiple of --frame-subsampling-factor. As a special case, " 135 "--num-frames=-1 means 'don't do any splitting'.");
136 po->
Register(
"num-frames-overlap", &num_frames_overlap,
"Number of frames of " 137 "overlap between adjacent eamples (applies to chunks of size " 138 "equal to the primary [first-listed] --num-frames value... " 139 "will be adjusted for different-sized chunks). Advisory; " 140 "will not be exactly enforced.");
141 po->
Register(
"frame-subsampling-factor", &frame_subsampling_factor,
"Used " 142 "if the frame-rate of the output labels in the generated " 143 "examples will be less than the frame-rate at the input");
181 void GetChunksForUtterance(
int32 utterance_length,
182 std::vector<ChunkTimeInfo> *chunk_info);
191 bool LengthsMatch(
const std::string &utt,
192 int32 utterance_length,
193 int32 supervision_length,
194 int32 length_tolerance = 0)
const;
203 void InitSplitForLength();
209 float DefaultDurationOfSplit(
const std::vector<int32> &split)
const;
217 int32 MaxUtteranceLength()
const;
222 void InitSplits(std::vector<std::vector<int32> > *splits)
const;
232 void GetChunkSizesForUtterance(
int32 utterance_length,
233 std::vector<int32> *chunk_sizes)
const;
250 void GetGapSizes(
int32 utterance_length,
251 bool enforce_subsampling_factor,
252 const std::vector<int32> &chunk_sizes,
253 std::vector<int32> *gap_sizes)
const;
260 static void DistributeRandomlyUniform(
int32 n,
261 std::vector<int32> *vec);
268 static void DistributeRandomly(
int32 n,
269 const std::vector<int32> &magnitudes,
270 std::vector<int32> *vec);
274 void SetOutputWeights(
int32 utterance_length,
275 std::vector<ChunkTimeInfo> *chunk_info)
const;
279 const std::vector<ChunkTimeInfo> &chunk_info);
331 measure_output_frames(
"deprecated"),
332 minibatch_size(default_minibatch_size),
333 discard_partial_minibatches(
"deprecated") { }
336 po->
Register(
"compress", &compress,
"If true, compress the output examples " 337 "(not recommended unless you are writing to disk)");
338 po->
Register(
"measure-output-frames", &measure_output_frames,
"This " 339 "value will be ignored (included for back-compatibility)");
340 po->
Register(
"discard-partial-minibatches", &discard_partial_minibatches,
341 "This value will be ignored (included for back-compatibility)");
342 po->
Register(
"minibatch-size", &minibatch_size,
343 "String controlling the minibatch size. May be just an integer, " 344 "meaning a fixed minibatch size (e.g. --minibatch-size=128). " 345 "May be a list of ranges and values, e.g. --minibatch-size=32,64 " 346 "or --minibatch-size=16:32,64,128. All minibatches will be of " 347 "the largest size until the end of the input is reached; " 348 "then, increasingly smaller sizes will be allowed. Only egs " 349 "with the same structure (e.g num-frames) are merged. You may " 350 "specify different minibatch sizes for different sizes of eg " 351 "(defined as the maximum number of Indexes on any input), in " 353 "--minibatch-size='eg_size1=mb_sizes1/eg_size2=mb_sizes2', e.g. " 354 "--minibatch-size=128=64:128,256/256=32:64,128. Egs are given " 355 "minibatch-sizes based on the specified eg-size closest to " 356 "their actual size.");
382 int32 num_available_egs,
383 bool input_ended)
const;
395 std::vector<std::pair<int32, int32> >
ranges;
401 int32 LargestValueInRange(
int32 max_value)
const;
403 static bool ParseIntSet(
const std::string &str,
IntSet *int_set);
410 std::vector<std::pair<int32, IntSet> >
rules;
437 void WroteExample(
int32 example_size,
size_t structure_hash,
438 int32 minibatch_size);
443 void DiscardedExamples(
int32 example_size,
size_t structure_hash,
444 int32 num_discarded);
448 void PrintStats()
const;
470 void PrintAggregateStats()
const;
471 void PrintSpecificStats()
const;
504 void WriteMinibatch(
const std::vector<NnetExample> &egs);
513 typedef unordered_map<NnetExample*, std::vector<NnetExample*>,
522 #endif // KALDI_NNET3_NNET_EXAMPLE_UTILS_H_ NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
This class is responsible for storing, and displaying in log messages, statistics about how examples ...
int64 total_frames_in_chunks_
int32 total_num_utterances_
int32 frame_subsampling_factor
unordered_map< std::pair< int32, size_t >, StatsForExampleSize, PairHasher< int32, size_t > > StatsType
std::vector< std::pair< int32, IntSet > > rules
std::string num_frames_str
A templated class for writing objects to an archive or script file; see The Table concept...
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
void ShiftExampleTimes(int32 t_offset, const std::vector< std::string > &exclude_names, NnetExample *eg)
Shifts the time-index t of everything in the "eg" by adding "t_offset" to all "t" values...
std::string measure_output_frames
const ExampleMergingConfig & config_
This class is responsible for arranging examples in groups that have the same structure (i...
int32 left_context_initial
NnetExampleWriter * writer_
int64 total_input_frames_
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
The two main classes defined in this header are struct ComputationRequest, which basically defines a ...
std::string discard_partial_minibatches
std::vector< int32 > num_frames
int64 total_frames_overlap_
unordered_map< int32, int32 > minibatch_to_num_written
This hashing object hashes just the structural aspects of the NnetExample without looking at the valu...
void WriteVectorAsChar(std::ostream &os, bool binary, const VectorBase< BaseFloat > &vec)
std::string minibatch_size
void Register(OptionsItf *po)
int32 GetNnetExampleSize(const NnetExample &a)
This function returns the 'size' of a nnet-example as defined for purposes of merging egs...
ExampleMergingConfig(const char *default_minibatch_size="256")
int32 right_context_final
const ExampleGenerationConfig & config_
unordered_map< NnetExample *, std::vector< NnetExample * >, NnetExampleStructureHasher, NnetExampleStructureCompare > MapType
This comparator object compares just the structural aspects of the NnetExample without looking at the...
const ExampleGenerationConfig & Config() const
void Register(OptionsItf *po)
std::map< int32, int32 > chunk_size_to_count_
ExampleGenerationConfig()
ExampleMergingStats stats_
void ReadVectorAsChar(std::istream &is, bool binary, Vector< BaseFloat > *vec)
void AccStatsForUtterance(const TransitionModel &trans_model, const AmDiagGmm &am_gmm, const GaussPost &gpost, const Matrix< BaseFloat > &feats, FmllrRawAccs *accs)
struct ChunkTimeInfo is used by class UtteranceSplitter to output information about how we split an u...
std::vector< std::pair< int32, int32 > > ranges
std::vector< BaseFloat > output_weights
void ComputeDerived()
This function decodes 'num_frames_str' into 'num_frames', and ensures that the members of 'num_frames...
A hashing function-object for pairs of ints.
void GetComputationRequest(const Nnet &nnet, const NnetExample &eg, bool need_model_derivative, bool store_component_stats, ComputationRequest *request)
This function takes a NnetExample (which should already have been frame-selected, if desired...
void MergeExamples(const std::vector< NnetExample > &src, bool compress, NnetExample *merged_eg)
Merge a set of input examples into a single example (typically the size of "src" will be the minibatc...