20 #ifndef KALDI_NNET3_NNET_EXAMPLE_UTILS_H_    21 #define KALDI_NNET3_NNET_EXAMPLE_UTILS_H_    50                        const std::vector<std::string> &exclude_names,
    61                            const NnetExample &eg,
    62                            bool need_model_derivative,
    63                            bool store_component_stats,
    64                            ComputationRequest *computation_request);
    71                        const VectorBase<BaseFloat> &vec);
    76                       Vector<BaseFloat> *vec);
   101       left_context(0), right_context(0),
   102       left_context_initial(-1), right_context_final(-1),
   103       num_frames_overlap(0), frame_subsampling_factor(1),
   104       num_frames_str(
"1") { }
   111     po->
Register(
"left-context", &left_context, 
"Number of frames of left "   112                  "context of input features that are added to each "   114     po->
Register(
"right-context", &right_context, 
"Number of frames of right "   115                  "context of input features that are added to each "   117     po->
Register(
"left-context-initial", &left_context_initial, 
"Number of "   118                  "frames of left context of input features that are added to "   119                  "each example at the start of the utterance (if <0, this "   120                  "defaults to the same as --left-context)");
   121     po->
Register(
"right-context-final", &right_context_final, 
"Number of "   122                  "frames of right context of input features that are added "   123                  "to each example at the end of the utterance (if <0, this "   124                  "defaults to the same as --right-context)");
   125     po->
Register(
"num-frames", &num_frames_str, 
"Number of frames with labels "   126                 "that each example contains (i.e. the left and right context "   127                 "are to be added to this).  May just be an integer (e.g. "   128                 "--num-frames=8), or a principal value followed by "   129                 "alternative values to be used at most once for each utterance "   130                 "to deal with odd-sized input, e.g. --num-frames=40,25,50 means "   131                 "that most of the time the number of frames will be 40, but to "   132                 "deal with odd-sized inputs we may also generate egs with these "   133                 "other sizes.  All these values will be rounded up to the "   134                 "closest multiple of --frame-subsampling-factor.  As a special case, "   135                 "--num-frames=-1 means 'don't do any splitting'.");
   136     po->
Register(
"num-frames-overlap", &num_frames_overlap, 
"Number of frames of "   137                  "overlap between adjacent examples (applies to chunks of size "   138                  "equal to the primary [first-listed] --num-frames value... "   139                  "will be adjusted for different-sized chunks).  Advisory; "   140                  "will not be exactly enforced.");
   141     po->
Register(
"frame-subsampling-factor", &frame_subsampling_factor, 
"Used "   142                  "if the frame-rate of the output labels in the generated "   143                  "examples will be less than the frame-rate at the input");
   181   void GetChunksForUtterance(
int32 utterance_length,
   182                              std::vector<ChunkTimeInfo> *chunk_info);
   191   bool LengthsMatch(
const std::string &utt,
   192                     int32 utterance_length,
   193                     int32 supervision_length,
   194                     int32 length_tolerance = 0) 
const;
   203   void InitSplitForLength();
   209   float DefaultDurationOfSplit(
const std::vector<int32> &split) 
const;
   217   int32 MaxUtteranceLength() 
const;
   222   void InitSplits(std::vector<std::vector<int32> > *splits) 
const;
   232   void GetChunkSizesForUtterance(
int32 utterance_length,
   233                                  std::vector<int32> *chunk_sizes) 
const;
   250   void GetGapSizes(
int32 utterance_length,
   251                    bool enforce_subsampling_factor,
   252                    const std::vector<int32> &chunk_sizes,
   253                    std::vector<int32> *gap_sizes) 
const;
   260   static void DistributeRandomlyUniform(
int32 n,
   261                                         std::vector<int32> *vec);
   268   static void DistributeRandomly(
int32 n,
   269                                  const std::vector<int32> &magnitudes,
   270                                  std::vector<int32> *vec);
   274   void SetOutputWeights(
int32 utterance_length,
   275                         std::vector<ChunkTimeInfo> *chunk_info) 
const;
   279                             const std::vector<ChunkTimeInfo> &chunk_info);
   331       measure_output_frames(
"deprecated"),
   332       minibatch_size(default_minibatch_size),
   333       discard_partial_minibatches(
"deprecated") { }
   336     po->
Register(
"compress", &compress, 
"If true, compress the output examples "   337                  "(not recommended unless you are writing to disk)");
   338     po->
Register(
"measure-output-frames", &measure_output_frames, 
"This "   339                  "value will be ignored (included for back-compatibility)");
   340     po->
Register(
"discard-partial-minibatches", &discard_partial_minibatches,
   341                  "This value will be ignored (included for back-compatibility)");
   342     po->
Register(
"minibatch-size", &minibatch_size,
   343                  "String controlling the minibatch size.  May be just an integer, "   344                  "meaning a fixed minibatch size (e.g. --minibatch-size=128). "   345                  "May be a list of ranges and values, e.g. --minibatch-size=32,64 "   346                  "or --minibatch-size=16:32,64,128.  All minibatches will be of "   347                  "the largest size until the end of the input is reached; "   348                  "then, increasingly smaller sizes will be allowed.  Only egs "   349                  "with the same structure (e.g num-frames) are merged.  You may "   350                  "specify different minibatch sizes for different sizes of eg "   351                  "(defined as the maximum number of Indexes on any input), in "   353                  "--minibatch-size='eg_size1=mb_sizes1/eg_size2=mb_sizes2', e.g. "   354                  "--minibatch-size=128=64:128,256/256=32:64,128.  Egs are given "   355                  "minibatch-sizes based on the specified eg-size closest to "   356                  "their actual size.");
   382                       int32 num_available_egs,
   383                       bool input_ended) 
const;
   395     std::vector<std::pair<int32, int32> > 
ranges;
   401     int32 LargestValueInRange(
int32 max_value) 
const;
   403   static bool ParseIntSet(
const std::string &str, 
IntSet *int_set);
   410   std::vector<std::pair<int32, IntSet> > 
rules;
   437   void WroteExample(
int32 example_size, 
size_t structure_hash,
   438                     int32 minibatch_size);
   443   void DiscardedExamples(
int32 example_size, 
size_t structure_hash,
   444                          int32 num_discarded);
   448   void PrintStats() 
const;
   470   void PrintAggregateStats() 
const;
   471   void PrintSpecificStats() 
const;
   504   void WriteMinibatch(
const std::vector<NnetExample> &egs);
   513   typedef unordered_map<NnetExample*, std::vector<NnetExample*>,
   522 #endif // KALDI_NNET3_NNET_EXAMPLE_UTILS_H_ NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
This class is responsible for storing, and displaying in log messages, statistics about how examples ...
 
int64 total_frames_in_chunks_
 
int32 total_num_utterances_
 
int32 frame_subsampling_factor
 
unordered_map< std::pair< int32, size_t >, StatsForExampleSize, PairHasher< int32, size_t > > StatsType
 
std::vector< std::pair< int32, IntSet > > rules
 
std::string num_frames_str
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
 
void ShiftExampleTimes(int32 t_offset, const std::vector< std::string > &exclude_names, NnetExample *eg)
Shifts the time-index t of everything in the "eg" by adding "t_offset" to all "t" values...
 
std::string measure_output_frames
 
const ExampleMergingConfig & config_
 
This class is responsible for arranging examples in groups that have the same structure (i...
 
int32 left_context_initial
 
NnetExampleWriter * writer_
 
int64 total_input_frames_
 
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
 
The two main classes defined in this header are struct ComputationRequest, which basically defines a ...
 
std::string discard_partial_minibatches
 
std::vector< int32 > num_frames
 
int64 total_frames_overlap_
 
unordered_map< int32, int32 > minibatch_to_num_written
 
This hashing object hashes just the structural aspects of the NnetExample without looking at the valu...
 
void WriteVectorAsChar(std::ostream &os, bool binary, const VectorBase< BaseFloat > &vec)
 
std::string minibatch_size
 
void Register(OptionsItf *po)
 
int32 GetNnetExampleSize(const NnetExample &a)
This function returns the 'size' of a nnet-example as defined for purposes of merging egs...
 
ExampleMergingConfig(const char *default_minibatch_size="256")
 
int32 right_context_final
 
const ExampleGenerationConfig & config_
 
unordered_map< NnetExample *, std::vector< NnetExample * >, NnetExampleStructureHasher, NnetExampleStructureCompare > MapType
 
This comparator object compares just the structural aspects of the NnetExample without looking at the...
 
const ExampleGenerationConfig & Config() const
 
void Register(OptionsItf *po)
 
std::map< int32, int32 > chunk_size_to_count_
 
ExampleGenerationConfig()
 
ExampleMergingStats stats_
 
void ReadVectorAsChar(std::istream &is, bool binary, Vector< BaseFloat > *vec)
 
void AccStatsForUtterance(const TransitionModel &trans_model, const AmDiagGmm &am_gmm, const GaussPost &gpost, const Matrix< BaseFloat > &feats, FmllrRawAccs *accs)
 
struct ChunkTimeInfo is used by class UtteranceSplitter to output information about how we split an u...
 
std::vector< std::pair< int32, int32 > > ranges
 
std::vector< BaseFloat > output_weights
 
void ComputeDerived()
This function decodes 'num_frames_str' into 'num_frames', and ensures that the members of 'num_frames...
 
A hashing function-object for pairs of ints. 
 
void GetComputationRequest(const Nnet &nnet, const NnetExample &eg, bool need_model_derivative, bool store_component_stats, ComputationRequest *request)
This function takes a NnetExample (which should already have been frame-selected, if desired...
 
void MergeExamples(const std::vector< NnetExample > &src, bool compress, NnetExample *merged_eg)
Merge a set of input examples into a single example (typically the size of "src" will be the minibatc...