UtteranceSplitter Class Reference

#include <nnet-example-utils.h>

Collaboration diagram for UtteranceSplitter:

Public Member Functions

 UtteranceSplitter (const ExampleGenerationConfig &config)
 
const ExampleGenerationConfig & Config () const
 
void GetChunksForUtterance (int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info)
 
bool LengthsMatch (const std::string &utt, int32 utterance_length, int32 supervision_length, int32 length_tolerance=0) const
 
 ~UtteranceSplitter ()
 
int32 ExitStatus ()
 

Private Member Functions

void InitSplitForLength ()
 
float DefaultDurationOfSplit (const std::vector< int32 > &split) const
 
int32 MaxUtteranceLength () const
 
void InitSplits (std::vector< std::vector< int32 > > *splits) const
 
void GetChunkSizesForUtterance (int32 utterance_length, std::vector< int32 > *chunk_sizes) const
 
void GetGapSizes (int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
 
void SetOutputWeights (int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info) const
 
void AccStatsForUtterance (int32 utterance_length, const std::vector< ChunkTimeInfo > &chunk_info)
 

Static Private Member Functions

static void DistributeRandomlyUniform (int32 n, std::vector< int32 > *vec)
 
static void DistributeRandomly (int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
 

Private Attributes

const ExampleGenerationConfig & config_
 
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
 
int32 total_num_utterances_
 
int64 total_input_frames_
 
int64 total_frames_overlap_
 
int64 total_num_chunks_
 
int64 total_frames_in_chunks_
 
std::map< int32, int32 > chunk_size_to_count_
 

Detailed Description

Definition at line 170 of file nnet-example-utils.h.

Constructor & Destructor Documentation

◆ UtteranceSplitter()

Definition at line 342 of file nnet-example-utils.cc.

References UtteranceSplitter::InitSplitForLength(), KALDI_ERR, ExampleGenerationConfig::num_frames, and ExampleGenerationConfig::num_frames_str.

342  :
343  config_(config),
347  if (config.num_frames_str != "-1") {
348  if (config.num_frames.empty()) {
349  KALDI_ERR << "You need to call ComputeDerived() on the "
350  "ExampleGenerationConfig().";
351  }
352  InitSplitForLength();
353  }
354 }
#define KALDI_ERR
Definition: kaldi-error.h:127
const ExampleGenerationConfig & config_

◆ ~UtteranceSplitter()

Definition at line 356 of file nnet-example-utils.cc.

References UtteranceSplitter::chunk_size_to_count_, KALDI_LOG, UtteranceSplitter::total_frames_in_chunks_, UtteranceSplitter::total_frames_overlap_, UtteranceSplitter::total_input_frames_, UtteranceSplitter::total_num_chunks_, and UtteranceSplitter::total_num_utterances_.

356  {
357  KALDI_LOG << "Split " << total_num_utterances_ << " utts, with "
358  << "total length " << total_input_frames_ << " frames ("
359  << (total_input_frames_ / 360000.0) << " hours assuming "
360  << "100 frames per second)";
361  float average_chunk_length = total_frames_in_chunks_ * 1.0 / total_num_chunks_,
362  overlap_percent = total_frames_overlap_ * 100.0 / total_input_frames_,
363  output_percent = total_frames_in_chunks_ * 100.0 / total_input_frames_,
364  output_percent_no_overlap = output_percent - overlap_percent;
365 
366  KALDI_LOG << "Average chunk length was " << average_chunk_length
367  << " frames; overlap between adjacent chunks was "
368  << overlap_percent << "% of input length; length of output was "
369  << output_percent << "% of input length (minus overlap = "
370  << output_percent_no_overlap << "%).";
371  if (chunk_size_to_count_.size() > 1) {
372  std::ostringstream os;
373  os << std::setprecision(4);
374  for (std::map<int32, int32>::iterator iter = chunk_size_to_count_.begin();
375  iter != chunk_size_to_count_.end(); ++iter) {
376  int32 chunk_size = iter->first,
377  num_frames = chunk_size * iter->second;
378  float percent_of_total = num_frames * 100.0 / total_frames_in_chunks_;
379  if (iter != chunk_size_to_count_.begin()) os << ", ";
380  os << chunk_size << " = " << percent_of_total << "%";
381  }
382  KALDI_LOG << "Output frames are distributed among chunk-sizes as follows: "
383  << os.str();
384  }
385 
386 }
std::map< int32, int32 > chunk_size_to_count_
#define KALDI_LOG
Definition: kaldi-error.h:133

Member Function Documentation

◆ AccStatsForUtterance()

void AccStatsForUtterance ( int32  utterance_length,
const std::vector< ChunkTimeInfo > &  chunk_info 
)
private

Definition at line 863 of file nnet-example-utils.cc.

References UtteranceSplitter::chunk_size_to_count_, UtteranceSplitter::total_frames_in_chunks_, UtteranceSplitter::total_frames_overlap_, UtteranceSplitter::total_input_frames_, UtteranceSplitter::total_num_chunks_, and UtteranceSplitter::total_num_utterances_.

Referenced by UtteranceSplitter::GetChunksForUtterance().

865  {
866  total_num_utterances_ += 1;
867  total_input_frames_ += utterance_length;
868 
869  for (size_t c = 0; c < chunk_info.size(); c++) {
870  int32 chunk_size = chunk_info[c].num_frames;
871  if (c > 0) {
872  int32 last_chunk_end = chunk_info[c-1].first_frame +
873  chunk_info[c-1].num_frames;
874  if (last_chunk_end > chunk_info[c].first_frame)
875  total_frames_overlap_ += last_chunk_end - chunk_info[c].first_frame;
876  }
877  std::map<int32, int32>::iterator iter = chunk_size_to_count_.find(
878  chunk_size);
879  if (iter == chunk_size_to_count_.end())
880  chunk_size_to_count_[chunk_size] = 1;
881  else
882  iter->second++;
883  total_num_chunks_ += 1;
884  total_frames_in_chunks_ += chunk_size;
885  }
886 }
std::map< int32, int32 > chunk_size_to_count_

◆ Config()

const ExampleGenerationConfig& Config ( ) const
inline

Definition at line 176 of file nnet-example-utils.h.

Referenced by kaldi::nnet3::ProcessFile().

176 { return config_; }
const ExampleGenerationConfig & config_

◆ DefaultDurationOfSplit()

float DefaultDurationOfSplit ( const std::vector< int32 > &  split) const
private

Definition at line 388 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, rnnlm::i, KALDI_ASSERT, ExampleGenerationConfig::num_frames, and ExampleGenerationConfig::num_frames_overlap.

Referenced by UtteranceSplitter::InitSplitForLength(), and UtteranceSplitter::InitSplits().

389  {
390  if (split.empty()) // not a valid split, but useful to handle this case.
391  return 0.0;
392  float principal_num_frames = config_.num_frames[0],
393  num_frames_overlap = config_.num_frames_overlap;
394  KALDI_ASSERT(num_frames_overlap < principal_num_frames &&
395  "--num-frames-overlap value is too high");
396  float overlap_proportion = num_frames_overlap / principal_num_frames;
397  float ans = std::accumulate(split.begin(), split.end(), int32(0));
398  for (size_t i = 0; i + 1 < split.size(); i++) {
399  float min_adjacent_chunk_length = std::min(split[i], split[i + 1]),
400  overlap = overlap_proportion * min_adjacent_chunk_length;
401  ans -= overlap;
402  }
403  KALDI_ASSERT(ans > 0.0);
404  return ans;
405 }
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

◆ DistributeRandomly()

void DistributeRandomly ( int32  n,
const std::vector< int32 > &  magnitudes,
std::vector< int32 > *  vec 
)
static private

Definition at line 703 of file nnet-example-utils.cc.

References rnnlm::i, KALDI_ASSERT, and rnnlm::n.

Referenced by UtteranceSplitter::GetGapSizes().

705  {
706  KALDI_ASSERT(!vec->empty() && vec->size() == magnitudes.size());
707  int32 size = vec->size();
708  if (n < 0) {
709  DistributeRandomly(-n, magnitudes, vec);
710  for (int32 i = 0; i < size; i++)
711  (*vec)[i] *= -1;
712  return;
713  }
714  float total_magnitude = std::accumulate(magnitudes.begin(), magnitudes.end(),
715  int32(0));
716  KALDI_ASSERT(total_magnitude > 0);
717  // note: 'partial_counts' contains the negative of the partial counts, so
718  // when we sort the larger partial counts come first.
719  std::vector<std::pair<float, int32> > partial_counts;
720  int32 total_count = 0;
721  for (int32 i = 0; i < size; i++) {
722  float this_count = n * float(magnitudes[i]) / total_magnitude;
723  // note: cast of float to int32 rounds towards zero (down, in this
724  // case, since this_count >= 0).
725  int32 this_whole_count = static_cast<int32>(this_count),
726  this_partial_count = this_count - this_whole_count;
727  (*vec)[i] = this_whole_count;
728  total_count += this_whole_count;
729  partial_counts.push_back(std::pair<float, int32>(-this_partial_count, i));
730  }
731  KALDI_ASSERT(total_count <= n && total_count + size >= n);
732  std::sort(partial_counts.begin(), partial_counts.end());
733  int32 i = 0;
734  // Increment by one the elements of the vector that has the largest partial
735  // count, then the next largest partial count, and so on... until we reach the
736  // desired total-count 'n'.
737  for(; total_count < n; i++,total_count++) {
738  (*vec)[partial_counts[i].second]++;
739  }
740  KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n);
741 }
struct rnnlm::@11::@12 n
static void DistributeRandomly(int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

◆ DistributeRandomlyUniform()

void DistributeRandomlyUniform ( int32  n,
std::vector< int32 > *  vec 
)
static private

Definition at line 679 of file nnet-example-utils.cc.

References rnnlm::i, and KALDI_ASSERT.

Referenced by UtteranceSplitter::GetGapSizes().

679  {
680  KALDI_ASSERT(!vec->empty());
681  int32 size = vec->size();
682  if (n < 0) {
683  DistributeRandomlyUniform(-n, vec);
684  for (int32 i = 0; i < size; i++)
685  (*vec)[i] *= -1;
686  return;
687  }
688  // from this point we know n >= 0.
689  int32 common_part = n / size,
690  remainder = n % size, i;
691  for (i = 0; i < remainder; i++) {
692  (*vec)[i] = common_part + 1;
693  }
694  for (; i < size; i++) {
695  (*vec)[i] = common_part;
696  }
697  std::random_shuffle(vec->begin(), vec->end());
698  KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n);
699 }
static void DistributeRandomlyUniform(int32 n, std::vector< int32 > *vec)
struct rnnlm::@11::@12 n
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

◆ ExitStatus()

int32 ExitStatus ( )
inline

Definition at line 199 of file nnet-example-utils.h.

References UtteranceSplitter::total_frames_in_chunks_.

Referenced by main().

199 { return (total_frames_in_chunks_ > 0 ? 0 : 1); }

◆ GetChunksForUtterance()

void GetChunksForUtterance ( int32  utterance_length,
std::vector< ChunkTimeInfo > *  chunk_info 
)

Definition at line 822 of file nnet-example-utils.cc.

References UtteranceSplitter::AccStatsForUtterance(), UtteranceSplitter::config_, ChunkTimeInfo::first_frame, ExampleGenerationConfig::frame_subsampling_factor, UtteranceSplitter::GetChunkSizesForUtterance(), UtteranceSplitter::GetGapSizes(), rnnlm::i, KALDI_ASSERT, ExampleGenerationConfig::left_context, ChunkTimeInfo::left_context, ExampleGenerationConfig::left_context_initial, ChunkTimeInfo::num_frames, ExampleGenerationConfig::num_frames_str, ExampleGenerationConfig::right_context, ChunkTimeInfo::right_context, ExampleGenerationConfig::right_context_final, and UtteranceSplitter::SetOutputWeights().

Referenced by kaldi::nnet3::ProcessFile().

824  {
825  int32 t = 0;
826  if (config_.num_frames_str == "-1" ) {
827  ChunkTimeInfo *info;
828  info = new ChunkTimeInfo;
829  info->first_frame = 0;
830  info->num_frames = utterance_length;
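831  info->left_context = (config_.left_context_initial >= 0 ?
832  config_.left_context_initial : config_.left_context);
833  info->right_context = (config_.right_context_final >= 0 ?
834  config_.right_context_final : config_.right_context);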
835  (*chunk_info).push_back(*info);
836  } else {
837  std::vector<int32> chunk_sizes;
838  GetChunkSizesForUtterance(utterance_length, &chunk_sizes);
839  std::vector<int32> gaps(chunk_sizes.size());
840  GetGapSizes(utterance_length, true, chunk_sizes, &gaps);
841  int32 num_chunks = chunk_sizes.size();
842  chunk_info->resize(num_chunks);
843  for (int32 i = 0; i < num_chunks; i++) {
844  t += gaps[i];
845  ChunkTimeInfo &info = (*chunk_info)[i];
846  info.first_frame = t;
847  info.num_frames = chunk_sizes[i];
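848  info.left_context = (i == 0 && config_.left_context_initial >= 0 ?
849  config_.left_context_initial : config_.left_context);
850  info.right_context = (i == num_chunks - 1 && config_.right_context_final >= 0 ?
851  config_.right_context_final : config_.right_context);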
852  t += chunk_sizes[i];
853  }
854  }
855  SetOutputWeights(utterance_length, chunk_info);
856  AccStatsForUtterance(utterance_length, *chunk_info);
857  // check that the end of the last chunk doesn't go more than
858  // 'config_.frame_subsampling_factor - 1' frames past the end
859  // of the utterance. That amount, we treat as rounding error.
860  KALDI_ASSERT(t - utterance_length < config_.frame_subsampling_factor);
861 }
void GetGapSizes(int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
void SetOutputWeights(int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info) const
void GetChunkSizesForUtterance(int32 utterance_length, std::vector< int32 > *chunk_sizes) const
void AccStatsForUtterance(int32 utterance_length, const std::vector< ChunkTimeInfo > &chunk_info)
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

◆ GetChunkSizesForUtterance()

void GetChunkSizesForUtterance ( int32  utterance_length,
std::vector< int32 > *  chunk_sizes 
) const
private

Definition at line 580 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, rnnlm::i, KALDI_ASSERT, ExampleGenerationConfig::num_frames, ExampleGenerationConfig::num_frames_overlap, kaldi::RandInt(), and UtteranceSplitter::splits_for_length_.

Referenced by UtteranceSplitter::GetChunksForUtterance().

581  {
583  // 'primary_length' is the first-specified num-frames.
584  // It's the only chunk that may be repeated an arbitrary number
585  // of times.
586  int32 primary_length = config_.num_frames[0],
587  num_frames_overlap = config_.num_frames_overlap,
588  max_tabulated_length = splits_for_length_.size() - 1,
589  num_primary_length_repeats = 0;
590  KALDI_ASSERT(primary_length - num_frames_overlap > 0);
591  KALDI_ASSERT(utterance_length >= 0);
592  while (utterance_length > max_tabulated_length) {
593  utterance_length -= (primary_length - num_frames_overlap);
594  num_primary_length_repeats++;
595  }
596  KALDI_ASSERT(utterance_length >= 0);
597  const std::vector<std::vector<int32> > &possible_splits =
598  splits_for_length_[utterance_length];
599  if (possible_splits.empty()) {
600  chunk_sizes->clear();
601  return;
602  }
603  int32 num_possible_splits = possible_splits.size(),
604  randomly_chosen_split = RandInt(0, num_possible_splits - 1);
605  *chunk_sizes = possible_splits[randomly_chosen_split];
606  for (int32 i = 0; i < num_primary_length_repeats; i++)
607  chunk_sizes->push_back(primary_length);
608 
609  std::sort(chunk_sizes->begin(), chunk_sizes->end());
610  if (RandInt(0, 1) == 0) {
611  std::reverse(chunk_sizes->begin(), chunk_sizes->end());
612  }
613 }
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:94

◆ GetGapSizes()

void GetGapSizes ( int32  utterance_length,
bool  enforce_subsampling_factor,
const std::vector< int32 > &  chunk_sizes,
std::vector< int32 > *  gap_sizes 
) const
private

Definition at line 744 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, UtteranceSplitter::DistributeRandomly(), UtteranceSplitter::DistributeRandomlyUniform(), ExampleGenerationConfig::frame_subsampling_factor, rnnlm::i, KALDI_ASSERT, and KALDI_ERR.

Referenced by UtteranceSplitter::GetChunksForUtterance().

747  {
748  if (chunk_sizes.empty()) {
749  gap_sizes->clear();
750  return;
751  }
752  if (enforce_subsampling_factor && config_.frame_subsampling_factor > 1) {
753  int32 sf = config_.frame_subsampling_factor, size = chunk_sizes.size();
754  int32 utterance_length_reduced = (utterance_length + (sf - 1)) / sf;
755  std::vector<int32> chunk_sizes_reduced(chunk_sizes);
756  for (int32 i = 0; i < size; i++) {
757  KALDI_ASSERT(chunk_sizes[i] % config_.frame_subsampling_factor == 0);
758  chunk_sizes_reduced[i] /= config_.frame_subsampling_factor;
759  }
760  GetGapSizes(utterance_length_reduced, false,
761  chunk_sizes_reduced, gap_sizes);
762  KALDI_ASSERT(gap_sizes->size() == static_cast<size_t>(size));
763  for (int32 i = 0; i < size; i++)
764  (*gap_sizes)[i] *= config_.frame_subsampling_factor;
765  return;
766  }
767  int32 num_chunks = chunk_sizes.size(),
768  total_of_chunk_sizes = std::accumulate(chunk_sizes.begin(),
769  chunk_sizes.end(),
770  int32(0)),
771  total_gap = utterance_length - total_of_chunk_sizes;
772  gap_sizes->resize(num_chunks);
773 
774  if (total_gap < 0) {
775  // there is an overlap. Overlaps can only go between chunks, not at the
776  // beginning or end of the utterance. Also, we try to make the length of
777  // overlap proportional to the size of the smaller of the two chunks
778  // that the overlap is between.
779  if (num_chunks == 1) {
780  // there needs to be an overlap, but there is only one chunk... this means
781  // the chunk-size exceeds the utterance length, which is not allowed.
782  KALDI_ERR << "Chunk size is " << chunk_sizes[0]
783  << " but utterance length is only "
784  << utterance_length;
785  }
786 
787  // note the elements of 'overlaps' will be <= 0.
788  std::vector<int32> magnitudes(num_chunks - 1),
789  overlaps(num_chunks - 1);
790  // the 'magnitudes' vector will contain the minimum of the lengths of the
791  // two adjacent chunks between which we are going to consider having an
792  // overlap. These will be used to assign the overlap proportional to that
793  // size.
794  for (int32 i = 0; i + 1 < num_chunks; i++) {
795  magnitudes[i] = std::min<int32>(chunk_sizes[i], chunk_sizes[i + 1]);
796  }
797  DistributeRandomly(total_gap, magnitudes, &overlaps);
798  for (int32 i = 0; i + 1 < num_chunks; i++) {
799  // If the following condition does not hold, it's possible we
800  // could get chunk start-times less than zero. I don't believe
801  // it's possible for this condition to fail, but we're checking
802  // for it at this level to make debugging easier, just in case.
803  KALDI_ASSERT(overlaps[i] <= magnitudes[i]);
804  }
805 
806  (*gap_sizes)[0] = 0; // no gap before 1st chunk.
807  for (int32 i = 1; i < num_chunks; i++)
808  (*gap_sizes)[i] = overlaps[i-1];
809  } else {
810  // There may be a gap. Gaps can go at the start or end of the utterance, or
811  // between segments. We try to distribute the gaps evenly.
812  std::vector<int32> gaps(num_chunks + 1);
813  DistributeRandomlyUniform(total_gap, &gaps);
814  // the last element of 'gaps', the one at the end of the utterance, is
815  // implicit and doesn't have to be written to the output.
816  for (int32 i = 0; i < num_chunks; i++)
817  (*gap_sizes)[i] = gaps[i];
818  }
819 }
static void DistributeRandomlyUniform(int32 n, std::vector< int32 > *vec)
void GetGapSizes(int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
#define KALDI_ERR
Definition: kaldi-error.h:127
const ExampleGenerationConfig & config_
static void DistributeRandomly(int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

◆ InitSplitForLength()

void InitSplitForLength ( )
private

Definition at line 447 of file nnet-example-utils.cc.

References UtteranceSplitter::DefaultDurationOfSplit(), kaldi::GetVerboseLevel(), UtteranceSplitter::InitSplits(), KALDI_ASSERT, KALDI_VLOG, UtteranceSplitter::MaxUtteranceLength(), and UtteranceSplitter::splits_for_length_.

Referenced by UtteranceSplitter::UtteranceSplitter().

447  {
448  int32 max_utterance_length = MaxUtteranceLength();
449 
450  // The 'splits' vector is a list of possible splits (a split being
451  // a sorted vector of chunk-sizes).
452  // The vector 'splits' is itself sorted.
453  std::vector<std::vector<int32> > splits;
454  InitSplits(&splits);
455 
456 
457  // Define a split-index 0 <= s < splits.size() as index into the 'splits'
458  // vector, and let a cost c >= 0 represent the mismatch between an
459  // utterance length and the total length of the chunk sizes in a split:
460 
461  // c(default_duration, utt_length) = (default_duration > utt_length ?
462  // default_duration - utt_length :
463  // 2.0 * (utt_length - default_duration))
464  // [but as a special case, set c to infinity if the largest chunk size in the
465  // split is longer than the utterance length; we couldn't, in that case, use
466  // this split for this utterance].
467 
468  // 'costs_for_length[u][s]', indexed by utterance-length u and then split,
469  // contains the cost for utterance-length u and split s.
470 
471  std::vector<std::vector<float> > costs_for_length(
472  max_utterance_length + 1);
473  int32 num_splits = splits.size();
474 
475  for (int32 u = 0; u <= max_utterance_length; u++)
476  costs_for_length[u].reserve(num_splits);
477 
478  for (int32 s = 0; s < num_splits; s++) {
479  const std::vector<int32> &split = splits[s];
480  float default_duration = DefaultDurationOfSplit(split);
481  int32 max_chunk_size = *std::max_element(split.begin(), split.end());
482  for (int32 u = 0; u <= max_utterance_length; u++) {
483  // c is the cost for this utterance length and this split. We penalize
484  // gaps twice as strongly as overlaps, based on the intuition that
485  // completely throwing out frames of data is worse than counting them
486  // twice.
487  float c = (default_duration > float(u) ? default_duration - float(u) :
488  2.0 * (u - default_duration));
489  if (u < max_chunk_size) // can't fit the largest of the chunks in this
490  // utterance
491  c = std::numeric_limits<float>::max();
492  KALDI_ASSERT(c >= 0);
493  costs_for_length[u].push_back(c);
494  }
495  }
496 
497 
498  splits_for_length_.resize(max_utterance_length + 1);
499 
500  for (int32 u = 0; u <= max_utterance_length; u++) {
501  const std::vector<float> &costs = costs_for_length[u];
502  float min_cost = *std::min_element(costs.begin(), costs.end());
503  if (min_cost == std::numeric_limits<float>::max()) {
504  // All costs were infinity, because this utterance-length u is shorter
505  // than the smallest chunk-size. Leave splits_for_length_[u] as empty
506  // for this utterance-length, meaning we will not be able to choose any
507  // split, and such utterances will be discarded.
508  continue;
509  }
510  float cost_threshold = 1.9999; // We will choose pseudo-randomly from splits
511  // that are within this distance from the
512  // best cost. Make the threshold just
513  // slightly less than 2... this will
514  // hopefully make the behavior more
515  // deterministic for ties.
516  std::vector<int32> possible_splits;
517  std::vector<float>::const_iterator iter = costs.begin(), end = costs.end();
518  int32 s = 0;
519  for (; iter != end; ++iter,++s)
520  if (*iter < min_cost + cost_threshold)
521  splits_for_length_[u].push_back(splits[s]);
522  }
523 
524  if (GetVerboseLevel() >= 3) {
525  std::ostringstream os;
526  for (int32 u = 0; u <= max_utterance_length; u++) {
527  if (!splits_for_length_[u].empty()) {
528  os << u << "=(";
529  std::vector<std::vector<int32 > >::const_iterator
530  iter1 = splits_for_length_[u].begin(),
531  end1 = splits_for_length_[u].end();
532 
533  while (iter1 != end1) {
534  std::vector<int32>::const_iterator iter2 = iter1->begin(),
535  end2 = iter1->end();
536  while (iter2 != end2) {
537  os << *iter2;
538  ++iter2;
539  if (iter2 != end2) os << ",";
540  }
541  ++iter1;
542  if (iter1 != end1) os << "/";
543  }
544  os << ")";
545  if (u < max_utterance_length) os << ", ";
546  }
547  }
548  KALDI_VLOG(3) << "Utterance-length-to-splits map is: " << os.str();
549  }
550 }
float DefaultDurationOfSplit(const std::vector< int32 > &split) const
int32 GetVerboseLevel()
Definition: kaldi-error.h:69
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void InitSplits(std::vector< std::vector< int32 > > *splits) const
#define KALDI_VLOG(v)
Definition: kaldi-error.h:136

◆ InitSplits()

void InitSplits ( std::vector< std::vector< int32 > > *  splits) const
private

Definition at line 631 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, UtteranceSplitter::DefaultDurationOfSplit(), rnnlm::i, rnnlm::j, UtteranceSplitter::MaxUtteranceLength(), rnnlm::n, and ExampleGenerationConfig::num_frames.

Referenced by UtteranceSplitter::InitSplitForLength().

631  {
632  // we consider splits whose default duration (as returned by
633  // DefaultDurationOfSplit()) is up to MaxUtteranceLength() + primary_length.
634  // We can be confident without doing a lot of math, that splits above this
635  // length will never be chosen for any utterance-length up to
636  // MaxUtteranceLength() (which is the maximum we use).
637  int32 primary_length = config_.num_frames[0],
638  default_duration_ceiling = MaxUtteranceLength() + primary_length;
639 
640  typedef unordered_set<std::vector<int32>, VectorHasher<int32> > SetType;
641 
642  SetType splits_set;
643 
644  int32 num_lengths = config_.num_frames.size();
645 
646  // The splits we allow are: zero to two 'alternate' lengths, plus
647  // an arbitrary number of repeats of the 'primary' length. The repeats
648  // of the 'primary' length are handled by the inner loop over n.
649  // The zero to two 'alternate' lengths are handled by the loops over
650  // i and j. i == 0 and j == 0 are special cases; they mean, no
651  // alternate is chosen.
652  for (int32 i = 0; i < num_lengths; i++) {
653  for (int32 j = 0; j < num_lengths; j++) {
654  std::vector<int32> vec;
655  if (i > 0)
656  vec.push_back(config_.num_frames[i]);
657  if (j > 0)
658  vec.push_back(config_.num_frames[j]);
659  int32 n = 0;
660  while (DefaultDurationOfSplit(vec) <= default_duration_ceiling) {
661  if (!vec.empty()) // Don't allow the empty vector as a split.
662  splits_set.insert(vec);
663  n++;
664  vec.push_back(primary_length);
665  std::sort(vec.begin(), vec.end());
666  }
667  }
668  }
669  for (SetType::const_iterator iter = splits_set.begin();
670  iter != splits_set.end(); ++iter)
671  splits->push_back(*iter);
672  std::sort(splits->begin(), splits->end()); // make the order deterministic,
673  // for consistency of output
674  // between runs and C libraries.
675 }
float DefaultDurationOfSplit(const std::vector< int32 > &split) const
struct rnnlm::@11::@12 n
const ExampleGenerationConfig & config_

◆ LengthsMatch()

bool LengthsMatch ( const std::string &  utt,
int32  utterance_length,
int32  supervision_length,
int32  length_tolerance = 0 
) const

Definition at line 553 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, ExampleGenerationConfig::frame_subsampling_factor, and KALDI_WARN.

Referenced by kaldi::nnet3::ProcessFile().

556  {
557  int32 sf = config_.frame_subsampling_factor,
558  expected_supervision_length = (utterance_length + sf - 1) / sf;
559  if (std::abs(supervision_length - expected_supervision_length)
560  <= length_tolerance) {
561  return true;
562  } else {
563  if (sf == 1) {
564  KALDI_WARN << "Supervision does not have expected length for utterance "
565  << utt << ": expected length = " << utterance_length
566  << ", got " << supervision_length;
567  } else {
568  KALDI_WARN << "Supervision does not have expected length for utterance "
569  << utt << ": expected length = (" << utterance_length
570  << " + " << sf << " - 1) / " << sf << " = "
571  << expected_supervision_length
572  << ", got: " << supervision_length
573  << " (note: --frame-subsampling-factor=" << sf << ")";
574  }
575  return false;
576  }
577 }
#define KALDI_WARN
Definition: kaldi-error.h:130
const ExampleGenerationConfig & config_

◆ MaxUtteranceLength()

int32 MaxUtteranceLength ( ) const
private

Definition at line 616 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, rnnlm::i, KALDI_ASSERT, and ExampleGenerationConfig::num_frames.

Referenced by UtteranceSplitter::InitSplitForLength(), and UtteranceSplitter::InitSplits().

616  {
617  int32 num_lengths = config_.num_frames.size();
618  KALDI_ASSERT(num_lengths > 0);
619  // 'primary_length' is the first-specified num-frames.
620  // It's the only chunk that may be repeated an arbitrary number
621  // of times.
622  int32 primary_length = config_.num_frames[0],
623  max_length = primary_length;
624  for (int32 i = 0; i < num_lengths; i++) {
626  max_length = std::max(config_.num_frames[i], max_length);
627  }
628  return 2 * max_length + primary_length;
629 }
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

◆ SetOutputWeights()

void SetOutputWeights ( int32  utterance_length,
std::vector< ChunkTimeInfo > *  chunk_info 
) const
private

Definition at line 889 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, count, ChunkTimeInfo::first_frame, ExampleGenerationConfig::frame_subsampling_factor, rnnlm::i, ChunkTimeInfo::num_frames, and ChunkTimeInfo::output_weights.

Referenced by UtteranceSplitter::GetChunksForUtterance().

891  {
892  int32 sf = config_.frame_subsampling_factor;
893  int32 num_output_frames = (utterance_length + sf - 1) / sf;
894  // num_output_frames is the number of frames of supervision. 'count[t]' will
895  // be the number of chunks that this output-frame t appears in. Note: the
896  // 'first_frame' and 'num_frames' members of ChunkTimeInfo will always be
897  // multiples of frame_subsampling_factor.
898  std::vector<int32> count(num_output_frames, 0);
899  int32 num_chunks = chunk_info->size();
900  for (int32 i = 0; i < num_chunks; i++) {
901  ChunkTimeInfo &chunk = (*chunk_info)[i];
902  for (int32 t = chunk.first_frame / sf;
903  t < (chunk.first_frame + chunk.num_frames) / sf;
904  t++)
905  count[t]++;
906  }
907  for (int32 i = 0; i < num_chunks; i++) {
908  ChunkTimeInfo &chunk = (*chunk_info)[i];
909  chunk.output_weights.resize(chunk.num_frames / sf);
910  int32 t_start = chunk.first_frame / sf;
911  for (int32 t = t_start;
912  t < (chunk.first_frame + chunk.num_frames) / sf;
913  t++)
914  chunk.output_weights[t - t_start] = 1.0 / count[t];
915  }
916 }
const size_t count
const ExampleGenerationConfig & config_

Member Data Documentation

◆ chunk_size_to_count_

std::map<int32, int32> chunk_size_to_count_
private

◆ config_

const ExampleGenerationConfig& config_
private

◆ splits_for_length_

std::vector<std::vector<std::vector<int32> > > splits_for_length_
private

◆ total_frames_in_chunks_

int64 total_frames_in_chunks_
private

◆ total_frames_overlap_

int64 total_frames_overlap_
private

◆ total_input_frames_

int64 total_input_frames_
private

◆ total_num_chunks_

int64 total_num_chunks_
private

◆ total_num_utterances_

int32 total_num_utterances_
private

The documentation for this class was generated from the following files: