All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
UtteranceSplitter Class Reference

#include <nnet-example-utils.h>

Collaboration diagram for UtteranceSplitter:

Public Member Functions

 UtteranceSplitter (const ExampleGenerationConfig &config)
 
const ExampleGenerationConfig & Config () const
 
void GetChunksForUtterance (int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info)
 
bool LengthsMatch (const std::string &utt, int32 utterance_length, int32 supervision_length, int32 length_tolerance=0) const
 
 ~UtteranceSplitter ()
 
int32 ExitStatus ()
 

Private Member Functions

void InitSplitForLength ()
 
float DefaultDurationOfSplit (const std::vector< int32 > &split) const
 
int32 MaxUtteranceLength () const
 
void InitSplits (std::vector< std::vector< int32 > > *splits) const
 
void GetChunkSizesForUtterance (int32 utterance_length, std::vector< int32 > *chunk_sizes) const
 
void GetGapSizes (int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
 
void SetOutputWeights (int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info) const
 
void AccStatsForUtterance (int32 utterance_length, const std::vector< ChunkTimeInfo > &chunk_info)
 

Static Private Member Functions

static void DistributeRandomlyUniform (int32 n, std::vector< int32 > *vec)
 
static void DistributeRandomly (int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
 

Private Attributes

const ExampleGenerationConfig & config_
 
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
 
int32 total_num_utterances_
 
int64 total_input_frames_
 
int64 total_frames_overlap_
 
int64 total_num_chunks_
 
int64 total_frames_in_chunks_
 
std::map< int32, int32 > chunk_size_to_count_
 

Detailed Description

Definition at line 169 of file nnet-example-utils.h.

Constructor & Destructor Documentation

Definition at line 339 of file nnet-example-utils.cc.

References UtteranceSplitter::InitSplitForLength(), KALDI_ERR, and ExampleGenerationConfig::num_frames.

339  :
340  config_(config),
344  if (config.num_frames.empty()) {
345  KALDI_ERR << "You need to call ComputeDerived() on the "
346  "ExampleGenerationConfig().";
347  }
349 }
#define KALDI_ERR
Definition: kaldi-error.h:127
const ExampleGenerationConfig & config_

Definition at line 351 of file nnet-example-utils.cc.

References UtteranceSplitter::chunk_size_to_count_, KALDI_LOG, UtteranceSplitter::total_frames_in_chunks_, UtteranceSplitter::total_frames_overlap_, UtteranceSplitter::total_input_frames_, UtteranceSplitter::total_num_chunks_, and UtteranceSplitter::total_num_utterances_.

351  {
352  KALDI_LOG << "Split " << total_num_utterances_ << " utts, with "
353  << "total length " << total_input_frames_ << " frames ("
354  << (total_input_frames_ / 360000.0) << " hours assuming "
355  << "100 frames per second)";
356  float average_chunk_length = total_frames_in_chunks_ * 1.0 / total_num_chunks_,
357  overlap_percent = total_frames_overlap_ * 100.0 / total_input_frames_,
358  output_percent = total_frames_in_chunks_ * 100.0 / total_input_frames_,
359  output_percent_no_overlap = output_percent - overlap_percent;
360 
361  KALDI_LOG << "Average chunk length was " << average_chunk_length
362  << " frames; overlap between adjacent chunks was "
363  << overlap_percent << "% of input length; length of output was "
364  << output_percent << "% of input length (minus overlap = "
365  << output_percent_no_overlap << "%).";
366  if (chunk_size_to_count_.size() > 1) {
367  std::ostringstream os;
368  os << std::setprecision(4);
369  for (std::map<int32, int32>::iterator iter = chunk_size_to_count_.begin();
370  iter != chunk_size_to_count_.end(); ++iter) {
371  int32 chunk_size = iter->first,
372  num_frames = chunk_size * iter->second;
373  float percent_of_total = num_frames * 100.0 / total_frames_in_chunks_;
374  if (iter != chunk_size_to_count_.begin()) os << ", ";
375  os << chunk_size << " = " << percent_of_total << "%";
376  }
377  KALDI_LOG << "Output frames are distributed among chunk-sizes as follows: "
378  << os.str();
379  }
380 }
std::map< int32, int32 > chunk_size_to_count_
#define KALDI_LOG
Definition: kaldi-error.h:133

Member Function Documentation

void AccStatsForUtterance ( int32  utterance_length,
const std::vector< ChunkTimeInfo > &  chunk_info 
)
private

Definition at line 845 of file nnet-example-utils.cc.

References UtteranceSplitter::chunk_size_to_count_, UtteranceSplitter::total_frames_in_chunks_, UtteranceSplitter::total_frames_overlap_, UtteranceSplitter::total_input_frames_, UtteranceSplitter::total_num_chunks_, and UtteranceSplitter::total_num_utterances_.

Referenced by UtteranceSplitter::GetChunksForUtterance().

847  {
849  total_input_frames_ += utterance_length;
850 
851  for (size_t c = 0; c < chunk_info.size(); c++) {
852  int32 chunk_size = chunk_info[c].num_frames;
853  if (c > 0) {
854  int32 last_chunk_end = chunk_info[c-1].first_frame +
855  chunk_info[c-1].num_frames;
856  if (last_chunk_end > chunk_info[c].first_frame)
857  total_frames_overlap_ += last_chunk_end - chunk_info[c].first_frame;
858  }
859  std::map<int32, int32>::iterator iter = chunk_size_to_count_.find(
860  chunk_size);
861  if (iter == chunk_size_to_count_.end())
862  chunk_size_to_count_[chunk_size] = 1;
863  else
864  iter->second++;
865  total_num_chunks_ += 1;
866  total_frames_in_chunks_ += chunk_size;
867  }
868 }
std::map< int32, int32 > chunk_size_to_count_
const ExampleGenerationConfig& Config ( ) const
inline

Definition at line 175 of file nnet-example-utils.h.

References UtteranceSplitter::config_.

Referenced by kaldi::nnet3::ProcessFile().

175 { return config_; }
const ExampleGenerationConfig & config_
float DefaultDurationOfSplit ( const std::vector< int32 > &  split) const
private

Definition at line 382 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, rnnlm::i, KALDI_ASSERT, ExampleGenerationConfig::num_frames, and ExampleGenerationConfig::num_frames_overlap.

Referenced by UtteranceSplitter::InitSplitForLength(), and UtteranceSplitter::InitSplits().

383  {
384  if (split.empty()) // not a valid split, but useful to handle this case.
385  return 0.0;
386  float principal_num_frames = config_.num_frames[0],
387  num_frames_overlap = config_.num_frames_overlap;
388  KALDI_ASSERT(num_frames_overlap < principal_num_frames &&
389  "--num-frames-overlap value is too high");
390  float overlap_proportion = num_frames_overlap / principal_num_frames;
391  float ans = std::accumulate(split.begin(), split.end(), int32(0));
392  for (size_t i = 0; i + 1 < split.size(); i++) {
393  float min_adjacent_chunk_length = std::min(split[i], split[i + 1]),
394  overlap = overlap_proportion * min_adjacent_chunk_length;
395  ans -= overlap;
396  }
397  KALDI_ASSERT(ans > 0.0);
398  return ans;
399 }
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void DistributeRandomly ( int32  n,
const std::vector< int32 > &  magnitudes,
std::vector< int32 > *  vec 
)
static private

Definition at line 697 of file nnet-example-utils.cc.

References rnnlm::i, KALDI_ASSERT, and rnnlm::n.

Referenced by UtteranceSplitter::GetGapSizes().

699  {
700  KALDI_ASSERT(!vec->empty() && vec->size() == magnitudes.size());
701  int32 size = vec->size();
702  if (n < 0) {
703  DistributeRandomly(-n, magnitudes, vec);
704  for (int32 i = 0; i < size; i++)
705  (*vec)[i] *= -1;
706  return;
707  }
708  float total_magnitude = std::accumulate(magnitudes.begin(), magnitudes.end(),
709  int32(0));
710  KALDI_ASSERT(total_magnitude > 0);
711  // note: 'partial_counts' contains the negative of the partial counts, so
712  // when we sort the larger partial counts come first.
713  std::vector<std::pair<float, int32> > partial_counts;
714  int32 total_count = 0;
715  for (int32 i = 0; i < size; i++) {
716  float this_count = n * float(magnitudes[i]) / total_magnitude;
717  // note: cast of float to int32 rounds towards zero (down, in this
718  // case, since this_count >= 0).
719  int32 this_whole_count = static_cast<int32>(this_count),
720  this_partial_count = this_count - this_whole_count;
721  (*vec)[i] = this_whole_count;
722  total_count += this_whole_count;
723  partial_counts.push_back(std::pair<float, int32>(-this_partial_count, i));
724  }
725  KALDI_ASSERT(total_count <= n && total_count + size >= n);
726  std::sort(partial_counts.begin(), partial_counts.end());
727  int32 i = 0;
728  // Increment by one the elements of the vector that has the largest partial
729  // count, then the next largest partial count, and so on... until we reach the
730  // desired total-count 'n'.
731  for(; total_count < n; i++,total_count++) {
732  (*vec)[partial_counts[i].second]++;
733  }
734  KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n);
735 }
struct rnnlm::@11::@12 n
static void DistributeRandomly(int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void DistributeRandomlyUniform ( int32  n,
std::vector< int32 > *  vec 
)
static private

Definition at line 673 of file nnet-example-utils.cc.

References rnnlm::i, and KALDI_ASSERT.

Referenced by UtteranceSplitter::GetGapSizes().

673  {
674  KALDI_ASSERT(!vec->empty());
675  int32 size = vec->size();
676  if (n < 0) {
678  for (int32 i = 0; i < size; i++)
679  (*vec)[i] *= -1;
680  return;
681  }
682  // from this point we know n >= 0.
683  int32 common_part = n / size,
684  remainder = n % size, i;
685  for (i = 0; i < remainder; i++) {
686  (*vec)[i] = common_part + 1;
687  }
688  for (; i < size; i++) {
689  (*vec)[i] = common_part;
690  }
691  std::random_shuffle(vec->begin(), vec->end());
692  KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n);
693 }
static void DistributeRandomlyUniform(int32 n, std::vector< int32 > *vec)
struct rnnlm::@11::@12 n
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
int32 ExitStatus ( )
inline

Definition at line 198 of file nnet-example-utils.h.

References UtteranceSplitter::total_frames_in_chunks_.

Referenced by main().

198 { return (total_frames_in_chunks_ > 0 ? 0 : 1); }
void GetChunksForUtterance ( int32  utterance_length,
std::vector< ChunkTimeInfo > *  chunk_info 
)

Definition at line 816 of file nnet-example-utils.cc.

References UtteranceSplitter::AccStatsForUtterance(), UtteranceSplitter::config_, ChunkTimeInfo::first_frame, ExampleGenerationConfig::frame_subsampling_factor, UtteranceSplitter::GetChunkSizesForUtterance(), UtteranceSplitter::GetGapSizes(), rnnlm::i, KALDI_ASSERT, ExampleGenerationConfig::left_context, ChunkTimeInfo::left_context, ExampleGenerationConfig::left_context_initial, ChunkTimeInfo::num_frames, ExampleGenerationConfig::right_context, ChunkTimeInfo::right_context, ExampleGenerationConfig::right_context_final, and UtteranceSplitter::SetOutputWeights().

Referenced by kaldi::nnet3::ProcessFile().

818  {
819  std::vector<int32> chunk_sizes;
820  GetChunkSizesForUtterance(utterance_length, &chunk_sizes);
821  std::vector<int32> gaps(chunk_sizes.size());
822  GetGapSizes(utterance_length, true, chunk_sizes, &gaps);
823  int32 num_chunks = chunk_sizes.size();
824  chunk_info->resize(num_chunks);
825  int32 t = 0;
826  for (int32 i = 0; i < num_chunks; i++) {
827  t += gaps[i];
828  ChunkTimeInfo &info = (*chunk_info)[i];
829  info.first_frame = t;
830  info.num_frames = chunk_sizes[i];
831  info.left_context = (i == 0 && config_.left_context_initial >= 0 ?
833  info.right_context = (i == num_chunks - 1 && config_.right_context_final >= 0 ?
835  t += chunk_sizes[i];
836  }
837  SetOutputWeights(utterance_length, chunk_info);
838  AccStatsForUtterance(utterance_length, *chunk_info);
839  // check that the end of the last chunk doesn't go more than
840  // 'config_.frame_subsampling_factor - 1' frames past the end
841  // of the utterance. That amount, we treat as rounding error.
842  KALDI_ASSERT(t - utterance_length < config_.frame_subsampling_factor);
843 }
void GetGapSizes(int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
void AccStatsForUtterance(int32 utterance_length, const std::vector< ChunkTimeInfo > &chunk_info)
void SetOutputWeights(int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info) const
const ExampleGenerationConfig & config_
void GetChunkSizesForUtterance(int32 utterance_length, std::vector< int32 > *chunk_sizes) const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void GetChunkSizesForUtterance ( int32  utterance_length,
std::vector< int32 > *  chunk_sizes 
) const
private

Definition at line 574 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, rnnlm::i, KALDI_ASSERT, ExampleGenerationConfig::num_frames, ExampleGenerationConfig::num_frames_overlap, kaldi::RandInt(), and UtteranceSplitter::splits_for_length_.

Referenced by UtteranceSplitter::GetChunksForUtterance().

575  {
577  // 'primary_length' is the first-specified num-frames.
578  // It's the only chunk that may be repeated an arbitrary number
579  // of times.
580  int32 primary_length = config_.num_frames[0],
581  num_frames_overlap = config_.num_frames_overlap,
582  max_tabulated_length = splits_for_length_.size() - 1,
583  num_primary_length_repeats = 0;
584  KALDI_ASSERT(primary_length - num_frames_overlap > 0);
585  KALDI_ASSERT(utterance_length >= 0);
586  while (utterance_length > max_tabulated_length) {
587  utterance_length -= (primary_length - num_frames_overlap);
588  num_primary_length_repeats++;
589  }
590  KALDI_ASSERT(utterance_length >= 0);
591  const std::vector<std::vector<int32> > &possible_splits =
592  splits_for_length_[utterance_length];
593  if (possible_splits.empty()) {
594  chunk_sizes->clear();
595  return;
596  }
597  int32 num_possible_splits = possible_splits.size(),
598  randomly_chosen_split = RandInt(0, num_possible_splits - 1);
599  *chunk_sizes = possible_splits[randomly_chosen_split];
600  for (int32 i = 0; i < num_primary_length_repeats; i++)
601  chunk_sizes->push_back(primary_length);
602 
603  std::sort(chunk_sizes->begin(), chunk_sizes->end());
604  if (RandInt(0, 1) == 0) {
605  std::reverse(chunk_sizes->begin(), chunk_sizes->end());
606  }
607 }
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:94
void GetGapSizes ( int32  utterance_length,
bool  enforce_subsampling_factor,
const std::vector< int32 > &  chunk_sizes,
std::vector< int32 > *  gap_sizes 
) const
private

Definition at line 738 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, UtteranceSplitter::DistributeRandomly(), UtteranceSplitter::DistributeRandomlyUniform(), ExampleGenerationConfig::frame_subsampling_factor, rnnlm::i, KALDI_ASSERT, and KALDI_ERR.

Referenced by UtteranceSplitter::GetChunksForUtterance().

741  {
742  if (chunk_sizes.empty()) {
743  gap_sizes->clear();
744  return;
745  }
746  if (enforce_subsampling_factor && config_.frame_subsampling_factor > 1) {
747  int32 sf = config_.frame_subsampling_factor, size = chunk_sizes.size();
748  int32 utterance_length_reduced = (utterance_length + (sf - 1)) / sf;
749  std::vector<int32> chunk_sizes_reduced(chunk_sizes);
750  for (int32 i = 0; i < size; i++) {
751  KALDI_ASSERT(chunk_sizes[i] % config_.frame_subsampling_factor == 0);
752  chunk_sizes_reduced[i] /= config_.frame_subsampling_factor;
753  }
754  GetGapSizes(utterance_length_reduced, false,
755  chunk_sizes_reduced, gap_sizes);
756  KALDI_ASSERT(gap_sizes->size() == static_cast<size_t>(size));
757  for (int32 i = 0; i < size; i++)
758  (*gap_sizes)[i] *= config_.frame_subsampling_factor;
759  return;
760  }
761  int32 num_chunks = chunk_sizes.size(),
762  total_of_chunk_sizes = std::accumulate(chunk_sizes.begin(),
763  chunk_sizes.end(),
764  int32(0)),
765  total_gap = utterance_length - total_of_chunk_sizes;
766  gap_sizes->resize(num_chunks);
767 
768  if (total_gap < 0) {
769  // there is an overlap. Overlaps can only go between chunks, not at the
770  // beginning or end of the utterance. Also, we try to make the length of
771  // overlap proportional to the size of the smaller of the two chunks
772  // that the overlap is between.
773  if (num_chunks == 1) {
774  // there needs to be an overlap, but there is only one chunk... this means
775  // the chunk-size exceeds the utterance length, which is not allowed.
776  KALDI_ERR << "Chunk size is " << chunk_sizes[0]
777  << " but utterance length is only "
778  << utterance_length;
779  }
780 
781  // note the elements of 'overlaps' will be <= 0.
782  std::vector<int32> magnitudes(num_chunks - 1),
783  overlaps(num_chunks - 1);
784  // the 'magnitudes' vector will contain the minimum of the lengths of the
785  // two adjacent chunks between which we are going to consider having an
786  // overlap. These will be used to assign the overlap proportional to that
787  // size.
788  for (int32 i = 0; i + 1 < num_chunks; i++) {
789  magnitudes[i] = std::min<int32>(chunk_sizes[i], chunk_sizes[i + 1]);
790  }
791  DistributeRandomly(total_gap, magnitudes, &overlaps);
792  for (int32 i = 0; i + 1 < num_chunks; i++) {
793  // If the following condition does not hold, it's possible we
794  // could get chunk start-times less than zero. I don't believe
795  // it's possible for this condition to fail, but we're checking
796  // for it at this level to make debugging easier, just in case.
797  KALDI_ASSERT(overlaps[i] <= magnitudes[i]);
798  }
799 
800  (*gap_sizes)[0] = 0; // no gap before 1st chunk.
801  for (int32 i = 1; i < num_chunks; i++)
802  (*gap_sizes)[i] = overlaps[i-1];
803  } else {
804  // There may be a gap. Gaps can go at the start or end of the utterance, or
805  // between segments. We try to distribute the gaps evenly.
806  std::vector<int32> gaps(num_chunks + 1);
807  DistributeRandomlyUniform(total_gap, &gaps);
808  // the last element of 'gaps', the one at the end of the utterance, is
809  // implicit and doesn't have to be written to the output.
810  for (int32 i = 0; i < num_chunks; i++)
811  (*gap_sizes)[i] = gaps[i];
812  }
813 }
static void DistributeRandomlyUniform(int32 n, std::vector< int32 > *vec)
void GetGapSizes(int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
#define KALDI_ERR
Definition: kaldi-error.h:127
const ExampleGenerationConfig & config_
static void DistributeRandomly(int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void InitSplitForLength ( )
private

Definition at line 441 of file nnet-example-utils.cc.

References UtteranceSplitter::DefaultDurationOfSplit(), kaldi::GetVerboseLevel(), UtteranceSplitter::InitSplits(), KALDI_ASSERT, KALDI_VLOG, UtteranceSplitter::MaxUtteranceLength(), and UtteranceSplitter::splits_for_length_.

Referenced by UtteranceSplitter::UtteranceSplitter().

441  {
442  int32 max_utterance_length = MaxUtteranceLength();
443 
444  // The 'splits' vector is a list of possible splits (a split being
445  // a sorted vector of chunk-sizes).
446  // The vector 'splits' is itself sorted.
447  std::vector<std::vector<int32> > splits;
448  InitSplits(&splits);
449 
450 
451  // Define a split-index 0 <= s < splits.size() as index into the 'splits'
452  // vector, and let a cost c >= 0 represent the mismatch between an
453  // utterance length and the total length of the chunk sizes in a split:
454 
455  // c(default_duration, utt_length) = (default_duration > utt_length ?
456  // default_duration - utt_length :
457  // 2.0 * (utt_length - default_duration))
458  // [but as a special case, set c to infinity if the largest chunk size in the
459  // split is longer than the utterance length; we couldn't, in that case, use
460  // this split for this utterance].
461 
462  // 'costs_for_length[u][s]', indexed by utterance-length u and then split,
463  // contains the cost for utterance-length u and split s.
464 
465  std::vector<std::vector<float> > costs_for_length(
466  max_utterance_length + 1);
467  int32 num_splits = splits.size();
468 
469  for (int32 u = 0; u <= max_utterance_length; u++)
470  costs_for_length[u].reserve(num_splits);
471 
472  for (int32 s = 0; s < num_splits; s++) {
473  const std::vector<int32> &split = splits[s];
474  float default_duration = DefaultDurationOfSplit(split);
475  int32 max_chunk_size = *std::max_element(split.begin(), split.end());
476  for (int32 u = 0; u <= max_utterance_length; u++) {
477  // c is the cost for this utterance length and this split. We penalize
478  // gaps twice as strongly as overlaps, based on the intuition that
479  // completely throwing out frames of data is worse than counting them
480  // twice.
481  float c = (default_duration > float(u) ? default_duration - float(u) :
482  2.0 * (u - default_duration));
483  if (u < max_chunk_size) // can't fit the largest of the chunks in this
484  // utterance
485  c = std::numeric_limits<float>::max();
486  KALDI_ASSERT(c >= 0);
487  costs_for_length[u].push_back(c);
488  }
489  }
490 
491 
492  splits_for_length_.resize(max_utterance_length + 1);
493 
494  for (int32 u = 0; u <= max_utterance_length; u++) {
495  const std::vector<float> &costs = costs_for_length[u];
496  float min_cost = *std::min_element(costs.begin(), costs.end());
497  if (min_cost == std::numeric_limits<float>::max()) {
498  // All costs were infinity, because this utterance-length u is shorter
499  // than the smallest chunk-size. Leave splits_for_length_[u] as empty
500  // for this utterance-length, meaning we will not be able to choose any
501  // split, and such utterances will be discarded.
502  continue;
503  }
504  float cost_threshold = 1.9999; // We will choose pseudo-randomly from splits
505  // that are within this distance from the
506  // best cost. Make the threshold just
507  // slightly less than 2... this will
508  // hopefully make the behavior more
509  // deterministic for ties.
510  std::vector<int32> possible_splits;
511  std::vector<float>::const_iterator iter = costs.begin(), end = costs.end();
512  int32 s = 0;
513  for (; iter != end; ++iter,++s)
514  if (*iter < min_cost + cost_threshold)
515  splits_for_length_[u].push_back(splits[s]);
516  }
517 
518  if (GetVerboseLevel() >= 3) {
519  std::ostringstream os;
520  for (int32 u = 0; u <= max_utterance_length; u++) {
521  if (!splits_for_length_[u].empty()) {
522  os << u << "=(";
523  std::vector<std::vector<int32 > >::const_iterator
524  iter1 = splits_for_length_[u].begin(),
525  end1 = splits_for_length_[u].end();
526 
527  while (iter1 != end1) {
528  std::vector<int32>::const_iterator iter2 = iter1->begin(),
529  end2 = iter1->end();
530  while (iter2 != end2) {
531  os << *iter2;
532  ++iter2;
533  if (iter2 != end2) os << ",";
534  }
535  ++iter1;
536  if (iter1 != end1) os << "/";
537  }
538  os << ")";
539  if (u < max_utterance_length) os << ", ";
540  }
541  }
542  KALDI_VLOG(3) << "Utterance-length-to-splits map is: " << os.str();
543  }
544 }
int32 GetVerboseLevel()
Definition: kaldi-error.h:69
void InitSplits(std::vector< std::vector< int32 > > *splits) const
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
float DefaultDurationOfSplit(const std::vector< int32 > &split) const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
#define KALDI_VLOG(v)
Definition: kaldi-error.h:136
void InitSplits ( std::vector< std::vector< int32 > > *  splits) const
private

Definition at line 625 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, UtteranceSplitter::DefaultDurationOfSplit(), rnnlm::i, rnnlm::j, UtteranceSplitter::MaxUtteranceLength(), rnnlm::n, and ExampleGenerationConfig::num_frames.

Referenced by UtteranceSplitter::InitSplitForLength().

625  {
626  // we consider splits whose default duration (as returned by
627  // DefaultDurationOfSplit()) is up to MaxUtteranceLength() + primary_length.
628  // We can be confident without doing a lot of math, that splits above this
629  // length will never be chosen for any utterance-length up to
630  // MaxUtteranceLength() (which is the maximum we use).
631  int32 primary_length = config_.num_frames[0],
632  default_duration_ceiling = MaxUtteranceLength() + primary_length;
633 
634  typedef unordered_set<std::vector<int32>, VectorHasher<int32> > SetType;
635 
636  SetType splits_set;
637 
638  int32 num_lengths = config_.num_frames.size();
639 
640  // The splits we allow are: zero to two 'alternate' lengths, plus
641  // an arbitrary number of repeats of the 'primary' length. The repeats
642  // of the 'primary' length are handled by the inner loop over n.
643  // The zero to two 'alternate' lengths are handled by the loops over
644  // i and j. i == 0 and j == 0 are special cases; they mean, no
645  // alternate is chosen.
646  for (int32 i = 0; i < num_lengths; i++) {
647  for (int32 j = 0; j < num_lengths; j++) {
648  std::vector<int32> vec;
649  if (i > 0)
650  vec.push_back(config_.num_frames[i]);
651  if (j > 0)
652  vec.push_back(config_.num_frames[j]);
653  int32 n = 0;
654  while (DefaultDurationOfSplit(vec) <= default_duration_ceiling) {
655  if (!vec.empty()) // Don't allow the empty vector as a split.
656  splits_set.insert(vec);
657  n++;
658  vec.push_back(primary_length);
659  std::sort(vec.begin(), vec.end());
660  }
661  }
662  }
663  for (SetType::const_iterator iter = splits_set.begin();
664  iter != splits_set.end(); ++iter)
665  splits->push_back(*iter);
666  std::sort(splits->begin(), splits->end()); // make the order deterministic,
667  // for consistency of output
668  // between runs and C libraries.
669 }
struct rnnlm::@11::@12 n
const ExampleGenerationConfig & config_
float DefaultDurationOfSplit(const std::vector< int32 > &split) const
bool LengthsMatch ( const std::string &  utt,
int32  utterance_length,
int32  supervision_length,
int32  length_tolerance = 0 
) const

Definition at line 547 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, ExampleGenerationConfig::frame_subsampling_factor, and KALDI_WARN.

Referenced by kaldi::nnet3::ProcessFile().

550  {
552  expected_supervision_length = (utterance_length + sf - 1) / sf;
553  if (std::abs(supervision_length - expected_supervision_length)
554  <= length_tolerance) {
555  return true;
556  } else {
557  if (sf == 1) {
558  KALDI_WARN << "Supervision does not have expected length for utterance "
559  << utt << ": expected length = " << utterance_length
560  << ", got " << supervision_length;
561  } else {
562  KALDI_WARN << "Supervision does not have expected length for utterance "
563  << utt << ": expected length = (" << utterance_length
564  << " + " << sf << " - 1) / " << sf << " = "
565  << expected_supervision_length
566  << ", got: " << supervision_length
567  << " (note: --frame-subsampling-factor=" << sf << ")";
568  }
569  return false;
570  }
571 }
#define KALDI_WARN
Definition: kaldi-error.h:130
const ExampleGenerationConfig & config_
int32 MaxUtteranceLength ( ) const
private

Definition at line 610 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, rnnlm::i, KALDI_ASSERT, and ExampleGenerationConfig::num_frames.

Referenced by UtteranceSplitter::InitSplitForLength(), and UtteranceSplitter::InitSplits().

610  {
611  int32 num_lengths = config_.num_frames.size();
612  KALDI_ASSERT(num_lengths > 0);
613  // 'primary_length' is the first-specified num-frames.
614  // It's the only chunk that may be repeated an arbitrary number
615  // of times.
616  int32 primary_length = config_.num_frames[0],
617  max_length = primary_length;
618  for (int32 i = 0; i < num_lengths; i++) {
620  max_length = std::max(config_.num_frames[i], max_length);
621  }
622  return 2 * max_length + primary_length;
623 }
const ExampleGenerationConfig & config_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void SetOutputWeights ( int32  utterance_length,
std::vector< ChunkTimeInfo > *  chunk_info 
) const
private

Definition at line 871 of file nnet-example-utils.cc.

References UtteranceSplitter::config_, count, ChunkTimeInfo::first_frame, ExampleGenerationConfig::frame_subsampling_factor, rnnlm::i, ChunkTimeInfo::num_frames, and ChunkTimeInfo::output_weights.

Referenced by UtteranceSplitter::GetChunksForUtterance().

873  {
875  int32 num_output_frames = (utterance_length + sf - 1) / sf;
876  // num_output_frames is the number of frames of supervision. 'count[t]' will
877  // be the number of chunks that this output-frame t appears in. Note: the
878  // 'first_frame' and 'num_frames' members of ChunkTimeInfo will always be
879  // multiples of frame_subsampling_factor.
880  std::vector<int32> count(num_output_frames, 0);
881  int32 num_chunks = chunk_info->size();
882  for (int32 i = 0; i < num_chunks; i++) {
883  ChunkTimeInfo &chunk = (*chunk_info)[i];
884  for (int32 t = chunk.first_frame / sf;
885  t < (chunk.first_frame + chunk.num_frames) / sf;
886  t++)
887  count[t]++;
888  }
889  for (int32 i = 0; i < num_chunks; i++) {
890  ChunkTimeInfo &chunk = (*chunk_info)[i];
891  chunk.output_weights.resize(chunk.num_frames / sf);
892  int32 t_start = chunk.first_frame / sf;
893  for (int32 t = t_start;
894  t < (chunk.first_frame + chunk.num_frames) / sf;
895  t++)
896  chunk.output_weights[t - t_start] = 1.0 / count[t];
897  }
898 }
const size_t count
const ExampleGenerationConfig & config_

Member Data Documentation

std::map<int32, int32> chunk_size_to_count_
private
std::vector<std::vector<std::vector<int32> > > splits_for_length_
private
int64 total_frames_in_chunks_
private
int64 total_frames_overlap_
private
int64 total_input_frames_
private
int64 total_num_chunks_
private
int32 total_num_utterances_
private

The documentation for this class was generated from the following files: