online-feature.cc
Go to the documentation of this file.
1 // feat/online-feature.cc
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 // 2014 Yanqing Sun, Junjie Wang,
5 // Daniel Povey, Korbinian Riedhammer
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include "feat/online-feature.h"
23 #include "transform/cmvn.h"
24 
25 namespace kaldi {
26 
28  items_to_hold_(items_to_hold == 0 ? -1 : items_to_hold),
29  first_available_index_(0) {
30 }
31 
33  for (auto *item : items_) {
34  delete item;
35  }
36 }
37 
39  if (index < first_available_index_) {
40  KALDI_ERR << "Attempted to retrieve feature vector that was "
41  "already removed by the RecyclingVector (index = "
42  << index << "; "
43  << "first_available_index = " << first_available_index_ << "; "
44  << "size = " << Size() << ")";
45  }
46  // 'at' does size checking.
47  return items_.at(index - first_available_index_);
48 }
49 
51  if (items_.size() == items_to_hold_) {
52  delete items_.front();
53  items_.pop_front();
55  }
56  items_.push_back(item);
57 }
58 
// Returns the logical size of the collection: first_available_index_ (the
// index of the oldest item that can still be retrieved) plus the number of
// items currently stored in items_.
// NOTE(review): presumably first_available_index_ counts the items that have
// already been recycled -- the statement that updates it is not visible in
// this listing; confirm against PushBack().
59 int RecyclingVector::Size() const {
60  return first_available_index_ + items_.size();
61 }
62 
63 template <class C>
65  VectorBase<BaseFloat> *feat) {
66  feat->CopyFromVec(*(features_.At(frame)));
67 };
68 
69 template <class C>
71  const typename C::Options &opts):
72  computer_(opts), window_function_(computer_.GetFrameOptions()),
73  features_(opts.frame_opts.max_feature_vectors),
74  input_finished_(false), waveform_offset_(0) {
75  // RE the following assert: search for ONLINE_IVECTOR_LIMIT in
76  // online-ivector-feature.cc.
77  // Casting to uint32, an unsigned type, means that -1 would be treated
78  // as `very large`.
79  KALDI_ASSERT(static_cast<uint32>(opts.frame_opts.max_feature_vectors) > 200);
80 }
81 
82 
83 template <class C>
85  BaseFloat sampling_rate) {
86  BaseFloat expected_sampling_rate = computer_.GetFrameOptions().samp_freq;
87 
88  if (resampler_ != nullptr) {
89  KALDI_ASSERT(resampler_->GetInputSamplingRate() == sampling_rate);
90  KALDI_ASSERT(resampler_->GetOutputSamplingRate() == expected_sampling_rate);
91  } else if (((sampling_rate > expected_sampling_rate) &&
92  computer_.GetFrameOptions().allow_downsample) ||
93  ((sampling_rate < expected_sampling_rate) &&
94  computer_.GetFrameOptions().allow_upsample)) {
95  resampler_.reset(new LinearResample(
96  sampling_rate, expected_sampling_rate,
97  std::min(sampling_rate / 2, expected_sampling_rate / 2), 6));
98  } else if (sampling_rate != expected_sampling_rate) {
99  KALDI_ERR << "Sampling frequency mismatch, expected "
100  << expected_sampling_rate << ", got " << sampling_rate
101  << "\nPerhaps you want to use the options "
102  "--allow_{upsample,downsample}";
103  }
104 }
105 
106 template <class C>
108  if (resampler_ != nullptr) {
109  // There may be a few samples left once we flush the resampler_ object, telling it
110  // that the file has finished. This should rarely make any difference.
111  Vector<BaseFloat> appended_wave;
112  Vector<BaseFloat> resampled_wave;
113  resampler_->Resample(appended_wave, true, &resampled_wave);
114 
115  if (resampled_wave.Dim() != 0) {
116  appended_wave.Resize(waveform_remainder_.Dim() +
117  resampled_wave.Dim());
118  if (waveform_remainder_.Dim() != 0)
119  appended_wave.Range(0, waveform_remainder_.Dim())
120  .CopyFromVec(waveform_remainder_);
121  appended_wave.Range(waveform_remainder_.Dim(), resampled_wave.Dim())
122  .CopyFromVec(resampled_wave);
123  waveform_remainder_.Swap(&appended_wave);
124  }
125  }
126  input_finished_ = true;
127  ComputeFeatures();
128 }
129 
130 template <class C>
132  BaseFloat sampling_rate, const VectorBase<BaseFloat> &original_waveform) {
133  if (original_waveform.Dim() == 0)
134  return; // Nothing to do.
135  if (input_finished_)
136  KALDI_ERR << "AcceptWaveform called after InputFinished() was called.";
137 
138  Vector<BaseFloat> appended_wave;
139  Vector<BaseFloat> resampled_wave;
140 
141  const VectorBase<BaseFloat> *waveform;
142 
143  MaybeCreateResampler(sampling_rate);
144  if (resampler_ == nullptr) {
145  waveform = &original_waveform;
146  } else {
147  resampler_->Resample(original_waveform, false, &resampled_wave);
148  waveform = &resampled_wave;
149  }
150 
151  appended_wave.Resize(waveform_remainder_.Dim() + waveform->Dim());
152  if (waveform_remainder_.Dim() != 0)
153  appended_wave.Range(0, waveform_remainder_.Dim())
154  .CopyFromVec(waveform_remainder_);
155  appended_wave.Range(waveform_remainder_.Dim(), waveform->Dim())
156  .CopyFromVec(*waveform);
157  waveform_remainder_.Swap(&appended_wave);
158  ComputeFeatures();
159 }
160 
161 template <class C>
163  const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
164  int64 num_samples_total = waveform_offset_ + waveform_remainder_.Dim();
165  int32 num_frames_old = features_.Size(),
166  num_frames_new = NumFrames(num_samples_total, frame_opts,
168  KALDI_ASSERT(num_frames_new >= num_frames_old);
169 
170  Vector<BaseFloat> window;
171  bool need_raw_log_energy = computer_.NeedRawLogEnergy();
172  for (int32 frame = num_frames_old; frame < num_frames_new; frame++) {
173  BaseFloat raw_log_energy = 0.0;
175  frame_opts, window_function_, &window,
176  need_raw_log_energy ? &raw_log_energy : NULL);
177  Vector<BaseFloat> *this_feature = new Vector<BaseFloat>(computer_.Dim(),
178  kUndefined);
179  // note: this online feature-extraction code does not support VTLN.
180  BaseFloat vtln_warp = 1.0;
181  computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature);
182  features_.PushBack(this_feature);
183  }
184  // OK, we will now discard any portion of the signal that will not be
185  // necessary to compute frames in the future.
186  int64 first_sample_of_next_frame = FirstSampleOfFrame(num_frames_new,
187  frame_opts);
188  int32 samples_to_discard = first_sample_of_next_frame - waveform_offset_;
189  if (samples_to_discard > 0) {
190  // discard the leftmost part of the waveform that we no longer need.
191  int32 new_num_samples = waveform_remainder_.Dim() - samples_to_discard;
192  if (new_num_samples <= 0) {
193  // odd, but we'll try to handle it.
194  waveform_offset_ += waveform_remainder_.Dim();
195  waveform_remainder_.Resize(0);
196  } else {
197  Vector<BaseFloat> new_remainder(new_num_samples);
198  new_remainder.CopyFromVec(waveform_remainder_.Range(samples_to_discard,
199  new_num_samples));
200  waveform_offset_ += samples_to_discard;
201  waveform_remainder_.Swap(&new_remainder);
202  }
203  }
204 }
205 
206 // instantiate the templates defined here for MFCC, PLP and filterbank classes.
210 
212  speaker_cmvn_stats(other.speaker_cmvn_stats),
213  global_cmvn_stats(other.global_cmvn_stats),
214  frozen_state(other.frozen_state) { }
215 
// Serializes this CMVN state to 'os'.
// Layout: an opening "<OnlineCmvnState>" token, then the three matrices
// (speaker_cmvn_stats, global_cmvn_stats, frozen_state), each preceded by its
// own tag token, then the closing "</OnlineCmvnState>" token.
// 'binary' is passed unchanged to every token write and every matrix write.
// The token order here is the on-disk format; Read() expects the same order.
216 void OnlineCmvnState::Write(std::ostream &os, bool binary) const {
217  WriteToken(os, binary, "<OnlineCmvnState>"); // magic string.
218  WriteToken(os, binary, "<SpeakerCmvnStats>");
219  speaker_cmvn_stats.Write(os, binary);
220  WriteToken(os, binary, "<GlobalCmvnStats>");
221  global_cmvn_stats.Write(os, binary);
222  WriteToken(os, binary, "<FrozenState>");
223  frozen_state.Write(os, binary);
224  WriteToken(os, binary, "</OnlineCmvnState>");
225 }
226 
// Deserializes this CMVN state from 'is', overwriting speaker_cmvn_stats,
// global_cmvn_stats and frozen_state.
// The expected token sequence is the one produced by Write(): an opening
// "<OnlineCmvnState>" token, the three tagged matrices in the same order, and
// the closing "</OnlineCmvnState>" token.
// 'binary' is passed unchanged to every ExpectToken() and matrix Read() call.
// ExpectToken() is documented as throwing an exception when the token read
// from the stream does not match, so malformed input does not return normally.
227 void OnlineCmvnState::Read(std::istream &is, bool binary) {
228  ExpectToken(is, binary, "<OnlineCmvnState>"); // magic string.
229  ExpectToken(is, binary, "<SpeakerCmvnStats>");
230  speaker_cmvn_stats.Read(is, binary);
231  ExpectToken(is, binary, "<GlobalCmvnStats>");
232  global_cmvn_stats.Read(is, binary);
233  ExpectToken(is, binary, "<FrozenState>");
234  frozen_state.Read(is, binary);
235  ExpectToken(is, binary, "</OnlineCmvnState>");
236 }
237 
239  const OnlineCmvnState &cmvn_state,
241  opts_(opts), temp_stats_(2, src->Dim() + 1),
242  temp_feats_(src->Dim()), temp_feats_dbl_(src->Dim()),
243  src_(src) {
244  SetState(cmvn_state);
245  if (!SplitStringToIntegers(opts.skip_dims, ":", false, &skip_dims_))
246  KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
247  << "integers)";
248 }
249 
252  opts_(opts), temp_stats_(2, src->Dim() + 1),
253  temp_feats_(src->Dim()), temp_feats_dbl_(src->Dim()),
254  src_(src) {
255  if (!SplitStringToIntegers(opts.skip_dims, ":", false, &skip_dims_))
256  KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
257  << "integers)";
258 }
259 
260 
262  int32 *cached_frame,
263  MatrixBase<double> *stats) {
264  KALDI_ASSERT(frame >= 0);
266  // look for a cached frame on a previous frame as close as possible in time
267  // to "frame". Return if we get one.
268  for (int32 t = frame; t >= 0 && t >= frame - opts_.ring_buffer_size; t--) {
269  if (t % opts_.modulus == 0) {
270  // if this frame should be cached in cached_stats_modulo_, then
271  // we'll look there, and we won't go back any further in time.
272  break;
273  }
274  int32 index = t % opts_.ring_buffer_size;
275  if (cached_stats_ring_[index].first == t) {
276  *cached_frame = t;
277  stats->CopyFromMat(cached_stats_ring_[index].second);
278  return;
279  }
280  }
281  int32 n = frame / opts_.modulus;
282  if (n >= cached_stats_modulo_.size()) {
283  if (cached_stats_modulo_.size() == 0) {
284  *cached_frame = -1;
285  stats->SetZero();
286  return;
287  } else {
288  n = static_cast<int32>(cached_stats_modulo_.size() - 1);
289  }
290  }
291  *cached_frame = n * opts_.modulus;
293  stats->CopyFromMat(*(cached_stats_modulo_[n]));
294 }
295 
296 // Initialize ring buffer for caching stats.
298  if (cached_stats_ring_.empty() && opts_.ring_buffer_size > 0) {
299  Matrix<double> temp(2, this->Dim() + 1);
301  std::pair<int32, Matrix<double> >(-1, temp));
302  }
303 }
304 
306  KALDI_ASSERT(frame >= 0);
307  if (frame % opts_.modulus == 0) { // store in cached_stats_modulo_.
308  int32 n = frame / opts_.modulus;
309  if (n >= cached_stats_modulo_.size()) {
310  // The following assert is a limitation on in what order you can call
311  // CacheFrame. Fortunately the calling code always calls it in sequence,
312  // which it has to because you need a previous frame to compute the
313  // current one.
314  KALDI_ASSERT(n == cached_stats_modulo_.size());
315  cached_stats_modulo_.push_back(new Matrix<double>(stats));
316  } else {
317  KALDI_WARN << "Did not expect to reach this part of code.";
318  // do what seems right, but we shouldn't get here.
319  cached_stats_modulo_[n]->CopyFromMat(stats);
320  }
321  } else { // store in the ring buffer.
323  if (!cached_stats_ring_.empty()) {
324  int32 index = frame % cached_stats_ring_.size();
325  cached_stats_ring_[index].first = frame;
326  cached_stats_ring_[index].second.CopyFromMat(stats);
327  }
328  }
329 }
330 
332  for (size_t i = 0; i < cached_stats_modulo_.size(); i++)
333  delete cached_stats_modulo_[i];
334  cached_stats_modulo_.clear();
335 }
336 
338  MatrixBase<double> *stats_out) {
339  KALDI_ASSERT(frame >= 0 && frame < src_->NumFramesReady());
340 
341  int32 dim = this->Dim(), cur_frame;
342  GetMostRecentCachedFrame(frame, &cur_frame, stats_out);
343 
345  Vector<double> &feats_dbl(temp_feats_dbl_);
346  while (cur_frame < frame) {
347  cur_frame++;
348  src_->GetFrame(cur_frame, &feats);
349  feats_dbl.CopyFromVec(feats);
350  stats_out->Row(0).Range(0, dim).AddVec(1.0, feats_dbl);
352  stats_out->Row(1).Range(0, dim).AddVec2(1.0, feats_dbl);
353  (*stats_out)(0, dim) += 1.0;
354  // it's a sliding buffer; a frame at the back may be
355  // leaving the buffer so we have to subtract that.
356  int32 prev_frame = cur_frame - opts_.cmn_window;
357  if (prev_frame >= 0) {
358  // we need to subtract frame prev_f from the stats.
359  src_->GetFrame(prev_frame, &feats);
360  feats_dbl.CopyFromVec(feats);
361  stats_out->Row(0).Range(0, dim).AddVec(-1.0, feats_dbl);
363  stats_out->Row(1).Range(0, dim).AddVec2(-1.0, feats_dbl);
364  (*stats_out)(0, dim) -= 1.0;
365  }
366  CacheFrame(cur_frame, (*stats_out));
367  }
368 }
369 
370 
371 // static
373  const MatrixBase<double> &global_stats,
374  const OnlineCmvnOptions &opts,
375  MatrixBase<double> *stats) {
376  if (speaker_stats.NumRows() == 2 && !opts.normalize_variance) {
377  // this is just for efficiency: don't operate on the variance if it's not
378  // needed.
379  int32 cols = speaker_stats.NumCols(); // dim + 1
380  SubMatrix<double> stats_temp(*stats, 0, 1, 0, cols);
381  SmoothOnlineCmvnStats(speaker_stats.RowRange(0, 1),
382  global_stats.RowRange(0, 1),
383  opts, &stats_temp);
384  return;
385  }
386  int32 dim = stats->NumCols() - 1;
387  double cur_count = (*stats)(0, dim);
388  // If count exceeded cmn_window it would be an error in how "window_stats"
389  // was accumulated.
390  KALDI_ASSERT(cur_count <= 1.001 * opts.cmn_window);
391  if (cur_count >= opts.cmn_window)
392  return;
393  if (speaker_stats.NumRows() != 0) { // if we have speaker stats..
394  double count_from_speaker = opts.cmn_window - cur_count,
395  speaker_count = speaker_stats(0, dim);
396  if (count_from_speaker > opts.speaker_frames)
397  count_from_speaker = opts.speaker_frames;
398  if (count_from_speaker > speaker_count)
399  count_from_speaker = speaker_count;
400  if (count_from_speaker > 0.0)
401  stats->AddMat(count_from_speaker / speaker_count,
402  speaker_stats);
403  cur_count = (*stats)(0, dim);
404  }
405  if (cur_count >= opts.cmn_window)
406  return;
407  if (global_stats.NumRows() != 0) {
408  double count_from_global = opts.cmn_window - cur_count,
409  global_count = global_stats(0, dim);
410  KALDI_ASSERT(global_count > 0.0);
411  if (count_from_global > opts.global_frames)
412  count_from_global = opts.global_frames;
413  if (count_from_global > 0.0)
414  stats->AddMat(count_from_global / global_count,
415  global_stats);
416  } else {
417  KALDI_ERR << "Global CMN stats are required";
418  }
419 }
420 
422  VectorBase<BaseFloat> *feat) {
423  src_->GetFrame(frame, feat);
424  KALDI_ASSERT(feat->Dim() == this->Dim());
425  int32 dim = feat->Dim();
426  Matrix<double> &stats(temp_stats_);
427  stats.Resize(2, dim + 1, kUndefined); // Will do nothing if size was correct.
428  if (frozen_state_.NumRows() != 0) { // the CMVN state has been frozen.
429  stats.CopyFromMat(frozen_state_);
430  } else {
431  // first get the raw CMVN stats (this involves caching..)
432  this->ComputeStatsForFrame(frame, &stats);
433  // now smooth them.
436  opts_,
437  &stats);
438  }
439 
440  if (!skip_dims_.empty())
442 
443  // call the function ApplyCmvn declared in ../transform/cmvn.h, which
444  // requires a matrix.
445  // 1 row; num-cols == dim; stride == dim.
446  SubMatrix<BaseFloat> feat_mat(feat->Data(), 1, dim, dim);
447  // the function ApplyCmvn takes a matrix, so form a one-row matrix to give it.
448  if (opts_.normalize_mean)
449  ApplyCmvn(stats, opts_.normalize_variance, &feat_mat);
450  else
452 }
453 
454 void OnlineCmvn::Freeze(int32 cur_frame) {
455  int32 dim = this->Dim();
456  Matrix<double> stats(2, dim + 1);
457  // get the raw CMVN stats
458  this->ComputeStatsForFrame(cur_frame, &stats);
459  // now smooth them.
462  opts_,
463  &stats);
464  this->frozen_state_ = stats;
465 }
466 
468  OnlineCmvnState *state_out) {
469  *state_out = this->orig_state_;
470  { // This block updates state_out->speaker_cmvn_stats
471  int32 dim = this->Dim();
472  if (state_out->speaker_cmvn_stats.NumRows() == 0)
473  state_out->speaker_cmvn_stats.Resize(2, dim + 1);
474  Vector<BaseFloat> feat(dim);
475  Vector<double> feat_dbl(dim);
476  for (int32 t = 0; t <= cur_frame; t++) {
477  src_->GetFrame(t, &feat);
478  feat_dbl.CopyFromVec(feat);
479  state_out->speaker_cmvn_stats(0, dim) += 1.0;
480  state_out->speaker_cmvn_stats.Row(0).Range(0, dim).AddVec(1.0, feat_dbl);
481  state_out->speaker_cmvn_stats.Row(1).Range(0, dim).AddVec2(1.0, feat_dbl);
482  }
483  }
484  // Store any frozen state (the effect of the user possibly
485 // having called Freeze()).
486  state_out->frozen_state = frozen_state_;
487 }
488 
489 void OnlineCmvn::SetState(const OnlineCmvnState &cmvn_state) {
491  "You cannot call SetState() after processing data.");
492  orig_state_ = cmvn_state;
493  frozen_state_ = cmvn_state.frozen_state;
494 }
495 
497  int32 num_frames = src_->NumFramesReady();
498  if (num_frames > 0 && src_->IsLastFrame(num_frames - 1))
499  return num_frames;
500  else
501  return std::max<int32>(0, num_frames - right_context_);
502 }
503 
505  KALDI_ASSERT(left_context_ >= 0 && right_context_ >= 0);
506  KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
507  int32 dim_in = src_->Dim();
508  KALDI_ASSERT(feat->Dim() == dim_in * (1 + left_context_ + right_context_));
509  int32 T = src_->NumFramesReady();
510  for (int32 t2 = frame - left_context_; t2 <= frame + right_context_; t2++) {
511  int32 t2_limited = t2;
512  if (t2_limited < 0) t2_limited = 0;
513  if (t2_limited >= T) t2_limited = T - 1;
514  int32 n = t2 - (frame - left_context_); // 0 for left-most frame,
515  // increases to the right.
516  SubVector<BaseFloat> part(*feat, n * dim_in, dim_in);
517  src_->GetFrame(t2_limited, &part);
518  }
519 }
520 
523  src_(src) {
524  int32 src_dim = src_->Dim();
525  if (transform.NumCols() == src_dim) { // Linear transform
526  linear_term_ = transform;
527  offset_.Resize(transform.NumRows()); // Resize() will zero it.
528  } else if (transform.NumCols() == src_dim + 1) { // Affine transform
529  linear_term_ = transform.Range(0, transform.NumRows(), 0, src_dim);
530  offset_.Resize(transform.NumRows());
531  offset_.CopyColFromMat(transform, src_dim);
532  } else {
533  KALDI_ERR << "Dimension mismatch: source features have dimension "
534  << src_dim << " and LDA #cols is " << transform.NumCols();
535  }
536 }
537 
539  Vector<BaseFloat> input_feat(linear_term_.NumCols());
540  src_->GetFrame(frame, &input_feat);
541  feat->CopyFromVec(offset_);
542  feat->AddMatVec(1.0, linear_term_, kNoTrans, input_feat, 1.0);
543 }
544 
546  const std::vector<int32> &frames, MatrixBase<BaseFloat> *feats) {
547  KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
548  int32 num_frames = feats->NumRows(),
549  input_dim = linear_term_.NumCols();
550  Matrix<BaseFloat> input_feats(num_frames, input_dim, kUndefined);
551  src_->GetFrames(frames, &input_feats);
552  feats->CopyRowsFromVec(offset_);
553  feats->AddMatMat(1.0, input_feats, kNoTrans, linear_term_, kTrans, 1.0);
554 }
555 
556 
558  int32 src_dim = src_->Dim();
559  return src_dim * (1 + opts_.order);
560 }
561 
563  int32 num_frames = src_->NumFramesReady(),
564  context = opts_.order * opts_.window;
565  // "context" is the number of frames on the left or (more relevant
566  // here) right which we need in order to produce the output.
567  if (num_frames > 0 && src_->IsLastFrame(num_frames-1))
568  return num_frames;
569  else
570  return std::max<int32>(0, num_frames - context);
571 }
572 
574  VectorBase<BaseFloat> *feat) {
575  KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
576  KALDI_ASSERT(feat->Dim() == Dim());
577  // We'll produce a temporary matrix containing the features we want to
578  // compute deltas on, but truncated to the necessary context.
579  int32 context = opts_.order * opts_.window;
580  int32 left_frame = frame - context,
581  right_frame = frame + context,
582  src_frames_ready = src_->NumFramesReady();
583  if (left_frame < 0) left_frame = 0;
584  if (right_frame >= src_frames_ready)
585  right_frame = src_frames_ready - 1;
586  KALDI_ASSERT(right_frame >= left_frame);
587  int32 temp_num_frames = right_frame + 1 - left_frame,
588  src_dim = src_->Dim();
589  Matrix<BaseFloat> temp_src(temp_num_frames, src_dim);
590  for (int32 t = left_frame; t <= right_frame; t++) {
591  SubVector<BaseFloat> temp_row(temp_src, t - left_frame);
592  src_->GetFrame(t, &temp_row);
593  }
594  int32 temp_t = frame - left_frame; // temp_t is the offset of frame "frame"
595  // within temp_src
596  delta_features_.Process(temp_src, temp_t, feat);
597 }
598 
599 
602  src_(src), opts_(opts), delta_features_(opts) { }
603 
605  KALDI_ASSERT(frame >= 0);
606  if (static_cast<size_t>(frame) < cache_.size() && cache_[frame] != NULL) {
607  feat->CopyFromVec(*(cache_[frame]));
608  } else {
609  if (static_cast<size_t>(frame) >= cache_.size())
610  cache_.resize(frame + 1, NULL);
611  int32 dim = this->Dim();
612  cache_[frame] = new Vector<BaseFloat>(dim);
613  // The following call will crash if frame "frame" is not ready.
614  src_->GetFrame(frame, cache_[frame]);
615  feat->CopyFromVec(*(cache_[frame]));
616  }
617 }
618 
620  const std::vector<int32> &frames, MatrixBase<BaseFloat> *feats) {
621  int32 num_frames = frames.size();
622  // non_cached_frames will be the subset of 't' values in 'frames' which were
623  // not previously cached, which we therefore need to get from src_.
624  std::vector<int32> non_cached_frames;
625  // 'non_cached_indexes' stores the indexes 'i' into 'frames' corresponding to
626  // the corresponding frames in 'non_cached_frames'.
627  std::vector<int32> non_cached_indexes;
628  non_cached_frames.reserve(frames.size());
629  non_cached_indexes.reserve(frames.size());
630  for (int32 i = 0; i < num_frames; i++) {
631  int32 t = frames[i];
632  if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
633  feats->Row(i).CopyFromVec(*(cache_[t]));
634  } else {
635  non_cached_frames.push_back(t);
636  non_cached_indexes.push_back(i);
637  }
638  }
639  if (non_cached_frames.empty())
640  return;
641  int32 num_non_cached_frames = non_cached_frames.size(),
642  dim = this->Dim();
643  Matrix<BaseFloat> non_cached_feats(num_non_cached_frames, dim,
644  kUndefined);
645  src_->GetFrames(non_cached_frames, &non_cached_feats);
646  for (int32 i = 0; i < num_non_cached_frames; i++) {
647  int32 t = non_cached_frames[i];
648  if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
649  // We can reach this point due to repeat indexes in 'non_cached_frames'.
650  feats->Row(non_cached_indexes[i]).CopyFromVec(*(cache_[t]));
651  } else {
652  SubVector<BaseFloat> this_feat(non_cached_feats, i);
653  feats->Row(non_cached_indexes[i]).CopyFromVec(this_feat);
654  if (static_cast<size_t>(t) >= cache_.size())
655  cache_.resize(t + 1, NULL);
656  cache_[t] = new Vector<BaseFloat>(this_feat);
657  }
658  }
659 }
660 
661 
663  for (size_t i = 0; i < cache_.size(); i++)
664  delete cache_[i];
665  cache_.resize(0);
666 }
667 
668 
670  KALDI_ASSERT(feat->Dim() == Dim());
671 
672  SubVector<BaseFloat> feat1(*feat, 0, src1_->Dim());
673  SubVector<BaseFloat> feat2(*feat, src1_->Dim(), src2_->Dim());
674  src1_->GetFrame(frame, &feat1);
675  src2_->GetFrame(frame, &feat2);
676 };
677 
678 
679 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
OnlineTransform(const MatrixBase< BaseFloat > &transform, OnlineFeatureInterface *src)
The transform can be a linear transform, or an affine transform where the last column is the offset...
void MaybeCreateResampler(BaseFloat sampling_rate)
void Write(std::ostream &out, bool binary) const
write to stream.
RecyclingVector(int items_to_hold=-1)
By default it does not remove any elements.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
Matrix< double > speaker_cmvn_stats
void Write(std::ostream &os, bool binary) const
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
virtual int32 Dim() const
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
Vector< double > temp_feats_dbl_
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
Matrix< double > frozen_state
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
OnlineCmvn(const OnlineCmvnOptions &opts, const OnlineCmvnState &cmvn_state, OnlineFeatureInterface *src)
Initializer that sets the cmvn state.
void PushBack(Vector< BaseFloat > *item)
The ownership of the item is passed to this collection - do not delete the item.
Matrix< double > frozen_state_
Vector< BaseFloat > waveform_remainder_
void CacheFrame(int32 frame, const MatrixBase< double > &stats)
Cache this frame of stats.
int Size() const
This method returns the size as if no "recycling" had happened, i.e. the index of the first available item plus the number of items currently held.
void ExtractWindow(int64 sample_offset, const VectorBase< BaseFloat > &wave, int32 f, const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, Vector< BaseFloat > *window, BaseFloat *log_energy_pre_window)
FeatureWindowFunction window_function_
void GetState(int32 cur_frame, OnlineCmvnState *cmvn_state)
OnlineFeatureInterface * src_
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
Matrix< double > temp_stats_
virtual void GetFrames(const std::vector< int32 > &frames, MatrixBase< BaseFloat > *feats)
This is like GetFrame() but for a collection of frames.
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void InitRingBufferIfNeeded()
Initialize ring buffer for caching stats.
Vector< BaseFloat > temp_feats_
void Freeze(int32 cur_frame)
void Read(std::istream &is, bool binary)
OnlineGenericBaseFeature(const typename C::Options &opts)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
std::vector< std::pair< int32, Matrix< double > > > cached_stats_ring_
virtual void GetFrames(const std::vector< int32 > &frames, MatrixBase< BaseFloat > *feats)
This is like GetFrame() but for a collection of frames.
Matrix< BaseFloat > linear_term_
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void SetState(const OnlineCmvnState &cmvn_state)
int64 FirstSampleOfFrame(int32 frame, const FrameExtractionOptions &opts)
struct rnnlm::@11::@12 n
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 NumFrames(int64 num_samples, const FrameExtractionOptions &opts, bool flush)
This function returns the number of frames that we can extract from a wave file with the given number...
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
std::vector< int32 > skip_dims_
#define KALDI_WARN
Definition: kaldi-error.h:150
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
OnlineCmvnState orig_state_
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
OnlineCmvnOptions opts_
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetZero()
Sets matrix to zero.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector: this <- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
std::unique_ptr< LinearResample > resampler_
virtual void GetFrames(const std::vector< int32 > &frames, MatrixBase< BaseFloat > *feats)
This is like GetFrame() but for a collection of frames.
SubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
Definition: kaldi-matrix.h:209
A class representing a vector.
Definition: kaldi-vector.h:406
std::deque< Vector< BaseFloat > * > items_
LinearResample is a special case of ArbitraryResample, where we want to resample a signal at linearly...
Definition: resample.h:147
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void ComputeStatsForFrame(int32 frame, MatrixBase< double > *stats)
Computes the raw CMVN stats for this frame, making use of (and updating if necessary) the cached stat...
static void SmoothOnlineCmvnStats(const MatrixBase< double > &speaker_stats, const MatrixBase< double > &global_stats, const OnlineCmvnOptions &opts, MatrixBase< double > *stats)
Smooth the CMVN stats "stats" (which are stored in the normal format as a 2 x (dim+1) matrix)...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
OnlineDeltaFeature(const DeltaFeaturesOptions &opts, OnlineFeatureInterface *src)
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
std::vector< Matrix< double > * > cached_stats_modulo_
Matrix< double > global_cmvn_stats
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
void GetMostRecentCachedFrame(int32 frame, int32 *cached_frame, MatrixBase< double > *stats)
Get the most recent cached frame of CMVN stats.
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
Vector< BaseFloat > offset_
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
This is a templated class for online feature extraction; it's templated on a class like MfccComputer ...
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
Vector< BaseFloat > * At(int index) const
The ownership is being retained by this collection - do not delete the item.
virtual int32 Dim() const
OnlineFeatureInterface * src_
virtual int32 Dim() const
void ApplyCmvn(const MatrixBase< double > &stats, bool var_norm, MatrixBase< BaseFloat > *feats)
Apply cepstral mean and variance normalization to a matrix of features.
Definition: cmvn.cc:64
Sub-matrix representation.
Definition: kaldi-matrix.h:988
virtual int32 NumFramesReady() const =0
Returns the number of frames that are currently ready.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
virtual int32 Dim() const =0
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
void FakeStatsForSomeDims(const std::vector< int32 > &dims, MatrixBase< double > *stats)
Modify the stats so that for some dimensions (specified in "dims"), we replace them with "fake" stats...
Definition: cmvn.cc:168
OnlineFeatureInterface * src_
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94