Collaboration diagram for OnlinePitchFeatureImpl:

[legend]

Public Member Functions
	OnlinePitchFeatureImpl (const PitchExtractionOptions &opts)

int32	Dim () const

BaseFloat	FrameShiftInSeconds () const

int32	NumFramesReady () const

bool	IsLastFrame (int32 frame) const

void	GetFrame (int32 frame, VectorBase< BaseFloat > *feat)

void	AcceptWaveform (BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)

void	InputFinished ()

	~OnlinePitchFeatureImpl ()

	OnlinePitchFeatureImpl (const OnlinePitchFeatureImpl &other)

Private Member Functions
int32	NumFramesAvailable (int64 num_downsampled_samples, bool snip_edges) const
	This function works out from the signal how many frames are currently available to process (this is called from inside AcceptWaveform()). More...

void	ExtractFrame (const VectorBase< BaseFloat > &downsampled_wave_part, int64 frame_index, VectorBase< BaseFloat > *window)
	This function extracts from the signal the samples numbered from "sample_index" (numbered in the full downsampled signal, not just this part), and of length equal to window->Dim(). More...

void	RecomputeBacktraces ()
	This function is called after we reach frame "recompute_frame", or when InputFinished() is called, whichever comes sooner. More...

void	UpdateRemainder (const VectorBase< BaseFloat > &downsampled_wave_part)
	This function updates downsampled_signal_remainder_, downsampled_samples_processed_, signal_sum_ and signal_sumsq_; it's called from AcceptWaveform(). More...

Private Attributes
PitchExtractionOptions	opts_

int32	nccf_first_lag_

int32	nccf_last_lag_

Vector< BaseFloat >	lags_

ArbitraryResample *	nccf_resampler_

LinearResample *	signal_resampler_

std::vector< PitchFrameInfo * >	frame_info_

std::vector< NccfInfo * >	nccf_info_

int32	frames_latency_

Vector< BaseFloat >	forward_cost_

double	forward_cost_remainder_

std::vector< std::pair< int32, BaseFloat > >	lag_nccf_

bool	input_finished_

double	signal_sumsq_
	sum-squared of previously processed parts of signal; used to get NCCF ballast term. More...

double	signal_sum_
	sum of previously processed parts of signal; used to do mean-subtraction when getting sum-squared, along with signal_sumsq_. More...

int64	downsampled_samples_processed_
	downsampled_samples_processed_ is the number of samples (after downsampling) that we got in previous calls to AcceptWaveform(). More...

Vector< BaseFloat >	downsampled_signal_remainder_
	This is a small remainder of the previous downsampled signal; it's used by ExtractFrame for frames near the boundary of two waveforms supplied to AcceptWaveform(). More...

Detailed Description

Definition at line 574 of file pitch-functions.cc.

Constructor & Destructor Documentation

◆ OnlinePitchFeatureImpl() [1/2]

OnlinePitchFeatureImpl ( const PitchExtractionOptions & opts )

explicit

Definition at line 715 of file pitch-functions.cc.

                                        :
     opts_(opts), forward_cost_remainder_(0.0), input_finished_(false),
     signal_sumsq_(0.0), signal_sum_(0.0), downsampled_samples_processed_(0) {
   // Builds the signal resampler and the NCCF resampler, works out the range
   // of integer lags at which the NCCF is measured, and creates the state for
   // the fake "frame -1" from which the forward recursion starts.  Both
   // resamplers are allocated with new here and deleted in the destructor.
   signal_resampler_ = new LinearResample(opts.samp_freq, opts.resample_freq,
                                          opts.lowpass_cutoff,
                                          opts.lowpass_filter_width);
 
   // Widen the lag range [1/max_f0, 1/min_f0] (in seconds) on each side by
   // upsample_filter_width / (2 * resample_freq), then convert it to integer
   // sample lags at the resampled rate, rounding inwards.
   double outer_min_lag = 1.0 / opts.max_f0 -
       (opts.upsample_filter_width/(2.0 * opts.resample_freq));
   double outer_max_lag = 1.0 / opts.min_f0 +
       (opts.upsample_filter_width/(2.0 * opts.resample_freq));
   nccf_first_lag_ = ceil(opts.resample_freq * outer_min_lag);
   nccf_last_lag_ = floor(opts.resample_freq * outer_max_lag);
 
   frames_latency_ = 0;  // will be set in AcceptWaveform()
 
   // Choose the lags at which we resample the NCCF.
   SelectLags(opts, &lags_);
 
   // upsample_cutoff is the filter cutoff for upsampling the NCCF, which is the
   // Nyquist of the resampling frequency.  The NCCF is (almost completely)
   // bandlimited to around "lowpass_cutoff" (1000 by default), and when the
   // spectrum of this bandlimited signal is convolved with the spectrum of an
   // impulse train with frequency "resample_freq", which are separated by 4kHz,
   // we get energy at -5000,-3000, -1000...1000, 3000..5000, etc.  Filtering at
   // half the resampling frequency (2000 by default) is sufficient to get only
   // the first repetition.
   BaseFloat upsample_cutoff = opts.resample_freq * 0.5;
 
 
   Vector<BaseFloat> lags_offset(lags_);
   // lags_offset equals lags_ (which are the log-spaced lag values we want to
   // measure the NCCF at) with nccf_first_lag_ / opts.resample_freq subtracted
   // from each element, so we can treat the measured NCCF values as starting
   // from sample zero in a signal that starts at the point
   // nccf_first_lag_ / opts.resample_freq.  This is necessary because the
   // ArbitraryResample code assumes that the input signal starts from sample
   // zero.
   lags_offset.Add(-nccf_first_lag_ / opts.resample_freq);
 
   // Number of integer lags in [nccf_first_lag_, nccf_last_lag_], inclusive.
   int32 num_measured_lags = nccf_last_lag_ + 1 - nccf_first_lag_;
 
   nccf_resampler_ = new ArbitraryResample(num_measured_lags, opts.resample_freq,
                                           upsample_cutoff, lags_offset,
                                           opts.upsample_filter_width);
 
   // add a PitchFrameInfo object for frame -1 (not a real frame).
   frame_info_.push_back(new PitchFrameInfo(lags_.Dim()));
   // zeroes forward_cost_; this is what we want for the fake frame -1.
   forward_cost_.Resize(lags_.Dim());
 }

◆ ~OnlinePitchFeatureImpl()

~OnlinePitchFeatureImpl ( )

Definition at line 1037 of file pitch-functions.cc.

References OnlinePitchFeatureImpl::frame_info_, rnnlm::i, OnlinePitchFeatureImpl::nccf_info_, OnlinePitchFeatureImpl::nccf_resampler_, and OnlinePitchFeatureImpl::signal_resampler_.

                                                 {
   delete nccf_resampler_;
   delete signal_resampler_;
   for (size_t i = 0; i < frame_info_.size(); i++)
     delete frame_info_[i];
   for (size_t i = 0; i < nccf_info_.size(); i++)
     delete nccf_info_[i];
 }

◆ OnlinePitchFeatureImpl() [2/2]

OnlinePitchFeatureImpl ( const OnlinePitchFeatureImpl & other )

Member Function Documentation

◆ AcceptWaveform()

void AcceptWaveform	(	BaseFloat	sampling_rate,
		const VectorBase< BaseFloat > &	waveform
	)

Definition at line 1046 of file pitch-functions.cc.

Referenced by OnlinePitchFeature::AcceptWaveform(), and OnlinePitchFeatureImpl::InputFinished().

                                        {
   // Processes one more chunk of audio: resamples it, computes the NCCF for
   // every frame that has become available, resamples the NCCF onto lags_,
   // runs the forward update (PitchFrameInfo::ComputeBacktraces) for each new
   // frame, and finally refreshes lag_nccf_ and frames_latency_.
   // NOTE(review): the sampling_rate argument is never read in this body; the
   // resampler was configured from opts.samp_freq in the constructor.
   // NOTE(review): the body reads the chunk as "wave", while the signature
   // shown in this listing names the parameter "waveform" -- confirm the
   // definition's parameter name.
 
   // flush out the last few samples of input waveform only if input_finished_ ==
   // true.
   const bool flush = input_finished_;
 
   Vector<BaseFloat> downsampled_wave;
   signal_resampler_->Resample(wave, flush, &downsampled_wave);
 
   // these variables will be used to compute the root-mean-square value of the
   // signal for the ballast term.
   double cur_sumsq = signal_sumsq_, cur_sum = signal_sum_;
   int64 cur_num_samp = downsampled_samples_processed_,
       prev_frame_end_sample = 0;
   if (!opts_.nccf_ballast_online) {
     // Not in "online ballast" mode: include the whole new chunk in the
     // statistics up front, rather than frame by frame in the loop below.
     cur_sumsq += VecVec(downsampled_wave, downsampled_wave);
     cur_sum += downsampled_wave.Sum();
     cur_num_samp += downsampled_wave.Dim();
   }
 
   // end_frame is the total number of frames we can now process, including
   // previously processed ones.
   int32 end_frame = NumFramesAvailable(
       downsampled_samples_processed_ + downsampled_wave.Dim(), opts_.snip_edges);
   // "start_frame" is the first frame-index we process
   // (frame_info_ holds one extra entry for the fake frame -1, hence the -1).
   int32 start_frame = frame_info_.size() - 1,
       num_new_frames = end_frame - start_frame;
 
   if (num_new_frames == 0) {
     UpdateRemainder(downsampled_wave);
     return;
     // continuing to the rest of the code would generate
     // an error when sizing matrices with zero rows, and
     // anyway is a waste of time.
   }
 
   int32 num_measured_lags = nccf_last_lag_ + 1 - nccf_first_lag_,
       num_resampled_lags = lags_.Dim(),
       frame_shift = opts_.NccfWindowShift(),
       basic_frame_length = opts_.NccfWindowSize(),
       full_frame_length = basic_frame_length + nccf_last_lag_;
 
   Vector<BaseFloat> window(full_frame_length),
       inner_prod(num_measured_lags),
       norm_prod(num_measured_lags);
   Matrix<BaseFloat> nccf_pitch(num_new_frames, num_measured_lags),
       nccf_pov(num_new_frames, num_measured_lags);
 
   Vector<BaseFloat> cur_forward_cost(num_resampled_lags);
 
 
   // Because the resampling of the NCCF is more efficient when grouped together,
   // we first compute the NCCF for all frames, then resample as a matrix, then
   // do the Viterbi [that happens inside the constructor of PitchFrameInfo].
 
   for (int32 frame = start_frame; frame < end_frame; frame++) {
     // start_sample is index into the whole wave, not just this part.
     int64 start_sample;
     if (opts_.snip_edges) {
       // Usual case: offset starts at 0
       start_sample = static_cast<int64>(frame) * frame_shift;
     } else {
       // When we are not snipping the edges, the first offsets may be
       // negative. In this case we will pad with zeros, it should not impact
       // the pitch tracker.
       start_sample =
         static_cast<int64>((frame + 0.5) * frame_shift) - full_frame_length / 2;
     }
     ExtractFrame(downsampled_wave, start_sample, &window);
     if (opts_.nccf_ballast_online) {
       // use only up to end of current frame to compute root-mean-square value.
       // end_sample will be the sample-index into "downsampled_wave", so
       // not really comparable to start_sample.
       int64 end_sample = start_sample + full_frame_length -
           downsampled_samples_processed_;
       KALDI_ASSERT(end_sample > 0);  // or should have processed this frame last
                                      // time.  Note: end_sample is one past last
                                      // sample.
       if (end_sample > downsampled_wave.Dim()) {
         KALDI_ASSERT(input_finished_);
         end_sample = downsampled_wave.Dim();
       }
       // Accumulate only the samples not already counted for an earlier frame
       // of this chunk.
       SubVector<BaseFloat> new_part(downsampled_wave, prev_frame_end_sample,
                                     end_sample - prev_frame_end_sample);
       cur_num_samp += new_part.Dim();
       cur_sumsq += VecVec(new_part, new_part);
       cur_sum += new_part.Sum();
       prev_frame_end_sample = end_sample;
     }
     // Mean of squares minus squared mean of the samples accumulated so far.
     double mean_square = cur_sumsq / cur_num_samp -
         pow(cur_sum / cur_num_samp, 2.0);
 
     ComputeCorrelation(window, nccf_first_lag_, nccf_last_lag_,
                        basic_frame_length, &inner_prod, &norm_prod);
     // Two NCCFs are computed per frame: one with a zero ballast term (stored
     // in nccf_pov) and one with the energy-dependent ballast (nccf_pitch).
     double nccf_ballast_pov = 0.0,
         nccf_ballast_pitch = pow(mean_square * basic_frame_length, 2) *
              opts_.nccf_ballast,
         avg_norm_prod = norm_prod.Sum() / norm_prod.Dim();
     SubVector<BaseFloat> nccf_pitch_row(nccf_pitch, frame - start_frame);
     ComputeNccf(inner_prod, norm_prod, nccf_ballast_pitch,
                 &nccf_pitch_row);
     SubVector<BaseFloat> nccf_pov_row(nccf_pov, frame - start_frame);
     ComputeNccf(inner_prod, norm_prod, nccf_ballast_pov,
                 &nccf_pov_row);
     // Per-frame statistics are kept only for the first recompute_frame
     // frames; RecomputeBacktraces() reads and then frees them.
     if (frame < opts_.recompute_frame)
       nccf_info_.push_back(new NccfInfo(avg_norm_prod, mean_square));
   }
 
   Matrix<BaseFloat> nccf_pitch_resampled(num_new_frames, num_resampled_lags);
   nccf_resampler_->Resample(nccf_pitch, &nccf_pitch_resampled);
   nccf_pitch.Resize(0, 0);  // no longer needed.
   Matrix<BaseFloat> nccf_pov_resampled(num_new_frames, num_resampled_lags);
   nccf_resampler_->Resample(nccf_pov, &nccf_pov_resampled);
   nccf_pov.Resize(0, 0);  // no longer needed.
 
   // We've finished dealing with the waveform so we can call UpdateRemainder
   // now; we need to call it before we possibly call RecomputeBacktraces()
   // below, which is why we don't do it at the very end.
   UpdateRemainder(downsampled_wave);
 
   std::vector<std::pair<int32, int32 > > index_info;
 
   for (int32 frame = start_frame; frame < end_frame; frame++) {
     int32 frame_idx = frame - start_frame;
     PitchFrameInfo *prev_info = frame_info_.back(),
         *cur_info = new PitchFrameInfo(prev_info);
     cur_info->SetNccfPov(nccf_pov_resampled.Row(frame_idx));
     cur_info->ComputeBacktraces(opts_, nccf_pitch_resampled.Row(frame_idx),
                                 lags_, forward_cost_, &index_info,
                                 &cur_forward_cost);
     forward_cost_.Swap(&cur_forward_cost);
     // Renormalize forward_cost so smallest element is zero.
     BaseFloat remainder = forward_cost_.Min();
     forward_cost_remainder_ += remainder;
     forward_cost_.Add(-remainder);
     frame_info_.push_back(cur_info);
     if (frame < opts_.recompute_frame)
       nccf_info_[frame]->nccf_pitch_resampled =
           nccf_pitch_resampled.Row(frame_idx);
     if (frame == opts_.recompute_frame - 1 && !opts_.nccf_ballast_online)
       RecomputeBacktraces();
   }
 
   // Trace back the best-path.
   int32 best_final_state;
   forward_cost_.Min(&best_final_state);
   lag_nccf_.resize(frame_info_.size() - 1);  // will keep any existing data.
   frame_info_.back()->SetBestState(best_final_state, lag_nccf_);
   frames_latency_ =
       frame_info_.back()->ComputeLatency(opts_.max_frames_latency);
   KALDI_VLOG(4) << "Latency is " << frames_latency_;
 }

◆ Dim()

int32 Dim ( ) const

inline

Definition at line 578 of file pitch-functions.cc.

578 { return 2; }  // two values per frame; GetFrame() asserts feat->Dim() == 2

◆ ExtractFrame()

void ExtractFrame	(	const VectorBase< BaseFloat > &	downsampled_wave_part,
		int64	frame_index,
		VectorBase< BaseFloat > *	window
	)

private

This function extracts from the signal the samples numbered from "sample_index" (numbered in the full downsampled signal, not just this part), and of length equal to window->Dim().

It uses the data members downsampled_samples_processed_ and downsampled_signal_remainder_, as well as the more recent part of the downsampled wave "downsampled_wave_part" which is provided.

Parameters

downsampled_wave_part	One chunk of the downsampled wave, starting from sample-index downsampled_samples_processed_.
frame_index	The desired starting sample index (measured from the start of the whole signal, not just this part); the function body refers to this parameter as sample_index.
window	The part of the signal is output to here.

Definition at line 839 of file pitch-functions.cc.

References VectorBase< Real >::CopyFromVec(), VectorBase< Real >::Dim(), OnlinePitchFeatureImpl::downsampled_samples_processed_, OnlinePitchFeatureImpl::downsampled_signal_remainder_, rnnlm::i, OnlinePitchFeatureImpl::input_finished_, KALDI_ASSERT, OnlinePitchFeatureImpl::opts_, PitchExtractionOptions::preemph_coeff, VectorBase< Real >::Range(), VectorBase< Real >::SetZero(), and PitchExtractionOptions::snip_edges.

Referenced by OnlinePitchFeatureImpl::AcceptWaveform().

                                    {
   // Fills *window with window->Dim() samples of the downsampled signal
   // starting at absolute index sample_index, taking them from
   // downsampled_signal_remainder_ and/or downsampled_wave_part.  Frames that
   // stick out before the start or past the end of the signal are zero-padded;
   // in those cases a recursive call fills (and pre-emphasizes) only the part
   // of the window that overlaps the signal.
   int32 full_frame_length = window->Dim();
   // Position of the frame start relative to the first sample of
   // downsampled_wave_part; negative means the frame starts in the remainder.
   int32 offset = static_cast<int32>(sample_index -
                                     downsampled_samples_processed_);
 
   // Treat edge cases first
   if (sample_index < 0) {
     // Part of the frame is before the beginning of the signal. This
     // should only happen if opts_.snip_edges == false, when we are
     // processing the first few frames of signal. In this case
     // we pad with zeros.
     KALDI_ASSERT(opts_.snip_edges == false);
     int32 sub_frame_length = sample_index + full_frame_length;
     int32 sub_frame_index = full_frame_length - sub_frame_length;
     KALDI_ASSERT(sub_frame_length > 0 && sub_frame_index > 0);
     window->SetZero();
     // Fill only the tail of the window, starting from absolute sample 0.
     SubVector<BaseFloat> sub_window(*window, sub_frame_index, sub_frame_length);
     ExtractFrame(downsampled_wave_part, 0, &sub_window);
     return;
   }
 
   if (offset + full_frame_length > downsampled_wave_part.Dim()) {
     // Requested frame is past end of the signal.  This should only happen if
     // input_finished_ == true, when we're flushing out the last couple of
     // frames of signal.  In this case we pad with zeros.
     KALDI_ASSERT(input_finished_);
     int32 sub_frame_length = downsampled_wave_part.Dim() - offset;
     KALDI_ASSERT(sub_frame_length > 0);
     window->SetZero();
     // Fill only the head of the window with the samples that exist.
     SubVector<BaseFloat> sub_window(*window, 0, sub_frame_length);
     ExtractFrame(downsampled_wave_part, sample_index, &sub_window);
     return;
   }
 
   // "offset" is the offset of the start of the frame, into this
   // signal.
   if (offset >= 0) {
     // frame is full inside the new part of the signal.
     window->CopyFromVec(downsampled_wave_part.Range(offset, full_frame_length));
   } else {
     // frame is partly in the remainder and partly in the new part.
     int32 remainder_offset = downsampled_signal_remainder_.Dim() + offset;
     KALDI_ASSERT(remainder_offset >= 0);  // or we didn't keep enough remainder.
     KALDI_ASSERT(offset + full_frame_length > 0);  // or we should have
                                                    // processed this frame last
                                                    // time.
 
     // The first old_length samples come from the end of the remainder, the
     // following new_length samples from the start of the new chunk.
     int32 old_length = -offset, new_length = offset + full_frame_length;
     window->Range(0, old_length).CopyFromVec(
         downsampled_signal_remainder_.Range(remainder_offset, old_length));
     window->Range(old_length, new_length).CopyFromVec(
         downsampled_wave_part.Range(0, new_length));
   }
   if (opts_.preemph_coeff != 0.0) {
     BaseFloat preemph_coeff = opts_.preemph_coeff;
     // Go from the last sample backwards so that each update reads the
     // not-yet-modified previous sample.
     for (int32 i = window->Dim() - 1; i > 0; i--)
       (*window)(i) -= preemph_coeff * (*window)(i-1);
     // Sample 0 has no predecessor in the window; it is scaled instead.
     (*window)(0) *= (1.0 - preemph_coeff);
   }
 }

◆ FrameShiftInSeconds()

BaseFloat FrameShiftInSeconds ( ) const

Definition at line 909 of file pitch-functions.cc.

References PitchExtractionOptions::frame_shift_ms, and OnlinePitchFeatureImpl::opts_.

Referenced by OnlinePitchFeature::FrameShiftInSeconds().

                                                             {
   return opts_.frame_shift_ms / 1000.0f;
 }

◆ GetFrame()

void GetFrame	(	int32	frame,
		VectorBase< BaseFloat > *	feat
	)

Definition at line 921 of file pitch-functions.cc.

References VectorBase< Real >::Dim(), KALDI_ASSERT, OnlinePitchFeatureImpl::lag_nccf_, OnlinePitchFeatureImpl::lags_, and OnlinePitchFeatureImpl::NumFramesReady().

Referenced by OnlinePitchFeature::GetFrame().

                                                                    {
   KALDI_ASSERT(frame < NumFramesReady() && feat->Dim() == 2);
   (*feat)(0) = lag_nccf_[frame].second;
   (*feat)(1) = 1.0 / lags_(lag_nccf_[frame].first);
 }

◆ InputFinished()

void InputFinished ( )

Definition at line 928 of file pitch-functions.cc.

References OnlinePitchFeatureImpl::AcceptWaveform(), OnlinePitchFeatureImpl::forward_cost_remainder_, OnlinePitchFeatureImpl::frame_info_, OnlinePitchFeatureImpl::frames_latency_, OnlinePitchFeatureImpl::input_finished_, KALDI_VLOG, PitchExtractionOptions::nccf_ballast_online, OnlinePitchFeatureImpl::opts_, PitchExtractionOptions::recompute_frame, OnlinePitchFeatureImpl::RecomputeBacktraces(), and PitchExtractionOptions::samp_freq.

Referenced by OnlinePitchFeature::InputFinished().

                                            {
   input_finished_ = true;
   // Process an empty waveform; this has an effect because
   // after setting input_finished_ to true, NumFramesAvailable()
   // will return a slightly larger number.
   AcceptWaveform(opts_.samp_freq, Vector<BaseFloat>());
   int32 num_frames = static_cast<size_t>(frame_info_.size() - 1);
   if (num_frames < opts_.recompute_frame && !opts_.nccf_ballast_online)
     RecomputeBacktraces();
   frames_latency_ = 0;
   KALDI_VLOG(3) << "Pitch-tracking Viterbi cost is "
                 << (forward_cost_remainder_ / num_frames)
                 << " per frame, over " << num_frames << " frames.";
 }

◆ IsLastFrame()

bool IsLastFrame ( int32 frame ) const

Definition at line 903 of file pitch-functions.cc.

References OnlinePitchFeatureImpl::input_finished_, KALDI_ASSERT, and OnlinePitchFeatureImpl::NumFramesReady().

Referenced by OnlinePitchFeature::IsLastFrame().

                                                           {
   int32 T = NumFramesReady();
   KALDI_ASSERT(frame < T);
   return (input_finished_ && frame + 1 == T);
 }

◆ NumFramesAvailable()

int32 NumFramesAvailable	(	int64	num_downsampled_samples,
		bool	snip_edges
	)		const

private

This function works out from the signal how many frames are currently available to process (this is called from inside AcceptWaveform()).

Note: the number of frames differs slightly from the number the old pitch code gave. Note: the number this returns depends on whether input_finished_ == true; if it is, it will "force out" a final frame or two.

Definition at line 768 of file pitch-functions.cc.

References OnlinePitchFeatureImpl::input_finished_, OnlinePitchFeatureImpl::nccf_last_lag_, PitchExtractionOptions::NccfWindowShift(), PitchExtractionOptions::NccfWindowSize(), and OnlinePitchFeatureImpl::opts_.

Referenced by OnlinePitchFeatureImpl::AcceptWaveform().

                                                           {
   int32 frame_shift = opts_.NccfWindowShift(),
       frame_length = opts_.NccfWindowSize();
   // Use the "full frame length" to compute the number
   // of frames only if the input is not finished.
   if (!input_finished_)
     frame_length += nccf_last_lag_;
   if (num_downsampled_samples < frame_length) {
     return 0;
   } else {
     if (!snip_edges) {
       if (input_finished_) {
         return static_cast<int32>(num_downsampled_samples * 1.0f /
                                   frame_shift + 0.5f);
       } else {
         return static_cast<int32>((num_downsampled_samples - frame_length / 2) *
                                    1.0f / frame_shift + 0.5f);
       }
     } else {
       return static_cast<int32>((num_downsampled_samples - frame_length) /
                                  frame_shift + 1);
     }
   }
 }

◆ NumFramesReady()

int32 NumFramesReady ( ) const

Definition at line 913 of file pitch-functions.cc.

References OnlinePitchFeatureImpl::frames_latency_, KALDI_ASSERT, and OnlinePitchFeatureImpl::lag_nccf_.

Referenced by OnlinePitchFeatureImpl::GetFrame(), and OnlinePitchFeatureImpl::IsLastFrame().

                                                    {
   int32 num_frames = lag_nccf_.size(),
       latency = frames_latency_;
   KALDI_ASSERT(latency <= num_frames);
   return num_frames - latency;
 }

◆ RecomputeBacktraces()

void RecomputeBacktraces ( )

private

This function is called after we reach frame "recompute_frame", or when InputFinished() is called, whichever comes sooner.

It recomputes the backtraces for frames zero through recompute_frame, if needed because the average energy of the signal has changed, affecting the nccf ballast term. It works out the average signal energy from downsampled_samples_processed_, signal_sum_ and signal_sumsq_ (which, if you see the calling code, might include more frames than just "recompute_frame", it might include up to the end of the current chunk).

Definition at line 945 of file pitch-functions.cc.

Referenced by OnlinePitchFeatureImpl::AcceptWaveform(), and OnlinePitchFeatureImpl::InputFinished().

                                                    {
   // Re-runs the forward pass over all frames processed so far, with each
   // frame's resampled NCCF rescaled to approximate a ballast term based on
   // the mean-square energy of every sample seen up to now.  On completion it
   // replaces forward_cost_, forward_cost_remainder_, lag_nccf_ and
   // frames_latency_.  nccf_info_ is freed and cleared on every path past the
   // num_frames == 0 check.  Must not be called when
   // opts_.nccf_ballast_online is true.
   KALDI_ASSERT(!opts_.nccf_ballast_online);
   // frame_info_ holds one extra entry for the fake frame -1.
   int32 num_frames = static_cast<int32>(frame_info_.size()) - 1;
 
   // The assertion reflects how we believe this function will be called.
   KALDI_ASSERT(num_frames <= opts_.recompute_frame);
   KALDI_ASSERT(nccf_info_.size() == static_cast<size_t>(num_frames));
   if (num_frames == 0)
     return;
   // Mean of squares minus squared mean over all samples processed so far.
   double num_samp = downsampled_samples_processed_, sum = signal_sum_,
       sumsq = signal_sumsq_, mean = sum / num_samp;
   BaseFloat mean_square = sumsq / num_samp - mean * mean;
 
   // Recompute only if some frame's stored energy differs from the new
   // estimate (ApproxEqual is called with a tolerance of 0.01).
   bool must_recompute = false;
   BaseFloat threshold = 0.01;
   for (int32 frame = 0; frame < num_frames; frame++)
     if (!ApproxEqual(nccf_info_[frame]->mean_square_energy,
                      mean_square, threshold))
       must_recompute = true;
 
   if (!must_recompute) {
     // Nothing to do.  We'll reach here, for instance, if everything was in one
     // chunk and opts_.nccf_ballast_online == false.  This is the case for
     // offline processing.
     for (size_t i = 0; i < nccf_info_.size(); i++)
       delete nccf_info_[i];
     nccf_info_.clear();
     return;
   }
 
   int32 num_states = forward_cost_.Dim(),
       basic_frame_length = opts_.NccfWindowSize();
 
   // Same formula as the ballast used in AcceptWaveform(), evaluated with the
   // updated mean_square.
   BaseFloat new_nccf_ballast = pow(mean_square * basic_frame_length, 2) *
       opts_.nccf_ballast;
 
   double forward_cost_remainder = 0.0;
   Vector<BaseFloat> forward_cost(num_states),  // start off at zero.
       next_forward_cost(forward_cost);
   std::vector<std::pair<int32, int32 > > index_info;
 
   for (int32 frame = 0; frame < num_frames; frame++) {
     NccfInfo &nccf_info = *nccf_info_[frame];
     BaseFloat old_mean_square = nccf_info_[frame]->mean_square_energy,
         avg_norm_prod = nccf_info_[frame]->avg_norm_prod,
         old_nccf_ballast = pow(old_mean_square * basic_frame_length, 2) *
             opts_.nccf_ballast,
         nccf_scale = pow((old_nccf_ballast + avg_norm_prod) /
                          (new_nccf_ballast + avg_norm_prod),
                          static_cast<BaseFloat>(0.5));
     // The "nccf_scale" is an estimate of the scaling factor by which the NCCF
     // would change on this frame, on average, by changing the ballast term from
     // "old_nccf_ballast" to "new_nccf_ballast".  It's not exact because the
     // "avg_norm_prod" is just an average of the product e1 * e2 of frame
     // energies of the (frame, shifted-frame), but these won't change that much
     // within a frame, and even if they do, the inaccuracy of the scaled NCCF
     // will still be very small if the ballast term didn't change much, or if
     // it's much larger or smaller than e1*e2.  By doing it as a simple scaling,
     // we save the overhead of the NCCF resampling, which is a considerable part
     // of the whole computation.
     nccf_info.nccf_pitch_resampled.Scale(nccf_scale);
 
     // frame_info_[0] is the fake frame -1, so real frame "frame" lives at
     // index frame + 1.
     frame_info_[frame + 1]->ComputeBacktraces(
         opts_, nccf_info.nccf_pitch_resampled, lags_,
         forward_cost, &index_info, &next_forward_cost);
 
     forward_cost.Swap(&next_forward_cost);
     // Renormalize so the smallest element is zero, accumulating what was
     // subtracted (as AcceptWaveform() does for forward_cost_).
     BaseFloat remainder = forward_cost.Min();
     forward_cost_remainder += remainder;
     forward_cost.Add(-remainder);
   }
   KALDI_VLOG(3) << "Forward-cost per frame changed from "
                 << (forward_cost_remainder_ / num_frames) << " to "
                 << (forward_cost_remainder / num_frames);
 
   // Install the recomputed costs as the object's current state.
   forward_cost_remainder_ = forward_cost_remainder;
   forward_cost_.Swap(&forward_cost);
 
   int32 best_final_state;
   forward_cost_.Min(&best_final_state);
 
   if (lag_nccf_.size() != static_cast<size_t>(num_frames))
     lag_nccf_.resize(num_frames);
 
   frame_info_.back()->SetBestState(best_final_state, lag_nccf_);
   frames_latency_ =
       frame_info_.back()->ComputeLatency(opts_.max_frames_latency);
   // The stored per-frame NCCF data is not needed after this point.
   for (size_t i = 0; i < nccf_info_.size(); i++)
     delete nccf_info_[i];
   nccf_info_.clear();
 }

◆ UpdateRemainder()

void UpdateRemainder ( const VectorBase< BaseFloat > & downsampled_wave_part )

private

This function updates downsampled_signal_remainder_, downsampled_samples_processed_, signal_sum_ and signal_sumsq_; it's called from AcceptWaveform().

Definition at line 794 of file pitch-functions.cc.

References VectorBase< Real >::Dim(), OnlinePitchFeatureImpl::downsampled_samples_processed_, OnlinePitchFeatureImpl::downsampled_signal_remainder_, OnlinePitchFeatureImpl::frame_info_, rnnlm::i, KALDI_ASSERT, OnlinePitchFeatureImpl::nccf_last_lag_, PitchExtractionOptions::NccfWindowShift(), PitchExtractionOptions::NccfWindowSize(), OnlinePitchFeatureImpl::opts_, OnlinePitchFeatureImpl::signal_sum_, OnlinePitchFeatureImpl::signal_sumsq_, VectorBase< Real >::Sum(), and kaldi::VecVec().

Referenced by OnlinePitchFeatureImpl::AcceptWaveform().

                                                         {
   // Folds the new chunk into signal_sumsq_ and signal_sum_, advances
   // downsampled_samples_processed_, and rebuilds
   // downsampled_signal_remainder_ so that it holds the samples from
   // next_frame_sample up to the end of the signal seen so far.
 
   // frame_info_ has an extra element at frame-1, so subtract
   // one from the length.
   // next_frame_sample (= frame_shift * next_frame) is the first sample index
   // we'll need for the next frame.
   // NOTE(review): frame_shift * next_frame matches the start sample that
   // AcceptWaveform() uses when opts_.snip_edges is true.  When snip_edges is
   // false, AcceptWaveform() starts frames at
   // (frame + 0.5) * frame_shift - full_frame_length / 2, which can be earlier
   // than this -- confirm that enough remainder is kept in that mode.
   int64 num_frames = static_cast<int64>(frame_info_.size()) - 1,
       next_frame = num_frames,
       frame_shift = opts_.NccfWindowShift(),
       next_frame_sample = frame_shift * next_frame;
 
   signal_sumsq_ += VecVec(downsampled_wave_part, downsampled_wave_part);
   signal_sum_ += downsampled_wave_part.Sum();
 
   // Total number of downsampled samples received once this chunk is
   // included.
   int64 next_downsampled_samples_processed =
       downsampled_samples_processed_ + downsampled_wave_part.Dim();
 
   if (next_frame_sample > next_downsampled_samples_processed) {
     // this could only happen in the weird situation that the full frame length
     // is less than the frame shift.
     int32 full_frame_length = opts_.NccfWindowSize() + nccf_last_lag_;
     KALDI_ASSERT(full_frame_length < frame_shift && "Code error");
     downsampled_signal_remainder_.Resize(0);
   } else {
     Vector<BaseFloat> new_remainder(next_downsampled_samples_processed -
                                     next_frame_sample);
     // note: next_frame_sample is the index into the entire signal, of
     // new_remainder(0).
     // i is the absolute index of the signal.
     for (int64 i = next_frame_sample;
          i < next_downsampled_samples_processed; i++) {
       if (i >= downsampled_samples_processed_) {  // in current signal.
         new_remainder(i - next_frame_sample) =
             downsampled_wave_part(i - downsampled_samples_processed_);
       } else {  // in old remainder; only reach here if waveform supplied is
         new_remainder(i - next_frame_sample) =                      //  tiny.
             downsampled_signal_remainder_(i - downsampled_samples_processed_ +
                                           downsampled_signal_remainder_.Dim());
       }
     }
     downsampled_signal_remainder_.Swap(&new_remainder);
   }
   // Only now advance the counter: the loop above needs the old value to map
   // absolute sample indices into the chunk and the old remainder.
   downsampled_samples_processed_ = next_downsampled_samples_processed;
 }

Member Data Documentation

◆ downsampled_samples_processed_

int64 downsampled_samples_processed_

private

downsampled_samples_processed_ is the number of samples (after downsampling) that we got in previous calls to AcceptWaveform().

Definition at line 707 of file pitch-functions.cc.

Referenced by OnlinePitchFeatureImpl::AcceptWaveform(), OnlinePitchFeatureImpl::ExtractFrame(), OnlinePitchFeatureImpl::RecomputeBacktraces(), and OnlinePitchFeatureImpl::UpdateRemainder().

◆ downsampled_signal_remainder_

Vector<BaseFloat> downsampled_signal_remainder_

private

This is a small remainder of the previous downsampled signal; it's used by ExtractFrame for frames near the boundary of two waveforms supplied to AcceptWaveform().

Definition at line 711 of file pitch-functions.cc.

Referenced by OnlinePitchFeatureImpl::ExtractFrame(), and OnlinePitchFeatureImpl::UpdateRemainder().