Collaboration diagram for PitchInterpolator:

[legend]

Public Member Functions
	PitchInterpolator (const PitchInterpolatorOptions &opts, Matrix< BaseFloat > *mat, PitchInterpolatorStats *stats)

Private Member Functions
void	InitValues (const Matrix< BaseFloat > &mat)

void	MultiplyObsProb (int32 t)

void	ComputeTransitionProb (int32 t)

void	Forward ()

void	Backtrace (Matrix< BaseFloat > *mat, PitchInterpolatorStats *stats)

Private Attributes
const PitchInterpolatorOptions &	opts_

std::vector< BaseFloat >	min_pitch_

int32	num_frames_

int32	num_pitches_

std::vector< int32 >	pitch_

std::vector< BaseFloat >	p_voicing_

std::vector< std::vector< int32 > >	back_pointers_

Vector< BaseFloat >	log_alpha_

Vector< BaseFloat >	prev_log_alpha_

Detailed Description

Definition at line 77 of file interpolate-pitch.cc.

Constructor & Destructor Documentation

◆ PitchInterpolator()

PitchInterpolator	(	const PitchInterpolatorOptions &	opts,
		Matrix< BaseFloat > *	mat,
		PitchInterpolatorStats *	stats
	)

inline

Definition at line 79 of file interpolate-pitch.cc.

References PitchInterpolatorOptions::Check().

                                                   :
       opts_(opts) {
     // Runs the whole interpolation at construction time: validates the
     // options, loads the per-frame values from *mat, runs the forward pass,
     // then backtraces.  Backtrace() overwrites column 1 of *mat with the
     // chosen pitch and updates the counters in *stats.
     // Note: opts_ is a reference member, so `opts` must outlive this object.
     opts.Check();
     InitValues(*mat);
     Forward();
     Backtrace(mat, stats);
   }

Member Function Documentation

◆ Backtrace()

void Backtrace	(	Matrix< BaseFloat > *	mat,
		PitchInterpolatorStats *	stats
	)

inline private

Definition at line 212 of file interpolate-pitch.cc.

References KALDI_ASSERT, PitchInterpolatorStats::num_frames_changed, PitchInterpolatorStats::num_frames_tot, PitchInterpolatorStats::num_frames_zero, and PitchInterpolatorOptions::pitch_interval.

                                                                         {
     const BaseFloat pitch_interval = opts_.pitch_interval;
     BaseFloat *p_begin = log_alpha_.Data(), *p_end = p_begin + num_pitches_,
         *p_best = std::max_element(p_begin, p_end);
 
     std::vector<int32> best_pitch(num_frames_);
     int32 best_p = p_best - p_begin; // best discrete pitch p at time T-1.
     for (int32 t = num_frames_ - 1; t >= 0; t--) {
       { // Update stats:
         stats->num_frames_tot++;
         if (pitch_[t] == 0) stats->num_frames_zero++;
         else if (best_p != pitch_[t]) stats->num_frames_changed++;
       }
       BaseFloat pitch = min_pitch_[t] + pitch_interval * best_p;
       (*mat)(t, 1) = pitch;
       KALDI_ASSERT(best_p >= 0 && best_p < num_pitches_);
       if (t > 0)
         best_p = back_pointers_[t][best_p];
     }
   }

◆ ComputeTransitionProb()

void ComputeTransitionProb ( int32 t )

inline private

Definition at line 160 of file interpolate-pitch.cc.

References KALDI_ASSERT, and PitchInterpolatorOptions::pitch_interval.

                                       {
     KALDI_ASSERT(t > 0);
     BaseFloat pitch_interval = opts_.pitch_interval;
     back_pointers_[t].resize(num_pitches_);
     
     // Transition probability between pitch p and p' on times t-1 and t
     // is (p - p')^2, with the pitch measured in Hz.  We're doing Viterbi,
     // so always pick the max over the previous frame's t.
     KALDI_ASSERT(t > 0 && t < num_frames_);
     int32 K = floor(opts_.max_pitch_change_per_frame / pitch_interval + 0.5);
     // K is max #bins we can move; a kind of pruning, for speed.
     for (int32 p = 0; p < num_pitches_; p++) {
       int32 min_prev_p = p - K, max_prev_p = p + K;
       if (min_prev_p < 0) min_prev_p = 0;
       if (max_prev_p >= num_pitches_) max_prev_p = num_pitches_ - 1;
       BaseFloat best_logprob = -1.0e+10;
       int32 best_prev_p = -1;
       for (int32 prev_p = min_prev_p; prev_p <= max_prev_p; prev_p++) {
         BaseFloat delta_pitch = (min_pitch_[t-1] + prev_p * pitch_interval) -
             (min_pitch_[t] + p * pitch_interval);
         BaseFloat this_logprob = prev_log_alpha_(prev_p) 
             - 0.5 * delta_pitch * delta_pitch;
         if (this_logprob > best_logprob) {
           best_logprob = this_logprob;
           best_prev_p = prev_p;
         }
       }
       back_pointers_[t][p] = best_prev_p;
       log_alpha_(p) = best_logprob;
     }    
   }

◆ Forward()

void Forward ( )

inline private

Definition at line 192 of file interpolate-pitch.cc.

                  {
     // Viterbi in a discrete model of the pitch, in which the observation
     // probability of a pitch is p(voicing) at the observed pitch, plus
     // interpolator_factor * (1.0 - p(voicing)) at all pitches (see
     // MultiplyObsProb()).  The transition log-probability is -0.5 times the
     // squared difference in pitch.
     // [We measure this in Hz, not in integer values, to make it more invariant
     // to the discretization interval].
 
     back_pointers_.resize(num_frames_);
 
     log_alpha_.Resize(num_pitches_);
     prev_log_alpha_.Resize(num_pitches_);
     // Frame 0: start from all-zero log scores and apply the observation term.
     // NOTE(review): MultiplyObsProb(0) is called unconditionally and indexes
     // p_voicing_[0] and pitch_[0], so this assumes num_frames_ >= 1 -- confirm
     // that callers never pass an empty matrix.
     log_alpha_.Set(0.0);
     MultiplyObsProb(0);
     for (int32 t = 1; t < num_frames_; t++) {
       // After the swap, prev_log_alpha_ holds the scores for frame t-1;
       // ComputeTransitionProb() then overwrites every entry of log_alpha_.
       log_alpha_.Swap(&prev_log_alpha_);
       ComputeTransitionProb(t);
       MultiplyObsProb(t);
     }
   }

◆ InitValues()

void InitValues ( const Matrix< BaseFloat > & mat )

inline private

Definition at line 89 of file interpolate-pitch.cc.

References KALDI_ASSERT, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), PitchInterpolatorOptions::pitch_interval, and kaldi::RandUniform().

                                                 {
     // Reads the two columns of mat (column 0: voicing probability, column 1:
     // pitch) and fills num_frames_, num_pitches_, p_voicing_, min_pitch_ and
     // pitch_ for use by the forward pass and the backtrace.
     BaseFloat pitch_interval = opts_.pitch_interval;
     num_frames_ = mat.NumRows();
     KALDI_ASSERT(mat.NumCols() == 2);
     BaseFloat min_pitch = 1.0e+10, max_pitch = 0.0;
     pitch_.resize(num_frames_);
     p_voicing_.resize(num_frames_);
     // First pass: scale the voicing probabilities and find the range of the
     // nonzero pitch values.
     for (int32 f = 0; f < num_frames_; f++) {
       BaseFloat p_voicing = mat(f, 0), pitch = mat(f, 1);
       p_voicing *= opts_.max_voicing_prob;
       if (pitch == 0.0) {
         p_voicing = 0.0; // complete uncertainty about real pitch.
       } else {
         if (pitch < min_pitch) min_pitch = pitch;
         if (pitch > max_pitch) max_pitch = pitch;
       }
       p_voicing_[f] = p_voicing;
     }
     if (max_pitch == 0.0) { // No voiced frames at all.
       min_pitch = 100.0;
       max_pitch = 100.0;
     }
     // Make sure the range spans at least two discretization intervals.
     if (max_pitch <= min_pitch + (2.0 * pitch_interval)) {
       max_pitch = min_pitch + 2.0 * pitch_interval;
     } // avoid crashes.
 
     // Note: the + 2 here is for edge effects.
     num_pitches_ = floor((max_pitch - min_pitch) / pitch_interval + 0.5) + 2;
     KALDI_ASSERT(num_pitches_ >= 3);
     min_pitch_.resize(num_frames_);
     // Second pass: choose each frame's range bottom and quantize its pitch to
     // a bin index relative to that bottom.
     for (int32 f = 0; f < num_frames_; f++) {
       min_pitch_[f] = min_pitch - pitch_interval * RandUniform(); // bottom of
       // discretization range for each frame is randomly different.
       
       BaseFloat pitch = mat(f, 1);
       if (pitch == 0.0) {
         pitch_[f] = 0; // This will actually be a don't-care value; we just put in
         // some value that won't crash the algorithm.
       } else {
         // Round to the nearest bin above this frame's range bottom.
         int32 int_pitch = floor((pitch - min_pitch_[f]) / pitch_interval + 0.5);
         KALDI_ASSERT(int_pitch >= 0 && int_pitch < num_pitches_);
         pitch_[f] = int_pitch;
       }
     }
   }

◆ MultiplyObsProb()

void MultiplyObsProb ( int32 t )

inline private

Definition at line 135 of file interpolate-pitch.cc.

References kaldi::Log(), and VectorBase< Real >::SetRandn().

                                 {
     // For the forward computation:
     // Multiplies the observation probabilities into alpha at time t, i.e. adds
     // the log observation probabilities for frame t to log_alpha_ in place.
     // constant_prob is the constant part that does not depend on the pitch value:
     BaseFloat constant_prob = (1.0 - p_voicing_[t]) * opts_.interpolator_factor,
         specified_prob = p_voicing_[t] + constant_prob;
     // specified_prob adds in the extra probability mass at the observed pitch value.
     // NOTE(review): Log(constant_prob) and the division below assume
     // constant_prob > 0, i.e. p_voicing_[t] < 1 and interpolator_factor > 0;
     // presumably guaranteed by the options -- confirm.
     BaseFloat log_constant_prob = Log(constant_prob),
         log_ratio = Log(specified_prob / constant_prob);
     log_alpha_.Add(log_constant_prob); // add log_constant_prob to all pitches at this time.
     
     log_alpha_(pitch_[t]) += log_ratio; // corrects this to be like adding
     // log(specified_prob) to the observed pitch at this time.  Note: for frames
     // whose input pitch was 0.0, InitValues() sets pitch_[t] to 0 and
     // p_voicing_[t] to 0, so log_ratio is Log(1) = 0 and this has no effect.
     
     Vector<BaseFloat> temp_rand(num_pitches_);
     temp_rand.SetRandn(); // Set to Gaussian noise.  Type of noise doesn't really matter.
     log_alpha_.AddVec(0.01, temp_rand); // We add a small amount of noise to the
     // observation probabilities; this has the effect of breaking symmetries in
     // a more random way to overcome certain weirdnesses that could otherwise
     // happen due to the discretization.
   }

Member Data Documentation

◆ back_pointers_

std::vector<std::vector<int32> > back_pointers_

private

Definition at line 242 of file interpolate-pitch.cc.

◆ log_alpha_

Vector<BaseFloat> log_alpha_

private

Definition at line 245 of file interpolate-pitch.cc.

◆ min_pitch_

std::vector<BaseFloat> min_pitch_

private

Definition at line 233 of file interpolate-pitch.cc.

◆ num_frames_

int32 num_frames_

private

Definition at line 236 of file interpolate-pitch.cc.

◆ num_pitches_

int32 num_pitches_

private

Definition at line 237 of file interpolate-pitch.cc.

◆ opts_

const PitchInterpolatorOptions& opts_

private

Definition at line 232 of file interpolate-pitch.cc.

◆ p_voicing_

std::vector<BaseFloat> p_voicing_

private

Definition at line 240 of file interpolate-pitch.cc.

◆ pitch_

std::vector<int32> pitch_

private

Definition at line 238 of file interpolate-pitch.cc.

◆ prev_log_alpha_

Vector<BaseFloat> prev_log_alpha_

private

Definition at line 246 of file interpolate-pitch.cc.

The documentation for this class was generated from the following file:

featbin/interpolate-pitch.cc

Public Member Functions

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ PitchInterpolator()

Member Function Documentation

◆ Backtrace()

◆ ComputeTransitionProb()

◆ Forward()

◆ InitValues()

◆ MultiplyObsProb()

Member Data Documentation

◆ back_pointers_

◆ log_alpha_

◆ min_pitch_

◆ num_frames_

◆ num_pitches_

◆ opts_

◆ p_voicing_

◆ pitch_

◆ prev_log_alpha_