PitchInterpolator Class Reference
Collaboration diagram for PitchInterpolator:

Public Member Functions

 PitchInterpolator (const PitchInterpolatorOptions &opts, Matrix< BaseFloat > *mat, PitchInterpolatorStats *stats)
 

Private Member Functions

void InitValues (const Matrix< BaseFloat > &mat)
 
void MultiplyObsProb (int32 t)
 
void ComputeTransitionProb (int32 t)
 
void Forward ()
 
void Backtrace (Matrix< BaseFloat > *mat, PitchInterpolatorStats *stats)
 

Private Attributes

const PitchInterpolatorOptions & opts_
 
std::vector< BaseFloat > min_pitch_
 
int32 num_frames_
 
int32 num_pitches_
 
std::vector< int32 > pitch_
 
std::vector< BaseFloat > p_voicing_
 
std::vector< std::vector< int32 > > back_pointers_
 
Vector< BaseFloat > log_alpha_
 
Vector< BaseFloat > prev_log_alpha_
 

Detailed Description

Definition at line 77 of file interpolate-pitch.cc.

Constructor & Destructor Documentation

◆ PitchInterpolator()

PitchInterpolator ( const PitchInterpolatorOptions &  opts,
Matrix< BaseFloat > *  mat,
PitchInterpolatorStats *  stats 
)
inline

Definition at line 79 of file interpolate-pitch.cc.

References PitchInterpolatorOptions::Check().

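79  PitchInterpolator(const PitchInterpolatorOptions &opts,
80  Matrix<BaseFloat> *mat,
81  PitchInterpolatorStats *stats):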
82  opts_(opts) {
83  opts.Check();
84  InitValues(*mat);
85  Forward();
86  Backtrace(mat, stats);
87  }
void Backtrace(Matrix< BaseFloat > *mat, PitchInterpolatorStats *stats)
void InitValues(const Matrix< BaseFloat > &mat)
const PitchInterpolatorOptions & opts_

Member Function Documentation

◆ Backtrace()

void Backtrace ( Matrix< BaseFloat > *  mat,
PitchInterpolatorStats *  stats 
)
inline private

Definition at line 212 of file interpolate-pitch.cc.

References KALDI_ASSERT, PitchInterpolatorStats::num_frames_changed, PitchInterpolatorStats::num_frames_tot, PitchInterpolatorStats::num_frames_zero, and PitchInterpolatorOptions::pitch_interval.

212  {
213  const BaseFloat pitch_interval = opts_.pitch_interval;
214  BaseFloat *p_begin = log_alpha_.Data(), *p_end = p_begin + num_pitches_,
215  *p_best = std::max_element(p_begin, p_end);
216 
217  std::vector<int32> best_pitch(num_frames_);
218  int32 best_p = p_best - p_begin; // best discrete pitch p at time T-1.
219  for (int32 t = num_frames_ - 1; t >= 0; t--) {
220  { // Update stats:
221  stats->num_frames_tot++;
222  if (pitch_[t] == 0) stats->num_frames_zero++;
223  else if (best_p != pitch_[t]) stats->num_frames_changed++;
224  }
225  BaseFloat pitch = min_pitch_[t] + pitch_interval * best_p;
226  (*mat)(t, 1) = pitch;
227  KALDI_ASSERT(best_p >= 0 && best_p < num_pitches_);
228  if (t > 0)
229  best_p = back_pointers_[t][best_p];
230  }
231  }
std::vector< BaseFloat > min_pitch_
kaldi::int32 int32
float BaseFloat
Definition: kaldi-types.h:29
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const PitchInterpolatorOptions & opts_
std::vector< int32 > pitch_
Vector< BaseFloat > log_alpha_
std::vector< std::vector< int32 > > back_pointers_

◆ ComputeTransitionProb()

void ComputeTransitionProb ( int32  t)
inline private

Definition at line 160 of file interpolate-pitch.cc.

References KALDI_ASSERT, and PitchInterpolatorOptions::pitch_interval.

160  {
161  KALDI_ASSERT(t > 0);
162  BaseFloat pitch_interval = opts_.pitch_interval;
163  back_pointers_[t].resize(num_pitches_);
164 
165  // Transition probability between pitch p and p' on times t-1 and t
166  // is (p - p')^2, with the pitch measured in Hz. We're doing Viterbi,
167  // so always pick the max over the previous frame's t.
168  KALDI_ASSERT(t > 0 && t < num_frames_);
169  int32 K = floor(opts_.max_pitch_change_per_frame / pitch_interval + 0.5);
170  // K is max #bins we can move; a kind of pruning, for speed.
171  for (int32 p = 0; p < num_pitches_; p++) {
172  int32 min_prev_p = p - K, max_prev_p = p + K;
173  if (min_prev_p < 0) min_prev_p = 0;
174  if (max_prev_p >= num_pitches_) max_prev_p = num_pitches_ - 1;
175  BaseFloat best_logprob = -1.0e+10;
176  int32 best_prev_p = -1;
177  for (int32 prev_p = min_prev_p; prev_p <= max_prev_p; prev_p++) {
178  BaseFloat delta_pitch = (min_pitch_[t-1] + prev_p * pitch_interval) -
179  (min_pitch_[t] + p * pitch_interval);
180  BaseFloat this_logprob = prev_log_alpha_(prev_p)
181  - 0.5 * delta_pitch * delta_pitch;
182  if (this_logprob > best_logprob) {
183  best_logprob = this_logprob;
184  best_prev_p = prev_p;
185  }
186  }
187  back_pointers_[t][p] = best_prev_p;
188  log_alpha_(p) = best_logprob;
189  }
190  }
std::vector< BaseFloat > min_pitch_
kaldi::int32 int32
float BaseFloat
Definition: kaldi-types.h:29
Vector< BaseFloat > prev_log_alpha_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const PitchInterpolatorOptions & opts_
Vector< BaseFloat > log_alpha_
std::vector< std::vector< int32 > > back_pointers_

◆ Forward()

void Forward ( )
inline private

Definition at line 192 of file interpolate-pitch.cc.

192  {
193  // Viterbi in a discrete model of the pitch, in which the observation
194  // probability of a pitch is p(voicing) at the observed pitch, and
195  // interpolator_factor_ * 1.0 - p(voicing) at all other pitches. the
196  // transition log-probability is -0.5 times the squared difference in pitch.
197  // [We measure this in Hz, not in integer values, to make it more invariant
198  // to the discretization interval].
199 
200  back_pointers_.resize(num_frames_);
201 
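202  log_alpha_.Resize(num_pitches_);
203  prev_log_alpha_.Resize(num_pitches_);
204  log_alpha_.Set(0.0);
205  MultiplyObsProb(0);
206  for (int32 t = 1; t < num_frames_; t++) {
207  log_alpha_.Swap(&prev_log_alpha_);
208  ComputeTransitionProb(t);
209  MultiplyObsProb(t);
210  }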
211  }
kaldi::int32 int32
Vector< BaseFloat > prev_log_alpha_
Vector< BaseFloat > log_alpha_
std::vector< std::vector< int32 > > back_pointers_

◆ InitValues()

void InitValues ( const Matrix< BaseFloat > &  mat)
inline private

Definition at line 89 of file interpolate-pitch.cc.

References KALDI_ASSERT, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), PitchInterpolatorOptions::pitch_interval, and kaldi::RandUniform().

89  {
90  BaseFloat pitch_interval = opts_.pitch_interval;
91  num_frames_ = mat.NumRows();
92  KALDI_ASSERT(mat.NumCols() == 2);
93  BaseFloat min_pitch = 1.0e+10, max_pitch = 0.0;
94  pitch_.resize(num_frames_);
95  p_voicing_.resize(num_frames_);
96  for (int32 f = 0; f < num_frames_; f++) {
97  BaseFloat p_voicing = mat(f, 0), pitch = mat(f, 1);
98  p_voicing *= opts_.max_voicing_prob;
99  if (pitch == 0.0) {
100  p_voicing = 0.0; // complete uncertainty about real pitch.
101  } else {
102  if (pitch < min_pitch) min_pitch = pitch;
103  if (pitch > max_pitch) max_pitch = pitch;
104  }
105  p_voicing_[f] = p_voicing;
106  }
107  if (max_pitch == 0.0) { // No voiced frames at all.
108  min_pitch = 100.0;
109  max_pitch = 100.0;
110  }
111  if (max_pitch <= min_pitch + (2.0 * pitch_interval)) {
112  max_pitch = min_pitch + 2.0 * pitch_interval;
113  } // avoid crashes.
114 
115  // Note: the + 2 here is for edge effects.
116  num_pitches_ = floor((max_pitch - min_pitch) / pitch_interval + 0.5) + 2;
118  min_pitch_.resize(num_frames_);
119  for (int32 f = 0; f < num_frames_; f++) {
120  min_pitch_[f] = min_pitch - pitch_interval * RandUniform(); // bottom of
121  // discretization range for each frame is randomly different.
122 
123  BaseFloat pitch = mat(f, 1);
124  if (pitch == 0.0) {
125  pitch_[f] = 0; // This will actually be a don't-care value; we just put in
126  // some value that won't crash the algorithm.
127  } else {
128  int32 int_pitch = floor((pitch - min_pitch_[f]) / pitch_interval + 0.5);
129  KALDI_ASSERT(int_pitch >= 0 && int_pitch < num_pitches_);
130  pitch_[f] = int_pitch;
131  }
132  }
133  }
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
std::vector< BaseFloat > p_voicing_
std::vector< BaseFloat > min_pitch_
kaldi::int32 int32
float BaseFloat
Definition: kaldi-types.h:29
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
const PitchInterpolatorOptions & opts_
std::vector< int32 > pitch_

◆ MultiplyObsProb()

void MultiplyObsProb ( int32  t)
inline private

Definition at line 135 of file interpolate-pitch.cc.

References kaldi::Log(), and VectorBase< Real >::SetRandn().

135  {
136  // For the forward computation:
137  // Multiplies the observation probabilities into alpha at time t.
138  // constant_prob is the constant part that does not depend on the pitch value:
139  BaseFloat constant_prob = (1.0 - p_voicing_[t]) * opts_.interpolator_factor,
140  specified_prob = p_voicing_[t] + constant_prob;
141  // specified_prob adds in the extra probability mass at the observed pitch value.
142  BaseFloat log_constant_prob = Log(constant_prob),
143  log_ratio = Log(specified_prob / constant_prob);
144  log_alpha_.Add(log_constant_prob); // add log_constant_prob to all pitches at this time.
145 
146  log_alpha_(pitch_[t]) += log_ratio; // corrects this to be like adding
147  // log(specified_prob) to the observed pitch at this time. Note: if pitch_[t] == 0,
148  // this won't have any effect because log_ratio will be zero too.
149 
150  Vector<BaseFloat> temp_rand(num_pitches_);
151  temp_rand.SetRandn(); // Set to Gaussian noise. Type of noise doesn't really matter.
152  log_alpha_.AddVec(0.01, temp_rand); // We add a small amount of noise to the
153  // observation probabilities; this has the effect of breaking symmetries in
154  // a more random way to overcome certain weirdnesses that could otherwise
155  // happen due to the discretization.
156  }
std::vector< BaseFloat > p_voicing_
float BaseFloat
Definition: kaldi-types.h:29
double Log(double x)
Definition: kaldi-math.h:100
A class representing a vector.
Definition: kaldi-vector.h:406
const PitchInterpolatorOptions & opts_
std::vector< int32 > pitch_
Vector< BaseFloat > log_alpha_

Member Data Documentation

◆ back_pointers_

std::vector<std::vector<int32> > back_pointers_
private

Definition at line 242 of file interpolate-pitch.cc.

◆ log_alpha_

Vector<BaseFloat> log_alpha_
private

Definition at line 245 of file interpolate-pitch.cc.

◆ min_pitch_

std::vector<BaseFloat> min_pitch_
private

Definition at line 233 of file interpolate-pitch.cc.

◆ num_frames_

int32 num_frames_
private

Definition at line 236 of file interpolate-pitch.cc.

◆ num_pitches_

int32 num_pitches_
private

Definition at line 237 of file interpolate-pitch.cc.

◆ opts_

const PitchInterpolatorOptions& opts_
private

Definition at line 232 of file interpolate-pitch.cc.

◆ p_voicing_

std::vector<BaseFloat> p_voicing_
private

Definition at line 240 of file interpolate-pitch.cc.

◆ pitch_

std::vector<int32> pitch_
private

Definition at line 238 of file interpolate-pitch.cc.

◆ prev_log_alpha_

Vector<BaseFloat> prev_log_alpha_
private

Definition at line 246 of file interpolate-pitch.cc.


The documentation for this class was generated from the following file:

interpolate-pitch.cc