47 }
else if (n < -1.0) {
50 BaseFloat f = pow((1.0001 - n), 0.15) - 1.0;
80 if (ndash > 1.0) ndash = 1.0;
82 BaseFloat r = -5.2 + 5.4 *
Exp(7.5 * (ndash - 1.0)) + 4.8 * ndash -
83 2.0 *
Exp(-10.0 * ndash) + 4.2 *
Exp(20.0 * (ndash - 1.0));
104 int32 nccf_window_size,
111 zero_mean_wave.
Add(-wave_part.
Sum() / nccf_window_size);
114 e1 =
VecVec(sub_vec1, sub_vec1);
115 for (
int32 lag = first_lag; lag <= last_lag; lag++) {
117 e2 =
VecVec(sub_vec2, sub_vec2);
118 sum =
VecVec(sub_vec1, sub_vec2);
119 (*inner_prod)(lag - first_lag) = sum;
120 (*norm_prod)(lag - first_lag) = e1 * e2;
136 inner_prod.
Dim() == nccf_vec->
Dim());
137 for (
int32 lag = 0; lag < inner_prod.
Dim(); lag++) {
139 denominator = pow(norm_prod(lag) + nccf_ballast, 0.5),
141 if (denominator != 0.0) {
142 nccf = numerator / denominator;
148 (*nccf_vec)(lag) = nccf;
162 std::vector<BaseFloat> tmp_lags;
164 tmp_lags.push_back(lag);
165 lags->
Resize(tmp_lags.size());
166 std::copy(tmp_lags.begin(), tmp_lags.end(), lags->
Data());
185 nccf_pitch.
Dim() == lags.
Dim());
186 local_cost->
Set(1.0);
188 local_cost->
AddVec(-1.0, nccf_pitch);
215 std::vector<std::pair<int32, BaseFloat> > &lag_nccf);
257 std::vector<std::pair<int32, int32> > *index_info,
302 for (
int32 i = 0;
i < num_states;
i++)
311 std::vector<std::pair<int32, int32> > *index_info,
313 int32 num_states = nccf_pitch.
Dim();
324 const BaseFloat *prev_forward_cost = prev_forward_cost_vec.
Data();
325 BaseFloat *this_forward_cost = this_forward_cost_vec->
Data();
327 if (index_info->empty())
328 index_info->resize(num_states);
331 std::vector<std::pair<int32, int32> > &bounds = *index_info;
338 if (pitch_use_naive_search) {
340 for (
int32 i = 0;
i < num_states;
i++) {
341 BaseFloat best_cost = std::numeric_limits<BaseFloat>::infinity();
343 for (
int32 j = 0;
j < num_states;
j++) {
345 + prev_forward_cost[
j];
346 if (this_cost < best_cost) {
347 best_cost = this_cost;
351 this_forward_cost[
i] = best_cost;
355 int32 last_backpointer = 0;
356 for (
int32 i = 0;
i < num_states;
i++) {
357 int32 start_j = last_backpointer;
358 BaseFloat best_cost = (start_j -
i) * (start_j -
i) * inter_frame_factor
359 + prev_forward_cost[start_j];
360 int32 best_j = start_j;
362 for (
int32 j = start_j + 1;
j < num_states;
j++) {
364 + prev_forward_cost[
j];
365 if (this_cost < best_cost) {
366 best_cost = this_cost;
373 this_forward_cost[
i] = best_cost;
374 bounds[
i].first = best_j;
376 bounds[
i].second = num_states - 1;
378 last_backpointer = best_j;
387 for (
int32 iter = 0; iter < num_states; iter++) {
388 bool changed =
false;
390 last_backpointer = num_states - 1;
391 for (
int32 i = num_states - 1;
i >= 0;
i--) {
392 int32 lower_bound = bounds[
i].first,
393 upper_bound = std::min(last_backpointer, bounds[
i].second);
394 if (upper_bound == lower_bound) {
395 last_backpointer = lower_bound;
401 if (best_j == upper_bound) {
404 last_backpointer = best_j;
410 for (
int32 j = upper_bound;
j > lower_bound + 1;
j--) {
412 + prev_forward_cost[
j];
413 if (this_cost < best_cost) {
414 best_cost = this_cost;
424 bounds[
i].second = best_j;
425 if (best_j != initial_best_j) {
426 this_forward_cost[
i] = best_cost;
430 last_backpointer = best_j;
433 last_backpointer = 0;
434 for (
int32 i = 0;
i < num_states;
i++) {
435 int32 lower_bound = std::max(last_backpointer, bounds[
i].first),
436 upper_bound = bounds[
i].second;
437 if (upper_bound == lower_bound) {
438 last_backpointer = lower_bound;
444 if (best_j == lower_bound) {
448 last_backpointer = best_j;
453 for (
int32 j = lower_bound;
j < upper_bound - 1;
j++) {
455 + prev_forward_cost[
j];
456 if (this_cost < best_cost) {
457 best_cost = this_cost;
467 bounds[
i].first = best_j;
468 if (best_j != initial_best_j) {
469 this_forward_cost[
i] = best_cost;
473 last_backpointer = best_j;
483 this_forward_cost_vec->
AddVec(1.0, local_cost);
488 std::vector<std::pair<int32, BaseFloat> > &lag_nccf) {
494 std::vector<std::pair<int32, BaseFloat> >::reverse_iterator iter = lag_nccf.rbegin();
497 while (this_info != NULL) {
501 if (prev_info != NULL)
502 iter->first = best_state;
503 size_t state_info_index = best_state - this_info->
state_offset_;
506 best_state = this_info->
state_info_[state_info_index].backpointer;
507 if (prev_info != NULL)
508 iter->second = this_info->
state_info_[state_info_index].pov_nccf;
509 this_info = prev_info;
510 if (this_info != NULL) ++iter;
515 if (max_latency <= 0)
return 0;
524 int32 min_living_state = 0, max_living_state = num_states - 1;
528 for (; this_info != NULL && latency < max_latency;) {
531 max_living_state - offset < this_info->
state_info_.size());
533 this_info->
state_info_[min_living_state - offset].backpointer;
535 this_info->
state_info_[max_living_state - offset].backpointer;
536 if (min_living_state == max_living_state) {
540 if (this_info != NULL)
566 avg_norm_prod(avg_norm_prod),
567 mean_square_energy(mean_square_energy) { }
582 int32 NumFramesReady()
const;
584 bool IsLastFrame(
int32 frame)
const;
588 void AcceptWaveform(
BaseFloat sampling_rate,
591 void InputFinished();
608 int32 NumFramesAvailable(int64 num_downsampled_samples,
bool snip_edges)
const;
635 void RecomputeBacktraces();
717 opts_(opts), forward_cost_remainder_(0.0), input_finished_(false),
718 signal_sumsq_(0.0), signal_sum_(0.0), downsampled_samples_processed_(0) {
723 double outer_min_lag = 1.0 / opts.
max_f0 -
725 double outer_max_lag = 1.0 / opts.
min_f0 +
758 upsample_cutoff, lags_offset,
769 int64 num_downsampled_samples,
bool snip_edges)
const {
776 if (num_downsampled_samples < frame_length) {
781 return static_cast<int32>(num_downsampled_samples * 1.0f /
784 return static_cast<int32>((num_downsampled_samples - frame_length / 2) *
785 1.0f / frame_shift + 0.5f);
788 return static_cast<int32>((num_downsampled_samples - frame_length) /
798 int64 num_frames =
static_cast<int64
>(
frame_info_.size()) - 1,
799 next_frame = num_frames,
801 next_frame_sample = frame_shift * next_frame;
808 int64 next_downsampled_samples_processed =
811 if (next_frame_sample > next_downsampled_samples_processed) {
815 KALDI_ASSERT(full_frame_length < frame_shift &&
"Code error");
823 for (int64
i = next_frame_sample;
824 i < next_downsampled_samples_processed;
i++) {
826 new_remainder(
i - next_frame_sample) =
829 new_remainder(
i - next_frame_sample) =
843 int32 full_frame_length = window->
Dim();
844 int32 offset =
static_cast<int32>(sample_index -
848 if (sample_index < 0) {
854 int32 sub_frame_length = sample_index + full_frame_length;
855 int32 sub_frame_index = full_frame_length - sub_frame_length;
856 KALDI_ASSERT(sub_frame_length > 0 && sub_frame_index > 0);
863 if (offset + full_frame_length > downsampled_wave_part.
Dim()) {
868 int32 sub_frame_length = downsampled_wave_part.
Dim() - offset;
872 ExtractFrame(downsampled_wave_part, sample_index, &sub_window);
880 window->
CopyFromVec(downsampled_wave_part.
Range(offset, full_frame_length));
889 int32 old_length = -offset, new_length = offset + full_frame_length;
890 window->
Range(0, old_length).CopyFromVec(
892 window->
Range(old_length, new_length).CopyFromVec(
893 downsampled_wave_part.
Range(0, new_length));
898 (*window)(
i) -= preemph_coeff * (*window)(
i-1);
899 (*window)(0) *= (1.0 - preemph_coeff);
917 return num_frames - latency;
938 KALDI_VLOG(3) <<
"Pitch-tracking Viterbi cost is " 940 <<
" per frame, over " << num_frames <<
" frames.";
956 BaseFloat mean_square = sumsq / num_samp - mean * mean;
958 bool must_recompute =
false;
960 for (
int32 frame = 0; frame < num_frames; frame++)
962 mean_square, threshold))
963 must_recompute =
true;
965 if (!must_recompute) {
978 BaseFloat new_nccf_ballast = pow(mean_square * basic_frame_length, 2) *
981 double forward_cost_remainder = 0.0;
983 next_forward_cost(forward_cost);
984 std::vector<std::pair<int32, int32 > > index_info;
986 for (
int32 frame = 0; frame < num_frames; frame++) {
989 avg_norm_prod =
nccf_info_[frame]->avg_norm_prod,
990 old_nccf_ballast = pow(old_mean_square * basic_frame_length, 2) *
992 nccf_scale = pow((old_nccf_ballast + avg_norm_prod) /
993 (new_nccf_ballast + avg_norm_prod),
994 static_cast<BaseFloat>(0.5));
1009 forward_cost, &index_info, &next_forward_cost);
1011 forward_cost.Swap(&next_forward_cost);
1012 BaseFloat remainder = forward_cost.Min();
1013 forward_cost_remainder += remainder;
1014 forward_cost.Add(-remainder);
1016 KALDI_VLOG(3) <<
"Forward-cost per frame changed from " 1018 << (forward_cost_remainder / num_frames);
1023 int32 best_final_state;
1026 if (
lag_nccf_.size() !=
static_cast<size_t>(num_frames))
1060 prev_frame_end_sample = 0;
1062 cur_sumsq +=
VecVec(downsampled_wave, downsampled_wave);
1063 cur_sum += downsampled_wave.
Sum();
1064 cur_num_samp += downsampled_wave.
Dim();
1073 num_new_frames = end_frame - start_frame;
1075 if (num_new_frames == 0) {
1084 num_resampled_lags =
lags_.Dim(),
1090 inner_prod(num_measured_lags),
1091 norm_prod(num_measured_lags);
1093 nccf_pov(num_new_frames, num_measured_lags);
1102 for (
int32 frame = start_frame; frame < end_frame; frame++) {
1107 start_sample =
static_cast<int64
>(frame) * frame_shift;
1113 static_cast<int64
>((frame + 0.5) * frame_shift) - full_frame_length / 2;
1120 int64 end_sample = start_sample + full_frame_length -
1125 if (end_sample > downsampled_wave.
Dim()) {
1127 end_sample = downsampled_wave.
Dim();
1130 end_sample - prev_frame_end_sample);
1131 cur_num_samp += new_part.
Dim();
1132 cur_sumsq +=
VecVec(new_part, new_part);
1133 cur_sum += new_part.
Sum();
1134 prev_frame_end_sample = end_sample;
1136 double mean_square = cur_sumsq / cur_num_samp -
1137 pow(cur_sum / cur_num_samp, 2.0);
1140 basic_frame_length, &inner_prod, &norm_prod);
1141 double nccf_ballast_pov = 0.0,
1142 nccf_ballast_pitch = pow(mean_square * basic_frame_length, 2) *
1144 avg_norm_prod = norm_prod.
Sum() / norm_prod.
Dim();
1146 ComputeNccf(inner_prod, norm_prod, nccf_ballast_pitch,
1149 ComputeNccf(inner_prod, norm_prod, nccf_ballast_pov,
1157 nccf_pitch.Resize(0, 0);
1167 std::vector<std::pair<int32, int32 > > index_info;
1169 for (
int32 frame = start_frame; frame < end_frame; frame++) {
1170 int32 frame_idx = frame - start_frame;
1173 cur_info->SetNccfPov(nccf_pov_resampled.
Row(frame_idx));
1174 cur_info->ComputeBacktraces(
opts_, nccf_pitch_resampled.
Row(frame_idx),
1177 forward_cost_.Swap(&cur_forward_cost);
1179 BaseFloat remainder = forward_cost_.Min();
1181 forward_cost_.Add(-remainder);
1185 nccf_pitch_resampled.
Row(frame_idx);
1191 int32 best_final_state;
1205 return impl_->NumFramesReady();
1253 int32 cur_rows = 100;
1258 "--simulate-first-pass-online option does not make sense " 1259 "unless you specify --frames-per-chunk");
1261 int32 cur_offset = 0, cur_frame = 0, samp_per_chunk =
1264 while (cur_offset < wave.
Dim()) {
1265 int32 num_samp = std::min(samp_per_chunk, wave.
Dim() - cur_offset);
1268 cur_offset += num_samp;
1269 if (cur_offset == wave.
Dim())
1272 for (; cur_frame < pitch_extractor.
NumFramesReady(); cur_frame++) {
1273 if (cur_frame >= cur_rows) {
1278 pitch_extractor.
GetFrame(cur_frame, &row);
1281 if (cur_frame == 0) {
1282 KALDI_WARN <<
"No features output since wave file too short";
1285 *output = feats.
RowRange(0, cur_frame);
1306 int32 cur_offset = 0, samp_per_chunk =
1308 while (cur_offset < wave.
Dim()) {
1309 int32 num_samp = std::min(samp_per_chunk, wave.
Dim() - cur_offset);
1312 cur_offset += num_samp;
1317 if (num_frames == 0) {
1318 KALDI_WARN <<
"No frames output in pitch extraction";
1322 output->
Resize(num_frames, 2);
1323 for (
int32 frame = 0; frame < num_frames; frame++) {
1325 pitch_extractor.
GetFrame(frame, &row);
1376 template<
typename Real>
1378 if (src.
Dim() == 0)
return;
1401 opts_(opts), src_(src),
1402 dim_ ((opts.add_pov_feature ? 1 : 0)
1403 + (opts.add_normalized_log_pitch ? 1 : 0)
1404 + (opts.add_delta_pitch ? 1 : 0)
1405 + (opts.add_raw_log_pitch ? 1 : 0)) {
1407 " At least one of the pitch features should be chosen. " 1408 "Check your post-process-pitch options.");
1410 "Input feature must be pitch feature (should have dimension 2)");
1446 int32 start_frame = std::max(0, frame - context),
1448 frames_in_window = end_frame - start_frame;
1452 for (
int32 f = start_frame; f < end_frame; f++)
1456 delta_opts.
order = 1;
1481 normalized_log_pitch = log_pitch - avg_log_pitch;
1488 int32 src_frames_ready,
1489 int32 *window_begin,
1490 int32 *window_end)
const {
1493 *window_begin = std::max(0, t - left_context);
1494 *window_end = std::min(t + right_context + 1, src_frames_ready);
1515 int32 this_window_begin, this_window_end;
1517 &this_window_begin, &this_window_end);
1526 this_stats = prev_stats;
1527 int32 prev_window_begin, prev_window_end;
1529 &prev_window_begin, &prev_window_end);
1530 if (this_window_begin != prev_window_begin) {
1531 KALDI_ASSERT(this_window_begin == prev_window_begin + 1);
1535 log_pitch =
Log(tmp(1));
1536 this_stats.
sum_pov -= accurate_pov;
1539 if (this_window_end != prev_window_end) {
1544 log_pitch =
Log(tmp(1));
1545 this_stats.
sum_pov += accurate_pov;
1560 for (
int32 f = this_window_begin; f < this_window_end; f++) {
1563 log_pitch =
Log(tmp(1));
1564 this_stats.
sum_pov += accurate_pov;
1571 if (src_frames_ready == 0) {
1576 return std::max(0, src_frames_ready -
1589 online_process_pitch.
Dim());
1592 online_process_pitch.
GetFrame(t, &row);
1607 "--simulate-first-pass-online option does not make sense " 1608 "unless you specify --frames-per-chunk");
1613 int32 cur_rows = 100;
1616 int32 cur_offset = 0, cur_frame = 0,
1626 while (cur_offset < wave.
Dim()) {
1628 if (samp_per_chunk > 0)
1629 num_samp = std::min(samp_per_chunk, wave.
Dim() - cur_offset);
1631 num_samp = wave.
Dim();
1634 cur_offset += num_samp;
1635 if (cur_offset == wave.
Dim())
1640 if (cur_frame >= cur_rows) {
1645 post_process.
GetFrame(cur_frame, &row);
1650 if (cur_frame == 0) {
1651 KALDI_WARN <<
"No features output since wave file too short";
1654 *output = feats.
RowRange(0, cur_frame);
1661 post_process.
GetFrame(frame, &row);
std::vector< PitchFrameInfo * > frame_info_
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready, i.e. available to be fetched with GetFrame().
int32 backpointer
The state index on the previous frame that is the best preceding state for this state.
void ComputeKaldiPitchFirstPass(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function is called from ComputeKaldiPitch when the user specifies opts.simulate_first_pass_onlin...
This class takes a Matrix<BaseFloat> and wraps it as an OnlineFeatureInterface: this can be useful wh...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Vector< BaseFloat > lags_
virtual BaseFloat FrameShiftInSeconds() const
void Resample(const VectorBase< BaseFloat > &input, bool flush, Vector< BaseFloat > *output)
This function does the resampling.
PitchFrameInfo(int32 num_states)
This constructor is used for frame -1; it sets the costs to be all zeros the pov_nccf's to zero and t...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
OnlinePitchFeature(const PitchExtractionOptions &opts)
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
void Cleanup(PitchFrameInfo *prev_frame)
This function resizes the arrays for this object and updates the reference counts for the previous ob...
std::vector< BaseFloat > delta_feature_noise_
OnlineProcessPitch(const ProcessPitchOptions &opts, OnlineFeatureInterface *src)
Note on the implementation of OnlineProcessPitch: the OnlineFeatureInterface allows random access to ...
BaseFloat NccfToPovFeature(BaseFloat n)
This function processes the NCCF n to a POV feature f by applying the formula f = (1.0001 - n)^0.15 - 1.0.
virtual int32 Dim() const
double signal_sum_
sum of previously processed parts of signal; used to do mean-subtraction when getting sum-squared...
void ComputeBacktraces(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &nccf_pitch, const VectorBase< BaseFloat > &lags, const VectorBase< BaseFloat > &prev_forward_cost, std::vector< std::pair< int32, int32 > > *index_info, VectorBase< BaseFloat > *this_forward_cost)
This function is used for frames apart from frame -1; the bulk of the Viterbi computation takes place inside it.
void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
Base class which provides matrix operations not involving resizing or allocation. ...
bool add_normalized_log_pitch
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
NccfInfo(BaseFloat avg_norm_prod, BaseFloat mean_square_energy)
void ComputeKaldiPitch(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function extracts (pitch, NCCF) per frame, using the pitch extraction method described in "A Pit...
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
This online-feature class implements post processing of pitch features.
void ExtractFrame(const VectorBase< BaseFloat > &downsampled_wave_part, int64 frame_index, VectorBase< BaseFloat > *window)
This function extracts from the signal the samples numbered from "sample_index" (numbered in the full...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
float RandGauss(struct RandomState *state=NULL)
ProcessPitchOptions opts_
~OnlinePitchFeatureImpl()
BaseFloat pov_nccf
the version of the NCCF we keep for the POV computation (without the ballast term).
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
OnlinePitchFeatureImpl * impl_
Vector< BaseFloat > downsampled_signal_remainder_
This is a small remainder of the previous downsampled signal; it's used by ExtractFrame for frames ne...
BaseFloat GetPovFeature(int32 frame) const
Computes and returns the POV feature for this frame.
BaseFloat FrameShiftInSeconds() const
int32 normalization_right_context
ArbitraryResample * nccf_resampler_
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors:
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void RecomputeBacktraces()
This function is called after we reach frame "recompute_frame", or when InputFinished() is called...
bool IsLastFrame(int32 frame) const
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Vector< BaseFloat > forward_cost_
Class ArbitraryResample allows you to resample a signal (assumed zero outside the sample region...
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready, i.e. available to be fetched with GetFrame().
void Resample(const MatrixBase< BaseFloat > &input, MatrixBase< BaseFloat > *output) const
This function does the resampling.
BaseFloat delta_pitch_noise_stddev
PitchExtractionOptions opts_
BaseFloat mean_square_energy
std::vector< NormalizationStats > normalization_stats_
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
void ComputeNccf(const VectorBase< BaseFloat > &inner_prod, const VectorBase< BaseFloat > &norm_prod, BaseFloat nccf_ballast, VectorBase< BaseFloat > *nccf_vec)
Computes the NCCF as a fraction of the numerator term (a dot product between two vectors) and a denom...
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function combines ComputeKaldiPitch and ProcessPitch.
int64 downsampled_samples_processed_
downsampled_samples_processed is the number of samples (after downsampling) that we got in previous c...
int32 normalization_left_context
Real * Data()
Returns a pointer to the start of the vector's data.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void ComputeLocalCost(const VectorBase< BaseFloat > &nccf_pitch, const VectorBase< BaseFloat > &lags, const PitchExtractionOptions &opts, VectorBase< BaseFloat > *local_cost)
This function computes the local-cost for the Viterbi computation, see eq.
int32 NumFramesAvailable(int64 num_downsampled_samples, bool snip_edges) const
This function works out from the signal how many frames are currently available to process (this is c...
std::vector< NccfInfo * > nccf_info_
void GetNormalizationWindow(int32 frame, int32 src_frames_ready, int32 *window_begin, int32 *window_end) const
Computes the normalization window sizes.
BaseFloat NccfToPov(BaseFloat n)
This function processes the NCCF n to a reasonably accurate probability of voicing p by applying the ...
LinearResample * signal_resampler_
bool pitch_use_naive_search
BaseFloat delta_pitch_scale
Real Sum() const
Returns sum of the elements.
virtual ~OnlinePitchFeature()
void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
std::vector< std::pair< int32, BaseFloat > > lag_nccf_
BaseFloat GetNormalizedLogPitchFeature(int32 frame)
Computes and returns the mean-subtracted log-pitch feature for this frame.
void UpdateRemainder(const VectorBase< BaseFloat > &downsampled_wave_part)
This function updates downsampled_signal_remainder_, downsampled_samples_processed_, signal_sum_ and signal_sumsq_; it's called from AcceptWaveform().
void ComputeCorrelation(const VectorBase< BaseFloat > &wave, int32 first_lag, int32 last_lag, int32 nccf_window_size, VectorBase< BaseFloat > *inner_prod, VectorBase< BaseFloat > *norm_prod)
This function computes some dot products that are required while computing the NCCF.
std::vector< StateInfo > state_info_
SubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
A class representing a vector.
int32 cur_best_state_
The current best state in the backtrace from the end.
LinearResample is a special case of ArbitraryResample, where we want to resample a signal at linearly...
#define KALDI_ASSERT(cond)
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
void Set(Real f)
Set all members of a vector to a specified value.
PitchFrameInfo * prev_info_
The structure for the previous frame.
BaseFloat GetDeltaPitchFeature(int32 frame)
Computes and returns the delta-log-pitch feature for this frame.
void UpdateNormalizationStats(int32 frame)
Makes sure the entry in normalization_stats_ for this frame is up to date; called from GetNormalizedL...
void AppendVector(const VectorBase< Real > &src, Vector< Real > *dst)
int32 ComputeLatency(int32 max_latency)
This function may be called on the last (most recent) PitchFrameInfo object; it computes how many fra...
OnlinePitchFeatureImpl(const PitchExtractionOptions &opts)
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
int32 state_offset_
the state index of the first entry in "state_info"; this will initially be zero, but after cleanup mi...
bool UpdatePreviousBestState(PitchFrameInfo *prev_frame)
This function updates.
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function processes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch, and processes them into features.
double forward_cost_remainder_
Provides a vector abstraction class.
void Add(Real c)
Add a constant to each element of a vector.
void SetZero()
Set vector to all zeros.
Vector< BaseFloat > nccf_pitch_resampled
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Outputs the two-dimensional feature consisting of (pitch, NCCF).
double signal_sumsq_
sum-squared of previously processed parts of signal; used to get NCCF ballast term.
void SetBestState(int32 best_state, std::vector< std::pair< int32, BaseFloat > > &lag_nccf)
This function may be called for the last (most recent) PitchFrameInfo object with the best state (obt...
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
OnlineFeatureInterface * src_
void SetNccfPov(const VectorBase< BaseFloat > &nccf_pov)
Record the nccf_pov value.
virtual int32 NumFramesReady() const =0
Returns the number of frames that are currently ready, i.e. available to be fetched with GetFrame().
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
int32 NumFramesReady() const
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
void SelectLags(const PitchExtractionOptions &opts, Vector< BaseFloat > *lags)
This function selects the lags at which we measure the NCCF: we need to select lags from 1/max_f0 to ...
virtual int32 Dim() const =0
BaseFloat GetRawLogPitchFeature(int32 frame) const
Computes and returns the raw log-pitch feature for this frame.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).