doc/pitch-functions_8h_source.html

 // feat/pitch-functions.h

 // Copyright     2013  Pegah Ghahremani
 //               2014  IMSL, PKU-HKUST (author: Wei Shi)
 //               2014  Yanqing Sun, Junjie Wang,
 //                     Daniel Povey, Korbinian Riedhammer
 //                     Xin Lei

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_FEAT_PITCH_FUNCTIONS_H_
 #define KALDI_FEAT_PITCH_FUNCTIONS_H_

 #include <cassert>
 #include <cstdlib>
 #include <string>
 #include <vector>

 #include "base/kaldi-error.h"
 #include "feat/mel-computations.h"
 #include "itf/online-feature-itf.h"
 #include "matrix/matrix-lib.h"
 #include "util/common-utils.h"

 namespace kaldi {

 struct PitchExtractionOptions {
   // FrameExtractionOptions frame_opts;
   BaseFloat samp_freq;          // sample frequency in hertz
   BaseFloat frame_shift_ms;     // in milliseconds.
   BaseFloat frame_length_ms;    // in milliseconds.
   BaseFloat preemph_coeff;      // Preemphasis coefficient. [use is deprecated.]
   BaseFloat min_f0;             // min f0 to search (Hz)
   BaseFloat max_f0;             // max f0 to search (Hz)
   BaseFloat soft_min_f0;        // Minimum f0, applied in soft way, must not
                                 // exceed min-f0
   BaseFloat penalty_factor;     // cost factor for FO change
   BaseFloat lowpass_cutoff;     // cutoff frequency for Low pass filter
   BaseFloat resample_freq;      // Integer that determines filter width when
                                 // upsampling NCCF
   BaseFloat delta_pitch;        // the pitch tolerance in pruning lags
   BaseFloat nccf_ballast;       // Increasing this factor reduces NCCF for
                                 // quiet frames, helping ensure pitch
                                 // continuity in unvoiced region
   int32 lowpass_filter_width;   // Integer that determines filter width of
                                 // lowpass filter
   int32 upsample_filter_width;  // Integer that determines filter width when
                                 // upsampling NCCF

   // Below are newer config variables, not present in the original paper,
   // that relate to the online pitch extraction algorithm.

   // The maximum number of frames of latency that we allow the pitch-processing
   // to introduce, for online operation. If you set this to a large value,
   // there would be no inaccuracy from the Viterbi traceback (but it might make
   // you wait to see the pitch). This is not very relevant for the online
   // operation: normalization-right-context is more relevant, you
   // can just leave this value at zero.
   int32 max_frames_latency;

   // Only relevant for the function ComputeKaldiPitch which is called by
   // compute-kaldi-pitch-feats. If nonzero, we provide the input as chunks of
   // this size. This affects the energy normalization which has a small effect
   // on the resulting features, especially at the beginning of a file. For best
   // compatibility with online operation (e.g. if you plan to train models for
   // the online-deocding setup), you might want to set this to a small value,
   // like one frame.
   int32 frames_per_chunk;

   // Only relevant for the function ComputeKaldiPitch which is called by
   // compute-kaldi-pitch-feats, and only relevant if frames_per_chunk is
   // nonzero. If true, it will query the features as soon as they are
   // available, which simulates the first-pass features you would get in online
   // decoding. If false, the features you will get will be the same as those
   // available at the end of the utterance, after InputFinished() has been
   // called: e.g. during lattice rescoring.
   bool simulate_first_pass_online;

   // Only relevant for online operation or when emulating online operation
   // (e.g. when setting frames_per_chunk). This is the frame-index on which we
   // recompute the NCCF (e.g. frame-index 500 = after 5 seconds); if the
   // segment ends before this we do it when the segment ends. We do this by
   // re-computing the signal average energy, which affects the NCCF via the
   // "ballast term", scaling the resampled NCCF by a factor derived from the
   // average change in the "ballast term", and re-doing the backtrace
   // computation. Making this infinity would be the most exact, but would
   // introduce unwanted latency at the end of long utterances, for little
   // benefit.
   int32 recompute_frame;

   // This is a "hidden config" used only for testing the online pitch
   // extraction. If true, we compute the signal root-mean-squared for the
   // ballast term, only up to the current frame, rather than the end of the
   // current chunk of signal. This makes the output insensitive to the
   // chunking, which is useful for testing purposes.
   bool nccf_ballast_online;
   bool snip_edges;
   PitchExtractionOptions():
       samp_freq(16000),
       frame_shift_ms(10.0),
       frame_length_ms(25.0),
       preemph_coeff(0.0),
       min_f0(50),
       max_f0(400),
       soft_min_f0(10.0),
       penalty_factor(0.1),
       lowpass_cutoff(1000),
       resample_freq(4000),
       delta_pitch(0.005),
       nccf_ballast(7000),
       lowpass_filter_width(1),
       upsample_filter_width(5),
       max_frames_latency(0),
       frames_per_chunk(0),
       simulate_first_pass_online(false),
       recompute_frame(500),
       nccf_ballast_online(false),
       snip_edges(true) { }

   void Register(OptionsItf *opts) {
     opts->Register("sample-frequency", &samp_freq,
                    "Waveform data sample frequency (must match the waveform "
                    "file, if specified there)");
     opts->Register("frame-length", &frame_length_ms, "Frame length in "
                    "milliseconds");
     opts->Register("frame-shift", &frame_shift_ms, "Frame shift in "
                    "milliseconds");
     opts->Register("preemphasis-coefficient", &preemph_coeff,
                    "Coefficient for use in signal preemphasis (deprecated)");
     opts->Register("min-f0", &min_f0,
                    "min. F0 to search for (Hz)");
     opts->Register("max-f0", &max_f0,
                    "max. F0 to search for (Hz)");
     opts->Register("soft-min-f0", &soft_min_f0,
                    "Minimum f0, applied in soft way, must not exceed min-f0");
     opts->Register("penalty-factor", &penalty_factor,
                    "cost factor for FO change.");
     opts->Register("lowpass-cutoff", &lowpass_cutoff,
                    "cutoff frequency for LowPass filter (Hz) ");
     opts->Register("resample-frequency", &resample_freq,
                    "Frequency that we down-sample the signal to.  Must be "
                    "more than twice lowpass-cutoff");
     opts->Register("delta-pitch", &delta_pitch,
                    "Smallest relative change in pitch that our algorithm "
                    "measures");
     opts->Register("nccf-ballast", &nccf_ballast,
                    "Increasing this factor reduces NCCF for quiet frames");
     opts->Register("nccf-ballast-online", &nccf_ballast_online,
                    "This is useful mainly for debug; it affects how the NCCF "
                    "ballast is computed.");
     opts->Register("lowpass-filter-width", &lowpass_filter_width,
                    "Integer that determines filter width of "
                    "lowpass filter, more gives sharper filter");
     opts->Register("upsample-filter-width", &upsample_filter_width,
                    "Integer that determines filter width when upsampling NCCF");
     opts->Register("frames-per-chunk", &frames_per_chunk, "Only relevant for "
                    "offline pitch extraction (e.g. compute-kaldi-pitch-feats), "
                    "you can set it to a small nonzero value, such as 10, for "
                    "better feature compatibility with online decoding (affects "
                    "energy normalization in the algorithm)");
     opts->Register("simulate-first-pass-online", &simulate_first_pass_online,
                    "If true, compute-kaldi-pitch-feats will output features "
                    "that correspond to what an online decoder would see in the "
                    "first pass of decoding-- not the final version of the "
                    "features, which is the default.  Relevant if "
                    "--frames-per-chunk > 0");
     opts->Register("recompute-frame", &recompute_frame, "Only relevant for "
                    "online pitch extraction, or for compatibility with online "
                    "pitch extraction.  A non-critical parameter; the frame at "
                    "which we recompute some of the forward pointers, after "
                    "revising our estimate of the signal energy.  Relevant if"
                    "--frames-per-chunk > 0");
     opts->Register("max-frames-latency", &max_frames_latency, "Maximum number "
                    "of frames of latency that we allow pitch tracking to "
                    "introduce into the feature processing (affects output only "
                    "if --frames-per-chunk > 0 and "
                    "--simulate-first-pass-online=true");
     opts->Register("snip-edges", &snip_edges, "If this is set to false, the "
                    "incomplete frames near the ending edge won't be snipped, "
                    "so that the number of frames is the file size divided by "
                    "the frame-shift. This makes different types of features "
                    "give the same number of frames.");
   }
   // Because of floating point representation, it is more reliable to divide
   // by 1000 instead of multiplying by 0.001, but it is a bit slower.
   int32 NccfWindowSize() const {
     return static_cast<int32>(resample_freq * frame_length_ms / 1000.0);
   }
   int32 NccfWindowShift() const {
     return static_cast<int32>(resample_freq * frame_shift_ms / 1000.0);
   }
 };

 struct ProcessPitchOptions {
   BaseFloat pitch_scale;  // the final normalized-log-pitch feature is scaled
                           // with this value
   BaseFloat pov_scale;    // the final POV feature is scaled with this value
   BaseFloat pov_offset;   // An offset that can be added to the final POV
                           // feature (useful for online-decoding, where we don't
                           // do CMN to the pitch-derived features.

   BaseFloat delta_pitch_scale;
   BaseFloat delta_pitch_noise_stddev;  // stddev of noise we add to delta-pitch
   int32 normalization_left_context;    // left-context used for sliding-window
                                        // normalization
   int32 normalization_right_context;   // this should be reduced in online
                                        // decoding to reduce latency

   int32 delta_window;
   int32 delay;

   bool add_pov_feature;
   bool add_normalized_log_pitch;
   bool add_delta_pitch;
   bool add_raw_log_pitch;

   ProcessPitchOptions() :
       pitch_scale(2.0),
       pov_scale(2.0),
       pov_offset(0.0),
       delta_pitch_scale(10.0),
       delta_pitch_noise_stddev(0.005),
       normalization_left_context(75),
       normalization_right_context(75),
       delta_window(2),
       delay(0),
       add_pov_feature(true),
       add_normalized_log_pitch(true),
       add_delta_pitch(true),
       add_raw_log_pitch(false) { }


   void Register(ParseOptions *opts) {
     opts->Register("pitch-scale", &pitch_scale,
                    "Scaling factor for the final normalized log-pitch value");
     opts->Register("pov-scale", &pov_scale,
                    "Scaling factor for final POV (probability of voicing) "
                    "feature");
     opts->Register("pov-offset", &pov_offset,
                    "This can be used to add an offset to the POV feature. "
                    "Intended for use in online decoding as a substitute for "
                    " CMN.");
     opts->Register("delta-pitch-scale", &delta_pitch_scale,
                    "Term to scale the final delta log-pitch feature");
     opts->Register("delta-pitch-noise-stddev", &delta_pitch_noise_stddev,
                    "Standard deviation for noise we add to the delta log-pitch "
                    "(before scaling); should be about the same as delta-pitch "
                    "option to pitch creation.  The purpose is to get rid of "
                    "peaks in the delta-pitch caused by discretization of pitch "
                    "values.");
     opts->Register("normalization-left-context", &normalization_left_context,
                    "Left-context (in frames) for moving window normalization");
     opts->Register("normalization-right-context", &normalization_right_context,
                    "Right-context (in frames) for moving window normalization");
     opts->Register("delta-window", &delta_window,
                    "Number of frames on each side of central frame, to use for "
                    "delta window.");
     opts->Register("delay", &delay,
                    "Number of frames by which the pitch information is "
                    "delayed.");
     opts->Register("add-pov-feature", &add_pov_feature,
                    "If true, the warped NCCF is added to output features");
     opts->Register("add-normalized-log-pitch", &add_normalized_log_pitch,
                    "If true, the log-pitch with POV-weighted mean subtraction "
                    "over 1.5 second window is added to output features");
     opts->Register("add-delta-pitch", &add_delta_pitch,
                    "If true, time derivative of log-pitch is added to output "
                    "features");
     opts->Register("add-raw-log-pitch", &add_raw_log_pitch,
                    "If true, log(pitch) is added to output features");
   }
 };


 // We don't want to expose the pitch-extraction internals here as it's
 // quite complex, so we use a private implementation.  Only this forward
 // declaration is visible to users of the header.
 class OnlinePitchFeatureImpl;


 // Online pitch extraction, exposed through the OnlineBaseFeature interface.
 // All member functions except Dim() are defined out of line.
 // Note: to start on a new waveform, just construct a new version
 // of this object.
 class OnlinePitchFeature: public OnlineBaseFeature {
  public:
   // "opts" is the pitch-extraction configuration.
   explicit OnlinePitchFeature(const PitchExtractionOptions &opts);

   // Each output frame has two elements.
   virtual int32 Dim() const { return 2; /* (NCCF, pitch) */ }

   virtual int32 NumFramesReady() const;

   virtual BaseFloat FrameShiftInSeconds() const;

   virtual bool IsLastFrame(int32 frame) const;

   // "feat" receives the requested frame; presumably it must already have
   // dimension Dim() -- confirm against the implementation.
   virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);

   // NOTE(review): "sampling_rate" presumably has to agree with
   // PitchExtractionOptions::samp_freq -- confirm in the implementation.
   virtual void AcceptWaveform(BaseFloat sampling_rate,
                               const VectorBase<BaseFloat> &waveform);

   virtual void InputFinished();

   virtual ~OnlinePitchFeature();

  private:
   // Pointer to the private implementation.  NOTE(review): presumably created
   // in the constructor and released in the destructor, both of which are
   // defined out of line -- confirm before copying objects of this class.
   OnlinePitchFeatureImpl *impl_;
 };


 // This online-feature class implements post processing of pitch features.
 // It pulls raw frames of dimension kRawFeatureDim from "src" and exposes
 // frames of dimension dim_; the per-feature helpers declared in the private
 // section are defined out of line.
 class OnlineProcessPitch: public OnlineFeatureInterface {
  public:
   virtual int32 Dim() const { return dim_; }

   // Returns true if "frame" is the last frame.  Output frame indexes are
   // offset by opts_.delay relative to the source, so the question is
   // forwarded to the source for the correspondingly shifted index.
   virtual bool IsLastFrame(int32 frame) const {
     // Negative indexes: defer to the source's answer for frame -1.
     if (frame <= -1)
       return src_->IsLastFrame(-1);
     // Frames at or past the delay map onto source frame (frame - delay).
     if (frame >= opts_.delay)
       return src_->IsLastFrame(frame - opts_.delay);
     // Here 0 <= frame < delay: never the last frame when the source reports
     // frame -1 as last; otherwise use the source's answer for frame 0.
     if (src_->IsLastFrame(-1))
       return false;
     return src_->IsLastFrame(0);
   }

   // The frame shift is taken unchanged from the source.
   virtual BaseFloat FrameShiftInSeconds() const {
     return src_->FrameShiftInSeconds();
   }

   virtual int32 NumFramesReady() const;

   virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);

   virtual ~OnlineProcessPitch() { }

   // "src" is borrowed: this class does not take ownership of it.
   OnlineProcessPitch(const ProcessPitchOptions &opts,
                      OnlineFeatureInterface *src);

  private:
   // Dimension of the input, (nccf, pitch), expressed as a constant through
   // an anonymous enum.
   enum { kRawFeatureDim = 2 };

   ProcessPitchOptions opts_;
   OnlineFeatureInterface *src_;
   int32 dim_;  // Dimension of the output features; set in the initializer.

   // Statistics kept per frame for the normalization of the log-pitch.
   struct NormalizationStats {
     // Value of src_->NumFramesReady() at the time "mean_pitch" was set.
     int32 cur_num_frames;
     // True if the input data was finished when "mean_pitch" was computed.
     bool input_finished;
     // Sum of pov over the relevant range.
     double sum_pov;
     // Sum of log(pitch) * pov over the relevant range.
     double sum_log_pitch_pov;

     NormalizationStats():
         cur_num_frames(-1),
         input_finished(false),
         sum_pov(0.0),
         sum_log_pitch_pov(0.0) { }
   };

   std::vector<BaseFloat> delta_feature_noise_;

   std::vector<NormalizationStats> normalization_stats_;

   // Per-feature accessors; the non-const ones may update the cached state
   // declared above.
   inline BaseFloat GetPovFeature(int32 frame) const;

   inline BaseFloat GetDeltaPitchFeature(int32 frame);

   inline BaseFloat GetRawLogPitchFeature(int32 frame) const;

   inline BaseFloat GetNormalizedLogPitchFeature(int32 frame);

   // Outputs the normalization window for "frame" through window_begin and
   // window_end, given the number of source frames currently ready.
   inline void GetNormalizationWindow(int32 frame,
                                      int32 src_frames_ready,
                                      int32 *window_begin,
                                      int32 *window_end) const;

   inline void UpdateNormalizationStats(int32 frame);
 };


 // Extracts pitch and NCCF values per frame from the waveform "wave", using
 // the configuration in "opts"; the result goes to "output".
 // NOTE(review): the column order of the two values is described
 // inconsistently elsewhere -- confirm against the implementation.
 void ComputeKaldiPitch(const PitchExtractionOptions &opts,
                        const VectorBase<BaseFloat> &wave,
                        Matrix<BaseFloat> *output);

 // Processes the raw quantities computed by ComputeKaldiPitch ("input") into
 // features, as configured by "opts"; the result goes to "output".
 void ProcessPitch(const ProcessPitchOptions &opts,
                   const MatrixBase<BaseFloat> &input,
                   Matrix<BaseFloat> *output);

 // Combines ComputeKaldiPitch and ProcessPitch: takes the waveform "wave" and
 // both option structs, and puts the processed features in "output".
 void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts,
                                  const ProcessPitchOptions &process_opts,
                                  const VectorBase<BaseFloat> &wave,
                                  Matrix<BaseFloat> *output);


 }  // namespace kaldi
 #endif  // KALDI_FEAT_PITCH_FUNCTIONS_H_
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::ProcessPitchOptions::add_pov_feature
bool add_pov_feature
Definition: pitch-functions.h:230

kaldi::PitchExtractionOptions::nccf_ballast
BaseFloat nccf_ballast
Definition: pitch-functions.h:57

kaldi::OnlineProcessPitch::NormalizationStats::NormalizationStats
NormalizationStats()
Definition: pitch-functions.h:375

kaldi::ProcessPitchOptions
Definition: pitch-functions.h:212

kaldi::ProcessPitchOptions::add_delta_pitch
bool add_delta_pitch
Definition: pitch-functions.h:232

kaldi::OnlineProcessPitch::NormalizationStats::sum_pov
double sum_pov
Definition: pitch-functions.h:372

matrix-lib.h

kaldi::OnlineProcessPitch::delta_feature_noise_
std::vector< BaseFloat > delta_feature_noise_
Definition: pitch-functions.h:379

kaldi::OnlineProcessPitch::Dim
virtual int32 Dim() const
Definition: pitch-functions.h:334

kaldi::PitchExtractionOptions::penalty_factor
BaseFloat penalty_factor
Definition: pitch-functions.h:52

kaldi::PitchExtractionOptions::max_frames_latency
int32 max_frames_latency
Definition: pitch-functions.h:74

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::ProcessPitchOptions::add_normalized_log_pitch
bool add_normalized_log_pitch
Definition: pitch-functions.h:231

kaldi::PitchExtractionOptions::delta_pitch
BaseFloat delta_pitch
Definition: pitch-functions.h:56

kaldi::ComputeKaldiPitch
void ComputeKaldiPitch(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function extracts (pitch, NCCF) per frame, using the pitch extraction method described in "A Pit...
Definition: pitch-functions.cc:1291

kaldi::OnlineProcessPitch::FrameShiftInSeconds
virtual BaseFloat FrameShiftInSeconds() const
Definition: pitch-functions.h:344

kaldi::PitchExtractionOptions::lowpass_cutoff
BaseFloat lowpass_cutoff
Definition: pitch-functions.h:53

kaldi::OnlineProcessPitch
This online-feature class implements post processing of pitch features.
Definition: pitch-functions.h:332

kaldi::PitchExtractionOptions::upsample_filter_width
int32 upsample_filter_width
Definition: pitch-functions.h:62

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::OnlineProcessPitch::opts_
ProcessPitchOptions opts_
Definition: pitch-functions.h:363

kaldi::Matrix< BaseFloat >

kaldi::PitchExtractionOptions
Definition: pitch-functions.h:42

kaldi::PitchExtractionOptions::PitchExtractionOptions
PitchExtractionOptions()
Definition: pitch-functions.h:113

kaldi::OnlinePitchFeature::impl_
OnlinePitchFeatureImpl * impl_
Definition: pitch-functions.h:324

kaldi::OnlineProcessPitch::NormalizationStats::input_finished
bool input_finished
Definition: pitch-functions.h:370

kaldi::PitchExtractionOptions::lowpass_filter_width
int32 lowpass_filter_width
Definition: pitch-functions.h:60

kaldi::ParseOptions::Register
void Register(const std::string &name, bool *ptr, const std::string &doc)
Definition: parse-options.cc:56

kaldi::PitchExtractionOptions::NccfWindowShift
int32 NccfWindowShift() const
Returns the window-shift in samples, after resampling.
Definition: pitch-functions.h:207

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::ProcessPitchOptions::normalization_right_context
int32 normalization_right_context
Definition: pitch-functions.h:224

kaldi::PitchExtractionOptions::nccf_ballast_online
bool nccf_ballast_online
Definition: pitch-functions.h:111

kaldi::PitchExtractionOptions::frame_shift_ms
BaseFloat frame_shift_ms
Definition: pitch-functions.h:45

kaldi::PitchExtractionOptions::soft_min_f0
BaseFloat soft_min_f0
Definition: pitch-functions.h:50

kaldi::OnlinePitchFeature::Dim
virtual int32 Dim() const
Definition: pitch-functions.h:304

kaldi::ParseOptions
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36

kaldi::PitchExtractionOptions::recompute_frame
int32 recompute_frame
Definition: pitch-functions.h:104

float

kaldi::OnlineProcessPitch::~OnlineProcessPitch
virtual ~OnlineProcessPitch()
Definition: pitch-functions.h:352

kaldi::OnlineProcessPitch::NormalizationStats::cur_num_frames
int32 cur_num_frames
Definition: pitch-functions.h:368

kaldi::PitchExtractionOptions::frame_length_ms
BaseFloat frame_length_ms
Definition: pitch-functions.h:46

kaldi::PitchExtractionOptions::min_f0
BaseFloat min_f0
Definition: pitch-functions.h:48

kaldi::ProcessPitchOptions::delta_pitch_noise_stddev
BaseFloat delta_pitch_noise_stddev
Definition: pitch-functions.h:221

kaldi::OnlineProcessPitch::dim_
int32 dim_
Definition: pitch-functions.h:365

kaldi::OnlineProcessPitch::normalization_stats_
std::vector< NormalizationStats > normalization_stats_
Definition: pitch-functions.h:381

kaldi::ComputeAndProcessKaldiPitch
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function combines ComputeKaldiPitch and ProcessPitch.
Definition: pitch-functions.cc:1597

kaldi::ProcessPitchOptions::normalization_left_context
int32 normalization_left_context
Definition: pitch-functions.h:222

kaldi::PitchExtractionOptions::Register
void Register(OptionsItf *opts)
Definition: pitch-functions.h:135

kaldi::ProcessPitchOptions::add_raw_log_pitch
bool add_raw_log_pitch
Definition: pitch-functions.h:233

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::PitchExtractionOptions::snip_edges
bool snip_edges
Definition: pitch-functions.h:112

kaldi::ProcessPitchOptions::delta_pitch_scale
BaseFloat delta_pitch_scale
Definition: pitch-functions.h:220

kaldi::ProcessPitchOptions::pov_offset
BaseFloat pov_offset
Definition: pitch-functions.h:216

online-feature-itf.h

kaldi::OnlineBaseFeature
Add a virtual class for "source" features such as MFCC or PLP or pitch features.
Definition: online-feature-itf.h:106

kaldi::OnlineProcessPitch::NormalizationStats
Definition: pitch-functions.h:367

kaldi::PitchExtractionOptions::frames_per_chunk
int32 frames_per_chunk
Definition: pitch-functions.h:83

kaldi::ProcessPitchOptions::delta_window
int32 delta_window
Definition: pitch-functions.h:227

kaldi::PitchExtractionOptions::NccfWindowSize
int32 NccfWindowSize() const
Returns the window-size in samples, after resampling.
Definition: pitch-functions.h:203

kaldi::PitchExtractionOptions::preemph_coeff
BaseFloat preemph_coeff
Definition: pitch-functions.h:47

kaldi::OnlineProcessPitch::IsLastFrame
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
Definition: pitch-functions.h:336

kaldi::ProcessPitchOptions::ProcessPitchOptions
ProcessPitchOptions()
Definition: pitch-functions.h:235

kaldi::OnlineFeatureInterface
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
Definition: online-feature-itf.h:49

kaldi::ProcessPitch
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function processes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch, and processes them into features.
Definition: pitch-functions.cc:1581

kaldi::ProcessPitchOptions::pov_scale
BaseFloat pov_scale
Definition: pitch-functions.h:215

kaldi::OnlinePitchFeature
Definition: pitch-functions.h:300

kaldi::PitchExtractionOptions::samp_freq
BaseFloat samp_freq
Definition: pitch-functions.h:44

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::OnlineProcessPitch::NormalizationStats::sum_log_pitch_pov
double sum_log_pitch_pov
Definition: pitch-functions.h:373

kaldi::ProcessPitchOptions::delay
int32 delay
Definition: pitch-functions.h:228

kaldi::OnlinePitchFeatureImpl
Definition: pitch-functions.cc:574

mel-computations.h

kaldi::OnlineProcessPitch::src_
OnlineFeatureInterface * src_
Definition: pitch-functions.h:364

kaldi::ProcessPitchOptions::pitch_scale
BaseFloat pitch_scale
Definition: pitch-functions.h:213

kaldi::ProcessPitchOptions::Register
void Register(ParseOptions *opts)
Definition: pitch-functions.h:251

kaldi-error.h

kaldi::PitchExtractionOptions::simulate_first_pass_online
bool simulate_first_pass_online
Definition: pitch-functions.h:92

kaldi::PitchExtractionOptions::resample_freq
BaseFloat resample_freq
Definition: pitch-functions.h:54

kaldi::PitchExtractionOptions::max_f0
BaseFloat max_f0
Definition: pitch-functions.h:49