24 #ifndef KALDI_FEAT_PITCH_FUNCTIONS_H_    25 #define KALDI_FEAT_PITCH_FUNCTIONS_H_   115       frame_shift_ms(10.0),
   116       frame_length_ms(25.0),
   122       lowpass_cutoff(1000),
   126       lowpass_filter_width(1),
   127       upsample_filter_width(5),
   128       max_frames_latency(0),
   130       simulate_first_pass_online(false),
   131       recompute_frame(500),
   132       nccf_ballast_online(false),
   136     opts->
Register(
"sample-frequency", &samp_freq,
   137                    "Waveform data sample frequency (must match the waveform "   138                    "file, if specified there)");
   139     opts->
Register(
"frame-length", &frame_length_ms, 
"Frame length in "   141     opts->
Register(
"frame-shift", &frame_shift_ms, 
"Frame shift in "   143     opts->
Register(
"preemphasis-coefficient", &preemph_coeff,
   144                    "Coefficient for use in signal preemphasis (deprecated)");
   146                    "min. F0 to search for (Hz)");
   148                    "max. F0 to search for (Hz)");
   149     opts->
Register(
"soft-min-f0", &soft_min_f0,
   150                    "Minimum f0, applied in soft way, must not exceed min-f0");
   151     opts->
Register(
"penalty-factor", &penalty_factor,
   152                    "cost factor for FO change.");
   153     opts->
Register(
"lowpass-cutoff", &lowpass_cutoff,
   154                    "cutoff frequency for LowPass filter (Hz) ");
   155     opts->
Register(
"resample-frequency", &resample_freq,
   156                    "Frequency that we down-sample the signal to.  Must be "   157                    "more than twice lowpass-cutoff");
   158     opts->
Register(
"delta-pitch", &delta_pitch,
   159                    "Smallest relative change in pitch that our algorithm "   161     opts->
Register(
"nccf-ballast", &nccf_ballast,
   162                    "Increasing this factor reduces NCCF for quiet frames");
   163     opts->
Register(
"nccf-ballast-online", &nccf_ballast_online,
   164                    "This is useful mainly for debug; it affects how the NCCF "   165                    "ballast is computed.");
   166     opts->
Register(
"lowpass-filter-width", &lowpass_filter_width,
   167                    "Integer that determines filter width of "   168                    "lowpass filter, more gives sharper filter");
   169     opts->
Register(
"upsample-filter-width", &upsample_filter_width,
   170                    "Integer that determines filter width when upsampling NCCF");
   171     opts->
Register(
"frames-per-chunk", &frames_per_chunk, 
"Only relevant for "   172                    "offline pitch extraction (e.g. compute-kaldi-pitch-feats), "   173                    "you can set it to a small nonzero value, such as 10, for "   174                    "better feature compatibility with online decoding (affects "   175                    "energy normalization in the algorithm)");
   176     opts->
Register(
"simulate-first-pass-online", &simulate_first_pass_online,
   177                    "If true, compute-kaldi-pitch-feats will output features "   178                    "that correspond to what an online decoder would see in the "   179                    "first pass of decoding-- not the final version of the "   180                    "features, which is the default.  Relevant if "   181                    "--frames-per-chunk > 0");
   182     opts->
Register(
"recompute-frame", &recompute_frame, 
"Only relevant for "   183                    "online pitch extraction, or for compatibility with online "   184                    "pitch extraction.  A non-critical parameter; the frame at "   185                    "which we recompute some of the forward pointers, after "   186                    "revising our estimate of the signal energy.  Relevant if"   187                    "--frames-per-chunk > 0");
   188     opts->
Register(
"max-frames-latency", &max_frames_latency, 
"Maximum number "   189                    "of frames of latency that we allow pitch tracking to "   190                    "introduce into the feature processing (affects output only "   191                    "if --frames-per-chunk > 0 and "   192                    "--simulate-first-pass-online=true");
   193     opts->
Register(
"snip-edges", &snip_edges, 
"If this is set to false, the "   194                    "incomplete frames near the ending edge won't be snipped, "   195                    "so that the number of frames is the file size divided by "   196                    "the frame-shift. This makes different types of features "   197                    "give the same number of frames.");
   204     return static_cast<int32>(resample_freq * frame_length_ms / 1000.0);
   208     return static_cast<int32>(resample_freq * frame_shift_ms / 1000.0);
   239       delta_pitch_scale(10.0),
   240       delta_pitch_noise_stddev(0.005),
   241       normalization_left_context(75),
   242       normalization_right_context(75),
   245       add_pov_feature(true),
   246       add_normalized_log_pitch(true),
   247       add_delta_pitch(true),
   248       add_raw_log_pitch(false) { }
   252     opts->
Register(
"pitch-scale", &pitch_scale,
   253                    "Scaling factor for the final normalized log-pitch value");
   254     opts->
Register(
"pov-scale", &pov_scale,
   255                    "Scaling factor for final POV (probability of voicing) "   257     opts->
Register(
"pov-offset", &pov_offset,
   258                    "This can be used to add an offset to the POV feature. "   259                    "Intended for use in online decoding as a substitute for "   261     opts->
Register(
"delta-pitch-scale", &delta_pitch_scale,
   262                    "Term to scale the final delta log-pitch feature");
   263     opts->
Register(
"delta-pitch-noise-stddev", &delta_pitch_noise_stddev,
   264                    "Standard deviation for noise we add to the delta log-pitch "   265                    "(before scaling); should be about the same as delta-pitch "   266                    "option to pitch creation.  The purpose is to get rid of "   267                    "peaks in the delta-pitch caused by discretization of pitch "   269     opts->
Register(
"normalization-left-context", &normalization_left_context,
   270                    "Left-context (in frames) for moving window normalization");
   271     opts->
Register(
"normalization-right-context", &normalization_right_context,
   272                    "Right-context (in frames) for moving window normalization");
   273     opts->
Register(
"delta-window", &delta_window,
   274                    "Number of frames on each side of central frame, to use for "   277                    "Number of frames by which the pitch information is "   279     opts->
Register(
"add-pov-feature", &add_pov_feature,
   280                    "If true, the warped NCCF is added to output features");
   281     opts->
Register(
"add-normalized-log-pitch", &add_normalized_log_pitch,
   282                    "If true, the log-pitch with POV-weighted mean subtraction "   283                    "over 1.5 second window is added to output features");
   284     opts->
Register(
"add-delta-pitch", &add_delta_pitch,
   285                    "If true, time derivative of log-pitch is added to output "   287     opts->
Register(
"add-raw-log-pitch", &add_raw_log_pitch,
   288                    "If true, log(pitch) is added to output features");
   306   virtual int32 NumFramesReady() 
const;
   308   virtual BaseFloat FrameShiftInSeconds() 
const;
   310   virtual bool IsLastFrame(
int32 frame) 
const;
   316   virtual void AcceptWaveform(
BaseFloat sampling_rate,
   319   virtual void InputFinished();
   338       return src_->IsLastFrame(-1);
   339     else if (frame < opts_.delay)
   340       return src_->IsLastFrame(-1) == 
true ? false : src_->IsLastFrame(0);
   342       return src_->IsLastFrame(frame - opts_.delay);
   345     return src_->FrameShiftInSeconds();
   348   virtual int32 NumFramesReady() 
const;
   359   enum { kRawFeatureDim = 2};  
   376                           sum_pov(0.0), sum_log_pitch_pov(0.0) { }
   400   inline void GetNormalizationWindow(
int32 frame,
   401                                      int32 src_frames_ready,
   403                                      int32 *window_end) 
const;
   407   inline void UpdateNormalizationStats(
int32 frame);
   450 #endif  // KALDI_FEAT_PITCH_FUNCTIONS_H_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
std::vector< BaseFloat > delta_feature_noise_
 
virtual int32 Dim() const
 
Base class which provides matrix operations not involving resizing or allocation. ...
 
bool add_normalized_log_pitch
 
void ComputeKaldiPitch(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function extracts (pitch, NCCF) per frame, using the pitch extraction method described in "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition".
 
virtual BaseFloat FrameShiftInSeconds() const
 
This online-feature class implements post processing of pitch features. 
 
ProcessPitchOptions opts_
 
OnlinePitchFeatureImpl * impl_
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
 
int32 normalization_right_context
 
virtual int32 Dim() const
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
virtual ~OnlineProcessPitch()
 
BaseFloat delta_pitch_noise_stddev
 
std::vector< NormalizationStats > normalization_stats_
 
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function combines ComputeKaldiPitch and ProcessPitch. 
 
int32 normalization_left_context
 
BaseFloat delta_pitch_scale
 
A virtual base class for "source" features such as MFCC, PLP, or pitch features. 
 
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame. 
 
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
 
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function takes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch and processes them into features. 
 
Provides a vector abstraction class. 
 
OnlineFeatureInterface * src_
 
void Register(ParseOptions *opts)