24 #ifndef KALDI_FEAT_PITCH_FUNCTIONS_H_ 25 #define KALDI_FEAT_PITCH_FUNCTIONS_H_ 115 frame_shift_ms(10.0),
116 frame_length_ms(25.0),
122 lowpass_cutoff(1000),
126 lowpass_filter_width(1),
127 upsample_filter_width(5),
128 max_frames_latency(0),
130 simulate_first_pass_online(false),
131 recompute_frame(500),
132 nccf_ballast_online(false),
136 opts->
Register(
"sample-frequency", &samp_freq,
137 "Waveform data sample frequency (must match the waveform " 138 "file, if specified there)");
139 opts->
Register(
"frame-length", &frame_length_ms,
"Frame length in " 141 opts->
Register(
"frame-shift", &frame_shift_ms,
"Frame shift in " 143 opts->
Register(
"preemphasis-coefficient", &preemph_coeff,
144 "Coefficient for use in signal preemphasis (deprecated)");
146 "min. F0 to search for (Hz)");
148 "max. F0 to search for (Hz)");
149 opts->
Register(
"soft-min-f0", &soft_min_f0,
150 "Minimum f0, applied in soft way, must not exceed min-f0");
151 opts->
Register(
"penalty-factor", &penalty_factor,
152 "cost factor for FO change.");
153 opts->
Register(
"lowpass-cutoff", &lowpass_cutoff,
154 "cutoff frequency for LowPass filter (Hz) ");
155 opts->
Register(
"resample-frequency", &resample_freq,
156 "Frequency that we down-sample the signal to. Must be " 157 "more than twice lowpass-cutoff");
158 opts->
Register(
"delta-pitch", &delta_pitch,
159 "Smallest relative change in pitch that our algorithm " 161 opts->
Register(
"nccf-ballast", &nccf_ballast,
162 "Increasing this factor reduces NCCF for quiet frames");
163 opts->
Register(
"nccf-ballast-online", &nccf_ballast_online,
164 "This is useful mainly for debug; it affects how the NCCF " 165 "ballast is computed.");
166 opts->
Register(
"lowpass-filter-width", &lowpass_filter_width,
167 "Integer that determines filter width of " 168 "lowpass filter, more gives sharper filter");
169 opts->
Register(
"upsample-filter-width", &upsample_filter_width,
170 "Integer that determines filter width when upsampling NCCF");
171 opts->
Register(
"frames-per-chunk", &frames_per_chunk,
"Only relevant for " 172 "offline pitch extraction (e.g. compute-kaldi-pitch-feats), " 173 "you can set it to a small nonzero value, such as 10, for " 174 "better feature compatibility with online decoding (affects " 175 "energy normalization in the algorithm)");
176 opts->
Register(
"simulate-first-pass-online", &simulate_first_pass_online,
177 "If true, compute-kaldi-pitch-feats will output features " 178 "that correspond to what an online decoder would see in the " 179 "first pass of decoding-- not the final version of the " 180 "features, which is the default. Relevant if " 181 "--frames-per-chunk > 0");
182 opts->
Register(
"recompute-frame", &recompute_frame,
"Only relevant for " 183 "online pitch extraction, or for compatibility with online " 184 "pitch extraction. A non-critical parameter; the frame at " 185 "which we recompute some of the forward pointers, after " 186 "revising our estimate of the signal energy. Relevant if" 187 "--frames-per-chunk > 0");
188 opts->
Register(
"max-frames-latency", &max_frames_latency,
"Maximum number " 189 "of frames of latency that we allow pitch tracking to " 190 "introduce into the feature processing (affects output only " 191 "if --frames-per-chunk > 0 and " 192 "--simulate-first-pass-online=true");
193 opts->
Register(
"snip-edges", &snip_edges,
"If this is set to false, the " 194 "incomplete frames near the ending edge won't be snipped, " 195 "so that the number of frames is the file size divided by " 196 "the frame-shift. This makes different types of features " 197 "give the same number of frames.");
204 return static_cast<int32>(resample_freq * frame_length_ms / 1000.0);
208 return static_cast<int32>(resample_freq * frame_shift_ms / 1000.0);
239 delta_pitch_scale(10.0),
240 delta_pitch_noise_stddev(0.005),
241 normalization_left_context(75),
242 normalization_right_context(75),
245 add_pov_feature(true),
246 add_normalized_log_pitch(true),
247 add_delta_pitch(true),
248 add_raw_log_pitch(false) { }
252 opts->
Register(
"pitch-scale", &pitch_scale,
253 "Scaling factor for the final normalized log-pitch value");
254 opts->
Register(
"pov-scale", &pov_scale,
255 "Scaling factor for final POV (probability of voicing) " 257 opts->
Register(
"pov-offset", &pov_offset,
258 "This can be used to add an offset to the POV feature. " 259 "Intended for use in online decoding as a substitute for " 261 opts->
Register(
"delta-pitch-scale", &delta_pitch_scale,
262 "Term to scale the final delta log-pitch feature");
263 opts->
Register(
"delta-pitch-noise-stddev", &delta_pitch_noise_stddev,
264 "Standard deviation for noise we add to the delta log-pitch " 265 "(before scaling); should be about the same as delta-pitch " 266 "option to pitch creation. The purpose is to get rid of " 267 "peaks in the delta-pitch caused by discretization of pitch " 269 opts->
Register(
"normalization-left-context", &normalization_left_context,
270 "Left-context (in frames) for moving window normalization");
271 opts->
Register(
"normalization-right-context", &normalization_right_context,
272 "Right-context (in frames) for moving window normalization");
273 opts->
Register(
"delta-window", &delta_window,
274 "Number of frames on each side of central frame, to use for " 277 "Number of frames by which the pitch information is " 279 opts->
Register(
"add-pov-feature", &add_pov_feature,
280 "If true, the warped NCCF is added to output features");
281 opts->
Register(
"add-normalized-log-pitch", &add_normalized_log_pitch,
282 "If true, the log-pitch with POV-weighted mean subtraction " 283 "over 1.5 second window is added to output features");
284 opts->
Register(
"add-delta-pitch", &add_delta_pitch,
285 "If true, time derivative of log-pitch is added to output " 287 opts->
Register(
"add-raw-log-pitch", &add_raw_log_pitch,
288 "If true, log(pitch) is added to output features");
306 virtual int32 NumFramesReady()
const;
308 virtual BaseFloat FrameShiftInSeconds()
const;
310 virtual bool IsLastFrame(
int32 frame)
const;
316 virtual void AcceptWaveform(
BaseFloat sampling_rate,
319 virtual void InputFinished();
338 return src_->IsLastFrame(-1);
339 else if (frame < opts_.delay)
340 return src_->IsLastFrame(-1) ==
true ? false : src_->IsLastFrame(0);
342 return src_->IsLastFrame(frame - opts_.delay);
345 return src_->FrameShiftInSeconds();
348 virtual int32 NumFramesReady()
const;
// Compile-time constant fixing the raw feature dimension at 2.
// NOTE(review): presumably the two raw per-frame quantities are the
// (NCCF, pitch) pair consumed by the pitch post-processing — confirm
// against the source feature's Dim().
359 enum { kRawFeatureDim = 2};
376 sum_pov(0.0), sum_log_pitch_pov(0.0) { }
400 inline void GetNormalizationWindow(
int32 frame,
401 int32 src_frames_ready,
403 int32 *window_end)
const;
407 inline void UpdateNormalizationStats(
int32 frame);
450 #endif // KALDI_FEAT_PITCH_FUNCTIONS_H_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
std::vector< BaseFloat > delta_feature_noise_
virtual int32 Dim() const
Base class which provides matrix operations not involving resizing or allocation. ...
bool add_normalized_log_pitch
void ComputeKaldiPitch(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function extracts (pitch, NCCF) per frame, using the pitch extraction method described in "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition" (Ghahremani et al., ICASSP 2014).
virtual BaseFloat FrameShiftInSeconds() const
This online-feature class implements post processing of pitch features.
ProcessPitchOptions opts_
OnlinePitchFeatureImpl * impl_
void Register(const std::string &name, bool *ptr, const std::string &doc)
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
int32 normalization_right_context
virtual int32 Dim() const
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
virtual ~OnlineProcessPitch()
BaseFloat delta_pitch_noise_stddev
std::vector< NormalizationStats > normalization_stats_
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function combines ComputeKaldiPitch and ProcessPitch.
int32 normalization_left_context
BaseFloat delta_pitch_scale
A virtual class for "source" features such as MFCC, PLP, or pitch features.
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function takes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch and processes them into features.
Provides a vector abstraction class.
OnlineFeatureInterface * src_
void Register(ParseOptions *opts)