21 #ifndef KALDI_IVECTOR_VOICE_ACTIVITY_DETECTION_H_ 22 #define KALDI_IVECTOR_VOICE_ACTIVITY_DETECTION_H_ 49 vad_energy_mean_scale(0.5),
50 vad_frames_context(0),
51 vad_proportion_threshold(0.6) { }
53 opts->
Register(
"vad-energy-threshold", &vad_energy_threshold,
54 "Constant term in energy threshold for MFCC0 for VAD (also see " 55 "--vad-energy-mean-scale)");
56 opts->
Register(
"vad-energy-mean-scale", &vad_energy_mean_scale,
57 "If this is set to s, to get the actual threshold we " 58 "let m be the mean log-energy of the file, and use " 59 "s*m + vad-energy-threshold");
60 opts->
Register(
"vad-frames-context", &vad_frames_context,
61 "Number of frames of context on each side of central frame, " 62 "in window for which energy is monitored");
63 opts->
Register(
"vad-proportion-threshold", &vad_proportion_threshold,
64 "Parameter controlling the proportion of frames within " 65 "the window that need to have more energy than the " 89 #endif // KALDI_IVECTOR_VOICE_ACTIVITY_DETECTION_H_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Base class which provides matrix operations not involving resizing or allocation. ...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
BaseFloat vad_energy_mean_scale
BaseFloat vad_proportion_threshold
void Register(OptionsItf *opts)
A class representing a vector.
BaseFloat vad_energy_threshold
void ComputeVadEnergy(const VadEnergyOptions &opts, const MatrixBase< BaseFloat > &feats, Vector< BaseFloat > *output_voiced)
Compute voice-activity vector for a file: 1 if we judge the frame as voiced, 0 otherwise.