36 htk_mode_(opts.htk_mode) {
38 if (num_bins < 3)
KALDI_ERR <<
"Must have at least 3 mel bins";
42 int32 num_fft_bins = window_length_padded / 2;
51 if (low_freq < 0.0 || low_freq >= nyquist
52 || high_freq <= 0.0 || high_freq > nyquist
53 || high_freq <= low_freq)
54 KALDI_ERR <<
"Bad values in options: low-freq " << low_freq
55 <<
" and high-freq " << high_freq <<
" vs. nyquist " 58 BaseFloat fft_bin_width = sample_freq / window_length_padded;
68 BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
72 if (vtln_high < 0.0) {
76 if (vtln_warp_factor != 1.0 &&
77 (vtln_low < 0.0 || vtln_low <= low_freq
78 || vtln_low >= high_freq
79 || vtln_high <= 0.0 || vtln_high >= high_freq
80 || vtln_high <= vtln_low))
81 KALDI_ERR <<
"Bad values in options: vtln-low " << vtln_low
82 <<
" and vtln-high " << vtln_high <<
", versus " 83 <<
"low-freq " << low_freq <<
" and high-freq " 86 bins_.resize(num_bins);
89 for (
int32 bin = 0; bin < num_bins; bin++) {
90 BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
91 center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
92 right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
94 if (vtln_warp_factor != 1.0) {
96 vtln_warp_factor, left_mel);
98 vtln_warp_factor, center_mel);
100 vtln_warp_factor, right_mel);
106 int32 first_index = -1, last_index = -1;
107 for (
int32 i = 0;
i < num_fft_bins;
i++) {
111 if (mel > left_mel && mel < right_mel) {
113 if (mel <= center_mel)
114 weight = (mel - left_mel) / (center_mel - left_mel);
116 weight = (right_mel-mel) / (right_mel-center_mel);
117 this_bin(
i) = weight;
118 if (first_index == -1)
123 KALDI_ASSERT(first_index != -1 && last_index >= first_index
124 &&
"You may have set --num-mel-bins too large.");
126 bins_[bin].first = first_index;
127 int32 size = last_index + 1 - first_index;
128 bins_[bin].second.Resize(size);
129 bins_[bin].second.CopyFromVec(this_bin.
Range(first_index, size));
132 if (opts.
htk_mode && bin == 0 && mel_low_freq != 0.0)
133 bins_[bin].second(0) = 0.0;
137 for (
size_t i = 0;
i <
bins_.size();
i++) {
139 <<
", vec = " <<
bins_[
i].second;
183 if (freq < low_freq || freq > high_freq)
return freq;
187 "be sure to set the --vtln-low option higher than --low-freq");
189 "be sure to set the --vtln-high option lower than --high-freq [or negative]");
191 BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
192 BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
193 BaseFloat scale = 1.0 / vtln_warp_factor;
198 BaseFloat scale_left = (Fl - low_freq) / (l - low_freq);
202 BaseFloat scale_right = (high_freq - Fh) / (high_freq - h);
205 return low_freq + scale_left * (freq - low_freq);
206 }
else if (freq < h) {
209 return high_freq + scale_right * (freq - high_freq);
231 for (
int32 i = 0;
i < num_bins;
i++) {
236 if (
htk_mode_ && energy < 1.0) energy = 1.0;
237 (*mel_energies_out)(
i) = energy;
246 fprintf(stderr,
"MEL BANKS:\n");
247 for (
int32 i = 0;
i < num_bins;
i++)
248 fprintf(stderr,
" %f", (*mel_energies_out)(
i));
249 fprintf(stderr,
"\n");
258 (*coeffs)(
i) = 1.0 + 0.5 * Q * sin (
M_PI *
i / Q);
274 for (i = 0; i <
n; i++) {
277 for (j = 0; j <
i; j++)
278 ki += pLP[j] * pAC[i - j];
289 for (j = 0; j <
i; j++)
290 pTmp[j] = pLP[j] - ki * pLP[i - j - 1];
292 for (j = 0; j <=
i; j++)
304 for (j = 0; j <
i; j++) {
305 sum +=
static_cast<BaseFloat>(i -
j) * pLPC[j] * pCepst[i - j - 1];
307 pCepst[
i] = -pLPC[
i] - sum /
static_cast<BaseFloat>(i + 1);
320 (*ans)(
i) = fsub * fsub * ((fsq + 1.44e6) / (fsq + 9.61e6));
335 KALDI_WARN <<
"Zero energy in LPC computation";
336 return -
Log(1.0 / ans);
MelBanks(const MelBanksOptions &opts, const FrameExtractionOptions &frame_opts, BaseFloat vtln_warp_factor)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
static BaseFloat MelScale(BaseFloat freq)
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_
static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat freq)
Vector< BaseFloat > center_freqs_
void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst)
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
static BaseFloat InverseMelScale(BaseFloat mel_freq)
void ComputeLifterCoeffs(BaseFloat Q, VectorBase< BaseFloat > *coeffs)
const Vector< BaseFloat > & GetCenterFreqs() const
BaseFloat ComputeLpc(const VectorBase< BaseFloat > &autocorr_in, Vector< BaseFloat > *lpc_out)
static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat mel_freq)
Real * Data()
Returns a pointer to the start of the vector's data.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Compute(const VectorBase< BaseFloat > &fft_energies, VectorBase< BaseFloat > *mel_energies_out) const
Compute Mel energies (note: not log energies).
A class representing a vector.
#define KALDI_ASSERT(cond)
void GetEqualLoudnessVector(const MelBanks &mel_banks, Vector< BaseFloat > *ans)
Provides a vector abstraction class.
BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp)
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).