MelBanks Class Reference

#include <mel-computations.h>

Collaboration diagram for MelBanks:

Public Member Functions

 MelBanks (const MelBanksOptions &opts, const FrameExtractionOptions &frame_opts, BaseFloat vtln_warp_factor)
 
void Compute (const VectorBase< BaseFloat > &fft_energies, VectorBase< BaseFloat > *mel_energies_out) const
 Compute Mel energies (note: not log energies). More...
 
int32 NumBins () const
 
const Vector< BaseFloat > & GetCenterFreqs () const
 
const std::vector< std::pair< int32, Vector< BaseFloat > > > & GetBins () const
 
 MelBanks (const MelBanks &other)
 

Static Public Member Functions

static BaseFloat InverseMelScale (BaseFloat mel_freq)
 
static BaseFloat MelScale (BaseFloat freq)
 
static BaseFloat VtlnWarpFreq (BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat freq)
 
static BaseFloat VtlnWarpMelFreq (BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat mel_freq)
 

Private Member Functions

MelBanks & operator= (const MelBanks &other)
 

Private Attributes

Vector< BaseFloat > center_freqs_
 
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_
 
bool debug_
 
bool htk_mode_
 

Detailed Description

Definition at line 78 of file mel-computations.h.

Constructor & Destructor Documentation

◆ MelBanks() [1/2]

MelBanks ( const MelBanksOptions & opts,
const FrameExtractionOptions & frame_opts,
BaseFloat  vtln_warp_factor 
)

Definition at line 33 of file mel-computations.cc.

References MelBanks::bins_, MelBanks::center_freqs_, MelBanks::debug_, MelBanksOptions::debug_mel, MelBanksOptions::high_freq, MelBanksOptions::htk_mode, rnnlm::i, MelBanks::InverseMelScale(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, MelBanksOptions::low_freq, MelBanks::MelScale(), MelBanksOptions::num_bins, FrameExtractionOptions::PaddedWindowSize(), VectorBase< Real >::Range(), FrameExtractionOptions::samp_freq, MelBanksOptions::vtln_high, MelBanksOptions::vtln_low, and MelBanks::VtlnWarpMelFreq().

35  :
36  htk_mode_(opts.htk_mode) {
37  int32 num_bins = opts.num_bins;
38  if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins";
39  BaseFloat sample_freq = frame_opts.samp_freq;
40  int32 window_length_padded = frame_opts.PaddedWindowSize();
41  KALDI_ASSERT(window_length_padded % 2 == 0);
42  int32 num_fft_bins = window_length_padded / 2;
43  BaseFloat nyquist = 0.5 * sample_freq;
44 
45  BaseFloat low_freq = opts.low_freq, high_freq;
46  if (opts.high_freq > 0.0)
47  high_freq = opts.high_freq;
48  else
49  high_freq = nyquist + opts.high_freq;
50 
51  if (low_freq < 0.0 || low_freq >= nyquist
52  || high_freq <= 0.0 || high_freq > nyquist
53  || high_freq <= low_freq)
54  KALDI_ERR << "Bad values in options: low-freq " << low_freq
55  << " and high-freq " << high_freq << " vs. nyquist "
56  << nyquist;
57 
58  BaseFloat fft_bin_width = sample_freq / window_length_padded;
59  // fft-bin width [think of it as Nyquist-freq / half-window-length]
60 
61  BaseFloat mel_low_freq = MelScale(low_freq);
62  BaseFloat mel_high_freq = MelScale(high_freq);
63 
64  debug_ = opts.debug_mel;
65 
66  // divide by num_bins+1 in next line because of end-effects where the bins
67  // spread out to the sides.
68  BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
69 
70  BaseFloat vtln_low = opts.vtln_low,
71  vtln_high = opts.vtln_high;
72  if (vtln_high < 0.0) {
73  vtln_high += nyquist;
74  }
75 
76  if (vtln_warp_factor != 1.0 &&
77  (vtln_low < 0.0 || vtln_low <= low_freq
78  || vtln_low >= high_freq
79  || vtln_high <= 0.0 || vtln_high >= high_freq
80  || vtln_high <= vtln_low))
81  KALDI_ERR << "Bad values in options: vtln-low " << vtln_low
82  << " and vtln-high " << vtln_high << ", versus "
83  << "low-freq " << low_freq << " and high-freq "
84  << high_freq;
85 
86  bins_.resize(num_bins);
87  center_freqs_.Resize(num_bins);
88 
89  for (int32 bin = 0; bin < num_bins; bin++) {
90  BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
91  center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
92  right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
93 
94  if (vtln_warp_factor != 1.0) {
95  left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
96  vtln_warp_factor, left_mel);
97  center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
98  vtln_warp_factor, center_mel);
99  right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
100  vtln_warp_factor, right_mel);
101  }
102  center_freqs_(bin) = InverseMelScale(center_mel);
103  // this_bin will be a vector of coefficients that is only
104  // nonzero where this mel bin is active.
105  Vector<BaseFloat> this_bin(num_fft_bins);
106  int32 first_index = -1, last_index = -1;
107  for (int32 i = 0; i < num_fft_bins; i++) {
108  BaseFloat freq = (fft_bin_width * i); // Center frequency of this fft
109  // bin.
110  BaseFloat mel = MelScale(freq);
111  if (mel > left_mel && mel < right_mel) {
112  BaseFloat weight;
113  if (mel <= center_mel)
114  weight = (mel - left_mel) / (center_mel - left_mel);
115  else
116  weight = (right_mel-mel) / (right_mel-center_mel);
117  this_bin(i) = weight;
118  if (first_index == -1)
119  first_index = i;
120  last_index = i;
121  }
122  }
123  KALDI_ASSERT(first_index != -1 && last_index >= first_index
124  && "You may have set --num-mel-bins too large.");
125 
126  bins_[bin].first = first_index;
127  int32 size = last_index + 1 - first_index;
128  bins_[bin].second.Resize(size);
129  bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size));
130 
131  // Replicate a bug in HTK, for testing purposes.
132  if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0)
133  bins_[bin].second(0) = 0.0;
134 
135  }
136  if (debug_) {
137  for (size_t i = 0; i < bins_.size(); i++) {
138  KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first
139  << ", vec = " << bins_[i].second;
140  }
141  }
142 }
static BaseFloat MelScale(BaseFloat freq)
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_
Vector< BaseFloat > center_freqs_
kaldi::int32 int32
float BaseFloat
Definition: kaldi-types.h:29
static BaseFloat InverseMelScale(BaseFloat mel_freq)
#define KALDI_ERR
Definition: kaldi-error.h:147
static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat mel_freq)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153

◆ MelBanks() [2/2]

MelBanks ( const MelBanks & other)

Definition at line 144 of file mel-computations.cc.

144  :
145  center_freqs_(other.center_freqs_),
146  bins_(other.bins_),
147  debug_(other.debug_),
148  htk_mode_(other.htk_mode_) { }
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_
Vector< BaseFloat > center_freqs_

Member Function Documentation

◆ Compute()

void Compute ( const VectorBase< BaseFloat > &  fft_energies,
VectorBase< BaseFloat > *  mel_energies_out 
) const

Compute Mel energies (note: not log energies).

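At input, "fft_energies" contains the FFT energies (not log); the definition shown below refers to this same argument as "power_spectrum". "mel_energies_out" must already have dimension equal to the number of mel bins.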

Definition at line 226 of file mel-computations.cc.

References MelBanks::bins_, MelBanks::debug_, VectorBase< Real >::Dim(), MelBanks::htk_mode_, rnnlm::i, KALDI_ASSERT, KALDI_ISNAN, VectorBase< Real >::Range(), and kaldi::VecVec().

Referenced by FbankComputer::Compute().

227  {
228  int32 num_bins = bins_.size();
229  KALDI_ASSERT(mel_energies_out->Dim() == num_bins);
230 
231  for (int32 i = 0; i < num_bins; i++) {
232  int32 offset = bins_[i].first;
233  const Vector<BaseFloat> &v(bins_[i].second);
234  BaseFloat energy = VecVec(v, power_spectrum.Range(offset, v.Dim()));
235  // HTK-like flooring- for testing purposes (we prefer dither)
236  if (htk_mode_ && energy < 1.0) energy = 1.0;
237  (*mel_energies_out)(i) = energy;
238 
239  // The following assert was added due to a problem with OpenBlas that
240  // we had at one point (it was a bug in that library). Just to detect
241  // it early.
242  KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
243  }
244 
245  if (debug_) {
246  fprintf(stderr, "MEL BANKS:\n");
247  for (int32 i = 0; i < num_bins; i++)
248  fprintf(stderr, " %f", (*mel_energies_out)(i));
249  fprintf(stderr, "\n");
250  }
251 }
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_
kaldi::int32 int32
float BaseFloat
Definition: kaldi-types.h:29
#define KALDI_ISNAN
Definition: kaldi-math.h:72
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
Definition: kaldi-vector.cc:37

◆ GetBins()

const std::vector<std::pair<int32, Vector<BaseFloat> > >& GetBins ( ) const
inline

Definition at line 119 of file mel-computations.h.

119  {
120  return bins_;
121  }
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_

◆ GetCenterFreqs()

const Vector<BaseFloat>& GetCenterFreqs ( ) const
inline

Definition at line 117 of file mel-computations.h.

Referenced by kaldi::GetEqualLoudnessVector().

117 { return center_freqs_; }
Vector< BaseFloat > center_freqs_

◆ InverseMelScale()

static BaseFloat InverseMelScale ( BaseFloat  mel_freq)
inlinestatic

Definition at line 81 of file mel-computations.h.

Referenced by MelBanks::MelBanks(), and MelBanks::VtlnWarpMelFreq().

81  {
82  return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
83  }

◆ MelScale()

static BaseFloat MelScale ( BaseFloat  freq)
inlinestatic

Definition at line 85 of file mel-computations.h.

References MelBanksOptions::high_freq, and MelBanksOptions::low_freq.

Referenced by MelBanks::MelBanks(), and MelBanks::VtlnWarpMelFreq().

85  {
86  return 1127.0f * logf (1.0f + freq / 700.0f);
87  }

◆ NumBins()

int32 NumBins ( ) const
inline

Definition at line 114 of file mel-computations.h.

Referenced by kaldi::GetEqualLoudnessVector().

114 { return bins_.size(); }
std::vector< std::pair< int32, Vector< BaseFloat > > > bins_

◆ operator=()

MelBanks& operator= ( const MelBanks & other)
private

◆ VtlnWarpFreq()

BaseFloat VtlnWarpFreq ( BaseFloat  vtln_low_cutoff,
BaseFloat  vtln_high_cutoff,
BaseFloat  low_freq,
BaseFloat  high_freq,
BaseFloat  vtln_warp_factor,
BaseFloat  freq 
)
static

This computes a VTLN warping function that is not the same as HTK's one, but has similar inputs (this function has the advantage of never producing empty bins).

This function computes a warp function F(freq), defined between low_freq and high_freq inclusive, with the following properties:
- F(low_freq) == low_freq.
- F(high_freq) == high_freq.
- The function is continuous and piecewise linear, with two inflection points.
- The lower inflection point (measured in terms of the unwarped frequency) is at frequency l, determined as described below.
- The higher inflection point is at a frequency h, determined as described below.
- If l <= f <= h, then F(f) = f / vtln_warp_factor.
- If the higher inflection point (measured in terms of the unwarped frequency) is at h, then max(h, F(h)) == vtln_high_cutoff. Since (by the previous point) F(h) == h / vtln_warp_factor, it follows that max(h, h / vtln_warp_factor) == vtln_high_cutoff, so h = vtln_high_cutoff / max(1, 1 / vtln_warp_factor) = vtln_high_cutoff * min(1, vtln_warp_factor).
- If the lower inflection point (measured in terms of the unwarped frequency) is at l, then min(l, F(l)) == vtln_low_cutoff. This implies that l = vtln_low_cutoff / min(1, 1 / vtln_warp_factor) = vtln_low_cutoff * max(1, vtln_warp_factor).

Definition at line 150 of file mel-computations.cc.

References KALDI_ASSERT.

Referenced by UnitTestVtln(), and MelBanks::VtlnWarpMelFreq().

155  {
159 
181 
182 
183  if (freq < low_freq || freq > high_freq) return freq; // in case this gets called
184  // for out-of-range frequencies, just return the freq.
185 
186  KALDI_ASSERT(vtln_low_cutoff > low_freq &&
187  "be sure to set the --vtln-low option higher than --low-freq");
188  KALDI_ASSERT(vtln_high_cutoff < high_freq &&
189  "be sure to set the --vtln-high option lower than --high-freq [or negative]");
190  BaseFloat one = 1.0;
191  BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
192  BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
193  BaseFloat scale = 1.0 / vtln_warp_factor;
194  BaseFloat Fl = scale * l; // F(l);
195  BaseFloat Fh = scale * h; // F(h);
196  KALDI_ASSERT(l > low_freq && h < high_freq);
197  // slope of left part of the 3-piece linear function
198  BaseFloat scale_left = (Fl - low_freq) / (l - low_freq);
199  // [slope of center part is just "scale"]
200 
201  // slope of right part of the 3-piece linear function
202  BaseFloat scale_right = (high_freq - Fh) / (high_freq - h);
203 
204  if (freq < l) {
205  return low_freq + scale_left * (freq - low_freq);
206  } else if (freq < h) {
207  return scale * freq;
208  } else { // freq >= h
209  return high_freq + scale_right * (freq - high_freq);
210  }
211 }
float BaseFloat
Definition: kaldi-types.h:29
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ VtlnWarpMelFreq()

BaseFloat VtlnWarpMelFreq ( BaseFloat  vtln_low_cutoff,
BaseFloat  vtln_high_cutoff,
BaseFloat  low_freq,
BaseFloat  high_freq,
BaseFloat  vtln_warp_factor,
BaseFloat  mel_freq 
)
static

Definition at line 213 of file mel-computations.cc.

References MelBanks::InverseMelScale(), MelBanks::MelScale(), and MelBanks::VtlnWarpFreq().

Referenced by MelBanks::MelBanks().

218  {
219  return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
220  low_freq, high_freq,
221  vtln_warp_factor, InverseMelScale(mel_freq)));
222 }
static BaseFloat MelScale(BaseFloat freq)
static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat freq)
static BaseFloat InverseMelScale(BaseFloat mel_freq)

Member Data Documentation

◆ bins_

std::vector<std::pair<int32, Vector<BaseFloat> > > bins_
private

Definition at line 135 of file mel-computations.h.

Referenced by MelBanks::Compute(), and MelBanks::MelBanks().

◆ center_freqs_

Vector<BaseFloat> center_freqs_
private

Definition at line 131 of file mel-computations.h.

Referenced by MelBanks::MelBanks().

◆ debug_

bool debug_
private

Definition at line 137 of file mel-computations.h.

Referenced by MelBanks::Compute(), and MelBanks::MelBanks().

◆ htk_mode_

bool htk_mode_
private

Definition at line 138 of file mel-computations.h.

Referenced by MelBanks::Compute().


The documentation for this class was generated from the following files: