#include <mel-computations.h>

Collaboration diagram for MelBanks:

[legend]

Public Member Functions
	MelBanks (const MelBanksOptions &opts, const FrameExtractionOptions &frame_opts, BaseFloat vtln_warp_factor)

void	Compute (const VectorBase< BaseFloat > &fft_energies, VectorBase< BaseFloat > *mel_energies_out) const
	Compute Mel energies (note: not log energies). More...

int32	NumBins () const

const Vector< BaseFloat > &	GetCenterFreqs () const

const std::vector< std::pair< int32, Vector< BaseFloat > > > &	GetBins () const

	MelBanks (const MelBanks &other)

Static Public Member Functions
static BaseFloat	InverseMelScale (BaseFloat mel_freq)

static BaseFloat	MelScale (BaseFloat freq)

static BaseFloat	VtlnWarpFreq (BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat freq)

static BaseFloat	VtlnWarpMelFreq (BaseFloat vtln_low_cutoff, BaseFloat vtln_high_cutoff, BaseFloat low_freq, BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat mel_freq)

Private Member Functions
MelBanks &	operator= (const MelBanks &other)

Private Attributes
Vector< BaseFloat >	center_freqs_

std::vector< std::pair< int32, Vector< BaseFloat > > >	bins_

bool	debug_

bool	htk_mode_

Detailed Description

Definition at line 78 of file mel-computations.h.

Constructor & Destructor Documentation

◆ MelBanks() [1/2]

MelBanks	(	const MelBanksOptions &	opts,
		const FrameExtractionOptions &	frame_opts,
		BaseFloat	vtln_warp_factor
	)

Definition at line 33 of file mel-computations.cc.

References MelBanks::bins_, MelBanks::center_freqs_, MelBanks::debug_, MelBanksOptions::debug_mel, MelBanksOptions::high_freq, MelBanksOptions::htk_mode, rnnlm::i, MelBanks::InverseMelScale(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, MelBanksOptions::low_freq, MelBanks::MelScale(), MelBanksOptions::num_bins, FrameExtractionOptions::PaddedWindowSize(), VectorBase< Real >::Range(), FrameExtractionOptions::samp_freq, MelBanksOptions::vtln_high, MelBanksOptions::vtln_low, and MelBanks::VtlnWarpMelFreq().

                                               :
     htk_mode_(opts.htk_mode) {
   // Builds the mel filterbank: validates the frequency options, then for
   // each of the num_bins bins stores the index of its first active FFT bin
   // together with the weights over its active range (see bins_ below).
   int32 num_bins = opts.num_bins;
   if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins";
   BaseFloat sample_freq = frame_opts.samp_freq;
   int32 window_length_padded = frame_opts.PaddedWindowSize();
   // The padded window must have even length because half of it is used as
   // the number of FFT bins.
   KALDI_ASSERT(window_length_padded % 2 == 0);
   int32 num_fft_bins = window_length_padded / 2;
   BaseFloat nyquist = 0.5 * sample_freq;
 
   // A positive opts.high_freq is used directly; zero or a negative value is
   // treated as an offset added to the Nyquist frequency.
   BaseFloat low_freq = opts.low_freq, high_freq;
   if (opts.high_freq > 0.0)
     high_freq = opts.high_freq;
   else
     high_freq = nyquist + opts.high_freq;
 
   // Require 0 <= low_freq < high_freq <= nyquist.
   if (low_freq < 0.0 || low_freq >= nyquist
       || high_freq <= 0.0 || high_freq > nyquist
       || high_freq <= low_freq)
     KALDI_ERR << "Bad values in options: low-freq " << low_freq
               << " and high-freq " << high_freq << " vs. nyquist "
               << nyquist;
 
   BaseFloat fft_bin_width = sample_freq / window_length_padded;
   // fft-bin width [think of it as Nyquist-freq / half-window-length]
 
   BaseFloat mel_low_freq = MelScale(low_freq);
   BaseFloat mel_high_freq = MelScale(high_freq);
 
   debug_ = opts.debug_mel;
 
   // divide by num_bins+1 in next line because of end-effects where the bins
   // spread out to the sides.
   BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
 
   // As with high_freq, a negative vtln_high is an offset from Nyquist.
   BaseFloat vtln_low = opts.vtln_low,
       vtln_high = opts.vtln_high;
   if (vtln_high < 0.0) {
     vtln_high += nyquist;
   }
 
   // The VTLN cutoffs are only validated when warping is actually applied
   // (warp factor != 1.0); they must lie strictly inside (low_freq, high_freq)
   // with vtln_low < vtln_high.
   if (vtln_warp_factor != 1.0 &&
       (vtln_low < 0.0 || vtln_low <= low_freq
        || vtln_low >= high_freq
        || vtln_high <= 0.0 || vtln_high >= high_freq
        || vtln_high <= vtln_low))
     KALDI_ERR << "Bad values in options: vtln-low " << vtln_low
               << " and vtln-high " << vtln_high << ", versus "
               << "low-freq " << low_freq << " and high-freq "
               << high_freq;
 
   bins_.resize(num_bins);
   center_freqs_.Resize(num_bins);
 
   for (int32 bin = 0; bin < num_bins; bin++) {
     // Bin edges are evenly spaced on the mel scale; each bin spans two
     // spacing steps, so neighbouring bins overlap by one step.
     BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
         center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
         right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
 
     // With VTLN enabled, the three defining points of the bin are warped
     // individually.
     if (vtln_warp_factor != 1.0) {
       left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                  vtln_warp_factor, left_mel);
       center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                  vtln_warp_factor, center_mel);
       right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                   vtln_warp_factor, right_mel);
     }
     center_freqs_(bin) = InverseMelScale(center_mel);
     // this_bin will be a vector of coefficients that is only
     // nonzero where this mel bin is active.
     Vector<BaseFloat> this_bin(num_fft_bins);
     // Indices of the first and last FFT bins falling strictly inside
     // (left_mel, right_mel); -1 means none has been found yet.
     int32 first_index = -1, last_index = -1;
     for (int32 i = 0; i < num_fft_bins; i++) {
       BaseFloat freq = (fft_bin_width * i);  // Center frequency of this fft
                                              // bin.
       BaseFloat mel = MelScale(freq);
       if (mel > left_mel && mel < right_mel) {
         // Weight rises linearly (in mel) from left_mel to center_mel and
         // falls linearly from center_mel to right_mel.
         BaseFloat weight;
         if (mel <= center_mel)
           weight = (mel - left_mel) / (center_mel - left_mel);
         else
          weight = (right_mel-mel) / (right_mel-center_mel);
         this_bin(i) = weight;
         if (first_index == -1)
           first_index = i;
         last_index = i;
       }
     }
     // Fails when no FFT bin landed inside this mel bin.
     KALDI_ASSERT(first_index != -1 && last_index >= first_index
                  && "You may have set --num-mel-bins too large.");
 
     // Keep only the active sub-range of this_bin, plus its starting offset.
     bins_[bin].first = first_index;
     int32 size = last_index + 1 - first_index;
     bins_[bin].second.Resize(size);
     bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size));
 
     // Replicate a bug in HTK, for testing purposes.
     // (Zeroes the first stored weight of the first bin when the low cutoff
     // is nonzero on the mel scale.)
     if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0)
       bins_[bin].second(0) = 0.0;
 
   }
   // Optionally log every bin's offset and weight vector.
   if (debug_) {
     for (size_t i = 0; i < bins_.size(); i++) {
       KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first
                 << ", vec = " << bins_[i].second;
     }
   }
 }

◆ MelBanks() [2/2]

MelBanks ( const MelBanks & other )

Definition at line 144 of file mel-computations.cc.

                                        :
     // Member-wise copy of all four attributes; the body is intentionally
     // empty.
     center_freqs_(other.center_freqs_),
     bins_(other.bins_),
     debug_(other.debug_),
     htk_mode_(other.htk_mode_) { }

Member Function Documentation

◆ Compute()

void Compute	(	const VectorBase< BaseFloat > &	fft_energies,
		VectorBase< BaseFloat > *	mel_energies_out
	)		const

Compute Mel energies (note: not log energies).

At input, "fft_energies" contains the FFT energies (not log).

Definition at line 226 of file mel-computations.cc.

References MelBanks::bins_, MelBanks::debug_, VectorBase< Real >::Dim(), MelBanks::htk_mode_, rnnlm::i, KALDI_ASSERT, KALDI_ISNAN, VectorBase< Real >::Range(), and kaldi::VecVec().

Referenced by FbankComputer::Compute().

                                                                       {
   // Writes one energy per mel bin into *mel_energies_out, which must already
   // have dimension equal to the number of bins.
   // NOTE(review): this body reads `power_spectrum`, whereas the signature
   // shown in the documentation names the first argument `fft_energies` --
   // presumably the definition simply uses a different parameter name than
   // the declaration; confirm against mel-computations.cc.
   int32 num_bins = bins_.size();
   KALDI_ASSERT(mel_energies_out->Dim() == num_bins);
 
   for (int32 i = 0; i < num_bins; i++) {
     // Each bin stores (offset, weights); its energy is the dot product of
     // the weights with the input slice of equal length starting at offset.
     int32 offset = bins_[i].first;
     const Vector<BaseFloat> &v(bins_[i].second);
     BaseFloat energy = VecVec(v, power_spectrum.Range(offset, v.Dim()));
     // HTK-like flooring- for testing purposes (we prefer dither)
     if (htk_mode_ && energy < 1.0) energy = 1.0;
     (*mel_energies_out)(i) = energy;
 
     // The following assert was added due to a problem with OpenBlas that
     // we had at one point (it was a bug in that library).  Just to detect
     // it early.
     KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
   }
 
   // In debug mode, dump all computed energies on a single line of stderr.
   if (debug_) {
     fprintf(stderr, "MEL BANKS:\n");
     for (int32 i = 0; i < num_bins; i++)
       fprintf(stderr, " %f", (*mel_energies_out)(i));
     fprintf(stderr, "\n");
   }
 }

◆ GetBins()

const std::vector<std::pair<int32, Vector<BaseFloat> > >& GetBins ( ) const

inline

Definition at line 119 of file mel-computations.h.

                                                                         {
     // Read-only access to the per-bin (first FFT index, weight vector)
     // pairs built by the constructor.
     return bins_;
   }

◆ GetCenterFreqs()

const Vector<BaseFloat>& GetCenterFreqs ( ) const

inline

Definition at line 117 of file mel-computations.h.

Referenced by kaldi::GetEqualLoudnessVector().

117 { return center_freqs_; }

kaldi::MelBanks::center_freqs_

Vector< BaseFloat > center_freqs_

Definition: mel-computations.h:131

◆ InverseMelScale()

static BaseFloat InverseMelScale ( BaseFloat mel_freq )

inline static

Definition at line 81 of file mel-computations.h.

Referenced by MelBanks::MelBanks(), and MelBanks::VtlnWarpMelFreq().

                                                               {
     // Maps a mel value back to a frequency: the algebraic inverse of
     // mel = 1127 * ln(1 + freq / 700), computed in single precision.
     return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
   }

◆ MelScale()

static BaseFloat MelScale ( BaseFloat freq )

inline static

Definition at line 85 of file mel-computations.h.

References MelBanksOptions::high_freq, and MelBanksOptions::low_freq.

Referenced by MelBanks::MelBanks(), and MelBanks::VtlnWarpMelFreq().

                                                    {
     // Maps a frequency to the mel scale as 1127 * ln(1 + freq / 700),
     // computed in single precision (logf is the natural logarithm).
     return 1127.0f * logf (1.0f + freq / 700.0f);
   }

◆ NumBins()

int32 NumBins ( ) const

inline

Definition at line 114 of file mel-computations.h.

Referenced by kaldi::GetEqualLoudnessVector().

114 { return bins_.size(); }

kaldi::MelBanks::bins_

std::vector< std::pair< int32, Vector< BaseFloat > > > bins_

Definition: mel-computations.h:135

◆ operator=()

MelBanks& operator= ( const MelBanks & other )

private

◆ VtlnWarpFreq()

BaseFloat VtlnWarpFreq	(	BaseFloat	vtln_low_cutoff,
		BaseFloat	vtln_high_cutoff,
		BaseFloat	low_freq,
		BaseFloat	high_freq,
		BaseFloat	vtln_warp_factor,
		BaseFloat	freq
	)

static

This computes a VTLN warping function that is not the same as HTK's one, but has similar inputs (this function has the advantage of never producing empty bins).

This function computes a warp function F(freq), defined between low_freq and high_freq inclusive, with the following properties:

- F(low_freq) == low_freq
- F(high_freq) == high_freq
- The function is continuous and piecewise linear with two inflection points.
- The lower inflection point (measured in terms of the unwarped frequency) is at frequency l, determined as described below.
- The higher inflection point is at a frequency h, determined as described below.
- If l <= f <= h, then F(f) = f/vtln_warp_factor.
- If the higher inflection point (measured in terms of the unwarped frequency) is at h, then max(h, F(h)) == vtln_high_cutoff. Since (by the last point) F(h) == h/vtln_warp_factor, then max(h, h/vtln_warp_factor) == vtln_high_cutoff, so h = vtln_high_cutoff / max(1, 1/vtln_warp_factor) = vtln_high_cutoff * min(1, vtln_warp_factor).
- If the lower inflection point (measured in terms of the unwarped frequency) is at l, then min(l, F(l)) == vtln_low_cutoff. This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor) = vtln_low_cutoff * max(1, vtln_warp_factor).

Definition at line 150 of file mel-computations.cc.

References KALDI_ASSERT.

Referenced by UnitTestVtln(), and MelBanks::VtlnWarpMelFreq().

                                                  {
 
 
 
   // Three-segment piecewise-linear warp of `freq`, with breakpoints at l and
   // h computed below.
   if (freq < low_freq || freq > high_freq) return freq;  // in case this gets called
   // for out-of-range frequencies, just return the freq.
 
   KALDI_ASSERT(vtln_low_cutoff > low_freq &&
                "be sure to set the --vtln-low option higher than --low-freq");
   KALDI_ASSERT(vtln_high_cutoff < high_freq &&
                "be sure to set the --vtln-high option lower than --high-freq [or negative]");
   // `one` is a BaseFloat-typed 1.0, presumably so that std::max / std::min
   // receive two arguments of the same type.
   BaseFloat one = 1.0;
   // Breakpoints in terms of the unwarped frequency.
   BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
   BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
   BaseFloat scale = 1.0 / vtln_warp_factor;
   BaseFloat Fl = scale * l;  // F(l);
   BaseFloat Fh = scale * h;  // F(h);
   // Guarantees nonzero denominators in the two slope computations below.
   KALDI_ASSERT(l > low_freq && h < high_freq);
   // slope of left part of the 3-piece linear function
   // (the segment joining (low_freq, low_freq) and (l, Fl))
   BaseFloat scale_left = (Fl - low_freq) / (l - low_freq);
   // [slope of center part is just "scale"]
 
   // slope of right part of the 3-piece linear function
   // (the segment joining (h, Fh) and (high_freq, high_freq))
   BaseFloat scale_right = (high_freq - Fh) / (high_freq - h);
 
   if (freq < l) {
     return low_freq + scale_left * (freq - low_freq);
   } else if (freq < h) {
     return scale * freq;
   } else {  // freq >= h
     return high_freq + scale_right * (freq - high_freq);
   }
 }

◆ VtlnWarpMelFreq()

BaseFloat VtlnWarpMelFreq	(	BaseFloat	vtln_low_cutoff,
		BaseFloat	vtln_high_cutoff,
		BaseFloat	low_freq,
		BaseFloat	high_freq,
		BaseFloat	vtln_warp_factor,
		BaseFloat	mel_freq
	)

static

Definition at line 213 of file mel-computations.cc.

References MelBanks::InverseMelScale(), MelBanks::MelScale(), and MelBanks::VtlnWarpFreq().

Referenced by MelBanks::MelBanks().

                                                         {
   // Warps a mel-scale value by converting it to a frequency with
   // InverseMelScale, applying VtlnWarpFreq there, and converting the
   // result back with MelScale.
   return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
                                low_freq, high_freq,
                                vtln_warp_factor, InverseMelScale(mel_freq)));
 }

Member Data Documentation

◆ bins_

std::vector<std::pair<int32, Vector<BaseFloat> > > bins_

private

Definition at line 135 of file mel-computations.h.

Referenced by MelBanks::Compute(), and MelBanks::MelBanks().

◆ center_freqs_

Vector<BaseFloat> center_freqs_

private

Definition at line 131 of file mel-computations.h.

Referenced by MelBanks::MelBanks().

◆ debug_

bool debug_

private

Definition at line 137 of file mel-computations.h.

Referenced by MelBanks::Compute(), and MelBanks::MelBanks().

◆ htk_mode_

bool htk_mode_

private

Definition at line 138 of file mel-computations.h.

Referenced by MelBanks::Compute().

The documentation for this class was generated from the following files:

feat/mel-computations.h
feat/mel-computations.cc

Public Member Functions

Static Public Member Functions

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ MelBanks() [1/2]

◆ MelBanks() [2/2]

Member Function Documentation

◆ Compute()

◆ GetBins()

◆ GetCenterFreqs()

◆ InverseMelScale()

◆ MelScale()

◆ NumBins()

◆ operator=()

◆ VtlnWarpFreq()

◆ VtlnWarpMelFreq()

Member Data Documentation

◆ bins_

◆ center_freqs_

◆ debug_

◆ htk_mode_