feature-fbank.cc
Go to the documentation of this file.
1 // feat/feature-fbank.cc
2 
3 // Copyright 2009-2012 Karel Vesely
4 // 2016 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "feat/feature-fbank.h"
23 
24 namespace kaldi {
25 
27  opts_(opts), srfft_(NULL) {
28  if (opts.energy_floor > 0.0)
30 
31  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
32  if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
33  srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
34 
35  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
36  // [note: this call caches it.]
37  GetMelBanks(1.0);
38 }
39 
42  mel_banks_(other.mel_banks_), srfft_(NULL) {
43  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
44  iter != mel_banks_.end();
45  ++iter)
46  iter->second = new MelBanks(*(iter->second));
47  if (other.srfft_)
49 }
50 
52  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
53  iter != mel_banks_.end(); ++iter)
54  delete iter->second;
55  delete srfft_;
56 }
57 
59  MelBanks *this_mel_banks = NULL;
60  std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
61  if (iter == mel_banks_.end()) {
62  this_mel_banks = new MelBanks(opts_.mel_opts,
64  vtln_warp);
65  mel_banks_[vtln_warp] = this_mel_banks;
66  } else {
67  this_mel_banks = iter->second;
68  }
69  return this_mel_banks;
70 }
71 
72 void FbankComputer::Compute(BaseFloat signal_raw_log_energy,
73  BaseFloat vtln_warp,
74  VectorBase<BaseFloat> *signal_frame,
75  VectorBase<BaseFloat> *feature) {
76 
77  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
78 
79  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
80  feature->Dim() == this->Dim());
81 
82 
83  // Compute energy after window function (not the raw one).
85  signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
86  std::numeric_limits<float>::epsilon()));
87 
88  if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
89  srfft_->Compute(signal_frame->Data(), true);
90  else // An alternative algorithm that works for non-powers-of-two.
91  RealFft(signal_frame, true);
92 
93  // Convert the FFT into a power spectrum.
94  ComputePowerSpectrum(signal_frame);
95  SubVector<BaseFloat> power_spectrum(*signal_frame, 0,
96  signal_frame->Dim() / 2 + 1);
97 
98  // Use magnitude instead of power if requested.
99  if (!opts_.use_power)
100  power_spectrum.ApplyPow(0.5);
101 
102  int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
103  SubVector<BaseFloat> mel_energies(*feature,
104  mel_offset,
106 
107  // Sum with mel fiterbanks over the power spectrum
108  mel_banks.Compute(power_spectrum, &mel_energies);
109  if (opts_.use_log_fbank) {
110  // Avoid log of zero (which should be prevented anyway by dithering).
111  mel_energies.ApplyFloor(std::numeric_limits<float>::epsilon());
112  mel_energies.ApplyLog(); // take the log.
113  }
114 
115  // Copy energy as first value (or the last, if htk_compat == true).
116  if (opts_.use_energy) {
117  if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
118  signal_raw_log_energy = log_energy_floor_;
119  }
120  int32 energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
121  (*feature)(energy_index) = signal_raw_log_energy;
122  }
123 }
124 
125 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
kaldi::int32 int32
void ApplyLog()
Apply natural log to all elements.
FrameExtractionOptions frame_opts
Definition: feature-fbank.h:42
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
Definition: kaldi-vector.h:149
double Log(double x)
Definition: kaldi-math.h:100
Class for computing mel-filterbank features; see Computing MFCC features for more information...
Definition: feature-fbank.h:86
void Compute(BaseFloat signal_raw_log_energy, BaseFloat vtln_warp, VectorBase< BaseFloat > *signal_frame, VectorBase< BaseFloat > *feature)
Function that computes one frame of features from one frame of signal.
BaseFloat energy_floor
Definition: feature-fbank.h:45
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Compute(const VectorBase< BaseFloat > &fft_energies, VectorBase< BaseFloat > *mel_energies_out) const
Compute Mel energies (note: not log energies).
int32 Dim() const
Definition: feature-fbank.h:93
FbankComputer(const FbankOptions &opts)
SplitRadixRealFft< BaseFloat > * srfft_
void ComputePowerSpectrum(VectorBase< BaseFloat > *waveform)
BaseFloat log_energy_floor_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void ApplyPow(Real power)
Take all elements of vector to a power.
Definition: kaldi-vector.h:179
MelBanksOptions mel_opts
Definition: feature-fbank.h:43
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
FbankOptions contains basic options for computing filterbank features.
Definition: feature-fbank.h:41
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
const MelBanks * GetMelBanks(BaseFloat vtln_warp)
std::map< BaseFloat, MelBanks * > mel_banks_
void RealFft(VectorBase< Real > *v, bool forward)
RealFft is a fourier transform of real inputs.