feature-mfcc.cc
Go to the documentation of this file.
1 // feat/feature-mfcc.cc
2 
3 // Copyright 2009-2011 Karel Vesely; Petr Motlicek
4 // 2016 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "feat/feature-mfcc.h"
23 
24 
25 namespace kaldi {
26 
27 
28 void MfccComputer::Compute(BaseFloat signal_raw_log_energy,
29  BaseFloat vtln_warp,
30  VectorBase<BaseFloat> *signal_frame,
31  VectorBase<BaseFloat> *feature) {
32  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
33  feature->Dim() == this->Dim());
34 
35  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
36 
38  signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
39  std::numeric_limits<float>::epsilon()));
40 
41  if (srfft_ != NULL) // Compute FFT using the split-radix algorithm.
42  srfft_->Compute(signal_frame->Data(), true);
43  else // An alternative algorithm that works for non-powers-of-two.
44  RealFft(signal_frame, true);
45 
46  // Convert the FFT into a power spectrum.
47  ComputePowerSpectrum(signal_frame);
48  SubVector<BaseFloat> power_spectrum(*signal_frame, 0,
49  signal_frame->Dim() / 2 + 1);
50 
51  mel_banks.Compute(power_spectrum, &mel_energies_);
52 
53  // avoid log of zero (which should be prevented anyway by dithering).
54  mel_energies_.ApplyFloor(std::numeric_limits<float>::epsilon());
55  mel_energies_.ApplyLog(); // take the log.
56 
57  feature->SetZero(); // in case there were NaNs.
58  // feature = dct_matrix_ * mel_energies [which now have log]
59  feature->AddMatVec(1.0, dct_matrix_, kNoTrans, mel_energies_, 0.0);
60 
61  if (opts_.cepstral_lifter != 0.0)
62  feature->MulElements(lifter_coeffs_);
63 
64  if (opts_.use_energy) {
65  if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
66  signal_raw_log_energy = log_energy_floor_;
67  (*feature)(0) = signal_raw_log_energy;
68  }
69 
70  if (opts_.htk_compat) {
71  BaseFloat energy = (*feature)(0);
72  for (int32 i = 0; i < opts_.num_ceps - 1; i++)
73  (*feature)(i) = (*feature)(i+1);
74  if (!opts_.use_energy)
75  energy *= M_SQRT2; // scale on C0 (actually removing a scale
76  // we previously added that's part of one common definition of
77  // the cosine transform.)
78  (*feature)(opts_.num_ceps - 1) = energy;
79  }
80 }
81 
83  opts_(opts), srfft_(NULL),
84  mel_energies_(opts.mel_opts.num_bins) {
85 
86  int32 num_bins = opts.mel_opts.num_bins;
87  if (opts.num_ceps > num_bins)
88  KALDI_ERR << "num-ceps cannot be larger than num-mel-bins."
89  << " It should be smaller or equal. You provided num-ceps: "
90  << opts.num_ceps << " and num-mel-bins: "
91  << num_bins;
92 
93  Matrix<BaseFloat> dct_matrix(num_bins, num_bins);
94  ComputeDctMatrix(&dct_matrix);
95  // Note that we include zeroth dct in either case. If using the
96  // energy we replace this with the energy. This means a different
97  // ordering of features than HTK.
98  SubMatrix<BaseFloat> dct_rows(dct_matrix, 0, opts.num_ceps, 0, num_bins);
99  dct_matrix_.Resize(opts.num_ceps, num_bins);
100  dct_matrix_.CopyFromMat(dct_rows); // subset of rows.
101  if (opts.cepstral_lifter != 0.0) {
102  lifter_coeffs_.Resize(opts.num_ceps);
104  }
105  if (opts.energy_floor > 0.0)
107 
108  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
109  if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
110  srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
111 
112  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
113  // [note: this call caches it.]
114  GetMelBanks(1.0);
115 }
116 
118  opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_),
119  dct_matrix_(other.dct_matrix_),
121  mel_banks_(other.mel_banks_),
122  srfft_(NULL),
124  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
125  iter != mel_banks_.end(); ++iter)
126  iter->second = new MelBanks(*(iter->second));
127  if (other.srfft_ != NULL)
129 }
130 
131 
132 
134  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
135  iter != mel_banks_.end();
136  ++iter)
137  delete iter->second;
138  delete srfft_;
139 }
140 
142  MelBanks *this_mel_banks = NULL;
143  std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
144  if (iter == mel_banks_.end()) {
145  this_mel_banks = new MelBanks(opts_.mel_opts,
147  vtln_warp);
148  mel_banks_[vtln_warp] = this_mel_banks;
149  } else {
150  this_mel_banks = iter->second;
151  }
152  return this_mel_banks;
153 }
154 
155 
156 
157 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
void ComputeDctMatrix(Matrix< Real > *M)
ComputeDctMatrix computes a matrix corresponding to the DCT, such that M * v equals the DCT of vector...
kaldi::int32 int32
const MelBanks * GetMelBanks(BaseFloat vtln_warp)
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MfccComputer(const MfccOptions &opts)
Definition: feature-mfcc.cc:82
Vector< BaseFloat > mel_energies_
Definition: feature-mfcc.h:144
BaseFloat energy_floor
Definition: feature-mfcc.h:43
BaseFloat cepstral_lifter
Definition: feature-mfcc.h:46
BaseFloat log_energy_floor_
Definition: feature-mfcc.h:138
std::map< BaseFloat, MelBanks * > mel_banks_
Definition: feature-mfcc.h:139
MelBanksOptions mel_opts
Definition: feature-mfcc.h:40
#define M_SQRT2
Definition: kaldi-math.h:48
double Log(double x)
Definition: kaldi-math.h:100
FrameExtractionOptions frame_opts
Definition: feature-mfcc.h:39
void MulElements(const VectorBase< Real > &v)
Multiply element-by-element by another vector.
Matrix< BaseFloat > dct_matrix_
Definition: feature-mfcc.h:137
void ComputeLifterCoeffs(BaseFloat Q, VectorBase< BaseFloat > *coeffs)
#define KALDI_ERR
Definition: kaldi-error.h:147
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <-- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void ComputePowerSpectrum(VectorBase< BaseFloat > *waveform)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Vector< BaseFloat > lifter_coeffs_
Definition: feature-mfcc.h:136
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void SetZero()
Set vector to all zeros.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
SplitRadixRealFft< BaseFloat > * srfft_
Definition: feature-mfcc.h:140
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void RealFft(VectorBase< Real > *v, bool forward)
RealFft is a fourier transform of real inputs.
int32 Dim() const
Definition: feature-mfcc.h:97
void Compute(BaseFloat signal_raw_log_energy, BaseFloat vtln_warp, VectorBase< BaseFloat > *signal_frame, VectorBase< BaseFloat > *feature)
Function that computes one frame of features from one frame of signal.
Definition: feature-mfcc.cc:28