feature-plp.cc
Go to the documentation of this file.
1 // feat/feature-plp.cc
2 
3 // Copyright 2009-2011 Petr Motlicek; Karel Vesely
4 // 2016 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "feat/feature-plp.h"
23 
24 namespace kaldi {
25 
// Constructor body for PlpComputer, built from a PlpOptions object `opts`.
// NOTE(review): the signature line (listing line 26) is missing from this
// listing; the cross-reference index gives it as
// PlpComputer(const PlpOptions &opts) -- confirm against the real file.
//
// The initializer list stores the options, starts with no split-radix FFT
// object, and sizes the per-frame work buffers from the options:
//   mel_energies_duplicated_ : num_bins + 2 elements
//   autocorr_coeffs_         : lpc_order + 1 elements
//   lpc_coeffs_, raw_cepstrum_ : lpc_order elements each
// kUndefined presumably means the contents are left uninitialized -- confirm.
27  opts_(opts), srfft_(NULL),
28  mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
29  autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
30  lpc_coeffs_(opts_.lpc_order, kUndefined),
31  raw_cepstrum_(opts_.lpc_order, kUndefined) {
32 
// Liftering is only set up when a non-zero lifter is requested; Compute()
// multiplies the features by lifter_coeffs_ under the same condition.
33  if (opts.cepstral_lifter != 0.0) {
34  lifter_coeffs_.Resize(opts.num_ceps);
// NOTE(review): listing line 35 is missing here. Presumably it fills
// lifter_coeffs_ (the index lists ComputeLifterCoeffs(Q, coeffs)) -- confirm.
36  }
// NOTE(review): listing line 37 is missing; only the trailing argument of the
// call survives below. Presumably this is the InitIdftBases(...) call that
// fills idft_bases_ -- confirm.
38  &idft_bases_);
39 
40  if (opts.energy_floor > 0.0)
// NOTE(review): listing line 41 (the statement governed by the `if` above) is
// missing. Presumably it sets log_energy_floor_, which Compute() reads when
// energy_floor > 0 -- confirm.
42 
43  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
// (x & (x-1)) == 0 tests for a power of two. Only then is the split-radix FFT
// object allocated; otherwise srfft_ stays NULL and Compute() falls back to
// RealFft().
44  if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two...
45  srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
46 
47  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
48  // [note: this call caches it.]
49  GetMelBanks(1.0);
50 }
51 
// Tail of what appears to be the copy constructor (it reads `other.srfft_`).
// NOTE(review): listing lines 52-55 (the signature and the first member
// initializers) are missing from this listing. Presumably they copy opts_,
// mel_banks_, equal_loudness_ and the other members from `other` -- confirm.
//
// The work buffers are re-allocated at the sizes implied by opts_ rather than
// copied, and srfft_ starts out NULL.
56  srfft_(NULL),
57  mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
58  autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
59  lpc_coeffs_(opts_.lpc_order, kUndefined),
60  raw_cepstrum_(opts_.lpc_order, kUndefined) {
// Replace every cached MelBanks pointer with a pointer to a freshly allocated
// copy of its pointee, so this object holds its own instances (the destructor
// deletes every entry of the map).
61  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
62  iter != mel_banks_.end(); ++iter)
63  iter->second = new MelBanks(*(iter->second));
// Same treatment for the cached equal-loudness vectors.
64  for (std::map<BaseFloat, Vector<BaseFloat>*>::iterator
65  iter = equal_loudness_.begin();
66  iter != equal_loudness_.end(); ++iter)
67  iter->second = new Vector<BaseFloat>(*(iter->second));
68  if (other.srfft_ != NULL)
// NOTE(review): listing line 69 (the statement governed by the `if` above) is
// missing. Presumably it allocates this object's own SplitRadixRealFft copied
// from *other.srfft_ -- confirm.
70 }
71 
// Body of what appears to be the destructor.
// NOTE(review): the signature line (listing line 72) is missing from this
// listing -- confirm.
//
// Frees every MelBanks object cached in mel_banks_.
73  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
74  iter != mel_banks_.end(); ++iter)
75  delete iter->second;
// Frees every equal-loudness vector cached in equal_loudness_.
76  for (std::map<BaseFloat, Vector<BaseFloat>* >::iterator
77  iter = equal_loudness_.begin();
78  iter != equal_loudness_.end(); ++iter)
79  delete iter->second;
// srfft_ may be NULL (non-power-of-two window); deleting NULL is a no-op.
80  delete srfft_;
81 }
82 
// Body of GetMelBanks: returns the MelBanks object for `vtln_warp`, creating
// and caching it in mel_banks_ on first use.
// NOTE(review): the signature line (listing line 83) is missing from this
// listing; the cross-reference index gives it as
// const MelBanks *GetMelBanks(BaseFloat vtln_warp) -- confirm.
//
// The returned pointer is stored in mel_banks_; entries of that map are
// deleted in the destructor, so callers must not delete it.
84  MelBanks *this_mel_banks = NULL;
// Lookup is by exact floating-point value of the warp factor.
85  std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
86  if (iter == mel_banks_.end()) {
// Cache miss: build the filterbank for this warp factor and remember it.
87  this_mel_banks = new MelBanks(opts_.mel_opts,
// NOTE(review): listing line 88 (the middle constructor argument) is missing
// here -- presumably the frame-extraction options; confirm.
89  vtln_warp);
90  mel_banks_[vtln_warp] = this_mel_banks;
91  } else {
// Cache hit: reuse the existing object.
92  this_mel_banks = iter->second;
93  }
94  return this_mel_banks;
95 }
96 
// Body of GetEqualLoudness: returns the equal-loudness vector for
// `vtln_warp`, computing and caching it in equal_loudness_ on first use.
// NOTE(review): the signature line (listing line 97) is missing from this
// listing; the cross-reference index gives it as
// const Vector<BaseFloat> *GetEqualLoudness(BaseFloat vtln_warp) -- confirm.
//
// The returned pointer is stored in equal_loudness_; entries of that map are
// deleted in the destructor, so callers must not delete it.
//
// The mel banks are fetched first (and created/cached if necessary) because
// the equal-loudness vector is derived from them.
98  const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
99  Vector<BaseFloat> *ans = NULL;
100  std::map<BaseFloat, Vector<BaseFloat>*>::iterator iter
101  = equal_loudness_.find(vtln_warp);
102  if (iter == equal_loudness_.end()) {
// Cache miss: allocate an empty vector, let GetEqualLoudnessVector() fill it
// from the mel banks, and remember it.
103  ans = new Vector<BaseFloat>;
104  GetEqualLoudnessVector(*this_mel_banks, ans);
105  equal_loudness_[vtln_warp] = ans;
106  } else {
// Cache hit: reuse the existing vector.
107  ans = iter->second;
108  }
109  return ans;
110 }
111 
// Computes one frame of PLP features from one frame of signal.
//
//   signal_raw_log_energy  Log-energy of the frame supplied by the caller. It
//                          is written to feature(0) when opts_.use_energy is
//                          set; it may be recomputed from `signal_frame`
//                          below (see the note at listing line 126).
//   vtln_warp              VTLN warp factor; selects (and, on first use,
//                          creates) the cached mel banks and equal-loudness
//                          vector.
//   signal_frame           Input frame; its dimension must equal
//                          opts_.frame_opts.PaddedWindowSize(). It is used as
//                          scratch space: the FFT and the power spectrum are
//                          computed in place, so its contents are destroyed.
//   feature                Output vector; its dimension must equal
//                          this->Dim().
//
// NOTE(review): listing lines 126, 152, 158 and 165 are missing from this
// listing; each gap is marked below.
112 void PlpComputer::Compute(BaseFloat signal_raw_log_energy,
113  BaseFloat vtln_warp,
114  VectorBase<BaseFloat> *signal_frame,
115  VectorBase<BaseFloat> *feature) {
116  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
117  feature->Dim() == this->Dim());
118 
// Both lookups are cached per warp factor; the objects stay owned by *this.
119  const MelBanks &mel_banks = *GetMelBanks(vtln_warp);
120  const Vector<BaseFloat> &equal_loudness = *GetEqualLoudness(vtln_warp);
121 
122 
123  KALDI_ASSERT(opts_.num_ceps <= opts_.lpc_order+1); // our num-ceps includes C0.
124 
125 
// NOTE(review): listing line 126 is missing. Presumably it is a condition
// guarding the recomputation below (energy taken from this frame instead of
// the caller's value) -- confirm.
// The energy is the dot product of the frame with itself, floored at the
// smallest positive normalized float before taking the log.
127  signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
128  std::numeric_limits<float>::min()));
129 
130  if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
131  srfft_->Compute(signal_frame->Data(), true);
132  else // An alternative algorithm that works for non-powers-of-two.
133  RealFft(signal_frame, true);
134 
135  // Convert the FFT into a power spectrum.
136  ComputePowerSpectrum(signal_frame); // elements 0 ... signal_frame->Dim()/2
137 
// View of the first Dim()/2 + 1 elements of the frame, i.e. the power spectrum.
138  SubVector<BaseFloat> power_spectrum(*signal_frame,
139  0, signal_frame->Dim() / 2 + 1);
140 
141  int32 num_mel_bins = opts_.mel_opts.num_bins;
142 
// mel_energies is a view of elements 1..num_mel_bins of the (num_bins + 2)
// long buffer, leaving one slot on each side for the duplicated edge values.
143  SubVector<BaseFloat> mel_energies(mel_energies_duplicated_, 1, num_mel_bins);
144 
145  mel_banks.Compute(power_spectrum, &mel_energies);
146 
// Element-wise equal-loudness weighting, then element-wise power compression.
147  mel_energies.MulElements(equal_loudness);
148 
149  mel_energies.ApplyPow(opts_.compress_factor);
150 
151  // duplicate first and last elements
// NOTE(review): listing line 152 is missing. Presumably it copies element 1
// into element 0, mirroring the assignment below for the last element --
// confirm.
153  mel_energies_duplicated_(num_mel_bins + 1) =
154  mel_energies_duplicated_(num_mel_bins);
155 
156  autocorr_coeffs_.SetZero(); // In case of NaNs or infs
157  autocorr_coeffs_.AddMatVec(1.0, idft_bases_, kNoTrans,
// NOTE(review): listing line 158 (the remaining AddMatVec arguments) is
// missing. Presumably the vector argument is mel_energies_duplicated_, so the
// autocorrelation is obtained via the IDFT bases -- confirm.
159 
160  BaseFloat residual_log_energy = ComputeLpc(autocorr_coeffs_, &lpc_coeffs_);
161 
// NOTE(review): numeric_limits<float>::min() is a tiny positive number, so
// this clamps the residual log-energy to be strictly positive -- confirm
// that this is the intended floor for a log-domain value.
162  residual_log_energy = std::max<BaseFloat>(residual_log_energy,
163  std::numeric_limits<float>::min());
164 
// NOTE(review): listing line 165 is missing. raw_cepstrum_ is read below but
// nothing visible writes it; presumably the missing line converts
// lpc_coeffs_ to cepstra (the index lists Lpc2Cepstrum(n, pLPC, pCepst)) --
// confirm.
// Cepstra go to feature(1..num_ceps-1); feature(0) holds the energy term.
166  feature->Range(1, opts_.num_ceps - 1).CopyFromVec(
167  raw_cepstrum_.Range(0, opts_.num_ceps - 1));
168  (*feature)(0) = residual_log_energy;
169 
170  if (opts_.cepstral_lifter != 0.0)
171  feature->MulElements(lifter_coeffs_);
172 
173  if (opts_.cepstral_scale != 1.0)
174  feature->Scale(opts_.cepstral_scale);
175 
// When use_energy is set, feature(0) is replaced by the (optionally floored)
// signal log-energy. This happens after liftering and scaling, so the energy
// value itself is neither liftered nor scaled.
176  if (opts_.use_energy) {
177  if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
178  signal_raw_log_energy = log_energy_floor_;
179  (*feature)(0) = signal_raw_log_energy;
180  }
181 
// HTK ordering: shift elements 1..num_ceps-1 down by one position and put the
// former element 0 (the energy term) at index num_ceps-1.
182  if (opts_.htk_compat) { // reorder the features.
183  BaseFloat log_energy = (*feature)(0);
184  for (int32 i = 0; i < opts_.num_ceps-1; i++)
185  (*feature)(i) = (*feature)(i+1);
186  (*feature)(opts_.num_ceps-1) = log_energy;
187  }
188 }
189 
190 
191 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
BaseFloat compress_factor
Definition: feature-plp.h:50
int32 Dim() const
Definition: feature-plp.h:111
BaseFloat log_energy_floor_
Definition: feature-plp.h:151
void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst)
Vector< BaseFloat > mel_energies_duplicated_
Definition: feature-plp.h:157
This is the new-style interface to the PLP computation.
Definition: feature-plp.h:101
const Vector< BaseFloat > * GetEqualLoudness(BaseFloat vtln_warp)
Definition: feature-plp.cc:97
Vector< BaseFloat > lifter_coeffs_
Definition: feature-plp.h:149
Vector< BaseFloat > lpc_coeffs_
Definition: feature-plp.h:161
kaldi::int32 int32
MelBanksOptions mel_opts
Definition: feature-plp.h:44
Vector< BaseFloat > autocorr_coeffs_
Definition: feature-plp.h:159
PlpOptions opts_
Definition: feature-plp.h:148
Matrix< BaseFloat > idft_bases_
Definition: feature-plp.h:150
double Log(double x)
Definition: kaldi-math.h:100
FrameExtractionOptions frame_opts
Definition: feature-plp.h:43
void MulElements(const VectorBase< Real > &v)
Multiply element-by-element by another vector.
BaseFloat energy_floor
Definition: feature-plp.h:48
std::map< BaseFloat, Vector< BaseFloat > *> equal_loudness_
Definition: feature-plp.h:153
void ComputeLifterCoeffs(BaseFloat Q, VectorBase< BaseFloat > *coeffs)
BaseFloat ComputeLpc(const VectorBase< BaseFloat > &autocorr_in, Vector< BaseFloat > *lpc_out)
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
BaseFloat cepstral_scale
Definition: feature-plp.h:52
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void InitIdftBases(int32 n_bases, int32 dimension, Matrix< BaseFloat > *mat_out)
void Scale(Real alpha)
Multiplies all elements by this constant.
void Compute(BaseFloat signal_raw_log_energy, BaseFloat vtln_warp, VectorBase< BaseFloat > *signal_frame, VectorBase< BaseFloat > *feature)
Function that computes one frame of features from one frame of signal.
Definition: feature-plp.cc:112
void ComputePowerSpectrum(VectorBase< BaseFloat > *waveform)
const MelBanks * GetMelBanks(BaseFloat vtln_warp)
Definition: feature-plp.cc:83
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void ApplyPow(Real power)
Take all elements of vector to a power.
Definition: kaldi-vector.h:179
PlpComputer(const PlpOptions &opts)
Definition: feature-plp.cc:26
Vector< BaseFloat > raw_cepstrum_
Definition: feature-plp.h:163
void GetEqualLoudnessVector(const MelBanks &mel_banks, Vector< BaseFloat > *ans)
PlpOptions contains basic options for computing PLP features.
Definition: feature-plp.h:42
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
SplitRadixRealFft< BaseFloat > * srfft_
Definition: feature-plp.h:154
std::map< BaseFloat, MelBanks * > mel_banks_
Definition: feature-plp.h:152
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
Represents a non-allocating general vector which can be defined as a sub-vector of a higher-level vector...
Definition: kaldi-vector.h:501
void RealFft(VectorBase< Real > *v, bool forward)
RealFft is a fourier transform of real inputs.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94