feature-spectrogram.cc
Go to the documentation of this file.
1 // feat/feature-spectrogram.cc
2 
3 // Copyright 2009-2012 Karel Vesely
4 // Copyright 2012 Navdeep Jaitly
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
23 
24 
25 namespace kaldi {
26 
28  : opts_(opts), srfft_(NULL) {
29  if (opts.energy_floor > 0.0)
31 
32  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
33  if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two
34  srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
35 }
36 
38  opts_(other.opts_), log_energy_floor_(other.log_energy_floor_), srfft_(NULL) {
39  if (other.srfft_ != NULL)
41 }
42 
44  delete srfft_;
45 }
46 
47 void SpectrogramComputer::Compute(BaseFloat signal_raw_log_energy,
48  BaseFloat vtln_warp,
49  VectorBase<BaseFloat> *signal_frame,
50  VectorBase<BaseFloat> *feature) {
51  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
52  feature->Dim() == this->Dim());
53 
54 
55  // Compute energy after window function (not the raw one)
56  if (!opts_.raw_energy)
57  signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
58  std::numeric_limits<float>::epsilon()));
59 
60  if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
61  srfft_->Compute(signal_frame->Data(), true);
62  else // An alternative algorithm that works for non-powers-of-two
63  RealFft(signal_frame, true);
64 
65  if (opts_.return_raw_fft) {
66  feature->CopyFromVec(*signal_frame);
67  return;
68  }
69 
70  // Convert the FFT into a power spectrum.
71  ComputePowerSpectrum(signal_frame);
72  SubVector<BaseFloat> power_spectrum(*signal_frame,
73  0, signal_frame->Dim() / 2 + 1);
74 
75  power_spectrum.ApplyFloor(std::numeric_limits<float>::epsilon());
76  power_spectrum.ApplyLog();
77 
78  feature->CopyFromVec(power_spectrum);
79 
80  if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
81  signal_raw_log_energy = log_energy_floor_;
82  // The zeroth spectrogram component is always set to the signal energy,
83  // instead of the square of the constant component of the signal.
84  (*feature)(0) = signal_raw_log_energy;
85 }
86 
87 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Class for computing spectrogram features.
kaldi::int32 int32
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
Definition: kaldi-vector.h:149
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
double Log(double x)
Definition: kaldi-math.h:100
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void ComputePowerSpectrum(VectorBase< BaseFloat > *waveform)
SplitRadixRealFft< BaseFloat > * srfft_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Compute(BaseFloat signal_raw_log_energy, BaseFloat vtln_warp, VectorBase< BaseFloat > *signal_frame, VectorBase< BaseFloat > *feature)
Function that computes one frame of spectrogram features from one frame of signal.
SpectrogramOptions contains basic options for computing spectrogram features.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
Definition: kaldi-vector.cc:37
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
FrameExtractionOptions frame_opts
SpectrogramComputer(const SpectrogramOptions &opts)
void RealFft(VectorBase< Real > *v, bool forward)
RealFft is a fourier transform of real inputs.