doc/feature-functions_8cc_source.html

 // feat/feature-functions.cc

 // Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Microsoft Corporation
 //                2013  Johns Hopkins University (author: Daniel Povey)
 //                2014  IMSL, PKU-HKUST (author: Wei Shi)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #include "feat/feature-functions.h"
 #include "matrix/matrix-functions.h"


 namespace kaldi {

 void ComputePowerSpectrum(VectorBase<BaseFloat> *waveform) {
   int32 dim = waveform->Dim();

   // no, letting it be non-power-of-two for now.
   // KALDI_ASSERT(dim > 0 && (dim & (dim-1) == 0));  // make sure a power of two.. actually my FFT code
   // does not require this (dan) but this is better in case we use different code [dan].

   // RealFft(waveform, true);  // true == forward (not inverse) FFT; makes no difference here,
   // as we just want power spectrum.

   // now we have in waveform, first half of complex spectrum
   // it's stored as [real0, realN/2, real1, im1, real2, im2, ...]
   int32 half_dim = dim/2;
   BaseFloat first_energy = (*waveform)(0) * (*waveform)(0),
       last_energy = (*waveform)(1) * (*waveform)(1);  // handle this special case
   for (int32 i = 1; i < half_dim; i++) {
     BaseFloat real = (*waveform)(i*2), im = (*waveform)(i*2 + 1);
     (*waveform)(i) = real*real + im*im;
   }
   (*waveform)(0) = first_energy;
   (*waveform)(half_dim) = last_energy;  // Will actually never be used, and anyway
   // if the signal has been bandlimited sensibly this should be zero.
 }


 DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
   KALDI_ASSERT(opts.order >= 0 && opts.order < 1000);  // just make sure we don't get binary junk.
   // opts will normally be 2 or 3.
   KALDI_ASSERT(opts.window > 0 && opts.window < 1000);  // again, basic sanity check.
   // normally the window size will be two.

   scales_.resize(opts.order+1);
   scales_[0].Resize(1);
   scales_[0](0) = 1.0;  // trivial window for 0th order delta [i.e. baseline feats]

   for (int32 i = 1; i <= opts.order; i++) {
     Vector<BaseFloat> &prev_scales = scales_[i-1],
         &cur_scales = scales_[i];
     int32 window = opts.window;  // this code is designed to still
     // work if instead we later make it an array and do opts.window[i-1],
     // or something like that. "window" is a parameter specifying delta-window
     // width which is actually 2*window + 1.
     KALDI_ASSERT(window != 0);
     int32 prev_offset = (static_cast<int32>(prev_scales.Dim()-1))/2,
         cur_offset = prev_offset + window;
     cur_scales.Resize(prev_scales.Dim() + 2*window);  // also zeros it.

     BaseFloat normalizer = 0.0;
     for (int32 j = -window; j <= window; j++) {
       normalizer += j*j;
       for (int32 k = -prev_offset; k <= prev_offset; k++) {
         cur_scales(j+k+cur_offset) +=
             static_cast<BaseFloat>(j) * prev_scales(k+prev_offset);
       }
     }
     cur_scales.Scale(1.0 / normalizer);
   }
 }

 void DeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
                             int32 frame,
                             VectorBase<BaseFloat> *output_frame) const {
   KALDI_ASSERT(frame < input_feats.NumRows());
   int32 num_frames = input_feats.NumRows(),
       feat_dim = input_feats.NumCols();
   KALDI_ASSERT(static_cast<int32>(output_frame->Dim()) == feat_dim * (opts_.order+1));
   output_frame->SetZero();
   for (int32 i = 0; i <= opts_.order; i++) {
     const Vector<BaseFloat> &scales = scales_[i];
     int32 max_offset = (scales.Dim() - 1) / 2;
     SubVector<BaseFloat> output(*output_frame, i*feat_dim, feat_dim);
     for (int32 j = -max_offset; j <= max_offset; j++) {
       // if asked to read
       int32 offset_frame = frame + j;
       if (offset_frame < 0) offset_frame = 0;
       else if (offset_frame >= num_frames)
         offset_frame = num_frames - 1;
       BaseFloat scale = scales(j + max_offset);
       if (scale != 0.0)
         output.AddVec(scale, input_feats.Row(offset_frame));
     }
   }
 }

 ShiftedDeltaFeatures::ShiftedDeltaFeatures(
   const ShiftedDeltaFeaturesOptions &opts): opts_(opts) {
   KALDI_ASSERT(opts.window > 0 && opts.window < 1000);

   // Default window is 1.
   int32 window = opts.window;
   KALDI_ASSERT(window != 0);
   scales_.Resize(1 + 2*window);  // also zeros it.
   BaseFloat normalizer = 0.0;
   for (int32 j = -window; j <= window; j++) {
     normalizer += j*j;
     scales_(j + window) += static_cast<BaseFloat>(j);
   }
   scales_.Scale(1.0 / normalizer);
 }

 void ShiftedDeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
                             int32 frame,
                             SubVector<BaseFloat> *output_frame) const {
   KALDI_ASSERT(frame < input_feats.NumRows());
   int32 num_frames = input_feats.NumRows(),
       feat_dim = input_feats.NumCols();
   KALDI_ASSERT(static_cast<int32>(output_frame->Dim())
                == feat_dim * (opts_.num_blocks + 1));
   output_frame->SetZero();

   // The original features
   SubVector<BaseFloat> output(*output_frame, 0, feat_dim);
   output.AddVec(1.0, input_feats.Row(frame));

   // Concatenate the delta-blocks. Each block is block_shift
   // (usually 3) frames apart.
   for (int32 i = 0; i < opts_.num_blocks; i++) {
     int32 max_offset = (scales_.Dim() - 1) / 2;
     SubVector<BaseFloat> output(*output_frame, (i + 1) * feat_dim, feat_dim);
     for (int32 j = -max_offset; j <= max_offset; j++) {
       int32 offset_frame = frame + j + i * opts_.block_shift;
       if (offset_frame < 0) offset_frame = 0;
       else if (offset_frame >= num_frames)
         offset_frame = num_frames - 1;
       BaseFloat scale = scales_(j + max_offset);
       if (scale != 0.0)
         output.AddVec(scale, input_feats.Row(offset_frame));
     }
   }
 }

 void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
                    const MatrixBase<BaseFloat> &input_features,
                    Matrix<BaseFloat> *output_features) {
   output_features->Resize(input_features.NumRows(),
                           input_features.NumCols()
                           *(delta_opts.order + 1));
   DeltaFeatures delta(delta_opts);
   for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
     SubVector<BaseFloat> row(*output_features, r);
     delta.Process(input_features, r, &row);
   }
 }

 void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
                    const MatrixBase<BaseFloat> &input_features,
                    Matrix<BaseFloat> *output_features) {
   output_features->Resize(input_features.NumRows(),
                           input_features.NumCols()
                           * (delta_opts.num_blocks + 1));
   ShiftedDeltaFeatures delta(delta_opts);

   for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
     SubVector<BaseFloat> row(*output_features, r);
     delta.Process(input_features, r, &row);
   }
 }


 void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out) {
   BaseFloat angle = M_PI / static_cast<BaseFloat>(dimension - 1);
   BaseFloat scale = 1.0f / (2.0 * static_cast<BaseFloat>(dimension - 1));
   mat_out->Resize(n_bases, dimension);
   for (int32 i = 0; i < n_bases; i++) {
     (*mat_out)(i, 0) = 1.0 * scale;
     BaseFloat i_fl = static_cast<BaseFloat>(i);
     for (int32 j = 1; j < dimension - 1; j++) {
       BaseFloat j_fl = static_cast<BaseFloat>(j);
       (*mat_out)(i, j) = 2.0 * scale * cos(angle * i_fl * j_fl);
     }

     (*mat_out)(i, dimension -1)
         = scale * cos(angle * i_fl * static_cast<BaseFloat>(dimension-1));
   }
 }

 void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
                   int32 left_context,
                   int32 right_context,
                   Matrix<BaseFloat> *output_features) {
   int32 T = input_features.NumRows(), D = input_features.NumCols();
   if (T == 0 || D == 0)
     KALDI_ERR << "SpliceFrames: empty input";
   KALDI_ASSERT(left_context >= 0 && right_context >= 0);
   int32 N = 1 + left_context + right_context;
   output_features->Resize(T, D*N);
   for (int32 t = 0; t < T; t++) {
     SubVector<BaseFloat> dst_row(*output_features, t);
     for (int32 j = 0; j < N; j++) {
       int32 t2 = t + j - left_context;
       if (t2 < 0) t2 = 0;
       if (t2 >= T) t2 = T-1;
       SubVector<BaseFloat> dst(dst_row, j*D, D),
           src(input_features, t2);
       dst.CopyFromVec(src);
     }
   }
 }

 void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
                    Matrix<BaseFloat> *output_features) {
   int32 T = input_features.NumRows(), D = input_features.NumCols();
   if (T == 0 || D == 0)
     KALDI_ERR << "ReverseFrames: empty input";
   output_features->Resize(T, D);
   for (int32 t = 0; t < T; t++) {
     SubVector<BaseFloat> dst_row(*output_features, t);
     SubVector<BaseFloat> src_row(input_features, T-1-t);
     dst_row.CopyFromVec(src_row);
   }
 }


 void SlidingWindowCmnOptions::Check() const {
   KALDI_ASSERT(cmn_window > 0);
   if (center)
     KALDI_ASSERT(min_window > 0 && min_window <= cmn_window);
   // else ignored so value doesn't matter.
 }

 // Internal version of SlidingWindowCmn with double-precision arguments.
 void SlidingWindowCmnInternal(const SlidingWindowCmnOptions &opts,
                               const MatrixBase<double> &input,
                               MatrixBase<double> *output) {
   opts.Check();
   int32 num_frames = input.NumRows(), dim = input.NumCols(),
         last_window_start = -1, last_window_end = -1,
         warning_count = 0;
   Vector<double> cur_sum(dim), cur_sumsq(dim);

   for (int32 t = 0; t < num_frames; t++) {
     int32 window_start, window_end; // note: window_end will be one
     // past the end of the window we use for normalization.
     if (opts.center) {
       window_start = t - (opts.cmn_window / 2);
       window_end = window_start + opts.cmn_window;
     } else {
       window_start = t - opts.cmn_window;
       window_end = t + 1;
     }
     if (window_start < 0) { // shift window right if starts <0.
       window_end -= window_start;
       window_start = 0; // or: window_start -= window_start
     }
     if (!opts.center) {
       if (window_end > t)
         window_end = std::max(t + 1, opts.min_window);
     }
     if (window_end > num_frames) {
       window_start -= (window_end - num_frames);
       window_end = num_frames;
       if (window_start < 0) window_start = 0;
     }
     if (last_window_start == -1) {
       SubMatrix<double> input_part(input,
                                       window_start, window_end - window_start,
                                       0, dim);
       cur_sum.AddRowSumMat(1.0, input_part , 0.0);
       if (opts.normalize_variance)
         cur_sumsq.AddDiagMat2(1.0, input_part, kTrans, 0.0);
     } else {
       if (window_start > last_window_start) {
         KALDI_ASSERT(window_start == last_window_start + 1);
         SubVector<double> frame_to_remove(input, last_window_start);
         cur_sum.AddVec(-1.0, frame_to_remove);
         if (opts.normalize_variance)
           cur_sumsq.AddVec2(-1.0, frame_to_remove);
       }
       if (window_end > last_window_end) {
         KALDI_ASSERT(window_end == last_window_end + 1);
         SubVector<double> frame_to_add(input, last_window_end);
         cur_sum.AddVec(1.0, frame_to_add);
         if (opts.normalize_variance)
           cur_sumsq.AddVec2(1.0, frame_to_add);
       }
     }
     int32 window_frames = window_end - window_start;
     last_window_start = window_start;
     last_window_end = window_end;

     KALDI_ASSERT(window_frames > 0);
     SubVector<double> input_frame(input, t),
         output_frame(*output, t);
     output_frame.CopyFromVec(input_frame);
     output_frame.AddVec(-1.0 / window_frames, cur_sum);

     if (opts.normalize_variance) {
       if (window_frames == 1) {
         output_frame.Set(0.0);
       } else {
         Vector<double> variance(cur_sumsq);
         variance.Scale(1.0 / window_frames);
         variance.AddVec2(-1.0 / (window_frames * window_frames), cur_sum);
         // now "variance" is the variance of the features in the window,
         // around their own mean.
         int32 num_floored;
         variance.ApplyFloor(1.0e-10, &num_floored);
         if (num_floored > 0 && num_frames > 1) {
           if (opts.max_warnings == warning_count) {
             KALDI_WARN << "Suppressing the remaining variance flooring "
                        << "warnings. Run program with --max-warnings=-1 to "
                        << "see all warnings.";
           }
           // If opts.max_warnings is a negative number, we won't restrict the
           // number of times that the warning is printed out.
           else if (opts.max_warnings < 0
                    || opts.max_warnings > warning_count) {
             KALDI_WARN << "Flooring when normalizing variance, floored "
                        << num_floored << " elements; num-frames was "
                        << window_frames;
           }
           warning_count++;
         }
         variance.ApplyPow(-0.5); // get inverse standard deviation.
         output_frame.MulElements(variance);
       }
     }
   }
 }


 void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
                       const MatrixBase<BaseFloat> &input,
                       MatrixBase<BaseFloat> *output) {
   KALDI_ASSERT(SameDim(input, *output) && input.NumRows() > 0);
   Matrix<double> input_dbl(input), output_dbl(input.NumRows(), input.NumCols());
   // call double-precision version
   SlidingWindowCmnInternal(opts, input_dbl, &output_dbl);
   output->CopyFromMat(output_dbl);
 }


 }  // namespace kaldi
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

rnnlm::real
double real
Definition: mikolov-rnnlm-lib.h:64

matrix-functions.h

kaldi::DeltaFeaturesOptions::window
int32 window
Definition: feature-functions.h:50

kaldi::DeltaFeaturesOptions::order
int32 order
Definition: feature-functions.h:49

rnnlm::j
int j
Definition: mikolov-rnnlm-lib.cc:66

feature-functions.h

M_PI
#define M_PI
Definition: kaldi-math.h:44

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::SlidingWindowCmnOptions::cmn_window
int32 cmn_window
Definition: feature-functions.h:159

kaldi::SlidingWindowCmnOptions::center
bool center
Definition: feature-functions.h:163

kaldi::VectorBase::AddDiagMat2
void AddDiagMat2(Real alpha, const MatrixBase< Real > &M, MatrixTransposeType trans=kNoTrans, Real beta=1.0)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: kaldi-vector.cc:1304

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::Matrix< BaseFloat >

kaldi::MatrixBase::CopyFromMat
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Definition: kaldi-matrix.cc:862

kaldi::ReverseFrames
void ReverseFrames(const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:228

kaldi::ShiftedDeltaFeaturesOptions::block_shift
int32 block_shift
Definition: feature-functions.h:86

kaldi::ShiftedDeltaFeatures::scales_
Vector< BaseFloat > scales_
Definition: feature-functions.h:115

kaldi::ShiftedDeltaFeaturesOptions::window
int32 window
Definition: feature-functions.h:86

kaldi::kTrans
Definition: matrix-common.h:33

kaldi::SameDim
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
Definition: kaldi-matrix.h:1111

kaldi::ShiftedDeltaFeaturesOptions
Definition: feature-functions.h:85

kaldi::VectorBase::AddVec2
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
Definition: kaldi-vector.cc:1255

kaldi::VectorBase::ApplyFloor
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
Definition: kaldi-vector.h:149

kaldi::VectorBase::CopyFromVec
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
Definition: kaldi-vector.cc:228

kaldi::ShiftedDeltaFeatures::opts_
ShiftedDeltaFeaturesOptions opts_
Definition: feature-functions.h:114

kaldi::BaseFloat
float BaseFloat
Definition: kaldi-types.h:29

kaldi::MatrixBase::Row
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188

float

kaldi::SlidingWindowCmnOptions
Definition: feature-functions.h:158

kaldi::VectorBase::MulElements
void MulElements(const VectorBase< Real > &v)
Multiply element-by-element by another vector.
Definition: kaldi-vector.cc:968

kaldi::SlidingWindowCmnInternal
void SlidingWindowCmnInternal(const SlidingWindowCmnOptions &opts, const MatrixBase< double > &input, MatrixBase< double > *output)
Definition: feature-functions.cc:250

kaldi::DeltaFeatures::opts_
DeltaFeaturesOptions opts_
Definition: feature-functions.h:79

kaldi::DeltaFeatures::Process
void Process(const MatrixBase< BaseFloat > &input_feats, int32 frame, VectorBase< BaseFloat > *output_frame) const
Definition: feature-functions.cc:88

kaldi::ShiftedDeltaFeatures
Definition: feature-functions.h:100

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::ComputeShiftedDeltas
void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:173

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::InitIdftBases
void InitIdftBases(int32 n_bases, int32 dimension, Matrix< BaseFloat > *mat_out)
Definition: feature-functions.cc:188

kaldi::VectorBase::Scale
void Scale(Real alpha)
Multiplies all elements by this constant.
Definition: kaldi-vector.cc:963

kaldi::SlidingWindowCmnOptions::Check
void Check() const
Definition: feature-functions.cc:242

kaldi::DeltaFeatures::DeltaFeatures
DeltaFeatures(const DeltaFeaturesOptions &opts)
Definition: feature-functions.cc:54

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::SlidingWindowCmnOptions::max_warnings
int32 max_warnings
Definition: feature-functions.h:161

kaldi::DeltaFeaturesOptions
Definition: feature-functions.h:48

kaldi::ComputePowerSpectrum
void ComputePowerSpectrum(VectorBase< BaseFloat > *waveform)
Definition: feature-functions.cc:29

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::SlidingWindowCmnOptions::normalize_variance
bool normalize_variance
Definition: feature-functions.h:162

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::ComputeDeltas
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:160

kaldi::VectorBase::Set
void Set(Real f)
Set all members of a vector to a specified value.
Definition: kaldi-vector.cc:336

kaldi::SpliceFrames
void SpliceFrames(const MatrixBase< BaseFloat > &input_features, int32 left_context, int32 right_context, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:205

kaldi::VectorBase::ApplyPow
void ApplyPow(Real power)
Take all elements of vector to a power.
Definition: kaldi-vector.h:179

kaldi::DeltaFeatures
Definition: feature-functions.h:64

kaldi::ShiftedDeltaFeaturesOptions::num_blocks
int32 num_blocks
Definition: feature-functions.h:86

kaldi::DeltaFeatures::scales_
std::vector< Vector< BaseFloat > > scales_
Definition: feature-functions.h:80

kaldi::Matrix::Resize
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Definition: kaldi-matrix.cc:819

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::VectorBase::SetZero
void SetZero()
Set vector to all zeros.
Definition: kaldi-vector.cc:288

kaldi::ShiftedDeltaFeatures::Process
void Process(const MatrixBase< BaseFloat > &input_feats, int32 frame, SubVector< BaseFloat > *output_frame) const
Definition: feature-functions.cc:129

kaldi::VectorBase::AddVec
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Definition: kaldi-vector.cc:1044

kaldi::SubMatrix
Sub-matrix representation.
Definition: kaldi-matrix.h:988

kaldi::SubVector
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501

kaldi::SlidingWindowCmnOptions::min_window
int32 min_window
Definition: feature-functions.h:160

kaldi::SlidingWindowCmn
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts, const MatrixBase< BaseFloat > &input, MatrixBase< BaseFloat > *output)
Applies sliding-window cepstral mean and/or variance normalization.
Definition: feature-functions.cc:350

kaldi::ShiftedDeltaFeatures::ShiftedDeltaFeatures
ShiftedDeltaFeatures(const ShiftedDeltaFeaturesOptions &opts)
Definition: feature-functions.cc:113