doc/feature-functions_8h_source.html

 // feat/feature-functions.h

 // Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Microsoft Corporation
 //                2014  IMSL, PKU-HKUST (author: Wei Shi)
 //                2016  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
 #define KALDI_FEAT_FEATURE_FUNCTIONS_H_

 #include <string>
 #include <vector>

 #include "matrix/matrix-lib.h"
 #include "util/common-utils.h"
 #include "base/kaldi-error.h"

 namespace kaldi {


 // ComputePowerSpectrum takes a complex FFT (as produced by the FFT functions
 // in matrix/matrix-functions.h) and converts it, in place, into a power
 // spectrum.  If the complex FFT is a vector of size n (representing half the
 // complex FFT of a real signal of size n, as described there), this function
 // writes into the first (n/2) + 1 elements of that vector the energies of the
 // FFT bins from zero to the Nyquist frequency.  The contents of the remaining
 // (n/2) - 1 elements are undefined on output.
 void ComputePowerSpectrum(VectorBase<BaseFloat> *complex_fft);


 // Configuration for delta-feature computation (used by DeltaFeatures and
 // ComputeDeltas).
 struct DeltaFeaturesOptions {
   int32 order;   // Order of delta computation (default 2).
   int32 window;  // e.g. 2; controls window size (window size is 2*window + 1)
   // The behavior at the edges is to replicate the first or last frame;
   // this is not configurable.

   // Both arguments are optional and default to 2.
   DeltaFeaturesOptions(int32 order = 2, int32 window = 2):
       order(order), window(window) { }

   // Registers the "delta-order" and "delta-window" options on `opts`, bound
   // to the corresponding members of this struct.
   void Register(OptionsItf *opts) {
     opts->Register("delta-order", &order, "Order of delta computation");
     // The help text refers to the option by its registered name
     // ("delta-window"); there is no option called "delta-window-size".
     opts->Register("delta-window", &window,
                    "Parameter controlling window for delta computation (actual window"
                    " size for each delta order is 1 + 2*delta-window)");
   }
 };

 class DeltaFeatures {
  public:
   // This class provides a low-level function to compute delta features.
   // The function takes as input a matrix of features and the index of the
   // frame on which it should compute the deltas.  It puts its output in an
   // object of type VectorBase, of size
   // (original-feature-dimension) * (opts.order+1).
   // This is not the most efficient way to do the computation, but it is
   // state-free and thus easier to understand.

   // Constructs the object from the given delta options.
   explicit DeltaFeatures(const DeltaFeaturesOptions &opts);

   // Computes the delta features for frame index `frame` of `input_feats` and
   // puts the result in *output_frame (expected size as described above).
   void Process(const MatrixBase<BaseFloat> &input_feats,
                int32 frame,
                VectorBase<BaseFloat> *output_frame) const;
  private:
   DeltaFeaturesOptions opts_;
   std::vector<Vector<BaseFloat> > scales_;  // a scaling window for each
   // of the orders, including zero: multiply the features for each
   // dimension by this window.
 };

 // Configuration for shifted-delta computation (used by ShiftedDeltaFeatures
 // and ComputeShiftedDeltas).
 struct ShiftedDeltaFeaturesOptions {
   int32 window;       // The time delay and advance
   int32 num_blocks;   // Number of delta blocks concatenated per frame
   int32 block_shift;  // Distance between consecutive blocks

   // Defaults: window = 1, num_blocks = 7, block_shift = 3.
   ShiftedDeltaFeaturesOptions()
       : window(1),
         num_blocks(7),
         block_shift(3) {}

   // Registers the "delta-window", "num-blocks" and "block-shift" options on
   // `opts`, bound to the corresponding members of this struct.
   void Register(OptionsItf *opts) {
     opts->Register("delta-window", &window,
                    "Size of delta advance and delay.");
     opts->Register("num-blocks", &num_blocks,
                    "Number of delta blocks in advance of each frame to be "
                    "concatenated");
     opts->Register("block-shift", &block_shift,
                    "Distance between each block");
   }
 };

 class ShiftedDeltaFeatures {
  public:
   // This class provides a low-level function to compute shifted
   // delta cepstra (SDC).
   // The function takes as input a matrix of features and the index of the
   // frame on which it should compute the deltas.  It puts its output in an
   // object of type VectorBase, of size
   // original-feature-dimension + (1  * num_blocks).
   // NOTE(review): the size formula above is reproduced from the original
   // comment and looks suspicious (the "1 *" factor is a no-op); confirm the
   // expected output dimension against the implementation of Process().

   // Constructs the object from the given shifted-delta options.
   explicit ShiftedDeltaFeatures(const ShiftedDeltaFeaturesOptions &opts);

   // Computes the shifted deltas for frame index `frame` of `input_feats` and
   // puts the result in *output_frame.
   void Process(const MatrixBase<BaseFloat> &input_feats,
                int32 frame,
                SubVector<BaseFloat> *output_frame) const;
  private:
   ShiftedDeltaFeaturesOptions opts_;
   Vector<BaseFloat> scales_;  // a scaling window
   // NOTE(review): the original comment here was truncated ("a scaling window
   // for each"); what the window is indexed by is not visible in this header.

 };

 // ComputeDeltas is a convenience function that computes deltas on a whole
 // feature file, taking the features in input_features and writing the result
 // to *output_features.  If you want to deal with features that arrive bit by
 // bit, you have to use the DeltaFeatures class directly and do the
 // computation frame by frame.
 // TODO: come up with a nicer mechanism for features that arrive
 // incrementally.
 void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
                    const MatrixBase<BaseFloat> &input_features,
                    Matrix<BaseFloat> *output_features);

 // ComputeShiftedDeltas computes deltas from a feature file by applying
 // ShiftedDeltaFeatures over the frames of input_features, writing the result
 // to *output_features.  This function is provided for convenience; the
 // ShiftedDeltaFeatures class can also be used directly.
 void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
                    const MatrixBase<BaseFloat> &input_features,
                    Matrix<BaseFloat> *output_features);

 // SpliceFrames will normally be used together with LDA.
 // It splices frames together to make a window.  At the start and end of an
 // utterance, it duplicates the first and last frames.
 // Will throw if the input features are empty.
 // left_context and right_context must both be nonnegative; each is a number
 // of frames (e.g. 4, 4 is a good choice).
 void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
                   int32 left_context,
                   int32 right_context,
                   Matrix<BaseFloat> *output_features);

 // ReverseFrames reverses the frames of input_features in time (used for
 // backwards decoding); the result goes to *output_features.
 void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
                   Matrix<BaseFloat> *output_features);


 // NOTE(review): undocumented in this header.  Judging by the name, this
 // presumably initializes *mat_out with n_bases inverse-DFT basis vectors of
 // the given dimension -- confirm against the definition in
 // feature-functions.cc.
 void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out);


 // Options for sliding-window CMN.  This is used for speaker-id.  Also see
 // OnlineCmnOptions in ../online2/, which is online CMN with no latency, for
 // online speech recognition.
 struct SlidingWindowCmnOptions {
   int32 cmn_window;         // Window in frames for the running average.
   int32 min_window;         // Minimum window; only used if center == false.
   int32 max_warnings;       // Per-utterance warning cap (0: none, -1: all).
   bool normalize_variance;  // If true, also normalize variance to one.
   bool center;              // If true, center the window on the current frame.

   SlidingWindowCmnOptions()
       : cmn_window(600), min_window(100), max_warnings(5),
         normalize_variance(false), center(false) {}

   // Registers the command-line options on `opts`, bound to the members of
   // this struct.
   void Register(OptionsItf *opts) {
     opts->Register("cmn-window", &cmn_window,
                    "Window in frames for running average CMN computation");
     opts->Register("min-cmn-window", &min_window,
                    "Minimum CMN window used at start of decoding (adds "
                    "latency only at start). Only applicable if center == "
                    "false, ignored if center==true");
     opts->Register("max-warnings", &max_warnings,
                    "Maximum warnings to report per utterance. 0 to disable, "
                    "-1 to show all.");
     // The option is called "norm-vars" to match the naming in apply-cmvn.cc.
     opts->Register("norm-vars", &normalize_variance,
                    "If true, normalize variance to one.");
     opts->Register("center", &center,
                    "If true, use a window centered on the current frame (to "
                    "the extent possible, modulo end effects). If false, "
                    "window is to the left.");
   }

   // Declared here; defined outside this header.
   void Check() const;
 };


 // Applies sliding-window cepstral mean and/or variance normalization, as
 // configured by `opts`.  Presumably reads the features from `input` and
 // writes the normalized features to *output -- confirm against the
 // definition in feature-functions.cc.
 void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
                       const MatrixBase<BaseFloat> &input,
                       MatrixBase<BaseFloat> *output);


 }  // namespace kaldi


 #endif  // KALDI_FEAT_FEATURE_FUNCTIONS_H_
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::DeltaFeaturesOptions::window
int32 window
Definition: feature-functions.h:50

kaldi::DeltaFeaturesOptions::order
int32 order
Definition: feature-functions.h:49

matrix-lib.h

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::SlidingWindowCmnOptions::cmn_window
int32 cmn_window
Definition: feature-functions.h:159

kaldi::SlidingWindowCmnOptions::center
bool center
Definition: feature-functions.h:163

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::Matrix< BaseFloat >

kaldi::ReverseFrames
void ReverseFrames(const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:228

kaldi::ShiftedDeltaFeatures::scales_
Vector< BaseFloat > scales_
Definition: feature-functions.h:115

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::ShiftedDeltaFeaturesOptions::window
int32 window
Definition: feature-functions.h:86

kaldi::ShiftedDeltaFeaturesOptions
Definition: feature-functions.h:85

kaldi::ShiftedDeltaFeatures::opts_
ShiftedDeltaFeaturesOptions opts_
Definition: feature-functions.h:114

kaldi::ShiftedDeltaFeaturesOptions::Register
void Register(OptionsItf *opts)
Definition: feature-functions.h:92

kaldi::SlidingWindowCmnOptions
Definition: feature-functions.h:158

kaldi::DeltaFeatures::opts_
DeltaFeaturesOptions opts_
Definition: feature-functions.h:79

kaldi::ShiftedDeltaFeatures
Definition: feature-functions.h:100

kaldi::DeltaFeaturesOptions::DeltaFeaturesOptions
DeltaFeaturesOptions(int32 order=2, int32 window=2)
Definition: feature-functions.h:54

kaldi::ComputeShiftedDeltas
void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:173

kaldi::ShiftedDeltaFeaturesOptions::ShiftedDeltaFeaturesOptions
ShiftedDeltaFeaturesOptions()
Definition: feature-functions.h:90

kaldi::SlidingWindowCmnOptions::SlidingWindowCmnOptions
SlidingWindowCmnOptions()
Definition: feature-functions.h:165

kaldi::SlidingWindowCmnOptions::Register
void Register(OptionsItf *opts)
Definition: feature-functions.h:172

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::InitIdftBases
void InitIdftBases(int32 n_bases, int32 dimension, Matrix< BaseFloat > *mat_out)
Definition: feature-functions.cc:188

kaldi::SlidingWindowCmnOptions::max_warnings
int32 max_warnings
Definition: feature-functions.h:161

kaldi::DeltaFeaturesOptions
Definition: feature-functions.h:48

kaldi::ComputePowerSpectrum
void ComputePowerSpectrum(VectorBase< BaseFloat > *waveform)
Definition: feature-functions.cc:29

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::SlidingWindowCmnOptions::normalize_variance
bool normalize_variance
Definition: feature-functions.h:162

kaldi::ComputeDeltas
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:160

kaldi::SpliceFrames
void SpliceFrames(const MatrixBase< BaseFloat > &input_features, int32 left_context, int32 right_context, Matrix< BaseFloat > *output_features)
Definition: feature-functions.cc:205

kaldi::DeltaFeatures
Definition: feature-functions.h:64

kaldi::DeltaFeatures::scales_
std::vector< Vector< BaseFloat > > scales_
Definition: feature-functions.h:80

kaldi::DeltaFeaturesOptions::Register
void Register(OptionsItf *opts)
Definition: feature-functions.h:56

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::SubVector
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501

kaldi::SlidingWindowCmnOptions::min_window
int32 min_window
Definition: feature-functions.h:160

kaldi-error.h

kaldi::SlidingWindowCmn
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts, const MatrixBase< BaseFloat > &input, MatrixBase< BaseFloat > *output)
Applies sliding-window cepstral mean and/or variance normalization.
Definition: feature-functions.cc:350