doc/feature-window_8h_source.html

 // feat/feature-window.h

 // Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Saarland University
 //           2014-2016  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_FEAT_FEATURE_WINDOW_H_
 #define KALDI_FEAT_FEATURE_WINDOW_H_

 #include <map>
 #include <string>

 #include "matrix/matrix-lib.h"
 #include "util/common-utils.h"
 #include "base/kaldi-error.h"

 namespace kaldi {

 // Options that control how a waveform is split into frames and windowed
 // before feature extraction.  Durations are configured in milliseconds;
 // WindowShift() and WindowSize() convert them to sample counts using
 // samp_freq.
 struct FrameExtractionOptions {
   BaseFloat samp_freq;  // Waveform sample frequency (samples per second).
   BaseFloat frame_shift_ms;  // Frame shift, in milliseconds.
   BaseFloat frame_length_ms;  // Frame length, in milliseconds.
   BaseFloat dither;  // Amount of dithering, 0.0 means no dither.
   BaseFloat preemph_coeff;  // Preemphasis coefficient.
   bool remove_dc_offset;  // Subtract mean of wave before FFT.
   std::string window_type;  // e.g. Hamming window
   // May be "hamming", "rectangular", "povey", "hanning", "sine", "blackman".
   // "povey" is a window designed to be similar to Hamming but to go to zero
   // at the edges; it is pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85).
   // (Original author's rationale: the Hamming window does not really make
   // sense as a windowing function.)
   bool round_to_power_of_two;  // If true, PaddedWindowSize() rounds the
                                // window size up to a power of two (the
                                // input to the FFT is zero-padded).
   BaseFloat blackman_coeff;  // Constant coefficient for the generalized
                              // Blackman window.
   bool snip_edges;  // If true, only frames that completely fit in the file
                     // are output; if false, the number of frames depends
                     // only on the frame shift and the data is reflected at
                     // the ends.
   bool allow_downsample;  // Allow input with a higher sample frequency than
                           // samp_freq (it will be downsampled).
   bool allow_upsample;  // Allow input with a lower sample frequency than
                         // samp_freq (it will be upsampled).
   int max_feature_vectors;  // Memory optimization: if larger than 0, only
                             // this many of the latest feature vectors are
                             // retained.  Default -1 (no limit requested).

   // Sets the defaults: 16 kHz audio, 25 ms frames every 10 ms, "povey"
   // window, dither 1.0, preemphasis 0.97.
   FrameExtractionOptions():
       samp_freq(16000),
       frame_shift_ms(10.0),
       frame_length_ms(25.0),
       dither(1.0),
       preemph_coeff(0.97),
       remove_dc_offset(true),
       window_type("povey"),
       round_to_power_of_two(true),
       blackman_coeff(0.42),
       snip_edges(true),
       allow_downsample(false),
       allow_upsample(false),
       max_feature_vectors(-1)
       { }

   // Registers every option of this struct with 'opts' under its
   // command-line name, binding each one to the corresponding member.
   void Register(OptionsItf *opts) {
     opts->Register("sample-frequency", &samp_freq,
                    "Waveform data sample frequency (must match the waveform file, "
                    "if specified there)");
     opts->Register("frame-length", &frame_length_ms, "Frame length in milliseconds");
     opts->Register("frame-shift", &frame_shift_ms, "Frame shift in milliseconds");
     opts->Register("preemphasis-coefficient", &preemph_coeff,
                    "Coefficient for use in signal preemphasis");
     opts->Register("remove-dc-offset", &remove_dc_offset,
                    "Subtract mean from waveform on each frame");
     opts->Register("dither", &dither, "Dithering constant (0.0 means no dither). "
                    "If you turn this off, you should set the --energy-floor "
                    "option, e.g. to 1.0 or 0.1");
     // The help text must spell the window names exactly as they are
     // accepted; it previously advertised "blackmann", which does not match
     // the documented "blackman" value.
     opts->Register("window-type", &window_type, "Type of window "
                    "(\"hamming\"|\"hanning\"|\"povey\"|\"rectangular\""
                    "|\"sine\"|\"blackman\")");
     opts->Register("blackman-coeff", &blackman_coeff,
                    "Constant coefficient for generalized Blackman window.");
     opts->Register("round-to-power-of-two", &round_to_power_of_two,
                    "If true, round window size to power of two by zero-padding "
                    "input to FFT.");
     opts->Register("snip-edges", &snip_edges,
                    "If true, end effects will be handled by outputting only frames that "
                    "completely fit in the file, and the number of frames depends on the "
                    "frame-length.  If false, the number of frames depends only on the "
                    "frame-shift, and we reflect the data at the ends.");
     opts->Register("allow-downsample", &allow_downsample,
                    "If true, allow the input waveform to have a higher frequency than "
                    "the specified --sample-frequency (and we'll downsample).");
     opts->Register("max-feature-vectors", &max_feature_vectors,
                    "Memory optimization. If larger than 0, periodically remove feature "
                    "vectors so that only this number of the latest feature vectors is "
                    "retained.");
     opts->Register("allow-upsample", &allow_upsample,
                    "If true, allow the input waveform to have a lower frequency than "
                    "the specified --sample-frequency (and we'll upsample).");
   }

   // Frame shift expressed in samples; the fractional part is discarded by
   // the conversion to int32.
   int32 WindowShift() const {
     return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
   }

   // Frame length expressed in samples; the fractional part is discarded by
   // the conversion to int32.
   int32 WindowSize() const {
     return static_cast<int32>(samp_freq * 0.001 * frame_length_ms);
   }

   // Window size after optional padding: WindowSize() rounded up with
   // RoundUpToNearestPowerOfTwo() when round_to_power_of_two is set,
   // otherwise WindowSize() unchanged.
   int32 PaddedWindowSize() const {
     return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) :
                                     WindowSize());
   }
 };


 // Holds the windowing function used when extracting frames.  The
 // one-argument constructor (defined outside this header) builds it from a
 // FrameExtractionOptions object.
 struct FeatureWindowFunction {
   // The stored window values.
   Vector<BaseFloat> window;

   // Creates an object whose 'window' is default-constructed.
   FeatureWindowFunction(): window() { }

   // Builds the window from the given options; declared only here.
   explicit FeatureWindowFunction(const FrameExtractionOptions &opts);

   // Copies the window values from 'other'.
   FeatureWindowFunction(const FeatureWindowFunction &other):
       window(other.window) { }
 };


 // Returns the number of frames that can be extracted from a waveform
 // containing 'num_samples' samples, given the frame-extraction options in
 // 'opts'.
 // NOTE(review): the effect of 'flush' (default true) is decided in the
 // definition, which is not visible in this header; presumably flush == true
 // means the input is complete and no more samples will arrive -- confirm
 // against the definition.
 int32 NumFrames(int64 num_samples,
                 const FrameExtractionOptions &opts,
                 bool flush = true);

 /*
    Returns the index of the first sample of the frame with index 'frame'.

    If snip-edges=true, the result is simply frame * opts.WindowShift().  If
    snip-edges=false, the formula is a little more complicated and the result
    may be negative.

    @param [in] frame  The frame index.
    @param [in] opts   The frame-extraction options (supplies the window
                       shift and the snip-edges setting).
 */
 int64 FirstSampleOfFrame(int32 frame,
                          const FrameExtractionOptions &opts);


 // Applies dithering to '*waveform'; 'dither_value' is the amount of
 // dithering to apply.  The waveform is passed by non-const pointer, so it is
 // presumably modified in place -- the definition is not visible in this
 // header; confirm there.
 void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value);

 // Applies preemphasis to '*waveform' using the coefficient 'preemph_coeff'.
 // The waveform is passed by non-const pointer, so it is presumably modified
 // in place -- the definition is not visible in this header; confirm there.
 void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff);

 // Performs the windowing steps that follow extraction of the raw windowed
 // signal, operating on '*window' with the options in 'opts' and the
 // windowing function in 'window_function'.  Which steps run is decided in
 // the definition (not visible in this header).
 // 'log_energy_pre_window' is optional (defaults to NULL).
 // NOTE(review): presumably, as for ExtractWindow(), a non-NULL pointer
 // receives the log-energy of the signal prior to pre-emphasis and
 // windowing -- confirm against the definition.
 void ProcessWindow(const FrameExtractionOptions &opts,
                    const FeatureWindowFunction &window_function,
                    VectorBase<BaseFloat> *window,
                    BaseFloat *log_energy_pre_window = NULL);


 /*
   ExtractWindow() extracts a windowed frame of waveform (possibly with a
   power-of-two, padded size, depending on the config), including all the
   processing done by ProcessWindow().

   @param [in] sample_offset  If 'wave' is not the entire waveform, but
                    part of it to the left has been discarded, then the
                    number of samples prior to 'wave' that we have
                    already discarded.  Set this to zero if you are
                    processing the entire waveform in one piece, or
                    if you get 'no matching function' compilation
                    errors when updating the code.
   @param [in] wave  The waveform
   @param [in] f     The frame index to be extracted, with
                     0 <= f < NumFrames(sample_offset + wave.Dim(), opts, true)
   @param [in] opts  The options class to be used
   @param [in] window_function  The windowing function, as derived from the
                     options class.
   @param [out] window  The windowed, possibly-padded waveform to be
                      extracted.  Will be resized as needed.
   @param [out] log_energy_pre_window  If non-NULL, the log-energy of
                    the signal prior to pre-emphasis and multiplying by
                    the windowing function will be written to here.
                    Defaults to NULL, in which case nothing is written.
 */
 void ExtractWindow(int64 sample_offset,
                    const VectorBase<BaseFloat> &wave,
                    int32 f,
                    const FrameExtractionOptions &opts,
                    const FeatureWindowFunction &window_function,
                    Vector<BaseFloat> *window,
                    BaseFloat *log_energy_pre_window = NULL);


 }  // namespace kaldi


 #endif  // KALDI_FEAT_FEATURE_WINDOW_H_
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::FrameExtractionOptions::PaddedWindowSize
int32 PaddedWindowSize() const
Definition: feature-window.h:112

matrix-lib.h

kaldi::FrameExtractionOptions::frame_shift_ms
BaseFloat frame_shift_ms
Definition: feature-window.h:37

kaldi::FeatureWindowFunction
Definition: feature-window.h:119

kaldi::FeatureWindowFunction::FeatureWindowFunction
FeatureWindowFunction()
Definition: feature-window.h:120

kaldi::FrameExtractionOptions::window_type
std::string window_type
Definition: feature-window.h:42

kaldi::FeatureWindowFunction::window
Vector< BaseFloat > window
Definition: feature-window.h:124

kaldi::FrameExtractionOptions::dither
BaseFloat dither
Definition: feature-window.h:39

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::RoundUpToNearestPowerOfTwo
int32 RoundUpToNearestPowerOfTwo(int32 n)
Definition: kaldi-math.cc:32

kaldi::FrameExtractionOptions
Definition: feature-window.h:35

kaldi::FrameExtractionOptions::frame_length_ms
BaseFloat frame_length_ms
Definition: feature-window.h:38

kaldi::FrameExtractionOptions::samp_freq
BaseFloat samp_freq
Definition: feature-window.h:36

kaldi::FrameExtractionOptions::WindowSize
int32 WindowSize() const
Definition: feature-window.h:109

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::ExtractWindow
void ExtractWindow(int64 sample_offset, const VectorBase< BaseFloat > &wave, int32 f, const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, Vector< BaseFloat > *window, BaseFloat *log_energy_pre_window)
Definition: feature-window.cc:166

kaldi::FrameExtractionOptions::preemph_coeff
BaseFloat preemph_coeff
Definition: feature-window.h:40

kaldi::FrameExtractionOptions::FrameExtractionOptions
FrameExtractionOptions()
Definition: feature-window.h:53

kaldi::FrameExtractionOptions::max_feature_vectors
int max_feature_vectors
Definition: feature-window.h:52

float

kaldi::FrameExtractionOptions::snip_edges
bool snip_edges
Definition: feature-window.h:49

kaldi::FirstSampleOfFrame
int64 FirstSampleOfFrame(int32 frame, const FrameExtractionOptions &opts)
Definition: feature-window.cc:30

kaldi::FrameExtractionOptions::round_to_power_of_two
bool round_to_power_of_two
Definition: feature-window.h:47

kaldi::NumFrames
int32 NumFrames(int64 num_samples, const FrameExtractionOptions &opts, bool flush)
This function returns the number of frames that we can extract from a wave file with the given number...
Definition: feature-window.cc:42

kaldi::FrameExtractionOptions::allow_upsample
bool allow_upsample
Definition: feature-window.h:51

kaldi::FrameExtractionOptions::WindowShift
int32 WindowShift() const
Definition: feature-window.h:106

kaldi::ProcessWindow
void ProcessWindow(const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, VectorBase< BaseFloat > *window, BaseFloat *log_energy_pre_window)
This function does all the windowing steps after actually extracting the windowed signal: depending o...
Definition: feature-window.cc:137

kaldi::FrameExtractionOptions::Register
void Register(OptionsItf *opts)
Definition: feature-window.h:69

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::Preemphasize
void Preemphasize(VectorBase< BaseFloat > *waveform, BaseFloat preemph_coeff)
Definition: feature-window.cc:101

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::FrameExtractionOptions::remove_dc_offset
bool remove_dc_offset
Definition: feature-window.h:41

kaldi::FrameExtractionOptions::blackman_coeff
BaseFloat blackman_coeff
Definition: feature-window.h:48

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::FrameExtractionOptions::allow_downsample
bool allow_downsample
Definition: feature-window.h:50

kaldi-error.h

kaldi::Dither
void Dither(VectorBase< BaseFloat > *waveform, BaseFloat dither_value)
Definition: feature-window.cc:90

kaldi::FeatureWindowFunction::FeatureWindowFunction
FeatureWindowFunction(const FeatureWindowFunction &other)
Definition: feature-window.h:122