23 #ifndef KALDI_FEAT_ONLINE_FEATURE_H_ 24 #define KALDI_FEAT_ONLINE_FEATURE_H_ 68 std::deque<Vector<BaseFloat>*>
items_;
83 virtual int32 Dim()
const {
return computer_.Dim(); }
88 return input_finished_ && frame == NumFramesReady() - 1;
91 return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
108 virtual void AcceptWaveform(
BaseFloat sampling_rate,
116 virtual void InputFinished();
125 void ComputeFeatures();
127 void MaybeCreateResampler(
BaseFloat sampling_rate);
173 virtual int32 Dim()
const {
return mat_.NumCols(); }
186 return (frame + 1 == mat_.NumRows());
223 normalize_mean(true),
224 normalize_variance(false),
226 ring_buffer_size(20),
230 KALDI_ASSERT(speaker_frames <= cmn_window && global_frames <= speaker_frames
235 po->
Register(
"cmn-window", &cmn_window,
"Number of frames of sliding " 236 "context for cepstral mean normalization.");
237 po->
Register(
"global-frames", &global_frames,
"Number of frames of " 238 "global-average cepstral mean normalization stats to use for " 239 "first utterance of a speaker");
240 po->
Register(
"speaker-frames", &speaker_frames,
"Number of frames of " 241 "previous utterance(s) from this speaker to use in cepstral " 242 "mean normalization");
245 po->
Register(
"norm-vars", &normalize_variance,
"If true, do " 246 "cepstral variance normalization in addition to cepstral mean " 248 po->
Register(
"norm-means", &normalize_mean,
"If true, do mean normalization " 249 "(note: you cannot normalize the variance but not the mean)");
250 po->
Register(
"skip-dims", &skip_dims,
"Dimensions to skip normalization of " 251 "(colon-separated list of integers)");}
284 global_cmvn_stats(global_stats) { }
289 void Write(std::ostream &os,
bool binary)
const;
290 void Read(std::istream &is,
bool binary);
330 return src_->IsLastFrame(frame);
333 return src_->FrameShiftInSeconds();
369 void GetState(
int32 cur_frame,
386 void Freeze(
int32 cur_frame);
402 void GetMostRecentCachedFrame(
int32 frame,
410 inline void InitRingBufferIfNeeded();
415 void ComputeStatsForFrame(
int32 frame,
451 po->
Register(
"left-context", &left_context,
"Left-context for frame " 452 "splicing prior to LDA");
453 po->
Register(
"right-context", &right_context,
"Right-context for frame " 454 "splicing prior to LDA");
464 return src_->Dim() * (1 + left_context_ + right_context_);
468 return src_->IsLastFrame(frame);
471 return src_->FrameShiftInSeconds();
474 virtual int32 NumFramesReady()
const;
483 left_context_(opts.left_context), right_context_(opts.right_context),
498 virtual int32 Dim()
const {
return offset_.Dim(); }
501 return src_->IsLastFrame(frame);
504 return src_->FrameShiftInSeconds();
511 virtual void GetFrames(
const std::vector<int32> &frames,
535 virtual int32 Dim()
const;
538 return src_->IsLastFrame(frame);
541 return src_->FrameShiftInSeconds();
544 virtual int32 NumFramesReady()
const;
569 return src_->IsLastFrame(frame);
572 return src_->FrameShiftInSeconds();
579 virtual void GetFrames(
const std::vector<int32> &frames,
603 virtual int32 Dim()
const {
return src1_->Dim() + src2_->Dim(); }
606 return (src1_->IsLastFrame(frame) || src2_->IsLastFrame(frame));
610 return src1_->FrameShiftInSeconds();
614 return std::min(src1_->NumFramesReady(), src2_->NumFramesReady());
632 #endif // KALDI_FEAT_ONLINE_FEATURE_H_ This class takes a Matrix<BaseFloat> and wraps it as an OnlineFeatureInterface: this can be useful wh...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
void Register(ParseOptions *po)
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
RecyclingVector(int items_to_hold=-1)
By default it does not remove any elements.
OnlineGenericBaseFeature< PlpComputer > OnlinePlp
virtual ~OnlineAppendFeature()
virtual BaseFloat FrameShiftInSeconds() const
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
Matrix< double > speaker_cmvn_stats
OnlineGenericBaseFeature< MfccComputer > OnlineMfcc
OnlineGenericBaseFeature< FbankComputer > OnlineFbank
virtual int32 Dim() const
virtual int32 Dim() const
OnlineCmvnState(const Matrix< double > &global_stats)
Base class which provides matrix operations not involving resizing or allocation. ...
virtual int32 Dim() const
RecyclingVector features_
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
Vector< double > temp_feats_dbl_
DeltaFeaturesOptions opts_
OnlineSpliceFrames(const OnlineSpliceOptions &opts, OnlineFeatureInterface *src)
Matrix< double > frozen_state
OnlineCacheFeature(OnlineFeatureInterface *src)
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
void PushBack(Vector< BaseFloat > *item)
The ownership of the item is passed to this collection - do not delete the item.
Matrix< double > frozen_state_
Vector< BaseFloat > waveform_remainder_
void Register(const std::string &name, bool *ptr, const std::string &doc)
int Size() const
This method returns the size as if no "recycling" had happened, i.e.
FeatureWindowFunction window_function_
OnlineFeatureInterface * src_
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
virtual int32 Dim() const
Matrix< double > temp_stats_
virtual BaseFloat FrameShiftInSeconds() const
Vector< BaseFloat > temp_feats_
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
This online-feature class implements the combination of two feature streams (such as pitch and PLP) into one stream.
virtual ~OnlineCacheFeature()
DeltaFeatures delta_features_
OnlineFeatureInterface * src_
std::vector< std::pair< int32, Matrix< double > > > cached_stats_ring_
virtual BaseFloat FrameShiftInSeconds() const
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
virtual BaseFloat FrameShiftInSeconds() const
BaseFloat sampling_frequency_
This class serves as a storage for feature vectors with an option to limit the memory usage by removi...
std::vector< int32 > skip_dims_
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
OnlineCmvnState orig_state_
virtual BaseFloat FrameShiftInSeconds() const
virtual BaseFloat FrameShiftInSeconds() const
OnlineFeatureInterface * src2_
std::unique_ptr< LinearResample > resampler_
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
OnlineAppendFeature(OnlineFeatureInterface *src1, OnlineFeatureInterface *src2)
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
OnlineFeatureInterface * src_
void Register(ParseOptions *po)
A virtual class for "source" features such as MFCC, PLP, or pitch features.
A class representing a vector.
std::deque< Vector< BaseFloat > * > items_
#define KALDI_ASSERT(cond)
virtual int32 Dim() const
OnlineMatrixFeature(const MatrixBase< BaseFloat > &mat)
Caution: this class maintains the const reference from the constructor, so don't let it go out of sco...
const MatrixBase< BaseFloat > & mat_
OnlineFeatureInterface * src1_
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
std::vector< Matrix< double > * > cached_stats_modulo_
Matrix< double > global_cmvn_stats
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
std::vector< Vector< BaseFloat > *> cache_
int first_available_index_
virtual int32 NumFramesReady() const
Returns the number of frames that are currently ready.
virtual int32 Dim() const
Provides a vector abstraction class.
This is a templated class for online feature extraction; it's templated on a class like MfccComputer ...
Vector< BaseFloat > * At(int index) const
The ownership is being retained by this collection - do not delete the item.
This feature type can be used to cache its input, to avoid repetition of computation in a multi-pass ...
OnlineFeatureInterface * src_
virtual BaseFloat FrameShiftInSeconds() const