28 items_to_hold_(items_to_hold == 0 ? -1 : items_to_hold),
29 first_available_index_(0) {
33 for (
auto *item :
items_) {
40 KALDI_ERR <<
"Attempted to retrieve feature vector that was " 41 "already removed by the RecyclingVector (index = " 44 <<
"size = " <<
Size() <<
")";
71 const typename C::Options &opts):
72 computer_(opts), window_function_(computer_.GetFrameOptions()),
73 features_(opts.frame_opts.max_feature_vectors),
74 input_finished_(false), waveform_offset_(0) {
79 KALDI_ASSERT(static_cast<uint32>(opts.frame_opts.max_feature_vectors) > 200);
91 }
else if (((sampling_rate > expected_sampling_rate) &&
92 computer_.GetFrameOptions().allow_downsample) ||
93 ((sampling_rate < expected_sampling_rate) &&
94 computer_.GetFrameOptions().allow_upsample)) {
96 sampling_rate, expected_sampling_rate,
97 std::min(sampling_rate / 2, expected_sampling_rate / 2), 6));
98 }
else if (sampling_rate != expected_sampling_rate) {
99 KALDI_ERR <<
"Sampling frequency mismatch, expected " 100 << expected_sampling_rate <<
", got " << sampling_rate
101 <<
"\nPerhaps you want to use the options " 102 "--allow_{upsample,downsample}";
113 resampler_->Resample(appended_wave,
true, &resampled_wave);
115 if (resampled_wave.
Dim() != 0) {
117 resampled_wave.
Dim());
122 .CopyFromVec(resampled_wave);
133 if (original_waveform.
Dim() == 0)
136 KALDI_ERR <<
"AcceptWaveform called after InputFinished() was called.";
145 waveform = &original_waveform;
147 resampler_->Resample(original_waveform,
false, &resampled_wave);
148 waveform = &resampled_wave;
156 .CopyFromVec(*waveform);
166 num_frames_new =
NumFrames(num_samples_total, frame_opts,
171 bool need_raw_log_energy =
computer_.NeedRawLogEnergy();
172 for (
int32 frame = num_frames_old; frame < num_frames_new; frame++) {
176 need_raw_log_energy ? &raw_log_energy : NULL);
181 computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature);
189 if (samples_to_discard > 0) {
192 if (new_num_samples <= 0) {
200 waveform_offset_ += samples_to_discard;
212 speaker_cmvn_stats(other.speaker_cmvn_stats),
213 global_cmvn_stats(other.global_cmvn_stats),
214 frozen_state(other.frozen_state) { }
241 opts_(opts), temp_stats_(2, src->Dim() + 1),
242 temp_feats_(src->Dim()), temp_feats_dbl_(src->Dim()),
246 KALDI_ERR <<
"Bad --skip-dims option (should be colon-separated list of " 256 KALDI_ERR <<
"Bad --skip-dims option (should be colon-separated list of " 317 KALDI_WARN <<
"Did not expect to reach this part of code.";
346 while (cur_frame < frame) {
350 stats_out->
Row(0).Range(0, dim).AddVec(1.0, feats_dbl);
352 stats_out->
Row(1).Range(0, dim).AddVec2(1.0, feats_dbl);
353 (*stats_out)(0, dim) += 1.0;
357 if (prev_frame >= 0) {
361 stats_out->
Row(0).Range(0, dim).AddVec(-1.0, feats_dbl);
363 stats_out->
Row(1).Range(0, dim).AddVec2(-1.0, feats_dbl);
364 (*stats_out)(0, dim) -= 1.0;
387 double cur_count = (*stats)(0, dim);
393 if (speaker_stats.
NumRows() != 0) {
394 double count_from_speaker = opts.
cmn_window - cur_count,
395 speaker_count = speaker_stats(0, dim);
398 if (count_from_speaker > speaker_count)
399 count_from_speaker = speaker_count;
400 if (count_from_speaker > 0.0)
401 stats->
AddMat(count_from_speaker / speaker_count,
403 cur_count = (*stats)(0, dim);
407 if (global_stats.
NumRows() != 0) {
408 double count_from_global = opts.
cmn_window - cur_count,
409 global_count = global_stats(0, dim);
413 if (count_from_global > 0.0)
414 stats->
AddMat(count_from_global / global_count,
417 KALDI_ERR <<
"Global CMN stats are required";
476 for (
int32 t = 0; t <= cur_frame; t++) {
491 "You cannot call SetState() after processing data.");
501 return std::max<int32>(0, num_frames - right_context_);
505 KALDI_ASSERT(left_context_ >= 0 && right_context_ >= 0);
508 KALDI_ASSERT(feat->
Dim() == dim_in * (1 + left_context_ + right_context_));
510 for (
int32 t2 = frame - left_context_; t2 <= frame + right_context_; t2++) {
511 int32 t2_limited = t2;
512 if (t2_limited < 0) t2_limited = 0;
513 if (t2_limited >= T) t2_limited = T - 1;
514 int32 n = t2 - (frame - left_context_);
525 if (transform.
NumCols() == src_dim) {
528 }
else if (transform.
NumCols() == src_dim + 1) {
531 offset_.CopyColFromMat(transform, src_dim);
533 KALDI_ERR <<
"Dimension mismatch: source features have dimension " 534 << src_dim <<
" and LDA #cols is " << transform.
NumCols();
559 return src_dim * (1 + opts_.order);
564 context = opts_.order * opts_.window;
570 return std::max<int32>(0, num_frames - context);
579 int32 context = opts_.order * opts_.window;
580 int32 left_frame = frame - context,
581 right_frame = frame + context,
583 if (left_frame < 0) left_frame = 0;
584 if (right_frame >= src_frames_ready)
585 right_frame = src_frames_ready - 1;
587 int32 temp_num_frames = right_frame + 1 - left_frame,
590 for (
int32 t = left_frame; t <= right_frame; t++) {
594 int32 temp_t = frame - left_frame;
596 delta_features_.Process(temp_src, temp_t, feat);
602 src_(src), opts_(opts), delta_features_(opts) { }
606 if (static_cast<size_t>(frame) < cache_.size() && cache_[frame] != NULL) {
609 if (static_cast<size_t>(frame) >= cache_.size())
610 cache_.resize(frame + 1, NULL);
621 int32 num_frames = frames.size();
624 std::vector<int32> non_cached_frames;
627 std::vector<int32> non_cached_indexes;
628 non_cached_frames.reserve(frames.size());
629 non_cached_indexes.reserve(frames.size());
630 for (
int32 i = 0;
i < num_frames;
i++) {
632 if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
633 feats->
Row(
i).CopyFromVec(*(cache_[t]));
635 non_cached_frames.push_back(t);
636 non_cached_indexes.push_back(
i);
639 if (non_cached_frames.empty())
641 int32 num_non_cached_frames = non_cached_frames.size(),
646 for (
int32 i = 0;
i < num_non_cached_frames;
i++) {
647 int32 t = non_cached_frames[
i];
648 if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
650 feats->
Row(non_cached_indexes[
i]).CopyFromVec(*(cache_[t]));
653 feats->
Row(non_cached_indexes[
i]).CopyFromVec(this_feat);
654 if (static_cast<size_t>(t) >= cache_.size())
655 cache_.resize(t + 1, NULL);
663 for (
size_t i = 0;
i < cache_.size();
i++)
674 src1_->GetFrame(frame, &feat1);
675 src2_->GetFrame(frame, &feat2);
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void MaybeCreateResampler(BaseFloat sampling_rate)
void Write(std::ostream &out, bool binary) const
write to stream.
RecyclingVector(int items_to_hold=-1)
By default it does not remove any elements.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
Matrix< double > speaker_cmvn_stats
void Write(std::ostream &os, bool binary) const
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
virtual int32 Dim() const
RecyclingVector features_
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
Vector< double > temp_feats_dbl_
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
Matrix< double > frozen_state
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
OnlineCmvn(const OnlineCmvnOptions &opts, const OnlineCmvnState &cmvn_state, OnlineFeatureInterface *src)
Initializer that sets the cmvn state.
void PushBack(Vector< BaseFloat > *item)
The ownership of the item is passed to this collection - do not delete the item.
Matrix< double > frozen_state_
Vector< BaseFloat > waveform_remainder_
void CacheFrame(int32 frame, const MatrixBase< double > &stats)
Cache this frame of stats.
int Size() const
This method returns the size as if no "recycling" had happened, i.e. equivalent to the number of times the PushBack method has been called.
void ExtractWindow(int64 sample_offset, const VectorBase< BaseFloat > &wave, int32 f, const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, Vector< BaseFloat > *window, BaseFloat *log_energy_pre_window)
FeatureWindowFunction window_function_
void GetState(int32 cur_frame, OnlineCmvnState *cmvn_state)
OnlineFeatureInterface * src_
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
Matrix< double > temp_stats_
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void InitRingBufferIfNeeded()
Initialize ring buffer for caching stats.
Vector< BaseFloat > temp_feats_
void Freeze(int32 cur_frame)
void Read(std::istream &is, bool binary)
OnlineGenericBaseFeature(const typename C::Options &opts)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
std::vector< std::pair< int32, Matrix< double > > > cached_stats_ring_
virtual void GetFrames(const std::vector< int32 > &frames, MatrixBase< BaseFloat > *feats)
This is like GetFrame() but for a collection of frames.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
void SetState(const OnlineCmvnState &cmvn_state)
int64 FirstSampleOfFrame(int32 frame, const FrameExtractionOptions &opts)
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
int32 NumFrames(int64 num_samples, const FrameExtractionOptions &opts, bool flush)
This function returns the number of frames that we can extract from a wave file with the given number...
virtual int32 NumFramesReady() const
Returns the total number of frames, since the start of the utterance, that are now available.
std::vector< int32 > skip_dims_
Real * Data()
Returns a pointer to the start of the vector's data.
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
OnlineCmvnState orig_state_
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void SetZero()
Sets matrix to zero.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector: this <-- beta*this + alpha*M*v.
std::unique_ptr< LinearResample > resampler_
virtual void GetFrames(const std::vector< int32 > &frames, MatrixBase< BaseFloat > *feats)
This is like GetFrame() but for a collection of frames.
SubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
A class representing a vector.
std::deque< Vector< BaseFloat > * > items_
LinearResample is a special case of ArbitraryResample, where we want to resample a signal at linearly...
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void ComputeStatsForFrame(int32 frame, MatrixBase< double > *stats)
Computes the raw CMVN stats for this frame, making use of (and updating if necessary) the cached stat...
static void SmoothOnlineCmvnStats(const MatrixBase< double > &speaker_stats, const MatrixBase< double > &global_stats, const OnlineCmvnOptions &opts, MatrixBase< double > *stats)
Smooth the CMVN stats "stats" (which are stored in the normal format as a 2 x (dim+1) matrix)...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
OnlineDeltaFeature(const DeltaFeaturesOptions &opts, OnlineFeatureInterface *src)
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
std::vector< Matrix< double > * > cached_stats_modulo_
Matrix< double > global_cmvn_stats
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
void GetMostRecentCachedFrame(int32 frame, int32 *cached_frame, MatrixBase< double > *stats)
Get the most recent cached frame of CMVN stats.
int first_available_index_
virtual int32 NumFramesReady() const
Returns the total number of frames, since the start of the utterance, that are now available.
virtual int32 NumFramesReady() const
Returns the total number of frames, since the start of the utterance, that are now available.
Provides a vector abstraction class.
This is a templated class for online feature extraction; it's templated on a class like MfccComputer ...
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
Vector< BaseFloat > * At(int index) const
The ownership is being retained by this collection - do not delete the item.
virtual int32 Dim() const
void ApplyCmvn(const MatrixBase< double > &stats, bool var_norm, MatrixBase< BaseFloat > *feats)
Apply cepstral mean and variance normalization to a matrix of features.
Sub-matrix representation.
virtual int32 NumFramesReady() const =0
Returns the total number of frames, since the start of the utterance, that are now available.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
virtual int32 Dim() const =0
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
void FakeStatsForSomeDims(const std::vector< int32 > &dims, MatrixBase< double > *stats)
Modify the stats so that for some dimensions (specified in "dims"), we replace them with "fake" stats...
OnlineFeatureInterface * src_
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).