doc/online-ivector-feature_8h_source.html

 // online2/online-ivector-feature.h

 // Copyright 2013-2014   Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_ONLINE2_ONLINE_IVECTOR_FEATURE_H_
 #define KALDI_ONLINE2_ONLINE_IVECTOR_FEATURE_H_

 #include <deque>
 #include <functional>
 #include <queue>
 #include <string>
 #include <utility>
 #include <vector>

 #include "matrix/matrix-lib.h"
 #include "util/common-utils.h"
 #include "base/kaldi-error.h"
 #include "itf/online-feature-itf.h"
 #include "gmm/diag-gmm.h"
 #include "feat/online-feature.h"
 #include "ivector/ivector-extractor.h"
 #include "decoder/lattice-faster-online-decoder.h"
 #include "decoder/lattice-incremental-online-decoder.h"

 namespace kaldi {


 // User-facing configuration for online iVector extraction: the names of the
 // files to read, plus the numeric options that control how iVectors are
 // estimated.  All options except num_cg_iters are exposed via Register().
 struct OnlineIvectorExtractionConfig {
   // Where to read the LDA+MLLT matrix from.
   std::string lda_mat_rxfilename;
   // Where to read the matrix of global CMVN stats from.
   std::string global_cmvn_stats_rxfilename;
   // Where to read the OnlineSpliceOptions from.
   std::string splice_config_rxfilename;
   // Where to read the OnlineCmvnOptions from.
   std::string cmvn_config_rxfilename;
   // Flag that activates online CMVN in the iVector-extractor feature pipeline.
   bool online_cmvn_iextractor;
   // Where to read the diagonal UBM (type DiagGmm) from.
   std::string diag_ubm_rxfilename;
   // Where to read the iVector extractor (type IvectorExtractor) from.
   std::string ivector_extractor_rxfilename;

   // The next four values should in principle agree with what was passed to
   // the script extract_ivectors_online.sh, but none of them is super-critical.

   // How frequently the iVector is re-estimated.
   int32 ivector_period;
   // Maximum number of posteriors used per frame by the iVector extractor.
   int32 num_gselect;
   // Pruning threshold on posteriors for the iVector extractor.
   BaseFloat min_post;
   // Scale applied to the posteriors used for iVector extraction; it can be
   // read as the inverse of a scale on the log-prior.
   BaseFloat posterior_scale;
   // If nonzero, the maximum stats count allowed before the stats start being
   // scaled down; this avoids atypical-looking iVectors on very long
   // utterances.  Interpret it as a number of frames times posterior_scale,
   // typically 1/10 of a frame count.
   BaseFloat max_count;

   // Fixed at 15.  Not believed to matter much, so for now it is deliberately
   // not configurable from the command line.
   int32 num_cg_iters;

   // When true, the most recent available iVector is always returned instead
   // of the one for the requested frame.  So if audio arrives faster than it
   // can be processed, a more accurate iVector is returned.
   bool use_most_recent_ivector;

   // When true, always read ahead to NumFramesReady() when getting iVector
   // stats.
   bool greedy_ivector_extractor;

   // Largest number of frames remembered between utterances of the same
   // speaker.  It affects the output of GetAdaptationState() and bounds the
   // number of frames of both the CMVN stats and the iVector stats.  A smaller
   // value makes the adaptation less constrained by earlier utterances
   // (assuming info from an earlier utterance of the same speaker was supplied
   // through SetAdaptationState()).
   BaseFloat max_remembered_frames;

   // Sets the defaults; the filename members are left as empty strings.
   OnlineIvectorExtractionConfig()
       : online_cmvn_iextractor(false),
         ivector_period(10),
         num_gselect(5),
         min_post(0.025),
         posterior_scale(0.1),
         max_count(0.0),
         num_cg_iters(15),
         use_most_recent_ivector(true),
         greedy_ivector_extractor(false),
         max_remembered_frames(1000) { }

   // Registers every configurable member with 'opts' under its option name.
   void Register(OptionsItf *opts) {
     opts->Register(
         "lda-matrix", &lda_mat_rxfilename,
         "Filename of LDA matrix, "
         "e.g. final.mat; used for iVector extraction. ");
     opts->Register(
         "global-cmvn-stats", &global_cmvn_stats_rxfilename,
         "(Extended) filename for global CMVN stats, used in iVector "
         "extraction, obtained for example from "
         "'matrix-sum scp:data/train/cmvn.scp -', only used for "
         "iVector extraction");
     opts->Register(
         "cmvn-config", &cmvn_config_rxfilename,
         "Configuration "
         "file for online CMVN features (e.g. conf/online_cmvn.conf),"
         "only used for iVector extraction.  Contains options "
         "as for the program 'apply-cmvn-online'");
     opts->Register(
         "online-cmvn-iextractor", &online_cmvn_iextractor,
         "add online-cmvn to feature pipeline of ivector extractor, "
         "use the cmvn setup from the UBM.  Note: the default of "
         "false is what we historically used; we'd use true if "
         "we were using CMVN'ed features for the neural net.");
     opts->Register(
         "splice-config", &splice_config_rxfilename,
         "Configuration file "
         "for frame splicing (--left-context and --right-context "
         "options); used for iVector extraction.");
     opts->Register(
         "diag-ubm", &diag_ubm_rxfilename,
         "Filename of diagonal UBM "
         "used to obtain posteriors for iVector extraction, e.g. "
         "final.dubm");
     opts->Register(
         "ivector-extractor", &ivector_extractor_rxfilename,
         "Filename of iVector extractor, e.g. final.ie");
     opts->Register(
         "ivector-period", &ivector_period,
         "Frequency with which "
         "we extract iVectors for neural network adaptation");
     opts->Register(
         "num-gselect", &num_gselect,
         "Number of Gaussians to select "
         "for iVector extraction");
     opts->Register(
         "min-post", &min_post,
         "Threshold for posterior pruning in "
         "iVector extraction");
     opts->Register(
         "posterior-scale", &posterior_scale,
         "Scale for posteriors in "
         "iVector extraction (may be viewed as inverse of prior scale)");
     opts->Register(
         "max-count", &max_count,
         "Maximum data count we allow before "
         "we start scaling the stats down (if nonzero)... helps to make "
         "iVectors from long utterances look more typical.  Interpret "
         "as a frame-count times --posterior-scale, typically 1/10 of "
         "a number of frames.  Suggest 100.");
     opts->Register(
         "use-most-recent-ivector", &use_most_recent_ivector,
         "If true, "
         "always use most recent available iVector, rather than the "
         "one for the designated frame.");
     opts->Register(
         "greedy-ivector-extractor", &greedy_ivector_extractor,
         "If "
         "true, 'read ahead' as many frames as we currently have available "
         "when extracting the iVector.  May improve iVector quality.");
     opts->Register(
         "max-remembered-frames", &max_remembered_frames,
         "The maximum "
         "number of frames of adaptation history that we carry through "
         "to later utterances of the same speaker (having a finite "
         "number allows the speaker adaptation state to change over "
         "time).  Interpret as a real frame count, i.e. not a count "
         "scaled by --posterior-scale.");
   }
 };

 // Holds the objects and option values needed for online iVector extraction:
 // the matrices and models themselves (rather than the filenames held by
 // OnlineIvectorExtractionConfig) plus copies of the numeric options.
 // Copying is disallowed (see the macro at the bottom), so users are expected
 // to share one instance, e.g. by const reference.
 struct OnlineIvectorExtractionInfo {

   Matrix<BaseFloat> lda_mat;  // LDA+MLLT matrix.
   Matrix<double> global_cmvn_stats;  // Global CMVN stats.

   OnlineCmvnOptions cmvn_opts;  // Options for online CMN/CMVN computation.
   bool online_cmvn_iextractor;  // flag activating online CMN/CMVN for iextractor input.
   OnlineSpliceOptions splice_opts;  // Options for frame splicing
                                     // (--left-context,--right-context)

   DiagGmm diag_ubm;  // Diagonal UBM (cf. the --diag-ubm option).
   IvectorExtractor extractor;  // The iVector extractor itself.

   // the following configuration variables are copied from
   // OnlineIvectorExtractionConfig, see comments there.
   int32 ivector_period;
   int32 num_gselect;
   BaseFloat min_post;
   BaseFloat posterior_scale;
   BaseFloat max_count;
   int32 num_cg_iters;
   bool use_most_recent_ivector;
   bool greedy_ivector_extractor;
   BaseFloat max_remembered_frames;

   // Constructs the object from a config.  Defined out of line;
   // NOTE(review): presumably equivalent to calling Init(config) -- confirm
   // against the .cc file.  Not marked explicit, so a Config converts
   // implicitly to an Info.
   OnlineIvectorExtractionInfo(const OnlineIvectorExtractionConfig &config);

   // (Re)initializes this object from 'config'.  Defined out of line;
   // NOTE(review): presumably reads the files named in 'config' -- confirm.
   void Init(const OnlineIvectorExtractionConfig &config);

   // Defined out of line.  NOTE(review): by its name, the dimension of the
   // base features this setup expects as input -- confirm against the .cc.
   int32 ExpectedFeatureDim() const;

   // This constructor creates a version of this object where everything
   // is empty or zero.
   OnlineIvectorExtractionInfo();

   // Defined out of line.  NOTE(review): presumably a consistency check of the
   // members above -- confirm what is verified and how failure is reported.
   void Check() const;
  private:
   // Copy construction and assignment are disabled for this struct.
   KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineIvectorExtractionInfo);
 };

 // Adaptation state of the online iVector extractor: the online-CMVN state
 // together with the iVector estimation statistics.
 struct OnlineIvectorExtractorAdaptationState {
   // About the CMVN state: it belongs to the features used to obtain the
   // posteriors for iVector extraction.  Online CMVN is not applied to the
   // features that are fed to the neural net; the iVector is used there
   // instead.

   // Online-CMVN adaptation state (used when getting posteriors for the
   // iVector).
   OnlineCmvnState cmvn_state;

   // Statistics for online iVector estimation.
   OnlineIvectorEstimationStats ivector_stats;

   // Creates an adaptation state with no prior speaker history: the CMVN state
   // is built from info.global_cmvn_stats, and the iVector stats from the
   // extractor's IvectorDim() and PriorOffset() plus info.max_count.
   OnlineIvectorExtractorAdaptationState(
       const OnlineIvectorExtractionInfo &info)
       : cmvn_state(info.global_cmvn_stats),
         ivector_stats(info.extractor.IvectorDim(),
                       info.extractor.PriorOffset(),
                       info.max_count) { }

   // Copy constructor (defined out of line).
   OnlineIvectorExtractorAdaptationState(
       const OnlineIvectorExtractorAdaptationState &other);

   // Defined out of line.  'max_remembered_frames' is a real frame count and
   // 'posterior_scale' the scale on posteriors; see the options of the same
   // names in OnlineIvectorExtractionConfig.
   void LimitFrames(BaseFloat max_remembered_frames,
                    BaseFloat posterior_scale);

   // Serialization to / from a stream, in binary or text mode.
   void Write(std::ostream &os, bool binary) const;
   void Read(std::istream &is, bool binary);
 };


 // Online feature object whose output frames are iVectors, estimated from the
 // feature 'base_feature' that it is built on top of.
 //
 // Objects of this class own heap-allocated feature objects (to_delete_) and
 // stored iVectors (ivectors_history_), so the class is non-copyable.
 class OnlineIvectorFeature: public OnlineFeatureInterface {
  public:
   // 'info' is stored by reference and must outlive this object;
   // 'base_feature' is the feature this object is built on top of (e.g. MFCC)
   // and is not owned here.
   explicit OnlineIvectorFeature(const OnlineIvectorExtractionInfo &info,
                                 OnlineFeatureInterface *base_feature);

   // This version of the constructor accepts per-frame weights (relates to
   // downweighting silence).  This is intended for use in offline operation,
   // i.e. during training.  [will implement this when needed.]
   //explicit OnlineIvectorFeature(const OnlineIvectorExtractionInfo &info,
   //     std::vector<BaseFloat> frame_weights,
   //OnlineFeatureInterface *base_feature);


   // Member functions from OnlineFeatureInterface:

   virtual int32 Dim() const;
   virtual bool IsLastFrame(int32 frame) const;
   virtual int32 NumFramesReady() const;
   virtual BaseFloat FrameShiftInSeconds() const;
   virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);

   // Supplies adaptation state (CMVN state and iVector stats), e.g. obtained
   // from a previous utterance of the same speaker via GetAdaptationState().
   void SetAdaptationState(
       const OnlineIvectorExtractorAdaptationState &adaptation_state);


   // Writes the current adaptation state to *adaptation_state.
   void GetAdaptationState(
       OnlineIvectorExtractorAdaptationState *adaptation_state) const;

   virtual ~OnlineIvectorFeature();

   // Some diagnostics (not present in generic interface):
   // UBM log-like per frame:
   BaseFloat UbmLogLikePerFrame() const;
   // Objective improvement per frame from iVector estimation, versus default iVector
   // value, measured at utterance end.
   BaseFloat ObjfImprPerFrame() const;

   // Returns the number of frames seen, with the posterior-scale divided back
   // out (the stats' own count is scaled by info_.posterior_scale).
   BaseFloat NumFrames() const {
     return ivector_stats_.NumFrames() / info_.posterior_scale;
   }


   // If you are downweighting silence, you can call
   // OnlineSilenceWeighting::GetDeltaWeights and supply the output to this class
   // using UpdateFrameWeights().  The reason why this call happens outside this
   // class, rather than this class pulling in the data weights, relates to
   // multi-threaded operation and also from not wanting this class to have
   // excessive dependencies.
   //
   // You must either always call this as soon as new data becomes available
   // (ideally just after calling AcceptWaveform), or never call it for the
   // lifetime of this object.
   void UpdateFrameWeights(
       const std::vector<std::pair<int32, BaseFloat> > &delta_weights);

  private:

   // This accumulates i-vector stats for a set of frames, specified as pairs
   // (t, weight).  The weights do not have to be positive.  (In the online
   // silence-weighting that we do, negative weights can occur if we change our
   // minds about the assignment of a frame as silence vs. non-silence).
   void UpdateStatsForFrames(
       const std::vector<std::pair<int32, BaseFloat> > &frame_weights);

   // Returns a modified version of info_.min_post, which is info_.min_post if
   // weight is 1.0 or -1.0, but gets larger if fabs(weight) is small... but no
   // larger than 0.99.  (This is an efficiency thing, to not bother processing
   // very small counts).
   BaseFloat GetMinPost(BaseFloat weight) const;

   // This is the original UpdateStatsUntilFrame that is called when there is
   // no data-weighting involved.
   void UpdateStatsUntilFrame(int32 frame);

   // This is the new UpdateStatsUntilFrame that is called when there is
   // data-weighting (i.e. when the user has been calling UpdateFrameWeights()).
   void UpdateStatsUntilFrameWeighted(int32 frame);

   void PrintDiagnostics() const;

   // Extraction setup; held by reference, not owned.
   const OnlineIvectorExtractionInfo &info_;

   OnlineFeatureInterface *base_;  // The feature this is built on top of
                                   // (e.g. MFCC); not owned here

   OnlineFeatureInterface *lda_;  // LDA on top of raw+splice features.
   OnlineCmvn *cmvn_;  // the CMVN that we give to the lda_normalized_.
   OnlineFeatureInterface *lda_normalized_;  // LDA on top of CMVN+splice

   // the following is the pointers to OnlineFeatureInterface objects that are
   // owned here and which we need to delete.
   std::vector<OnlineFeatureInterface*> to_delete_;

   // The iVector estimation stats.
   OnlineIvectorEstimationStats ivector_stats_;

   // Number of frames of data from this utterance that have already been
   // accumulated into the stats.
   int32 num_frames_stats_;

   // Written to by UpdateFrameWeights(), i.e. only in use when the iVector
   // estimation is silence-weighted.  Holds (frame, delta-weight) pairs;
   // std::greater makes the smallest pair (lowest frame index) the top.
   std::priority_queue<std::pair<int32, BaseFloat>,
                       std::vector<std::pair<int32, BaseFloat> >,
                       std::greater<std::pair<int32, BaseFloat> > > delta_weights_;

   // Only used for validating that the frame-weighting code is not buggy.
   std::vector<BaseFloat> current_frame_weight_debug_;

   // Set to true if UpdateFrameWeights() was ever called; used to detect wrong
   // usage of this class.
   bool delta_weights_provided_;
   // Also used to detect wrong usage of this class.  NOTE(review): presumably
   // set by UpdateStatsUntilFrame() when it runs without delta weights --
   // confirm in the .cc file.
   bool updated_with_no_delta_weights_;

   // If delta weights were ever provided, tracks the most recent frame that
   // ever had a weight.
   int32 most_recent_frame_with_weight_;

   // Only needed for diagnostics.
   double tot_ubm_loglike_;

   // Most recently estimated iVector.
   Vector<double> current_ivector_;

   // Used when info_.use_most_recent_ivector == false, in which case the
   // iVectors estimated over time have to be stored.  Raw pointers.
   // NOTE(review): presumably freed in the destructor -- confirm in the .cc.
   std::vector<Vector<BaseFloat>* > ivectors_history_;

   // This class has a user-declared destructor and holds owning raw pointers,
   // so an implicit copy would share them and risk double deletion.
   KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineIvectorFeature);
 };


 struct OnlineSilenceWeightingConfig {
   std::string silence_phones_str;
   // The weighting factor that we apply to silence phones in the iVector
   // extraction.  This option is only relevant if the --silence-phones option is
   // set.
   BaseFloat silence_weight;

   // Transition-ids that get repeated at least this many times (if
   // max_state_duration > 0) are treated as silence.
   BaseFloat max_state_duration;

   // This is the scale that we apply to data that we don't yet have a decoder
   // traceback for, in the online silence
   BaseFloat new_data_weight;

   bool Active() const {
     return !silence_phones_str.empty() && silence_weight != 1.0;
   }

   OnlineSilenceWeightingConfig():
       silence_weight(1.0), max_state_duration(-1) { }

   void Register(OptionsItf *opts) {
     opts->Register("silence-phones", &silence_phones_str, "(RE weighting in "
                    "iVector estimation for online decoding) List of integer ids of "
                    "silence phones, separated by colons (or commas).  Data that "
                    "(according to the traceback of the decoder) corresponds to "
                    "these phones will be downweighted by --silence-weight.");
     opts->Register("silence-weight", &silence_weight, "(RE weighting in "
                    "iVector estimation for online decoding) Weighting factor for frames "
                    "that the decoder trace-back identifies as silence; only "
                    "relevant if the --silence-phones option is set.");
     opts->Register("max-state-duration", &max_state_duration, "(RE weighting in "
                    "iVector estimation for online decoding) Maximum allowed "
                    "duration of a single transition-id; runs with durations longer "
                    "than this will be weighted down to the silence-weight.");
   }
   // e.g. prefix = "ivector-silence-weighting"
   void RegisterWithPrefix(std::string prefix, OptionsItf *opts) {
     ParseOptions po_prefix(prefix, opts);
     this->Register(&po_prefix);
   }
 };

 // This class is responsible for keeping track of the best-path traceback from
 // the decoder (efficiently) and computing a weighting of the data based on the
 // classification of frames as silence (or not silence)... also with a duration
 // limitation, so data from a very long run of the same transition-id will get
 // weighted down.  (this is often associated with misrecognition or silence).
 // Tracks the decoder's best-path traceback and turns it into per-frame weight
 // changes (see GetDeltaWeights()) that can be passed to
 // OnlineIvectorFeature::UpdateFrameWeights().
 class OnlineSilenceWeighting {
  public:
   // Note: you would initialize a new copy of this object for each new
   // utterance.
   // The frame-subsampling-factor is used for newer nnet3 models, especially
   // chain models, when the frame-rate of the decoder is different from the
   // frame-rate of the input features.  E.g. you might set it to 3 for such
   // models.
   //
   // This class holds reference members (trans_model_, config_).
   // NOTE(review): presumably they are bound to the arguments given here, in
   // which case those objects must outlive this object -- confirm in the .cc.

   OnlineSilenceWeighting(const TransitionModel &trans_model,
                          const OnlineSilenceWeightingConfig &config,
                          int32 frame_subsampling_factor = 1);

   // True if the configuration makes silence weighting have any effect; just
   // forwards to OnlineSilenceWeightingConfig::Active().
   bool Active() const { return config_.Active(); }

   // This should be called before GetDeltaWeights, so this class knows about the
   // traceback info from the decoder.  It records the traceback information from
   // the decoder using its BestPathEnd() and related functions.
   // It will be instantiated for FST == fst::Fst<fst::StdArc> and fst::GrammarFst.
   template <typename FST>
   void ComputeCurrentTraceback(const LatticeFasterOnlineDecoderTpl<FST> &decoder);
   template <typename FST>
   void ComputeCurrentTraceback(const LatticeIncrementalOnlineDecoderTpl<FST> &decoder);

   // Calling this function gets the changes in weight that require us to modify
   // the stats... the output format is (frame-index, delta-weight).
   //
   // The num_frames_ready argument is the number of frames available at
   // the input (or equivalently, output) of the online iVector feature in the
   // feature pipeline from the stream start. It may be more than the currently
   // available decoder traceback.
   //
   // The first_decoder_frame is the offset from the start of the stream in
   // pipeline frames when decoder was restarted last time. We do not change
   // weight for the frames earlier than first_decoder_frame. Set it to 0 in
   // case of compilation error to reproduce the previous behavior or for a
   // single utterance decoding.
   //
   // How many frames of weights it outputs depends on how much "num_frames_ready"
   // increased since last time we called this function, and whether the decoder
   // traceback changed.  Negative delta_weights might occur if frames previously
   // classified as non-silence become classified as silence if the decoder's
   // traceback changes.  You must call this function with "num_frames_ready"
   // arguments that only increase, not decrease, with time.  You would provide
   // this output to class OnlineIvectorFeature by calling its function
   // UpdateFrameWeights with the output.
   //
   // Returned frame-index is in pipeline frames from the pipeline start.
   void GetDeltaWeights(
       int32 num_frames_ready, int32 first_decoder_frame,
       std::vector<std::pair<int32, BaseFloat> > *delta_weights);

   // A method for backward compatibility, same as above, but for a single
   // utterance: it calls the three-argument version with
   // first_decoder_frame = 0.
   void GetDeltaWeights(
       int32 num_frames_ready,
       std::vector<std::pair<int32, BaseFloat> > *delta_weights) {
     GetDeltaWeights(num_frames_ready, 0, delta_weights);
   }

  private:
   // Both of the following are references, not copies.
   const TransitionModel &trans_model_;
   const OnlineSilenceWeightingConfig &config_;

   // Ratio between the frame-rate of the features and that of the decoder;
   // see the comment on the constructor.
   int32 frame_subsampling_factor_;

   // Set of integer ids of silence phones.  NOTE(review): presumably parsed
   // from config_.silence_phones_str in the constructor -- confirm.
   unordered_set<int32> silence_phones_;

   // What we remember about one (decoder-rate) frame of the traceback.
   struct FrameInfo {
     // The only reason we need the token pointer is to know how far back we
     // have to trace before the traceback is the same as what we previously
     // traced back.
     void *token;
     // Transition-id on this frame; -1 until it has been set.
     int32 transition_id;
     // current_weight is the weight we've previously told the iVector
     // extractor to use for this frame, if any.  It may not equal the
     // weight we "want" it to use (any difference between the two will
     // be output when the user calls GetDeltaWeights()).
     BaseFloat current_weight;
     FrameInfo(): token(NULL), transition_id(-1), current_weight(0.0) {}
   };

   // This contains information about any previously computed traceback;
   // when the traceback changes we use this variable to compare it with the
   // previous traceback.
   // It's indexed at the frame-rate of the decoder (may be different
   // by 'frame_subsampling_factor_' from the frame-rate of the features).
   std::vector<FrameInfo> frame_info_;

   // This records how many frames have been output and that currently reflect
   // the traceback accurately.  It is used to avoid GetDeltaWeights() having to
   // visit each frame as far back as t = 0, each time it is called.
   // GetDeltaWeights() sets this to the number of frames that it output, and
   // ComputeCurrentTraceback() then reduces it to however far it traced back.
   // However, we may have to go further back in time than this in order to
   // properly honor the "max-state-duration" config.  This, if needed, is done
   // in GetDeltaWeights() before outputting the delta weights.
   int32 num_frames_output_and_correct_;
 };


 }  // namespace kaldi

 #endif  // KALDI_ONLINE2_ONLINE_IVECTOR_FEATURE_H_
kaldi::OnlineIvectorExtractionConfig::max_remembered_frames
BaseFloat max_remembered_frames
Definition: online-ivector-feature.h:103

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::OnlineIvectorFeature::NumFrames
BaseFloat NumFrames() const
Definition: online-ivector-feature.h:307

kaldi::OnlineSilenceWeightingConfig::Active
bool Active() const
Definition: online-ivector-feature.h:431

kaldi::OnlineIvectorExtractionConfig::cmvn_config_rxfilename
std::string cmvn_config_rxfilename
Definition: online-ivector-feature.h:60

kaldi::OnlineIvectorExtractionInfo::num_cg_iters
int32 num_cg_iters
Definition: online-ivector-feature.h:188

kaldi::OnlineIvectorExtractionConfig::min_post
BaseFloat min_post
Definition: online-ivector-feature.h:72

kaldi::OnlineIvectorExtractionInfo::min_post
BaseFloat min_post
Definition: online-ivector-feature.h:185

diag-gmm.h

matrix-lib.h

kaldi::OnlineIvectorExtractionInfo
This struct contains various things that are needed (as const references) by class OnlineIvectorExtra...
Definition: online-ivector-feature.h:168

kaldi::OnlineIvectorFeature::current_ivector_
Vector< double > current_ivector_
Most recently estimated iVector, will have been estimated at the greatest time t where t <= num_frame...
Definition: online-ivector-feature.h:404

kaldi::OnlineIvectorExtractionInfo::cmvn_opts
OnlineCmvnOptions cmvn_opts
Definition: online-ivector-feature.h:173

kaldi::OnlineSilenceWeighting
Definition: online-ivector-feature.h:465

kaldi::OnlineIvectorExtractorAdaptationState::cmvn_state
OnlineCmvnState cmvn_state
Definition: online-ivector-feature.h:217

kaldi::OnlineIvectorExtractionInfo::extractor
IvectorExtractor extractor
Definition: online-ivector-feature.h:179

kaldi::LatticeIncrementalOnlineDecoderTpl
LatticeIncrementalOnlineDecoderTpl is as LatticeIncrementalDecoderTpl but also supports an efficient ...
Definition: lattice-incremental-online-decoder.h:51

kaldi::OnlineIvectorExtractorAdaptationState::OnlineIvectorExtractorAdaptationState
OnlineIvectorExtractorAdaptationState(const OnlineIvectorExtractionInfo &info)
This constructor initializes adaptation-state with no prior speaker history.
Definition: online-ivector-feature.h:223

kaldi::OnlineCmvn
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
Definition: online-feature.h:321

kaldi::OnlineSilenceWeighting::GetDeltaWeights
void GetDeltaWeights(int32 num_frames_ready, std::vector< std::pair< int32, BaseFloat > > *delta_weights)
Definition: online-ivector-feature.h:519

kaldi::OnlineIvectorFeature::cmvn_
OnlineCmvn * cmvn_
Definition: online-ivector-feature.h:356

kaldi::OnlineIvectorExtractorAdaptationState
This class stores the adaptation state from the online iVector extractor, which can help you to initi...
Definition: online-ivector-feature.h:211

kaldi::OnlineSilenceWeightingConfig
Definition: online-ivector-feature.h:416

online-feature.h

kaldi::OnlineIvectorExtractionInfo::ivector_period
int32 ivector_period
Definition: online-ivector-feature.h:183

kaldi::OnlineIvectorFeature::base_
OnlineFeatureInterface * base_
Definition: online-ivector-feature.h:352

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::OnlineIvectorEstimationStats
This class helps us to efficiently estimate iVectors in situations where the data is coming in frame ...
Definition: ivector-extractor.h:314

kaldi::Matrix< BaseFloat >

KALDI_DISALLOW_COPY_AND_ASSIGN
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121

kaldi::OnlineSilenceWeighting::FrameInfo::token
void * token
Definition: online-ivector-feature.h:536

kaldi::IvectorExtractor
Definition: ivector-extractor.h:136

kaldi::OnlineIvectorExtractionConfig::global_cmvn_stats_rxfilename
std::string global_cmvn_stats_rxfilename
Definition: online-ivector-feature.h:57

kaldi::OnlineIvectorExtractionInfo::lda_mat
Matrix< BaseFloat > lda_mat
Definition: online-ivector-feature.h:170

kaldi::OnlineIvectorFeature::num_frames_stats_
int32 num_frames_stats_
num_frames_stats_ is the number of frames of data we have already accumulated from this utterance and...
Definition: online-ivector-feature.h:372

kaldi::OnlineIvectorExtractionConfig::max_count
BaseFloat max_count
Definition: online-ivector-feature.h:77

kaldi::OnlineSilenceWeightingConfig::new_data_weight
BaseFloat new_data_weight
Definition: online-ivector-feature.h:429

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::OnlineIvectorExtractionInfo::diag_ubm
DiagGmm diag_ubm
Definition: online-ivector-feature.h:178

kaldi::TransitionModel
Definition: transition-model.h:123

kaldi::OnlineIvectorExtractionConfig::online_cmvn_iextractor
bool online_cmvn_iextractor
Definition: online-ivector-feature.h:61

kaldi::OnlineIvectorExtractionConfig::num_gselect
int32 num_gselect
Definition: online-ivector-feature.h:70

kaldi::OnlineIvectorFeature::tot_ubm_loglike_
double tot_ubm_loglike_
The following is only needed for diagnostics.
Definition: online-ivector-feature.h:399

kaldi::OnlineCmvnOptions
Definition: online-feature.h:203

kaldi::OnlineIvectorFeature::lda_normalized_
OnlineFeatureInterface * lda_normalized_
Definition: online-ivector-feature.h:357

kaldi::OnlineIvectorExtractionInfo::num_gselect
int32 num_gselect
Definition: online-ivector-feature.h:184

kaldi::OnlineIvectorFeature::ivectors_history_
std::vector< Vector< BaseFloat > *> ivectors_history_
if info_.use_most_recent_ivector == false, we need to store the iVector we estimated each info_...
Definition: online-ivector-feature.h:411

kaldi::OnlineIvectorExtractionInfo::splice_opts
OnlineSpliceOptions splice_opts
Definition: online-ivector-feature.h:175

kaldi::OnlineSilenceWeighting::FrameInfo::current_weight
BaseFloat current_weight
Definition: online-ivector-feature.h:542

kaldi::OnlineSilenceWeightingConfig::RegisterWithPrefix
void RegisterWithPrefix(std::string prefix, OptionsItf *opts)
Definition: online-ivector-feature.h:454

kaldi::ParseOptions
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36

kaldi::OnlineIvectorFeature::ivector_stats_
OnlineIvectorEstimationStats ivector_stats_
the iVector estimation stats
Definition: online-ivector-feature.h:364

kaldi::OnlineIvectorExtractorAdaptationState::ivector_stats
OnlineIvectorEstimationStats ivector_stats
Stats for online iVector estimation.
Definition: online-ivector-feature.h:220

kaldi::OnlineIvectorFeature::updated_with_no_delta_weights_
bool updated_with_no_delta_weights_
The following is also used to detect wrong usage of this class; it's set to true if UpdateStatsUntilF...
Definition: online-ivector-feature.h:392

kaldi::OnlineIvectorFeature::most_recent_frame_with_weight_
int32 most_recent_frame_with_weight_
if delta_weights_ was ever called, this keeps track of the most recent frame that ever had a weight...
Definition: online-ivector-feature.h:396

float

kaldi::OnlineIvectorExtractionConfig::OnlineIvectorExtractionConfig
OnlineIvectorExtractionConfig()
Definition: online-ivector-feature.h:105

kaldi::OnlineSilenceWeighting::config_
const OnlineSilenceWeightingConfig & config_
Definition: online-ivector-feature.h:527

kaldi::OnlineSilenceWeighting::Active
bool Active() const
Definition: online-ivector-feature.h:478

kaldi::OnlineIvectorExtractionConfig::num_cg_iters
int32 num_cg_iters
Definition: online-ivector-feature.h:83

kaldi::OnlineIvectorExtractionConfig::posterior_scale
BaseFloat posterior_scale
Definition: online-ivector-feature.h:74

kaldi::OnlineCmvnState
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
Definition: online-feature.h:266

kaldi::OnlineIvectorExtractionConfig::diag_ubm_rxfilename
std::string diag_ubm_rxfilename
Definition: online-ivector-feature.h:63

kaldi::OnlineIvectorExtractionConfig::use_most_recent_ivector
bool use_most_recent_ivector
Definition: online-ivector-feature.h:91

kaldi::OnlineIvectorExtractionInfo::max_remembered_frames
BaseFloat max_remembered_frames
Definition: online-ivector-feature.h:191

kaldi::OnlineSilenceWeightingConfig::Register
void Register(OptionsItf *opts)
Definition: online-ivector-feature.h:438

kaldi::OnlineIvectorExtractionConfig::ivector_extractor_rxfilename
std::string ivector_extractor_rxfilename
Definition: online-ivector-feature.h:64

lattice-faster-online-decoder.h

kaldi::OnlineSilenceWeighting::FrameInfo::FrameInfo
FrameInfo()
Definition: online-ivector-feature.h:543

kaldi::OnlineIvectorExtractionConfig::greedy_ivector_extractor
bool greedy_ivector_extractor
Definition: online-ivector-feature.h:94

lattice-incremental-online-decoder.h

kaldi::OnlineIvectorFeature::current_frame_weight_debug_
std::vector< BaseFloat > current_frame_weight_debug_
this is only used for validating that the frame-weighting code is not buggy.
Definition: online-ivector-feature.h:385

kaldi::OnlineIvectorFeature::info_
const OnlineIvectorExtractionInfo & info_
Definition: online-ivector-feature.h:350

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::LatticeFasterOnlineDecoderTpl
LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also supports an efficient way to get...
Definition: lattice-faster-online-decoder.h:54

kaldi::OnlineIvectorFeature::delta_weights_
std::priority_queue< std::pair< int32, BaseFloat >, std::vector< std::pair< int32, BaseFloat > >, std::greater< std::pair< int32, BaseFloat > > > delta_weights_
delta_weights_ is written to by UpdateFrameWeights, in the case where the iVector estimation is silen...
Definition: online-ivector-feature.h:382

ivector-extractor.h

kaldi::OnlineSilenceWeightingConfig::silence_phones_str
std::string silence_phones_str
Definition: online-ivector-feature.h:417

kaldi::OnlineIvectorExtractionConfig::lda_mat_rxfilename
std::string lda_mat_rxfilename
Definition: online-ivector-feature.h:56

kaldi::OnlineIvectorFeature::delta_weights_provided_
bool delta_weights_provided_
delta_weights_provided_ is set to true if UpdateFrameWeights was ever called; it's used to detect wro...
Definition: online-ivector-feature.h:389

kaldi::OnlineIvectorExtractionInfo::use_most_recent_ivector
bool use_most_recent_ivector
Definition: online-ivector-feature.h:189

online-feature-itf.h

kaldi::OnlineIvectorFeature::to_delete_
std::vector< OnlineFeatureInterface * > to_delete_
Definition: online-ivector-feature.h:361

kaldi::OnlineSilenceWeighting::silence_phones_
unordered_set< int32 > silence_phones_
Definition: online-ivector-feature.h:531

kaldi::Vector< double >

kaldi::OnlineIvectorExtractionInfo::max_count
BaseFloat max_count
Definition: online-ivector-feature.h:187

kaldi::OnlineSpliceOptions
Definition: online-feature.h:446

kaldi::OnlineIvectorExtractionConfig::splice_config_rxfilename
std::string splice_config_rxfilename
Definition: online-ivector-feature.h:59

kaldi::OnlineSilenceWeighting::FrameInfo
Definition: online-ivector-feature.h:533

kaldi::OnlineSilenceWeightingConfig::max_state_duration
BaseFloat max_state_duration
Definition: online-ivector-feature.h:425

kaldi::DiagGmm
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42

kaldi::OnlineIvectorExtractionConfig::ivector_period
int32 ivector_period
Definition: online-ivector-feature.h:69

kaldi::OnlineSilenceWeightingConfig::silence_weight
BaseFloat silence_weight
Definition: online-ivector-feature.h:421

kaldi::OnlineFeatureInterface
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
Definition: online-feature-itf.h:49

kaldi::OnlineIvectorExtractionInfo::greedy_ivector_extractor
bool greedy_ivector_extractor
Definition: online-ivector-feature.h:190

kaldi::OnlineIvectorExtractionConfig
This class includes configuration variables relating to the online iVector extraction, but not including configuration for the "base feature", i.e. MFCC/PLP/filterbank.
Definition: online-ivector-feature.h:55

kaldi::OnlineSilenceWeightingConfig::OnlineSilenceWeightingConfig
OnlineSilenceWeightingConfig()
Definition: online-ivector-feature.h:435

kaldi::OnlineSilenceWeighting::trans_model_
const TransitionModel & trans_model_
Definition: online-ivector-feature.h:526

kaldi::OnlineIvectorExtractionInfo::online_cmvn_iextractor
bool online_cmvn_iextractor
Definition: online-ivector-feature.h:174

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::OnlineIvectorExtractionConfig::Register
void Register(OptionsItf *opts)
Definition: online-ivector-feature.h:113

kaldi::OnlineIvectorExtractionInfo::global_cmvn_stats
Matrix< double > global_cmvn_stats
Definition: online-ivector-feature.h:171

kaldi::OnlineSilenceWeighting::FrameInfo::transition_id
int32 transition_id
Definition: online-ivector-feature.h:537

kaldi::OnlineIvectorExtractionInfo::posterior_scale
BaseFloat posterior_scale
Definition: online-ivector-feature.h:186

kaldi::OnlineSilenceWeighting::frame_info_
std::vector< FrameInfo > frame_info_
Definition: online-ivector-feature.h:551

kaldi::OnlineSilenceWeighting::num_frames_output_and_correct_
int32 num_frames_output_and_correct_
Definition: online-ivector-feature.h:561

kaldi::OnlineIvectorFeature::lda_
OnlineFeatureInterface * lda_
Definition: online-ivector-feature.h:355

kaldi-error.h

kaldi::OnlineSilenceWeighting::frame_subsampling_factor_
int32 frame_subsampling_factor_
Definition: online-ivector-feature.h:529

kaldi::OnlineIvectorFeature
OnlineIvectorFeature is an online feature-extraction class that's responsible for extracting iVectors...
Definition: online-ivector-feature.h:256