online-gmm-decoding.h
// online2/online-gmm-decoding.h

// Copyright 2014  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_ONLINE2_ONLINE_GMM_DECODING_H_
#define KALDI_ONLINE2_ONLINE_GMM_DECODING_H_

#include <string>
#include <vector>
#include <deque>

#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
#include "transform/basis-fmllr-diag-gmm.h"
#include "online2/online-feature-pipeline.h"
#include "online2/online-gmm-decodable.h"
#include "online2/online-endpoint.h"
#include "decoder/lattice-faster-online-decoder.h"
#include "hmm/transition-model.h"
#include "gmm/am-diag-gmm.h"
#include "hmm/posterior.h"

namespace kaldi {
/// @addtogroup  onlinedecoding OnlineDecoding
/// @{


// This configuration class controls when to re-estimate the basis-fMLLR
// transform during online decoding; the schedule it defines is queried via
// DoAdapt() below.
struct OnlineGmmDecodingAdaptationPolicyConfig {
  // Delay (in seconds) before the first adaptation, and the ratio controlling
  // how often we re-adapt, for the first utterance of each speaker:
  BaseFloat adaptation_first_utt_delay;
  BaseFloat adaptation_first_utt_ratio;
  // The same two quantities for later utterances of each speaker:
  BaseFloat adaptation_delay;
  BaseFloat adaptation_ratio;

  OnlineGmmDecodingAdaptationPolicyConfig():
      adaptation_first_utt_delay(2.0),
      adaptation_first_utt_ratio(1.5),
      adaptation_delay(5.0),
      adaptation_ratio(2.0) { }

  void Register(OptionsItf *opts) {
    opts->Register("adaptation-first-utt-delay", &adaptation_first_utt_delay,
                   "Delay before first basis-fMLLR adaptation for first utterance "
                   "of each speaker");
    opts->Register("adaptation-first-utt-ratio", &adaptation_first_utt_ratio,
                   "Ratio that controls frequency of fMLLR adaptation for first "
                   "utterance of each speaker");
    opts->Register("adaptation-delay", &adaptation_delay,
                   "Delay before first basis-fMLLR adaptation for not-first "
                   "utterances of each speaker");
    opts->Register("adaptation-ratio", &adaptation_ratio,
                   "Ratio that controls frequency of fMLLR adaptation for "
                   "not-first utterances of each speaker");
  }

  /// Check that configuration values make sense.
  void Check() const;

  /// Returns true if we are scheduled to re-estimate fMLLR somewhere in the
  /// interval [ chunk_begin_secs, chunk_end_secs ).
  bool DoAdapt(BaseFloat chunk_begin_secs,
               BaseFloat chunk_end_secs,
               bool is_first_utterance) const;
};
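
// What follows is an illustrative sketch, not part of the original header: it
// shows how the adaptation policy above might be consulted from a
// chunk-by-chunk decoding loop.  The 0.5-second chunk size, the 20-second
// utterance length and the function name are assumptions made for the example.
inline void ExampleAdaptationSchedule(
    const OnlineGmmDecodingAdaptationPolicyConfig &policy) {
  policy.Check();  // Verify the configuration values make sense.
  const BaseFloat chunk_secs = 0.5;
  bool is_first_utterance = true;
  for (BaseFloat t = 0.0; t < 20.0; t += chunk_secs) {
    // DoAdapt() returns true if an adaptation is scheduled anywhere in the
    // half-open interval [t, t + chunk_secs).
    if (policy.DoAdapt(t, t + chunk_secs, is_first_utterance)) {
      // ... this is the point where a decoder would re-estimate the
      // basis-fMLLR transform (see SingleUtteranceGmmDecoder below) ...
    }
  }
}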


struct OnlineGmmDecodingConfig {
  BaseFloat fmllr_lattice_beam;

  BasisFmllrOptions basis_opts;  // options for basis-fMLLR adaptation.

  LatticeFasterDecoderConfig faster_decoder_opts;

  OnlineGmmDecodingAdaptationPolicyConfig adaptation_policy_opts;

  // rxfilename for model trained with online-CMN features
  // (only needed if different from model_rxfilename)
  std::string online_alimdl_rxfilename;
  // rxfilename for model used for estimating fMLLR transforms
  std::string model_rxfilename;
  // rxfilename for possible discriminatively trained model
  // (only needed if different from model_rxfilename)
  std::string rescore_model_rxfilename;
  // rxfilename for the BasisFmllrEstimate object containing the basis
  // used for basis-fMLLR.
  std::string fmllr_basis_rxfilename;

  BaseFloat acoustic_scale;

  std::string silence_phones;
  BaseFloat silence_weight;

  OnlineGmmDecodingConfig(): fmllr_lattice_beam(3.0), acoustic_scale(0.1),
                             silence_weight(0.1) { }

  void Register(OptionsItf *opts) {
    {  // Register basis_opts with a prefix; there are getting to be too many
       // options.
      ParseOptions basis_po("basis", opts);
      basis_opts.Register(&basis_po);
    }
    adaptation_policy_opts.Register(opts);
    faster_decoder_opts.Register(opts);
    opts->Register("acoustic-scale", &acoustic_scale,
                   "Scaling factor for acoustic likelihoods");
    opts->Register("silence-phones", &silence_phones,
                   "Colon-separated list of integer ids of silence phones, e.g. "
                   "1:2:3 (affects adaptation).");
    opts->Register("silence-weight", &silence_weight,
                   "Weight applied to silence frames for fMLLR estimation (if "
                   "--silence-phones option is supplied)");
    opts->Register("fmllr-lattice-beam", &fmllr_lattice_beam, "Beam used in "
                   "pruning lattices for fMLLR estimation");
    opts->Register("online-alignment-model", &online_alimdl_rxfilename,
                   "(Extended) filename for model trained with online CMN "
                   "features, e.g. from apply-cmvn-online.");
    opts->Register("model", &model_rxfilename, "(Extended) filename for model, "
                   "typically the one used for fMLLR computation. Required option.");
    opts->Register("rescore-model", &rescore_model_rxfilename, "(Extended) filename "
                   "for model to rescore lattices with, e.g. discriminatively trained "
                   "model, if it differs from that supplied to the --model option. "
                   "Must have the same tree.");
    opts->Register("fmllr-basis", &fmllr_basis_rxfilename, "(Extended) filename "
                   "of fMLLR basis object, as output by gmm-basis-fmllr-training");
  }
};
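
// Illustrative sketch, not part of the original header: registering this
// config with Kaldi's command-line parser, roughly the way the online2
// example programs do.  The usage string and the function name are
// assumptions made for the example.
inline OnlineGmmDecodingConfig ExampleParseConfig(int argc, char *argv[]) {
  const char *usage = "Example program.\nUsage: example [options]\n";
  ParseOptions po(usage);
  OnlineGmmDecodingConfig decode_config;
  // Note: the basis-fMLLR options appear with a "basis." prefix on the command
  // line, because of the prefixed ParseOptions used in Register() above.
  decode_config.Register(&po);
  po.Read(argc, argv);
  if (decode_config.model_rxfilename.empty())
    KALDI_ERR << "You must supply the --model option.";
  return decode_config;
}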


// This class is used to read, store and give access to the models used for the
// three phases of decoding: first-pass decoding with online-CMN features,
// estimation of fMLLR transforms, and lattice rescoring with a possibly
// discriminatively trained model.  If the later models are not supplied in the
// config, the earlier ones are used in their place; the rxfilenames are taken
// from OnlineGmmDecodingConfig.
class OnlineGmmDecodingModels {
 public:
  OnlineGmmDecodingModels(const OnlineGmmDecodingConfig &config);

  const TransitionModel &GetTransitionModel() const;

  const AmDiagGmm &GetOnlineAlignmentModel() const;

  const AmDiagGmm &GetModel() const;

  const AmDiagGmm &GetFinalModel() const;

  const BasisFmllrEstimate &GetFmllrBasis() const;

 private:
  // The transition-model is only needed for its integer ids, and these need to
  // be identical for all 3 models, so we only store one (it doesn't matter
  // which one).
  TransitionModel tmodel_;
  // The model trained with online-CMVN features
  // (if supplied, otherwise use model_)
  AmDiagGmm online_alignment_model_;
  // The ML-trained model used to get transforms (required)
  AmDiagGmm model_;
  // The discriminatively trained model
  // (if supplied, otherwise use model_)
  AmDiagGmm rescore_model_;
  // The following object contains the basis elements for
  // "Basis fMLLR".
  BasisFmllrEstimate fmllr_basis_;
};
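
// Illustrative sketch, not part of the original header: constructing the model
// bundle from a parsed config and doing a basic sanity check.  The assertion
// and the function name are assumptions made for the example.
inline void ExampleLoadModels(const OnlineGmmDecodingConfig &config) {
  OnlineGmmDecodingModels models(config);
  const TransitionModel &tmodel = models.GetTransitionModel();
  const AmDiagGmm &am = models.GetModel();
  // All the models are expected to share the same tree, and hence the same
  // number of pdfs as the transition model.
  KALDI_ASSERT(am.NumPdfs() == tmodel.NumPdfs());
}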


// Holds the adaptation state that is carried between utterances of the same
// speaker: the online-CMVN state, the speaker's accumulated fMLLR statistics,
// and the current fMLLR transform (empty if none has been estimated yet).
struct OnlineGmmAdaptationState {
  OnlineCmvnState cmvn_state;
  FmllrDiagGmmAccs spk_stats;
  Matrix<BaseFloat> transform;

  // Writing and reading of the state of the object
  void Write(std::ostream &out_stream, bool binary) const;
  void Read(std::istream &in_stream, bool binary);
};
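
// Illustrative sketch, not part of the original header: persisting the
// adaptation state between utterances with the Write()/Read() methods above,
// using Kaldi's Output/Input wrappers from util/kaldi-io.h.  The function
// names are assumptions made for the example.
inline void ExampleSaveAdaptationState(const OnlineGmmAdaptationState &state,
                                       const std::string &wxfilename) {
  bool binary = true;
  Output ko(wxfilename, binary);
  state.Write(ko.Stream(), binary);
}

inline void ExampleLoadAdaptationState(const std::string &rxfilename,
                                       OnlineGmmAdaptationState *state) {
  bool binary_in;
  Input ki(rxfilename, &binary_in);
  state->Read(ki.Stream(), binary_in);
}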


// You will instantiate this class when you want to decode a single utterance
// using the online GMM-based decoding setup.
class SingleUtteranceGmmDecoder {
 public:
  SingleUtteranceGmmDecoder(const OnlineGmmDecodingConfig &config,
                            const OnlineGmmDecodingModels &models,
                            const OnlineFeaturePipeline &feature_prototype,
                            const fst::Fst<fst::StdArc> &fst,
                            const OnlineGmmAdaptationState &adaptation_state);

  OnlineFeaturePipeline &FeaturePipeline() { return *feature_pipeline_; }

  /// Advance the decoding as far as we can.  May also estimate fMLLR after
  /// advancing the decoding, depending on the configuration values in
  /// config_.adaptation_policy_opts.  (We expect the user will also call
  /// EstimateFmllr() at utterance end, which should generally improve the
  /// quality of the estimated transforms, although we don't rely on this.)
  void AdvanceDecoding();

  /// Finalize the decoding: clean up and prune remaining tokens, so the
  /// final lattice is faster to obtain.
  void FinalizeDecoding();

  /// Returns true if we already have an fMLLR transform.  The user will
  /// already know this; the call is for convenience.
  bool HaveTransform() const;

  /// Estimate the [basis-]fMLLR transform and apply it to the features.
  /// The transform affects any decoding done after this call, and the lattice
  /// rescoring done inside GetLattice() if rescoring turns out to be needed.
  /// "end_of_utterance" affects how we interpret the final-probs in the
  /// lattice used to estimate the transform; it should generally be true if
  /// you think you have reached the end of the grammar, and false otherwise.
  void EstimateFmllr(bool end_of_utterance);

  void GetAdaptationState(OnlineGmmAdaptationState *adaptation_state) const;

  /// Gets the lattice.  If "rescore_if_needed" is true and there is any point
  /// in rescoring the state-level lattice (see RescoringIsNeeded()), it will
  /// rescore the lattice.  The output lattice has any acoustic scaling in it
  /// (which will typically be desirable in an online-decoding context); if you
  /// want an un-scaled lattice, scale it using ScaleLattice() with the inverse
  /// of the acoustic weight.  "end_of_utterance" should be true if you want
  /// the final-probs to be included.
  void GetLattice(bool rescore_if_needed,
                  bool end_of_utterance,
                  CompactLattice *clat) const;

  /// Outputs an FST corresponding to the single best path through the current
  /// lattice.  If "end_of_utterance" is true and we reached a final state of
  /// the graph, the final-probs are included; otherwise all final-probs are
  /// treated as one.
  void GetBestPath(bool end_of_utterance,
                   Lattice *best_path) const;

  /// Returns a number >= 0 that will be close to zero if the final-probs were
  /// close to the best path's costs, and infinity if there were no final-probs
  /// on the most recently decoded frame; it is a thin wrapper around the
  /// decoder's FinalRelativeCost().
  BaseFloat FinalRelativeCost() { return decoder_.FinalRelativeCost(); }


  /// This function calls EndpointDetected() from online-endpoint.h,
  /// with the required arguments.
  bool EndpointDetected(const OnlineEndpointConfig &config);

  ~SingleUtteranceGmmDecoder();
 private:
  bool GetGaussianPosteriors(bool end_of_utterance, GaussPost *gpost);

  /// Returns true if a lattice-rescoring pass would make a difference, i.e. if
  /// we have a rescoring model that differs from the model used for decoding,
  /// or if the fMLLR transform has changed since decoding started (so the
  /// features, and hence the likelihoods, would differ).
  bool RescoringIsNeeded() const;

  OnlineGmmDecodingConfig config_;
  std::vector<int32> silence_phones_;  // sorted, unique list of silence phones,
                                       // derived from config_
  const OnlineGmmDecodingModels &models_;
  OnlineFeaturePipeline *feature_pipeline_;  // owned here.
  const OnlineGmmAdaptationState &orig_adaptation_state_;
  // adaptation_state_ generally reflects the "current" state of the
  // adaptation.  Note: adaptation_state_.cmvn_state is just copied from
  // orig_adaptation_state_; the function GetAdaptationState() gets the CMVN
  // state from the feature pipeline.
  OnlineGmmAdaptationState adaptation_state_;
  LatticeFasterOnlineDecoder decoder_;
};
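
// Illustrative sketch, not part of the original header: decoding one utterance
// with the class above, roughly following the pattern of the online2 example
// programs.  The 0.5-second chunk size, the variable names and the function
// name are assumptions made for the example; error handling is omitted.
inline void ExampleDecodeUtterance(
    const OnlineGmmDecodingConfig &config,
    const OnlineGmmDecodingModels &models,
    const OnlineFeaturePipeline &pipeline_prototype,
    const fst::Fst<fst::StdArc> &decode_fst,
    const Vector<BaseFloat> &waveform,           // samples of the utterance
    BaseFloat samp_freq,
    OnlineGmmAdaptationState *adaptation_state,  // carried across utterances
    CompactLattice *clat) {
  SingleUtteranceGmmDecoder decoder(config, models, pipeline_prototype,
                                    decode_fst, *adaptation_state);
  // Feed the audio in chunks, the way an online application would.
  int32 chunk_length = static_cast<int32>(samp_freq * 0.5);
  for (int32 offset = 0; offset < waveform.Dim(); offset += chunk_length) {
    int32 num_samp = waveform.Dim() - offset;
    if (num_samp > chunk_length) num_samp = chunk_length;
    SubVector<BaseFloat> chunk(waveform, offset, num_samp);
    decoder.FeaturePipeline().AcceptWaveform(samp_freq, chunk);
    decoder.AdvanceDecoding();
  }
  decoder.FeaturePipeline().InputFinished();  // no more audio is coming.
  decoder.AdvanceDecoding();                  // decode the remaining frames.
  decoder.FinalizeDecoding();
  // Estimate the fMLLR transform on the whole utterance, then get the lattice,
  // rescoring it with the final model / new features if that would matter.
  bool end_of_utterance = true;
  decoder.EstimateFmllr(end_of_utterance);
  bool rescore_if_needed = true;
  decoder.GetLattice(rescore_if_needed, end_of_utterance, clat);
  // Export the adaptation state so the speaker's next utterance starts from
  // the updated CMVN statistics and fMLLR transform.
  decoder.GetAdaptationState(adaptation_state);
}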


/// @} End of "addtogroup onlinedecoding"

}  // namespace kaldi



#endif  // KALDI_ONLINE2_ONLINE_GMM_DECODING_H_