doc/am-sgmm2_8h_source.html

 // sgmm2/am-sgmm2.h

 // Copyright 2009-2011  Microsoft Corporation;  Lukas Burget;
 //                      Saarland University (Author: Arnab Ghoshal);
 //                      Ondrej Glembek;  Yanmin Qian;
 // Copyright 2012-2013  Johns Hopkins University (author: Daniel Povey)
 //                      Liang Lu;  Arnab Ghoshal

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_SGMM2_AM_SGMM2_H_
 #define KALDI_SGMM2_AM_SGMM2_H_

 #include <vector>

 #include "base/kaldi-common.h"
 #include "matrix/matrix-lib.h"
 #include "gmm/model-common.h"
 #include "gmm/diag-gmm.h"
 #include "gmm/full-gmm.h"
 #include "itf/options-itf.h"
 #include "util/table-types.h"
 #include "util/kaldi-thread.h"

 namespace kaldi {
 /*
   When reading this file, keep in mind two references: the paper
  "The Subspace Gaussian Mixture Model-- a Structured Model for Speech Recognition", by D. Povey,
   L. Burget et al. (Computer Speech and Language, 2011), and
   "The Symmetric Subspace Gaussian Mixture Model": Microsoft Research technical report MSR-TR-2010-138.
   We will refer to these as "the paper" [or "the CSL paper"] and "the techreport".

   (1) SSGMM

   We'll use the acronym SSGMM to refer to the Symmetric SGMM, and we'll mark in
   the code with "[SSGMM]" things that relate to it.  The technical report
   describes an extension to the originally described model where we have
   speaker-dependent mixture weights.  These are implemented here.  Note: we only
   implement the "more efficient" version of the update for the speaker
   projection vectors \u_i.  There is also an ICASSP paper that describes the
   stuff in the techreport (more briefly), with results, but we don't refer to
   any equation numbers in that.

   (2) SCTM

   What we implement here has another extension that was not in the CSL paper: an
   extension to the "state-clustered tied mixture" [SCTM] system-- a bit like BBN's
   style of system, except for SGMMs not Gaussians, at the sub-state not Gaussian level.
   We build a first
   tree, at which level the phonetic sub-state vectors are defined, and then a
   "more detailed" tree, at which level we share the sub-state mixture weights.
   In this class, NumPdfs() returns the real number of pdf's (i.e. the #leaves
   of the more detailed tree), and NumGroups() returns the number of groups of
   pdf's that share the sub-state vectors.
   We use the index j2 for indexing 0...NumPdfs()-1 [as it's the "2nd level" of the tree],
   and j1 for indexing 0...NumGroups()-1 [as it's the "1st level" of the tree].
   The weights are stored as c[j2][m].  There is a mapping Pdf2Group(j2) which returns
   the corresponding j1 for a given j2, and the reverse map group2pdf_[j1], a vector<int32>
   consisting of the list of j2 indices for that j1.

   The count quantities we store during the accumulation phase could most simply
   be stored as gamma[j2][m][i] (where m is the sub-state index), but this is
   inefficient.  Instead we store them separately as gamma1[j1][m][i] and gamma2[j2][m],
   so each count gets stored in two separate places; this makes the stats more compact.

   In this implementation, the normalizers n_{jmi} are now stored as n[j1][m][i],
   without including the log-weight term log c[j2][m].  In the computation of
   state likelihoods, we first compute the log-prob of the data given each of the
   sub-state vectors; and we compute the log-sum of this and the posteriors over
   each of the vectors [treating the weights as 1.0].  Call these
   "pseudo-posteriors".  Then to take into account the contribution of the
   weights in a state j2, we take the dot product of the weight-vector c[j2][...]
   with this vector of pseudo-posteriors.  The log of this dot-product gets added to the
   original log-sum.
 */


 /// Options that control sub-state splitting.  Each field is exposed on the
 /// command line by Register(); the help strings there are the authoritative
 /// description of the fields.
 struct Sgmm2SplitSubstatesConfig {
   int32 split_substates;     ///< --split-substates: overall target number of substates.
   BaseFloat perturb_factor;  ///< --perturb-factor: perturbation applied to state vectors.
   BaseFloat power;           ///< --power: exponent applied to substate occupancies.
   BaseFloat max_cond;        ///< --max-cond-split: max condition number of smoothing matrix.
   BaseFloat min_count;       ///< --min-count: minimum allowed count when allocating sub-states.

   /// Sets every option to its default value.
   Sgmm2SplitSubstatesConfig() {
     split_substates = 0;
     perturb_factor = 0.01;
     power = 0.2;
     max_cond = 100.0;
     min_count = 40.0;
   }

   /// Registers all options with "opts" so they can be set from the command
   /// line.  Note that max_cond is registered under the name "max-cond-split".
   void Register(OptionsItf *opts) {
     opts->Register("split-substates", &split_substates,
                    "Increase number of "
                    "substates to this overall target.");
     opts->Register("max-cond-split", &max_cond,
                    "Max condition number of smoothing "
                    "matrix used in substate splitting.");
     opts->Register("perturb-factor", &perturb_factor,
                    "Perturbation factor for "
                    "state vectors while splitting substates.");
     opts->Register("power", &power,
                    "Exponent for substate occupancies used while "
                    "splitting substates.");
     opts->Register("min-count", &min_count,
                    "Minimum allowed count, used in allocating "
                    "sub-states to state in mixture splitting.");
   }
 };

 // Caution: this config is probably not used in most of the setups, we generally do the Gaussian
 // selection using separate programs
 struct Sgmm2GselectConfig {
   int32 full_gmm_nbest;
   int32 diag_gmm_nbest;

   Sgmm2GselectConfig() {
     full_gmm_nbest = 15;
     diag_gmm_nbest = 50;
   }

   void Register(OptionsItf *opts) {
     opts->Register("full-gmm-nbest", &full_gmm_nbest, "Number of highest-scoring"
                    " full-covariance Gaussians selected per frame.");
     opts->Register("diag-gmm-nbest", &diag_gmm_nbest, "Number of highest-scoring"
                    " diagonal-covariance Gaussians selected per frame.");
   }
 };

 /// Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and
 /// n_{i}(t), together with the Gaussian-selection indices for the frame.
 struct Sgmm2PerFrameDerivedVars {
   std::vector<int32> gselect;  ///< Selected Gaussian indices (not touched by Resize()).
   Vector<BaseFloat> xt;        ///< x'(t), FMLLR-adapted, dim = [D], eq.(33)
   Matrix<BaseFloat> xti;       ///< x_{i}(t) = x'(t) - o_i(s): dim = [I][D], eq.(34)
   Matrix<BaseFloat> zti;       ///< z_{i}(t), dim = [I][S], eq.(35)
   Vector<BaseFloat> nti;       ///< n_{i}(t), dim = [I]

   /// Makes the containers the right size for "ngauss" Gaussians, feature
   /// dimension "feat_dim" and phone-space dimension "phn_dim".  A container
   /// that already has the requested size is left alone, so its contents are
   /// not necessarily zeroed.
   void Resize(int32 ngauss, int32 feat_dim, int32 phn_dim) {
     if (!(xt.Dim() == feat_dim))
       xt.Resize(feat_dim);
     if (!(xti.NumRows() == ngauss && xti.NumCols() == feat_dim))
       xti.Resize(ngauss, feat_dim);
     if (!(zti.NumRows() == ngauss && zti.NumCols() == phn_dim))
       zti.Resize(ngauss, phn_dim);
     if (!(nti.Dim() == ngauss))
       nti.Resize(ngauss);
   }
 };

 class AmSgmm2;

 /// Per-speaker quantities: the speaker vector plus values derived from it.
 /// To set this up, call ComputePerSpkDerivedVars from the sgmm object.
 class Sgmm2PerSpkDerivedVars {
  public:
   /// Empties every stored quantity, including the speaker vector itself.
   void Clear() {
     v_s.Resize(0);
     o_s.Resize(0, 0);
     b_is.Resize(0);
     log_b_is.Resize(0);
     log_d_jms.resize(0);
   }

   /// True if no speaker vector is set (v_s has dimension zero).
   /// Const-qualified so it can be called through a const reference.
   bool Empty() const { return v_s.Dim() == 0; }

   /// Read-only access to the speaker vector.
   /// Const-qualified so it can be called through a const reference.
   const Vector<BaseFloat> &GetSpeakerVector() const { return v_s; }

   /// Stores a copy of "v_s_in" as the speaker vector.  The derived quantities
   /// are NOT updated here.
   /// Caution: after SetSpeakerVector you typically want to
   /// use the function AmSgmm2::ComputePerSpkDerivedVars.
   void SetSpeakerVector(const Vector<BaseFloat> &v_s_in) {
     v_s.Resize(v_s_in.Dim());
     v_s.CopyFromVec(v_s_in);
   }
  protected:
   friend class AmSgmm2;
   friend class MleAmSgmm2Accs;
   Vector<BaseFloat> v_s;       ///< Speaker adaptation vector v^{(s)}. Dim is [T].
   Matrix<BaseFloat> o_s;       ///< Per-speaker offsets o_{i}. Dimension is [I][D].
   Vector<BaseFloat> b_is;      ///< [SSGMM] b_i^{(s)}; cf. Eq. (22) in the techreport.
   Vector<BaseFloat> log_b_is;  ///< [SSGMM] presumably the log of b_is -- confirm in am-sgmm2.cc.
   /// [SSGMM] per-speaker cache related to the quantity d_{jm}^{(s)} (see
   /// AmSgmm2::GetDjms).  NOTE(review): presumably indexed [j1][m]; confirm.
   std::vector<Vector<BaseFloat> > log_d_jms;
 };

 struct Sgmm2LikelihoodCache {
  public:
   // you'll typically initialize with (sgmm.NumGroups(), sgmm.NumPdfs()).
   Sgmm2LikelihoodCache(int32 num_groups, int32 num_pdfs):
       substate_cache(num_groups), pdf_cache(num_pdfs), t(1) { }

   struct SubstateCacheElement { // indexed by j1.
     SubstateCacheElement(): t(0) { }
     // The "likes" and "remaining_log_like" quantities store the
     // log-like of the data given each substate vector, in a redundant
     // way, so the likelihood is likes(i) * exp(remaining_log_like).
     // This is to get around problems with numerical range.
     Vector<BaseFloat> likes;
     BaseFloat remaining_log_like;
     int32 t; // used in detecting "freshness."
   };
   struct PdfCacheElement { // indexed by j2.
     PdfCacheElement(): t(0) { }
     BaseFloat log_like;
     int32 t; // used in detecting "freshness."
   };

   void NextFrame(); // increments t.
   std::vector<SubstateCacheElement> substate_cache; // indexed by j1.
   std::vector<PdfCacheElement> pdf_cache; // indexed by j2.
   int32 t;
 };


 class AmSgmm2 {
  public:
   AmSgmm2() {}
   void Read(std::istream &is, bool binary);
   void Write(std::ostream &os, bool binary,
              SgmmWriteFlagsType write_params) const;

   void Check(bool show_properties = true);

   void InitializeFromFullGmm(const FullGmm &gmm,
                              const std::vector<int32> &pdf2group,
                              int32 phn_subspace_dim,
                              int32 spk_subspace_dim,
                              bool speaker_dependent_weights,
                              BaseFloat self_weight); // self_weight relates to
   // initialization of the weights.  if self_weight == 1.0 it means we
   // just have 1 sub-state per group, otherwise we have one per pdf,
   // and each pdf has "self_weight" as its "own" weight.

   void CopyGlobalsInitVecs(const AmSgmm2 &other,
                            const std::vector<int32> &pdf2group,
                            BaseFloat self_weight);

   void CopyFromSgmm2(const AmSgmm2 &other,
                     bool copy_normalizers,
                     bool copy_weights);  // copy_weights is to copy w_{jmi} [which are
    // stored, in the symmetric SSGMM.]

   BaseFloat GaussianSelection(const Sgmm2GselectConfig &config,
                               const VectorBase<BaseFloat> &data,
                               std::vector<int32> *gselect) const;

   void ComputePerFrameVars(const VectorBase<BaseFloat> &data,
                            const std::vector<int32> &gselect,
                            const Sgmm2PerSpkDerivedVars &spk_vars,
                            Sgmm2PerFrameDerivedVars *per_frame_vars) const;


   void ComputePerSpkDerivedVars(Sgmm2PerSpkDerivedVars *vars) const;

   BaseFloat LogLikelihood(const Sgmm2PerFrameDerivedVars &per_frame_vars,
                           int32 j2, // pdf_id
                           Sgmm2LikelihoodCache *cache, // be careful to call NextFrame() when needed!
                           Sgmm2PerSpkDerivedVars *spk_vars,
                           BaseFloat log_prune = 0.0) const;

   BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars,
                                 int32 j2,
                                 Sgmm2PerSpkDerivedVars *spk_vars,
                                 Matrix<BaseFloat> *post) const;

   void SplitSubstates(const Vector<BaseFloat> &state_occupancies, // [indexed by pdf-id j2]
                       const Sgmm2SplitSubstatesConfig &config);

   void IncreasePhoneSpaceDim(int32 target_dim,
                              const Matrix<BaseFloat> &norm_xform);

   void IncreaseSpkSpaceDim(int32 target_dim,
                            const Matrix<BaseFloat> &norm_xform,
                            bool speaker_dependent_weights);

   void ComputeDerivedVars();

   void ComputeNormalizers();

   void ComputeWeights();

   void ComputeFmllrPreXform(const Vector<BaseFloat> &pdf_occs,
                             Matrix<BaseFloat> *xform,
                             Matrix<BaseFloat> *inv_xform,
                             Vector<BaseFloat> *diag_mean_scatter) const;

   int32 NumPdfs() const { return pdf2group_.size(); }
   int32 NumGroups() const { return group2pdf_.size(); } // relates to SCTM.  # pdf groups,
   // <= NumPdfs().
   int32 Pdf2Group(int32 j2) const; // relates to SCTM.
   int32 NumSubstatesForPdf(int32 j2) const {
     KALDI_ASSERT(j2 < NumPdfs()); return c_[j2].Dim();
   }
   int32 NumSubstatesForGroup(int32 j1) const {
     KALDI_ASSERT(j1 < NumGroups()); return v_[j1].NumRows();
   }
   int32 NumGauss() const { return M_.size(); }
   int32 PhoneSpaceDim() const { return w_.NumCols(); }
   int32 SpkSpaceDim() const { return (N_.size() > 0) ? N_[0].NumCols() : 0; }
   int32 FeatureDim() const { return M_[0].NumRows(); }

   bool HasSpeakerDependentWeights() const { return (u_.NumRows() != 0); }

   bool HasSpeakerSpace() const { return (!N_.empty()); }

   void RemoveSpeakerSpace() { N_.clear(); u_.Resize(0, 0); w_jmi_.clear(); }

   // [SSGMM] get the quantity d_{jm}^{(s)} and cache it with
   // spk vars if necessary.  Called in accumulation code.
   BaseFloat GetDjms(int32 j1, int32 m,
                     Sgmm2PerSpkDerivedVars *spk_vars) const;

   const FullGmm & full_ubm() const { return full_ubm_; }
   const DiagGmm & diag_ubm() const { return diag_ubm_; }


   template<typename Real>
   void GetInvCovars(int32 gauss_index, SpMatrix<Real> *out) const;

   template<typename Real>
   void GetSubstateMean(int32 j1, int32 m, int32 i,
                        VectorBase<Real> *mean_out) const;

   template<typename Real>
   void GetNtransSigmaInv(std::vector< Matrix<Real> > *out) const;

   template<typename Real>
   void GetSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss,
                               const Sgmm2PerSpkDerivedVars &spk,
                               VectorBase<Real> *mean_out) const;

   template<typename Real>
   void GetVarScaledSubstateSpeakerMean(int32 j1, int32 substate,
                                        int32 gauss,
                                        const Sgmm2PerSpkDerivedVars &spk,
                                        VectorBase<Real> *mean_out) const;

   template<class Real>
   void ComputeH(std::vector< SpMatrix<Real> > *H_i) const;

  protected:
   std::vector<int32> pdf2group_;
   std::vector<std::vector<int32> > group2pdf_; // the reverse map.

   DiagGmm diag_ubm_;
   FullGmm full_ubm_;


   std::vector< SpMatrix<BaseFloat> > SigmaInv_;
   std::vector< Matrix<BaseFloat> > M_;
   std::vector< Matrix<BaseFloat> > N_;
   Matrix<BaseFloat> w_;
   Matrix<BaseFloat> u_;


   std::vector< Matrix<BaseFloat> > v_;
   std::vector< Vector<BaseFloat> > c_;
   std::vector< Matrix<BaseFloat> > n_;
   std::vector< Matrix<BaseFloat> > w_jmi_;

   // Priors for MAP adaptation of M -- keeping them here for now but they may
   // be moved somewhere else eventually
   // These are parameters of a matrix-variate normal distribution. The means are
   // the unadapted M_i, and we have 2 separate covaraince matrices for the rows
   // and columns of M.
   std::vector< Matrix<BaseFloat> > M_prior_;  // Matrix-variate Gaussian mean
   SpMatrix<BaseFloat> row_cov_inv_;
   SpMatrix<BaseFloat> col_cov_inv_;

  private:
   void ComputeGammaI(const Vector<BaseFloat> &state_occupancies,
                      Vector<BaseFloat> *gamma_i) const;

   void SplitSubstatesInGroup(const Vector<BaseFloat> &pdf_occupancies,
                              const Sgmm2SplitSubstatesConfig &opts,
                              const SpMatrix<BaseFloat> &sqrt_H_sm,
                              int32 j1, int32 M);

   void ComputeNormalizersInternal(int32 num_threads, int32 thread,
                                   int32 *entropy_count, double *entropy_sum);

   inline void ComponentLogLikes(const Sgmm2PerFrameDerivedVars &per_frame_vars,
                                 int32 j1,
                                 Sgmm2PerSpkDerivedVars *spk_vars,
                                 Matrix<BaseFloat> *loglikes) const;


   void InitializeMw(int32 phn_subspace_dim,
                      const Matrix<BaseFloat> &norm_xform);
   void InitializeNu(int32 spk_subspace_dim,
                     const Matrix<BaseFloat> &norm_xform,
                     bool speaker_dependent_weights);
   void InitializeVecsAndSubstateWeights(BaseFloat self_weight);
   void InitializeCovars();

   void ComputeHsmFromModel(
       const std::vector< SpMatrix<BaseFloat> > &H,
       const Vector<BaseFloat> &state_occupancies,
       SpMatrix<BaseFloat> *H_sm,
       BaseFloat max_cond) const;

   void ComputePdfMappings(); // sets up group2pdf_ from pdf2group_.

   KALDI_DISALLOW_COPY_AND_ASSIGN(AmSgmm2);
   friend class ComputeNormalizersClass;
   friend class Sgmm2Project;
   friend class EbwAmSgmm2Updater;
   friend class MleAmSgmm2Accs;
   friend class MleAmSgmm2Updater;
   friend class MleSgmm2SpeakerAccs;
   friend class AmSgmm2Functions;  // misc functions that need access.
   friend class Sgmm2Feature;
 };

 /// Copies the inverse covariance of Gaussian "gauss_index" into *out,
 /// converting to the precision Real.  *out is resized to match; no range
 /// check is done on gauss_index.
 template<typename Real>
 inline void AmSgmm2::GetInvCovars(int32 gauss_index,
                                   SpMatrix<Real> *out) const {
   const SpMatrix<BaseFloat> &inv_covar = SigmaInv_[gauss_index];
   // kUndefined: the contents are overwritten by the copy right below.
   out->Resize(inv_covar.NumRows(), kUndefined);
   out->CopyFromSp(inv_covar);
 }


 /// Computes the mean of Gaussian i for sub-state m of group j1, i.e. the
 /// product of M_[i] with row m of v_[j1], and copies it into *mean_out
 /// (converting to the precision Real).
 ///
 /// @param j1        group index, 0 <= j1 < NumGroups().
 /// @param m         sub-state index, 0 <= m < NumSubstatesForGroup(j1).
 /// @param i         Gaussian index, 0 <= i < NumGauss().
 /// @param mean_out  non-NULL output vector, already of dimension FeatureDim().
 template<typename Real>
 inline void AmSgmm2::GetSubstateMean(int32 j1, int32 m, int32 i,
                                     VectorBase<Real> *mean_out) const {
   KALDI_ASSERT(mean_out != NULL);
   // Check both ends of each range: a negative index would otherwise pass the
   // assert and index v_ or M_ out of bounds.
   KALDI_ASSERT(j1 >= 0 && j1 < NumGroups());
   KALDI_ASSERT(m >= 0 && m < NumSubstatesForGroup(j1));
   KALDI_ASSERT(i >= 0 && i < NumGauss());
   KALDI_ASSERT(mean_out->Dim() == FeatureDim());
   // Compute in BaseFloat first; the copy below converts to Real.
   Vector<BaseFloat> mean_tmp(FeatureDim());
   mean_tmp.AddMatVec(1.0, M_[i], kNoTrans, v_[j1].Row(m), 0.0);
   mean_out->CopyFromVec(mean_tmp);
 }


 /// Like GetSubstateMean(), but when a speaker vector is present in "spk" the
 /// per-speaker offset for Gaussian i (row i of spk.o_s) is added to the mean.
 template<typename Real>
 inline void AmSgmm2::GetSubstateSpeakerMean(int32 j1, int32 m, int32 i,
                                             const Sgmm2PerSpkDerivedVars &spk,
                                            VectorBase<Real> *mean_out) const {
   GetSubstateMean(j1, m, i, mean_out);
   if (spk.v_s.Dim() == 0)
     return;  // no speaker adaptation: the plain sub-state mean is the answer.
   mean_out->AddVec(1.0, spk.o_s.Row(i));
 }

 /// Computes the speaker-adapted sub-state mean (see GetSubstateSpeakerMean())
 /// multiplied by the inverse covariance SigmaInv_[i], and copies the result
 /// into *mean_out (converting to the precision Real).  The dimension of
 /// *mean_out determines the size of the temporaries.
 template<typename Real>
 void AmSgmm2::GetVarScaledSubstateSpeakerMean(int32 j1, int32 m, int32 i,
                                              const Sgmm2PerSpkDerivedVars &spk,
                                              VectorBase<Real> *mean_out) const {
   const int32 dim = mean_out->Dim();
   Vector<BaseFloat> spk_mean(dim);
   GetSubstateSpeakerMean(j1, m, i, spk, &spk_mean);
   Vector<BaseFloat> scaled_mean(dim);
   scaled_mean.AddSpVec(1.0, SigmaInv_[i], spk_mean, 0.0);
   mean_out->CopyFromVec(scaled_mean);
 }


 /// Computes the inverse of an LDA transform (without dimensionality
 /// reduction).
 /// NOTE(review): only the declaration is visible here; the result is
 /// presumably computed from "gmm" and written to *xform -- confirm against
 /// the definition in am-sgmm2.cc.
 void ComputeFeatureNormalizingTransform(const FullGmm &gmm, Matrix<BaseFloat> *xform);


 /// This is the entry for a single time.
 struct Sgmm2GauPostElement {
   // Need gselect info here, since "posteriors" is  relative to this set of
   // selected Gaussians.
   std::vector<int32> gselect;
   std::vector<int32> tids;  // transition-ids for each entry in "posteriors"
   // One matrix per entry in "tids".
   // NOTE(review): the layout of each matrix is not visible in this header;
   // confirm against the code that fills it in.
   std::vector<Matrix<BaseFloat> > posteriors;
 };


 /// Sequence of Sgmm2GauPostElement, indexed by time.
 class Sgmm2GauPost: public std::vector<Sgmm2GauPostElement> {
  public:
   // Creates a sequence of i default-constructed elements.
   explicit Sgmm2GauPost(size_t i) : std::vector<Sgmm2GauPostElement>(i) {}
   // Creates an empty sequence.
   Sgmm2GauPost() {}
   // Add the standard Kaldi Read and Write routines so
   // we can use KaldiObjectHolder with this type.
   void Write(std::ostream &os, bool binary) const;
   void Read(std::istream &is, bool binary);
 };

 // Table types for Sgmm2GauPost: a holder based on its Read/Write functions,
 // plus the random-access reader, sequential reader and writer built on it.
 typedef KaldiObjectHolder<Sgmm2GauPost> Sgmm2GauPostHolder;
 typedef RandomAccessTableReader<Sgmm2GauPostHolder> RandomAccessSgmm2GauPostReader;
 typedef SequentialTableReader<Sgmm2GauPostHolder> SequentialSgmm2GauPostReader;
 typedef TableWriter<Sgmm2GauPostHolder> Sgmm2GauPostWriter;

 }  // namespace kaldi


 #endif  // KALDI_SGMM2_AM_SGMM2_H_
kaldi::ComputeNormalizersClass
Definition: am-sgmm2.cc:820

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::SgmmWriteFlagsType
uint16 SgmmWriteFlagsType
Bitwise OR of the above flags.
Definition: model-common.h:70

kaldi::AmSgmm2::u_
Matrix< BaseFloat > u_
[SSGMM] Speaker-subspace weight projection vectors. Dimension is [I][T]
Definition: am-sgmm2.h:431

kaldi::AmSgmm2
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231

kaldi::SpMatrix
Packed symmetric matrix class.
Definition: matrix-common.h:62

kaldi::AmSgmm2::pdf2group_
std::vector< int32 > pdf2group_
Definition: am-sgmm2.h:409

kaldi::kUndefined
Definition: matrix-common.h:39

kaldi::KaldiObjectHolder
KaldiObjectHolder works for Kaldi objects that have the "standard" Read and Write functions...
Definition: kaldi-holder-inl.h:45

diag-gmm.h

kaldi::Sgmm2PerFrameDerivedVars::xt
Vector< BaseFloat > xt
x'(t), FMLLR-adapted, dim = [D], eq.(33)
Definition: am-sgmm2.h:144

matrix-lib.h

kaldi::AmSgmm2::GetSubstateMean
void GetSubstateMean(int32 j1, int32 m, int32 i, VectorBase< Real > *mean_out) const
Definition: am-sgmm2.h:519

kaldi-thread.h

kaldi::AmSgmm2::HasSpeakerSpace
bool HasSpeakerSpace() const
Definition: am-sgmm2.h:368

kaldi::AmSgmm2::c_
std::vector< Vector< BaseFloat > > c_
c_{jm}, mixture weights. Dimension is [J2][#mix]
Definition: am-sgmm2.h:438

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::AmSgmm2::w_
Matrix< BaseFloat > w_
Phonetic-subspace weight projection vectors. Dimension is [I][S].
Definition: am-sgmm2.h:429

kaldi::Sgmm2LikelihoodCache::t
int32 t
Definition: am-sgmm2.h:224

kaldi::AmSgmm2::AmSgmm2
AmSgmm2()
Definition: am-sgmm2.h:233

kaldi::Sgmm2PerSpkDerivedVars::v_s
Vector< BaseFloat > v_s
Speaker adaptation vector v_^{(s)}. Dim is [T].
Definition: am-sgmm2.h:187

kaldi::Sgmm2Project
Definition: am-sgmm2-project.h:30

kaldi::FullGmm
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40

kaldi::MleSgmm2SpeakerAccs
Class for the accumulators required to update the speaker vectors v_s.
Definition: estimate-am-sgmm2.h:354

kaldi::Sgmm2PerSpkDerivedVars::GetSpeakerVector
const Vector< BaseFloat > & GetSpeakerVector()
Definition: am-sgmm2.h:178

kaldi::Sgmm2SplitSubstatesConfig::max_cond
BaseFloat max_cond
Definition: am-sgmm2.h:95

kaldi::AmSgmm2::GetVarScaledSubstateSpeakerMean
void GetVarScaledSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
Definition: am-sgmm2.h:541

kaldi::Sgmm2GauPostHolder
KaldiObjectHolder< Sgmm2GauPost > Sgmm2GauPostHolder
Definition: am-sgmm2.h:578

kaldi::AmSgmm2::GetInvCovars
void GetInvCovars(int32 gauss_index, SpMatrix< Real > *out) const
Templated accessors (used to accumulate in different precision)
Definition: am-sgmm2.h:511

kaldi::Sgmm2LikelihoodCache::SubstateCacheElement
Definition: am-sgmm2.h:205

kaldi::Sgmm2GauPost::Sgmm2GauPost
Sgmm2GauPost()
Definition: am-sgmm2.h:573

kaldi::TableWriter
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::AmSgmm2::diag_ubm_
DiagGmm diag_ubm_
These contain the "background" model associated with the subspace GMM.
Definition: am-sgmm2.h:413

kaldi::Sgmm2GauPostElement::posteriors
std::vector< Matrix< BaseFloat > > posteriors
Definition: am-sgmm2.h:563

kaldi::AmSgmm2::n_
std::vector< Matrix< BaseFloat > > n_
n_{jim}, per-Gaussian normalizer. Dimension is [J1][I][#mix]
Definition: am-sgmm2.h:440

kaldi::AmSgmm2::N_
std::vector< Matrix< BaseFloat > > N_
Speaker-subspace projections. Dimension is [I][D][T].
Definition: am-sgmm2.h:427

kaldi::Matrix< BaseFloat >

kaldi::AmSgmm2::col_cov_inv_
SpMatrix< BaseFloat > col_cov_inv_
Definition: am-sgmm2.h:451

kaldi::Vector::Resize
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
Definition: kaldi-vector.cc:190

KALDI_DISALLOW_COPY_AND_ASSIGN
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121

kaldi::Sgmm2LikelihoodCache::SubstateCacheElement::remaining_log_like
BaseFloat remaining_log_like
Definition: am-sgmm2.h:212

kaldi::Sgmm2LikelihoodCache::SubstateCacheElement::SubstateCacheElement
SubstateCacheElement()
Definition: am-sgmm2.h:206

kaldi::AmSgmm2::full_ubm
const FullGmm & full_ubm() const
Accessors.
Definition: am-sgmm2.h:378

kaldi::AmSgmm2::PhoneSpaceDim
int32 PhoneSpaceDim() const
Definition: am-sgmm2.h:361

kaldi::AmSgmm2::v_
std::vector< Matrix< BaseFloat > > v_
The parameters in a particular SGMM state.
Definition: am-sgmm2.h:436

kaldi::SpMatrix::CopyFromSp
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85

kaldi::Sgmm2SplitSubstatesConfig
Definition: am-sgmm2.h:91

kaldi::Sgmm2LikelihoodCache::Sgmm2LikelihoodCache
Sgmm2LikelihoodCache(int32 num_groups, int32 num_pdfs)
Definition: am-sgmm2.h:202

kaldi::Sgmm2LikelihoodCache::PdfCacheElement::t
int32 t
Definition: am-sgmm2.h:218

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::RandomAccessTableReader
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233

kaldi::Sgmm2PerFrameDerivedVars::zti
Matrix< BaseFloat > zti
z_{i}(t), dim = [I][S], eq.(35)
Definition: am-sgmm2.h:146

kaldi::Sgmm2PerSpkDerivedVars::b_is
Vector< BaseFloat > b_is
Definition: am-sgmm2.h:189

kaldi::AmSgmm2::M_
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S].
Definition: am-sgmm2.h:425

kaldi::EbwAmSgmm2Updater
Definition: estimate-am-sgmm2-ebw.h:147

kaldi::AmSgmm2::FeatureDim
int32 FeatureDim() const
Definition: am-sgmm2.h:363

kaldi::Sgmm2GauPostElement::tids
std::vector< int32 > tids
Definition: am-sgmm2.h:562

options-itf.h

full-gmm.h

table-types.h

kaldi::VectorBase::CopyFromVec
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
Definition: kaldi-vector.cc:228

kaldi::Sgmm2PerSpkDerivedVars::log_d_jms
std::vector< Vector< BaseFloat > > log_d_jms
< [SSGMM] log of the above (more efficient to store both).
Definition: am-sgmm2.h:191

kaldi::SequentialSgmm2GauPostReader
SequentialTableReader< Sgmm2GauPostHolder > SequentialSgmm2GauPostReader
Definition: am-sgmm2.h:580

kaldi::AmSgmm2::NumSubstatesForPdf
int32 NumSubstatesForPdf(int32 j2) const
Definition: am-sgmm2.h:354

kaldi::AmSgmm2::NumGroups
int32 NumGroups() const
Definition: am-sgmm2.h:351

kaldi::Sgmm2LikelihoodCache::PdfCacheElement::log_like
BaseFloat log_like
Definition: am-sgmm2.h:217

kaldi::AmSgmm2::GetSubstateSpeakerMean
void GetSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
Definition: am-sgmm2.h:532

kaldi::Sgmm2PerFrameDerivedVars::xti
Matrix< BaseFloat > xti
x_{i}(t) = x'(t) - o_i(s): dim = [I][D], eq.(34)
Definition: am-sgmm2.h:145

kaldi::MatrixBase::Row
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188

kaldi::AmSgmm2::SigmaInv_
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM.
Definition: am-sgmm2.h:423

float

kaldi::Sgmm2LikelihoodCache::SubstateCacheElement::likes
Vector< BaseFloat > likes
Definition: am-sgmm2.h:211

kaldi::Sgmm2LikelihoodCache::SubstateCacheElement::t
int32 t
Definition: am-sgmm2.h:213

kaldi::AmSgmm2::M_prior_
std::vector< Matrix< BaseFloat > > M_prior_
Definition: am-sgmm2.h:449

kaldi::AmSgmm2::full_ubm_
FullGmm full_ubm_
Definition: am-sgmm2.h:414

kaldi::Sgmm2PerSpkDerivedVars::Empty
bool Empty()
Definition: am-sgmm2.h:175

kaldi::Sgmm2PerFrameDerivedVars::nti
Vector< BaseFloat > nti
n_{i}(t), dim = [I], eq.
Definition: am-sgmm2.h:147

kaldi::SequentialTableReader
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287

kaldi::Sgmm2LikelihoodCache::pdf_cache
std::vector< PdfCacheElement > pdf_cache
Definition: am-sgmm2.h:223

kaldi::Sgmm2GauPostElement
This is the entry for a single time.
Definition: am-sgmm2.h:558

kaldi::AmSgmm2::NumPdfs
int32 NumPdfs() const
Various model dimensions.
Definition: am-sgmm2.h:350

kaldi::Sgmm2GselectConfig::full_gmm_nbest
int32 full_gmm_nbest
Number of highest-scoring full-covariance Gaussians per frame.
Definition: am-sgmm2.h:120

kaldi::kNoTrans
Definition: matrix-common.h:34

kaldi::Sgmm2SplitSubstatesConfig::perturb_factor
BaseFloat perturb_factor
Definition: am-sgmm2.h:93

kaldi::Sgmm2PerFrameDerivedVars::Resize
void Resize(int32 ngauss, int32 feat_dim, int32 phn_dim)
Definition: am-sgmm2.h:151

kaldi::Sgmm2LikelihoodCache::PdfCacheElement::PdfCacheElement
PdfCacheElement()
Definition: am-sgmm2.h:216

kaldi::Sgmm2GauPost
indexed by time.
Definition: am-sgmm2.h:568

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::AmSgmm2::NumGauss
int32 NumGauss() const
Definition: am-sgmm2.h:360

kaldi::Sgmm2GselectConfig
Definition: am-sgmm2.h:118

kaldi::Sgmm2PerFrameDerivedVars::gselect
std::vector< int32 > gselect
Definition: am-sgmm2.h:143

kaldi::AmSgmm2::diag_ubm
const DiagGmm & diag_ubm() const
Definition: am-sgmm2.h:379

kaldi::AmSgmm2::row_cov_inv_
SpMatrix< BaseFloat > row_cov_inv_
Definition: am-sgmm2.h:450

kaldi::Sgmm2GauPost::Sgmm2GauPost
Sgmm2GauPost(size_t i)
Definition: am-sgmm2.h:572

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::Sgmm2GauPostWriter
TableWriter< Sgmm2GauPostHolder > Sgmm2GauPostWriter
Definition: am-sgmm2.h:581

kaldi::Sgmm2GauPostElement::gselect
std::vector< int32 > gselect
Definition: am-sgmm2.h:561

kaldi::Sgmm2GselectConfig::diag_gmm_nbest
int32 diag_gmm_nbest
Number of highest-scoring diagonal-covariance Gaussians per frame.
Definition: am-sgmm2.h:122

kaldi::AmSgmm2::group2pdf_
std::vector< std::vector< int32 > > group2pdf_
Definition: am-sgmm2.h:410

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::Sgmm2SplitSubstatesConfig::split_substates
int32 split_substates
Definition: am-sgmm2.h:92

kaldi::Sgmm2PerSpkDerivedVars
Definition: am-sgmm2.h:165

kaldi::AmSgmm2::RemoveSpeakerSpace
void RemoveSpeakerSpace()
Definition: am-sgmm2.h:370

kaldi::DiagGmm
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42

kaldi::Sgmm2LikelihoodCache
Sgmm2LikelihoodCache caches SGMM likelihoods at two levels: the final pdf likelihoods, and the sub-state level likelihoods, which means that with the SCTM system we can avoid redundant computation.
Definition: am-sgmm2.h:199

kaldi::Sgmm2LikelihoodCache::substate_cache
std::vector< SubstateCacheElement > substate_cache
Definition: am-sgmm2.h:222

model-common.h

kaldi::Matrix::Resize
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Definition: kaldi-matrix.cc:819

kaldi::Sgmm2PerSpkDerivedVars::Clear
void Clear()
Definition: am-sgmm2.h:168

kaldi::Sgmm2PerSpkDerivedVars::SetSpeakerVector
void SetSpeakerVector(const Vector< BaseFloat > &v_s_in)
Definition: am-sgmm2.h:180

kaldi::SpMatrix::Resize
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81

kaldi::AmSgmm2::HasSpeakerDependentWeights
bool HasSpeakerDependentWeights() const
True if doing SSGMM.
Definition: am-sgmm2.h:366

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::Sgmm2SplitSubstatesConfig::Register
void Register(OptionsItf *opts)
Definition: am-sgmm2.h:102

kaldi::MleAmSgmm2Accs
Class for the accumulators associated with the phonetic-subspace model parameters.
Definition: estimate-am-sgmm2.h:119

kaldi::Sgmm2LikelihoodCache::PdfCacheElement
Definition: am-sgmm2.h:215

kaldi::AmSgmm2::SpkSpaceDim
int32 SpkSpaceDim() const
Definition: am-sgmm2.h:362

kaldi::ComputeFeatureNormalizingTransform
void ComputeFeatureNormalizingTransform(const FullGmm &gmm, Matrix< BaseFloat > *xform)
Computes the inverse of an LDA transform (without dimensionality reduction) The computed transform is...
Definition: am-sgmm2.cc:1297

kaldi::VectorBase::AddVec
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Definition: kaldi-vector.cc:1044

kaldi::Sgmm2PerSpkDerivedVars::log_b_is
Vector< BaseFloat > log_b_is
[SSGMM]: Eq. (22) in techreport, b_i^{(s)} = \exp(\u_i^T \v^{(s)})
Definition: am-sgmm2.h:190

kaldi-common.h

kaldi::Sgmm2PerFrameDerivedVars
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142

kaldi::AmSgmm2::NumSubstatesForGroup
int32 NumSubstatesForGroup(int32 j1) const
Definition: am-sgmm2.h:357

kaldi::Sgmm2GselectConfig::Register
void Register(OptionsItf *opts)
Definition: am-sgmm2.h:129

kaldi::RandomAccessSgmm2GauPostReader
RandomAccessTableReader< Sgmm2GauPostHolder > RandomAccessSgmm2GauPostReader
Definition: am-sgmm2.h:579

kaldi::Sgmm2SplitSubstatesConfig::min_count
BaseFloat min_count
Definition: am-sgmm2.h:96

kaldi::Sgmm2SplitSubstatesConfig::power
BaseFloat power
Definition: am-sgmm2.h:94

kaldi::MleAmSgmm2Updater
Definition: estimate-am-sgmm2.h:246

kaldi::AmSgmm2::w_jmi_
std::vector< Matrix< BaseFloat > > w_jmi_
[SSGMM] w_{jmi}, dimension is [J1][#mix][I]. Computed from w_ and v_.
Definition: am-sgmm2.h:442

kaldi::Sgmm2SplitSubstatesConfig::Sgmm2SplitSubstatesConfig
Sgmm2SplitSubstatesConfig()
Definition: am-sgmm2.h:97

kaldi::Sgmm2PerSpkDerivedVars::o_s
Matrix< BaseFloat > o_s
Per-speaker offsets o_{i}. Dimension is [I][D].
Definition: am-sgmm2.h:188

kaldi::Sgmm2GselectConfig::Sgmm2GselectConfig
Sgmm2GselectConfig()
Definition: am-sgmm2.h:124