doc/fmllr-diag-gmm_8h_source.html

 // transform/fmllr-diag-gmm.h

 // Copyright 2009-2011  Microsoft Corporation;  Saarland University
 //                2013  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_
 #define KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_

 #include <vector>

 #include "base/kaldi-common.h"
 #include "gmm/am-diag-gmm.h"
 #include "gmm/mle-full-gmm.h"
 #include "transform/transform-common.h"
 #include "util/kaldi-table.h"
 #include "util/kaldi-holder.h"

 namespace kaldi {

 /* This header contains routines for performing global CMLLR,
    without a regression tree (however, you can down-weight silence
    in training using the program weight-silence-post on the
    state-level posteriors).  For regression-tree CMLLR, see
    regtree-fmllr-diag-gmm.h
 */

 // Configuration for the fMLLR transform update.
 struct FmllrOptions {
   std::string update_type;  // one of "full", "diag", "offset", "none"
   BaseFloat min_count;      // minimum count required to update fMLLR
   int32 num_iters;          // number of iterations in the fMLLR update phase

   // Defaults: a "full" update, a minimum count of 500 and 40 iterations.
   FmllrOptions()
       : update_type("full"),
         min_count(500.0),
         num_iters(40) {}

   // Registers the three members with the supplied options interface, under
   // the names "fmllr-update-type", "fmllr-min-count" and "fmllr-num-iters".
   void Register(OptionsItf *opts) {
     opts->Register(
         "fmllr-update-type", &update_type,
         "Update type for fMLLR (\"full\"|\"diag\"|\"offset\"|\"none\")");
     opts->Register(
         "fmllr-min-count", &min_count,
         "Minimum count required to update fMLLR");
     opts->Register(
         "fmllr-num-iters", &num_iters,
         "Number of iterations in fMLLR update phase.");
   }
 };


 class FmllrDiagGmmAccs: public AffineXformStats {
  public:
   // If supplied, the "opts" will only be used to limit the
   // stats that are accumulated, to the parts we'll need in the
   // update.
   FmllrDiagGmmAccs(const FmllrOptions &opts = FmllrOptions()):
       opts_(opts) { }
   explicit FmllrDiagGmmAccs(const FmllrDiagGmmAccs &other):
       AffineXformStats(other), single_frame_stats_(other.single_frame_stats_),
       opts_(other.opts_) {}
   explicit FmllrDiagGmmAccs(int32 dim, const FmllrOptions &opts = FmllrOptions()):
       opts_(opts) { Init(dim); }

   // The following initializer gives us an efficient way to
   // compute these stats from full-cov Gaussian statistics
   // (accumulated from a *diagonal* model (e.g. use
   // AccumFullGmm::AccumulateFromPosteriors or
   // AccumulateFromDiag).
   FmllrDiagGmmAccs(const DiagGmm &gmm, const AccumFullGmm &fgmm_accs);

   void Init(size_t dim) {
     AffineXformStats::Init(dim, dim); single_frame_stats_.Init(dim);
   }
   void Read(std::istream &in, bool binary, bool add) {
       AffineXformStats::Read(in, binary, add);
       single_frame_stats_.Init(Dim());
   }
   BaseFloat AccumulateForGmm(const DiagGmm &gmm,
                              const VectorBase<BaseFloat> &data,
                              BaseFloat weight);

   BaseFloat AccumulateForGmmPreselect(const DiagGmm &gmm,
                                       const std::vector<int32> &gselect,
                                       const VectorBase<BaseFloat> &data,
                                       BaseFloat weight);

   void AccumulateFromPosteriors(const DiagGmm &gmm,
                                 const VectorBase<BaseFloat> &data,
                                 const VectorBase<BaseFloat> &posteriors);

   void AccumulateFromPosteriorsPreselect(
       const DiagGmm &gmm,
       const std::vector<int32> &gselect,
       const VectorBase<BaseFloat> &data,
       const VectorBase<BaseFloat> &posteriors);


   void Update(const FmllrOptions &opts,
               MatrixBase<BaseFloat> *fmllr_mat,
               BaseFloat *objf_impr,
               BaseFloat *count);

   // Note: we allow copy and assignment for this class.

   // Note: you can use the inherited AffineXformStats::Read
   //       and AffineXformStats::Write methods for writing/reading
   //       of the object. It is not necessary to store the other
   //       private variables of this class

  private:
   // The things below, added in 2013, relate to an optimization that lets us
   // speed up accumulation if there are multiple active pdfs per frame
   // (e.g. when accumulating from lattices), or if we don't anticipate
   // doing a "full" update.

   struct SingleFrameStats {
     Vector<BaseFloat> x; // dim-dimensional features.
     Vector<BaseFloat> a; // linear term in per-frame auxf; dim is model-dim.
     Vector<BaseFloat> b; // quadratic term in per-frame auxf; dim is model-dim.
     double count;
     SingleFrameStats(int32 dim = 0) { Init(dim); }
     SingleFrameStats(const SingleFrameStats &s): x(s.x), a(s.a), b(s.b),
                                                  count(s.count) {}
     void Init(int32 dim);
   };

   void CommitSingleFrameStats();

   void InitSingleFrameStats(const VectorBase<BaseFloat> &data);

   bool DataHasChanged(const VectorBase<BaseFloat> &data) const; // compares it to the
   // data in single_frame_stats_, returns true if it's different.

   SingleFrameStats single_frame_stats_;

   // We only use the opts_ variable for its "update_type" data member,
   // which limits what parts of the G matrix we accumulate.
   FmllrOptions opts_;

 };


 // Sets *out_fmllr to the default fMLLR matrix: it is resized to
 // dim x (dim+1) and then made "unit", i.e. ones along the diagonal and
 // zeros everywhere else.
 inline void InitFmllr(int32 dim,
                       Matrix<BaseFloat> *out_fmllr) {
   Matrix<BaseFloat> &fmllr = *out_fmllr;
   const int32 num_cols = dim + 1;
   fmllr.Resize(dim, num_cols);
   fmllr.SetUnit();
 }

 // ComputeFmllrDiagGmm optimizes the FMLLR matrix, controlled by the options.
 //   accs       the accumulated statistics to optimize against.
 //   opts       the update options (see FmllrOptions).
 //   out_fmllr  in/out: the optimization starts from the current value of
 //              this matrix (e.g. use InitFmllr to get this).
 //   logdet     output; add this to likelihoods.
 // Returns auxf improvement.
 BaseFloat ComputeFmllrDiagGmm(const FmllrDiagGmmAccs &accs,
                               const FmllrOptions &opts,
                               Matrix<BaseFloat> *out_fmllr,
                               BaseFloat *logdet);  // add this to likelihoods

 // Returns the log-determinant of the leading square block of an fMLLR
 // matrix, i.e. of its first NumRows() columns; the final column is left out.
 // The matrix must be non-empty and have exactly one more column than rows.
 inline BaseFloat ComputeFmllrLogDet(const Matrix<BaseFloat> &fmllr_mat) {
   KALDI_ASSERT(fmllr_mat.NumRows() != 0 && fmllr_mat.NumCols() == fmllr_mat.NumRows()+1);
   const int32 dim = fmllr_mat.NumRows();
   const SubMatrix<BaseFloat> square_part(fmllr_mat, 0, dim, 0, dim);
   return square_part.LogDet();
 }


 // Updates the FMLLR matrix using Mark Gales' row-by-row update.
 //   in_xform   the transform to start from.
 //   stats      the accumulated statistics.
 //   num_iters  number of iterations of the update.
 //   out_xform  receives the updated transform.
 // NOTE(review): the return value is presumably the auxf improvement --
 // confirm in fmllr-diag-gmm.cc.
 BaseFloat ComputeFmllrMatrixDiagGmmFull(const MatrixBase<BaseFloat> &in_xform,
                                         const AffineXformStats &stats,
                                         int32 num_iters,
                                         MatrixBase<BaseFloat> *out_xform);

 // This does diagonal fMLLR.  Starts from in_xform and writes the result to
 // *out_xform.
 // NOTE(review): the return value is presumably the auxf improvement --
 // confirm in fmllr-diag-gmm.cc.
 BaseFloat ComputeFmllrMatrixDiagGmmDiagonal(const MatrixBase<BaseFloat> &in_xform,
                                             const AffineXformStats &stats,
                                             MatrixBase<BaseFloat> *out_xform);
 // Simpler implementation I am testing.  It has the same signature as
 // ComputeFmllrMatrixDiagGmmDiagonal.
 BaseFloat ComputeFmllrMatrixDiagGmmDiagonal2(const MatrixBase<BaseFloat> &in_xform,
                                              const AffineXformStats &stats,
                                              MatrixBase<BaseFloat> *out_xform);

 // This does offset-only fMLLR, i.e. it only estimates an offset.  Starts
 // from in_xform and writes the result to *out_xform.
 // NOTE(review): the return value is presumably the auxf improvement --
 // confirm in fmllr-diag-gmm.cc.
 BaseFloat ComputeFmllrMatrixDiagGmmOffset(const MatrixBase<BaseFloat> &in_xform,
                                           const AffineXformStats &stats,
                                           MatrixBase<BaseFloat> *out_xform);


 // This function internally calls ComputeFmllrMatrixDiagGmm{Full, Diagonal,
 // Offset}, depending on the requested update type (fmllr_type).
 //   num_iters  matches the extra argument taken by the "Full" variant; the
 //              "Diagonal" and "Offset" variants declared in this header take
 //              no iteration count.
 // NOTE(review): the return value is presumably the auxf improvement --
 // confirm in fmllr-diag-gmm.cc.
 BaseFloat ComputeFmllrMatrixDiagGmm(const MatrixBase<BaseFloat> &in_xform,
                                     const AffineXformStats &stats,
                                     std::string fmllr_type,  // "none", "offset", "diag", "full"
                                     int32 num_iters,
                                     MatrixBase<BaseFloat> *out_xform);

 // Returns the (diagonal-GMM) FMLLR auxiliary function value given the
 // transform and the stats.  Overloaded for single- and double-precision
 // transforms; the return type matches the precision of "xform".
 float FmllrAuxFuncDiagGmm(const MatrixBase<float> &xform,
                           const AffineXformStats &stats);
 double FmllrAuxFuncDiagGmm(const MatrixBase<double> &xform,
                            const AffineXformStats &stats);


 // Returns the (diagonal-GMM) FMLLR auxiliary function value given the
 // transform and the stats.
 // NOTE(review): grad_out presumably receives the gradient of the auxiliary
 // function with respect to xform -- confirm in fmllr-diag-gmm.cc.
 BaseFloat FmllrAuxfGradient(const MatrixBase<BaseFloat> &xform,
                             const AffineXformStats &stats,
                             MatrixBase<BaseFloat> *grad_out);


 // This function applies a feature-level transform "xform" to stats (useful
 // for certain techniques based on fMLLR).  "stats" is passed by non-const
 // pointer and is the object that gets modified.
 void ApplyFeatureTransformToStats(const MatrixBase<BaseFloat> &xform,
                                   AffineXformStats *stats);

 // ApplyModelTransformToStats takes a transform "xform", which must be
 // diagonal, and applies it to "stats", which is passed by non-const pointer
 // and is the object that gets modified.
 void ApplyModelTransformToStats(const MatrixBase<BaseFloat> &xform,
                                 AffineXformStats *stats);


 // This function does one row of the inner-loop fMLLR transform update.
 //   row        index of the row of *transform being updated.
 //   transform  the transform that is updated.
 // NOTE(review): inv_G, k and beta are presumably the inverse quadratic
 // stats, the linear stats and the count for that row -- confirm in
 // fmllr-diag-gmm.cc.  inv_G and k are taken by non-const reference.
 void FmllrInnerUpdate(SpMatrix<double> &inv_G,
                       VectorBase<double> &k,
                       double beta,
                       int32 row,
                       MatrixBase<double> *transform);


 } // namespace kaldi

 #endif  // KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_
kaldi::FmllrDiagGmmAccs::SingleFrameStats::x
Vector< BaseFloat > x
Definition: fmllr-diag-gmm.h:134

am-diag-gmm.h

kaldi-table.h

kaldi::ApplyModelTransformToStats
void ApplyModelTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
ApplyModelTransformToStats takes a transform "xform", which must be diagonal (i.e.
Definition: fmllr-diag-gmm.cc:421

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::FmllrDiagGmmAccs::SingleFrameStats
Definition: fmllr-diag-gmm.h:133

kaldi::SpMatrix< double >

kaldi::FmllrDiagGmmAccs::FmllrDiagGmmAccs
FmllrDiagGmmAccs(int32 dim, const FmllrOptions &opts=FmllrOptions())
Definition: fmllr-diag-gmm.h:71

kaldi::ComputeFmllrMatrixDiagGmmFull
BaseFloat ComputeFmllrMatrixDiagGmmFull(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
Updates the FMLLR matrix using Mark Gales' row-by-row update.
Definition: fmllr-diag-gmm.cc:236

kaldi-holder.h

kaldi::AffineXformStats
Definition: transform-common.h:30

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::InitFmllr
void InitFmllr(int32 dim, Matrix< BaseFloat > *out_fmllr)
Definition: fmllr-diag-gmm.h:161

kaldi::FmllrDiagGmmAccs
This does not work with multiple feature transforms.
Definition: fmllr-diag-gmm.h:61

kaldi::FmllrDiagGmmAccs::single_frame_stats_
SingleFrameStats single_frame_stats_
Definition: fmllr-diag-gmm.h:151

kaldi::FmllrOptions::update_type
std::string update_type
"full", "diag", "offset", "none"
Definition: fmllr-diag-gmm.h:44

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::Matrix< BaseFloat >

kaldi::MatrixBase::SetUnit
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
Definition: kaldi-matrix.cc:1348

kaldi::FmllrAuxfGradient
BaseFloat FmllrAuxfGradient(const MatrixBase< BaseFloat > &xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *grad_out)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Definition: fmllr-diag-gmm.cc:510

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::ApplyFeatureTransformToStats
void ApplyFeatureTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
This function applies a feature-level transform to stats (useful for certain techniques based on fMLL...
Definition: fmllr-diag-gmm.cc:381

mle-full-gmm.h

kaldi::FmllrDiagGmmAccs::SingleFrameStats::SingleFrameStats
SingleFrameStats(int32 dim=0)
Definition: fmllr-diag-gmm.h:138

kaldi::ComputeFmllrMatrixDiagGmmOffset
BaseFloat ComputeFmllrMatrixDiagGmmOffset(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does offset-only fMLLR, i.e. it only estimates an offset.
Definition: fmllr-diag-gmm.cc:350

count
const size_t count
Definition: arpa-file-parser-test.cc:66

kaldi::FmllrDiagGmmAccs::opts_
FmllrOptions opts_
Definition: fmllr-diag-gmm.h:155

float

kaldi::FmllrDiagGmmAccs::FmllrDiagGmmAccs
FmllrDiagGmmAccs(const FmllrOptions &opts=FmllrOptions())
Definition: fmllr-diag-gmm.h:66

kaldi::FmllrOptions::num_iters
int32 num_iters
Definition: fmllr-diag-gmm.h:46

kaldi::AccumFullGmm
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
Definition: mle-full-gmm.h:74

kaldi::FmllrDiagGmmAccs::SingleFrameStats::SingleFrameStats
SingleFrameStats(const SingleFrameStats &s)
Definition: fmllr-diag-gmm.h:139

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::FmllrOptions::Register
void Register(OptionsItf *opts)
Definition: fmllr-diag-gmm.h:48

kaldi::ComputeFmllrMatrixDiagGmmDiagonal
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does diagonal fMLLR (i.e.
Definition: fmllr-diag-gmm.cc:275

kaldi::AffineXformStats::Read
void Read(std::istream &in, bool binary, bool add)
Definition: transform-common.cc:69

kaldi::ComputeFmllrMatrixDiagGmm
BaseFloat ComputeFmllrMatrixDiagGmm(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, std::string fmllr_type, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
This function internally calls ComputeFmllrMatrixDiagGmm{Full, Diagonal, Offset}, depending on "fmllr...
Definition: fmllr-diag-gmm.cc:169

kaldi::FmllrOptions
Definition: fmllr-diag-gmm.h:43

kaldi::AffineXformStats::Init
void Init(int32 dim, int32 num_gs)
Definition: transform-common.cc:28

kaldi::FmllrInnerUpdate
void FmllrInnerUpdate(SpMatrix< double > &inv_G, VectorBase< double > &k, double beta, int32 row, MatrixBase< double > *transform)
This function does one row of the inner-loop fMLLR transform update.
Definition: fmllr-diag-gmm.cc:193

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::ComputeFmllrDiagGmm
BaseFloat ComputeFmllrDiagGmm(const FmllrDiagGmmAccs &accs, const FmllrOptions &opts, Matrix< BaseFloat > *out_fmllr, BaseFloat *logdet)

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::FmllrAuxFuncDiagGmm
float FmllrAuxFuncDiagGmm(const MatrixBase< float > &xform, const AffineXformStats &stats)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Definition: fmllr-diag-gmm.cc:481

kaldi::MatrixBase::LogDet
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
Definition: kaldi-matrix.cc:2038

kaldi::ComputeFmllrMatrixDiagGmmDiagonal2
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal2(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)

kaldi::DiagGmm
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42

kaldi::FmllrDiagGmmAccs::Init
void Init(size_t dim)
Definition: fmllr-diag-gmm.h:81

kaldi::ComputeFmllrLogDet
BaseFloat ComputeFmllrLogDet(const Matrix< BaseFloat > &fmllr_mat)
Definition: fmllr-diag-gmm.h:176

kaldi::Matrix::Resize
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Definition: kaldi-matrix.cc:819

kaldi::FmllrDiagGmmAccs::SingleFrameStats::b
Vector< BaseFloat > b
Definition: fmllr-diag-gmm.h:136

transform-common.h

kaldi::FmllrOptions::FmllrOptions
FmllrOptions()
Definition: fmllr-diag-gmm.h:47

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::FmllrDiagGmmAccs::SingleFrameStats::count
double count
Definition: fmllr-diag-gmm.h:137

kaldi::FmllrDiagGmmAccs::SingleFrameStats::a
Vector< BaseFloat > a
Definition: fmllr-diag-gmm.h:135

kaldi::FmllrDiagGmmAccs::FmllrDiagGmmAccs
FmllrDiagGmmAccs(const FmllrDiagGmmAccs &other)
Definition: fmllr-diag-gmm.h:68

kaldi-common.h

kaldi::SubMatrix
Sub-matrix representation.
Definition: kaldi-matrix.h:988

kaldi::FmllrDiagGmmAccs::Read
void Read(std::istream &in, bool binary, bool add)
Definition: fmllr-diag-gmm.h:84

kaldi::FmllrOptions::min_count
BaseFloat min_count
Definition: fmllr-diag-gmm.h:45