doc/fmllr-sgmm2_8h_source.html

 // sgmm2/fmllr-sgmm2.h

 // Copyright 2009-2012     Saarland University (author: Arnab Ghoshal)
 //                         Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_SGMM2_FMLLR_SGMM2_H_
 #define KALDI_SGMM2_FMLLR_SGMM2_H_

 #include <string>
 #include <vector>

 #include "base/kaldi-common.h"
 #include "sgmm2/am-sgmm2.h"
 #include "transform/transform-common.h"
 #include "util/kaldi-table.h"
 #include "util/kaldi-holder.h"
 #include "itf/options-itf.h"

 namespace kaldi {

 struct Sgmm2FmllrConfig {
   int32 fmllr_iters;
   int32 step_iters;
   BaseFloat fmllr_min_count_basis;
   BaseFloat fmllr_min_count;
   BaseFloat fmllr_min_count_full;
   int32 num_fmllr_bases;
   BaseFloat bases_occ_scale;

   Sgmm2FmllrConfig() {
     fmllr_iters = 5;
     step_iters = 10;
     fmllr_min_count_basis = 100.0;
     fmllr_min_count = 1000.0;
     fmllr_min_count_full = 5000.0;
     num_fmllr_bases = 50;
     bases_occ_scale = 0.2;
   }

   void Register(OptionsItf *opts);
 };

 /// Registers every configuration field with `opts`, pairing each option name
 /// with a pointer to the member it controls and a help string that starts
 /// with the "Sgmm2FmllrConfig: " prefix.
 ///
 /// Note that the option controlling `fmllr_min_count_basis` is spelled
 /// "fmllr-min-count-bases"; the name is kept as-is for existing callers.
 inline void Sgmm2FmllrConfig::Register(OptionsItf *opts) {
   // Common prefix prepended to each option's help text.
   const std::string prefix("Sgmm2FmllrConfig: ");

   opts->Register(
       "fmllr-iters", &fmllr_iters,
       prefix + "Number of iterations in FMLLR estimation.");
   opts->Register(
       "fmllr-step-iters", &step_iters,
       prefix + "Number of iterations to find optimal FMLLR step size.");
   opts->Register(
       "fmllr-min-count-bases", &fmllr_min_count_basis,
       prefix +
           "Minimum occupancy count to estimate FMLLR using basis matrices.");
   opts->Register(
       "fmllr-min-count", &fmllr_min_count,
       prefix + "Minimum occupancy count to estimate FMLLR (without bases).");
   opts->Register(
       "fmllr-min-count-full", &fmllr_min_count_full,
       prefix +
           "Minimum occupancy count to stop using basis matrices for FMLLR.");
   opts->Register(
       "fmllr-num-bases", &num_fmllr_bases,
       prefix + "Number of FMLLR basis matrices.");
   opts->Register(
       "fmllr-bases-occ-scale", &bases_occ_scale,
       prefix + "Scale per-speaker count to determine number of CMLLR bases.");
 }


 /// Global adaptation parameters.
 class Sgmm2FmllrGlobalParams {
  public:
   /// Fills pre_xform_, inv_xform_ and mean_scatter_ from the model by calling
   /// AmSgmm2::ComputeFmllrPreXform with the given state occupancies.
   void Init(const AmSgmm2 &sgmm, const Vector<BaseFloat> &state_occs);

   /// Serialization routines; they are only declared in this header.
   /// NOTE(review): `binary` presumably selects binary vs. text mode --
   /// confirm against the implementation.
   void Write(std::ostream &out_stream, bool binary) const;
   void Read(std::istream &in_stream, bool binary);

   /// Returns true if the pre-transform, its inverse, or the mean scatter
   /// has zero size.
   bool IsEmpty() const {
     if (pre_xform_.NumRows() == 0) return true;
     if (inv_xform_.NumRows() == 0) return true;
     return mean_scatter_.Dim() == 0;
   }

   /// Returns true if at least one basis matrix is stored in fmllr_bases_.
   bool HasBasis() const { return !fmllr_bases_.empty(); }

   /// Pre-transform matrix. Dim is [D][D+1].
   Matrix<BaseFloat> pre_xform_;
   /// Inverse of pre-transform. Dim is [D][D+1].
   Matrix<BaseFloat> inv_xform_;
   /// Diagonal of mean-scatter matrix. Dim is [D].
   Vector<BaseFloat> mean_scatter_;
   /// {W}_b. [b][d][d], dim is [B][D][D+1].
   std::vector< Matrix<BaseFloat> > fmllr_bases_;
 };

 inline void Sgmm2FmllrGlobalParams::Init(const AmSgmm2 &sgmm,
                                         const Vector<BaseFloat> &state_occs) {
   sgmm.ComputeFmllrPreXform(state_occs, &pre_xform_, &inv_xform_,
                             &mean_scatter_);
 }

 /// Accumulators used for the maximum-likelihood estimate of FMLLR
 /// transforms.  The statistics are held in an AffineXformStats object.
 ///
 /// All non-inline members are only declared in this header.
 /// NOTE(review): their exact semantics (return values, which arguments are
 /// modified) are not visible here -- confirm against the implementation.
 class FmllrSgmm2Accs {
  public:
   /// Creates an uninitialized accumulator; the dimension is set to -1.
   FmllrSgmm2Accs()
       : dim_(-1) {}
   ~FmllrSgmm2Accs() {}

   void Init(int32 dim, int32 num_gaussians);

   /// Zeroes the accumulated statistics.
   void SetZero() {
     stats_.SetZero();
   }

   void Write(std::ostream &out_stream, bool binary) const;
   void Read(std::istream &in_stream, bool binary, bool add);

   /// Accumulates statistics for one frame of `data` in state `state_index`.
   /// NOTE(review): the meaning of the returned value is not shown in this
   /// header.
   BaseFloat Accumulate(
       const AmSgmm2 &sgmm,
       const VectorBase<BaseFloat> &data,
       const Sgmm2PerFrameDerivedVars &frame_vars,
       int32 state_index,
       BaseFloat weight,
       Sgmm2PerSpkDerivedVars *spk);

   /// Accumulates statistics for one frame from supplied posteriors.
   void AccumulateFromPosteriors(
       const AmSgmm2 &sgmm,
       const Sgmm2PerSpkDerivedVars &spk,
       const VectorBase<BaseFloat> &data,
       const std::vector<int32> &gauss_select,
       const Matrix<BaseFloat> &posteriors,
       int32 state_index);

   /// NOTE(review): presumably adds to `grad_scatter`, the only non-const
   /// argument -- confirm.
   void AccumulateForFmllrSubspace(
       const AmSgmm2 &sgmm,
       const Sgmm2FmllrGlobalParams &fmllr_globals,
       SpMatrix<double> *grad_scatter);

   /// NOTE(review): presumably writes its results to `grad_out` and `G_out`
   /// -- confirm.
   BaseFloat FmllrObjGradient(
       const AmSgmm2 &sgmm,
       const Matrix<BaseFloat> &xform,
       Matrix<BaseFloat> *grad_out,
       Matrix<BaseFloat> *G_out) const;

   /// NOTE(review): results are presumably returned through `out_xform`,
   /// `frame_count` and `auxf_improv`; the meaning of the bool return is not
   /// shown here -- confirm.
   bool Update(
       const AmSgmm2 &model,
       const Sgmm2FmllrGlobalParams &fmllr_globals,
       const Sgmm2FmllrConfig &opts,
       Matrix<BaseFloat> *out_xform,
       BaseFloat *frame_count,
       BaseFloat *auxf_improv) const;

   /// Accessors.
   int32 Dim() const {
     return dim_;
   }
   const AffineXformStats &stats() const {
     return stats_;
   }

  private:
   AffineXformStats stats_;  ///< Accumulated stats.
   int32 dim_;               ///< Dimension of feature vectors.

   // Copy construction and assignment are disallowed for this class.
   KALDI_DISALLOW_COPY_AND_ASSIGN(FmllrSgmm2Accs);
 };

 /// Computes the fMLLR basis matrices given the scatter of the vectorized
 /// gradients (`fmllr_grad_scatter`).
 /// NOTE(review): the result is presumably stored in `fmllr_globals`, the only
 /// non-const argument -- confirm against the definition.  The roles of
 /// `num_fmllr_bases`, `feat_dim` and `min_eig` (default 0.0) are not shown in
 /// this header.
 void EstimateSgmm2FmllrSubspace(const SpMatrix<double> &fmllr_grad_scatter,
                                 int32 num_fmllr_bases, int32 feat_dim,
                                 Sgmm2FmllrGlobalParams *fmllr_globals,
                                 double min_eig = 0.0);

 }  // namespace kaldi

 #endif  // KALDI_SGMM2_FMLLR_SGMM2_H_
kaldi-table.h

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::AmSgmm2
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231

kaldi::FmllrSgmm2Accs::SetZero
void SetZero()
Definition: fmllr-sgmm2.h:128

kaldi::SpMatrix< double >

kaldi::FmllrSgmm2Accs
Class for computing the accumulators needed for the maximum-likelihood estimate of FMLLR transforms f...
Definition: fmllr-sgmm2.h:122

kaldi-holder.h

kaldi::Sgmm2FmllrGlobalParams::pre_xform_
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:103

kaldi::AffineXformStats
Definition: transform-common.h:30

kaldi::FmllrSgmm2Accs::~FmllrSgmm2Accs
~FmllrSgmm2Accs()
Definition: fmllr-sgmm2.h:125

kaldi::Sgmm2FmllrConfig::fmllr_min_count_full
BaseFloat fmllr_min_count_full
Minimum occupancy count to stop using FMLLR bases and switch to regular FMLLR estimation.
Definition: fmllr-sgmm2.h:49

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::Matrix< BaseFloat >

KALDI_DISALLOW_COPY_AND_ASSIGN
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121

kaldi::Sgmm2FmllrConfig::Register
void Register(OptionsItf *opts)
Definition: fmllr-sgmm2.h:69

kaldi::Sgmm2FmllrConfig::Sgmm2FmllrConfig
Sgmm2FmllrConfig()
Definition: fmllr-sgmm2.h:56

kaldi::EstimateSgmm2FmllrSubspace
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
Definition: fmllr-sgmm2.cc:506

kaldi::OptionsItf::Register
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0

kaldi::Sgmm2FmllrConfig::bases_occ_scale
BaseFloat bases_occ_scale
Scale per-speaker count to determine number of CMLLR bases.
Definition: fmllr-sgmm2.h:54

kaldi::Sgmm2FmllrGlobalParams::HasBasis
bool HasBasis() const
Definition: fmllr-sgmm2.h:100

kaldi::FmllrSgmm2Accs::Dim
int32 Dim() const
Accessors.
Definition: fmllr-sgmm2.h:171

kaldi::Sgmm2FmllrGlobalParams::IsEmpty
bool IsEmpty() const
Definition: fmllr-sgmm2.h:96

kaldi::Sgmm2FmllrConfig::fmllr_iters
int32 fmllr_iters
Number of iterations in FMLLR estimation.
Definition: fmllr-sgmm2.h:41

options-itf.h

float

kaldi::Sgmm2FmllrGlobalParams::inv_xform_
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:105

am-sgmm2.h

kaldi::OptionsItf
Definition: options-itf.h:26

kaldi::Sgmm2FmllrGlobalParams::Init
void Init(const AmSgmm2 &sgmm, const Vector< BaseFloat > &state_occs)
Definition: fmllr-sgmm2.h:112

kaldi::Sgmm2FmllrConfig
Configuration variables needed in the estimation of FMLLR for SGMMs.
Definition: fmllr-sgmm2.h:40

kaldi::Sgmm2FmllrConfig::step_iters
int32 step_iters
Iterations to find optimal FMLLR step size.
Definition: fmllr-sgmm2.h:42

kaldi::Sgmm2FmllrConfig::num_fmllr_bases
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
Definition: fmllr-sgmm2.h:52

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::Sgmm2FmllrConfig::fmllr_min_count_basis
BaseFloat fmllr_min_count_basis
Minimum occupancy count to estimate FMLLR using basis matrices.
Definition: fmllr-sgmm2.h:44

kaldi::Sgmm2PerSpkDerivedVars
Definition: am-sgmm2.h:165

kaldi::FmllrSgmm2Accs::stats
const AffineXformStats & stats() const
Definition: fmllr-sgmm2.h:172

kaldi::FmllrSgmm2Accs::dim_
int32 dim_
Dimension of feature vectors.
Definition: fmllr-sgmm2.h:176

transform-common.h

kaldi::Sgmm2FmllrGlobalParams
Global adaptation parameters.
Definition: fmllr-sgmm2.h:91

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::Sgmm2FmllrGlobalParams::mean_scatter_
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
Definition: fmllr-sgmm2.h:107

kaldi::Sgmm2FmllrGlobalParams::fmllr_bases_
std::vector< Matrix< BaseFloat > > fmllr_bases_
{W}_b. [b][d][d], dim is [B][D][D+1].
Definition: fmllr-sgmm2.h:109

kaldi::FmllrSgmm2Accs::FmllrSgmm2Accs
FmllrSgmm2Accs()
Definition: fmllr-sgmm2.h:124

kaldi-common.h

kaldi::Sgmm2PerFrameDerivedVars
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142

kaldi::FmllrSgmm2Accs::stats_
AffineXformStats stats_
Accumulated stats.
Definition: fmllr-sgmm2.h:175

kaldi::Sgmm2FmllrConfig::fmllr_min_count
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
Definition: fmllr-sgmm2.h:46

kaldi::AmSgmm2::ComputeFmllrPreXform
void ComputeFmllrPreXform(const Vector< BaseFloat > &pdf_occs, Matrix< BaseFloat > *xform, Matrix< BaseFloat > *inv_xform, Vector< BaseFloat > *diag_mean_scatter) const
Computes the LDA-like pre-transform and its inverse as well as the eigenvalues of the scatter of the ...
Definition: am-sgmm2.cc:965