doc/fmllr-sgmm2_8cc_source.html

 // sgmm2/fmllr-sgmm2.cc

 // Copyright 2009-2012   Saarland University (author: Arnab Ghoshal)
 //                       Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include <algorithm>
 #include <string>
 #include <vector>
 using std::vector;

 #include "sgmm2/fmllr-sgmm2.h"
 #include "util/parse-options.h"

 namespace kaldi {

 static void ApplyPreXformToGradient(const Sgmm2FmllrGlobalParams &globals,
                                     const Matrix<BaseFloat> &gradient_in,
                                     Matrix<BaseFloat> *gradient_out) {
   // Eq. (B.14): P' = A_{inv}^T P {W_{pre}^+}^T
   int32 dim = gradient_in.NumRows();
   Matrix<BaseFloat> Wpre_plus(dim + 1, dim + 1, kSetZero);
   Wpre_plus.Range(0, dim, 0, dim + 1).CopyFromMat(globals.pre_xform_);
   Wpre_plus(dim, dim) = 1;
   SubMatrix<BaseFloat> Ainv(globals.inv_xform_, 0, dim, 0, dim);
   Matrix<BaseFloat> AinvP(dim, dim + 1, kUndefined);
   AinvP.AddMatMat(1.0, Ainv, kTrans, gradient_in, kNoTrans, 0.0);
   gradient_out->AddMatMat(1.0, AinvP, kNoTrans, Wpre_plus, kTrans, 0.0);
 }

 static void ApplyInvPreXformToChange(const Sgmm2FmllrGlobalParams &globals,
                                      const Matrix<BaseFloat> &delta_in,
                                      Matrix<BaseFloat> *delta_out) {
   // Eq. (B.25): \Delta = A_{inv} \Delta' W_{pre}^+
   int32 dim = delta_in.NumRows();
   Matrix<BaseFloat> Wpre_plus(dim + 1, dim + 1, kSetZero);
   Wpre_plus.Range(0, dim, 0, dim + 1).CopyFromMat(globals.pre_xform_);
   Wpre_plus(dim, dim) = 1;
   SubMatrix<BaseFloat> Ainv(globals.inv_xform_, 0, dim, 0, dim);
   Matrix<BaseFloat> AinvD(dim, dim + 1, kUndefined);
   AinvD.AddMatMat(1.0, Ainv, kNoTrans, delta_in, kNoTrans, 0.0);
   delta_out->AddMatMat(1.0, AinvD, kNoTrans, Wpre_plus, kNoTrans, 0.0);
 }

 static void ApplyHessianXformToGradient(const Sgmm2FmllrGlobalParams &globals,
                                         const Matrix<BaseFloat> &gradient_in,
                                         Matrix<BaseFloat> *gradient_out) {
   int32 dim = gradient_in.NumRows();
   const Vector<BaseFloat> &D = globals.mean_scatter_;
   if (D.Min() <= 0.0)
     KALDI_ERR << "Cannot estimate FMLLR: mean scatter has 0 eigenvalues.";
   for (int32 r = 0; r < dim; r++) {
     for (int32 c = 0; c < r; c++) {
       // Eq. (B.15)
       (*gradient_out)(r, c) = gradient_in(r, c) / std::sqrt(1 + D(c));
       // Eq. (B.16)
       (*gradient_out)(c, r) = gradient_in(c, r) / std::sqrt(1 + D(r) -
           1 / (1 + D(c))) - gradient_in(r, c) / ((1 + D(c)) *
               std::sqrt(1 + D(r) - 1 / (1 + D(c))));
     }
     // Eq. (B.17) & (B.18)
     (*gradient_out)(r, r) = gradient_in(r, r) / std::sqrt(2 + D(r));
     (*gradient_out)(r, dim) = gradient_in(r, dim);
   }
 }

 static void ApplyInvHessianXformToChange(const Sgmm2FmllrGlobalParams &globals,
                                          const Matrix<BaseFloat> &delta_in,
                                          Matrix<BaseFloat> *delta_out) {
   int32 dim = delta_in.NumRows();
   const Vector<BaseFloat> &D = globals.mean_scatter_;
   if (D.Min() <= 0.0)
     KALDI_ERR << "Cannot estimate FMLLR: mean scatter has 0 eigenvalues.";
   for (int32 r = 0; r < dim; r++) {
     for (int32 c = 0; c < r; c++) {
       // Eq. (B.21)
       (*delta_out)(r, c) = delta_in(r, c) / std::sqrt(1 + D(c)) -
           delta_in(c, r) / ((1 + D(c)) * std::sqrt(1 + D(r) - 1 / (1 + D(c))));
       // Eq. (B.22)
       (*delta_out)(c, r) = delta_in(c, r) / std::sqrt(1 + D(r) - 1/ (1 + D(c)));
     }
     // Eq. (B.23) & (B.24)
     (*delta_out)(r, r) = delta_in(r, r) / std::sqrt(2 + D(r));
     (*delta_out)(r, dim) = delta_in(r, dim);
   }
 }


 void Sgmm2FmllrGlobalParams::Write(std::ostream &out, bool binary) const {
   WriteToken(out, binary, "<SGMM_FMLLR_GLOBAL_PARAMS>");
   WriteToken(out, binary, "<PRE_XFORM>");
   pre_xform_.Write(out, binary);
   WriteToken(out, binary, "<INV_XFORM>");
   inv_xform_.Write(out, binary);
   WriteToken(out, binary, "<MEAN_SCATTER>");
   mean_scatter_.Write(out, binary);
   if (fmllr_bases_.size() != 0) {
     WriteToken(out, binary, "<FMLLR_BASIS>");
     uint32 tmp = static_cast<uint32>(fmllr_bases_.size());
     WriteBasicType(out, binary, tmp);
     for (uint32 i = 0; i < tmp; i++) {
       fmllr_bases_[i].Write(out, binary);
     }
   }
   WriteToken(out, binary, "</SGMM_FMLLR_GLOBAL_PARAMS>");
 }

 void Sgmm2FmllrGlobalParams::Read(std::istream &in, bool binary) {
   ExpectToken(in, binary, "<SGMM_FMLLR_GLOBAL_PARAMS>");
   ExpectToken(in, binary, "<PRE_XFORM>");
   pre_xform_.Read(in, binary);
   ExpectToken(in, binary, "<INV_XFORM>");
   inv_xform_.Read(in, binary);
   ExpectToken(in, binary, "<MEAN_SCATTER>");
   mean_scatter_.Read(in, binary);
   std::string token;
   ReadToken(in, binary, &token);
   if (token == "<FMLLR_BASIS>") {
     uint32 tmp;
     ReadBasicType(in, binary, &tmp);
     fmllr_bases_.resize(tmp);
     for (uint32 i = 0; i < tmp; i++) {
       fmllr_bases_[i].Read(in, binary);
     }
   } else {
     if (token != "</SGMM_FMLLR_GLOBAL_PARAMS>")
       KALDI_ERR << "Unexpected token '" << token << "' found.";
   }
 }


 void FmllrSgmm2Accs::Init(int32 dim, int32 num_gaussians) {
   if (dim == 0) {  // empty stats
     dim_ = 0;  // non-zero dimension is meaningless in empty stats
     stats_.Init(0, 0);  // clear the stats
   } else {
     dim_ = dim;
     stats_.Init(dim, num_gaussians);
   }
 }

 BaseFloat FmllrSgmm2Accs::Accumulate(const AmSgmm2 &model,
                                      const VectorBase<BaseFloat> &data,
                                      const Sgmm2PerFrameDerivedVars &frame_vars,
                                      int32 pdf_index, BaseFloat weight,
                                      Sgmm2PerSpkDerivedVars *spk) {
   // Calulate Gaussian posteriors and collect statistics
   Matrix<BaseFloat> posteriors;
   BaseFloat log_like = model.ComponentPosteriors(frame_vars, pdf_index,
                                                  spk, &posteriors);
   posteriors.Scale(weight);
   AccumulateFromPosteriors(model, *spk, data, frame_vars.gselect, posteriors,
                            pdf_index);
   return log_like;
 }

 void FmllrSgmm2Accs::AccumulateFromPosteriors(
     const AmSgmm2 &model,
     const Sgmm2PerSpkDerivedVars &spk,
     const VectorBase<BaseFloat> &data,
     const vector<int32> &gselect,
     const Matrix<BaseFloat> &posteriors,
     int32 j2) {
   Vector<double> var_scaled_mean(dim_), extended_data(dim_+1);
   extended_data.Range(0, dim_).CopyFromVec(data);
   extended_data(dim_) = 1.0;
   SpMatrix<double> scatter(dim_+1, kSetZero);
   scatter.AddVec2(1.0, extended_data);
   int32 j1 = model.Pdf2Group(j2);
   for (int32 ki = 0, ki_max = gselect.size(); ki < ki_max; ki++) {
     int32 i = gselect[ki];

     for (int32 m = 0; m < model.NumSubstatesForGroup(j1); m++) {
       // posterior gamma_{jkmi}(t)                             eq.(39)
       BaseFloat gammat_jmi = posteriors(ki, m);

       // Accumulate statistics for non-zero gaussian posterior
       if (gammat_jmi > 0.0) {
         stats_.beta_ += gammat_jmi;
         model.GetVarScaledSubstateSpeakerMean(j1, m, i, spk,
                                               &var_scaled_mean);
         // Eq. (52): K += \gamma_{jmi} \Sigma_{i}^{-1} \mu_{jmi}^{(s)} x^{+T}
         stats_.K_.AddVecVec(gammat_jmi, var_scaled_mean, extended_data);
         // Eq. (53): G_{i} += \gamma_{jmi} x^{+} x^{+T}
         stats_.G_[i].AddSp(gammat_jmi, scatter);
       }  // non-zero posteriors
     }  // loop over substates
   }  // loop over selected Gaussians
 }

 void FmllrSgmm2Accs::AccumulateForFmllrSubspace(const AmSgmm2 &sgmm,
     const Sgmm2FmllrGlobalParams &globals, SpMatrix<double> *grad_scatter) {
   if (stats_.beta_ <= 0.0) {
     KALDI_WARN << "Not committing any stats since no stats accumulated.";
     return;
   }
   int32 dim = sgmm.FeatureDim();
   Matrix<BaseFloat> xform(dim, dim + 1, kUndefined);
   xform.SetUnit();
   Matrix<BaseFloat> grad(dim, dim + 1, kSetZero);
   this->FmllrObjGradient(sgmm, xform, &grad, NULL);
   Matrix<BaseFloat> pre_xformed_grad(dim, dim + 1, kSetZero);
   ApplyPreXformToGradient(globals, grad, &pre_xformed_grad);
   Matrix<BaseFloat> hess_xformed_grad(dim, dim + 1, kSetZero);
   ApplyHessianXformToGradient(globals, pre_xformed_grad, &hess_xformed_grad);
   Vector<double> grad_vec(dim * (dim + 1));
   grad_vec.CopyRowsFromMat(hess_xformed_grad);
   grad_vec.Scale(1 / std::sqrt(stats_.beta_));
   grad_scatter->AddVec2(1.0, grad_vec);
   KALDI_LOG << "Frame counts for when committing fMLLR subspace stats are "
             << stats_.beta_;
 }


 BaseFloat FmllrSgmm2Accs::FmllrObjGradient(const AmSgmm2 &sgmm,
                                           const Matrix<BaseFloat> &xform,
                                           Matrix<BaseFloat> *grad_out,
                                           Matrix<BaseFloat> *G_out) const {
   int32 dim = sgmm.FeatureDim(),
       num_gauss = sgmm.NumGauss();
   KALDI_ASSERT(stats_.G_.size() == static_cast<size_t>(num_gauss));
   Matrix<double> xform_d(xform);
   SubMatrix<double> A(xform_d, 0, dim, 0, dim);
   Matrix<double> xform_g(dim, dim + 1), total_g(dim, dim + 1);
   SpMatrix<double> inv_covar(dim);
   double obj = stats_.beta_ * A.LogDet() +
       TraceMatMat(xform_d, stats_.K_, kTrans);
   for (int32 i = 0; i < num_gauss; i++) {
     sgmm.GetInvCovars(i, &inv_covar);
     xform_g.AddMatSp(1.0, xform_d, kNoTrans, stats_.G_[i], 0.0);
     total_g.AddSpMat(1.0, inv_covar, xform_g, kNoTrans, 1.0);
   }
   obj -= 0.5 * TraceMatMat(xform_d, total_g, kTrans);
   if (G_out != NULL) G_out->CopyFromMat(total_g);

   // Compute the gradient: P = \beta [(A^{-1})^{T} , 0] + K - S
   if (grad_out != NULL) {
     Matrix<double> grad_d(dim, dim + 1, kSetZero);
     grad_d.Range(0, dim, 0, dim).CopyFromMat(A);
     grad_d.Range(0, dim, 0, dim).InvertDouble();
     grad_d.Range(0, dim, 0, dim).Transpose();
     grad_d.Scale(stats_.beta_);
     grad_d.AddMat(-1.0, total_g, kNoTrans);
     grad_d.AddMat(1.0, stats_.K_, kNoTrans);
     grad_out->CopyFromMat(grad_d);
   }

   return obj;
 }


 void FmllrSgmm2Accs::Write(std::ostream &out, bool binary) const {
   WriteToken(out, binary, "<FMLLRACCS>");
   WriteToken(out, binary, "<DIMENSION>");
   WriteBasicType(out, binary, dim_);
   WriteToken(out, binary, "<STATS>");
   stats_.Write(out, binary);
   WriteToken(out, binary, "</FMLLRACCS>");
 }

 void FmllrSgmm2Accs::Read(std::istream &in, bool binary, bool add) {
   ExpectToken(in, binary, "<FMLLRACCS>");
   ExpectToken(in, binary, "<DIMENSION>");
   ReadBasicType(in, binary, &dim_);
   KALDI_ASSERT(dim_ > 0);
   ExpectToken(in, binary, "<STATS>");
   stats_.Read(in, binary, add);
   ExpectToken(in, binary, "</FMLLRACCS>");
 }


 static BaseFloat CalcFmllrStepSize(const AffineXformStats &stats,
                                    const AmSgmm2 &sgmm,
                                    const MatrixBase<BaseFloat> &Delta,
                                    const MatrixBase<BaseFloat> &A,
                                    const Matrix<BaseFloat> &G,
                                    int32 max_iters) {
   int32 dim = sgmm.FeatureDim();
   Matrix<double> Delta_d(Delta);
   Matrix<double> G_d(G);
   SubMatrix<double> Delta_C(Delta_d, 0, dim, 0, dim);

   // Eq. (B.28): m = tr(\Delta K^T) - tr(\Delta S^T)
   BaseFloat m = TraceMatMat(Delta_d, stats.K_, kTrans)
                     - TraceMatMat(Delta_d, G_d, kTrans);
   // Eq. (B.29): n = \sum_i tr(\Delta \Sigma_{i}^{-1} \Delta S_{i})
   BaseFloat n = 0;
   SpMatrix<double> inv_covar;
   for (int32 i = 0, num_gauss = sgmm.NumGauss(); i < num_gauss; i++) {
     sgmm.GetInvCovars(i, &inv_covar);
     n += TraceMatSpMatSp(Delta_d, kTrans, inv_covar, Delta_d, kNoTrans,
                          stats.G_[i]);
   }

   BaseFloat step_size = 0.0;
   // initialize just to get rid of compile errors.
   BaseFloat obj_step_old, obj_step_new = 0.0;
   Matrix<double> new_A(dim, dim);
   Matrix<double> B(dim, dim);
   for (int32 iter_step = 0; iter_step < max_iters; iter_step++) {
     if (iter_step == 0) {
       obj_step_old = stats.beta_ * A.LogDet();  // Q_0 = \beta * log det(A)
     } else {
       obj_step_old = obj_step_new;
     }

     // Eq. (B.30); B = (A + k\Delta^{-C})^{-1} \Delta^{-C}
     new_A.CopyFromMat(A);
     new_A.AddMat(step_size, Delta_C, kNoTrans);
     new_A.InvertDouble();
     B.AddMatMat(1.0, new_A, kNoTrans, Delta_C, kNoTrans, 0.0);

     BaseFloat d = m - step_size * n + stats.beta_ * TraceMat(B);
     BaseFloat d2 = -n - stats.beta_ * TraceMatMat(B, B, kNoTrans);
     if (std::fabs(d / d2) < 0.000001) { break; }  // converged

     BaseFloat step_size_change = -(d / d2);
     step_size += step_size_change;  // Eq. (B.33)

     // Halve step size when the auxiliary function decreases.
     do {
       new_A.CopyFromMat(A);
       new_A.AddMat(step_size, Delta_C, kNoTrans);
       BaseFloat logdet = new_A.LogDet();
       obj_step_new = stats.beta_ * logdet + step_size * m -
           0.5 * step_size * step_size * n;

       if (obj_step_new - obj_step_old < -0.001) {
         KALDI_WARN << "Objective function decreased (" << obj_step_old << "->"
                    << obj_step_new << "). Halving step size change ("
                    << step_size << " -> " << (step_size - (step_size_change/2))
                    << ")";
         step_size_change /= 2;
         step_size -= step_size_change;  // take away half of our step
       }  // Facing numeric precision issues. Compute in double?
     } while (obj_step_new - obj_step_old < -0.001 && step_size_change > 1e-05);
   }
   return step_size;
 }


 bool FmllrSgmm2Accs::Update(const AmSgmm2 &sgmm,
                            const Sgmm2FmllrGlobalParams &globals,
                            const Sgmm2FmllrConfig &opts,
                            Matrix<BaseFloat> *out_xform,
                            BaseFloat *frame_count, BaseFloat *auxf_out) const {
   BaseFloat auxf_improv = 0.0, logdet = 0.0;
   KALDI_ASSERT(out_xform->NumRows() == dim_ && out_xform->NumCols() == dim_+1);
   BaseFloat mincount = (globals.HasBasis() ?
       std::min(opts.fmllr_min_count_basis, opts.fmllr_min_count_full) :
       opts.fmllr_min_count);
   bool using_subspace = (globals.HasBasis() ?
       (stats_.beta_ < opts.fmllr_min_count_full) : false);

   if (globals.IsEmpty())
     KALDI_ERR << "Must set up pre-transforms before estimating FMLLR.";

   KALDI_VLOG(1) << "Mincount = " << mincount << "; Basis: "
                 << std::string(globals.HasBasis()? "yes; " : "no; ")
                 << "Using subspace: " << std::string(using_subspace? "yes; "
                     : "no; ");

   int32 num_bases = 0;
   if (using_subspace) {
     KALDI_ASSERT(globals.fmllr_bases_.size() != 0);
     int32 max_bases = std::min(static_cast<int32>(globals.fmllr_bases_.size()),
                                opts.num_fmllr_bases);
     num_bases = (opts.bases_occ_scale <= 0.0)? max_bases :
         std::min(max_bases, static_cast<int32>(std::floor(opts.bases_occ_scale
                                                           * stats_.beta_)));
     KALDI_VLOG(1) << "Have " << stats_.beta_ << " frames for speaker: Using "
                   << num_bases << " fMLLR bases.";
   }

   // initialization just to get rid of compile errors.
   BaseFloat auxf_old = 0, auxf_new = 0;
   if (frame_count != NULL) *frame_count = stats_.beta_;

   // If occupancy is greater than the min count, update the transform
   if (stats_.beta_ >= mincount) {
     for (int32 iter = 0; iter < opts.fmllr_iters; iter++) {
       Matrix<BaseFloat> grad(dim_, dim_ + 1, kSetZero);
       Matrix<BaseFloat> G(dim_, dim_ + 1, kSetZero);
       auxf_new = this->FmllrObjGradient(sgmm, *out_xform, &grad, &G);

       // For diagnostic purposes
       KALDI_VLOG(3) << "Iter " << iter << ": Auxiliary function = "
           << (auxf_new / stats_.beta_) << " per frame over " << stats_.beta_
           << " frames";

       if (iter > 0) {
         // For diagnostic purposes
         KALDI_VLOG(2) << "Iter " << iter << ": Auxiliary function improvement: "
             << ((auxf_new - auxf_old) / stats_.beta_) << " per frame over "
             << (stats_.beta_) << " frames";
         auxf_improv += auxf_new - auxf_old;
       }

       Matrix<BaseFloat> pre_xformed_grad(dim_, dim_ + 1, kSetZero);
       ApplyPreXformToGradient(globals, grad, &pre_xformed_grad);
 //      std::cout << "Pre-X Grad = " << pre_xformed_grad << std::endl;

       // Transform P_sk with the Hessian
       Matrix<BaseFloat> hess_xformed_grad(dim_, dim_ + 1, kSetZero);
       ApplyHessianXformToGradient(globals, pre_xformed_grad,
                                   &hess_xformed_grad);
 //      std::cout << "Hess-X Grad = " << hess_xformed_grad << std::endl;

       // Update the actual FMLLR transform matrices
       Matrix<BaseFloat> hess_xformed_delta(dim_, dim_ + 1, kUndefined);
       if (using_subspace) {
         // Note that in this case we can simply store the speaker-specific
         // coefficients for each of the basis matrices. The current
         // implementation stores the computed transform to simplify the code!
         hess_xformed_delta.SetZero();
         for (int32 b = 0; b < num_bases; b++) {  // Eq (B.20)
           hess_xformed_delta.AddMat(TraceMatMat(globals.fmllr_bases_[b],
                                                 hess_xformed_grad, kTrans),
                                     globals.fmllr_bases_[b], kNoTrans);
         }
         hess_xformed_delta.Scale(1 / stats_.beta_);
       } else {
         hess_xformed_delta.CopyFromMat(hess_xformed_grad);
         hess_xformed_delta.Scale(1 / stats_.beta_);  // Eq. (B.19)
       }

 //      std::cout << "Hess-X Delta = " << hess_xformed_delta << std::endl;

       // Transform Delta with the Hessian
       Matrix<BaseFloat> pre_xformed_delta(dim_, dim_ + 1, kSetZero);
       ApplyInvHessianXformToChange(globals, hess_xformed_delta,
                                    &pre_xformed_delta);

       // Apply inverse pre-transform to Delta
       Matrix<BaseFloat> delta(dim_, dim_ + 1, kSetZero);
       ApplyInvPreXformToChange(globals, pre_xformed_delta, &delta);

 #ifdef KALDI_PARANOID
       // Check whether co-ordinate transformation is correct.
       {
         BaseFloat tr1 = TraceMatMat(delta, grad, kTrans);
         BaseFloat tr2 = TraceMatMat(pre_xformed_delta, pre_xformed_grad,
                                     kTrans);
         BaseFloat tr3 = TraceMatMat(hess_xformed_delta, hess_xformed_grad,
                                     kTrans);
         AssertEqual(tr1, tr2, 1e-5);
         AssertEqual(tr2, tr3, 1e-5);
       }
 #endif

       // Calculate the optimal step size
       SubMatrix<BaseFloat> A(*out_xform, 0, dim_, 0, dim_);
       BaseFloat step_size = CalcFmllrStepSize(stats_, sgmm, delta, A, G,
                                               opts.fmllr_iters);

       // Update: W <-- W + k \Delta   Eq. (B.34)
       out_xform->AddMat(step_size, delta, kNoTrans);
       auxf_old = auxf_new;

       // Check the objective function change for the last iteration
       if (iter == opts.fmllr_iters - 1) {
         auxf_new = this->FmllrObjGradient(sgmm, *out_xform, NULL, NULL);
         logdet = A.LogDet();
         // SubMatrix A points to the memory location of out_xform, and so will
         // contain the updated value

         KALDI_VLOG(2) << "Iter " << iter << ": Auxiliary function improvement: "
             << ((auxf_new - auxf_old) / stats_.beta_) << " per frame over "
             << (stats_.beta_) << " frames";
         auxf_improv += auxf_new - auxf_old;
       }
     }
     if (auxf_out != NULL) *auxf_out = auxf_improv;
     auxf_improv /= (stats_.beta_ + 1.0e-10);

     KALDI_LOG << "Auxiliary function improvement for FMLLR = " << auxf_improv
         << " per frame over " << stats_.beta_ << " frames. Log-determinant = "
         << logdet;
     return true;
   } else {
     KALDI_ASSERT(stats_.beta_ < mincount);
 //    std::cerr.precision(10);
 //    std::cerr.setf(std::ios::fixed,std::ios::floatfield);
     KALDI_WARN << "Not updating FMLLR because count is " << stats_.beta_
         << " < " << (mincount);
     if (auxf_out != NULL) *auxf_out = 0.0;
     return false;
   }  // Do not use the transform if it does not have enough counts
   KALDI_ASSERT(false);  // Should never be reached.
 }

 void EstimateSgmm2FmllrSubspace(const SpMatrix<double> &fmllr_grad_scatter,
                                int32 num_fmllr_bases, int32 feat_dim,
                                Sgmm2FmllrGlobalParams *globals, double min_eig) {
   KALDI_ASSERT(num_fmllr_bases > 0 && feat_dim > 0);
   if (num_fmllr_bases >  feat_dim * (feat_dim + 1)) {
     num_fmllr_bases = feat_dim * (feat_dim + 1);
     KALDI_WARN << "Limiting number of fMLLR bases to be the same as transform "
                << "dimension.";
   }

   vector< Matrix<BaseFloat> > &fmllr_bases(globals->fmllr_bases_);

   Vector<double> s(fmllr_grad_scatter.NumRows());
   Matrix<double> U(fmllr_grad_scatter.NumRows(),
                       fmllr_grad_scatter.NumRows());
   try {
     fmllr_grad_scatter.Eig(&s, &U);
     SortSvd(&s, &U);  // in case was not exactly sorted.
     KALDI_VLOG(1) << "Eigenvalues (max 200) of CMLLR scatter are: "
                   << (SubVector<double>(s, 0,
                                         std::min(static_cast<MatrixIndexT>(200),
                                                  s.Dim())));


 //    for (int32 b = 2; b < num_fmllr_bases; b++) {
 //      if (s(b) < min_eig) {
 //        num_fmllr_bases = b;
 //        KALDI_WARN << "Limiting number of fMLLR bases to " << num_fmllr_bases
 //                   << " because of small eigenvalues.";
 //        break;
 //      }
 //    }

     U.Transpose();  // Now the rows of U correspond to the basis vectors.
     fmllr_bases.resize(num_fmllr_bases);
     for (int32 b = 0; b < num_fmllr_bases; b++) {
       fmllr_bases[b].Resize(feat_dim, feat_dim + 1, kSetZero);
       fmllr_bases[b].CopyRowsFromVec(U.Row(b));
     }
     KALDI_LOG << "Estimated " << num_fmllr_bases << " fMLLR basis matrices.";
   } catch(const std::exception &e) {
     KALDI_WARN << "Not estimating FMLLR bases because of a thrown exception:\n"
                << e.what();
     fmllr_bases.resize(0);
   }
 }  // End of EstimateSgmm2FmllrSubspace


 }  // namespace kaldi

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::MatrixBase::InvertDouble
void InvertDouble(Real *LogDet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse [double].
Definition: kaldi-matrix.cc:2046

kaldi::AmSgmm2
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231

kaldi::SpMatrix< double >

kaldi::MatrixBase::Write
void Write(std::ostream &out, bool binary) const
write to stream.
Definition: kaldi-matrix.cc:1379

kaldi::AffineXformStats::K_
Matrix< double > K_
K_ is the summed outer product of [mean times inverse variance] with [extended data], scaled by the occupation counts; dimension is dim by (dim+1)
Definition: transform-common.h:36

parse-options.h

kaldi::kUndefined
Definition: matrix-common.h:39

kaldi::AmSgmm2::ComponentPosteriors
BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *post) const
Similar to LogLikelihood() function above, but also computes the posterior probabilities for the pre-...
Definition: am-sgmm2.cc:574

kaldi::FmllrSgmm2Accs::FmllrObjGradient
BaseFloat FmllrObjGradient(const AmSgmm2 &sgmm, const Matrix< BaseFloat > &xform, Matrix< BaseFloat > *grad_out, Matrix< BaseFloat > *G_out) const
Definition: fmllr-sgmm2.cc:229

kaldi::ApplyPreXformToGradient
static void ApplyPreXformToGradient(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &gradient_in, Matrix< BaseFloat > *gradient_out)
Definition: fmllr-sgmm2.cc:31

kaldi::Sgmm2FmllrGlobalParams::Write
void Write(std::ostream &out_stream, bool binary) const
Definition: fmllr-sgmm2.cc:103

fmllr-sgmm2.h

kaldi::Sgmm2FmllrGlobalParams::pre_xform_
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:103

kaldi::AffineXformStats
Definition: transform-common.h:30

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::TraceMat
double TraceMat(const MatrixBase< Real > &A)
Returns trace of matrix.
Definition: kaldi-matrix.h:1042

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::ReadBasicType
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55

kaldi::AmSgmm2::GetVarScaledSubstateSpeakerMean
void GetVarScaledSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
Definition: am-sgmm2.h:541

kaldi::Sgmm2FmllrConfig::fmllr_min_count_full
BaseFloat fmllr_min_count_full
Minimum occupancy count to stop using FMLLR bases and switch to regular FMLLR estimation.
Definition: fmllr-sgmm2.h:49

kaldi::AmSgmm2::GetInvCovars
void GetInvCovars(int32 gauss_index, SpMatrix< Real > *out) const
Templated accessors (used to accumulate in different precision)
Definition: am-sgmm2.h:511

kaldi::FmllrSgmm2Accs::AccumulateFromPosteriors
void AccumulateFromPosteriors(const AmSgmm2 &sgmm, const Sgmm2PerSpkDerivedVars &spk, const VectorBase< BaseFloat > &data, const std::vector< int32 > &gauss_select, const Matrix< BaseFloat > &posteriors, int32 state_index)
Definition: fmllr-sgmm2.cc:171

kaldi::MatrixBase::AddMat
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
Definition: kaldi-matrix.cc:356

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::ReadToken
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154

kaldi::SpMatrix::Eig
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
Definition: qr.cc:433

kaldi::Matrix< BaseFloat >

kaldi::MatrixBase::CopyFromMat
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Definition: kaldi-matrix.cc:862

kaldi::VectorBase::Min
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
Definition: kaldi-vector.cc:614

kaldi::PackedMatrix::NumRows
MatrixIndexT NumRows() const
Definition: packed-matrix.h:104

kaldi::MatrixBase::SetUnit
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
Definition: kaldi-matrix.cc:1348

kaldi::EstimateSgmm2FmllrSubspace
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
Definition: fmllr-sgmm2.cc:506

kaldi::FmllrSgmm2Accs::AccumulateForFmllrSubspace
void AccumulateForFmllrSubspace(const AmSgmm2 &sgmm, const Sgmm2FmllrGlobalParams &fmllr_globals, SpMatrix< double > *grad_scatter)
Definition: fmllr-sgmm2.cc:205

kaldi::Sgmm2FmllrConfig::bases_occ_scale
BaseFloat bases_occ_scale
Scale per-speaker count to determine number of CMLLR bases.
Definition: fmllr-sgmm2.h:54

kaldi::Sgmm2FmllrGlobalParams::HasBasis
bool HasBasis() const
Definition: fmllr-sgmm2.h:100

kaldi::kTrans
Definition: matrix-common.h:33

kaldi::Sgmm2FmllrGlobalParams::IsEmpty
bool IsEmpty() const
Definition: fmllr-sgmm2.h:96

kaldi::Sgmm2FmllrConfig::fmllr_iters
int32 fmllr_iters
Number of iterations in FMLLR estimation.
Definition: fmllr-sgmm2.h:41

kaldi::FmllrSgmm2Accs::Update
bool Update(const AmSgmm2 &model, const Sgmm2FmllrGlobalParams &fmllr_globals, const Sgmm2FmllrConfig &opts, Matrix< BaseFloat > *out_xform, BaseFloat *frame_count, BaseFloat *auxf_improv) const
Computes the FMLLR transform from the accumulated stats, using the pre-transforms in fmllr_globals...
Definition: fmllr-sgmm2.cc:356

kaldi::ApplyInvPreXformToChange
static void ApplyInvPreXformToChange(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &delta_in, Matrix< BaseFloat > *delta_out)
Definition: fmllr-sgmm2.cc:45

kaldi::AmSgmm2::FeatureDim
int32 FeatureDim() const
Definition: am-sgmm2.h:363

kaldi::SpMatrix::AddVec2
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v&#39;
Definition: sp-matrix.cc:946

kaldi::Matrix::Read
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
Definition: kaldi-matrix.cc:1450

kaldi::FmllrSgmm2Accs::Read
void Read(std::istream &in_stream, bool binary, bool add)
Definition: fmllr-sgmm2.cc:275

kaldi::Sgmm2FmllrGlobalParams::Read
void Read(std::istream &in_stream, bool binary)
Definition: fmllr-sgmm2.cc:122

float

kaldi::CalcFmllrStepSize
static BaseFloat CalcFmllrStepSize(const AffineXformStats &stats, const AmSgmm2 &sgmm, const MatrixBase< BaseFloat > &Delta, const MatrixBase< BaseFloat > &A, const Matrix< BaseFloat > &G, int32 max_iters)
Definition: fmllr-sgmm2.cc:286

kaldi::Sgmm2FmllrGlobalParams::inv_xform_
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:105

kaldi::MatrixBase::Scale
void Scale(Real alpha)
Multiply each element with a scalar value.
Definition: kaldi-matrix.cc:1209

kaldi::ExpectToken
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191

rnnlm::n
struct rnnlm::@11::@12 n

kaldi::AmSgmm2::Pdf2Group
int32 Pdf2Group(int32 j2) const
Definition: am-sgmm2.cc:196

kaldi::MatrixBase::AddMatMat
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
Definition: kaldi-matrix.cc:171

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::kNoTrans
Definition: matrix-common.h:34

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::ApplyInvHessianXformToChange
static void ApplyInvHessianXformToChange(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &delta_in, Matrix< BaseFloat > *delta_out)
Definition: fmllr-sgmm2.cc:81

kaldi::TraceMatMat
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
Definition: kaldi-matrix.cc:2692

kaldi::WriteToken
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::MatrixBase::SetZero
void SetZero()
Sets matrix to zero.
Definition: kaldi-matrix.cc:1330

kaldi::VectorBase::Scale
void Scale(Real alpha)
Multiplies all elements by this constant.
Definition: kaldi-vector.cc:963

kaldi::Sgmm2FmllrConfig
Configuration variables needed in the estimation of FMLLR for SGMMs.
Definition: fmllr-sgmm2.h:40

kaldi::AffineXformStats::G_
std::vector< SpMatrix< double > > G_
G_ is the outer product of extended-data, scaled by inverse variance, for each dimension.
Definition: transform-common.h:41

kaldi::AmSgmm2::NumGauss
int32 NumGauss() const
Definition: am-sgmm2.h:360

kaldi::TraceMatSpMatSp
Real TraceMatSpMatSp(const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const MatrixBase< Real > &C, MatrixTransposeType transC, const SpMatrix< Real > &D)
Returns tr (A B C D) (A and C may be transposed as specified by transA and transB).
Definition: sp-matrix.cc:438

kaldi::Sgmm2PerFrameDerivedVars::gselect
std::vector< int32 > gselect
Definition: am-sgmm2.h:143

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::Sgmm2FmllrConfig::num_fmllr_bases
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
Definition: fmllr-sgmm2.h:52

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::FmllrSgmm2Accs::Accumulate
BaseFloat Accumulate(const AmSgmm2 &sgmm, const VectorBase< BaseFloat > &data, const Sgmm2PerFrameDerivedVars &frame_vars, int32 state_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk)
Accumulation routine that computes the Gaussian posteriors and calls the AccumulateFromPosteriors fun...
Definition: fmllr-sgmm2.cc:156

kaldi::kSetZero
Definition: matrix-common.h:38

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::FmllrSgmm2Accs::Write
void Write(std::ostream &out_stream, bool binary) const
Definition: fmllr-sgmm2.cc:266

kaldi::MatrixBase::LogDet
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
Definition: kaldi-matrix.cc:2038

kaldi::Sgmm2FmllrConfig::fmllr_min_count_basis
BaseFloat fmllr_min_count_basis
Minimum occupancy count to estimate FMLLR using basis matrices.
Definition: fmllr-sgmm2.h:44

kaldi::ApplyHessianXformToGradient
static void ApplyHessianXformToGradient(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &gradient_in, Matrix< BaseFloat > *gradient_out)
Definition: fmllr-sgmm2.cc:59

kaldi::Sgmm2PerSpkDerivedVars
Definition: am-sgmm2.h:165

kaldi::AssertEqual
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276

KALDI_VLOG
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156

kaldi::VectorBase::CopyRowsFromMat
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
Definition: kaldi-vector.cc:348

kaldi::MatrixBase::Range
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202

kaldi::WriteBasicType
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34

kaldi::Sgmm2FmllrGlobalParams
Global adaptation parameters.
Definition: fmllr-sgmm2.h:91

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::Sgmm2FmllrGlobalParams::mean_scatter_
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
Definition: fmllr-sgmm2.h:107

kaldi::Sgmm2FmllrGlobalParams::fmllr_bases_
std::vector< Matrix< BaseFloat > > fmllr_bases_
{W}_b. [b][d][d], dim is [B][D][D+1].
Definition: fmllr-sgmm2.h:109

KALDI_LOG
#define KALDI_LOG
Definition: kaldi-error.h:153

kaldi::FmllrSgmm2Accs::Init
void Init(int32 dim, int32 num_gaussians)
Definition: fmllr-sgmm2.cc:146

kaldi::AffineXformStats::beta_
double beta_
beta_ is the occupation count.
Definition: transform-common.h:33

kaldi::MatrixBase::AddSpMat
void AddSpMat(const Real alpha, const SpMatrix< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*SpA*B.
Definition: kaldi-matrix.h:692

kaldi::Sgmm2PerFrameDerivedVars
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142

kaldi::AmSgmm2::NumSubstatesForGroup
int32 NumSubstatesForGroup(int32 j1) const
Definition: am-sgmm2.h:357

kaldi::Sgmm2FmllrConfig::fmllr_min_count
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
Definition: fmllr-sgmm2.h:46

kaldi::SubMatrix
Sub-matrix representation.
Definition: kaldi-matrix.h:988

kaldi::SubVector
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501

kaldi::SortSvd
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
Definition: kaldi-matrix.cc:2580

rnnlm::d
double d
Definition: mikolov-rnnlm-lib.cc:64

kaldi::VectorBase::Range
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94