doc/decodable-am-diag-gmm-regtree_8cc_source.html

 // transform/decodable-am-diag-gmm-regtree.cc

 // Copyright 2009-2011  Saarland University;  Lukas Burget
 //                2013  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include <vector>
 using std::vector;

 #include "transform/decodable-am-diag-gmm-regtree.h"

 namespace kaldi {


 // Returns the log-likelihood of pdf `state` on frame `frame` (both
 // zero-based).  Every Gaussian of the pdf is scored against the copy of the
 // frame's feature vector that belongs to the Gaussian's regression class, as
 // produced by fmllr_xform_.TransformFeature().  The result is stored in
 // log_like_cache_[state] together with the frame index so that a repeated
 // query for the same (frame, state) pair is answered without recomputation.
 // Raises KALDI_ERR on a feature/model dimension mismatch, on a pdf without
 // valid gconsts, and on a NaN or infinite result.
 BaseFloat DecodableAmDiagGmmRegtreeFmllr::LogLikelihoodZeroBased(int32 frame,
                                                           int32 state) {
   KALDI_ASSERT(frame < NumFramesReady() && frame >= 0);
   KALDI_ASSERT(state < NumIndices() && state >= 0);

   // Fill logdets_ (one entry per regression class) the first time through.
   if (!valid_logdets_) {
     logdets_.Resize(fmllr_xform_.NumRegClasses());
     fmllr_xform_.GetLogDets(&logdets_);
     valid_logdets_ = true;
   }

   // This state was already scored on this frame: reuse the stored value.
   if (log_like_cache_[state].hit_time == frame)
     return log_like_cache_[state].log_like;

   const DiagGmm &gmm = acoustic_model_.GetPdf(state);
   const VectorBase<BaseFloat> &feats = feature_matrix_.Row(frame);

   // Sanity checks on the model and the features.
   if (gmm.Dim() != feats.Dim()) {
     KALDI_ERR << "Dim mismatch: data dim = "  << feats.Dim()
         << " vs. model dim = " << gmm.Dim();
   }
   if (!gmm.valid_gconsts()) {
     KALDI_ERR << "State "  << (state)  << ": Must call ComputeGconsts() "
         "before computing likelihood.";
   }

   // The transformed features and their element-wise squares depend only on
   // the frame, so they are recomputed only when the frame changes.
   if (previous_frame_ != frame) {
     fmllr_xform_.TransformFeature(feats, &xformed_data_);
     xformed_data_squared_ = xformed_data_;
     for (size_t i = 0; i < xformed_data_squared_.size(); ++i)
       xformed_data_squared_[i].ApplyPow(2.0);
     previous_frame_ = frame;
   }

   // Start from the gconsts; a fresh copy is needed for every pdf.
   Vector<BaseFloat> comp_loglikes(gmm.gconsts());
   const int32 num_gauss = gmm.NumGauss();
   for (int32 g = 0; g < num_gauss; ++g) {
     const int32 base_class = regtree_.Gauss2BaseclassId(state, g);
     const int32 reg_class = fmllr_xform_.Base2RegClass(base_class);
     BaseFloat &ll = comp_loglikes(g);
     // ll += means * inv(vars) * data.
     ll += VecVec(gmm.means_invvars().Row(g), xformed_data_[reg_class]);
     // ll += -0.5 * inv(vars) * data_sq.
     ll -= 0.5 * VecVec(gmm.inv_vars().Row(g),
                        xformed_data_squared_[reg_class]);
     // Add the logdets_ entry of the regression class that was used.
     ll += logdets_(reg_class);
   }

   const BaseFloat total = comp_loglikes.LogSumExp(log_sum_exp_prune_);
   if (KALDI_ISNAN(total) || KALDI_ISINF(total))
     KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)";

   // Remember the answer for subsequent queries on the same frame.
   log_like_cache_[state].log_like = total;
   log_like_cache_[state].hit_time = frame;

   return total;
 }

 // Destructor: releases the heap-allocated per-pdf cache entries (transformed
 // mean * inv-var matrices and the matching gconst vectors).  DeletePointers()
 // deletes every non-NULL pointer in the vector and sets the entry to NULL.
 DecodableAmDiagGmmRegtreeMllr::~DecodableAmDiagGmmRegtreeMllr() {
   DeletePointers(&xformed_mean_invvars_);
   DeletePointers(&xformed_gconsts_);
 }


 void DecodableAmDiagGmmRegtreeMllr::InitCache() {
   if (xformed_mean_invvars_.size() != 0)
     DeletePointers(&xformed_mean_invvars_);
   if (xformed_gconsts_.size() != 0)
     DeletePointers(&xformed_gconsts_);
   int32 num_pdfs = acoustic_model_.NumPdfs();
   xformed_mean_invvars_.resize(num_pdfs);
   xformed_gconsts_.resize(num_pdfs);
   is_cached_.resize(num_pdfs, false);
   ResetLogLikeCache();
 }


 // This is almost the same code as DiagGmm::ComputeGconsts, except that
 // means are used instead of means * inv(vars). This saves some computation.
 static void ComputeGconsts(const VectorBase<BaseFloat> &weights,
                            const MatrixBase<BaseFloat> &means,
                            const MatrixBase<BaseFloat> &inv_vars,
                            VectorBase<BaseFloat> *gconsts_out) {
   int32 num_gauss = weights.Dim();
   int32 dim = means.NumCols();
   KALDI_ASSERT(means.NumRows() == num_gauss
       && inv_vars.NumRows() == num_gauss && inv_vars.NumCols() == dim);
   KALDI_ASSERT(gconsts_out->Dim() == num_gauss);

   BaseFloat offset = -0.5 * M_LOG_2PI * dim;  // constant term in gconst.
   int32 num_bad = 0;

   for (int32 gauss = 0; gauss < num_gauss; gauss++) {
     KALDI_ASSERT(weights(gauss) >= 0);  // Cannot have negative weights.
     BaseFloat gc = Log(weights(gauss)) + offset;  // May be -inf if weights == 0
     for (int32 d = 0; d < dim; d++) {
       gc += 0.5 * Log(inv_vars(gauss, d)) - 0.5 * means(gauss, d)
         * means(gauss, d) * inv_vars(gauss, d);  // diff from DiagGmm version.
     }

     if (KALDI_ISNAN(gc)) {  // negative infinity is OK but NaN is not acceptable
       KALDI_ERR << "At component "  << gauss
                 << ", not a number in gconst computation";
     }
     if (KALDI_ISINF(gc)) {
       num_bad++;
       // If positive infinity, make it negative infinity.
       // Want to make sure the answer becomes -inf in the end, not NaN.
       if (gc > 0) gc = -gc;
     }
     (*gconsts_out)(gauss) = gc;
   }
   if (num_bad > 0)
     KALDI_WARN << num_bad << " unusable components found while computing "
                << "gconsts.";
 }


 // Returns, for pdf `state`, the matrix of transformed means multiplied
 // element-wise by the inverse variances.  On the first request for a pdf the
 // matrix is computed and stored; the matching gconsts are computed and stored
 // at the same time, which is why GetXformedGconsts() requires this function
 // to have been called first.  Later requests return the stored matrix.
 const Matrix<BaseFloat>& DecodableAmDiagGmmRegtreeMllr::GetXformedMeanInvVars(
     int32 state) {
   if (is_cached_[state]) {  // found in cache
     KALDI_ASSERT(xformed_mean_invvars_[state] != NULL);
     KALDI_VLOG(3) << "For PDF index " << state << ": transformed means "
                   << "found in cache.";
     return *xformed_mean_invvars_[state];
   }

   // Not cached yet: transform the means and store the results.
   KALDI_ASSERT(xformed_mean_invvars_[state] == NULL);
   KALDI_VLOG(3) << "For PDF index " << state << ": transforming means.";

   const int32 num_gauss = acoustic_model_.GetPdf(state).NumGauss();
   const int32 dim = acoustic_model_.Dim();
   const Vector<BaseFloat> &weights = acoustic_model_.GetPdf(state).weights();
   const Matrix<BaseFloat> &invvars = acoustic_model_.GetPdf(state).inv_vars();

   // The new matrix is put into the cache vector straight away, so the vector
   // owns it from the moment it exists.
   xformed_mean_invvars_[state] = new Matrix<BaseFloat>(num_gauss, dim);
   Matrix<BaseFloat> *xformed = xformed_mean_invvars_[state];
   mllr_xform_.GetTransformedMeans(regtree_, acoustic_model_, state, xformed);

   // The gconsts are computed from the transformed means *before* these are
   // multiplied with the inverse variances.
   xformed_gconsts_[state] = new Vector<BaseFloat>(num_gauss);
   ComputeGconsts(weights, *xformed, invvars, xformed_gconsts_[state]);

   // Only now turn the means into means * inv(vars).
   xformed->MulElements(invvars);
   is_cached_[state] = true;
   return *xformed;
 }

 // Returns the gconsts stored for pdf `state`.  They are produced as a side
 // product of GetXformedMeanInvVars(); asking for them before that function
 // has been called for the same pdf raises KALDI_ERR.
 const Vector<BaseFloat>& DecodableAmDiagGmmRegtreeMllr::GetXformedGconsts(
     int32 state) {
   const bool cached = is_cached_[state];
   if (!cached) {
     KALDI_ERR << "GConsts not cached for state: " << state << ". Must call "
               << "GetXformedMeanInvVars() first.";
   }
   KALDI_ASSERT(xformed_gconsts_[state] != NULL);
   const Vector<BaseFloat> &gconsts = *xformed_gconsts_[state];
   return gconsts;
 }

 // Returns the log-likelihood of pdf `state` on frame `frame` (both
 // zero-based), using the transformed means and gconsts obtained from
 // GetXformedMeanInvVars() / GetXformedGconsts() together with the pdf's own
 // inverse variances.  The result is stored in log_like_cache_[state] with the
 // frame index, so a repeated query for the same (frame, state) pair returns
 // the stored value.  Raises KALDI_ERR on a feature/model dimension mismatch
 // and on a NaN or infinite result.
 BaseFloat DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased(int32 frame,
                                                                 int32 state) {
   KALDI_ASSERT(frame < NumFramesReady() && frame >= 0);
   KALDI_ASSERT(state < NumIndices() && state >= 0);

   // This state was already scored on this frame: reuse the stored value.
   if (log_like_cache_[state].hit_time == frame)
     return log_like_cache_[state].log_like;

   const DiagGmm &gmm = acoustic_model_.GetPdf(state);
   const VectorBase<BaseFloat> &feats = feature_matrix_.Row(frame);

   // Sanity check on the dimensions.
   if (gmm.Dim() != feats.Dim()) {
     KALDI_ERR << "Dim mismatch: data dim = "  << feats.Dim()
         << " vs. model dim = " << gmm.Dim();
   }

   // The squared features depend only on the frame, so they are recomputed
   // only when the frame changes.
   if (previous_frame_ != frame) {
     data_squared_.CopyFromVec(feats);
     data_squared_.ApplyPow(2.0);
     previous_frame_ = frame;
   }

   // The means must be fetched first: that call is what fills in the gconsts.
   const Matrix<BaseFloat> &xformed_means_invvars =
       GetXformedMeanInvVars(state);
   const Vector<BaseFloat> &xformed_gconsts = GetXformedGconsts(state);

   // Start from the gconsts; a fresh copy is needed for every pdf.
   Vector<BaseFloat> comp_loglikes(xformed_gconsts);
   // comp_loglikes += means * inv(vars) * data.
   comp_loglikes.AddMatVec(1.0, xformed_means_invvars, kNoTrans, feats, 1.0);
   // comp_loglikes += -0.5 * inv(vars) * data_sq.
   comp_loglikes.AddMatVec(-0.5, gmm.inv_vars(), kNoTrans, data_squared_, 1.0);

   const BaseFloat total = comp_loglikes.LogSumExp(log_sum_exp_prune_);
   if (KALDI_ISNAN(total) || KALDI_ISINF(total))
     KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)";

   // Remember the answer for subsequent queries on the same frame.
   log_like_cache_[state].log_like = total;
   log_like_cache_[state].hit_time = frame;

   return total;
 }

 }  // namespace kaldi
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::DiagGmm::Dim
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74

kaldi::RegressionTree::Gauss2BaseclassId
int32 Gauss2BaseclassId(size_t pdf_id, size_t gauss_id) const
Definition: regression-tree.h:71

kaldi::DeletePointers
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184

M_LOG_2PI
#define M_LOG_2PI
Definition: kaldi-math.h:60

kaldi::DecodableAmDiagGmmRegtreeFmllr::xformed_data_
std::vector< Vector< BaseFloat > > xformed_data_
Definition: decodable-am-diag-gmm-regtree.h:72

kaldi::DecodableAmDiagGmmUnmapped::log_sum_exp_prune_
BaseFloat log_sum_exp_prune_
Definition: decodable-am-diag-gmm.h:82

kaldi::DecodableAmDiagGmmRegtreeFmllr::valid_logdets_
bool valid_logdets_
Definition: decodable-am-diag-gmm-regtree.h:75

kaldi::DecodableAmDiagGmmRegtreeMllr::InitCache
void InitCache()
Initializes the mean & gconst caches.
Definition: decodable-am-diag-gmm-regtree.cc:97

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::DiagGmm::means_invvars
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179

KALDI_ISINF
#define KALDI_ISINF
Definition: kaldi-math.h:73

kaldi::DecodableAmDiagGmmRegtreeFmllr::logdets_
Vector< BaseFloat > logdets_
Definition: decodable-am-diag-gmm-regtree.h:74

kaldi::DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased
virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index)
Definition: decodable-am-diag-gmm-regtree.cc:190

kaldi::DiagGmm::gconsts
const Vector< BaseFloat > & gconsts() const
Const accessors.
Definition: diag-gmm.h:174

kaldi::DiagGmm::valid_gconsts
bool valid_gconsts() const
Definition: diag-gmm.h:181

kaldi::RegtreeFmllrDiagGmm::GetLogDets
void GetLogDets(VectorBase< BaseFloat > *out) const
Definition: regtree-fmllr-diag-gmm.h:131

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::Matrix< BaseFloat >

kaldi::DecodableAmDiagGmmUnmapped::previous_frame_
int32 previous_frame_
Definition: decodable-am-diag-gmm.h:81

kaldi::VectorBase::LogSumExp
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow If prune > 0.0, ignores terms less than the max - prune...
Definition: kaldi-vector.cc:755

kaldi::DecodableAmDiagGmmRegtreeFmllr::regtree_
const RegressionTree & regtree_
Definition: decodable-am-diag-gmm-regtree.h:71

kaldi::DecodableAmDiagGmmUnmapped::data_squared_
Vector< BaseFloat > data_squared_
Cache for fast likelihood calculation.
Definition: decodable-am-diag-gmm.h:91

kaldi::DecodableAmDiagGmmRegtreeFmllr::fmllr_xform_
const RegtreeFmllrDiagGmm & fmllr_xform_
Definition: decodable-am-diag-gmm-regtree.h:70

kaldi::RegtreeFmllrDiagGmm::NumRegClasses
int32 NumRegClasses() const
Definition: regtree-fmllr-diag-gmm.h:94

kaldi::DecodableAmDiagGmmUnmapped::acoustic_model_
const AmDiagGmm & acoustic_model_
Definition: decodable-am-diag-gmm.h:79

kaldi::MatrixBase::Row
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188

kaldi::Log
double Log(double x)
Definition: kaldi-math.h:100

float

kaldi::DecodableAmDiagGmmRegtreeFmllr::xformed_data_squared_
std::vector< Vector< BaseFloat > > xformed_data_squared_
Definition: decodable-am-diag-gmm-regtree.h:73

kaldi::DecodableAmDiagGmmRegtreeFmllr::NumIndices
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
Definition: decodable-am-diag-gmm-regtree.h:60

kaldi::DecodableAmDiagGmmRegtreeMllr::GetXformedMeanInvVars
const Matrix< BaseFloat > & GetXformedMeanInvVars(int32 state_index)
Get the transformed means times inverse variances for a given pdf, and cache them.
Definition: decodable-am-diag-gmm-regtree.cc:151

kaldi::RegtreeFmllrDiagGmm::TransformFeature
void TransformFeature(const VectorBase< BaseFloat > &in, std::vector< Vector< BaseFloat > > *out) const
Get the transformed features for each of the transforms.
Definition: regtree-fmllr-diag-gmm.cc:118

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::kNoTrans
Definition: matrix-common.h:34

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::DecodableAmDiagGmmRegtreeMllr::~DecodableAmDiagGmmRegtreeMllr
~DecodableAmDiagGmmRegtreeMllr()
Definition: decodable-am-diag-gmm-regtree.cc:91

kaldi::DiagGmm::weights
const Vector< BaseFloat > & weights() const
Definition: diag-gmm.h:178

kaldi::DiagGmm::NumGauss
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::DecodableAmDiagGmmUnmapped::feature_matrix_
const Matrix< BaseFloat > & feature_matrix_
Definition: decodable-am-diag-gmm.h:80

kaldi::VectorBase::AddMatVec
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92

kaldi::DecodableAmDiagGmmUnmapped::log_like_cache_
std::vector< LikelihoodCacheRecord > log_like_cache_
Definition: decodable-am-diag-gmm.h:89

kaldi::DecodableAmDiagGmmUnmapped::ResetLogLikeCache
void ResetLogLikeCache()
Definition: decodable-am-diag-gmm.cc:74

kaldi::AmDiagGmm::Dim
int32 Dim() const
Definition: am-diag-gmm.h:79

kaldi::AmDiagGmm::NumPdfs
int32 NumPdfs() const
Definition: am-diag-gmm.h:82

kaldi::AmDiagGmm::GetPdf
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
Definition: am-diag-gmm.h:119

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

KALDI_ISNAN
#define KALDI_ISNAN
Definition: kaldi-math.h:72

decodable-am-diag-gmm-regtree.h

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::DecodableAmDiagGmmRegtreeMllr::GetXformedGconsts
const Vector< BaseFloat > & GetXformedGconsts(int32 state_index)
Get the cached (while computing transformed means) gconsts for likelihood calculation.
Definition: decodable-am-diag-gmm-regtree.cc:180

KALDI_VLOG
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156

kaldi::DiagGmm
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42

kaldi::DecodableAmDiagGmmRegtreeFmllr::NumFramesReady
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
Definition: decodable-am-diag-gmm-regtree.h:57

kaldi::DecodableAmDiagGmmRegtreeFmllr::LogLikelihoodZeroBased
virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index)
Definition: decodable-am-diag-gmm-regtree.cc:29

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::VecVec
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37

kaldi::RegtreeFmllrDiagGmm::Base2RegClass
int32 Base2RegClass(int32 bclass) const
Definition: regtree-fmllr-diag-gmm.h:97

rnnlm::d
double d
Definition: mikolov-rnnlm-lib.cc:64

kaldi::ComputeGconsts
static void ComputeGconsts(const VectorBase< BaseFloat > &weights, const MatrixBase< BaseFloat > &means, const MatrixBase< BaseFloat > &inv_vars, VectorBase< BaseFloat > *gconsts_out)
Definition: decodable-am-diag-gmm-regtree.cc:112

kaldi::DiagGmm::inv_vars
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180