47 const std::vector<int32> &gselect,
61 for (
size_t i = 0;
i < gselect.size();
i++) {
62 stats.
a.AddVec(posterior(
i), means_invvars.
Row(gselect[
i]));
63 stats.
b.AddVec(posterior(i), inv_vars.Row(gselect[i]));
71 && gmm.
Dim() == fgmm_accs.
Dim());
74 for (
int32 g = 0; g < num_gauss; g++) {
75 double this_occ = fgmm_accs.
occupancy()(g);
76 if (this_occ == 0)
continue;
82 this_extended_mean_acc.
Range(0, dim).CopyFromVec(this_mean_acc);
83 this_extended_mean_acc(dim) = this_occ;
88 this_extended_cov_acc.
Range(0, dim, 0, dim).CopyFromMat(this_cov_acc);
89 this_extended_cov_acc.
Row(dim).CopyFromVec(this_extended_mean_acc);
94 K_.
AddVecVec(1.0, this_mean_invvar_dbl, this_extended_mean_acc);
96 G_[
d].AddSp(this_invvar(
d), this_extended_cov_acc_sp);
109 posterior.
Scale(weight);
116 const std::vector<int32> &gselect,
119 KALDI_ASSERT(!gselect.empty() &&
"Empty gselect information");
123 BaseFloat loglike = loglikes.ApplySoftMax();
124 loglikes.Scale(weight);
138 KALDI_ERR <<
"You must initialize the fMLLR matrix to a non-singular value " 139 "(so we can report objective function changes); e.g. call SetUnit()";
140 if (opts.
update_type ==
"full" && this->opts_.update_type !=
"full") {
141 KALDI_ERR <<
"You are requesting a full-fMLLR update but you accumulated " 142 <<
"stats for more limited update type.";
157 <<
", fmllr-update-type must be one of \"full\"|\"diag\"|\"offset\"|\"none\"";
159 if (objf_impr) *objf_impr = objf_change;
160 if (count) *count =
beta_;
162 KALDI_WARN <<
"Not updating fMLLR since below min-count: count is " <<
beta_;
163 if (objf_impr) *objf_impr = 0.0;
164 if (count) *count =
beta_;
171 std::string fmllr_type,
174 if (fmllr_type ==
"full") {
176 }
else if (fmllr_type ==
"diag") {
178 }
else if (fmllr_type ==
"offset") {
180 }
else if (fmllr_type ==
"none") {
182 KALDI_WARN <<
"You set fMLLR type to \"none\" but your starting transform " 183 "is not unit [this is strange, and diagnostics will be wrong].";
187 KALDI_ERR <<
"Unknown fMLLR update type " << fmllr_type
188 <<
", must be one of \"full\"|\"diag\"|\"offset\"|\"none\"";
206 cofact_mat.
Invert(&logdet);
213 cofact_row.
Range(0, dim).CopyRowFromMat(cofact_mat, row);
216 cofact_row_invg.
AddSpVec(1.0, inv_G, cofact_row, 0.0);
219 double e1 =
VecVec(cofact_row_invg, cofact_row);
220 double e2 =
VecVec(cofact_row_invg, k);
221 double discr = std::sqrt(e2 * e2 + 4 * e1 * beta);
222 double alpha1 = (-e2 + discr) / (2 * e1);
223 double alpha2 = (-e2 - discr) / (2 * e1);
224 double auxf1 = beta *
Log(std::abs(alpha1 * e1 + e2)) -
225 0.5 * alpha1 * alpha1 * e1;
226 double auxf2 = beta *
Log(std::abs(alpha2 * e1 + e2)) -
227 0.5 * alpha2 * alpha2 * e1;
228 double alpha = (auxf1 > auxf2) ? alpha1 : alpha2;
231 cofact_row.
Scale(alpha);
232 cofact_row.
AddVec(1.0, k);
233 transform->
Row(row).AddSpVec(1.0, inv_G, cofact_row, 0.0);
243 vector< SpMatrix<double> > inv_g(dim);
245 inv_g[
d].Resize(dim + 1);
246 inv_g[
d].CopyFromSp(stats.
G_[
d]);
253 for (
int32 iter = 0; iter < num_iters; ++iter) {
261 objf_improvement = new_objf - old_objf;
262 KALDI_LOG <<
"fMLLR objf improvement is " 263 << (objf_improvement / (stats.
beta_ + 1.0e-10))
264 <<
" per frame over " << stats.
beta_ <<
" frames.";
265 if (objf_improvement < 0.0 && !
ApproxEqual(new_objf, old_objf)) {
266 KALDI_WARN <<
"No applying fMLLR transform change because objective " 267 <<
"function did not increase.";
271 return objf_improvement;
322 double beta = stats.
beta_;
325 KALDI_WARN <<
"Computing diagonal fMLLR matrix: no stats [using original transform]";
332 double k_ii = stats.
K_(
i,
i), k_id = stats.
K_(
i, dim),
333 g_iii = stats.
G_[
i](
i,
i), g_idd = stats.
G_[
i](dim, dim),
334 g_idi = stats.
G_[
i](dim,
i);
335 double a = g_idi*g_idi/g_idd - g_iii,
336 b = k_ii - g_idi*k_id/g_idd,
338 double s = (-b - std::sqrt(b*b - 4*a*c)) / (2*a);
340 double o = (k_id - s*g_idi) / g_idd;
341 (*out_xform)(
i,
i) = s;
342 (*out_xform)(
i, dim) = o;
345 KALDI_VLOG(2) <<
"fMLLR objective function improvement = " 346 << (new_obj - old_obj);
347 return new_obj - old_obj;
366 BaseFloat objf_before = -0.5 * b_i * b_i * stats.
G_[
i](dim, dim)
367 - b_i * stats.
G_[i](i, dim) + b_i * stats.
K_(i, dim);
368 b_i = (stats.
K_(i, dim) - stats.
G_[
i](
i, dim)) / stats.
G_[
i](dim, dim);
369 (*out_xform)(
i, dim) = b_i;
370 BaseFloat objf_after = -0.5 * b_i * b_i * stats.
G_[
i](dim, dim)
371 - b_i * stats.
G_[i](i, dim) + b_i * stats.
K_(i, dim);
372 if (objf_after < objf_before)
373 KALDI_WARN <<
"Objf decrease in offset estimation:" 374 << objf_after <<
" < " << objf_before;
375 objf_impr += objf_after - objf_before;
390 if (xform.
NumRows() == dim+1) {
405 xform_full(
i, dim) = xform(
i, dim);
407 xform_full(dim, dim) = 1.0;
413 stats->
G_[
i].CopyFromSp(Gtmp);
472 stats->
K_(
i,
j) = d_i * stats->
K_(
i,
j) - d_i * b_i * stats->
G_[
i](dim,
j);
477 stats->
G_[
i].Scale(d_i * d_i);
491 obj -= 0.5 *
VecVec(xform_row_g, xform_d.
Row(
d));
505 obj -= 0.5 *
VecVec(xform_row_g, xform.
Row(
d));
524 obj -= 0.5 *
VecVec(xform_row_g, xform_d.
Row(
d));
531 tmp_grad.
Range(0, dim, 0, dim).CopyFromMat(A);
532 tmp_grad.
Range(0, dim, 0, dim).Invert();
533 tmp_grad.
Range(0, dim, 0, dim).Transpose();
556 stats.
x.CopyFromVec(data);
566 if (stats.
count == 0.0)
return;
569 xplus.
Range(0, dim).CopyFromVec(stats.
x);
582 this->
G_[
i].AddSp(stats.
b(
i), scatter);
587 this->
G_[
i](
i,
i) += scale * x_i * x_i;
588 this->
G_[
i](dim,
i) += scale * 1.0 * x_i;
589 this->
G_[
i](dim, dim) += scale * 1.0 * 1.0;
bool ApproxEqual(const VectorBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
void ApplyModelTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
ApplyModelTransformToStats takes a transform "xform", which must be diagonal (i.e. a diagonal matrix with an offset column), and applies it to the stats.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
void CopyColFromVec(const VectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
bool IsDiagonal(Real cutoff=1.0e-05) const
Returns true if matrix is Diagonal.
int32 NumGauss() const
Returns the number of mixture components.
const std::vector< SpMatrix< double > > & covariance_accumulator() const
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
BaseFloat ComputeFmllrMatrixDiagGmmFull(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
Updates the FMLLR matrix using Mark Gales' row-by-row update.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
const Matrix< BaseFloat > & means_invvars() const
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
SingleFrameStats single_frame_stats_
std::string update_type
"full", "diag", "offset", "none"
int32 Dim() const
Returns the dimensionality of the feature vectors.
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <-- beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
void CommitSingleFrameStats()
const Vector< double > & occupancy() const
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
BaseFloat FmllrAuxfGradient(const MatrixBase< BaseFloat > &xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *grad_out)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
void ApplyFeatureTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
This function applies a feature-level transform to stats (useful for certain techniques based on fMLLR).
void InitSingleFrameStats(const VectorBase< BaseFloat > &data)
BaseFloat ComputeFmllrMatrixDiagGmmOffset(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does offset-only fMLLR, i.e. it only estimates an offset.
bool IsZero(Real cutoff=1.0e-05) const
Returns true if matrix is all zeros.
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
FmllrDiagGmmAccs(const FmllrOptions &opts=FmllrOptions())
void Scale(Real alpha)
Multiply each element with a scalar value.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
const Matrix< double > & mean_accumulator() const
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does diagonal fMLLR (i.e. it only estimates a scale and an offset per dimension).
Real Sum() const
Returns sum of the elements.
BaseFloat AccumulateForGmmPreselect(const DiagGmm &gmm, const std::vector< int32 > &gselect, const VectorBase< BaseFloat > &data, BaseFloat weight)
This is like AccumulateForGmm but when you have gselect (Gaussian selection) information.
BaseFloat ComputeFmllrMatrixDiagGmm(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, std::string fmllr_type, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
This function internally calls ComputeFmllrMatrixDiagGmm{Full, Diagonal, Offset}, depending on "fmllr_type".
void FmllrInnerUpdate(SpMatrix< double > &inv_G, VectorBase< double > &k, double beta, int32 row, MatrixBase< double > *transform)
This function does one row of the inner-loop fMLLR transform update.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <-- beta*this + alpha * M * A * M^T.
float FmllrAuxFuncDiagGmm(const MatrixBase< float > &xform, const AffineXformStats &stats)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
void CopyRowFromVec(const VectorBase< Real > &v, const MatrixIndexT row)
Copy vector into specific row of matrix.
Definition for Gaussian Mixture Model with diagonal covariances.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void AccumulateFromPosteriorsPreselect(const DiagGmm &gmm, const std::vector< int32 > &gselect, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Provides a vector abstraction class.
bool IsUnit(Real cutoff=1.0e-05) const
Returns true if the matrix is all zeros, except for ones on diagonal.
bool DataHasChanged(const VectorBase< BaseFloat > &data) const
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Sub-matrix representation.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
BaseFloat AccumulateForGmm(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat weight)
Accumulate stats for a single GMM in the model; returns log likelihood.
void Update(const FmllrOptions &opts, MatrixBase< BaseFloat > *fmllr_mat, BaseFloat *objf_impr, BaseFloat *count)
Update.
const Matrix< BaseFloat > & inv_vars() const
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).