38 Wpre_plus(dim, dim) = 1;
52 Wpre_plus(dim, dim) = 1;
65 KALDI_ERR <<
"Cannot estimate FMLLR: mean scatter has 0 eigenvalues.";
66 for (
int32 r = 0; r < dim; r++) {
67 for (
int32 c = 0; c < r; c++) {
69 (*gradient_out)(r, c) = gradient_in(r, c) / std::sqrt(1 + D(c));
71 (*gradient_out)(c, r) = gradient_in(c, r) / std::sqrt(1 + D(r) -
72 1 / (1 + D(c))) - gradient_in(r, c) / ((1 + D(c)) *
73 std::sqrt(1 + D(r) - 1 / (1 + D(c))));
76 (*gradient_out)(r, r) = gradient_in(r, r) / std::sqrt(2 + D(r));
77 (*gradient_out)(r, dim) = gradient_in(r, dim);
87 KALDI_ERR <<
"Cannot estimate FMLLR: mean scatter has 0 eigenvalues.";
88 for (
int32 r = 0; r < dim; r++) {
89 for (
int32 c = 0; c < r; c++) {
91 (*delta_out)(r, c) = delta_in(r, c) / std::sqrt(1 + D(c)) -
92 delta_in(c, r) / ((1 + D(c)) * std::sqrt(1 + D(r) - 1 / (1 + D(c))));
94 (*delta_out)(c, r) = delta_in(c, r) / std::sqrt(1 + D(r) - 1/ (1 + D(c)));
97 (*delta_out)(r, r) = delta_in(r, r) / std::sqrt(2 + D(r));
98 (*delta_out)(r, dim) = delta_in(r, dim);
104 WriteToken(out, binary,
"<SGMM_FMLLR_GLOBAL_PARAMS>");
115 for (uint32
i = 0;
i < tmp;
i++) {
119 WriteToken(out, binary,
"</SGMM_FMLLR_GLOBAL_PARAMS>");
123 ExpectToken(in, binary,
"<SGMM_FMLLR_GLOBAL_PARAMS>");
132 if (token ==
"<FMLLR_BASIS>") {
136 for (uint32
i = 0;
i < tmp;
i++) {
140 if (token !=
"</SGMM_FMLLR_GLOBAL_PARAMS>")
141 KALDI_ERR <<
"Unexpected token '" << token <<
"' found.";
152 stats_.Init(dim, num_gaussians);
165 posteriors.
Scale(weight);
166 AccumulateFromPosteriors(model, *spk, data, frame_vars.
gselect, posteriors,
175 const vector<int32> &gselect,
179 extended_data.
Range(0, dim_).CopyFromVec(data);
180 extended_data(dim_) = 1.0;
182 scatter.
AddVec2(1.0, extended_data);
184 for (
int32 ki = 0, ki_max = gselect.size(); ki < ki_max; ki++) {
189 BaseFloat gammat_jmi = posteriors(ki, m);
192 if (gammat_jmi > 0.0) {
193 stats_.beta_ += gammat_jmi;
197 stats_.K_.AddVecVec(gammat_jmi, var_scaled_mean, extended_data);
199 stats_.G_[
i].AddSp(gammat_jmi, scatter);
207 if (stats_.beta_ <= 0.0) {
208 KALDI_WARN <<
"Not committing any stats since no stats accumulated.";
215 this->FmllrObjGradient(sgmm, xform, &grad, NULL);
222 grad_vec.
Scale(1 / std::sqrt(stats_.beta_));
223 grad_scatter->
AddVec2(1.0, grad_vec);
224 KALDI_LOG <<
"Frame counts for when committing fMLLR subspace stats are " 235 KALDI_ASSERT(stats_.G_.size() ==
static_cast<size_t>(num_gauss));
240 double obj = stats_.beta_ * A.
LogDet() +
242 for (
int32 i = 0;
i < num_gauss;
i++) {
244 xform_g.AddMatSp(1.0, xform_d,
kNoTrans, stats_.G_[
i], 0.0);
251 if (grad_out != NULL) {
253 grad_d.
Range(0, dim, 0, dim).CopyFromMat(A);
254 grad_d.
Range(0, dim, 0, dim).InvertDouble();
255 grad_d.
Range(0, dim, 0, dim).Transpose();
256 grad_d.
Scale(stats_.beta_);
271 stats_.Write(out, binary);
281 stats_.Read(in, binary, add);
311 BaseFloat obj_step_old, obj_step_new = 0.0;
314 for (
int32 iter_step = 0; iter_step < max_iters; iter_step++) {
315 if (iter_step == 0) {
318 obj_step_old = obj_step_new;
329 if (std::fabs(d / d2) < 0.000001) {
break; }
332 step_size += step_size_change;
339 obj_step_new = stats.
beta_ * logdet + step_size * m -
340 0.5 * step_size * step_size *
n;
342 if (obj_step_new - obj_step_old < -0.001) {
343 KALDI_WARN <<
"Objective function decreased (" << obj_step_old <<
"->" 344 << obj_step_new <<
"). Halving step size change (" 345 << step_size <<
" -> " << (step_size - (step_size_change/2))
347 step_size_change /= 2;
348 step_size -= step_size_change;
350 }
while (obj_step_new - obj_step_old < -0.001 && step_size_change > 1e-05);
361 BaseFloat auxf_improv = 0.0, logdet = 0.0;
366 bool using_subspace = (globals.
HasBasis() ?
370 KALDI_ERR <<
"Must set up pre-transforms before estimating FMLLR.";
372 KALDI_VLOG(1) <<
"Mincount = " << mincount <<
"; Basis: " 373 << std::string(globals.
HasBasis()?
"yes; " :
"no; ")
374 <<
"Using subspace: " << std::string(using_subspace?
"yes; " 378 if (using_subspace) {
385 KALDI_VLOG(1) <<
"Have " << stats_.beta_ <<
" frames for speaker: Using " 386 << num_bases <<
" fMLLR bases.";
391 if (frame_count != NULL) *frame_count = stats_.beta_;
394 if (stats_.beta_ >= mincount) {
398 auxf_new = this->FmllrObjGradient(sgmm, *out_xform, &grad, &G);
401 KALDI_VLOG(3) <<
"Iter " << iter <<
": Auxiliary function = " 402 << (auxf_new / stats_.beta_) <<
" per frame over " << stats_.beta_
407 KALDI_VLOG(2) <<
"Iter " << iter <<
": Auxiliary function improvement: " 408 << ((auxf_new - auxf_old) / stats_.beta_) <<
" per frame over " 409 << (stats_.beta_) <<
" frames";
410 auxf_improv += auxf_new - auxf_old;
425 if (using_subspace) {
430 for (
int32 b = 0; b < num_bases; b++) {
432 hess_xformed_grad,
kTrans),
435 hess_xformed_delta.
Scale(1 / stats_.beta_);
438 hess_xformed_delta.
Scale(1 / stats_.beta_);
452 #ifdef KALDI_PARANOID 476 auxf_new = this->FmllrObjGradient(sgmm, *out_xform, NULL, NULL);
481 KALDI_VLOG(2) <<
"Iter " << iter <<
": Auxiliary function improvement: " 482 << ((auxf_new - auxf_old) / stats_.beta_) <<
" per frame over " 483 << (stats_.beta_) <<
" frames";
484 auxf_improv += auxf_new - auxf_old;
487 if (auxf_out != NULL) *auxf_out = auxf_improv;
488 auxf_improv /= (stats_.beta_ + 1.0e-10);
490 KALDI_LOG <<
"Auxiliary function improvement for FMLLR = " << auxf_improv
491 <<
" per frame over " << stats_.beta_ <<
" frames. Log-determinant = " 498 KALDI_WARN <<
"Not updating FMLLR because count is " << stats_.beta_
499 <<
" < " << (mincount);
500 if (auxf_out != NULL) *auxf_out = 0.0;
510 if (num_fmllr_bases > feat_dim * (feat_dim + 1)) {
511 num_fmllr_bases = feat_dim * (feat_dim + 1);
512 KALDI_WARN <<
"Limiting number of fMLLR bases to be the same as transform " 516 vector< Matrix<BaseFloat> > &fmllr_bases(globals->
fmllr_bases_);
522 fmllr_grad_scatter.
Eig(&s, &U);
524 KALDI_VLOG(1) <<
"Eigenvalues (max 200) of CMLLR scatter are: " 526 std::min(static_cast<MatrixIndexT>(200),
540 fmllr_bases.resize(num_fmllr_bases);
541 for (
int32 b = 0; b < num_fmllr_bases; b++) {
542 fmllr_bases[b].Resize(feat_dim, feat_dim + 1,
kSetZero);
543 fmllr_bases[b].CopyRowsFromVec(U.Row(b));
545 KALDI_LOG <<
"Estimated " << num_fmllr_bases <<
" fMLLR basis matrices.";
546 }
catch(
const std::exception &e) {
547 KALDI_WARN <<
"Not estimating FMLLR bases because of a thrown exception:\n" 549 fmllr_bases.resize(0);
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void InvertDouble(Real *LogDet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse [double].
Class for definition of the subspace Gmm acoustic model.
void Write(std::ostream &out, bool binary) const
write to stream.
BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *post) const
Similar to LogLikelihood() function above, but also computes the posterior probabilities for the pre-...
BaseFloat FmllrObjGradient(const AmSgmm2 &sgmm, const Matrix< BaseFloat > &xform, Matrix< BaseFloat > *grad_out, Matrix< BaseFloat > *G_out) const
static void ApplyPreXformToGradient(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &gradient_in, Matrix< BaseFloat > *gradient_out)
void Write(std::ostream &out_stream, bool binary) const
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
double TraceMat(const MatrixBase< Real > &A)
Returns trace of matrix.
Base class which provides matrix operations not involving resizing or allocation. ...
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void GetVarScaledSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
BaseFloat fmllr_min_count_full
Minimum occupancy count to stop using FMLLR bases and switch to regular FMLLR estimation.
void GetInvCovars(int32 gauss_index, SpMatrix< Real > *out) const
Templated accessors (used to accumulate in different precision)
void AccumulateFromPosteriors(const AmSgmm2 &sgmm, const Sgmm2PerSpkDerivedVars &spk, const VectorBase< BaseFloat > &data, const std::vector< int32 > &gauss_select, const Matrix< BaseFloat > &posteriors, int32 state_index)
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
MatrixIndexT NumRows() const
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
void AccumulateForFmllrSubspace(const AmSgmm2 &sgmm, const Sgmm2FmllrGlobalParams &fmllr_globals, SpMatrix< double > *grad_scatter)
BaseFloat bases_occ_scale
Scale per-speaker count to determine number of CMLLR bases.
int32 fmllr_iters
Number of iterations in FMLLR estimation.
bool Update(const AmSgmm2 &model, const Sgmm2FmllrGlobalParams &fmllr_globals, const Sgmm2FmllrConfig &opts, Matrix< BaseFloat > *out_xform, BaseFloat *frame_count, BaseFloat *auxf_improv) const
Computes the FMLLR transform from the accumulated stats, using the pre-transforms in fmllr_globals...
static void ApplyInvPreXformToChange(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &delta_in, Matrix< BaseFloat > *delta_out)
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Read(std::istream &in_stream, bool binary, bool add)
void Read(std::istream &in_stream, bool binary)
static BaseFloat CalcFmllrStepSize(const AffineXformStats &stats, const AmSgmm2 &sgmm, const MatrixBase< BaseFloat > &Delta, const MatrixBase< BaseFloat > &A, const Matrix< BaseFloat > &G, int32 max_iters)
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
void Scale(Real alpha)
Multiply each element with a scalar value.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
int32 Pdf2Group(int32 j2) const
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
static void ApplyInvHessianXformToChange(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &delta_in, Matrix< BaseFloat > *delta_out)
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void SetZero()
Sets matrix to zero.
void Scale(Real alpha)
Multiplies all elements by this constant.
Configuration variables needed in the estimation of FMLLR for SGMMs.
Real TraceMatSpMatSp(const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const MatrixBase< Real > &C, MatrixTransposeType transC, const SpMatrix< Real > &D)
Returns tr (A B C D) (A and C may be transposed as specified by transA and transC).
std::vector< int32 > gselect
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
A class representing a vector.
BaseFloat Accumulate(const AmSgmm2 &sgmm, const VectorBase< BaseFloat > &data, const Sgmm2PerFrameDerivedVars &frame_vars, int32 state_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk)
Accumulation routine that computes the Gaussian posteriors and calls the AccumulateFromPosteriors fun...
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Write(std::ostream &out_stream, bool binary) const
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
BaseFloat fmllr_min_count_basis
Minimum occupancy count to estimate FMLLR using basis matrices.
static void ApplyHessianXformToGradient(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &gradient_in, Matrix< BaseFloat > *gradient_out)
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Global adaptation parameters.
Provides a vector abstraction class.
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
std::vector< Matrix< BaseFloat > > fmllr_bases_
$\mathbf{W}_b$. [b][d][d], dim is [B][D][D+1].
void Init(int32 dim, int32 num_gaussians)
void AddSpMat(const Real alpha, const SpMatrix< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*SpA*B.
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
int32 NumSubstatesForGroup(int32 j1) const
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
Sub-matrix representation.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).