44 const AffineXformStats &spk_stats,
45 const Matrix<BaseFloat> &spk_stats_tmp_K,
46 const std::vector<SpMatrix<BaseFloat> > &spk_stats_tmp_G,
47 const Matrix<BaseFloat> &delta,
48 const Matrix<BaseFloat> &A,
49 const Matrix<BaseFloat> &S,
58 if (!binary) os <<
'\n';
84 KALDI_ERR <<
"Invalid feature dimension " << dim;
100 for (
int d = 0;
d <
dim_; ++
d) {
102 grad_mat.
Row(
d).AddVec(-1.0, G_d_mat.
Row(
d));
110 if (spk_stats.
beta_ > 0) {
122 tmp_uint32 =
static_cast<uint32
>(basis_size_);
124 if (fmllr_basis_.size() != 0) {
126 for (
int32 n = 0;
n < basis_size_; ++
n) {
127 fmllr_basis_[
n].Write(os, binary);
141 basis_size_ =
static_cast<int32>(tmp_uint32);
144 fmllr_basis_.resize(basis_size_);
145 for (
int32 n = 0;
n < basis_size_; ++
n) {
146 fmllr_basis_[
n].Read(is, binary);
148 dim_ = fmllr_basis_[
n].NumRows();
165 vector< SpMatrix<double> > G_hat(
dim_);
173 for (
int32 j = 0;
j < num_pdf; ++
j) {
182 for (
int32 m = 0; m < num_comp; ++m) {
183 extend_mean.
Range(0, dim_).CopyFromVec(means.
Row(m));
184 extend_mean(dim_) = 1.0;
185 extend_var.
Range(0, dim_).CopyFromVec(vars.
Row(m));
186 extend_var(dim_) = 0;
192 double alpha = (1.0 / num_pdf) * weights(m) * (1.0 / vars.
Row(m)(
d));
193 G_hat[
d].
AddVec2(alpha, extend_mean);
196 G_hat[
d].AddDiagVec(alpha, extend_var);
204 H_mat.
Range(
d * (dim_ + 1), (dim_ + 1),
d * (dim_ + 1), (dim_ + 1))
205 .CopyFromSp(G_hat[
d]);
212 H_mat(
i * (dim_ + 1) +
j,
j * (dim_ + 1) +
i) += 1;
215 KALDI_ERR <<
"Preconditioner matrix H = H(1) + H(2) is not symmetric";
224 ComputeAmDiagPrecond(am_gmm, &precond_mat);
249 fmllr_basis_.resize(basis_size_);
250 for (
int32 n = 0;
n < basis_size_; ++
n) {
256 fmllr_basis_[
n].CopyRowsFromVec(basis_vec);
262 Lvec_scaled.
Scale(1.0 / (2 * basis_accus.
beta_));
263 KALDI_LOG <<
"The [per-frame] eigenvalues sorted from largest to smallest: " << Lvec_scaled;
266 KALDI_LOG <<
"Sum of the [per-frame] eigenvalues, that is" 267 " the log-likelihood improvement, is " << Lvec_scaled.Sum();
275 if (coefficient == NULL) {
277 return ComputeTransform(spk_stats, out_xform, &tmp, options);
281 KALDI_WARN <<
"Not updating fMLLR since count is below min-count: " 291 if (out_xform->
IsZero()) {
301 std::vector<SpMatrix<BaseFloat> > stats_tmp_G(
dim_);
307 int32 basis_size =
int32 (std::min(
double(basis_size_),
321 S.
Row(
d).AddSpVec(1.0, stats_tmp_G[
d], W_mat.
Row(d), 0.0);
335 P.
Range(0, dim_, 0, dim_).CopyFromMat(A_inv_trans);
337 P.
AddMat(1.0, stats_tmp_K);
348 for (
int32 n = 0;
n < basis_size; ++
n) {
350 delta_W.
AddMat(delta_d(n), fmllr_basis_[n]);
356 coefficient->
AddVec(step_size, delta_d);
360 KALDI_VLOG(4) <<
"Objective function (iter=" << iter <<
"): " 361 << start_obj / spk_stats.
beta_ <<
" -> " 362 << (end_obj / spk_stats.
beta_) <<
" over " 363 << spk_stats.
beta_ <<
" frames";
365 impr_spk += (end_obj - start_obj);
393 G_row_delta.
AddSpVec(1.0, spk_stats_tmp_G[
d], delta.
Row(d), 0.0);
402 for (
int32 iter_step = 1; iter_step <= max_iters; ++iter_step) {
403 if (iter_step == 1) {
423 d2 = std::min((
double)d2, -c / 10.0);
433 step_size += step_size_change;
443 obj_new = spk_stats.
beta_ * tmp_A.
LogDet() + step_size * b -
444 0.5 * step_size * step_size * c;
446 if (obj_new - obj_old < -1.0e-04 * spk_stats.
beta_) {
447 KALDI_WARN <<
"Objective function decreased (" << obj_old <<
"->" 448 << obj_new <<
"). Halving step size change ( step size " 449 << step_size <<
" -> " << (step_size - (step_size_change/2))
451 step_size_change /= 2;
452 step_size -= step_size_change;
454 }
while (obj_new - obj_old < -1.0e-04 * spk_stats.beta_ && step_size_change > 1e-05);
double beta_
Occupancy count.
void Write(std::ostream &out_stream, bool binary) const
Routines for reading and writing fMLLR basis matrices.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void InvertDouble(Real *LogDet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse [double].
void AccuGradientScatter(const AffineXformStats &spk_stats)
Accumulate gradient scatter for one (training) speaker.
void Transpose()
Transpose the matrix.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
double TraceMat(const MatrixBase< Real > &A)
Returns trace of matrix.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void Read(std::istream &in_stream, bool binary)
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
static BaseFloat CalBasisFmllrStepSize(const AffineXformStats &spk_stats, const Matrix< BaseFloat > &spk_stats_tmp_K, const std::vector< SpMatrix< BaseFloat > > &spk_stats_tmp_G, const Matrix< BaseFloat > &delta, const Matrix< BaseFloat > &A, const Matrix< BaseFloat > &S, int32 max_iters)
This function takes the step direction (delta) of the fMLLR matrix as an argument, and optimizes the step size us...
void Write(std::ostream &out_stream, bool binary) const
Routines for reading and writing stats.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <-- beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT NumRows() const
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
double ComputeTransform(const AffineXformStats &spk_stats, Matrix< BaseFloat > *out_xform, Vector< BaseFloat > *coefficients, BasisFmllrOptions options) const
This function performs speaker adaptation, computing the fMLLR matrix based on speaker statistics...
void GetVars(Matrix< Real > *v) const
Accessor for covariances.
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector: *this = *this + alpha * v^2 [element-wise squaring].
void ComputeAmDiagPrecond(const AmDiagGmm &am_gmm, SpMatrix< double > *pre_cond)
This function computes the preconditioner matrix, prior to base matrices estimation.
bool IsZero(Real cutoff=1.0e-05) const
Returns true if matrix is all zeros.
bool IsSymmetric(Real cutoff=1.0e-05) const
Returns true if matrix is symmetric.
void Cholesky(const SpMatrix< Real > &orig)
Stats for fMLLR subspace estimation.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void Scale(Real alpha)
Multiply each element with a scalar value.
void Read(std::istream &in_stream, bool binary, bool add=false)
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
void GetMeans(Matrix< Real > *m) const
Accessor for means.
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void ResizeAccus(int32 dim)
const Vector< BaseFloat > & weights() const
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
void SetZero()
Sets matrix to zero.
int32 dim_
Feature dimension.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector: this <-- beta*this + alpha*M*v.
SpMatrix< BaseFloat > grad_scatter_
Gradient scatter. Dim is [(D+1)*D] [(D+1)*D].
void CopyFromMat(const MatrixBase< Real > &orig, SpCopyType copy_type=kTakeMean)
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <-- beta*this + alpha * M * A * M^T.
float FmllrAuxFuncDiagGmm(const MatrixBase< float > &xform, const AffineXformStats &stats)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
Definition for Gaussian Mixture Model with diagonal covariances.
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void EstimateFmllrBasis(const AmDiagGmm &am_gmm, const BasisFmllrAccus &basis_accus)
Estimate the base matrices efficiently in a Maximum Likelihood manner.
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
void SymPosSemiDefEig(VectorBase< Real > *s, MatrixBase< Real > *P, Real tolerance=0.001) const
This is the version of SVD that we implement for symmetric positive definite matrices.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector: *this = *this + alpha * v (with casting between floats and doubles) ...
Sub-matrix representation.
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).