33 model_dim_(model_dim) {
36 KALDI_ERR <<
"Expecting full LDA+MLLT transform to be square or d by d+1 " 37 <<
"(make sure you are including rejected rows).";
39 if (raw_dim <= 0 || full_transform.
NumRows() % raw_dim != 0)
40 KALDI_ERR <<
"Raw feature dimension is invalid " << raw_dim
41 <<
"(must be positive and divide feature dimension)";
48 int32 full_dim2 = ((full_dim+1)*(full_dim+2))/2;
52 Q_.
Resize(model_dim + 1, full_dim + 1);
72 if (stats.
count == 0.0)
return;
80 a_ext.Range(0, model_dim).CopyFromVec(stats.
a);
81 b_ext.
Range(0, model_dim).CopyFromVec(stats.
b);
82 a_ext(model_dim) = stats.
count;
83 b_ext(model_dim) = stats.
count;
88 int32 full_dim2 = ((full_dim + 1) * (full_dim + 2)) / 2;
97 stats.
s.Range(0, full_dim).CopyFromVec(data);
98 stats.
s(full_dim) = 1.0;
112 "Expect raw, spliced data, which should have same dimension as " 127 posterior.
Scale(weight);
135 if (full_dim > model_dim) {
137 model_dim, full_dim - model_dim);
138 log_like += -0.5 * (
VecVec(rejected_data, rejected_data)
277 for (
size_t m = 0; m < num_comp; m++) {
279 if (this_post != 0.0) {
309 <<
" is less than min count " << opts.
min_count;
316 !raw_fmllr_mat->
IsZero());
322 std::vector<SpMatrix<double> > diag_stats;
324 std::vector<std::vector<Matrix<double> > > off_diag_stats;
332 &linear_stats, &diag_stats, &off_diag_stats);
335 for (
size_t i = 0;
i < diag_stats.size();
i++) {
336 diag_stats[
i].Invert();
339 KALDI_WARN <<
"Error inverting stats matrices for fMLLR " 340 <<
"[min-count too small? Bad data?], not updating.";
346 double effective_beta =
count_ * splice_width;
349 double auxf_orig =
GetAuxf(simple_linear_stats, simple_quadratic_stats,
352 for (
int32 row = 0; row < raw_dim; row++) {
357 for (
int32 row2 = 0; row2 < raw_dim; row2++) {
361 fmllr_mat.Row(row2), 1.0);
365 fmllr_mat.Row(row2), 1.0);
376 double cur_auxf =
GetAuxf(simple_linear_stats, simple_quadratic_stats,
378 auxf_change = cur_auxf - auxf_orig;
379 KALDI_VLOG(2) <<
"Updating raw fMLLR: objf improvement per frame was " 380 << (auxf_change / this->
count_) <<
" over " 381 << this->
count_ <<
" frames, by the " << iter
385 double auxf_final =
GetAuxf(simple_linear_stats, simple_quadratic_stats,
387 auxf_change = auxf_final - auxf_orig;
389 KALDI_VLOG(1) <<
"Updating raw fMLLR: objf improvement per frame was " 390 << (auxf_change / this->
count_) <<
" over " 391 << this->
count_ <<
" frames.";
392 if (auxf_final > auxf_orig) {
393 *objf_impr = auxf_change;
414 raw_dim2 = raw_dim * (raw_dim + 1);
416 for (
int32 i = 0;
i < full_dim;
i++)
417 (*M)[
i].Resize(raw_dim2, full_dim + 1);
423 for (
int32 i = 0;
i < full_dim;
i++) {
426 int32 i1 =
i / raw_dim, i2 =
i % raw_dim;
427 for (
int32 j = 0;
j < raw_dim2;
j++) {
429 int32 j1 =
j / (raw_dim + 1), j2 =
j % (raw_dim + 1);
430 for (
int32 k = 0; k < full_dim + 1; k++) {
434 }
else if (k == full_dim) {
441 int32 k1 = k / raw_dim, k2 = k % raw_dim;
442 if (k1 != i1 || k2 != j2)
448 for (
int32 l = 0; l < full_dim; l++)
458 std::vector<Matrix<double> > M;
462 raw_dim2 = raw_dim * (raw_dim + 1),
463 full_dim2 = ((full_dim+1)*(full_dim+2))/2;
464 simple_linear_stats->
Resize(raw_dim2);
465 simple_quadratic_stats->
Resize(raw_dim2);
466 for (
int32 i = 0;
i < full_dim;
i++) {
472 S_i_vec.CopyFromVec(
S_.
Row(
i));
481 S_i_vec.CopyFromVec(
S_.
Row(model_dim));
498 std::vector<std::vector<
Matrix<double> > > *off_diag_stats)
const {
505 diag_stats->resize(
RawDim());
506 off_diag_stats->resize(
RawDim());
514 (*diag_stats)[
i].Resize(
RawDim() + 1);
515 (*diag_stats)[
i].CopyFromMat(this_diag,
kTakeMean);
519 (*off_diag_stats)[
i].resize(
i);
524 (*off_diag_stats)[
i][
j] = this_off_diag;
538 double logdet = square_part.
LogDet();
539 return VecVec(fmllr_vec, simple_linear_stats) -
540 0.5 *
VecSpVec(fmllr_vec, simple_quadratic_stats, fmllr_vec) +
541 logdet * spice_width *
count_;
bool ApproxEqual(const VectorBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Vector< BaseFloat > transform_offset_
SingleFrameStats single_frame_stats_
int32 ModelDim() const
Dimension of the model.
int32 SpliceWidth() const
Number of frames that are spliced together each time.
void Update(const FmllrRawOptions &opts, MatrixBase< BaseFloat > *raw_fmllr_mat, BaseFloat *objf_impr, BaseFloat *count)
Update "raw_fmllr_mat"; it should have the correct dimension and reasonable values at entry (see the ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
const Matrix< BaseFloat > & means_invvars() const
void ConvertToSimpleStats(Vector< double > *simple_linear_stats, SpMatrix< double > *simple_quadratic_stats) const
Converts from the Q and S stats to a simple objective function of the form l . simple_linear_stats - 0.5 l^T simple_quadratic_stats l, where l is the linearized transform.
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
void ComputeM(std::vector< Matrix< double > > *M) const
Computes the M_i matrices used in the update, see the extended comment in fmllr-raw.cc for explanation.
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Matrix< BaseFloat > full_transform_
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors: this <-- alpha * v .* r + beta*this.
bool IsZero(Real cutoff=1.0e-05) const
Returns true if matrix is all zeros.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
int32 FullDim() const
Full feature dimension after splicing.
void AddSmat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
This is a version of AddMat2Sp specialized for when M is fairly sparse.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void ConvertToPerRowStats(const Vector< double > &simple_linear_stats, const SpMatrix< double > &simple_quadratic_stats_sp, Matrix< double > *linear_stats, std::vector< SpMatrix< double > > *diag_stats, std::vector< std::vector< Matrix< double > > > *off_diag_stats) const
Transform stats into a convenient format for the update.
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
Vector< BaseFloat > transformed_data
double GetAuxf(const Vector< double > &simple_linear_stats, const SpMatrix< double > &simple_quadratic_stats, const Matrix< double > &fmllr_mat) const
Compute the auxiliary function for this matrix.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void SetZero()
Sets matrix to zero.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <-- beta*this + alpha*M*v.
void FmllrInnerUpdate(SpMatrix< double > &inv_G, VectorBase< double > &k, double beta, int32 row, MatrixBase< double > *transform)
This function does one row of the inner-loop fMLLR transform update.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
Definition for Gaussian Mixture Model with diagonal covariances.
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
BaseFloat AccumulateForGmm(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat weight)
Accumulate stats for a single GMM in the model; returns log likelihood.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void InitSingleFrameStats(const VectorBase< BaseFloat > &data)
int32 RawDim() const
Dimension of raw MFCC (etc.) features.
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Provides a vector abstraction class.
void SetZero()
Set vector to all zeros.
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
bool DataHasChanged(const VectorBase< BaseFloat > &data) const
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * v (with casting between floats and doubles) ...
Sub-matrix representation.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void CommitSingleFrameStats()
const Matrix< BaseFloat > & inv_vars() const
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).