39 if (!add)
beta_ = beta;
42 if (add &&
G_.size() != 0 &&
static_cast<size_t>(dim) !=
G_.size())
43 KALDI_ERR <<
"MlltAccs::Read, summing accs of different size.";
44 if (!add ||
G_.empty())
G_.resize(dim);
46 for (
size_t i = 0;
i <
G_.size();
i++)
53 if(!binary) os <<
'\n';
58 if(!binary) os <<
'\n';
59 for (
size_t i = 0;
i <
G_.size();
i++)
62 if(!binary) os <<
'\n';
79 KALDI_WARN <<
"Mllt:Update, very small count " << beta;
81 KALDI_WARN <<
"Mllt:Update, insufficient count " << beta;
83 int32 num_iters = 200;
85 M.CopyFromMat(*M_ptr);
86 std::vector<SpMatrix<double> > Ginv(dim);
89 Ginv[
i].CopyFromSp(G[
i]);
93 double tot_objf_impr = 0.0;
94 for (
int32 p = 0; p < num_iters; p++) {
108 double objf_before = beta *
Log(std::abs(
VecVec(row, cofactor)))
112 Ginv[i], cofactor, 0.0);
113 double objf_after = beta *
Log(std::abs(
VecVec(row, cofactor)))
115 if (objf_after < objf_before - fabs(objf_before)*0.00001)
116 KALDI_ERR <<
"Objective decrease in MLLT update.";
117 tot_objf_impr += objf_after - objf_before;
119 if (p < 10 || p % 10 == 0)
120 KALDI_LOG <<
"MLLT objective improvement per frame by " << p
121 <<
"'th iteration is " << (tot_objf_impr/beta) <<
" per frame " 122 <<
"over " << beta <<
" frames.";
125 *objf_impr_out = tot_objf_impr;
142 double this_beta_ = 0.0;
146 if (posterior == 0.0)
continue;
149 mean.AddVecDivVec(1.0, mean_invvar, inv_var, 0.0);
150 mean.AddVec(-1.0, data);
151 offset_dbl.CopyFromVec(mean);
153 tmp.AddVec2(1.0, offset_dbl);
155 G_[
j].AddSp(inv_var(
j)*posterior, tmp);
156 this_beta_ += posterior;
167 posteriors.Scale(weight);
175 const std::vector<int32> &gselect,
181 BaseFloat loglike = loglikes.ApplySoftMax();
185 for (
size_t i = 0;
i < gselect.size();
i++)
186 posteriors(gselect[
i]) = loglikes(i) * weight;
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
BaseFloat AccumulateFromGmmPreselect(const DiagGmm &gmm, const std::vector< int32 > &gselect, const VectorBase< BaseFloat > &data, BaseFloat weight)
void Transpose()
Transpose the matrix.
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
const Matrix< BaseFloat > & means_invvars() const
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void Read(std::istream &is, bool binary, bool add=false)
Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState *state=NULL)
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <-- beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix (no resize is done).
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
void Update(MatrixBase< BaseFloat > *M, BaseFloat *objf_impr_out, BaseFloat *count_out) const
The Update function does the ML update; it requires that M has the right size.
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
std::vector< SpMatrix< double > > G_
void Init(int32 dim, BaseFloat rand_prune=0.25)
initializes (destroys anything that was there before).
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
BaseFloat rand_prune_
rand_prune_ controls randomized pruning; the larger it is, the more pruning we do.
Definition for Gaussian Mixture Model with diagonal covariances.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
BaseFloat AccumulateFromGmm(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat weight)
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Provides a vector abstraction class.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void Write(std::ostream &os, bool binary) const
const Matrix< BaseFloat > & inv_vars() const