26 double ml_count,
double ml_x_stats,
double ml_x2_stats,
27 double disc_count,
double disc_x_stats,
double disc_x2_stats,
28 double model_mean,
double model_var,
BaseFloat min_variance,
29 double *ml_x_stats_deriv,
double *ml_x2_stats_deriv) {
31 double model_inv_var = 1.0/model_var,
32 model_inv_var_sq = model_inv_var*model_inv_var,
33 model_mean_sq = model_mean*model_mean;
41 double diff_wrt_model_mean = (1.0/model_var) * (disc_x_stats - model_mean*disc_count),
43 0.5 * ((disc_x2_stats - 2*model_mean*disc_x_stats + disc_count*model_mean_sq)
45 - disc_count*model_inv_var);
47 double stats_mean = ml_x_stats / ml_count,
48 stats_var = ml_x2_stats / ml_count - (ml_x_stats / ml_count)*(ml_x_stats / ml_count);
68 double diff_wrt_stats_mean = diff_wrt_model_mean;
69 double diff_wrt_stats_var;
70 if (model_var <= min_variance*1.01) {
71 diff_wrt_stats_var = 0.0;
72 KALDI_VLOG(2) <<
"Variance derivative is zero (min variance)";
74 diff_wrt_stats_var = diff_wrt_model_var * model_var / stats_var;
85 *ml_x_stats_deriv = diff_wrt_stats_mean / ml_count - 2 * diff_wrt_stats_var * stats_mean / ml_count;
86 *ml_x2_stats_deriv = diff_wrt_stats_var / ml_count;
119 for (
int32 gauss = 0; gauss < num_gauss; gauss++) {
121 double num_count = num_acc.
occupancy()(gauss),
125 if (ml_count <= min_gaussian_occupancy) {
127 KALDI_WARN <<
"Skipping Gaussian because very small ML count: (num,den,ml) = " 128 << num_count <<
", " << den_count <<
", " << ml_count;
130 double disc_count = num_count - den_count;
138 model_mean = gmm_normal.
means_(gauss,
d),
139 model_var = gmm_normal.
vars_(gauss,
d);
141 double x_acc_deriv = 0.0, x2_acc_deriv = 0.0;
143 disc_count, disc_x_acc, disc_x2_acc,
144 model_mean, model_var, min_variance,
145 &x_acc_deriv, &x2_acc_deriv);
147 x_stats_deriv(
d) = x_acc_deriv;
148 x2_stats_deriv(
d) = x2_acc_deriv;
169 for (
int32 pdf = 0; pdf < num_pdfs; pdf++)
171 ml_accs.
GetAcc(pdf), min_variance, min_gaussian_occupancy,
172 &(out_accs->
GetAcc(pdf)));
183 double *tot_divergence) {
186 old_ml_acc.
Dim() == dim);
188 new_ml_acc.
Dim() == dim);
195 for (
int32 gauss = 0; gauss < num_gauss; gauss++) {
196 double old_ml_count = old_ml_acc.
occupancy()(gauss),
197 new_ml_count = new_ml_acc.
occupancy()(gauss);
198 if (old_ml_count <= min_gaussian_occupancy ||
199 new_ml_count <= min_gaussian_occupancy) {
200 KALDI_WARN <<
"Gaussian being skipped because it has small count: (old,new) = " 201 << old_ml_count <<
", " << new_ml_count;
204 *tot_count += new_ml_count;
206 double old_model_mean = gmm_normal.
means_(gauss,
d),
207 old_model_var = gmm_normal.
vars_(gauss,
d),
210 - old_ml_mean*old_ml_mean,
213 - new_ml_mean*new_ml_mean,
214 new_model_mean = old_model_mean + new_ml_mean - old_ml_mean,
215 new_model_var = std::max(static_cast<double>(min_variance),
216 old_model_var * new_ml_var / old_ml_var);
218 0.5 *(((new_model_mean-old_model_mean)*(new_model_mean-old_model_mean) +
219 new_model_var - old_model_var)/old_model_var +
220 Log(old_model_var / new_model_var));
221 if (divergence < 0.0)
222 KALDI_WARN <<
"Negative divergence " << divergence;
223 *tot_divergence += divergence * new_ml_count;
224 gmm_normal.
means_(gauss,
d) = new_model_mean;
225 gmm_normal.
vars_(gauss,
d) = new_model_var;
240 double tot_count = 0.0, tot_divergence = 0.0;
241 for (
int32 pdf = 0; pdf < num_pdfs; pdf++)
243 min_variance, min_gaussian_occupancy, &am_gmm->
GetPdf(pdf),
244 &tot_count, &tot_divergence);
245 KALDI_LOG <<
"K-L divergence from old to new model is " 246 << (tot_divergence/tot_count) <<
" over " 247 << tot_count <<
" frames.";
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
int32 ComputeGconsts()
Sets the gconsts for all the PDFs.
Definition for Gaussian Mixture Model with diagonal covariances in normal mode: where the parameters ...
const VectorBase< double > & occupancy() const
const MatrixBase< double > & variance_accumulator() const
const MatrixBase< double > & mean_accumulator() const
void GetStatsDerivative(const DiagGmm &gmm, const AccumDiagGmm &num_acc, const AccumDiagGmm &den_acc, const AccumDiagGmm &ml_acc, BaseFloat min_variance, BaseFloat min_gaussian_occupancy, AccumDiagGmm *out_accs)
void DoRescalingUpdate(const AccumDiagGmm &old_ml_acc, const AccumDiagGmm &new_ml_acc, BaseFloat min_variance, BaseFloat min_gaussian_occupancy, DiagGmm *gmm, double *tot_count, double *tot_divergence)
GmmFlagsType Flags() const
void AddStatsForComponent(int32 comp_id, double occ, const VectorBase< double > &x_stats, const VectorBase< double > &x2_stats)
Increment the stats for this component by the specified amount (not all parts may be taken...
Matrix< double > vars_
diagonal variance
int32 NumGauss() const
Returns the number of mixture components in the GMM.
int32 Dim() const
Returns the dimensionality of the feature vectors.
Matrix< double > means_
Means.
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
const AccumDiagGmm & GetAcc(int32 index) const
#define KALDI_ASSERT(cond)
Definition for Gaussian Mixture Model with diagonal covariances.
void Resize(int32 num_gauss, int32 dim, GmmFlagsType flags)
Allocates memory for accumulators.
void GetSingleStatsDerivative(double ml_count, double ml_x_stats, double ml_x2_stats, double disc_count, double disc_x_stats, double disc_x2_stats, double model_mean, double model_var, BaseFloat min_variance, double *ml_x_stats_deriv, double *ml_x2_stats_deriv)
void Init(const AmDiagGmm &model, GmmFlagsType flags)
Initializes accumulators for each GMM based on the number of components and dimension.
int32 NumGauss() const
Returns the number of mixture components.
void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags=kGmmAll) const
Copies to DiagGmm the requested parameters.