41 WriteToken(out_stream, binary,
"<NUMGaussians>");
43 WriteToken(out_stream, binary,
"<FEATUREDIM>");
45 WriteToken(out_stream, binary,
"<PHONESPACEDIM>");
47 WriteToken(out_stream, binary,
"<SPKSPACEDIM>");
49 if (!binary) out_stream <<
"\n";
88 gamma_j1.
Write(out_stream, binary);
107 if (
a_.size() != 0) {
113 WriteToken(out_stream, binary,
"<total_like>");
116 WriteToken(out_stream, binary,
"<total_frames>");
119 WriteToken(out_stream, binary,
"</SGMMACCS>");
141 while (token !=
"</SGMMACCS>") {
142 if (token ==
"<Y>") {
144 for (
size_t i = 0;
i <
Y_.size();
i++) {
145 Y_[
i].Read(in_stream, binary, add);
147 }
else if (token ==
"<Z>") {
149 for (
size_t i = 0;
i <
Z_.size();
i++) {
150 Z_[
i].Read(in_stream, binary, add);
152 }
else if (token ==
"<R>") {
155 for (
size_t i = 0;
i <
R_.size();
i++) {
156 R_[
i].Read(in_stream, binary, add);
158 }
else if (token ==
"<S>") {
160 for (
size_t i = 0;
i <
S_.size();
i++) {
161 S_[
i].Read(in_stream, binary, add);
163 }
else if (token ==
"<y>") {
166 y_[j1].Read(in_stream, binary, add);
168 }
else if (token ==
"<gamma>") {
171 gamma_[j1].Read(in_stream, binary, add);
175 }
else if (token ==
"<a>") {
178 a_[j1].Read(in_stream, binary, add);
180 }
else if (token ==
"<gamma_c>") {
183 gamma_c_[j2].Read(in_stream, binary, add);
185 }
else if (token ==
"<t>") {
186 t_.
Read(in_stream, binary, add);
187 }
else if (token ==
"<U>") {
190 U_[
i].Read(in_stream, binary, add);
192 }
else if (token ==
"<total_like>") {
199 }
else if (token ==
"<total_frames>") {
207 KALDI_ERR <<
"Unexpected token '" << token <<
"' in model file ";
214 bool show_properties)
const {
228 std::ostringstream debug_str;
230 if (
Y_.size() == 0) {
231 debug_str <<
"Y: no. ";
239 if (!nz &&
Y_[
i](0, 0) != 0) { nz =
true; }
241 debug_str <<
"Y: yes, " << string(nz ?
"nonzero. " :
"zero. ");
244 if (
Z_.size() == 0) {
246 debug_str <<
"Z, R: no. ";
251 bool Z_nz =
false, R_nz =
false;
256 if (!Z_nz &&
Z_[
i](0, 0) != 0) { Z_nz =
true; }
257 if (!R_nz &&
R_[
i](0, 0) != 0) { R_nz =
true; }
260 debug_str <<
"Z: yes, " << string(Z_nz ?
"nonzero. " :
"zero. ");
261 debug_str <<
"R: yes, " << string(R_nz ?
"nonzero. " :
"zero. ");
262 debug_str <<
"gamma_s: yes, " << string(gamma_s_nz ?
"nonzero. " :
"zero. ");
265 if (
S_.size() == 0) {
266 debug_str <<
"S: no. ";
273 if (!S_nz &&
S_[
i](0, 0) != 0) { S_nz =
true; }
275 debug_str <<
"S: yes, " << string(S_nz ?
"nonzero. " :
"zero. ");
278 if (
y_.size() == 0) {
279 debug_str <<
"y: no. ";
287 if (!nz &&
y_[j1](0, 0) != 0) { nz =
true; }
289 debug_str <<
"y: yes, " << string(nz ?
"nonzero. " :
"zero. ");
292 if (
a_.size() == 0) {
293 debug_str <<
"a: no. ";
295 debug_str <<
"a: yes. ";
301 if (!nz &&
a_[j1].Sum() != 0) nz =
true;
303 debug_str <<
"a: yes, " << string(nz ?
"nonzero. " :
"zero. ");
306 double tot_gamma = 0.0;
308 debug_str <<
"gamma: no. ";
310 debug_str <<
"gamma: yes. ";
315 tot_gamma +=
gamma_[j1].Sum();
317 bool nz = (tot_gamma != 0.0);
319 debug_str <<
"gamma: yes, " << string(nz ?
"nonzero. " :
"zero. ");
326 double tot_gamma_c = 0.0;
331 bool nz = (tot_gamma_c != 0.0);
332 debug_str <<
"gamma_c: yes, " << string(nz ?
"nonzero. " :
"zero. ");
334 KALDI_WARN <<
"Counts from gamma and gamma_c differ " 335 << tot_gamma <<
" vs. " << tot_gamma_c;
339 debug_str <<
"t: no. ";
345 debug_str <<
"t: yes, " << string(nz ?
"nonzero. " :
"zero. ");
348 if (
U_.size() == 0) {
349 debug_str <<
"U: no. ";
354 if (!nz &&
U_[
i].FrobeniusNorm() != 0) nz =
true;
358 debug_str <<
"t: yes, " << string(nz ?
"nonzero. " :
"zero. ");
362 KALDI_LOG <<
"Subspace GMM model properties: " << debug_str.str();
367 bool have_spk_vecs) {
393 KALDI_ERR <<
"Cannot set up accumulators for speaker projections " 394 <<
"because speaker subspace has not been set up";
439 "remove the flag \"u\" if you don't have u set up.");
479 posteriors.
Scale(weight);
493 double tot_count = 0.0;
494 const vector<int32> &gselect = frame_vars.
gselect;
497 a_is_part(gselect.size());
504 for (
int32 m = 0; m < num_substates; m++) {
507 for (
int32 ki = 0; ki < static_cast<int32>(gselect.size()); ki++) {
512 if (gammat_jmi == 0.0)
continue;
513 gammat(ki) += gammat_jmi;
516 gammat_jm += gammat_jmi;
519 tot_count += gammat_jmi;
522 gamma_[j1](m,
i) += gammat_jmi;
527 y_[j1].Row(m).AddVec(gammat_jmi, frame_vars.
zti.
Row(ki));
531 Y_[
i].AddVecVec(gammat_jmi, frame_vars.
xti.
Row(ki),
532 model.
v_[j1].Row(m));
539 xt_jmi.CopyFromVec(frame_vars.
xt);
540 xt_jmi.AddVec(-1.0, mu_jmi);
542 if (spk_vars->
v_s.Dim() != 0)
543 Z_[i].AddVecVec(gammat_jmi, xt_jmi, spk_vars->
v_s);
548 if (gammat_jm != 0.0) {
555 a_[j1].Row(m).AddVec(gammat_jm / d_jms, spk_vars->
b_is);
568 for (
int32 ki = 0; ki < static_cast<int32>(gselect.size()); ki++) {
570 if (gammat(ki) != 0.0) {
572 S_[
i].AddVec2(gammat(ki), frame_vars.
xti.
Row(ki));
583 if (v_s.
Dim() != 0 && !v_s.
IsZero() && !
R_.empty()) {
607 for (
int32 j2 = 0; j2 < J2; j2++) {
616 std::vector< SpMatrix<double> > Q;
621 std::vector< SpMatrix<double> > S_means;
622 std::vector<Matrix<double> > log_a;
630 ComputeQ(accs, *model, &Q);
632 ComputeSMeans(accs, *model, &S_means);
633 if (!accs.
a_.empty())
634 ComputeLogA(accs, &log_a);
637 vector< SpMatrix<double> > H;
641 || options_.renormalize_V)
647 tot_impr += UpdatePhoneVectors(accs, H, log_a, model);
648 if (flags & kSgmmPhoneProjections) {
649 if (options_.tau_map_M > 0.0)
650 tot_impr += MapUpdateM(accs, Q, gamma_i, model);
652 tot_impr += UpdateM(accs, Q, gamma_i, model);
655 tot_impr += UpdateW(accs, log_a, gamma_i, model);
656 if (flags & kSgmmCovarianceMatrix)
657 tot_impr += UpdateVars(accs, S_means, gamma_i, model);
659 tot_impr += UpdateSubstateWeights(accs, model);
661 tot_impr += UpdateN(accs, gamma_i, model);
663 tot_impr += UpdateU(accs, gamma_i, model);
665 if ((flags & kSgmmSpeakerProjections) && (options_.renormalize_N))
666 RenormalizeN(accs, gamma_i, model);
670 if (options_.renormalize_V)
671 RenormalizeV(accs, model, gamma_i, H);
673 KALDI_LOG <<
"*Overall auxf improvement, combining all parameters, is " 676 KALDI_LOG <<
"***Overall data likelihood is " 694 if (accs.
gamma_[j1](m,
i) > 0.0) {
695 (*Q)[
i].AddVec2(static_cast<BaseFloat>(accs.
gamma_[j1](m,
i)),
696 model.
v_[j1].Row(m));
722 (*S_means)[
i].CopyFromMat(YM_MY);
726 if (accs.
gamma_[j1](m,
i) != 0.0) {
729 (*S_means)[
i].AddVec2(static_cast<BaseFloat>(accs.
gamma_[j1](m,
i)), mu_jmi);
746 updater_(updater), accs_(accs), model_(model),
747 H_(H), log_a_(log_a), auxf_impr_ptr_(auxf_impr),
752 updater_(other.updater_), accs_(other.accs_), model_(other.model_),
753 H_(other.H_), log_a_(other.log_a_), auxf_impr_ptr_(other.auxf_impr_ptr_),
757 *auxf_impr_ptr_ += auxf_impr_;
760 inline void operator() () {
763 updater_.UpdatePhoneVectorsInternal(accs_, H_, log_a_, model_,
764 &auxf_impr_, num_threads_, thread_id_);
770 const std::vector<SpMatrix<double> > &
H_;
771 const std::vector<Matrix<double> > &
log_a_;
790 double count = 0.0, auxf_impr = 0.0;
793 count += accs.
gamma_[j1].Sum();
798 double auxf_per_frame = auxf_impr / (count + 1.0e-20);
800 KALDI_LOG <<
"**Overall auxf impr for v is " << auxf_per_frame <<
" over " 801 << count <<
" frames";
802 return auxf_per_frame;
818 int32 num_substates = accs.
a_[j1].NumRows();
821 for (
int32 m = 0; m < num_substates; m++) {
822 if (accs.
a_[j1](m, 0) == 0.0) {
824 if (accs.
gamma_[j1].Row(m).Sum() != 0.0)
825 KALDI_WARN <<
"Inconsistency between a and gamma stats. [BAD!]";
828 (*log_a)[j1].Row(m).CopyFromVec(accs.
a_[j1].Row(m));
829 (*log_a)[j1].Row(m).ApplyLog();
835 <<
" sub-states with zero \"a\" (and presumably gamma) stats.";
843 double *auxf_impr_ptr,
845 int32 thread_id)
const {
847 int32 J1 = accs.
num_groups_, block_size = (J1 + (num_threads-1)) / num_threads,
848 j1_start = block_size * thread_id,
849 j1_end = std::min(accs.
num_groups_, j1_start + block_size);
851 double tot_auxf_impr = 0.0;
853 for (
int32 j1 = j1_start; j1 < j1_end; j1++) {
855 double gamma_jm = accs.
gamma_[j1].Row(m).Sum();
859 double gamma_jmi = accs.
gamma_[j1](m,
i);
860 if (gamma_jmi != 0.0)
861 X_jm.
AddSp(gamma_jmi, H[
i]);
867 double exact_auxf_start = 0.0, exact_auxf = 0.0, approx_auxf_impr = 0.0;
868 int32 backtrack_iter, max_backtrack = 10;
869 for (backtrack_iter = 0; backtrack_iter < max_backtrack; backtrack_iter++) {
879 if (!log_a.empty()) w_jm.
AddVec(1.0, log_a[j1].Row(m));
887 if (backtrack_iter == 0) {
888 exact_auxf_start = exact_auxf;
890 if (exact_auxf >= exact_auxf_start) {
893 KALDI_LOG <<
"Backtracking computation of v_jm for j = " << j1
894 <<
" and m = " << m <<
" because auxf changed by " 895 << (exact_auxf-exact_auxf_start) <<
" [vs. predicted:] " 897 v_jm.AddVec(1.0, v_jm_orig);
902 if (backtrack_iter == 0) {
907 double gamma_jmi = accs.
gamma_[j1](m,
i);
908 double quadratic_term = std::max(gamma_jmi, gamma_jm * w_jm(
i));
909 double scalar = gamma_jmi - gamma_jm * w_jm(
i) + quadratic_term
911 g_jm.AddVec(scalar, model->
w_.
Row(
i));
912 if (quadratic_term > 1.0e-10) {
913 H_jm.
AddVec2(static_cast<BaseFloat>(quadratic_term), model->
w_.
Row(
i));
919 opts.
K = options_.max_cond;
920 opts.
eps = options_.epsilon;
925 double exact_auxf_impr = exact_auxf - exact_auxf_start;
926 tot_auxf_impr += exact_auxf_impr;
927 if (backtrack_iter == max_backtrack) {
928 KALDI_WARN <<
"Backtracked " << max_backtrack <<
" times [not updating]";
930 model->
v_[j1].Row(m).CopyFromVec(v_jm);
933 if (j1 < 3 && m < 3) {
934 KALDI_LOG <<
"Auxf impr for j = " << j1 <<
" m = " << m <<
" is " 935 << (exact_auxf_impr/gamma_jm+1.0e-20) <<
" per frame over " 936 << gamma_jm <<
" frames.";
940 *auxf_impr_ptr = tot_auxf_impr;
960 Sigma.
AddVec2(static_cast<BaseFloat>(1.0), model->
v_[j1].Row(m));
964 KALDI_LOG <<
"Not renormalizing v because scatter is not positive definite" 965 <<
" -- maybe first iter?";
968 Sigma.
Scale(1.0 / count);
969 KALDI_LOG <<
"Scatter of vectors v is : ";
991 KALDI_LOG <<
"Note on the next diagnostic: the first number is generally not " 992 <<
"that meaningful as it relates to the static offset";
993 H_sm_proj.
PrintEigs(
"H_sm_proj (Significance of dims in vector space.. note)");
1006 #ifdef KALDI_PARANOID 1023 model->
v_[j1].Row(m).CopyFromVec(tmp);
1029 model->
w_.
Row(i).CopyFromVec(tmp);
1035 model->
M_[
i].CopyFromMat(tmpM);
1044 double tot_count = 0.0, tot_like_impr = 0.0;
1047 KALDI_WARN <<
"For component " <<
i <<
": not updating M due to very " 1048 <<
"small count (=" << gamma_i(
i) <<
").";
1054 opts.
K = options_.max_cond;
1055 opts.
eps = options_.epsilon;
1063 model->
M_[
i].CopyFromMat(Mi);
1066 KALDI_VLOG(2) <<
"Objf impr for projection M for i = " << i <<
", is " 1067 << (impr/(gamma_i(i) + 1.0e-20)) <<
" over " << gamma_i(i)
1070 tot_count += gamma_i(i);
1071 tot_like_impr += impr;
1073 tot_like_impr /= (tot_count + 1.0e-20);
1074 KALDI_LOG <<
"Overall objective function improvement for model projections " 1075 <<
"M is " << tot_like_impr <<
" over " << tot_count <<
" frames";
1076 return tot_like_impr;
1101 model->
M_prior_.resize(nGaussians);
1102 for (
int32 i = 0;
i < nGaussians;
i++) {
1108 if (options_.full_col_cov || options_.full_row_cov) {
1110 for (
int32 i = 0;
i < nGaussians;
i++)
1112 avg_M.
Scale(1.0 / nGaussians);
1115 for (
int32 iter = 0; iter < options_.map_M_prior_iters; iter++) {
1117 double prior_like = -0.5 * nGaussians * (Ddim * Sdim *
Log(2 *
M_PI)
1120 for (
int32 i = 0;
i < nGaussians;
i++) {
1122 MDiff.
AddMat(-1.0, avg_M);
1129 KALDI_LOG <<
"Before iteration " << iter
1130 <<
" of updating prior over M, log like per dimension modeled is " 1131 << prior_like / (nGaussians * Ddim * Sdim);
1135 if (options_.full_col_cov) {
1138 for (
int32 i = 0;
i < nGaussians;
i++) {
1140 MDiff.
AddMat(-1.0, avg_M);
1142 model->
col_cov_inv_.AddMat2Sp(1.0 / (Ddim * nGaussians),
1147 limited = model->
col_cov_inv_.LimitCond(options_.max_cond,
1150 KALDI_LOG <<
"Computing column covariances for M: limited " << limited
1151 <<
" singular values, max condition is " 1152 << options_.max_cond;
1157 if (options_.full_row_cov) {
1160 for (
int32 i = 0;
i < nGaussians;
i++) {
1162 MDiff.
AddMat(-1.0, avg_M);
1164 model->
row_cov_inv_.AddMat2Sp(1.0 / (Sdim * nGaussians),
1169 limited = model->
row_cov_inv_.LimitCond(options_.max_cond,
1172 KALDI_LOG <<
"Computing row covariances for M: limited " << limited
1173 <<
" singular values, max condition is " 1174 << options_.max_cond;
1191 KALDI_LOG <<
"Prior smoothing parameter: Tau = " << options_.tau_map_M;
1194 KALDI_LOG <<
"Computing the prior first";
1195 ComputeMPrior(model);
1203 Q2.
Scale(options_.tau_map_M);
1205 double totcount = 0.0, tot_like_impr = 0.0;
1206 for (
int32 i = 0;
i < nGaussians; ++
i) {
1208 KALDI_WARN <<
"For component " <<
i <<
": not updating M due to very " 1209 <<
"small count (=" << gamma_i(
i) <<
").";
1223 G.
AddMat(1.0, prior_term_i);
1229 opts.
K = options_.max_cond;
1230 opts.
eps = options_.epsilon;
1233 model->
M_[
i].CopyFromMat(Mi);
1235 KALDI_LOG <<
"Objf impr for projection M for i = " << i <<
", is " 1236 << (impr / (gamma_i(i) + 1.0e-20)) <<
" over " << gamma_i(i)
1239 totcount += gamma_i(i);
1240 tot_like_impr += impr;
1242 tot_like_impr /= (totcount + 1.0e-20);
1243 KALDI_LOG <<
"Overall objective function improvement for model projections " 1244 <<
"M is " << tot_like_impr <<
" over " << totcount <<
" frames";
1245 return tot_like_impr;
1270 j1_start = block_size * thread_id,
1271 j1_end = std::min(accs.
num_groups_, j1_start + block_size);
1280 for (
int32 j1 = j1_start; j1 < j1_end; j1++) {
1293 if (!log_a.empty()) w_j.
AddMat(1.0, log_a[j1]);
1297 double gamma_jm = accs.
gamma_[j1].Row(m).Sum();
1303 v_vT.
AddVec2(static_cast<BaseFloat>(1.0), v_j_double.
Row(m));
1304 v_vT_m.
Row(m).CopyFromPacked(v_vT);
1310 linear_term(m,
i) = accs.
gamma_[j1](m,
i) - gamma_jm * w_jm(
i);
1311 quadratic_term(m,
i) = std::max(accs.
gamma_[j1](m,
i),
1312 gamma_jm * w_jm(
i));
1325 KALDI_LOG <<
"Updating weight projections";
1329 double tot_predicted_like_impr = 0.0, tot_like_before = 0.0,
1330 tot_like_after = 0.0;
1338 double tot_count = gamma_i.
Sum();
1340 for (
int iter = 0; iter < options_.weight_projections_iters; iter++) {
1343 double k_like_before = 0.0;
1345 UpdateWClass c(accs, *model, w, log_a, &F_i, &g_i, &k_like_before);
1349 double k_predicted_like_impr = 0.0, k_like_after = 0.0;
1350 double min_step = 0.001, step_size;
1354 opts.
K = options_.max_cond;
1355 opts.
eps = options_.epsilon;
1357 for (step_size = 1.0; step_size >= min_step; step_size /= 2) {
1358 k_predicted_like_impr = 0.0;
1370 delta_w.
Scale(step_size);
1371 double predicted_impr =
VecVec(delta_w, g_i.
Row(
i)) -
1372 0.5 *
VecSpVec(delta_w, this_F_i, delta_w);
1379 KALDI_LOG <<
"Predicted objf impr for w, iter = " << iter
1380 <<
", i = " <<
i <<
" is " 1381 << (predicted_impr/gamma_i(
i)+1.0e-20)
1382 <<
" per frame over " << gamma_i(
i) <<
" frames.";
1383 k_predicted_like_impr += predicted_impr;
1384 w.
Row(
i).AddVec(1.0, delta_w);
1391 if (!log_a.empty()) w_j.
AddMat(1.0, log_a[j1]);
1392 for (
int32 m = 0; m < M; m++) {
1398 KALDI_VLOG(2) <<
"For iteration " << iter <<
", updating w gives " 1399 <<
"predicted per-frame like impr " 1400 << (k_predicted_like_impr / tot_count) <<
", actual " 1401 << ((k_like_after - k_like_before) / tot_count) <<
", over " 1402 << tot_count <<
" frames";
1403 if (k_like_after < k_like_before) {
1405 if (fabs(k_like_after - k_like_before) / tot_count < 1.0e-05) {
1406 k_like_after = k_like_before;
1407 KALDI_WARN <<
"Not updating weights as not increasing auxf and " 1408 <<
"probably due to numerical issues (since small change).";
1411 KALDI_WARN <<
"Halving step size for weights as likelihood did " 1418 if (step_size < min_step) {
1422 tot_predicted_like_impr += k_predicted_like_impr;
1423 tot_like_after += k_like_after;
1424 tot_like_before += k_like_before;
1431 tot_predicted_like_impr /= tot_count;
1432 tot_like_after = (tot_like_after - tot_like_before) / tot_count;
1433 KALDI_LOG <<
"**Overall objf impr for w is " << tot_predicted_like_impr
1434 <<
", actual " << tot_like_after <<
", over " 1435 << tot_count <<
" frames";
1436 return tot_like_after;
1442 double tot_impr = 0.0;
1445 opts.
K = options_.max_cond;
1446 opts.
eps = options_.epsilon;
1449 if (gamma_i(
i) < 200.0) {
1450 KALDI_LOG <<
"Count is small " << gamma_i(
i) <<
" for gaussian " 1451 <<
i <<
", not updating u_i.";
1458 double impr_per_frame = impr / gamma_i(
i);
1459 if (impr_per_frame > options_.max_impr_u) {
1460 KALDI_WARN <<
"Updating speaker weight projections u, for Gaussian index " 1461 <<
i <<
", impr/frame is " << impr_per_frame <<
" over " 1462 << gamma_i(
i) <<
" frames, scaling back to not exceed " 1463 << options_.max_impr_u;
1464 double scale = options_.max_impr_u / impr_per_frame;
1466 delta_u.
Scale(scale);
1473 KALDI_LOG <<
"Objf impr for spk weight-projection u for i = " << (
i)
1474 <<
", is " << (impr / (gamma_i(
i) + 1.0e-20)) <<
" over " 1475 << gamma_i(
i) <<
" frames";
1477 u_i.
AddVec(1.0, delta_u);
1478 model->
u_.
Row(
i).CopyFromVec(u_i);
1481 KALDI_LOG <<
"**Overall objf impr for u is " << (tot_impr/gamma_i.
Sum())
1482 <<
", over " << gamma_i.
Sum() <<
" frames";
1483 return tot_impr / gamma_i.
Sum();
1489 double tot_count = 0.0, tot_like_impr = 0.0;
1491 KALDI_ERR <<
"Speaker subspace dim is zero or no stats accumulated";
1495 opts.
K = options_.max_cond;
1496 opts.
eps = options_.epsilon;
1501 KALDI_WARN <<
"Not updating speaker basis for i = " << (
i)
1502 <<
" because count is too small " << (gamma_i(
i));
1510 model->
N_[
i].CopyFromMat(Ni);
1512 KALDI_LOG <<
"Objf impr for spk projection N for i = " << (
i)
1513 <<
", is " << (impr / (gamma_i(
i) + 1.0e-20)) <<
" over " 1514 << gamma_i(
i) <<
" frames";
1516 tot_count += gamma_i(
i);
1517 tot_like_impr += impr;
1520 KALDI_LOG <<
"**Overall objf impr for N is " << (tot_like_impr/tot_count)
1521 <<
" over " << tot_count <<
" frames";
1522 return (tot_like_impr/tot_count);
1529 double tot_count = gamma_i.
Sum();
1530 if (tot_count == 0) {
1531 KALDI_WARN <<
"Not renormalizing N, since there are no counts.";
1542 RTot.
Scale(1.0 / tot_count);
1546 KALDI_LOG <<
"Renormalizing N, eigs are: " << (eigs);
1549 sqrteigs(t) = sqrt(eigs(t));
1561 model->
N_[
i].CopyFromMat(Ntmp);
1573 double tot_objf_impr = 0.0, tot_t = 0.0;
1584 Sigma_i_ml.
AddSp(1.0, accs.
S_[i]);
1586 covfloor.
AddSp(1.0, Sigma_i_ml);
1589 if (gamma_i(i) > 1.0e-20) {
1590 Sigma_i_ml.
Scale(1 / (gamma_i(i) + 1.0e-20));
1596 objf_improv(i) = model->
SigmaInv_[
i].LogPosDefDet() -
1603 if (gamma_i.
Sum() == 0) {
1604 KALDI_WARN <<
"Updating variances: zero counts. Setting floor to unit.";
1607 covfloor.
Scale(options_.cov_floor / gamma_i.
Sum());
1610 KALDI_WARN <<
"Covariance flooring matrix is poorly conditioned. Fixed " 1611 <<
"up " << tmp <<
" eigenvalues.";
1615 if (options_.cov_diag_ratio > 1000) {
1616 KALDI_LOG <<
"Assuming you want to build a diagonal system since " 1617 <<
"cov_diag_ratio is large: making diagonal covFloor.";
1620 covfloor(i,
j) = 0.0;
1630 if (gamma_i(
i) < options_.cov_diag_ratio * accs.
feature_dim_) {
1631 KALDI_WARN <<
"For Gaussian component " <<
i <<
": Too low count " 1632 << gamma_i(
i) <<
" for covariance matrix estimation. Setting to " 1635 for (
int32 e = 0; e <
d; e++)
1636 Sigma_i(d, e) = 0.0;
1640 KALDI_WARN <<
"For Gaussian component " <<
i <<
": Floored " << floored
1641 <<
" covariance eigenvalues.";
1647 int floored = Sigma_i.ApplyFloor(covfloor);
1649 KALDI_WARN <<
"For Gaussian component " <<
i <<
": Floored " 1650 << floored <<
" covariance eigenvalues.";
1655 objf_improv(
i) += Sigma_i.LogPosDefDet() +
1657 objf_improv(
i) *= (-0.5 * gamma_i(
i));
1659 tot_objf_impr += objf_improv(
i);
1660 tot_t += gamma_i(
i);
1662 KALDI_VLOG(2) <<
"objf impr from variance update =" << objf_improv(
i)
1663 / (gamma_i(
i) + 1.0e-20) <<
" over " << (gamma_i(
i))
1664 <<
" frames for i = " << (
i);
1667 KALDI_WARN <<
"Updating within-class covariance matrix i = " << (
i)
1668 <<
", numerical problem";
1675 KALDI_LOG <<
"**Overall objf impr for variance update = " 1676 << (tot_objf_impr / (tot_t+ 1.0e-20))
1677 <<
" over " << tot_t <<
" frames";
1678 return tot_objf_impr / (tot_t + 1.0e-20);
1684 KALDI_LOG <<
"Updating substate mixture weights";
1687 double tot_gamma = 0.0, objf_impr = 0.0;
1689 double gamma_j_sm = 0.0;
1693 smoothed_occs.
Add(options_.tau_c);
1694 gamma_j_sm += smoothed_occs.
Sum();
1695 tot_gamma += occs.
Sum();
1697 for (
int32 m = 0; m < num_substates; m++) {
1698 double cur_weight = model->
c_[j2](m);
1699 if (cur_weight <= 0) {
1700 KALDI_WARN <<
"Zero or negative weight, flooring";
1701 cur_weight = 1.0e-10;
1703 model->
c_[j2](m) = smoothed_occs(m) / gamma_j_sm;
1704 objf_impr +=
Log(model->
c_[j2](m) / cur_weight) * occs(m);
1707 KALDI_LOG <<
"**Overall objf impr for c is " << (objf_impr/tot_gamma)
1708 <<
", over " << tot_gamma <<
" frames.";
1709 return (objf_impr/tot_gamma);
1749 posteriors.
Scale(weight);
1760 double tot_count = 0.0;
1764 const vector<int32> &gselect = frame_vars.
gselect;
1768 zt_jmi(spk_space_dim);
1771 bool have_spk_dep_weights = (
a_s_.
Dim() != 0);
1773 for (
int32 m = 0; m < num_substates; m++) {
1775 for (
int32 ki = 0; ki < static_cast<int32>(gselect.size()); ki++) {
1779 if (gammat_jmi != 0.0) {
1780 gammat_jm += gammat_jmi;
1781 tot_count += gammat_jmi;
1783 xt_jmi.CopyFromVec(frame_vars.
xt);
1784 xt_jmi.AddVec(-1.0, mu_jmi);
1793 if (have_spk_dep_weights) {
1796 if (d_jms == -1.0) d_jms = 1.0;
1811 if (tot_gamma < min_count) {
1812 KALDI_WARN <<
"Updating speaker vectors, count is " << tot_gamma
1813 <<
" < " << min_count <<
"not updating.";
1814 if (objf_impr_out) *objf_impr_out = 0.0;
1815 if (count_out) *count_out = 0.0;
1819 UpdateNoU(v_s, objf_impr_out, count_out);
1821 UpdateWithU(model, v_s, objf_impr_out, count_out);
1838 for (
int32 i = 0;
i < num_gauss;
i++)
1845 double tot_objf_impr =
1850 KALDI_LOG <<
"*Objf impr for speaker vector is " << (tot_objf_impr / tot_gamma)
1851 <<
" over " << tot_gamma <<
" frames.";
1853 if (objf_impr_out) *objf_impr_out = tot_objf_impr;
1854 if (count_out) *count_out = tot_gamma;
1866 if (v_s_ptr->
Dim() != T) v_s_ptr->
Resize(T);
1871 for (
int32 i = 0;
i < num_gauss;
i++)
1875 int32 num_iters = 5,
1877 max_backtracks = 10;
1883 for (
int32 iter = 0; iter < num_iters; iter++) {
1885 v_s_per_iter.
Row(iter).CopyFromVec(v_s);
1896 tilde_w_is.
AddVec(1.0, log_a_s_);
1902 if (iter > 0 && auxf(iter) < auxf(iter-1) &&
1905 KALDI_WARN <<
"Backtracking in speaker vector update, on iter " 1906 << iter <<
", auxfs are " << auxf(iter-1) <<
" -> " 1909 v_s.
AddVec(0.5, v_s_per_iter.
Row(iter-1));
1910 if (++num_backtracks >= max_backtracks) {
1911 KALDI_WARN <<
"Backtracked " << max_backtracks
1912 <<
" times in speaker-vector update.";
1914 v_s_per_iter.
Row(num_iters-1).CopyFromVec(v_s_per_iter.
Row(iter-1));
1917 auxf(num_iters-1) = auxf(iter-1);
1923 for (
int32 i = 0; i < num_gauss; i++) {
1936 double auxf_change = auxf(num_iters-1) - auxf(0);
1937 KALDI_LOG <<
"*Objf impr for speaker vector is " << (auxf_change / tot_gamma)
1938 <<
" per frame, over " << tot_gamma <<
" frames.";
1940 if (objf_impr_out) *objf_impr_out = auxf_change;
1941 if (count_out) *count_out = tot_gamma;
1947 KALDI_ERR <<
"In destructor of MleAmSgmm2Accs: detected that you forgot to " 1948 "call CommitStatsForSpk()";
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Matrix< double > t_
[SSGMM] each row is one of the t_i quantities in the less-exact version of the SSGMM update for the s...
std::vector< Vector< double > > gamma_c_
Sub-state occupancies gamma_{jm}^{(c)} for each sub-state.
bool IsUnit(Real cutoff=1.0e-05) const
void CopyFromVec(const SubVector< OtherReal > &orig)
CopyFromVec just interprets the vector as having the same layout as the packed matrix.
Matrix< BaseFloat > u_
[SSGMM] Speaker-subspace weight projection vectors. Dimension is [I][T]
bool IsDiagonal(Real cutoff=1.0e-05) const
Class for definition of the subspace Gmm acoustic model.
void UpdateNoU(Vector< BaseFloat > *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out)
Packed symmetric matrix class.
void Write(std::ostream &out, bool binary) const
write to stream.
void ApplyExp()
Apply exponential to each value in vector.
BaseFloat Accumulate(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, int32 pdf_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk_vars)
Returns likelihood.
BaseFloat Accumulate(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, int32 pdf_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk_vars)
Accumulate statistics. Returns per-frame log-likelihood.
MleSgmm2SpeakerAccs(const AmSgmm2 &model, BaseFloat rand_prune_=1.0e-05)
Initialize the object. Error if speaker subspace not set up.
void RenormalizeV(const MleAmSgmm2Accs &accs, AmSgmm2 *model, const Vector< double > &gamma_i, const std::vector< SpMatrix< double > > &H)
BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *post) const
Similar to LogLikelihood() function above, but also computes the posterior probabilities for the pre-...
static void ComputeQ(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, std::vector< SpMatrix< double > > *Q)
Compute the Q_i quantities (Eq. 64).
This class describes the options for maximizing various quadratic objective functions.
Vector< BaseFloat > xt
x'(t), FMLLR-adapted, dim = [D], eq.(33)
double SolveQuadraticProblem(const SpMatrix< double > &H, const VectorBase< double > &g, const SolverOptions &opts, VectorBase< double > *x)
void GetSubstateMean(int32 j1, int32 m, int32 i, VectorBase< Real > *mean_out) const
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
bool IsZero(Real cutoff=1.0e-06) const
Returns true if matrix is all zeros.
std::vector< Vector< BaseFloat > > c_
c_{jm}, mixture weights. Dimension is [J2][#mix]
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Real SolveQuadraticMatrixProblem(const SpMatrix< Real > &Q, const MatrixBase< Real > &Y, const SpMatrix< Real > &SigmaInv, const SolverOptions &opts, MatrixBase< Real > *M)
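Maximizes the auxiliary function $Q(M) = \mathrm{tr}(M^T \Sigma^{-1} Y) - \frac{1}{2}\,\mathrm{tr}(\Sigma^{-1} M Q M^T)$. Like a numerically stable version of $M := Y Q^{-1}$.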
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Matrix< BaseFloat > w_
Phonetic-subspace weight projection vectors. Dimension is [I][S].
Vector< BaseFloat > v_s
Speaker adaptation vector v_^{(s)}. Dim is [T].
static void UpdateWGetStats(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, const Matrix< double > &w, const std::vector< Matrix< double > > &log_a, Matrix< double > *F_i, Matrix< double > *g_i, double *tot_like, int32 num_threads, int32 thread_id)
Called, multithreaded, inside UpdateW.
void GetStateOccupancies(Vector< BaseFloat > *occs) const
Accessors.
void Read(std::istream &in_stream, bool binary, bool add)
Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState *state=NULL)
double UpdateM(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &Q, const Vector< double > &gamma_i, AmSgmm2 *model)
double MapUpdateM(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &Q, const Vector< double > &gamma_i, AmSgmm2 *model)
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
Vector< double > a_s_
a_i^{(s)}. For SSGMM.
std::vector< Matrix< BaseFloat > > n_
n_{jim}, per-Gaussian normalizer. Dimension is [J1][I][#mix]
std::vector< Matrix< BaseFloat > > N_
Speaker-subspace projections. Dimension is [I][D][T].
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
double UpdateW(const MleAmSgmm2Accs &accs, const std::vector< Matrix< double > > &log_a, const Vector< double > &gamma_i, AmSgmm2 *model)
BaseFloat rand_prune_
small constant to randomly prune tiny posteriors
static void ComputeSMeans(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, std::vector< SpMatrix< double > > *S_means)
Compute the S_means quantities, minus sum: (Y_i M_i^T + M_i Y_i^T).
const MleAmSgmm2Updater & updater_
double UpdatePhoneVectors(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &H, const std::vector< Matrix< double > > &log_a, AmSgmm2 *model) const
In this update, smoothing terms are not supported.
std::vector< SpMatrix< double > > S_
S_{i}^{-}, scatter of adapted feature vectors x_{i}(t). Dim is [I][D][D].
SpMatrix< BaseFloat > col_cov_inv_
void SetUnit()
Set to the unit (identity) matrix.
void ApplyLog()
Apply natural log to all elements.
std::vector< Matrix< double > > gamma_
Gaussian occupancies gamma_{jmi} for each substate and Gaussian index, pooled over groups...
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <– beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow If prune > 0.0, ignores terms less than the max - prune...
MatrixIndexT NumRows() const
void RenormalizeN(const MleAmSgmm2Accs &accs, const Vector< double > &gamma_i, AmSgmm2 *model)
int32 PhoneSpaceDim() const
double UpdateVars(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &S_means, const Vector< double > &gamma_i, AmSgmm2 *model)
Real SolveDoubleQuadraticMatrixProblem(const MatrixBase< Real > &G, const SpMatrix< Real > &P1, const SpMatrix< Real > &P2, const SpMatrix< Real > &Q1, const SpMatrix< Real > &Q2, const SolverOptions &opts, MatrixBase< Real > *M)
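Maximizes the auxiliary function $Q(M) = \mathrm{tr}(M^T G) - \frac{1}{2}\,\mathrm{tr}(P_1 M Q_1 M^T) - \frac{1}{2}\,\mathrm{tr}(P_2 M Q_2 M^T)$. Encountered in matrix update with a prior.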
std::vector< Matrix< BaseFloat > > v_
The parameters in a particular SGMM state.
void CopyFromSp(const SpMatrix< Real > &other)
int32 num_groups_
Other model specifications.
~UpdatePhoneVectorsClass()
void Update(const MleAmSgmm2Accs &accs, AmSgmm2 *model, SgmmUpdateFlagsType flags)
std::vector< SpMatrix< double > > U_
the U_i quantities from the less-exact version of the SSGMM update for the speaker weight projections...
void UpdatePhoneVectorsInternal(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &H, const std::vector< Matrix< double > > &log_a, AmSgmm2 *model, double *auxf_impr, int32 num_threads, int32 thread_id) const
Matrix< BaseFloat > zti
z_{i}(t), dim = [I][S], eq.(35)
BaseFloat GetDjms(int32 j1, int32 m, Sgmm2PerSpkDerivedVars *spk_vars) const
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S].
void PrintEigs(const char *name)
void CommitStatsForSpk(const AmSgmm2 &model, const Sgmm2PerSpkDerivedVars &spk_vars)
Accumulates global stats for the current speaker (if applicable).
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors: this <– alpha * v .* r + beta*this.
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v'
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
int32 NumSubstatesForPdf(int32 j2) const
t .. not really part of SGMM.
void Cholesky(const SpMatrix< Real > &orig)
std::vector< Matrix< double > > Y_
The stats which are not tied to any state.
BaseFloat AccumulateFromPosteriors(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, const Matrix< BaseFloat > &posteriors, int32 pdf_index, Sgmm2PerSpkDerivedVars *spk_vars)
Accumulate statistics, given posteriors.
uint16 SgmmUpdateFlagsType
Bitwise OR of the above flags.
Matrix< BaseFloat > xti
x_{i}(t) = x'(t) - o_i(s): dim = [I][D], eq.(34)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM.
std::vector< Matrix< double > > y_
The SGMM state specific stats.
void Scale(Real alpha)
Multiply each element with a scalar value.
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
The letters correspond to the variable names.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
std::vector< Matrix< BaseFloat > > M_prior_
int32 Pdf2Group(int32 j2) const
void RunMultiThreaded(const C &c_in)
Here, class C should inherit from MultiThreadable.
const MleAmSgmm2Accs & accs_
void AddMatSp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const Real beta)
this <– beta*this + alpha*A*B.
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void Update(const AmSgmm2 &model, BaseFloat min_count, Vector< BaseFloat > *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out)
Update speaker vector.
int32 NumPdfs() const
Various model dimensions.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
UpdatePhoneVectorsClass(const UpdatePhoneVectorsClass &other)
void ComputeH(std::vector< SpMatrix< Real > > *H_i) const
Computes quantities H = M_i Sigma_i^{-1} M_i^T.
double TraceSpSp(const SpMatrix< double > &A, const SpMatrix< double > &B)
std::vector< Matrix< double > > NtransSigmaInv_
N_i^T \Sigma_{i}^{-1}. Needed for y^{(s)}.
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
const std::vector< Matrix< double > > & log_a_
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void SetZero()
Sets matrix to zero.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
void Clear()
Clear the statistics.
BaseFloat AccumulateFromPosteriors(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, const Matrix< BaseFloat > &posteriors, int32 pdf_index, Sgmm2PerSpkDerivedVars *spk_vars)
Returns count accumulated (may differ from posteriors.Sum() due to weight pruning).
Real Sum() const
Returns sum of the elements.
std::vector< int32 > gselect
SpMatrix< BaseFloat > row_cov_inv_
void MulColsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = (*this) * diag(scale).
void Write(std::ostream &out_stream, bool binary) const
std::vector< Matrix< double > > a_
[SSGMM] These a_{jmi} quantities are dimensionally the same as the gamma quantities.
Vector< double > y_s_
Statistics for speaker adaptation (vectors), stored per-speaker.
std::vector< SpMatrix< double > > H_spk_
The following variable does not change per speaker, it just relates to the speaker subspace...
UpdatePhoneVectorsClass(const MleAmSgmm2Updater &updater, const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &H, const std::vector< Matrix< double > > &log_a, AmSgmm2 *model, double *auxf_impr)
Vector< double > a_s_
[SSGMM], this is a per-speaker variable storing the a_i^{(s)} quantities that we will use in order to...
void GetNtransSigmaInv(std::vector< Matrix< Real > > *out) const
A class representing a vector.
std::vector< SpMatrix< double > > R_
R_{i}, quadratic term for speaker subspace estimation. Dim is [I][T][T].
double UpdateSubstateWeights(const MleAmSgmm2Accs &accs, AmSgmm2 *model)
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void UpdateWithU(const AmSgmm2 &model, Vector< BaseFloat > *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out)
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <– beta*this + alpha * M * A * M^T.
Real FrobeniusNorm() const
Frobenius norm, which is the sqrt of sum of square elements.
int32 feature_dim_
Dimensionality of various subspaces.
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
const std::vector< SpMatrix< double > > & H_
std::vector< Matrix< double > > Z_
Stats Z_{i} for speaker-subspace projections N. Dim is [I][D][T].
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Check(const AmSgmm2 &model, bool show_properties=true) const
Checks the various accumulators for correct sizes given a model.
double UpdateU(const MleAmSgmm2Accs &accs, const Vector< double > &gamma_i, AmSgmm2 *model)
bool HasSpeakerDependentWeights() const
True if doing SSGMM.
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
void SymPosSemiDefEig(VectorBase< Real > *s, MatrixBase< Real > *P, Real tolerance=0.001) const
This is the version of SVD that we implement for symmetric positive definite matrices.
Provides a vector abstraction class.
void Add(Real c)
Add a constant to each element of a vector.
Class for the accumulators associated with the phonetic-subspace model parameters.
void SetZero()
Set vector to all zeros.
Vector< double > gamma_s_
gamma_{i}^{(s)}. Per-speaker counts for each Gaussian. Dimension is [I]
int32 SpkSpaceDim() const
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * v (with casting between floats and doubles) ...
void AddSpMat(const Real alpha, const SpMatrix< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*SpA*B.
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
int32 NumSubstatesForGroup(int32 j1) const
void ComputeMPrior(AmSgmm2 *model)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Vector< double > gamma_s_
gamma_{i}^{(s)}.
void ResizeAccumulators(const AmSgmm2 &model, SgmmUpdateFlagsType flags, bool have_spk_vecs)
Resizes the accumulators to the correct sizes given the model.
std::vector< Matrix< BaseFloat > > w_jmi_
[SSGMM] w_{jmi}, dimension is [J1][#mix][I]. Computed from w_ and v_.
MatrixIndexT LimitCondDouble(Real maxCond=1.0e+5, bool invert=false)
double UpdateN(const MleAmSgmm2Accs &accs, const Vector< double > &gamma_i, AmSgmm2 *model)
static void ComputeLogA(const MleAmSgmm2Accs &accs, std::vector< Matrix< double > > *log_a)