  gamma_i->Resize(NumGauss());
  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
    int32 M = NumSubstatesForGroup(j1);
    const std::vector<int32> &pdfs = group2pdf_[j1];
    // ...
    for (size_t i = 0; i < pdfs.size(); i++) {
      // ...
      substate_weight.AddVec(state_occupancies(j2), c_[j2]);
    }
    for (int32 m = 0; m < M; m++) {
      w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j1].Row(m), 0.0);
      // ...
      gamma_i->AddVec(substate_weight(m), w_jm);
  if (pdf2group_.empty()) {
    KALDI_WARN << "ComputePdfMappings(): no pdf2group_ map, assuming you "
                  "are reading in old model.";
    pdf2group_.resize(v_.size());
    for (int32 j2 = 0; j2 < static_cast<int32>(pdf2group_.size()); j2++)
      // ...
  }
  for (int32 j2 = 0; j2 < static_cast<int32>(pdf2group_.size()); j2++) {
    int32 j1 = pdf2group_[j2];
    if (group2pdf_.size() <= j1) group2pdf_.resize(j1 + 1);
    group2pdf_[j1].push_back(j2);
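
// For reference, group2pdf_ is simply the inverse image of pdf2group_: pdf j2
// belongs to group pdf2group_[j2], and group2pdf_[j1] lists every such j2.
// A standalone sketch of the same inversion (hypothetical helper, not part of
// this file): e.g. pdf2group = {0, 0, 1, 1, 1} yields group2pdf = {{0, 1}, {2, 3, 4}}.
static std::vector<std::vector<int32> > InvertPdf2Group(
    const std::vector<int32> &pdf2group) {
  std::vector<std::vector<int32> > group2pdf;
  for (int32 j2 = 0; j2 < static_cast<int32>(pdf2group.size()); j2++) {
    int32 j1 = pdf2group[j2];
    if (static_cast<int32>(group2pdf.size()) <= j1)
      group2pdf.resize(j1 + 1);             // grow the list of groups as needed
    group2pdf[j1].push_back(j2);            // record j2 under its group
  }
  return group2pdf;
}
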
  int32 num_pdfs = -1, feat_dim, num_gauss;
  // ...
  while (token != "</SGMM>") {
    if (token == "<PDF2GROUP>") {
      // ...
      ComputePdfMappings();
    } else if (token == "<WEIGHTIDX2GAUSS>") {
      std::vector<int32> garbage;
      // ...
    } else if (token == "<DIAG_UBM>") {
      diag_ubm_.Read(in_stream, binary);
    } else if (token == "<FULL_UBM>") {
      full_ubm_.Read(in_stream, binary);
    } else if (token == "<SigmaInv>") {
      SigmaInv_.resize(num_gauss);
      for (int32 i = 0; i < num_gauss; i++) {
        SigmaInv_[i].Read(in_stream, binary);
      }
    } else if (token == "<M>") {
      M_.resize(num_gauss);
      for (int32 i = 0; i < num_gauss; i++) {
        M_[i].Read(in_stream, binary);
      }
    } else if (token == "<N>") {
      N_.resize(num_gauss);
      for (int32 i = 0; i < num_gauss; i++) {
        N_[i].Read(in_stream, binary);
      }
    } else if (token == "<w>") {
      w_.Read(in_stream, binary);
    } else if (token == "<u>") {
      u_.Read(in_stream, binary);
    } else if (token == "<v>") {
      int32 num_groups = group2pdf_.size();
      if (num_groups == 0) {
        KALDI_WARN << "Reading old model with new code (should still work)";
        num_groups = num_pdfs;
      }
      v_.resize(num_groups);
      for (int32 j1 = 0; j1 < num_groups; j1++) {
        v_[j1].Read(in_stream, binary);
      }
    } else if (token == "<c>") {
      // ...
      for (int32 j2 = 0; j2 < num_pdfs; j2++) {
        c_[j2].Read(in_stream, binary);
      }
    } else if (token == "<n>") {
      int32 num_groups = group2pdf_.size();
      if (num_groups == 0) num_groups = num_pdfs;
      n_.resize(num_groups);
      for (int32 j1 = 0; j1 < num_groups; j1++) {
        n_[j1].Read(in_stream, binary);
      }
      // ...
    } else if (token == "<M_Prior>") {
      // ...
      M_prior_.resize(num_gauss);
      for (int32 i = 0; i < num_gauss; i++) {
        M_prior_[i].Read(in_stream, binary);
      }
    } else if (token == "<Row_Cov_Inv>") {
      row_cov_inv_.Read(in_stream, binary);
    } else if (token == "<Col_Cov_Inv>") {
      col_cov_inv_.Read(in_stream, binary);
    } else {
      KALDI_ERR << "Unexpected token '" << token << "' in model file ";
    }
    // ...
  }

  if (pdf2group_.empty())
    ComputePdfMappings();
  // ...
    ComputeNormalizers();
  if (HasSpeakerDependentWeights())
  KALDI_ASSERT(static_cast<size_t>(j2) < pdf2group_.size());
  int32 j1 = pdf2group_[j2];
  int32 num_pdfs = NumPdfs(),
      feat_dim = FeatureDim(),
      num_gauss = NumGauss();
  // ...
  if (!binary) out_stream << "\n";
  WriteToken(out_stream, binary, "<NUMSTATES>");
  // ...
  WriteToken(out_stream, binary, "<DIMENSION>");
  // ...
  if (!binary) out_stream << "\n";
  // ...
  diag_ubm_.Write(out_stream, binary);
  // ...
  full_ubm_.Write(out_stream, binary);
  // ...
  if (!binary) out_stream << "\n";
  for (int32 i = 0; i < num_gauss; i++) {
    SigmaInv_[i].Write(out_stream, binary);
  }
  // ...
  if (!binary) out_stream << "\n";
  for (int32 i = 0; i < num_gauss; i++) {
    M_[i].Write(out_stream, binary);
  }
  if (N_.size() != 0) {
    // ...
    if (!binary) out_stream << "\n";
    for (int32 i = 0; i < num_gauss; i++) {
      N_[i].Write(out_stream, binary);
    }
  }
  // ...
  w_.Write(out_stream, binary);
  // ...
  u_.Write(out_stream, binary);
  // ...
  WriteToken(out_stream, binary, "<PDF2GROUP>");
  // ...
  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
    v_[j1].Write(out_stream, binary);
  }
  // ...
  for (int32 j2 = 0; j2 < num_pdfs; j2++) {
    c_[j2].Write(out_stream, binary);
  }
  // ...
    KALDI_WARN << "Not writing normalizers since they are not present.";
  // ...
    for (int32 j1 = 0; j1 < NumGroups(); j1++)
      n_[j1].Write(out_stream, binary);
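
// Example (not from am-sgmm2.cc): a minimal sketch of reading and writing an
// AmSgmm2 with the usual Kaldi stream wrappers.  It assumes the layout used by
// the sgmm2 command-line tools, where a TransitionModel precedes the <SGMM>
// object in the model file, and that kSgmmWriteAll is the write-all flag
// declared in am-sgmm2.h; the file names are placeholders.
#include "util/kaldi-io.h"
#include "hmm/transition-model.h"
#include "sgmm2/am-sgmm2.h"

void ExampleReadWriteSgmm2() {
  using namespace kaldi;
  TransitionModel trans_model;
  AmSgmm2 am_sgmm;
  bool binary;
  Input ki("final.mdl", &binary);            // hypothetical model path
  trans_model.Read(ki.Stream(), binary);
  am_sgmm.Read(ki.Stream(), binary);

  Output ko("final_copy.mdl", true /* binary */);
  trans_model.Write(ko.Stream(), true);
  am_sgmm.Write(ko.Stream(), true, kSgmmWriteAll);
}
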
  int32 J1 = NumGroups(), J2 = NumPdfs(),
      num_gauss = NumGauss(),
      feat_dim = FeatureDim(),
      phn_dim = PhoneSpaceDim(),
      spk_dim = SpkSpaceDim();
  // ...
  KALDI_LOG << "AmSgmm2: #pdfs = " << J2 << ", #pdf-groups = " << J1
            << ", #Gaussians = " << num_gauss << ", feature dim = " << feat_dim
            << ", phone-space dim =" << phn_dim
            << ", speaker-space dim =" << spk_dim;
  KALDI_ASSERT(J1 > 0 && num_gauss > 0 && feat_dim > 0 && phn_dim > 0
               && J2 > 0 && J2 >= J1);

  std::ostringstream debug_str;
  // ...
  KALDI_ASSERT(SigmaInv_.size() == static_cast<size_t>(num_gauss));
  for (int32 i = 0; i < num_gauss; i++) {
    // ...
        SigmaInv_[i](0, 0) > 0.0);
  }
  // ...
  KALDI_ASSERT(N_.size() == static_cast<size_t>(num_gauss));
  for (int32 i = 0; i < num_gauss; i++)
    KALDI_ASSERT(N_[i].NumRows() == feat_dim && N_[i].NumCols() == spk_dim);
  if (u_.NumRows() == 0) {
    debug_str << "Speaker-weight projections: no.";
  } else {
    KALDI_ASSERT(u_.NumRows() == num_gauss && u_.NumCols() == spk_dim);
    debug_str << "Speaker-weight projections: yes.";
  }
  // ...
  KALDI_ASSERT(M_.size() == static_cast<size_t>(num_gauss));
  for (int32 i = 0; i < num_gauss; i++) {
    KALDI_ASSERT(M_[i].NumRows() == feat_dim && M_[i].NumCols() == phn_dim);
  }
  KALDI_ASSERT(w_.NumRows() == num_gauss && w_.NumCols() == phn_dim);
  // ...
               c_.size() == static_cast<size_t>(J2));
  int32 nSubstatesTot = 0;
  for (int32 j1 = 0; j1 < J1; j1++) {
    int32 M_j = NumSubstatesForGroup(j1);
    nSubstatesTot += M_j;
    // ...
                 v_[j1].NumCols() == phn_dim);
  }
  debug_str << "Substates: " << (nSubstatesTot) << ". ";
  int32 nSubstateWeights = 0;
  for (int32 j2 = 0; j2 < J2; j2++) {
    int32 j1 = Pdf2Group(j2);
    int32 M = NumSubstatesForPdf(j2);
    // ...
    nSubstateWeights += M;
  }
  debug_str << "SubstateWeights: " << (nSubstateWeights) << ". ";
  // ...
  if (n_.size() == 0) {
    debug_str << "Normalizers: no. ";
  } else {
    debug_str << "Normalizers: yes. ";
    // ...
    for (int32 j1 = 0; j1 < J1; j1++) {
      // ...
                   n_[j1].NumCols() == NumSubstatesForGroup(j1));
    }
  }
  // ...
  if (w_jmi_.size() == 0) {
    debug_str << "Computed weights: no. ";
  } else {
    debug_str << "Computed weights: yes. ";
    for (int32 j1 = 0; j1 < J1; j1++) {
      KALDI_ASSERT(w_jmi_[j1].NumRows() == NumSubstatesForGroup(j1) &&
                   w_jmi_[j1].NumCols() == num_gauss);
    }
  }
  // ...
  KALDI_LOG << "Subspace GMM model properties: " << debug_str.str();
                                    const std::vector<int32> &pdf2group,
                                    int32 phn_subspace_dim,
                                    int32 spk_subspace_dim,
                                    bool speaker_dependent_weights,
                                    // ...
  pdf2group_ = pdf2group;
  ComputePdfMappings();
  full_ubm_.CopyFromFullGmm(full_gmm);
  diag_ubm_.CopyFromFullGmm(full_gmm);
  if (phn_subspace_dim < 1 || phn_subspace_dim > full_gmm.Dim() + 1) {
    KALDI_WARN << "Initial phone-subspace dimension must be >= 1, value is "
               << phn_subspace_dim << "; setting to " << full_gmm.Dim() + 1;
    phn_subspace_dim = full_gmm.Dim() + 1;
  }
  // ...
  InitializeMw(phn_subspace_dim, norm_xform);
  if (spk_subspace_dim > 0)
    InitializeNu(spk_subspace_dim, norm_xform, speaker_dependent_weights);
  InitializeVecsAndSubstateWeights(self_weight);
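
// Example (not from am-sgmm2.cc): a rough sketch of initializing an SGMM from a
// full-covariance UBM, in the spirit of sgmm2-init.  The trivial pdf2group map
// and the subspace dimensions below are illustrative assumptions only; the
// InitializeFromFullGmm() signature follows the declaration in am-sgmm2.h.
#include <vector>
#include "gmm/full-gmm.h"
#include "sgmm2/am-sgmm2.h"

void ExampleInitSgmm2(const kaldi::FullGmm &ubm, kaldi::int32 num_pdfs) {
  using namespace kaldi;
  std::vector<int32> pdf2group(num_pdfs);
  for (int32 j2 = 0; j2 < num_pdfs; j2++)
    pdf2group[j2] = j2;                      // trivial map: one group per pdf
  AmSgmm2 am_sgmm;
  am_sgmm.InitializeFromFullGmm(ubm, pdf2group,
                                40 /* phn_subspace_dim */,
                                39 /* spk_subspace_dim */,
                                true /* speaker_dependent_weights */,
                                1.0 /* self_weight */);
  am_sgmm.ComputeNormalizers();              // needed before likelihood evaluation
}
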
                            bool copy_normalizers,
                            // ...
  diag_ubm_.CopyFromDiagGmm(other.diag_ubm_);
  full_ubm_.CopyFromFullGmm(other.full_ubm_);
  // ...
  if (copy_normalizers) n_ = other.n_;
  if (copy_weights) w_jmi_ = other.w_jmi_;
                                   const std::vector<int32> &gselect,
                                   // ...
  KALDI_ASSERT(!n_.empty() && "ComputeNormalizers() must be called.");
  // ...
  per_frame_vars->Resize(gselect.size(), FeatureDim(), PhoneSpaceDim());
  // ...
  per_frame_vars->gselect = gselect;
  per_frame_vars->xt.CopyFromVec(data);
  // ...
  for (int32 ki = 0, last = gselect.size(); ki < last; ki++) {
    // ...
    per_frame_vars->xti.Row(ki).CopyFromVec(per_frame_vars->xt);
    if (spk_vars.v_s.Dim() != 0)
      per_frame_vars->xti.Row(ki).AddVec(-1.0, spk_vars.o_s.Row(i));
  }
  // ...
  bool speaker_dep_weights =
      (spk_vars.v_s.Dim() != 0 && HasSpeakerDependentWeights());
  for (int32 ki = 0, last = gselect.size(); ki < last; ki++) {
    // ...
    SigmaInv_xt.AddSpVec(1.0, SigmaInv_[i], per_frame_vars->xti.Row(ki), 0.0);
    // ...
    per_frame_vars->zti.Row(ki).AddMatVec(1.0, M_[i], kTrans, SigmaInv_xt, 0.0);
    // ...
                                    SigmaInv_xt) + ssgmm_term;
  const vector<int32> &gselect = per_frame_vars.gselect;
  int32 num_gselect = gselect.size(), num_substates = v_[j1].NumRows();
  // ...
  loglikes->Resize(num_gselect, num_substates);
  bool speaker_dep_weights =
      (spk_vars->v_s.Dim() != 0 && HasSpeakerDependentWeights());
  if (speaker_dep_weights) {
    // ...
    KALDI_ASSERT(static_cast<int32>(w_jmi_.size()) == NumGroups() &&
                 "You need to call ComputeWeights().");
  }
  for (int32 ki = 0; ki < num_gselect; ki++) {
    // ...
    logp_xi.AddVec(1.0, n_[j1].Row(i));
    logp_xi.Add(per_frame_vars.nti(ki));
    // ...
    if (speaker_dep_weights) {
      // ...
      if (log_d.Dim() == 0) {
        log_d.Resize(num_substates);
#ifdef KALDI_PARANOID
  bool random_test = (Rand() % 1000 == 1);
#else
  bool random_test = false;
#endif
  if (pdf_cache.t == t) {
    if (!random_test) return pdf_cache.log_like;
    // ...
  }
  // ...
  int32 j1 = pdf2group_[j2];
  // ...
  if (substate_cache.t != t) {
    substate_cache.t = t;
    // ...
    ComponentLogLikes(per_frame_vars, j1, spk_vars, &loglikes);
    // ...
    substate_cache.likes.Resize(num_substates);
    substate_cache.likes.AddRowSumMat(1.0, loglikes);
  }
  // ...
  KALDI_ASSERT(log_like == log_like && log_like - log_like == 0);
  int32 j1 = pdf2group_[j2];
  ComponentLogLikes(per_frame_vars, j1, spk_vars, post);
  // ...
  loglike += Log(tot_like);
  post->Scale(1.0 / tot_like);
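
// Example (not from am-sgmm2.cc): a minimal per-frame evaluation sketch that
// strings together GaussianSelection(), ComputePerFrameVars() and
// LogLikelihood() the way the sgmm2 decoding/alignment code does.  It assumes
// spk_vars has already been filled by ComputePerSpkDerivedVars(), and that the
// likelihood cache was constructed for this model and is advanced once per
// frame elsewhere; object names follow the declarations in am-sgmm2.h.
#include <vector>
#include "sgmm2/am-sgmm2.h"

kaldi::BaseFloat ExampleFrameLogLike(
    const kaldi::AmSgmm2 &am_sgmm,
    const kaldi::VectorBase<kaldi::BaseFloat> &frame,
    kaldi::int32 pdf_id,                         // j2
    kaldi::Sgmm2PerSpkDerivedVars *spk_vars,
    kaldi::Sgmm2LikelihoodCache *cache) {
  using namespace kaldi;
  Sgmm2GselectConfig gsel_config;                // default diag/full n-best pruning
  std::vector<int32> gselect;
  am_sgmm.GaussianSelection(gsel_config, frame, &gselect);

  Sgmm2PerFrameDerivedVars per_frame_vars;
  am_sgmm.ComputePerFrameVars(frame, gselect, *spk_vars, &per_frame_vars);

  return am_sgmm.LogLikelihood(per_frame_vars, pdf_id, cache, spk_vars,
                               0.0 /* log_prune */);
}
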
  const std::vector<int32> &pdfs = group2pdf_[j1];
  int32 phn_dim = PhoneSpaceDim(), cur_M = NumSubstatesForGroup(j1),
      num_pdfs_for_group = pdfs.size();
  // ...
  if (cur_M == tgt_M) return;
  // ...
  v_[j1].Resize(tgt_M, phn_dim);
  v_[j1].Range(0, cur_M, 0, phn_dim).CopyFromMat(tmp_v_j);
  // ...
  for (int32 i = 0; i < num_pdfs_for_group; i++) {
    // ...
    c_j.Row(i).Range(0, cur_M).CopyFromVec(c_[j2]);
  }
  // ...
  for (; cur_M < tgt_M; cur_M++) {
    // ...
    split_m = std::max_element(data, data + cur_M) - data;
    // ...
    for (int32 i = 0; i < num_pdfs_for_group; i++) {
      // ...
      c_j(i, split_m) = c_j(i, cur_M) = c_j(i, split_m) / 2;
    }
    // ...
    std::generate(rand_vec.Data(), rand_vec.Data() + rand_vec.Dim(),
                  // ...
    v_[j1].Row(cur_M).CopyFromVec(v_[j1].Row(split_m));
    v_[j1].Row(cur_M).AddVec(1.0, v_shift);
    v_[j1].Row(split_m).AddVec(-1.0, v_shift);
  }
  // ...
  for (int32 i = 0; i < num_pdfs_for_group; i++) {
    // ...
    c_[j2].Resize(tgt_M);
    c_[j2].CopyFromVec(c_j.Row(i));
  int32 J1 = NumGroups(), J2 = NumPdfs();
  // ...
  for (int32 j2 = 0; j2 < J2; j2++)
    group_occupancies(Pdf2Group(j2)) += pdf_occupancies(j2);
  // ...
  vector<int32> tgt_num_substates;
  // ...
  int32 tot_num_substates_old = 0, tot_num_substates_new = 0;
  vector< SpMatrix<BaseFloat> > H_i;
  // ...
  ComputeHsmFromModel(H_i, pdf_occupancies, &sqrt_H_sm, opts.max_cond);
  // ...
  for (int32 j1 = 0; j1 < J1; j1++) {
    int32 cur_M = NumSubstatesForGroup(j1),
        tgt_M = tgt_num_substates[j1];
    tot_num_substates_old += cur_M;
    tot_num_substates_new += std::max(cur_M, tgt_M);
    // ...
    SplitSubstatesInGroup(pdf_occupancies, opts, sqrt_H_sm, j1, tgt_M);
  }
  if (tot_num_substates_old == tot_num_substates_new) {
    KALDI_LOG << "Not splitting substates; current #substates is "
              << tot_num_substates_old << " and target is "
              // ...
  } else {
    KALDI_LOG << "Getting rid of normalizers as they will no longer be valid";
    // ...
    KALDI_LOG << "Split " << tot_num_substates_old << " substates to "
              << tot_num_substates_new;
  }
  int32 initial_dim = PhoneSpaceDim(),
      feat_dim = FeatureDim();
  // ...
  if (target_dim < initial_dim)
    KALDI_ERR << "You asked to increase phn dim to a value lower than the "
              << " current dimension, " << target_dim << " < " << initial_dim;
  if (target_dim > initial_dim + feat_dim) {
    KALDI_WARN << "Cannot increase phone subspace dimensionality from "
               << initial_dim << " to " << target_dim << ", increasing to "
               << initial_dim + feat_dim;
    target_dim = initial_dim + feat_dim;
  }
  // ...
  if (initial_dim < target_dim) {
    // ...
    for (int32 i = 0; i < NumGauss(); i++) {
      // ...
      M_[i].Resize(feat_dim, target_dim);
      M_[i].Range(0, feat_dim, 0, tmp_M.NumCols()).CopyFromMat(tmp_M);
      M_[i].Range(0, feat_dim, tmp_M.NumCols(),
                  target_dim - tmp_M.NumCols()).CopyFromMat(norm_xform.Range(0,
                      feat_dim, 0, target_dim - tmp_M.NumCols()));
    }
    // ...
    w_.Range(0, tmp_w.NumRows(), 0, tmp_w.NumCols()).CopyFromMat(tmp_w);
    // ...
    for (int32 j1 = 0; j1 < NumGroups(); j1++) {
      // ...
      v_[j1].Range(0, tmp_v_j.NumRows(), 0, tmp_v_j.NumCols()).CopyFromMat(
          // ...
    }
    KALDI_LOG << "Phone subspace dimensionality increased from "
              << initial_dim << " to " << target_dim;
  } else {
    KALDI_LOG << "Phone subspace dimensionality unchanged, since target "
              << "dimension (" << target_dim << ") <= initial dimension (" <<
                                 bool speaker_dependent_weights) {
  int32 initial_dim = SpkSpaceDim(),
      feat_dim = FeatureDim();
  // ...
  N_.resize(NumGauss());
  // ...
  if (target_dim < initial_dim)
    KALDI_ERR << "You asked to increase spk dim to a value lower than the "
              << " current dimension, " << target_dim << " < " << initial_dim;
  if (target_dim > initial_dim + feat_dim) {
    KALDI_WARN << "Cannot increase speaker subspace dimensionality from "
               << initial_dim << " to " << target_dim << ", increasing to "
               << initial_dim + feat_dim;
    target_dim = initial_dim + feat_dim;
  }
  // ...
  if (initial_dim < target_dim) {
    int32 dim_change = target_dim - initial_dim;
    // ...
    for (int32 i = 0; i < NumGauss(); i++) {
      // ...
      N_[i].Resize(feat_dim, target_dim);
      if (initial_dim != 0) {
        N_[i].Range(0, feat_dim, 0, tmp_N.NumCols()).CopyFromMat(tmp_N);
      }
      N_[i].Range(0, feat_dim, tmp_N.NumCols(), dim_change).CopyFromMat(
          norm_xform.Range(0, feat_dim, 0, dim_change));
    }
    // ...
    if (u_.NumRows() != 0 || (initial_dim == 0 && speaker_dependent_weights))
      u_.Resize(NumGauss(), target_dim, kCopyData);
    KALDI_LOG << "Speaker subspace dimensionality increased from "
              << initial_dim << " to " << target_dim;
    if (initial_dim == 0 && speaker_dependent_weights)
      KALDI_LOG << "Added parameters u for speaker-dependent weights.";
  } else {
    KALDI_LOG << "Speaker subspace dimensionality unchanged, since target "
              << "dimension (" << target_dim << ") <= initial dimension (" <<
  int32 J1 = NumGroups();
  // ...
  for (int32 j1 = 0; j1 < J1; j1++) {
    int32 M = NumSubstatesForGroup(j1);
    w_jmi_[j1].Resize(M, i);
    // ...
    for (int32 m = 0; m < M; m++)
      w_jmi_[j1].Row(m).ApplySoftMax();
  if (n_.empty()) ComputeNormalizers();
  if (diag_ubm_.NumGauss() != full_ubm_.NumGauss()
      || diag_ubm_.Dim() != full_ubm_.Dim()) {
    diag_ubm_.CopyFromFullGmm(full_ubm_);
  }
  if (w_jmi_.empty() && HasSpeakerDependentWeights())
                          int32 *entropy_count_ptr,
                          double *entropy_sum_ptr):
      am_sgmm_(am_sgmm), entropy_count_ptr_(entropy_count_ptr),
      entropy_sum_ptr_(entropy_sum_ptr), entropy_count_(0),
      entropy_sum_(0.0) { }
  // ...
      am_sgmm_(other.am_sgmm_), entropy_count_ptr_(other.entropy_count_ptr_),
      entropy_sum_ptr_(other.entropy_sum_ptr_), entropy_count_(0),
      entropy_sum_(0.0) { }
  // ...
    *entropy_count_ptr_ += entropy_count_;
    *entropy_sum_ptr_ += entropy_sum_;
  // ...
  inline void operator() () {
    // ...
    am_sgmm_->ComputeNormalizersInternal(num_threads_, thread_id_,
                                         // ...
  }
  // ...
  n_.resize(NumPdfs());
  int32 entropy_count = 0;
  double entropy_sum = 0.0;
  // ...
  KALDI_LOG << "Entropy of weights in substates is "
            << (entropy_sum / entropy_count) << " over " << entropy_count
            << " substates, equivalent to perplexity of "
            << (Exp(entropy_sum / entropy_count));
  KALDI_LOG << "Done computing normalizers";
                                          int32 *entropy_count,
                                          double *entropy_sum) {
  // ...
  for (int32 i = 0; i < NumGauss(); i++) {
    // ...
    log_det_Sigma(i) = - SigmaInv_[i].LogPosDefDet();
    // ...
      KALDI_WARN << "Covariance is not positive definite, setting to unit";
      SigmaInv_[i].SetUnit();
      log_det_Sigma(i) = 0.0;
  }
  // ...
  int32 J1 = NumGroups();
  // ...
  int block_size = (NumPdfs() + num_threads - 1) / num_threads;
  int j_start = thread * block_size, j_end = std::min(J1, j_start + block_size);
  // ...
  int32 I = NumGauss();
  for (int32 j1 = j_start; j1 < j_end; j1++) {
    int32 M = NumSubstatesForGroup(j1);
    // ...
    for (int32 m = 0; m < M; m++) {
      log_w_jm.Row(m).Add(-1.0 * log_w_jm.Row(m).LogSumExp());
      // ...
      for (int32 i = 0; i < NumGauss(); i++) {
        (*entropy_sum) -= log_w_jm(m, i) * Exp(log_w_jm(m, i));
      }
    }
    // ...
    for (int32 m = 0; m < M; m++) {
      // ...
      n_[j1](i, m) = log_w_jm(m, i) - 0.5 * (log_det_Sigma(i) + DLog2pi
          // ...
        KALDI_LOG << "Warning: normalizer for j1 = " << j1 << ", m = " << m
                  << ", i = " << i << " is infinite or NaN " << tmp << "= "
                  << log_w_jm(m, i) << "+"
                  << (-0.5 * log_det_Sigma(i)) << "+" << (-0.5 * DLog2pi)
                  << "+" << (mu_SigmaInv_mu) << ", setting to finite.";
        n_[j1](i, m) = -1.0e+40;
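
// The normalizer computation above follows Kaldi's usual multi-threading
// pattern (util/kaldi-thread.h): a small functor class derived from
// MultiThreadable is copied once per worker by RunMultiThreaded(), each copy
// processes the slice given by thread_id_ / num_threads_ in operator()(), and
// partial results are folded into shared totals when the copies are destroyed,
// as ComputeNormalizersClass does with its entropy counters.  A rough,
// self-contained sketch of that pattern (the summing task is made up purely
// for illustration):
#include <vector>
#include "util/kaldi-thread.h"

class ExampleSumClass : public kaldi::MultiThreadable {
 public:
  ExampleSumClass(const std::vector<double> &data, double *total)
      : data_(data), total_(total), partial_(0.0) { }
  // Each worker sums an interleaved slice of the data.
  void operator()() {
    for (size_t i = thread_id_; i < data_.size(); i += num_threads_)
      partial_ += data_[i];
  }
  // Each per-thread copy folds its partial sum into the shared total.
  ~ExampleSumClass() { *total_ += partial_; }
 private:
  const std::vector<double> &data_;
  double *total_;
  double partial_;
};

double ExampleParallelSum(const std::vector<double> &data) {
  double total = 0.0;
  ExampleSumClass c(data, &total);
  kaldi::RunMultiThreaded(c);   // runs g_num_threads worker copies of c
  return total;
}
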
  if (spk_vars->log_d_jms.empty()) return -1;
  // ...
  KALDI_ASSERT(!w_jmi_.empty() && "You need to call ComputeWeights() on SGMM.");
  // ...
  if (log_d.Dim() == 0) {
    log_d.Resize(NumSubstatesForGroup(j1));
    log_d.AddMatVec(1.0, w_jmi_[j1], kNoTrans, spk_vars->b_is, 0.0);
    // ...
  }
  return Exp(log_d(m));
  int32 num_pdfs = NumPdfs(),
      num_gauss = NumGauss(),
      // ...
  if (total_occ == 0) {
    KALDI_WARN << "Zero probability (computing transform). Using unit "
               // ...
  }
  // ...
  state_posteriors.Scale(1 / total_occ);
  // ...
  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
    const std::vector<int32> &pdfs = group2pdf_[j1];
    int32 M = NumSubstatesForGroup(j1);
    // ...
    for (size_t i = 0; i < pdfs.size(); i++) {
      // ...
      substate_weight.AddVec(state_posteriors(j2), c_[j2]);
    }
    for (int32 m = 0; m < M; m++) {
      BaseFloat this_substate_weight = substate_weight(m);
      // ...
      for (int32 i = 0; i < num_gauss; i++) {
        BaseFloat weight = this_substate_weight * w_jm(i);
        mu_jmi.AddMatVec(1.0, M_[i], kNoTrans, v_[j1].Row(m), 0.0);
        // ...
        global_mean.AddVec(weight, mu_jmi);
        // ...
        between_class_covar.AddVec2(weight, mu_jmi);
        gauss_weight(i) += weight;
      }
    }
  }
  between_class_covar.AddVec2(-1.0, global_mean);
  // ...
  for (int32 i = 0; i < num_gauss; i++) {
    // ...
    within_class_covar.AddSp(gauss_weight(i), Sigma);
  }
  // ...
  diag_mean_scatter->Resize(dim);
  xform->Resize(dim, dim + 1);
  inv_xform->Resize(dim, dim + 1);
  // ...
  tmpB.Eig(diag_mean_scatter, &U);
  // ...
    KALDI_WARN << "Floored " << n << " elements of the mean-scatter matrix.";
  // ...
#ifdef KALDI_PARANOID
  // ...
#endif
  // ...
  b_pre.AddMatVec(-1.0, Apre, kNoTrans, global_mean, 0.0);
  for (int32 r = 0; r < dim; r++) {
    xform->Row(r)(dim) = b_pre(r);
  }
  // ...
  inv_xform->Range(0, dim, 0, dim).InvertDouble();
  for (int32 r = 0; r < dim; r++)
    inv_xform->Row(r)(dim) = global_mean(r);
template<typename Real>
// ...
               "Cannot compute N^{T} \\Sigma_{i}^{-1} without speaker projections.");
  out->resize(NumGauss());
  // ...
  for (int32 i = 0; i < NumGauss(); i++) {
    // ...
    (*out)[i].Resize(SpkSpaceDim(), FeatureDim());
    // ...
  }

template<class Real>
// ...
  (*H_i).resize(NumGauss());
  // ...
  for (int32 i = 0; i < NumGauss(); i++) {
    (*H_i)[i].Resize(PhoneSpaceDim());
    // ...
    (*H_i)[i].CopyFromSp(H_i_tmp);
  int32 ddim = full_ubm_.Dim();
  // ...
  int32 num_gauss = full_ubm_.NumGauss();
  w_.Resize(num_gauss, phn_subspace_dim);
  M_.resize(num_gauss);
  for (int32 i = 0; i < num_gauss; i++) {
    full_ubm_.GetComponentMean(i, &mean);
    // ...
    thisM.Resize(ddim, phn_subspace_dim);
    // ...
    int32 nonrandom_dim = std::min(phn_subspace_dim - 1, ddim),
        random_dim = phn_subspace_dim - 1 - nonrandom_dim;
    thisM.Range(0, ddim, 1, nonrandom_dim).CopyFromMat(
        // ...
    thisM.Range(0, ddim, nonrandom_dim + 1, random_dim).SetRandn();

                           // ...
                           bool speaker_dependent_weights) {
  int32 ddim = full_ubm_.Dim();
  // ...
  int32 num_gauss = full_ubm_.NumGauss();
  N_.resize(num_gauss);
  for (int32 i = 0; i < num_gauss; i++) {
    N_[i].Resize(ddim, spk_subspace_dim);
    // ...
    int32 nonrandom_dim = std::min(spk_subspace_dim, ddim),
        random_dim = spk_subspace_dim - nonrandom_dim;
    // ...
    N_[i].Range(0, ddim, 0, nonrandom_dim).
        CopyFromMat(norm_xform.Range(0, ddim, 0, nonrandom_dim), kNoTrans);
    // ...
    N_[i].Range(0, ddim, nonrandom_dim, random_dim).SetRandn();
  }
  if (speaker_dependent_weights) {
    u_.Resize(num_gauss, spk_subspace_dim);
                                   const std::vector<int32> &pdf2group,
                                   // ...
  pdf2group_ = pdf2group;
  ComputePdfMappings();
  // ...
  diag_ubm_.CopyFromDiagGmm(other.diag_ubm_);
  full_ubm_.CopyFromFullGmm(other.full_ubm_);
  // ...
  InitializeVecsAndSubstateWeights(self_weight);

  int32 J1 = NumGroups(), J2 = NumPdfs();
  // ...
  int32 phn_subspace_dim = PhoneSpaceDim();
  KALDI_ASSERT(phn_subspace_dim > 0 && "Initialize M and w first.");
  // ...
  if (self_weight == 1.0) {
    for (int32 j1 = 0; j1 < J1; j1++) {
      v_[j1].Resize(1, phn_subspace_dim);
      // ...
    }
    for (int32 j2 = 0; j2 < J2; j2++) {
      // ...
    }
  } else {
    for (int32 j1 = 0; j1 < J1; j1++) {
      int32 npdfs = group2pdf_[j1].size();
      v_[j1].Resize(npdfs, phn_subspace_dim);
      for (int32 m = 0; m < npdfs; m++)
        // ...
    }
    for (int32 j2 = 0; j2 < J2; j2++) {
      int32 j1 = pdf2group_[j2], npdfs = group2pdf_[j1].size();
      c_[j2].Resize(npdfs);
      if (npdfs == 1) c_[j2].Set(1.0);
      // ...
      double other_weight = (1.0 - self_weight) / std::max(npdfs - 1, 1);
      c_[j2].Set(other_weight);
      for (int32 k = 0; k < npdfs; k++)
        if (group2pdf_[j1][k] == j2) c_[j2](k) = self_weight;
  const std::vector< SpMatrix<BaseFloat> > &inv_covars(full_ubm_.inv_covars());
  int32 num_gauss = full_ubm_.NumGauss();
  int32 dim = full_ubm_.Dim();
  SigmaInv_.resize(num_gauss);
  for (int32 i = 0; i < num_gauss; i++) {
    SigmaInv_[i].Resize(dim);
    SigmaInv_[i].CopyFromSp(inv_covars[i]);
  }

  int32 num_gauss = NumGauss();
  // ...
  H_sm->Resize(PhoneSpaceDim());
  // ...
  ComputeGammaI(state_occupancies, &gamma_i);
  // ...
  for (int32 i = 0; i < num_gauss; i++) {
    if (gamma_i(i) > 0) {
      H_sm->AddSp(gamma_i(i), H[i]);
      // ...
    }
  }
  // ...
  H_sm->Scale(1.0 / sum);
  // ...
    KALDI_WARN << "Limited " << (tmp) << " eigenvalues of H_sm";
  // ...
  KALDI_LOG << "total count is " << tot_sum;
  std::vector< SpMatrix<BaseFloat> > tmp_covars;
  // ...
  for (int32 i = 0; i < num_gauss; i++) {
    // ...
    total_weight += w_i;
    within_class_covar.AddSp(w_i, tmp_covars[i]);
    between_class_covar.AddVec2(w_i, tmp_means.Row(i));
    global_mean.AddVec(w_i, tmp_means.Row(i));
  }
  // ...
  if (fabs(total_weight - 1.0) > 0.001) {
    KALDI_WARN << "Total weight across the GMMs is " << (total_weight)
               << ", renormalizing.";
    global_mean.Scale(1.0 / total_weight);
    within_class_covar.Scale(1.0 / total_weight);
    between_class_covar.Scale(1.0 / total_weight);
  }
  between_class_covar.AddVec2(-1.0, global_mean);
  // ...
#ifdef KALDI_PARANOID
  // ...
  wc_covar_full.CopyFromSp(within_class_covar);
  // ...
  wc_covar_full.AddMatMat(1.0, tmp, kNoTrans, inv_xform, kTrans, 0.0);
  // ...
  bc_covar_full.CopyFromSp(between_class_covar);
  // ...
  bc_covar_full.AddMatMat(1.0, tmp, kNoTrans, inv_xform, kTrans, 0.0);
  // ...
#endif
  if (vars->v_s.Dim() != 0) {
    // ...
    vars->o_s.Resize(NumGauss(), FeatureDim());
    int32 num_gauss = NumGauss();
    // ...
    for (int32 i = 0; i < num_gauss; i++) {
      // ...
    }
    if (HasSpeakerDependentWeights()) {
      // ...
      vars->b_is.Resize(NumGauss());
      // ...
      vars->b_is.ApplyExp();
      // ...
          vars->b_is(i) = 1.0;
      // ...
    } else {
      vars->b_is.Resize(0);
                                      std::vector<int32> *gselect) const {
  // ...
               diag_ubm_.NumGauss() == full_ubm_.NumGauss() &&
               diag_ubm_.Dim() == data.Dim());
  // ...
  int32 num_gauss = diag_ubm_.NumGauss();
  // ...
  std::vector< std::pair<BaseFloat, int32> > pruned_pairs;
  // ...
    diag_ubm_.LogLikelihoods(data, &loglikes);
    // ...
    std::nth_element(ptr, ptr + num_gauss - config.diag_gmm_nbest, ptr + num_gauss);
    // ...
    for (int32 g = 0; g < num_gauss; g++)
      if (loglikes(g) >= thresh)
        pruned_pairs.push_back(
            std::make_pair(full_ubm_.ComponentLogLikelihood(data, g), g));
  // ...
    full_ubm_.LogLikelihoods(data, &loglikes);
    for (int32 g = 0; g < num_gauss; g++)
      pruned_pairs.push_back(std::make_pair(loglikes(g), g));
  // ...
  if (pruned_pairs.size() > static_cast<size_t>(config.full_gmm_nbest)) {
    std::nth_element(pruned_pairs.begin(),
                     // ...
                     pruned_pairs.end());
    pruned_pairs.erase(pruned_pairs.begin(),
                       // ...
  }
  gselect->resize(pruned_pairs.size());
  // ...
  std::sort(pruned_pairs.begin(), pruned_pairs.end(),
            std::greater< std::pair<BaseFloat, int32> >());
  for (size_t i = 0; i < pruned_pairs.size(); i++) {
    loglikes_tmp(i) = pruned_pairs[i].first;
    (*gselect)[i] = pruned_pairs[i].second;
  }
  return loglikes_tmp.LogSumExp();
  int32 T = this->size();
  // ...
    KALDI_ASSERT((*this)[t].tids.size() == (*this)[t].posteriors.size());
    for (size_t i = 0; i < (*this)[t].posteriors.size(); i++) {
      (*this)[t].posteriors[i].Write(os, binary);
    }
  // ...
    size_t sz = (*this)[t].tids.size();
    (*this)[t].posteriors.resize(sz);
    for (size_t i = 0; i < sz; i++)
      (*this)[t].posteriors[i].Read(is, binary);