24 #ifndef KALDI_SGMM2_ESTIMATE_AM_SGMM2_H_ 25 #define KALDI_SGMM2_ESTIMATE_AM_SGMM2_H_ 1 73 renormalize_N =
false;
75 weight_projections_iters = 3;
78 map_M_prior_iters = 5;
85 std::string module =
"MleAmSgmm2Options: ";
86 opts->
Register(
"tau-c", &tau_c, module+
87 "Count for smoothing weight update.");
88 opts->
Register(
"cov-floor", &cov_floor, module+
89 "Covariance floor (fraction of average covariance).");
90 opts->
Register(
"cov-diag-ratio", &cov_diag_ratio, module+
91 "Minimum occ/dim ratio below which use diagonal covariances.");
92 opts->
Register(
"max-cond", &max_cond, module+
"Maximum condition number used to " 93 "regularize the solution of certain quadratic auxiliary functions.");
94 opts->
Register(
"weight-projections-iters", &weight_projections_iters, module+
95 "Number for iterations for weight projection estimation.");
96 opts->
Register(
"renormalize-v", &renormalize_V, module+
"If true, renormalize " 97 "the phonetic-subspace vectors to have meaningful sizes.");
98 opts->
Register(
"renormalize-n", &renormalize_N, module+
"If true, renormalize " 99 "the speaker subspace to have meaningful sizes.");
100 opts->
Register(
"max-impr-u", &max_impr_u, module+
"Maximum objective function " 101 "improvement per frame allowed in update of u (to " 102 "maintain stability.");
104 opts->
Register(
"tau-map-M", &tau_map_M, module+
"Smoothing for MAP estimate " 105 "of M (0 means ML update).");
106 opts->
Register(
"map-M-prior-iters", &map_M_prior_iters, module+
107 "Number of iterations to estimate prior covariances for M.");
108 opts->
Register(
"full-row-cov", &full_row_cov, module+
109 "Estimate row covariance instead of using I.");
110 opts->
Register(
"full-col-cov", &full_col_cov, module+
111 "Estimate column covariance instead of using I.");
122 : total_frames_(0.0), total_like_(0.0), feature_dim_(0),
123 phn_space_dim_(0), spk_space_dim_(0), num_gaussians_(0),
124 num_pdfs_(0), num_groups_(0), rand_prune_(rand_prune) {}
129 : total_frames_(0.0), total_like_(0.0), rand_prune_(rand_prune) {
130 ResizeAccumulators(model, flags, have_spk_vecs);
135 void Read(std::istream &in_stream,
bool binary,
bool add);
136 void Write(std::ostream &out_stream,
bool binary)
const;
142 void Check(
const AmSgmm2 &model,
bool show_properties =
true)
const;
168 void CommitStatsForSpk(
const AmSgmm2 &model,
182 std::vector< Matrix<double> >
Y_;
184 std::vector< Matrix<double> >
Z_;
186 std::vector< SpMatrix<double> >
R_;
188 std::vector< SpMatrix<double> >
S_;
192 std::vector< Matrix<double> >
y_;
200 std::vector< Matrix<double> >
a_;
217 std::vector<SpMatrix<double> >
U_;
249 : options_(options) {}
261 friend class EbwEstimateAmSgmm2;
284 int32 thread_id)
const;
335 void ComputeMPrior(
AmSgmm2 *model);
381 void Update(
const AmSgmm2 &model,
394 void UpdateWithU(
const AmSgmm2 &model,
434 accs_(accs), model_(model), w_(w), log_a_(log_a),
435 F_i_ptr_(F_i), g_i_ptr_(g_i), tot_like_ptr_(tot_like) {
443 accs_(other.accs_), model_(other.model_), w_(other.w_),
444 log_a_(other.log_a_), F_i_ptr_(other.F_i_ptr_), g_i_ptr_(other.g_i_ptr_),
445 F_i_(other.F_i_), g_i_(other.g_i_), tot_like_ptr_(other.tot_like_ptr_),
449 F_i_ptr_->AddMat(1.0, F_i_,
kNoTrans);
450 g_i_ptr_->AddMat(1.0, g_i_,
kNoTrans);
451 *tot_like_ptr_ += tot_like_;
454 inline void operator() () {
458 &F_i_, &g_i_, &tot_like_,
459 num_threads_, thread_id_);
465 const std::vector<Matrix<double> > &
log_a_;
478 #endif // KALDI_SGMM2_ESTIMATE_AM_SGMM2_H_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Matrix< double > t_
[SSGMM] each row is one of the t_i quantities in the less-exact version of the SSGMM update for the s...
std::vector< Vector< double > > gamma_c_
Sub-state occupancies gamma_{jm}^{(c)} for each sub-state.
Class for definition of the subspace Gmm acoustic model.
BaseFloat cov_floor
Floor covariance matrices Sigma_i to this times average cov.
int map_M_prior_iters
Number of iterations to update the prior of M.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
UpdateWClass(const UpdateWClass &other)
static void UpdateWGetStats(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, const Matrix< double > &w, const std::vector< Matrix< double > > &log_a, Matrix< double > *F_i, Matrix< double > *g_i, double *tot_like, int32 num_threads, int32 thread_id)
Called, multithreaded, inside UpdateW.
BaseFloat tau_c
Smoothing constant for sub-state weights [count to add to each one].
Class for the accumulators required to update the speaker vectors v_s.
MleAmSgmm2Updater(const MleAmSgmm2Options &options)
Vector< double > a_s_
a_i^{(s)}. For SSGMM.
BaseFloat epsilon
very small value used to prevent SVD crashing.
BaseFloat rand_prune_
small constant to randomly prune tiny posteriors
std::vector< SpMatrix< double > > S_
S_{i}^{-}, scatter of adapted feature vectors x_{i}(t). Dim is [I][D][D].
std::vector< Matrix< double > > gamma_
Gaussian occupancies gamma_{jmi} for each substate and Gaussian index, pooled over groups...
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
bool full_row_cov
Estimate row covariance instead of using I.
Matrix< double > * F_i_ptr_
MleAmSgmm2Options options_
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
BaseFloat cov_diag_ratio
Occupancy-to-dimension ratio below which diagonal covariances are used. Default 2; set to inf to always use diagonal covariances.
std::vector< SpMatrix< double > > U_
the U_i quantities from the less-exact version of the SSGMM update for the speaker weight projections...
void Register(OptionsItf *opts)
void Reconfigure(const MleAmSgmm2Options &options)
int weight_projections_iters
Number of iters when re-estimating weight projections "w".
const std::vector< Matrix< double > > & log_a_
std::vector< Matrix< double > > Y_
The stats which are not tied to any state.
uint16 SgmmUpdateFlagsType
Bitwise OR of the above flags.
bool full_col_cov
Estimate column covariance instead of using I.
std::vector< Matrix< double > > y_
The SGMM state specific stats.
MleAmSgmm2Accs(BaseFloat rand_prune=1.0e-05)
int32 PhoneSpaceDim() const
BaseFloat max_impr_u
max improvement per frame allowed in update of u.
std::vector< Matrix< double > > NtransSigmaInv_
N_i^T \Sigma_{i}^{-1}. Needed for y^{(s)}.
Matrix< double > * g_i_ptr_
UpdateWClass(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, const Matrix< double > &w, const std::vector< Matrix< double > > &log_a, Matrix< double > *F_i, Matrix< double > *g_i, double *tot_like)
std::vector< Matrix< double > > a_
[SSGMM] These a_{jmi} quantities are dimensionally the same as the gamma quantities.
Vector< double > y_s_
Statistics for speaker adaptation (vectors), stored per-speaker.
std::vector< SpMatrix< double > > H_spk_
The following variable does not change per speaker; it just relates to the speaker subspace...
Vector< double > a_s_
[SSGMM], this is a per-speaker variable storing the a_i^{(s)} quantities that we will use in order to...
BaseFloat max_cond
Maximum condition number of matrices in the update, beyond which we do not update.
A class representing a vector.
std::vector< SpMatrix< double > > R_
R_{i}, quadratic term for speaker subspace estimation. Dim is [I][T][T].
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
const Matrix< double > & w_
std::vector< Matrix< double > > Z_
Stats Z_{i} for speaker-subspace projections N. Dim is [I][D][T].
BaseFloat tau_map_M
For MAP update of the phonetic subspace M.
const MleAmSgmm2Accs & accs_
Configuration variables needed in the SGMM estimation process.
Class for the accumulators associated with the phonetic-subspace model parameters.
Vector< double > gamma_s_
gamma_{i}^{(s)}. Per-speaker counts for each Gaussian. Dimension is [I]
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Vector< double > gamma_s_
gamma_{i}^{(s)}.
MleAmSgmm2Accs(const AmSgmm2 &model, SgmmUpdateFlagsType flags, bool have_spk_vecs, BaseFloat rand_prune=1.0e-05)