22 #ifndef KALDI_SGMM2_FMLLR_SGMM2_H_ 23 #define KALDI_SGMM2_FMLLR_SGMM2_H_ 59 fmllr_min_count_basis = 100.0;
60 fmllr_min_count = 1000.0;
61 fmllr_min_count_full = 5000.0;
63 bases_occ_scale = 0.2;
70 std::string module =
"Sgmm2FmllrConfig: ";
72 "Number of iterations in FMLLR estimation.");
74 "Number of iterations to find optimal FMLLR step size.");
76 "Minimum occupancy count to estimate FMLLR using basis matrices.");
78 "Minimum occupancy count to estimate FMLLR (without bases).");
80 "Minimum occupancy count to stop using basis matrices for FMLLR.");
82 "Number of FMLLR basis matrices.");
84 "Scale per-speaker count to determine number of CMLLR bases.");
94 void Write(std::ostream &out_stream,
bool binary)
const;
95 void Read(std::istream &in_stream,
bool binary);
97 return (pre_xform_.NumRows() == 0 || inv_xform_.NumRows() == 0 ||
98 mean_scatter_.Dim() == 0);
100 bool HasBasis()
const {
return fmllr_bases_.size() != 0; }
130 void Write(std::ostream &out_stream,
bool binary)
const;
131 void Read(std::istream &in_stream,
bool binary,
bool add);
145 void AccumulateFromPosteriors(
const AmSgmm2 &sgmm,
148 const std::vector<int32> &gauss_select,
152 void AccumulateForFmllrSubspace(
const AmSgmm2 &sgmm,
165 bool Update(
const AmSgmm2 &model,
189 double min_eig = 0.0);
193 #endif // KALDI_SGMM2_FMLLR_SGMM2_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Class for definition of the subspace Gmm acoustic model.
Class for computing the accumulators needed for the maximum-likelihood estimate of FMLLR transforms f...
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
BaseFloat fmllr_min_count_full
Minimum occupancy count to stop using FMLLR bases and switch to regular FMLLR estimation.
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
void Register(OptionsItf *opts)
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
BaseFloat bases_occ_scale
Scale per-speaker count to determine number of FMLLR (CMLLR) bases.
int32 Dim() const
Accessors.
int32 fmllr_iters
Number of iterations in FMLLR estimation.
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
void Init(const AmSgmm2 &sgmm, const Vector< BaseFloat > &state_occs)
Configuration variables needed in the estimation of FMLLR for SGMMs.
int32 step_iters
Iterations to find optimal FMLLR step size.
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
A class representing a vector.
BaseFloat fmllr_min_count_basis
Minimum occupancy count to estimate FMLLR using basis matrices.
const AffineXformStats & stats() const
int32 dim_
Dimension of feature vectors.
Global adaptation parameters.
Provides a vector abstraction class.
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
std::vector< Matrix< BaseFloat > > fmllr_bases_
FMLLR basis matrices $W_b$. Indexed [b][d][d]; dim is [B][D][D+1].
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
AffineXformStats stats_
Accumulated stats.
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
void ComputeFmllrPreXform(const Vector< BaseFloat > &pdf_occs, Matrix< BaseFloat > *xform, Matrix< BaseFloat > *inv_xform, Vector< BaseFloat > *diag_mean_scatter) const
Computes the LDA-like pre-transform and its inverse as well as the eigenvalues of the scatter of the ...