// RegtreeFmllrDiagGmm::Init(num_xforms, dim): allocate and size the transforms.
if (num_xforms == 0) {  // empty transform
} else {
  xform_matrices_.resize(num_xforms);
  vector< Matrix<BaseFloat> >::iterator xform_itr = xform_matrices_.begin(),
      xform_itr_end = xform_matrices_.end();
  for (; xform_itr != xform_itr_end; ++xform_itr) {
    xform_itr->Resize(dim, dim+1);  // each transform is dim x (dim+1): [A b]
  }
}
// RegtreeFmllrDiagGmm::SetUnit(): reset every transform to the identity.
vector< Matrix<BaseFloat> >::iterator xform_itr = xform_matrices_.begin(),
    xform_itr_end = xform_matrices_.end();
for (; xform_itr != xform_itr_end; ++xform_itr) {
// RegtreeFmllrDiagGmm::Validate(): check that the stored sizes are consistent.

// Uninitialized object:
KALDI_ERR << "Do not call Validate() with an uninitialized object (dim = "
          << dim_ << ")";

// Empty transform: the transform list and the log-determinants must both be empty.
KALDI_ERR << "Number of transforms = " << xform_matrices_.size()
          << ", number of log-determinant terms = " << logdet_.Dim()
          << ". Expected number = 0";

// Non-empty transform: both counts must match num_xforms_.
KALDI_ERR << "Number of transforms = " << xform_matrices_.size()
          << ", number of log-determinant terms = " << logdet_.Dim()
          << ". Expected number = " << num_xforms_;

// Every transform must be dim_ x (dim_ + 1).
KALDI_ERR << "For transform " << i << ": inconsistent size: rows = "
          << xform_matrices_[i].NumRows() << ", cols = "
          << xform_matrices_[i].NumCols() << ", dim = " << dim_;

// Every baseclass must point to a valid transform index.
KALDI_ERR << "For baseclass " << i << ", transform index "
          << bclass2xforms_[i];

if (num_xforms_ > 1) {
  KALDI_WARN << "Multiple FMLLR transforms found without baseclass info.";
}
// RegtreeFmllrDiagGmm::TransformFeature(): apply every transform to the input.
KALDI_WARN << "Asked to apply empty feature transform. Copying instead.";
(*out)[0].Resize(in.Dim());
(*out)[0].CopyFromVec(in);

// Append a trailing 1 so the bias column is applied by a single matrix-vector product.
Vector<BaseFloat> extended_feat(dim_ + 1);
extended_feat.Range(0, dim_).CopyFromVec(in);
extended_feat(dim_) = 1.0;

(*out)[xform_index].Resize(dim_);
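// Each transform W is a dim_ x (dim_ + 1) matrix [A b]; with the extended
// feature xi = [x^T 1]^T the transformed feature is y = W xi = A x + b, and
// one output vector is produced per transform.  A minimal calling sketch
// (assuming an initialized RegtreeFmllrDiagGmm 'fmllr' and an input feature
// vector 'feat' of matching dimension; the names are illustrative):
//   std::vector< Vector<BaseFloat> > xformed;
//   fmllr.TransformFeature(feat, &xformed);  // one entry per transform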
// RegtreeFmllrDiagGmm::Write(): write each transform matrix.
vector< Matrix<BaseFloat> >::const_iterator xform_itr = xform_matrices_.begin(),
    xform_itr_end = xform_matrices_.end();
for (; xform_itr != xform_itr_end; ++xform_itr) {
  xform_itr->Write(out, binary);
}
// RegtreeFmllrDiagGmm::Read(): read each transform and check its size.
vector< Matrix<BaseFloat> >::iterator xform_itr = xform_matrices_.begin(),
    xform_itr_end = xform_matrices_.end();
for (; xform_itr != xform_itr_end; ++xform_itr) {
  xform_itr->Read(in, binary);
  KALDI_ASSERT(xform_itr->NumRows() == (xform_itr->NumCols() - 1)
               && xform_itr->NumRows() == dim_);  // dim_ x (dim_ + 1)
}
// RegtreeFmllrDiagGmmAccs::Init(): allocate one AffineXformStats per baseclass.
if (num_bclass == 0) {  // empty stats
  DeletePointers(&baseclass_stats_);
  baseclass_stats_.clear();
  num_baseclasses_ = 0;
} else {
  num_baseclasses_ = num_bclass;
  baseclass_stats_.resize(num_bclass);
  for (vector<AffineXformStats*>::iterator it = baseclass_stats_.begin(),
      end = baseclass_stats_.end(); it != end; ++it) {
    *it = new AffineXformStats();
    (*it)->Init(dim, dim);
  }
}
for (vector<AffineXformStats*>::iterator it = baseclass_stats_.begin(),
    end = baseclass_stats_.end(); it != end; ++it) {
// RegtreeFmllrDiagGmmAccs::AccumulateForGmm(): accumulate fMLLR statistics for
// every component of one GMM, weighted by the component posteriors.
posterior.Scale(weight);

// Extended (dim_ + 1)-dimensional data point and its outer product.
extended_data.Range(0, dim_).CopyFromVec(data);
extended_data(dim_) = 1.0;
scatter.AddVec2(1.0, extended_data);

for (int32 m = 0; m < num_comp; m++) {
  inv_var_mean.CopyRowFromMat(pdf.means_invvars(), m);  // Sigma_m^{-1} mu_m
  int32 bclass = regtree.Gauss2BaseclassId(pdf_index, m);
  baseclass_stats_[bclass]->beta_ += posterior_d(m);
  baseclass_stats_[bclass]->K_.AddVecVec(posterior_d(m), inv_var_mean,
                                         extended_data);
  for (int32 d = 0; d < dim_; d++)
    g_scale(bclass, d) += posterior(m) * pdf.inv_vars()(m, d);
}
for (size_t bclass = 0; bclass < baseclass_stats_.size(); bclass++) {
  vector< SpMatrix<double> > &G = baseclass_stats_[bclass]->G_;
  for (int32 d = 0; d < dim_; d++)
    if (g_scale(bclass, d) != 0.0)
      G[d].AddSp(g_scale(bclass, d), scatter);
}
// RegtreeFmllrDiagGmmAccs::AccumulateForGaussian(): the same statistics, but
// for a single Gaussian component with an externally supplied weight.
size_t dim = static_cast<size_t>(dim_);
extended_data.Range(0, dim).CopyFromVec(data);
extended_data(dim) = 1.0;
scatter.AddVec2(1.0, extended_data);
double weight_d = static_cast<double>(weight);

baseclass_stats_[bclass]->beta_ += weight_d;
baseclass_stats_[bclass]->K_.AddVecVec(weight_d, inv_var_mean, extended_data);
vector< SpMatrix<double> > &G = baseclass_stats_[bclass]->G_;
for (size_t d = 0; d < dim; d++)
  G[d].AddSp((weight_d * pdf.inv_vars()(gauss_index, d)), scatter);
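// Both accumulators build the standard per-baseclass fMLLR sufficient
// statistics (a sketch of the quantities, with gamma the component
// posterior/weight and xi = [x^T 1]^T the extended data point):
//   beta_  = sum over frames/components of gamma              (occupancy)
//   K_     = sum of gamma * Sigma^{-1} mu * xi^T              (linear term)
//   G_[d]  = sum of gamma / sigma_d^2 * xi xi^T               (per-dimension quadratic term)
// These are exactly the quantities consumed by the ComputeFmllrMatrixDiagGmm*
// updates called from Update() below.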
// RegtreeFmllrDiagGmmAccs::Write(): write the per-baseclass statistics.
vector<AffineXformStats*>::const_iterator itr = baseclass_stats_.begin(),
    end = baseclass_stats_.end();
for ( ; itr != end; ++itr)
  (*itr)->Write(out, binary);
// RegtreeFmllrDiagGmmAccs::Read(): allocate and read the per-baseclass statistics.
baseclass_stats_.resize(num_baseclasses_);
vector<AffineXformStats*>::iterator itr = baseclass_stats_.begin(),
    end = baseclass_stats_.end();
for ( ; itr != end; ++itr) {
  *itr = new AffineXformStats();
  (*itr)->Init(dim_, dim_);
  (*itr)->Read(in, binary, add);
}
// RegtreeFmllrDiagGmmAccs::Update(): estimate the fMLLR transforms from the
// accumulated statistics, either at regression-tree nodes or per baseclass.
BaseFloat tot_auxf_impr = 0.0, tot_t = 0.0;
Matrix<BaseFloat> xform_mat(dim_, dim_ + 1);

if (opts.use_regtree) {  // pool statistics up the regression tree
  vector<AffineXformStats*> regclass_stats;
  vector<int32> base2regclass;
  regtree.GatherStats(baseclass_stats_, opts.min_count,
                      &base2regclass, &regclass_stats);
  out_fmllr->set_bclass2xforms(base2regclass);

  out_fmllr->Init(regclass_stats.size(), dim_);
  size_t num_rclass = regclass_stats.size();
  for (size_t rclass_index = 0;
       rclass_index < num_rclass; ++rclass_index) {
    tot_t += regclass_stats[rclass_index]->beta_;
    // (the transform for this regression class is estimated and stored
    //  in out_fmllr via SetParameters())
  }
  KALDI_LOG << "Estimated " << num_rclass << " regression classes.";
  DeletePointers(&regclass_stats);
} else {  // one transform per baseclass, if it has enough counts
  for (int32 bclass_index = 0; bclass_index < num_baseclasses_;
       ++bclass_index)
    tot_t += baseclass_stats_[bclass_index]->beta_;

  out_fmllr->Init(num_baseclasses_, dim_);
  vector<int32> base2regclass(num_baseclasses_);
  for (int32 bclass_index = 0; bclass_index < num_baseclasses_;
       ++bclass_index) {
    if (baseclass_stats_[bclass_index]->beta_ >= opts.min_count) {
      xform_mat.SetUnit();
      // Dispatch on opts.update_type: full, diagonal, or offset-only update.
      if (opts.update_type == "full")
        tot_auxf_impr += ComputeFmllrMatrixDiagGmmFull(
            xform_mat, *(baseclass_stats_[bclass_index]),
            opts.num_iters, &xform_mat);
      else if (opts.update_type == "diag")
        tot_auxf_impr += ComputeFmllrMatrixDiagGmmDiagonal(
            xform_mat, *(baseclass_stats_[bclass_index]), &xform_mat);
      else if (opts.update_type == "offset")
        tot_auxf_impr += ComputeFmllrMatrixDiagGmmOffset(
            xform_mat, *(baseclass_stats_[bclass_index]), &xform_mat);
      else if (opts.update_type != "none")
        KALDI_ERR << "Unknown fMLLR update type " << opts.update_type
                  << ", fmllr-update-type must be one of "
                     "\"full\"|\"diag\"|\"offset\"|\"none\"";
      out_fmllr->SetParameters(xform_mat, bclass_index);
      base2regclass[bclass_index] = bclass_index;
    } else {
      KALDI_WARN << "For baseclass " << bclass_index << " count = "
                 << baseclass_stats_[bclass_index]->beta_ << " < "
                 << opts.min_count << ": not updating FMLLR";
      base2regclass[bclass_index] = -1;
    }
  }
  out_fmllr->set_bclass2xforms(base2regclass);
}
if (auxf_impr_out) *auxf_impr_out = tot_auxf_impr;
if (tot_t_out) *tot_t_out = tot_t;
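// A minimal end-to-end sketch of how these pieces fit together, assuming an
// existing acoustic model 'am' (AmDiagGmm), a 'regtree' (RegressionTree), a
// feature matrix 'feats' with one row per frame, and per-frame pdf indices
// 'pdf_ids'; the accessors used for the number of baseclasses and the feature
// dimension are assumptions and may be named differently in the headers:
//
//   RegtreeFmllrDiagGmmAccs accs;
//   accs.Init(regtree.NumBaseclasses(), am.Dim());
//   for (int32 t = 0; t < feats.NumRows(); t++)
//     accs.AccumulateForGmm(regtree, am, feats.Row(t), pdf_ids[t], 1.0);
//
//   RegtreeFmllrOptions opts;   // update_type, min_count, num_iters, use_regtree
//   RegtreeFmllrDiagGmm fmllr;
//   BaseFloat auxf_impr = 0.0, frame_count = 0.0;
//   accs.Update(regtree, opts, &fmllr, &auxf_impr, &frame_count);
//   fmllr.Validate();
//
//   std::vector< Vector<BaseFloat> > xformed;
//   fmllr.TransformFeature(feats.Row(0), &xformed);  // one output per transform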