22 #ifndef KALDI_IVECTOR_IVECTOR_EXTRACTOR_H_ 23 #define KALDI_IVECTOR_IVECTOR_EXTRACTOR_H_ 58 opts->
Register(
"acoustic-weight", &acoustic_weight,
59 "Weight on part of auxf that involves the data (e.g. 0.2); " 60 "if this weight is small, the prior will have more effect.");
61 opts->
Register(
"max-count", &max_count,
62 "Maximum frame count (affects prior scaling): if >0, the prior " 63 "term will be scaled up after the frame count exceeds this " 64 "value. Note that this count is considered after posterior " 65 "scaling (e.g. --acoustic-weight option, or scale argument to " 66 "scale-post), so you would normally use a cutoff 10 times " 67 "smaller than the corresponding number of frames.");
82 bool need_2nd_order_stats):
83 gamma_(num_gauss), X_(num_gauss, feat_dim) {
84 if (need_2nd_order_stats) {
86 for (
int32 i = 0;
i < num_gauss;
i++)
87 S_[
i].Resize(feat_dim);
94 void Scale(
double scale);
103 std::vector<SpMatrix<double> >
S_;
113 use_weights(true) { }
115 opts->
Register(
"num-iters", &num_iters,
"Number of iterations in " 116 "iVector estimation (>1 needed due to weights)");
117 opts->
Register(
"ivector-dim", &ivector_dim,
"Dimension of iVector");
118 opts->
Register(
"use-weights", &use_weights,
"If true, regress the " 119 "log-weights on the iVector");
151 void GetIvectorDistribution(
184 double GetAcousticAuxfVariance(
189 double GetAcousticAuxfMean(
196 double GetAcousticAuxfGconst(
202 double GetAcousticAuxfWeight(
213 void GetIvectorDistMean(
221 void GetIvectorDistPrior(
232 void GetIvectorDistWeight(
242 int32 FeatDim()
const;
243 int32 IvectorDim()
const;
244 int32 NumGauss()
const;
246 void Write(std::ostream &os,
bool binary)
const;
247 void Read(std::istream &is,
bool binary);
252 void ComputeDerivedVars();
253 void ComputeDerivedVars(
int32 i);
260 double new_prior_offset);
276 std::vector<Matrix<double> >
M_;
329 const std::vector<std::pair<int32, BaseFloat> > &gauss_post);
334 const std::vector<std::vector<std::pair<int32, BaseFloat> > > &gauss_post);
352 void GetIvector(
int32 num_cg_iters,
365 double Count()
const {
return num_frames_; }
370 void Scale(
double scale);
372 void Write(std::ostream &os,
bool binary)
const;
373 void Read(std::istream &is,
bool binary);
391 double DefaultObjf()
const;
420 int32 ivector_period,
436 num_samples_for_weights(10),
439 opts->
Register(
"update-variances", &update_variances,
"If true, update the " 440 "Gaussian variances");
441 opts->
Register(
"compute-auxf", &compute_auxf,
"If true, compute the " 442 "auxiliary functions on training data; can be used to " 443 "debug and check convergence.");
444 opts->
Register(
"num-samples-for-weights", &num_samples_for_weights,
445 "Number of samples from iVector distribution to use " 446 "for accumulating stats for weight update. Must be >1");
447 opts->
Register(
"cache-size", &cache_size,
"Size of an internal " 448 "cache (not critical, only affects speed/memory)");
460 gaussian_min_count(100.0),
461 diagonalize(true) { }
463 opts->
Register(
"variance-floor-factor", &variance_floor_factor,
464 "Factor that determines variance flooring (we floor each covar " 465 "to this times global average covariance");
466 opts->
Register(
"gaussian-min-count", &gaussian_min_count,
467 "Minimum total count per Gaussian, below which we refuse to " 468 "update any associated parameters.");
469 opts->
Register(
"diagonalize", &diagonalize,
470 "If true, diagonalize the quadratic term in the " 471 "objective function. This reorders the ivector dimensions " 472 "from most to least important.");
504 void Read(std::istream &is,
bool binary,
bool add =
false);
506 void Write(std::ostream &os,
bool binary);
509 void Write(std::ostream &os,
bool binary)
const;
598 double PriorDiagnostics(
double old_prior_offset)
const;
620 std::vector<Matrix<double> >
Y_;
661 std::vector< SpMatrix<double> >
S_;
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
IvectorEstimationOptions()
void Register(OptionsItf *opts)
Base class which provides matrix operations not involving resizing or allocation. ...
Definition for Gaussian Mixture Model with full covariances.
This class helps us to efficiently estimate iVectors in situations where the data is coming in frame ...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
double PriorOffset() const
double EstimateIvectorsOnline(const Matrix< BaseFloat > &feats, const Posterior &post, const IvectorExtractor &extractor, int32 ivector_period, int32 num_cg_iters, BaseFloat max_count, Matrix< BaseFloat > *ivectors)
void TransformIvectors(const Matrix< BaseFloat > &ivectors_in, const PldaConfig &plda_config, const Plda &plda, Matrix< BaseFloat > *ivectors_out)
SpMatrix< double > quadratic_term_
Vector< double > linear_term_
void AccStatsForUtterance(const TransitionModel &trans_model, const AmDiagGmm &am_gmm, const GaussPost &gpost, const Matrix< BaseFloat > &feats, FmllrRawAccs *accs)
Provides a vector abstraction class.
Sub-matrix representation.
OnlineIvectorEstimationStats & operator=(const OnlineIvectorEstimationStats &other)