24 #ifndef KALDI_SGMM2_AM_SGMM2_H_    25 #define KALDI_SGMM2_AM_SGMM2_H_   103     opts->
Register(
"split-substates", &split_substates, 
"Increase number of "   104                    "substates to this overall target.");
   105     opts->
Register(
"max-cond-split", &max_cond, 
"Max condition number of smoothing "   106                    "matrix used in substate splitting.");
   107     opts->
Register(
"perturb-factor", &perturb_factor, 
"Perturbation factor for "   108                    "state vectors while splitting substates.");
   109     opts->
Register(
"power", &power, 
"Exponent for substate occupancies used while "   110                    "splitting substates.");
   111     opts->
Register(
"min-count", &min_count, 
"Minimum allowed count, used in allocating "   112                    "sub-states to state in mixture splitting.");
   130     opts->
Register(
"full-gmm-nbest", &full_gmm_nbest, 
"Number of highest-scoring"   131                    " full-covariance Gaussians selected per frame.");
   132     opts->
Register(
"diag-gmm-nbest", &diag_gmm_nbest, 
"Number of highest-scoring"   133                    " diagonal-covariance Gaussians selected per frame.");
   153     if (xt.
Dim() != feat_dim) xt.
Resize(feat_dim);
   155       xti.
Resize(ngauss, feat_dim);
   157       zti.
Resize(ngauss, phn_dim);
   158     if (nti.
Dim() != ngauss)
   175   bool Empty() { 
return v_s.Dim() == 0; }
   181     v_s.Resize(v_s_in.
Dim());
   182     v_s.CopyFromVec(v_s_in);
   203       substate_cache(num_groups), pdf_cache(num_pdfs), t(1) { }
   234   void Read(std::istream &is, 
bool binary);
   235   void Write(std::ostream &os, 
bool binary,
   241   void Check(
bool show_properties = 
true);
   247   void InitializeFromFullGmm(
const FullGmm &gmm,
   248                              const std::vector<int32> &pdf2group,
   249                              int32 phn_subspace_dim,
   250                              int32 spk_subspace_dim,
   251                              bool speaker_dependent_weights,
   259   void CopyGlobalsInitVecs(
const AmSgmm2 &other,
   260                            const std::vector<int32> &pdf2group,
   264   void CopyFromSgmm2(
const AmSgmm2 &other,
   265                     bool copy_normalizers,
   274                               std::vector<int32> *gselect) 
const;
   279                            const std::vector<int32> &gselect,
   317   void IncreasePhoneSpaceDim(
int32 target_dim,
   324   void IncreaseSpkSpaceDim(
int32 target_dim,
   326                            bool speaker_dependent_weights);
   332   void ComputeDerivedVars();
   336   void ComputeNormalizers();
   340   void ComputeWeights();
   358     KALDI_ASSERT(j1 < NumGroups()); 
return v_[j1].NumRows();
   383   template<
typename Real>
   386   template<
typename Real>
   390   template<
typename Real>
   391   void GetNtransSigmaInv(std::vector< 
Matrix<Real> > *out) 
const;
   393   template<
typename Real>
   398   template<
typename Real>
   399   void GetVarScaledSubstateSpeakerMean(
int32 j1, 
int32 substate,
   425   std::vector< Matrix<BaseFloat> > 
M_;
   427   std::vector< Matrix<BaseFloat> > 
N_;
   436   std::vector< Matrix<BaseFloat> > 
v_;
   438   std::vector< Vector<BaseFloat> > 
c_;
   440   std::vector< Matrix<BaseFloat> > 
n_;
   466   void ComputeNormalizersInternal(
int32 num_threads, 
int32 thread,
   467                                   int32 *entropy_count, 
double *entropy_sum);
   480   void InitializeMw(
int32 phn_subspace_dim,
   483   void InitializeNu(
int32 spk_subspace_dim,                    
   485                     bool speaker_dependent_weights);
   486   void InitializeVecsAndSubstateWeights(
BaseFloat self_weight);
   487   void InitializeCovars();  
   489   void ComputeHsmFromModel(
   495   void ComputePdfMappings(); 
   506   friend class AmSgmm2Functions;  
   507   friend class Sgmm2Feature;
   510 template<
typename Real>
   518 template<
typename Real>
   522   KALDI_ASSERT(j1 < NumGroups() && m < NumSubstatesForGroup(j1)
   526   mean_tmp.AddMatVec(1.0, M_[i], 
kNoTrans, v_[j1].Row(m), 0.0);
   531 template<
typename Real>
   535   GetSubstateMean(j1, m, i, mean_out);
   536   if (spk.
v_s.Dim() != 0)  
   540 template<
typename Real>
   545   GetSubstateSpeakerMean(j1, m, i, spk, &tmp_mean);
   546   tmp_mean2.AddSpVec(1.0, SigmaInv_[i], tmp_mean, 0.0);
   574   void Write(std::ostream &os, 
bool binary) 
const;
   575   void Read(std::istream &is, 
bool binary);
   586 #endif  // KALDI_SGMM2_AM_SGMM2_H_ 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
uint16 SgmmWriteFlagsType
Bitwise OR of the above flags. 
 
Matrix< BaseFloat > u_
[SSGMM] Speaker-subspace weight projection vectors. Dimension is [I][T] 
 
Class for definition of the subspace Gmm acoustic model. 
 
Packed symmetric matrix class. 
 
std::vector< int32 > pdf2group_
 
KaldiObjectHolder works for Kaldi objects that have the "standard" Read and Write functions...
 
Vector< BaseFloat > xt
x'(t), FMLLR-adapted, dim = [D], eq.(33) 
 
void GetSubstateMean(int32 j1, int32 m, int32 i, VectorBase< Real > *mean_out) const
 
bool HasSpeakerSpace() const
 
std::vector< Vector< BaseFloat > > c_
c_{jm}, mixture weights. Dimension is [J2][#mix] 
 
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix). 
 
Matrix< BaseFloat > w_
Phonetic-subspace weight projection vectors. Dimension is [I][S]. 
 
Vector< BaseFloat > v_s
Speaker adaptation vector v^{(s)}. Dim is [T]. 
 
Definition for Gaussian Mixture Model with full covariances. 
 
Class for the accumulators required to update the speaker vectors v_s. 
 
const Vector< BaseFloat > & GetSpeakerVector()
 
void GetVarScaledSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
 
KaldiObjectHolder< Sgmm2GauPost > Sgmm2GauPostHolder
 
void GetInvCovars(int32 gauss_index, SpMatrix< Real > *out) const
Templated accessors (used to accumulate in different precision) 
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
DiagGmm diag_ubm_
These contain the "background" model associated with the subspace GMM. 
 
std::vector< Matrix< BaseFloat > > posteriors
 
std::vector< Matrix< BaseFloat > > n_
n_{jim}, per-Gaussian normalizer. Dimension is [J1][I][#mix] 
 
std::vector< Matrix< BaseFloat > > N_
Speaker-subspace projections. Dimension is [I][D][T]. 
 
SpMatrix< BaseFloat > col_cov_inv_
 
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero). 
 
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
 
BaseFloat remaining_log_like
 
const FullGmm & full_ubm() const
Accessors. 
 
int32 PhoneSpaceDim() const
 
std::vector< Matrix< BaseFloat > > v_
The parameters in a particular SGMM state. 
 
void CopyFromSp(const SpMatrix< Real > &other)
 
Sgmm2LikelihoodCache(int32 num_groups, int32 num_pdfs)
 
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
Matrix< BaseFloat > zti
z_{i}(t), dim = [I][S], eq.(35) 
 
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S]. 
 
std::vector< int32 > tids
 
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size). 
 
std::vector< Vector< BaseFloat > > log_d_jms
[SSGMM] log of the above (more efficient to store both). 
 
SequentialTableReader< Sgmm2GauPostHolder > SequentialSgmm2GauPostReader
 
int32 NumSubstatesForPdf(int32 j2) const
 
void GetSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
 
Matrix< BaseFloat > xti
x_{i}(t) = x'(t) - o_i(s): dim = [I][D], eq.(34) 
 
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const]. 
 
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM. 
 
Vector< BaseFloat > likes
 
std::vector< Matrix< BaseFloat > > M_prior_
 
Vector< BaseFloat > nti
n_{i}(t), dim = [I], eq.(36) 
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
std::vector< PdfCacheElement > pdf_cache
 
This is the entry for a single time. 
 
int32 NumPdfs() const
Various model dimensions. 
 
int32 full_gmm_nbest
Number of highest-scoring full-covariance Gaussians per frame. 
 
void Resize(int32 ngauss, int32 feat_dim, int32 phn_dim)
 
MatrixIndexT Dim() const
Returns the dimension of the vector. 
 
std::vector< int32 > gselect
 
const DiagGmm & diag_ubm() const
 
SpMatrix< BaseFloat > row_cov_inv_
 
TableWriter< Sgmm2GauPostHolder > Sgmm2GauPostWriter
 
std::vector< int32 > gselect
 
int32 diag_gmm_nbest
Number of highest-scoring diagonal-covariance Gaussians per frame. 
 
std::vector< std::vector< int32 > > group2pdf_
 
A class representing a vector. 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
void RemoveSpeakerSpace()
 
Definition for Gaussian Mixture Model with diagonal covariances. 
 
Sgmm2LikelihoodCache caches SGMM likelihoods at two levels: the final pdf likelihoods, and the sub-state level likelihoods, which means that with the SCTM system we can avoid redundant computation. 
 
std::vector< SubstateCacheElement > substate_cache
 
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero). 
 
void SetSpeakerVector(const Vector< BaseFloat > &v_s_in)
 
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
 
bool HasSpeakerDependentWeights() const
True if doing SSGMM. 
 
Provides a vector abstraction class. 
 
void Register(OptionsItf *opts)
 
Class for the accumulators associated with the phonetic-subspace model parameters. 
 
int32 SpkSpaceDim() const
 
void ComputeFeatureNormalizingTransform(const FullGmm &gmm, Matrix< BaseFloat > *xform)
Computes the inverse of an LDA transform (without dimensionality reduction). The computed transform is...
 
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
 
Vector< BaseFloat > log_b_is
[SSGMM]: Eq. (22) in techreport, b_i^{(s)} = \exp(u_i^T v^{(s)}) 
 
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
 
int32 NumSubstatesForGroup(int32 j1) const
 
void Register(OptionsItf *opts)
 
RandomAccessTableReader< Sgmm2GauPostHolder > RandomAccessSgmm2GauPostReader
 
std::vector< Matrix< BaseFloat > > w_jmi_
[SSGMM] w_{jmi}, dimension is [J1][#mix][I]. Computed from w_ and v_. 
 
Sgmm2SplitSubstatesConfig()
 
Matrix< BaseFloat > o_s
Per-speaker offsets o_{i}. Dimension is [I][D].