mle-full-gmm-test.cc File Reference
#include "gmm/full-gmm.h"
#include "gmm/diag-gmm.h"
#include "gmm/model-common.h"
#include "gmm/mle-full-gmm.h"
#include "gmm/mle-diag-gmm.h"
#include "util/stl-utils.h"
#include "util/kaldi-io.h"
Include dependency graph for mle-full-gmm-test.cc:

Go to the source code of this file.

Functions

void TestComponentAcc (const FullGmm &gmm, const Matrix< BaseFloat > &feats)
 
void rand_posdef_spmatrix (size_t dim, SpMatrix< BaseFloat > *matrix, TpMatrix< BaseFloat > *matrix_sqrt=NULL, BaseFloat *logdet=NULL)
 
BaseFloat GetLogLikeTest (const FullGmm &gmm, const VectorBase< BaseFloat > &feats, bool print_eigs)
 
void test_flags_driven_update (const FullGmm &gmm, const Matrix< BaseFloat > &feats, GmmFlagsType flags)
 
void test_io (const FullGmm &gmm, const AccumFullGmm &est_gmm, bool binary, const Matrix< BaseFloat > &feats)
 
void UnitTestEstimateFullGmm ()
 
int main ()
 

Function Documentation

◆ GetLogLikeTest()

BaseFloat GetLogLikeTest ( const FullGmm &  gmm,
const VectorBase< BaseFloat > &  feats,
bool  print_eigs 
)

Definition at line 113 of file mle-full-gmm-test.cc.

References VectorBase< Real >::AddVec(), FullGmm::Dim(), FullGmm::GetMeans(), rnnlm::i, FullGmm::inv_covars(), SpMatrix< Real >::Invert(), kaldi::Log(), kaldi::LogAdd(), M_LOG_2PI, FullGmm::NumGauss(), PackedMatrix< Real >::NumRows(), MatrixBase< Real >::Row(), SpMatrix< Real >::SymPosSemiDefEig(), kaldi::VecSpVec(), and FullGmm::weights().

Referenced by UnitTestEstimateFullGmm().

115  {
116  BaseFloat log_like_sum = -1.0e+10;
117  Matrix<BaseFloat> means;
118  gmm.GetMeans(&means);
119  const std::vector<SpMatrix<BaseFloat> > inv_covars = gmm.inv_covars();
120 
121  if (print_eigs)
122  for (size_t i = 0; i < inv_covars.size(); i++) {
123  SpMatrix<BaseFloat> cov(inv_covars[i]);
124  size_t dim = cov.NumRows();
125  cov.Invert();
126  std::cout << i << "'th component eigs are: ";
127  Vector<BaseFloat> s(dim);
128  Matrix<BaseFloat> P(dim, dim);
129  cov.SymPosSemiDefEig(&s, &P);
130  std::cout << s;
131  }
132 
133  for (int32 i = 0; i < gmm.NumGauss(); i++) {
134  BaseFloat logdet = -(inv_covars[i].LogPosDefDet());
135  BaseFloat log_like = Log(gmm.weights()(i))
136  -0.5 * (gmm.Dim() * M_LOG_2PI + logdet);
137  Vector<BaseFloat> offset(feats);
138  offset.AddVec(-1.0, means.Row(i));
139  log_like -= 0.5 * VecSpVec(offset, inv_covars[i], offset);
140  log_like_sum = LogAdd(log_like_sum, log_like);
141  }
142  return log_like_sum;
143 }
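
For readers checking the arithmetic: the loop above is a direct evaluation of the full-covariance GMM log-likelihood (this is just a restatement of the code in standard notation, not anything taken from the Kaldi headers):

    \log p(x) \;=\; \log \sum_{i=1}^{M} \exp\Big( \log w_i - \tfrac{1}{2}\big( D \log 2\pi + \log\lvert\Sigma_i\rvert + (x-\mu_i)^{\top} \Sigma_i^{-1} (x-\mu_i) \big) \Big),

where D = gmm.Dim() and the sum over components is computed incrementally with LogAdd(). In the code, logdet = -(inv_covars[i].LogPosDefDet()) equals \log\lvert\Sigma_i\rvert because \log\lvert\Sigma_i^{-1}\rvert = -\log\lvert\Sigma_i\rvert, and VecSpVec(offset, inv_covars[i], offset) supplies the quadratic (Mahalanobis) term.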

◆ main()

int main ( )

Definition at line 478 of file mle-full-gmm-test.cc.

References rnnlm::i, and UnitTestEstimateFullGmm().

478  {
479  // repeat the test a couple of times
480  for (int i = 0; i < 2; i++)
481  UnitTestEstimateFullGmm();
482  std::cout << "Test OK.\n";
483 }

◆ rand_posdef_spmatrix()

void rand_posdef_spmatrix ( size_t  dim,
SpMatrix< BaseFloat > *  matrix,
TpMatrix< BaseFloat > *  matrix_sqrt = NULL,
BaseFloat *  logdet = NULL 
)

Definition at line 90 of file mle-full-gmm-test.cc.

References SpMatrix< Real >::AddMat2(), TpMatrix< Real >::Cholesky(), MatrixBase< Real >::Cond(), kaldi::kNoTrans, SpMatrix< Real >::LogPosDefDet(), and MatrixBase< Real >::SetRandn().

Referenced by UnitTestEstimateFullGmm().

92  {
93  // generate random (non-singular) matrix
94  Matrix<BaseFloat> tmp(dim, dim);
95  while (1) {
96  tmp.SetRandn();
97  if (tmp.Cond() < 100) break;
98  std::cout << "Condition number of random matrix large "
99  << static_cast<float>(tmp.Cond()) << ", trying again (this is normal)"
100  << '\n';
101  }
102  // tmp * tmp^T will give positive definite matrix
103  matrix->AddMat2(1.0, tmp, kNoTrans, 0.0);
104 
105  if (matrix_sqrt != NULL) matrix_sqrt->Cholesky(*matrix);
106  if (logdet != NULL) *logdet = matrix->LogPosDefDet();
107  if ((matrix_sqrt == NULL) && (logdet == NULL)) {
108  TpMatrix<BaseFloat> sqrt(dim);
109  sqrt.Cholesky(*matrix);
110  }
111 }
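
The construction is the usual recipe for a random symmetric positive-definite matrix: draw a square matrix A with a tolerable condition number and form

    M = A A^{\top} \succ 0 \ (\text{positive definite when } A \text{ is non-singular}), \qquad M = L L^{\top} \ (\text{Cholesky}), \qquad \log\det M = 2 \sum_{k} \log L_{kk},

so the optional matrix_sqrt output is a Cholesky "square root" of matrix, and logdet is its log-determinant (the same quantity LogPosDefDet() obtains via a Cholesky factorization).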

◆ test_flags_driven_update()

void test_flags_driven_update ( const FullGmm &  gmm,
const Matrix< BaseFloat > &  feats,
GmmFlagsType  flags 
)

Definition at line 145 of file mle-full-gmm-test.cc.

References AccumFullGmm::AccumulateFromFull(), kaldi::AssertEqual(), FullGmm::ComputeGconsts(), FullGmm::CopyFromFullGmm(), FullGmm::Dim(), FullGmm::GetCovars(), FullGmm::GetMeans(), rnnlm::i, KALDI_LOG, KALDI_WARN, kaldi::kGmmAll, kaldi::kGmmMeans, kaldi::kGmmVariances, kaldi::kGmmWeights, FullGmm::LogLikelihood(), kaldi::MleFullGmmUpdate(), FullGmm::NumGauss(), MatrixBase< Real >::NumRows(), AccumFullGmm::Resize(), MatrixBase< Real >::Row(), FullGmm::SetInvCovars(), FullGmm::SetMeans(), FullGmm::SetWeights(), AccumFullGmm::SetZero(), and FullGmm::weights().

Referenced by UnitTestEstimateFullGmm().

147  {
148  MleFullGmmOptions config;
149  AccumFullGmm est_gmm_allp; // updates all params
150  // let's trust that all-params update works
151  AccumFullGmm est_gmm_somep; // updates params indicated by flags
152 
153  // warm-up estimators
154  est_gmm_allp.Resize(gmm.NumGauss(), gmm.Dim(), kGmmAll);
155  est_gmm_allp.SetZero(kGmmAll);
156 
157  est_gmm_somep.Resize(gmm.NumGauss(), gmm.Dim(), flags);
158  est_gmm_somep.SetZero(flags);
159 
160  // accumulate estimators
161  for (int32 i = 0; i < feats.NumRows(); i++) {
162  est_gmm_allp.AccumulateFromFull(gmm, feats.Row(i), 1.0F);
163  est_gmm_somep.AccumulateFromFull(gmm, feats.Row(i), 1.0F);
164  }
165 
166  FullGmm gmm_all_update; // model with all params updated
167  FullGmm gmm_some_update; // model with some params updated
168  gmm_all_update.CopyFromFullGmm(gmm); // init with orig. model
169  gmm_some_update.CopyFromFullGmm(gmm); // init with orig. model
170 
171  MleFullGmmUpdate(config, est_gmm_allp, kGmmAll, &gmm_all_update, NULL, NULL);
172  MleFullGmmUpdate(config, est_gmm_somep, flags, &gmm_some_update, NULL, NULL);
173 
174  if (gmm_all_update.NumGauss() != gmm.NumGauss()) {
175  KALDI_WARN << "Unable to pass test_flags_driven_update() test because of "
176  "component removal during Update() call (this is normal)";
177  return;
178  }
179 
180  // now back-off the gmm_all_update params that were not updated
181  // in gmm_some_update to orig.
182  if (~flags & kGmmWeights)
183  gmm_all_update.SetWeights(gmm.weights());
184  if (~flags & kGmmMeans) {
185  Matrix<BaseFloat> means(gmm.NumGauss(), gmm.Dim());
186  gmm.GetMeans(&means);
187  gmm_all_update.SetMeans(means);
188  }
189  if (~flags & kGmmVariances) {
190  std::vector<SpMatrix<BaseFloat> > vars(gmm.NumGauss());
191  for (int32 i = 0; i < gmm.NumGauss(); i++)
192  vars[i].Resize(gmm.Dim());
193  gmm.GetCovars(&vars);
194  for (int32 i = 0; i < gmm.NumGauss(); i++)
195  vars[i].InvertDouble();
196  gmm_all_update.SetInvCovars(vars);
197  }
198  gmm_some_update.ComputeGconsts();
199  gmm_all_update.ComputeGconsts();
200 
201  // now both models gmm_all_update and gmm_some_update have the same params updated
202  // compute loglike for models for check
203  double loglike0 = 0.0;
204  double loglike1 = 0.0;
205  double loglike2 = 0.0;
206  for (int32 i = 0; i < feats.NumRows(); i++) {
207  loglike0 += static_cast<double>(
208  gmm.LogLikelihood(feats.Row(i)));
209  loglike1 += static_cast<double>(
210  gmm_all_update.LogLikelihood(feats.Row(i)));
211  loglike2 += static_cast<double>(
212  gmm_some_update.LogLikelihood(feats.Row(i)));
213  }
214  KALDI_LOG << "loglike1 = " << loglike1 << " loglike2 = " << loglike2;
215  AssertEqual(loglike1, loglike2, 0.01);
216 }
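
GmmFlagsType is a bitmask (a bitwise OR of kGmmWeights, kGmmMeans, kGmmVariances, ...), so the "~flags & kGmmX" tests above read as "kGmmX was not requested". A minimal sketch of that flag arithmetic follows; flags_example is a made-up name for illustration, and only the flag constants come from model-common.h:

    #include "gmm/model-common.h"  // GmmFlagsType, kGmmWeights, kGmmMeans, kGmmVariances

    // Illustrative only: how the flag bits are combined and tested.
    void flags_example() {
      kaldi::GmmFlagsType flags = kaldi::kGmmWeights | kaldi::kGmmMeans;  // update weights and means only
      bool means_requested = (flags & kaldi::kGmmMeans) != 0;    // true
      bool vars_skipped = (~flags & kaldi::kGmmVariances) != 0;  // true: the test reverts the covariances
                                                                 // to the original model before comparing
      (void)means_requested;
      (void)vars_skipped;
    }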

◆ test_io()

void test_io ( const FullGmm &  gmm,
const AccumFullGmm &  est_gmm,
bool  binary,
const Matrix< BaseFloat > &  feats 
)

Definition at line 219 of file mle-full-gmm-test.cc.

References kaldi::AssertEqual(), FullGmm::CopyFromFullGmm(), FullGmm::Dim(), AccumFullGmm::Flags(), rnnlm::i, kaldi::kGmmAll, FullGmm::LogLikelihood(), kaldi::MleFullGmmUpdate(), FullGmm::NumGauss(), MatrixBase< Real >::NumRows(), AccumFullGmm::Read(), AccumFullGmm::Resize(), MatrixBase< Real >::Row(), AccumFullGmm::Scale(), Input::Stream(), and AccumFullGmm::Write().

Referenced by UnitTestEstimateFullGmm().

220  {
221  std::cout << "Testing I/O, binary = " << binary << '\n';
222 
223  est_gmm.Write(Output("tmp_stats", binary).Stream(), binary);
224 
225  bool binary_in;
226  AccumFullGmm est_gmm2;
227  est_gmm2.Resize(gmm.NumGauss(),
228  gmm.Dim(), kGmmAll);
229  Input ki("tmp_stats", &binary_in);
230  est_gmm2.Read(ki.Stream(), binary_in, false); // not adding
231 
232  Input ki2("tmp_stats", &binary_in);
233  est_gmm2.Read(ki2.Stream(), binary_in, true); // adding
234 
235  est_gmm2.Scale(0.5, kGmmAll);
236  // 0.5 -> make it same as what it would have been if we read just once.
237  // [may affect it due to removal of components with small counts].
238 
239  MleFullGmmOptions config;
240  FullGmm gmm1;
241  FullGmm gmm2;
242  gmm1.CopyFromFullGmm(gmm);
243  gmm2.CopyFromFullGmm(gmm);
244  MleFullGmmUpdate(config, est_gmm, est_gmm.Flags(), &gmm1, NULL, NULL);
245  MleFullGmmUpdate(config, est_gmm2, est_gmm2.Flags(), &gmm2, NULL, NULL);
246 
247  BaseFloat loglike1 = 0.0;
248  BaseFloat loglike2 = 0.0;
249  for (int32 i = 0; i < feats.NumRows(); i++) {
250  loglike1 += gmm1.LogLikelihood(feats.Row(i));
251  loglike2 += gmm2.LogLikelihood(feats.Row(i));
252  }
253 
254  AssertEqual(loglike1, loglike2, 0.01);
255 
256  unlink("tmp_stats");
257 }
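
The Scale(0.5, kGmmAll) call is what makes the comparison fair: writing the statistics \mathcal{A} once and reading them twice (the second time with add = true) leaves est_gmm2 holding \mathcal{A} + \mathcal{A}, and

    \tfrac{1}{2}(\mathcal{A} + \mathcal{A}) = \mathcal{A},

so gmm1 and gmm2 are re-estimated from identical statistics and their total log-likelihoods must agree, up to the low-count component removal mentioned in the comment.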

◆ TestComponentAcc()

void TestComponentAcc ( const FullGmm &  gmm,
const Matrix< BaseFloat > &  feats 
)

Definition at line 31 of file mle-full-gmm-test.cc.

References AccumFullGmm::AccumulateForComponent(), AccumFullGmm::AccumulateFromFull(), kaldi::AssertEqual(), FullGmm::ComponentPosteriors(), FullGmm::Dim(), rnnlm::i, KALDI_ASSERT, KALDI_WARN, kaldi::kGmmAll, FullGmm::LogLikelihood(), kaldi::MleFullGmmUpdate(), FullGmm::NumGauss(), AccumFullGmm::NumGauss(), MatrixBase< Real >::NumRows(), FullGmm::Resize(), AccumFullGmm::Resize(), MatrixBase< Real >::Row(), and AccumFullGmm::SetZero().

Referenced by UnitTestEstimateFullGmm().

31  {
32  MleFullGmmOptions config;
33  AccumFullGmm est_atonce; // updates all components
34  AccumFullGmm est_compwise; // updates single components
35 
36  // Initialize estimators
37  est_atonce.Resize(gmm.NumGauss(), gmm.Dim(), kGmmAll);
38  est_atonce.SetZero(kGmmAll);
39  est_compwise.Resize(gmm.NumGauss(),
40  gmm.Dim(), kGmmAll);
41  est_compwise.SetZero(kGmmAll);
42 
43  // accumulate estimators
44  for (int32 i = 0; i < feats.NumRows(); i++) {
45  est_atonce.AccumulateFromFull(gmm, feats.Row(i), 1.0F);
46  Vector<BaseFloat> post(gmm.NumGauss());
47  gmm.ComponentPosteriors(feats.Row(i), &post);
48  for (int32 m = 0; m < gmm.NumGauss(); m++) {
49  est_compwise.AccumulateForComponent(feats.Row(i), m, post(m));
50  }
51  }
52 
53  FullGmm gmm_atonce; // model with all components accumulated together
54  FullGmm gmm_compwise; // model with each component accumulated separately
55  gmm_atonce.Resize(gmm.NumGauss(), gmm.Dim());
56  gmm_compwise.Resize(gmm.NumGauss(), gmm.Dim());
57 
58  MleFullGmmUpdate(config, est_atonce, kGmmAll, &gmm_atonce, NULL, NULL);
59  MleFullGmmUpdate(config, est_compwise, kGmmAll, &gmm_compwise, NULL, NULL);
60 
61  // the two ways of updating should result in the same model
62  double loglike0 = 0.0;
63  double loglike1 = 0.0;
64  double loglike2 = 0.0;
65  for (int32 i = 0; i < feats.NumRows(); i++) {
66  loglike0 += static_cast<double>(gmm.LogLikelihood(feats.Row(i)));
67  loglike1 += static_cast<double>(gmm_atonce.LogLikelihood(feats.Row(i)));
68  loglike2 += static_cast<double>(gmm_compwise.LogLikelihood(feats.Row(i)));
69  }
70 
71  std::cout << "Per-frame log-likelihood before update = "
72  << (loglike0/feats.NumRows()) << '\n';
73  std::cout << "Per-frame log-likelihood (accumulating at once) = "
74  << (loglike1/feats.NumRows()) << '\n';
75  std::cout << "Per-frame log-likelihood (accumulating component-wise) = "
76  << (loglike2/feats.NumRows()) << '\n';
77 
78  AssertEqual(loglike1, loglike2, 1.0e-6);
79 
80  if (est_atonce.NumGauss() != gmm.NumGauss()) {
81  KALDI_WARN << "Unable to pass TestComponentAcc() test because of "
82  "component removal during Update() call (this is normal)";
83  return;
84  } else {
85  KALDI_ASSERT(loglike1 >= loglike0 - (std::abs(loglike1)+std::abs(loglike0))*1.0e-06);
86  KALDI_ASSERT(loglike2 >= loglike0 - (std::abs(loglike2)+std::abs(loglike0))*1.0e-06);
87  }
88 }
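
The equality asserted here (to 1e-6) is expected because both accumulation paths gather the same per-frame quantities. In standard notation (not taken from the Kaldi headers), the component posteriors and the statistics accumulated for each component i over frames x_t are

    \gamma_i(x_t) = \frac{w_i\,\mathcal{N}(x_t \mid \mu_i, \Sigma_i)}{\sum_j w_j\,\mathcal{N}(x_t \mid \mu_j, \Sigma_j)}, \qquad c_i = \sum_t \gamma_i(x_t), \quad m_i = \sum_t \gamma_i(x_t)\, x_t, \quad S_i = \sum_t \gamma_i(x_t)\, x_t x_t^{\top},

and MleFullGmmUpdate only sees these accumulated statistics, so the two resulting models should coincide.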

◆ UnitTestEstimateFullGmm()

void UnitTestEstimateFullGmm ( )

Definition at line 260 of file mle-full-gmm-test.cc.

References AccumFullGmm::AccumulateFromFull(), MatrixBase< Real >::AddMatMat(), VectorBase< Real >::AddRowSumMat(), PackedMatrix< Real >::AddToDiag(), MatrixBase< Real >::AddVecVec(), kaldi::ApproxEqual(), kaldi::AssertEqual(), FullGmm::ComputeGconsts(), FullGmmNormal::CopyToFullGmm(), count, rnnlm::d, FullGmm::Dim(), GetLogLikeTest(), rnnlm::i, FullGmm::inv_covars(), KALDI_ASSERT, KALDI_LOG, kaldi::kGmmAll, kaldi::kGmmMeans, kaldi::kGmmVariances, kaldi::kGmmWeights, kaldi::kNoTrans, kaldi::kTrans, MatrixBase< Real >::LogDet(), M_LOG_2PI, FullGmmNormal::means_, FullGmm::means_invcovars(), kaldi::MleFullGmmUpdate(), FullGmm::NumGauss(), MatrixBase< Real >::NumRows(), rand_posdef_spmatrix(), kaldi::RandGauss(), FullGmm::Resize(), AccumFullGmm::Resize(), MatrixBase< Real >::Row(), MatrixBase< Real >::Scale(), VectorBase< Real >::Scale(), FullGmm::SetInvCovarsAndMeans(), FullGmm::SetWeights(), AccumFullGmm::SetZero(), FullGmm::Split(), MatrixBase< Real >::SymPosSemiDefEig(), test_flags_driven_update(), test_io(), TestComponentAcc(), FullGmmNormal::vars_, and FullGmm::weights().

Referenced by main().

260  {
261  // using namespace kaldi;
262 
263  // dimension of the gmm
264  int32 dim = 10;
265 
266  // number of mixtures in the data
267  int32 nMix = 7;
268 
269  // number of iterations for estimation
270  int32 maxiterations = 20;
271 
272  // maximum number of densities in the GMM
273  // larger than the number of mixtures in the data
274  // so that we can test the removal of unseen components
275  int32 maxcomponents = 50;
276 
277  // generate random feature vectors
278  // first, generate parameters of vectors distribution
279  // (mean and covariance matrices)
280  Matrix<BaseFloat> means_f(nMix, dim);
281  std::vector<SpMatrix<BaseFloat> > vars_f(nMix);
282  std::vector<TpMatrix<BaseFloat> > vars_f_sqrt(nMix);
283  for (int32 mix = 0; mix < nMix; mix++) {
284  vars_f[mix].Resize(dim);
285  vars_f_sqrt[mix].Resize(dim);
286  }
287 
288  for (int32 m = 0; m < nMix; m++) {
289  for (int32 d = 0; d < dim; d++) {
290  means_f(m, d) = kaldi::RandGauss();
291  }
292  rand_posdef_spmatrix(dim, &vars_f[m], &vars_f_sqrt[m], NULL);
293  }
294 
295  // second, generate 200 feature vectors for each of the mixture components
296  int32 counter = 0, multiple = 200;
297  Matrix<BaseFloat> feats(nMix*200, dim);
298  Vector<BaseFloat> rnd_vec(dim);
299  for (int32 m = 0; m < nMix; m++) {
300  for (int32 i = 0; i < multiple; i++) {
301  for (int32 d = 0; d < dim; d++) {
302  rnd_vec(d) = RandGauss();
303  }
304  feats.Row(counter).CopyFromVec(means_f.Row(m));
305  feats.Row(counter).AddTpVec(1.0, vars_f_sqrt[m], kNoTrans, rnd_vec, 1.0);
306  ++counter;
307  }
308  }
309 
310  {
311  // Work out "perfect" log-like w/ one component.
312  Matrix<BaseFloat> cov(dim, dim);
313  Vector<BaseFloat> mean(dim);
314  cov.AddMatMat(1.0, feats, kTrans, feats, kNoTrans, 0.0);
315  cov.Scale(1.0 / feats.NumRows());
316  mean.AddRowSumMat(1.0, feats);
317  mean.Scale(1.0 / feats.NumRows());
318  cov.AddVecVec(-1.0, mean, mean);
319  BaseFloat logdet = cov.LogDet();
320  BaseFloat avg_log = -0.5*(logdet + dim*(M_LOG_2PI + 1));
321  std::cout << "Avg log-like per frame [full-cov, 1-mix] should be: "
322  << avg_log << '\n';
323  std::cout << "Total log-like [full-cov, 1-mix] should be: "
324  << (feats.NumRows()*avg_log) << '\n';
325 
326  Vector<BaseFloat> s(dim);
327  Matrix<BaseFloat> P(dim, dim);
328  cov.SymPosSemiDefEig(&s, &P);
329  std::cout << "Cov eigs are " << s;
330  }
331 
332  // write the feature vectors to a file
333  // std::ofstream of("tmpfeats");
334  // of.precision(10);
335  // of << feats;
336  // of.close();
337 
338  // now generate randomly initial values for the GMM
339  Vector<BaseFloat> weights(1);
340  Matrix<BaseFloat> means(1, dim);
341  std::vector<SpMatrix<BaseFloat> > invcovars(1);
342  invcovars[0].Resize(dim);
343 
344  for (int32 d = 0; d < dim; d++) {
345  means(0, d) = kaldi::RandGauss()*5.0F;
346  }
347  SpMatrix<BaseFloat> covar(dim);
348  rand_posdef_spmatrix(dim, &covar, NULL, NULL);
349  covar.AddToDiag(0.1); // Ensure the condition is reasonable, otherwise
350  // we can get arbitrarily large inverse.
351  invcovars[0].CopyFromSp(covar);
352  invcovars[0].InvertDouble();
353  weights(0) = 1.0F;
354 
355  // new GMM
356  FullGmm *gmm = new FullGmm();
357  gmm->Resize(1, dim);
358  gmm->SetWeights(weights);
359  gmm->SetInvCovarsAndMeans(invcovars, means);
360  gmm->ComputeGconsts();
361 
362  {
363  KALDI_LOG << "Testing natural<>normal conversion";
364  FullGmmNormal ngmm(*gmm);
365  FullGmm rgmm;
366  rgmm.Resize(1, dim);
367  ngmm.CopyToFullGmm(&rgmm, kGmmAll);
368 
369  // check contents
370  KALDI_ASSERT(ApproxEqual(weights(0), 1.0F, 1e-6));
371  KALDI_ASSERT(ApproxEqual(gmm->weights()(0), rgmm.weights()(0), 1e-6));
372  double prec_m = 1e-3;
373  double prec_v = 1e-3;
374  for (int32 d = 0; d < dim; d++) {
375  KALDI_ASSERT(std::abs(means.Row(0)(d) - ngmm.means_.Row(0)(d)) < prec_m);
376  KALDI_ASSERT(std::abs(gmm->means_invcovars().Row(0)(d) - rgmm.means_invcovars().Row(0)(d)) < prec_v);
377  for (int32 d2 = d; d2 < dim; ++d2) {
378  KALDI_ASSERT(std::abs(covar(d, d2) - ngmm.vars_[0](d, d2)) < prec_v);
379  KALDI_ASSERT(std::abs(gmm->inv_covars()[0](d, d2) - rgmm.inv_covars()[0](d, d2)) < prec_v);
380  }
381  }
382  KALDI_LOG << "OK";
383  }
384 
385  MleFullGmmOptions config;
386  GmmFlagsType flags_all = kGmmAll;
387 
388 
389  AccumFullGmm est_gmm;
390  est_gmm.Resize(gmm->NumGauss(), gmm->Dim(), flags_all);
391 
392  // iterate
393  int32 iteration = 0;
394  float lastloglike = 0.0;
395  int32 lastloglike_nM = 0;
396 
397  while (iteration < maxiterations) {
398  // First, resize accums for the case of component splitting
399  est_gmm.Resize(gmm->NumGauss(),
400  gmm->Dim(), flags_all);
401  est_gmm.SetZero(flags_all);
402  double loglike = 0.0;
403  double loglike_test = 0.0;
404  for (int32 i = 0; i < counter; i++) {
405  loglike += static_cast<double>(
406  est_gmm.AccumulateFromFull(*gmm, feats.Row(i), 1.0F));
407  if (iteration < 4) {
408  loglike_test += GetLogLikeTest(*gmm, feats.Row(i), (i == 0));
409  AssertEqual(loglike, loglike_test);
410  }
411  }
412 
413  std::cout << "Loglikelihood before iteration "
414  << iteration << " : " << std::scientific << loglike
415  << " number of components: " << gmm->NumGauss() << '\n';
416 
417  // std::cout << "Model is: " << *gmm;
418 
419  // every 5th iteration check loglike change and update lastloglike
420  if (iteration % 5 == 0) {
421  // likelihood should be increasing on the long term
422  if ((iteration > 0) && (gmm->NumGauss() >= lastloglike_nM)) {
423  KALDI_ASSERT(loglike > lastloglike);
424  }
425  lastloglike = loglike;
426  lastloglike_nM = gmm->NumGauss();
427  }
428 
429  BaseFloat obj, count;
430  MleFullGmmUpdate(config, est_gmm, flags_all, gmm, &obj, &count);
431  KALDI_LOG << "ML objective function change = " << (obj/count)
432  << " per frame, over " << (count) << " frames.";
433 
434  // split components, doubling the count, at the second iteration
435  // and every 4th iteration thereafter;
436  // stop splitting when maxcomponents is reached
437  if ( (iteration < maxiterations - 3) && (iteration % 4 == 1)
438  && (gmm->NumGauss() * 2 <= maxcomponents)) {
439  gmm->Split(gmm->NumGauss() * 2, 0.01);
440  }
441 
442  if (iteration == 5) { // run following tests with not too overfitted model
443  std::cout << "Testing flags-driven updates kGmmAll" << '\n';
444  test_flags_driven_update(*gmm, feats, kGmmAll);
445  std::cout << "Testing flags-driven updates kGmmWeights" << '\n';
447  std::cout << "Testing flags-driven kGmmMeans" << '\n';
448  test_flags_driven_update(*gmm, feats, kGmmMeans);
449  std::cout << "Testing flags-driven kGmmVariances" << '\n';
451  std::cout << "Testing flags-driven kGmmWeights | kGmmMeans" << '\n';
453  std::cout << "Testing component-wise accumulation" << '\n';
454  TestComponentAcc(*gmm, feats);
455  }
456 
457  iteration++;
458  }
459 
460  { // I/O tests
461  GmmFlagsType flags_all = kGmmAll;
462  est_gmm.Resize(gmm->NumGauss(),
463  gmm->Dim(), flags_all);
464  est_gmm.SetZero(flags_all);
465  float loglike = 0.0;
466  for (int32 i = 0; i < counter; i++) {
467  loglike += est_gmm.AccumulateFromFull(*gmm, feats.Row(i), 1.0F);
468  }
469  test_io(*gmm, est_gmm, false, feats);
470  test_io(*gmm, est_gmm, true, feats);
471  }
472 
473  delete gmm;
474  gmm = NULL;
475 }
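
Two identities help when reading this test. The "perfect" per-frame log-likelihood printed for the single-Gaussian fit follows from substituting the ML estimates \hat\mu, \hat\Sigma of one full-covariance Gaussian back into its own log-density; the averaged Mahalanobis term then equals D, giving

    \frac{1}{T}\sum_{t=1}^{T} \log \mathcal{N}(x_t \mid \hat\mu, \hat\Sigma) = -\tfrac{1}{2}\big( \log\lvert\hat\Sigma\rvert + D \log 2\pi + D \big),

which is exactly -0.5*(logdet + dim*(M_LOG_2PI + 1)) in the code. Second, up to the variance-floor and minimum-occupancy safeguards configured by MleFullGmmOptions, the update performed by MleFullGmmUpdate is the textbook ML re-estimation from the accumulated statistics c_i, m_i, S_i defined in the TestComponentAcc section above:

    \hat w_i = \frac{c_i}{\sum_j c_j}, \qquad \hat\mu_i = \frac{m_i}{c_i}, \qquad \hat\Sigma_i = \frac{S_i}{c_i} - \hat\mu_i \hat\mu_i^{\top},

which is why the accumulated log-likelihood is expected to be non-decreasing across iterations (checked every 5th iteration, provided the component count has not decreased).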