mllt.cc
Go to the documentation of this file.
1 // transform/mllt.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "transform/mllt.h"
21 #include "util/const-integer-set.h"
22 
23 namespace kaldi {
24 
25 void MlltAccs::Init(int32 dim, BaseFloat rand_prune) { // initializes (destroys anything that was there before).
26  KALDI_ASSERT(dim > 0);
27  beta_ = 0;
28  rand_prune_ = rand_prune;
29  G_.resize(dim);
30  for (int32 i = 0; i < dim; i++)
31  G_[i].Resize(dim); // will zero it too.
32 }
33 
34 void MlltAccs::Read(std::istream &is, bool binary, bool add) {
35  ExpectToken(is, binary, "<MlltAccs>");
36  double beta;
37  int32 dim;
38  ReadBasicType(is, binary, &beta);
39  if (!add) beta_ = beta;
40  else beta_ += beta;
41  ReadBasicType(is, binary, &dim);
42  if (add && G_.size() != 0 && static_cast<size_t>(dim) != G_.size())
43  KALDI_ERR << "MlltAccs::Read, summing accs of different size.";
44  if (!add || G_.empty()) G_.resize(dim);
45  ExpectToken(is, binary, "<G>");
46  for (size_t i = 0; i < G_.size(); i++)
47  G_[i].Read(is, binary, add);
48  ExpectToken(is, binary, "</MlltAccs>");
49 }
50 
51 void MlltAccs::Write(std::ostream &os, bool binary) const {
52  WriteToken(os, binary, "<MlltAccs>");
53  if(!binary) os << '\n';
54  WriteBasicType(os, binary, beta_);
55  int32 dim = G_.size();
56  WriteBasicType(os, binary, dim);
57  WriteToken(os, binary, "<G>");
58  if(!binary) os << '\n';
59  for (size_t i = 0; i < G_.size(); i++)
60  G_[i].Write(os, binary);
61  WriteToken(os, binary, "</MlltAccs>");
62  if(!binary) os << '\n';
63 }
64 
65 // static version of the Update function.
66 void MlltAccs::Update(double beta,
67  const std::vector<SpMatrix<double> > &G,
68  MatrixBase<BaseFloat> *M_ptr,
69  BaseFloat *objf_impr_out,
70  BaseFloat *count_out) {
71  int32 dim = G.size();
72  KALDI_ASSERT(dim != 0 && M_ptr != NULL
73  && M_ptr->NumRows() == dim
74  && M_ptr->NumCols() == dim);
75  if (beta < 10*dim) { // not really enough data to estimate.
76  // don't bother with min-count parameter etc., as MLLT is typically
77  // global.
78  if (beta > 2*dim)
79  KALDI_WARN << "Mllt:Update, very small count " << beta;
80  else
81  KALDI_WARN << "Mllt:Update, insufficient count " << beta;
82  }
83  int32 num_iters = 200; // may later make this an option.
84  Matrix<double> M(dim, dim), Minv(dim, dim);
85  M.CopyFromMat(*M_ptr);
86  std::vector<SpMatrix<double> > Ginv(dim);
87  for (int32 i = 0; i < dim; i++) {
88  Ginv[i].Resize(dim);
89  Ginv[i].CopyFromSp(G[i]);
90  Ginv[i].Invert();
91  }
92 
93  double tot_objf_impr = 0.0;
94  for (int32 p = 0; p < num_iters; p++) {
95  for (int32 i = 0; i < dim; i++) { // for each row
96  SubVector<double> row(M, i);
97  // work out cofactor (actually cofactor times a constant which
98  // doesn't affect anything):
99  Minv.CopyFromMat(M);
100  Minv.Invert();
101  Minv.Transpose();
102  SubVector<double> cofactor(Minv, i);
103  // Objf is: beta log(|row . cofactor|) -0.5 row^T G[i] row
104  // optimized by (c.f. Mark Gales's techreport "semitied covariance matrices
105  // for hidden markov models, eq. (22)),
106  // row = G_i^{-1} cofactor sqrt(beta / cofactor^T G_i^{-1} cofactor). (1)
107  // here, "row" and "cofactor" are considered as column vectors.
108  double objf_before = beta * Log(std::abs(VecVec(row, cofactor)))
109  -0.5 * VecSpVec(row, G[i], row);
110  // do eq. (1) above:
111  row.AddSpVec(std::sqrt(beta / VecSpVec(cofactor, Ginv[i], cofactor)),
112  Ginv[i], cofactor, 0.0);
113  double objf_after = beta * Log(std::abs(VecVec(row, cofactor)))
114  -0.5 * VecSpVec(row, G[i], row);
115  if (objf_after < objf_before - fabs(objf_before)*0.00001)
116  KALDI_ERR << "Objective decrease in MLLT update.";
117  tot_objf_impr += objf_after - objf_before;
118  }
119  if (p < 10 || p % 10 == 0)
120  KALDI_LOG << "MLLT objective improvement per frame by " << p
121  << "'th iteration is " << (tot_objf_impr/beta) << " per frame "
122  << "over " << beta << " frames.";
123  }
124  if (objf_impr_out)
125  *objf_impr_out = tot_objf_impr;
126  if (count_out)
127  *count_out = beta;
128  M_ptr->CopyFromMat(M);
129 }
130 
132  const VectorBase<BaseFloat> &data,
133  const VectorBase<BaseFloat> &posteriors) {
134  KALDI_ASSERT(data.Dim() == gmm.Dim());
135  KALDI_ASSERT(data.Dim() == Dim());
136  KALDI_ASSERT(posteriors.Dim() == gmm.NumGauss());
137  const Matrix<BaseFloat> &means_invvars = gmm.means_invvars();
138  const Matrix<BaseFloat> &inv_vars = gmm.inv_vars();
139  Vector<BaseFloat> mean(data.Dim());
140  SpMatrix<double> tmp(data.Dim());
141  Vector<double> offset_dbl(data.Dim());
142  double this_beta_ = 0.0;
143  KALDI_ASSERT(rand_prune_ >= 0.0);
144  for (int32 i = 0; i < posteriors.Dim(); i++) { // for each mixcomp..
145  BaseFloat posterior = RandPrune(posteriors(i), rand_prune_);
146  if (posterior == 0.0) continue;
147  SubVector<BaseFloat> mean_invvar(means_invvars, i);
148  SubVector<BaseFloat> inv_var(inv_vars, i);
149  mean.AddVecDivVec(1.0, mean_invvar, inv_var, 0.0); // get mean.
150  mean.AddVec(-1.0, data); // get offset
151  offset_dbl.CopyFromVec(mean); // make it double.
152  tmp.SetZero();
153  tmp.AddVec2(1.0, offset_dbl);
154  for (int32 j = 0; j < data.Dim(); j++)
155  G_[j].AddSp(inv_var(j)*posterior, tmp);
156  this_beta_ += posterior;
157  }
158  beta_ += this_beta_;
159  Vector<double> data_dbl(data);
160 }
161 
163  const VectorBase<BaseFloat> &data,
164  BaseFloat weight) { // e.g. weight = 1.0
165  Vector<BaseFloat> posteriors(gmm.NumGauss());
166  BaseFloat ans = gmm.ComponentPosteriors(data, &posteriors);
167  posteriors.Scale(weight);
168  AccumulateFromPosteriors(gmm, data, posteriors);
169  return ans;
170 }
171 
172 
174  const DiagGmm &gmm,
175  const std::vector<int32> &gselect,
176  const VectorBase<BaseFloat> &data,
177  BaseFloat weight) { // e.g. weight = 1.0
178  KALDI_ASSERT(!gselect.empty());
179  Vector<BaseFloat> loglikes;
180  gmm.LogLikelihoodsPreselect(data, gselect, &loglikes);
181  BaseFloat loglike = loglikes.ApplySoftMax();
182  // now "loglikes" is a vector of posteriors, indexed
183  // by the same index as gselect.
184  Vector<BaseFloat> posteriors(gmm.NumGauss());
185  for (size_t i = 0; i < gselect.size(); i++)
186  posteriors(gselect[i]) = loglikes(i) * weight;
187  AccumulateFromPosteriors(gmm, data, posteriors);
188  return loglike;
189 }
190 
191 
192 
193 
194 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
BaseFloat AccumulateFromGmmPreselect(const DiagGmm &gmm, const std::vector< int32 > &gselect, const VectorBase< BaseFloat > &data, BaseFloat weight)
Definition: mllt.cc:173
void Transpose()
Transpose the matrix.
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: diag-gmm.cc:566
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void Read(std::istream &is, bool binary, bool add=false)
Definition: mllt.cc:34
Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState *state=NULL)
Definition: kaldi-math.h:174
kaldi::int32 int32
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <-- beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Definition: mllt.cc:131
void Update(MatrixBase< BaseFloat > *M, BaseFloat *objf_impr_out, BaseFloat *count_out) const
The Update function does the ML update; it requires that M has the right size.
Definition: mllt.h:69
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: diag-gmm.cc:601
double Log(double x)
Definition: kaldi-math.h:100
double beta_
Definition: mllt.h:108
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
std::vector< SpMatrix< double > > G_
Definition: mllt.h:109
#define KALDI_ERR
Definition: kaldi-error.h:147
void Init(int32 dim, BaseFloat rand_prune=0.25)
initializes (destroys anything that was there before).
Definition: mllt.cc:25
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
Definition: sp-matrix.cc:964
#define KALDI_WARN
Definition: kaldi-error.h:150
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
int32 Dim()
Definition: mllt.h:60
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
BaseFloat rand_prune_
rand_prune_ controls randomized pruning; the larger it is, the more pruning we do.
Definition: mllt.h:107
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
BaseFloat AccumulateFromGmm(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat weight)
Definition: mllt.cc:162
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: kaldi-matrix.cc:38
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void Write(std::ostream &os, bool binary) const
Definition: mllt.cc:51
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180