lda-estimate.cc
Go to the documentation of this file.
1 // transform/lda-estimate.cc
2 
3 // Copyright 2009-2011 Jan Silovsky
4 // 2013 Johns Hopkins University
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "transform/lda-estimate.h"
23 
24 namespace kaldi {
25 
26 void LdaEstimate::Init(int32 num_classes, int32 dimension) {
27  zero_acc_.Resize(num_classes);
28  first_acc_.Resize(num_classes, dimension);
29  total_second_acc_.Resize(dimension);
30 }
31 
36 }
37 
39  double d = static_cast<double>(f);
40  zero_acc_.Scale(d);
41  first_acc_.Scale(d);
43 }
44 
46  int32 class_id, BaseFloat weight) {
47  KALDI_ASSERT(class_id >= 0);
48  KALDI_ASSERT(class_id < NumClasses() && data.Dim() == Dim());
49 
50  Vector<double> data_d(data);
51 
52  zero_acc_(class_id) += weight;
53  first_acc_.Row(class_id).AddVec(weight, data_d);
54  total_second_acc_.AddVec2(weight, data_d);
55 }
56 
58  SpMatrix<double> *between_covar,
59  Vector<double> *total_mean,
60  double *tot_count) const {
61  int32 num_class = NumClasses(), dim = Dim();
62  double sum = zero_acc_.Sum();
63  *tot_count = sum;
64  total_covar->Resize(dim);
65  total_covar->CopyFromSp(total_second_acc_);
66  total_mean->Resize(dim);
67  total_mean->AddRowSumMat(1.0, first_acc_);
68  total_mean->Scale(1.0 / sum);
69  total_covar->Scale(1.0 / sum);
70  total_covar->AddVec2(-1.0, *total_mean);
71 
72  between_covar->Resize(dim);
73  Vector<double> class_mean(dim);
74  for (int32 c = 0; c < num_class; c++) {
75  if (zero_acc_(c) != 0.0) {
76  class_mean.CopyRowFromMat(first_acc_, c);
77  class_mean.Scale(1.0 / zero_acc_(c));
78  between_covar->AddVec2(zero_acc_(c) / sum, class_mean);
79  }
80  }
81  between_covar->AddVec2(-1.0, *total_mean);
82 }
83 
84 
87  Matrix<BaseFloat> *mfull) const {
88  int32 target_dim = opts.dim;
89  KALDI_ASSERT(target_dim > 0);
90  // between-class covar is of most rank C-1
91  KALDI_ASSERT(target_dim <= Dim() && (target_dim < NumClasses() || opts.allow_large_dim));
92  int32 dim = Dim();
93 
94  double count;
95  SpMatrix<double> total_covar, bc_covar;
96  Vector<double> total_mean;
97  GetStats(&total_covar, &bc_covar, &total_mean, &count);
98 
99  // within-class covariance
100  SpMatrix<double> wc_covar(total_covar);
101  wc_covar.AddSp(-1.0, bc_covar);
102  TpMatrix<double> wc_covar_sqrt(dim);
103  try {
104  wc_covar_sqrt.Cholesky(wc_covar);
105  } catch (...) {
106  BaseFloat smooth = 1.0e-03 * wc_covar.Trace() / wc_covar.NumRows();
107  KALDI_LOG << "Cholesky failed (possibly not +ve definite), so adding " << smooth
108  << " to diagonal and trying again.\n";
109  for (int32 i = 0; i < wc_covar.NumRows(); i++)
110  wc_covar(i, i) += smooth;
111  wc_covar_sqrt.Cholesky(wc_covar);
112  }
113  Matrix<double> wc_covar_sqrt_mat(wc_covar_sqrt);
114  // copy wc_covar_sqrt to Matrix, because it facilitates further use
115  wc_covar_sqrt_mat.Invert();
116 
117  SpMatrix<double> tmp_sp(dim);
118  tmp_sp.AddMat2Sp(1.0, wc_covar_sqrt_mat, kNoTrans, bc_covar, 0.0);
119  Matrix<double> tmp_mat(tmp_sp);
120 
121  Matrix<double> svd_u(dim, dim), svd_vt(dim, dim);
122  Vector<double> svd_d(dim);
123  tmp_mat.Svd(&svd_d, &svd_u, &svd_vt);
124  SortSvd(&svd_d, &svd_u);
125 
126  KALDI_LOG << "Data count is " << count;
127  KALDI_LOG << "LDA singular values are " << svd_d;
128 
129  KALDI_LOG << "Sum of all singular values is " << svd_d.Sum();
130  KALDI_LOG << "Sum of selected singular values is " <<
131  SubVector<double>(svd_d, 0, target_dim).Sum();
132 
133  Matrix<double> lda_mat(dim, dim);
134  lda_mat.AddMatMat(1.0, svd_u, kTrans, wc_covar_sqrt_mat, kNoTrans, 0.0);
135 
136  // finally, copy first target_dim rows to m
137  m->Resize(target_dim, dim);
138  m->CopyFromMat(lda_mat.Range(0, target_dim, 0, dim));
139 
140  if (mfull != NULL) {
141  mfull->Resize(dim, dim);
142  mfull->CopyFromMat(lda_mat);
143  }
144 
145  if (opts.within_class_factor != 1.0) { // This is not the normal code path;
146  // it's intended for use in neural net inputs.
147  for (int32 i = 0; i < svd_d.Dim(); i++) {
148  BaseFloat old_var = 1.0 + svd_d(i), // the total variance of that dim..
149  new_var = opts.within_class_factor + svd_d(i), // the variance we want..
150  scale = sqrt(new_var / old_var);
151  if (i < m->NumRows())
152  m->Row(i).Scale(scale);
153  if (mfull != NULL)
154  mfull->Row(i).Scale(scale);
155  }
156  }
157 
158  if (opts.remove_offset) {
159  AddMeanOffset(total_mean, m);
160  if (mfull != NULL)
161  AddMeanOffset(total_mean, mfull);
162  }
163 }
164 
165 // static
167  Matrix<BaseFloat> *projection) {
168  Vector<BaseFloat> mean(mean_dbl);
169  Vector<BaseFloat> neg_projected_mean(projection->NumRows());
170  // the negative
171  neg_projected_mean.AddMatVec(-1.0, *projection, kNoTrans, mean, 0.0);
172  projection->Resize(projection->NumRows(),
173  projection->NumCols() + 1,
174  kCopyData);
175  projection->CopyColFromVec(neg_projected_mean, projection->NumCols() - 1);
176 }
177 
178 
179 
180 void LdaEstimate::Read(std::istream &in_stream, bool binary, bool add) {
181  int32 num_classes, dim;
182  std::string token;
183 
184  ExpectToken(in_stream, binary, "<LDAACCS>");
185  ExpectToken(in_stream, binary, "<VECSIZE>");
186  ReadBasicType(in_stream, binary, &dim);
187  ExpectToken(in_stream, binary, "<NUMCLASSES>");
188  ReadBasicType(in_stream, binary, &num_classes);
189 
190  if (add) {
191  if (NumClasses() != 0 || Dim() != 0) {
192  if (num_classes != NumClasses() || dim != Dim()) {
193  KALDI_ERR <<"LdaEstimate::Read, dimension or classes count mismatch, "
194  <<(NumClasses()) << ", " <<(Dim()) << ", "
195  << " vs. " <<(num_classes) << ", " << (dim);
196  }
197  } else {
198  Init(num_classes, dim);
199  }
200  } else {
201  Init(num_classes, dim);
202  }
203 
204  // these are needed for demangling the variances.
205  Vector<double> tmp_zero_acc;
206  Matrix<double> tmp_first_acc;
207  SpMatrix<double> tmp_sec_acc;
208 
209  ReadToken(in_stream, binary, &token);
210  while (token != "</LDAACCS>") {
211  if (token == "<ZERO_ACCS>") {
212  tmp_zero_acc.Read(in_stream, binary, false);
213  if (!add) zero_acc_.SetZero();
214  zero_acc_.AddVec(1.0, tmp_zero_acc);
215  // zero_acc_.Read(in_stream, binary, add);
216  } else if (token == "<FIRST_ACCS>") {
217  tmp_first_acc.Read(in_stream, binary, false);
218  if (!add) first_acc_.SetZero();
219  first_acc_.AddMat(1.0, tmp_first_acc);
220  // first_acc_.Read(in_stream, binary, add);
221  } else if (token == "<SECOND_ACCS>") {
222  tmp_sec_acc.Read(in_stream, binary, false);
223  for (int32 c = 0; c < static_cast<int32>(NumClasses()); c++) {
224  if (tmp_zero_acc(c) != 0)
225  tmp_sec_acc.AddVec2(1.0 / tmp_zero_acc(c), tmp_first_acc.Row(c));
226  }
227  if (!add) total_second_acc_.SetZero();
228  total_second_acc_.AddSp(1.0, tmp_sec_acc);
229  // total_second_acc_.Read(in_stream, binary, add);
230  } else {
231  KALDI_ERR << "Unexpected token '" << token << "' in file ";
232  }
233  ReadToken(in_stream, binary, &token);
234  }
235 }
236 
237 void LdaEstimate::Write(std::ostream &out_stream, bool binary) const {
238  WriteToken(out_stream, binary, "<LDAACCS>");
239  WriteToken(out_stream, binary, "<VECSIZE>");
240  WriteBasicType(out_stream, binary, static_cast<int32>(Dim()));
241  WriteToken(out_stream, binary, "<NUMCLASSES>");
242  WriteBasicType(out_stream, binary, static_cast<int32>(NumClasses()));
243 
244  WriteToken(out_stream, binary, "<ZERO_ACCS>");
245  Vector<BaseFloat> zero_acc_bf(zero_acc_);
246  zero_acc_bf.Write(out_stream, binary);
247  WriteToken(out_stream, binary, "<FIRST_ACCS>");
248  Matrix<BaseFloat> first_acc_bf(first_acc_);
249  first_acc_bf.Write(out_stream, binary);
250  WriteToken(out_stream, binary, "<SECOND_ACCS>");
251  SpMatrix<double> tmp_sec_acc(total_second_acc_);
252  for (int32 c = 0; c < static_cast<int32>(NumClasses()); c++) {
253  if (zero_acc_(c) != 0)
254  tmp_sec_acc.AddVec2(-1.0 / zero_acc_(c), first_acc_.Row(c));
255  }
256  SpMatrix<BaseFloat> tmp_sec_acc_bf(tmp_sec_acc);
257  tmp_sec_acc_bf.Write(out_stream, binary);
258 
259  WriteToken(out_stream, binary, "</LDAACCS>");
260 }
261 
262 
263 } // End of namespace kaldi
void Accumulate(const VectorBase< BaseFloat > &data, int32 class_id, BaseFloat weight=1.0)
Accumulates data.
Definition: lda-estimate.cc:45
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Write(std::ostream &out, bool binary) const
write to stream.
void CopyColFromVec(const VectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
void Scale(Real c)
void Read(std::istream &in, bool binary, bool add=false)
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
void Write(std::ostream &out, bool binary) const
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
int32 Dim() const
Returns the dimensionality of the feature vectors.
Definition: lda-estimate.h:66
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
Real Trace() const
Definition: sp-matrix.cc:171
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
void Scale(BaseFloat f)
Scales all accumulators.
Definition: lda-estimate.cc:38
Matrix< double > first_acc_
Definition: lda-estimate.h:94
void Write(std::ostream &out_stream, bool binary) const
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void Init(int32 num_classes, int32 dimension)
Allocates memory for accumulators.
Definition: lda-estimate.cc:26
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT NumRows() const
Vector< double > zero_acc_
Definition: lda-estimate.h:93
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85
void CopyRowFromMat(const MatrixBase< Real > &M, MatrixIndexT row)
Extracts a row of the matrix M.
const size_t count
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v&#39;
Definition: sp-matrix.cc:946
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
void Estimate(const LdaEstimateOptions &opts, Matrix< BaseFloat > *M, Matrix< BaseFloat > *Mfull=NULL) const
Estimates the LDA transform matrix m.
Definition: lda-estimate.cc:85
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void Read(std::istream &in_stream, bool binary, bool add)
void Scale(Real alpha)
Multiply each element with a scalar value.
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
static void AddMeanOffset(const VectorBase< double > &total_mean, Matrix< BaseFloat > *projection)
This function modifies the LDA matrix so that it also subtracts the mean feature value.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetZero()
Sets matrix to zero.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
int32 NumClasses() const
Returns the number of classes.
Definition: lda-estimate.h:64
void GetStats(SpMatrix< double > *total_covar, SpMatrix< double > *between_covar, Vector< double > *total_mean, double *sum) const
Extract a more processed form of the stats.
Definition: lda-estimate.cc:57
Real Sum() const
Returns sum of the elements.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <– beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
SpMatrix< double > total_second_acc_
Definition: lda-estimate.h:95
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Svd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt) const
Compute SVD (*this) = U diag(s) Vt.
void ZeroAccumulators()
Sets all accumulators to zero.
Definition: lda-estimate.cc:32
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: kaldi-matrix.cc:38
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void SetZero()
Set vector to all zeros.
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void Read(std::istream &in, bool binary, bool add=false)
Read function using C++ streams.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.