ivector-compute-lda.cc
Go to the documentation of this file.
1 // ivectorbin/ivector-compute-lda.cc
2 
3 // Copyright 2013 Daniel Povey
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "gmm/am-diag-gmm.h"
25 #include "util/kaldi-thread.h"
26 
27 namespace kaldi {
28 
29 
31  public:
33  between_covar_(dim),
34  num_spk_(0),
35  num_utt_(0) { }
36 
38  void GetTotalCovar(SpMatrix<double> *tot_covar) const {
40  *tot_covar = tot_covar_;
41  tot_covar->Scale(1.0 / num_utt_);
42  }
43  void GetWithinCovar(SpMatrix<double> *within_covar) {
45  *within_covar = tot_covar_;
46  within_covar->AddSp(-1.0, between_covar_);
47  within_covar->Scale(1.0 / num_utt_);
48  }
49  void AccStats(const Matrix<double> &utts_of_this_spk) {
50  int32 num_utts = utts_of_this_spk.NumRows();
51  tot_covar_.AddMat2(1.0, utts_of_this_spk, kTrans, 1.0);
52  Vector<double> spk_average(Dim());
53  spk_average.AddRowSumMat(1.0 / num_utts, utts_of_this_spk);
54  between_covar_.AddVec2(num_utts, spk_average);
55  num_utt_ += num_utts;
56  num_spk_ += 1;
57  }
59  bool SingularTotCovar() { return (num_utt_ < Dim()); }
60  bool Empty() { return (num_utt_ - num_spk_ == 0); }
61  std::string Info() {
62  std::ostringstream ostr;
63  ostr << num_spk_ << " speakers, " << num_utt_ << " utterances. ";
64  return ostr.str();
65  }
66  int32 Dim() { return tot_covar_.NumRows(); }
67  // Use default constructor and assignment operator.
68  void AddStats(const CovarianceStats &other) {
69  tot_covar_.AddSp(1.0, other.tot_covar_);
71  num_spk_ += other.num_spk_;
72  num_utt_ += other.num_utt_;
73  }
74  private:
80 };
81 
82 
83 template<class Real>
85  Real floor,
86  MatrixBase<Real> *proj) {
87  int32 dim = covar.NumRows();
88  Matrix<Real> U(dim, dim);
89  Vector<Real> s(dim);
90  covar.Eig(&s, &U);
91  // Sort eigvenvalues from largest to smallest.
92  SortSvd(&s, &U);
93  // Floor eigenvalues to a small positive value.
94  int32 num_floored;
95  floor *= s(0); // Floor relative to the largest eigenvalue
96  s.ApplyFloor(floor, &num_floored);
97  if (num_floored > 0) {
98  KALDI_WARN << "Floored " << num_floored << " eigenvalues of covariance "
99  << "to " << floor;
100  }
101  // Next two lines computes projection proj, such that
102  // proj * covar * proj^T = I.
103  s.ApplyPow(-0.5);
104  proj->AddDiagVecMat(1.0, s, U, kTrans, 0.0);
105 }
106 
108  const std::map<std::string, Vector<BaseFloat> *> &utt2ivector,
109  const std::map<std::string, std::vector<std::string> > &spk2utt,
110  BaseFloat total_covariance_factor,
111  BaseFloat covariance_floor,
112  MatrixBase<BaseFloat> *lda_out) {
113  KALDI_ASSERT(!utt2ivector.empty());
114  int32 lda_dim = lda_out->NumRows(), dim = lda_out->NumCols();
115  KALDI_ASSERT(dim == utt2ivector.begin()->second->Dim());
116  KALDI_ASSERT(lda_dim > 0 && lda_dim <= dim);
117 
118  CovarianceStats stats(dim);
119 
120  std::map<std::string, std::vector<std::string> >::const_iterator iter;
121  for (iter = spk2utt.begin(); iter != spk2utt.end(); ++iter) {
122  const std::vector<std::string> &uttlist = iter->second;
123  KALDI_ASSERT(!uttlist.empty());
124 
125  int32 N = uttlist.size(); // number of utterances.
126  Matrix<double> utts_of_this_spk(N, dim);
127  for (int32 n = 0; n < N; n++) {
128  std::string utt = uttlist[n];
129  KALDI_ASSERT(utt2ivector.count(utt) != 0);
130  utts_of_this_spk.Row(n).CopyFromVec(
131  *(utt2ivector.find(utt)->second));
132  }
133  stats.AccStats(utts_of_this_spk);
134  }
135 
136  KALDI_LOG << "Stats have " << stats.Info();
137  KALDI_ASSERT(!stats.Empty());
138  KALDI_ASSERT(!stats.SingularTotCovar() &&
139  "Too little data for iVector dimension.");
140 
141 
142  SpMatrix<double> total_covar;
143  stats.GetTotalCovar(&total_covar);
144  SpMatrix<double> within_covar;
145  stats.GetWithinCovar(&within_covar);
146 
147 
148  SpMatrix<double> mat_to_normalize(dim);
149  mat_to_normalize.AddSp(total_covariance_factor, total_covar);
150  mat_to_normalize.AddSp(1.0 - total_covariance_factor, within_covar);
151 
152  Matrix<double> T(dim, dim);
153  ComputeNormalizingTransform(mat_to_normalize,
154  static_cast<double>(covariance_floor), &T);
155 
156  SpMatrix<double> between_covar(total_covar);
157  between_covar.AddSp(-1.0, within_covar);
158 
159  SpMatrix<double> between_covar_proj(dim);
160  between_covar_proj.AddMat2Sp(1.0, T, kNoTrans, between_covar, 0.0);
161 
162  Matrix<double> U(dim, dim);
163  Vector<double> s(dim);
164  between_covar_proj.Eig(&s, &U);
165  bool sort_on_absolute_value = false; // any negative ones will go last (they
166  // shouldn't exist anyway so doesn't
167  // really matter)
168  SortSvd(&s, &U, static_cast<Matrix<double>*>(NULL),
169  sort_on_absolute_value);
170 
171  KALDI_LOG << "Singular values of between-class covariance after projecting "
172  << "with interpolated [total/within] covariance with a weight of "
173  << total_covariance_factor << " on the total covariance, are: " << s;
174 
175  // U^T is the transform that will diagonalize the between-class covariance.
176  // U_part is just the part of U that corresponds to the kept dimensions.
177  SubMatrix<double> U_part(U, 0, dim, 0, lda_dim);
178 
179  // We first transform by T and then by U_part^T. This means T
180  // goes on the right.
181  Matrix<double> temp(lda_dim, dim);
182  temp.AddMatMat(1.0, U_part, kTrans, T, kNoTrans, 0.0);
183  lda_out->CopyFromMat(temp);
184 }
185 
187  std::map<std::string, Vector<BaseFloat> *> utt2ivector,
188  Vector<BaseFloat> *mean_out) {
189  int32 dim = utt2ivector.begin()->second->Dim();
190  size_t num_ivectors = utt2ivector.size();
191  Vector<double> mean(dim);
192  std::map<std::string, Vector<BaseFloat> *>::iterator iter;
193  for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter)
194  mean.AddVec(1.0 / num_ivectors, *(iter->second));
195  mean_out->Resize(dim);
196  mean_out->CopyFromVec(mean);
197  for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter)
198  iter->second->AddVec(-1.0, *mean_out);
199 }
200 
201 
202 
203 }
204 
205 int main(int argc, char *argv[]) {
206  using namespace kaldi;
207  typedef kaldi::int32 int32;
208  try {
209  const char *usage =
210  "Compute an LDA matrix for iVector system. Reads in iVectors per utterance,\n"
211  "and an utt2spk file which it uses to help work out the within-speaker and\n"
212  "between-speaker covariance matrices. Outputs an LDA projection to a\n"
213  "specified dimension. By default it will normalize so that the projected\n"
214  "within-class covariance is unit, but if you set --normalize-total-covariance\n"
215  "to true, it will normalize the total covariance.\n"
216  "Note: the transform we produce is actually an affine transform which will\n"
217  "also set the global mean to zero.\n"
218  "\n"
219  "Usage: ivector-compute-lda [options] <ivector-rspecifier> <utt2spk-rspecifier> "
220  "<lda-matrix-out>\n"
221  "e.g.: \n"
222  " ivector-compute-lda ark:ivectors.ark ark:utt2spk lda.mat\n";
223 
224  ParseOptions po(usage);
225 
226  int32 lda_dim = 100; // Dimension we reduce to
227  BaseFloat total_covariance_factor = 0.0,
228  covariance_floor = 1.0e-06;
229  bool binary = true;
230 
231  po.Register("dim", &lda_dim, "Dimension we keep with the LDA transform");
232  po.Register("total-covariance-factor", &total_covariance_factor,
233  "If this is 0.0 we normalize to make the within-class covariance "
234  "unit; if 1.0, the total covariance; if between, we normalize "
235  "an interpolated matrix.");
236  po.Register("covariance-floor", &covariance_floor, "Floor the eigenvalues "
237  "of the interpolated covariance matrix to the product of its "
238  "largest eigenvalue and this number.");
239  po.Register("binary", &binary, "Write output in binary mode");
240 
241  po.Read(argc, argv);
242 
243  if (po.NumArgs() != 3) {
244  po.PrintUsage();
245  exit(1);
246  }
247 
248  std::string ivector_rspecifier = po.GetArg(1),
249  utt2spk_rspecifier = po.GetArg(2),
250  lda_wxfilename = po.GetArg(3);
251 
252  KALDI_ASSERT(covariance_floor >= 0.0);
253 
254  int32 num_done = 0, num_err = 0, dim = 0;
255 
256  SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
257  RandomAccessTokenReader utt2spk_reader(utt2spk_rspecifier);
258 
259  std::map<std::string, Vector<BaseFloat> *> utt2ivector;
260  std::map<std::string, std::vector<std::string> > spk2utt;
261 
262  for (; !ivector_reader.Done(); ivector_reader.Next()) {
263  std::string utt = ivector_reader.Key();
264  const Vector<BaseFloat> &ivector = ivector_reader.Value();
265  if (utt2ivector.count(utt) != 0) {
266  KALDI_WARN << "Duplicate iVector found for utterance " << utt
267  << ", ignoring it.";
268  num_err++;
269  continue;
270  }
271  if (!utt2spk_reader.HasKey(utt)) {
272  KALDI_WARN << "utt2spk has no entry for utterance " << utt
273  << ", skipping it.";
274  num_err++;
275  continue;
276  }
277  std::string spk = utt2spk_reader.Value(utt);
278  utt2ivector[utt] = new Vector<BaseFloat>(ivector);
279  if (dim == 0) {
280  dim = ivector.Dim();
281  } else {
282  KALDI_ASSERT(dim == ivector.Dim() && "iVector dimension mismatch");
283  }
284  spk2utt[spk].push_back(utt);
285  num_done++;
286  }
287 
288  KALDI_LOG << "Read " << num_done << " utterances, "
289  << num_err << " with errors.";
290 
291  if (num_done == 0) {
292  KALDI_ERR << "Did not read any utterances.";
293  } else {
294  KALDI_LOG << "Computing within-class covariance.";
295  }
296 
297  Vector<BaseFloat> mean;
298  ComputeAndSubtractMean(utt2ivector, &mean);
299  KALDI_LOG << "2-norm of iVector mean is " << mean.Norm(2.0);
300 
301 
302  Matrix<BaseFloat> lda_mat(lda_dim, dim + 1); // LDA matrix without the offset term.
303  SubMatrix<BaseFloat> linear_part(lda_mat, 0, lda_dim, 0, dim);
304  ComputeLdaTransform(utt2ivector,
305  spk2utt,
306  total_covariance_factor,
307  covariance_floor,
308  &linear_part);
309  Vector<BaseFloat> offset(lda_dim);
310  offset.AddMatVec(-1.0, linear_part, kNoTrans, mean, 0.0);
311  lda_mat.CopyColFromVec(offset, dim); // add mean-offset to transform
312 
313  KALDI_VLOG(2) << "2-norm of transformed iVector mean is "
314  << offset.Norm(2.0);
315 
316  WriteKaldiObject(lda_mat, lda_wxfilename, binary);
317 
318  KALDI_LOG << "Wrote LDA transform to "
319  << PrintableWxfilename(lda_wxfilename);
320 
321  std::map<std::string, Vector<BaseFloat> *>::iterator iter;
322  for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter)
323  delete iter->second;
324  utt2ivector.clear();
325 
326  return 0;
327  } catch(const std::exception &e) {
328  std::cerr << e.what();
329  return -1;
330  }
331 }
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
Definition: sp-matrix.cc:1110
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void CopyColFromVec(const VectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
void Scale(Real c)
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool SingularTotCovar()
Will return Empty() if the within-class covariance matrix would be zero.
kaldi::int32 int32
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
Definition: qr.cc:433
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT NumRows() const
KALDI_DISALLOW_COPY_AND_ASSIGN(CovarianceStats)
int main(int argc, char *argv[])
Real Norm(Real p) const
Compute the p-th norm of the vector.
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
static void ComputeNormalizingTransform(const SpMatrix< Real > &covar, MatrixBase< Real > *proj)
This function computes a projection matrix that when applied makes the covariance unit (i...
Definition: plda.cc:46
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
Definition: kaldi-vector.h:149
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v'
Definition: sp-matrix.cc:946
void ComputeLdaTransform(const std::map< std::string, Vector< BaseFloat > *> &utt2ivector, const std::map< std::string, std::vector< std::string > > &spk2utt, BaseFloat total_covariance_factor, BaseFloat covariance_floor, MatrixBase< BaseFloat > *lda_out)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
const T & Value(const std::string &key)
void GetWithinCovar(SpMatrix< double > *within_covar)
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
void GetTotalCovar(SpMatrix< double > *tot_covar) const
get total covariance, normalized per number of frames.
struct rnnlm::@11::@12 n
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
void ComputeAndSubtractMean(std::map< std::string, Vector< BaseFloat > *> utt2ivector, Vector< BaseFloat > *mean_out)
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
#define KALDI_WARN
Definition: kaldi-error.h:150
SpMatrix< double > tot_covar_
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
bool HasKey(const std::string &key)
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
SpMatrix< double > between_covar_
void AccStats(const Matrix< double > &utts_of_this_spk)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <– beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
void ApplyPow(Real power)
Take all elements of vector to a power.
Definition: kaldi-vector.h:179
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
void AddStats(const CovarianceStats &other)
void AddDiagVecMat(const Real alpha, const VectorBase< Real > &v, const MatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Sub-matrix representation.
Definition: kaldi-matrix.h:988
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.