ivector-plda-scoring-dense.cc
Go to the documentation of this file.
1 // ivectorbin/ivector-plda-scoring-dense.cc
2 
3 // Copyright 2016-2018 David Snyder
4 // 2017-2018 Matthew Maciejewski
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "util/stl-utils.h"
25 #include "ivector/plda.h"
26 
27 namespace kaldi {
28 
29 bool EstPca(const Matrix<BaseFloat> &ivector_mat, BaseFloat target_energy,
30  const std::string &reco, Matrix<BaseFloat> *mat) {
31 
32  // If the target_energy is 1.0, it's equivalent to not applying the
33  // conversation-dependent PCA at all, so it's better to exit this
34  // function before doing any computation.
35  if (ApproxEqual(target_energy, 1.0, 0.001))
36  return false;
37 
38  int32 num_rows = ivector_mat.NumRows(),
39  num_cols = ivector_mat.NumCols();
41  SpMatrix<BaseFloat> sumsq;
42  sum.Resize(num_cols);
43  sumsq.Resize(num_cols);
44  sum.AddRowSumMat(1.0, ivector_mat);
45  sumsq.AddMat2(1.0, ivector_mat, kTrans, 1.0);
46  sum.Scale(1.0 / num_rows);
47  sumsq.Scale(1.0 / num_rows);
48  sumsq.AddVec2(-1.0, sum); // now sumsq is centered covariance.
49  int32 full_dim = sum.Dim();
50 
51  Matrix<BaseFloat> P(full_dim, full_dim);
52  Vector<BaseFloat> s(full_dim);
53 
54  try {
55  if (num_rows > num_cols)
56  sumsq.Eig(&s, &P);
57  else
58  Matrix<BaseFloat>(sumsq).Svd(&s, &P, NULL);
59  } catch (...) {
60  KALDI_WARN << "Unable to compute conversation dependent PCA for"
61  << " recording " << reco << ".";
62  return false;
63  }
64 
65  SortSvd(&s, &P);
66 
67  Matrix<BaseFloat> transform(P, kTrans); // Transpose of P. This is what
68  // appears in the transform.
69 
70  // We want the PCA transform to retain target_energy amount of the total
71  // energy.
72  BaseFloat total_energy = s.Sum();
73  BaseFloat energy = 0.0;
74  int32 dim = 1;
75  while (energy / total_energy <= target_energy) {
76  energy += s(dim-1);
77  dim++;
78  }
79  Matrix<BaseFloat> transform_float(transform);
80  mat->Resize(transform.NumCols(), transform.NumRows());
81  mat->CopyFromMat(transform);
82  mat->Resize(dim, transform_float.NumCols(), kCopyData);
83  return true;
84 }
85 
86 // Transforms i-vectors using the PLDA model.
87 void TransformIvectors(const Matrix<BaseFloat> &ivectors_in,
88  const PldaConfig &plda_config, const Plda &plda,
89  Matrix<BaseFloat> *ivectors_out) {
90  int32 dim = plda.Dim();
91  ivectors_out->Resize(ivectors_in.NumRows(), dim);
92  for (int32 i = 0; i < ivectors_in.NumRows(); i++) {
93  Vector<BaseFloat> transformed_ivector(dim);
94  plda.TransformIvector(plda_config, ivectors_in.Row(i), 1.0,
95  &transformed_ivector);
96  ivectors_out->Row(i).CopyFromVec(transformed_ivector);
97  }
98 }
99 
100 // Transform the i-vectors using the recording-dependent PCA matrix.
101 void ApplyPca(const Matrix<BaseFloat> &ivectors_in,
102  const Matrix<BaseFloat> &pca_mat, Matrix<BaseFloat> *ivectors_out) {
103  int32 transform_cols = pca_mat.NumCols(),
104  transform_rows = pca_mat.NumRows(),
105  feat_dim = ivectors_in.NumCols();
106  ivectors_out->Resize(ivectors_in.NumRows(), transform_rows);
107  KALDI_ASSERT(transform_cols == feat_dim);
108  ivectors_out->AddMatMat(1.0, ivectors_in, kNoTrans,
109  pca_mat, kTrans, 0.0);
110 }
111 
112 } // namespace kaldi
113 
114 int main(int argc, char *argv[]) {
115  using namespace kaldi;
116  typedef kaldi::int32 int32;
117  try {
118  const char *usage =
119  "Perform PLDA scoring for speaker diarization. The input reco2utt\n"
120  "should be of the form <recording-id> <seg1> <seg2> ... <segN> and\n"
121  "there should be one iVector for each segment. PLDA scoring is\n"
122  "performed between all pairs of iVectors in a recording and outputs\n"
123  "an archive of score matrices, one for each recording-id. The rows\n"
124  "and columns of the the matrix correspond the sorted order of the\n"
125  "segments.\n"
126  "Usage: ivector-plda-scoring-dense [options] <plda> <reco2utt>"
127  " <ivectors-rspecifier> <scores-wspecifier>\n"
128  "e.g.: \n"
129  " ivector-plda-scoring-dense plda reco2utt scp:ivectors.scp"
130  " ark:scores.ark ark,t:ivectors.1.ark\n";
131 
132  ParseOptions po(usage);
133  BaseFloat target_energy = 0.5;
134  PldaConfig plda_config;
135  plda_config.Register(&po);
136 
137  po.Register("target-energy", &target_energy,
138  "Reduce dimensionality of i-vectors using a recording-dependent"
139  " PCA such that this fraction of the total energy remains.");
140  KALDI_ASSERT(target_energy <= 1.0);
141 
142  po.Read(argc, argv);
143 
144  if (po.NumArgs() != 4) {
145  po.PrintUsage();
146  exit(1);
147  }
148 
149  std::string plda_rxfilename = po.GetArg(1),
150  reco2utt_rspecifier = po.GetArg(2),
151  ivector_rspecifier = po.GetArg(3),
152  scores_wspecifier = po.GetArg(4);
153 
154  Plda plda;
155  ReadKaldiObject(plda_rxfilename, &plda);
156 
157  SequentialTokenVectorReader reco2utt_reader(reco2utt_rspecifier);
158  RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
159  BaseFloatMatrixWriter scores_writer(scores_wspecifier);
160  int32 num_reco_err = 0,
161  num_reco_done = 0;
162  for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
163  Plda this_plda(plda);
164  std::string reco = reco2utt_reader.Key();
165 
166  std::vector<std::string> uttlist = reco2utt_reader.Value();
167  std::vector<Vector<BaseFloat> > ivectors;
168 
169  for (size_t i = 0; i < uttlist.size(); i++) {
170  std::string utt = uttlist[i];
171 
172  if (!ivector_reader.HasKey(utt)) {
173  KALDI_ERR << "No iVector present in input for utterance " << utt;
174  }
175 
176  Vector<BaseFloat> ivector = ivector_reader.Value(utt);
177  ivectors.push_back(ivector);
178  }
179  if (ivectors.size() == 0) {
180  KALDI_WARN << "Not producing output for recording " << reco
181  << " since no segments had iVectors";
182  num_reco_err++;
183  } else {
184  Matrix<BaseFloat> ivector_mat(ivectors.size(), ivectors[0].Dim()),
185  ivector_mat_pca,
186  ivector_mat_plda,
187  pca_transform,
188  scores(ivectors.size(), ivectors.size());
189 
190  for (size_t i = 0; i < ivectors.size(); i++) {
191  ivector_mat.Row(i).CopyFromVec(ivectors[i]);
192  }
193  if (EstPca(ivector_mat, target_energy, reco, &pca_transform)) {
194  // Apply the PCA transform to the raw i-vectors.
195  ApplyPca(ivector_mat, pca_transform, &ivector_mat_pca);
196 
197  // Apply the PCA transform to the parameters of the PLDA model.
198  this_plda.ApplyTransform(Matrix<double>(pca_transform));
199 
200  // Now transform the i-vectors using the reduced PLDA model.
201  TransformIvectors(ivector_mat_pca, plda_config, this_plda,
202  &ivector_mat_plda);
203  } else {
204  // If EstPca returns false, we won't apply any PCA.
205  TransformIvectors(ivector_mat, plda_config, this_plda,
206  &ivector_mat_plda);
207  }
208  for (int32 i = 0; i < ivector_mat_plda.NumRows(); i++) {
209  for (int32 j = 0; j < ivector_mat_plda.NumRows(); j++) {
210  scores(i, j) = this_plda.LogLikelihoodRatio(Vector<double>(
211  ivector_mat_plda.Row(i)), 1.0,
212  Vector<double>(ivector_mat_plda.Row(j)));
213  }
214  }
215  scores_writer.Write(reco, scores);
216  num_reco_done++;
217  }
218  }
219  KALDI_LOG << "Processed " << num_reco_done << " recordings, "
220  << num_reco_err << " had errors.";
221  return (num_reco_done != 0 ? 0 : 1 );
222  } catch(const std::exception &e) {
223  std::cerr << e.what();
224  return -1;
225  }
226 }
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
Definition: sp-matrix.cc:1110
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Packed symmetric matrix class.
Definition: matrix-common.h:62
void Scale(Real c)
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
double TransformIvector(const PldaConfig &config, const VectorBase< double > &ivector, int32 num_enroll_examples, VectorBase< double > *transformed_ivector) const
Transforms an iVector into a space where the within-class variance is unit and between-class variance...
Definition: plda.cc:120
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
Definition: qr.cc:433
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <- this + alpha v v'
Definition: sp-matrix.cc:946
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
int main(int argc, char *argv[])
const T & Value(const std::string &key)
bool EstPca(const Matrix< BaseFloat > &ivector_mat, BaseFloat target_energy, const std::string &reco, Matrix< BaseFloat > *mat)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
#define KALDI_WARN
Definition: kaldi-error.h:150
double LogLikelihoodRatio(const VectorBase< double > &transformed_enroll_ivector, int32 num_enroll_utts, const VectorBase< double > &transformed_test_ivector) const
Returns the log-likelihood ratio log (p(test_ivector | same) / p(test_ivector | different)).
Definition: plda.cc:153
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
void Register(OptionsItf *opts)
Definition: plda.h:56
bool HasKey(const std::string &key)
Real Sum() const
Returns sum of the elements.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void TransformIvectors(const Matrix< BaseFloat > &ivectors_in, const PldaConfig &plda_config, const Plda &plda, Matrix< BaseFloat > *ivectors_out)
A class representing a vector.
Definition: kaldi-vector.h:406
void ApplyTransform(const Matrix< double > &in_transform)
Apply a transform to the PLDA model.
Definition: plda.cc:220
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void ApplyPca(const Matrix< BaseFloat > &ivectors_in, const Matrix< BaseFloat > &pca_mat, Matrix< BaseFloat > *ivectors_out)
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81
int32 Dim() const
Definition: plda.h:140
#define KALDI_LOG
Definition: kaldi-error.h:153
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.