ivector-plda-scoring-dense.cc File Reference
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "util/stl-utils.h"
#include "ivector/plda.h"
Include dependency graph for ivector-plda-scoring-dense.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks (the reference is not reproduced on this page).
 

Functions

bool EstPca (const Matrix< BaseFloat > &ivector_mat, BaseFloat target_energy, const std::string &reco, Matrix< BaseFloat > *mat)
 
void TransformIvectors (const Matrix< BaseFloat > &ivectors_in, const PldaConfig &plda_config, const Plda &plda, Matrix< BaseFloat > *ivectors_out)
 
void ApplyPca (const Matrix< BaseFloat > &ivectors_in, const Matrix< BaseFloat > &pca_mat, Matrix< BaseFloat > *ivectors_out)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 114 of file ivector-plda-scoring-dense.cc.

References kaldi::ApplyPca(), Plda::ApplyTransform(), SequentialTableReader< Holder >::Done(), kaldi::EstPca(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), Plda::LogLikelihoodRatio(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::ReadKaldiObject(), PldaConfig::Register(), ParseOptions::Register(), MatrixBase< Real >::Row(), kaldi::TransformIvectors(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

114  {
115  using namespace kaldi;
116  typedef kaldi::int32 int32;
117  try {
118  const char *usage =
119  "Perform PLDA scoring for speaker diarization. The input reco2utt\n"
120  "should be of the form <recording-id> <seg1> <seg2> ... <segN> and\n"
121  "there should be one iVector for each segment. PLDA scoring is\n"
122  "performed between all pairs of iVectors in a recording and outputs\n"
123  "an archive of score matrices, one for each recording-id. The rows\n"
124  "and columns of the the matrix correspond the sorted order of the\n"
125  "segments.\n"
126  "Usage: ivector-plda-scoring-dense [options] <plda> <reco2utt>"
127  " <ivectors-rspecifier> <scores-wspecifier>\n"
128  "e.g.: \n"
129  " ivector-plda-scoring-dense plda reco2utt scp:ivectors.scp"
130  " ark:scores.ark ark,t:ivectors.1.ark\n";
131 
132  ParseOptions po(usage);
133  BaseFloat target_energy = 0.5;
134  PldaConfig plda_config;
135  plda_config.Register(&po);
136 
137  po.Register("target-energy", &target_energy,
138  "Reduce dimensionality of i-vectors using a recording-dependent"
139  " PCA such that this fraction of the total energy remains.");
140  KALDI_ASSERT(target_energy <= 1.0);
141 
142  po.Read(argc, argv);
143 
144  if (po.NumArgs() != 4) {
145  po.PrintUsage();
146  exit(1);
147  }
148 
149  std::string plda_rxfilename = po.GetArg(1),
150  reco2utt_rspecifier = po.GetArg(2),
151  ivector_rspecifier = po.GetArg(3),
152  scores_wspecifier = po.GetArg(4);
153 
154  Plda plda;
155  ReadKaldiObject(plda_rxfilename, &plda);
156 
157  SequentialTokenVectorReader reco2utt_reader(reco2utt_rspecifier);
158  RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
159  BaseFloatMatrixWriter scores_writer(scores_wspecifier);
160  int32 num_reco_err = 0,
161  num_reco_done = 0;
162  for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
163  Plda this_plda(plda);
164  std::string reco = reco2utt_reader.Key();
165 
166  std::vector<std::string> uttlist = reco2utt_reader.Value();
167  std::vector<Vector<BaseFloat> > ivectors;
168 
169  for (size_t i = 0; i < uttlist.size(); i++) {
170  std::string utt = uttlist[i];
171 
172  if (!ivector_reader.HasKey(utt)) {
173  KALDI_ERR << "No iVector present in input for utterance " << utt;
174  }
175 
176  Vector<BaseFloat> ivector = ivector_reader.Value(utt);
177  ivectors.push_back(ivector);
178  }
179  if (ivectors.size() == 0) {
180  KALDI_WARN << "Not producing output for recording " << reco
181  << " since no segments had iVectors";
182  num_reco_err++;
183  } else {
184  Matrix<BaseFloat> ivector_mat(ivectors.size(), ivectors[0].Dim()),
185  ivector_mat_pca,
186  ivector_mat_plda,
187  pca_transform,
188  scores(ivectors.size(), ivectors.size());
189 
190  for (size_t i = 0; i < ivectors.size(); i++) {
191  ivector_mat.Row(i).CopyFromVec(ivectors[i]);
192  }
193  if (EstPca(ivector_mat, target_energy, reco, &pca_transform)) {
194  // Apply the PCA transform to the raw i-vectors.
195  ApplyPca(ivector_mat, pca_transform, &ivector_mat_pca);
196 
197  // Apply the PCA transform to the parameters of the PLDA model.
198  this_plda.ApplyTransform(Matrix<double>(pca_transform));
199 
200  // Now transform the i-vectors using the reduced PLDA model.
201  TransformIvectors(ivector_mat_pca, plda_config, this_plda,
202  &ivector_mat_plda);
203  } else {
204  // If EstPca returns false, we won't apply any PCA.
205  TransformIvectors(ivector_mat, plda_config, this_plda,
206  &ivector_mat_plda);
207  }
208  for (int32 i = 0; i < ivector_mat_plda.NumRows(); i++) {
209  for (int32 j = 0; j < ivector_mat_plda.NumRows(); j++) {
210  scores(i, j) = this_plda.LogLikelihoodRatio(Vector<double>(
211  ivector_mat_plda.Row(i)), 1.0,
212  Vector<double>(ivector_mat_plda.Row(j)));
213  }
214  }
215  scores_writer.Write(reco, scores);
216  num_reco_done++;
217  }
218  }
219  KALDI_LOG << "Processed " << num_reco_done << " recordings, "
220  << num_reco_err << " had errors.";
221  return (num_reco_done != 0 ? 0 : 1 );
222  } catch(const std::exception &e) {
223  std::cerr << e.what();
224  return -1;
225  }
226 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
bool EstPca(const Matrix< BaseFloat > &ivector_mat, BaseFloat target_energy, const std::string &reco, Matrix< BaseFloat > *mat)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
void Register(OptionsItf *opts)
Definition: plda.h:56
void TransformIvectors(const Matrix< BaseFloat > &ivectors_in, const PldaConfig &plda_config, const Plda &plda, Matrix< BaseFloat > *ivectors_out)
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void ApplyPca(const Matrix< BaseFloat > &ivectors_in, const Matrix< BaseFloat > &pca_mat, Matrix< BaseFloat > *ivectors_out)
#define KALDI_LOG
Definition: kaldi-error.h:153