doc/ivector-plda-scoring-dense_8cc_source.html

 // ivectorbin/ivector-plda-scoring-dense.cc

 // Copyright 2016-2018  David Snyder
 //           2017-2018  Matthew Maciejewski

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "util/stl-utils.h"
 #include "ivector/plda.h"

 namespace kaldi {

 // Estimates a recording-dependent PCA transform from a set of i-vectors.
 //
 //   ivector_mat   - input i-vectors, one per row.
 //   target_energy - fraction of the summed eigenvalues ("energy") that the
 //                   retained principal components should cover.
 //   reco          - recording id; only used in the warning message.
 //   mat           - output.  On success it holds the leading rows of the
 //                   transposed eigenvector matrix (one principal direction
 //                   per row, ivector_mat.NumCols() columns).
 //
 // Returns true if *mat was filled in.  Returns false, leaving *mat untouched,
 // when target_energy is approximately 1.0 (PCA would be a no-op) or when the
 // eigenvalue / singular value decomposition throws.
 bool EstPca(const Matrix<BaseFloat> &ivector_mat, BaseFloat target_energy,
   const std::string &reco, Matrix<BaseFloat> *mat) {

   // If the target_energy is 1.0, it's equivalent to not applying the
   // conversation-dependent PCA at all, so it's better to exit this
   // function before doing any computation.
   if (ApproxEqual(target_energy, 1.0, 0.001))
     return false;

   int32 num_rows = ivector_mat.NumRows(),
     num_cols = ivector_mat.NumCols();

   // Accumulate first- and second-order statistics over the rows.
   Vector<BaseFloat> sum;
   SpMatrix<BaseFloat> sumsq;
   sum.Resize(num_cols);
   sumsq.Resize(num_cols);
   sum.AddRowSumMat(1.0, ivector_mat);
   sumsq.AddMat2(1.0, ivector_mat, kTrans, 1.0);
   sum.Scale(1.0 / num_rows);    // now sum is the mean i-vector.
   sumsq.Scale(1.0 / num_rows);
   sumsq.AddVec2(-1.0, sum); // now sumsq is centered covariance.
   int32 full_dim = sum.Dim();

   Matrix<BaseFloat> P(full_dim, full_dim);
   Vector<BaseFloat> s(full_dim);

   // Decompose the covariance.  Any exception from the decomposition is
   // treated as "no PCA for this recording" rather than a fatal error.
   try {
     if (num_rows > num_cols)
       sumsq.Eig(&s, &P);
     else
       Matrix<BaseFloat>(sumsq).Svd(&s, &P, NULL);
   } catch (...) {
     KALDI_WARN << "Unable to compute conversation dependent PCA for"
       << " recording " << reco << ".";
     return false;
   }

   // The selection loop below walks s from index 0 upwards, so it relies on
   // the values being sorted here.
   SortSvd(&s, &P);

   Matrix<BaseFloat> transform(P, kTrans); // Transpose of P.  This is what
                                        // appears in the transform.

   // We want the PCA transform to retain target_energy amount of the total
   // energy.  As before, dim ends up one larger than the number of eigenvalues
   // that were accumulated; that convention is kept so that existing results
   // do not change.  What is new is the bound on dim: without it the loop
   // could read s(full_dim) (e.g. target_energy > 1, or rounding keeping the
   // ratio at or below the target), and dim could end at full_dim + 1, which
   // made the final Resize() append an all-zero row to the transform.
   BaseFloat total_energy = s.Sum();
   BaseFloat energy = 0.0;
   int32 dim = 1;
   while (dim <= full_dim && energy / total_energy <= target_energy) {
     energy += s(dim - 1);
     dim++;
   }
   if (dim > full_dim)
     dim = full_dim;

   // Copy the full transform, then truncate to the first dim rows while
   // preserving their contents.
   mat->Resize(transform.NumRows(), transform.NumCols());
   mat->CopyFromMat(transform);
   mat->Resize(dim, transform.NumCols(), kCopyData);
   return true;
 }

 // Transforms i-vectors using the PLDA model.
 void TransformIvectors(const Matrix<BaseFloat> &ivectors_in,
   const PldaConfig &plda_config, const Plda &plda,
   Matrix<BaseFloat> *ivectors_out) {
   int32 dim = plda.Dim();
   ivectors_out->Resize(ivectors_in.NumRows(), dim);
   for (int32 i = 0; i < ivectors_in.NumRows(); i++) {
     Vector<BaseFloat> transformed_ivector(dim);
     plda.TransformIvector(plda_config, ivectors_in.Row(i), 1.0,
       &transformed_ivector);
     ivectors_out->Row(i).CopyFromVec(transformed_ivector);
   }
 }

 // Transform the i-vectors using the recording-dependent PCA matrix.
 void ApplyPca(const Matrix<BaseFloat> &ivectors_in,
   const Matrix<BaseFloat> &pca_mat, Matrix<BaseFloat> *ivectors_out) {
   int32 transform_cols = pca_mat.NumCols(),
         transform_rows = pca_mat.NumRows(),
         feat_dim = ivectors_in.NumCols();
   ivectors_out->Resize(ivectors_in.NumRows(), transform_rows);
   KALDI_ASSERT(transform_cols == feat_dim);
   ivectors_out->AddMatMat(1.0, ivectors_in, kNoTrans,
     pca_mat, kTrans, 0.0);
 }

 } // namespace kaldi

 int main(int argc, char *argv[]) {
   using namespace kaldi;
   typedef kaldi::int32 int32;
   try {
     const char *usage =
       "Perform PLDA scoring for speaker diarization.  The input reco2utt\n"
       "should be of the form <recording-id> <seg1> <seg2> ... <segN> and\n"
       "there should be one iVector for each segment.  PLDA scoring is\n"
       "performed between all pairs of iVectors in a recording and outputs\n"
       "an archive of score matrices, one for each recording-id.  The rows\n"
       "and columns of the the matrix correspond the sorted order of the\n"
       "segments.\n"
       "Usage: ivector-plda-scoring-dense [options] <plda> <reco2utt>"
       " <ivectors-rspecifier> <scores-wspecifier>\n"
       "e.g.: \n"
       "  ivector-plda-scoring-dense plda reco2utt scp:ivectors.scp"
       " ark:scores.ark ark,t:ivectors.1.ark\n";

     ParseOptions po(usage);
     BaseFloat target_energy = 0.5;
     PldaConfig plda_config;
     plda_config.Register(&po);

     po.Register("target-energy", &target_energy,
       "Reduce dimensionality of i-vectors using a recording-dependent"
       " PCA such that this fraction of the total energy remains.");
     KALDI_ASSERT(target_energy <= 1.0);

     po.Read(argc, argv);

     if (po.NumArgs() != 4) {
       po.PrintUsage();
       exit(1);
     }

     std::string plda_rxfilename = po.GetArg(1),
       reco2utt_rspecifier = po.GetArg(2),
       ivector_rspecifier = po.GetArg(3),
       scores_wspecifier = po.GetArg(4);

     Plda plda;
     ReadKaldiObject(plda_rxfilename, &plda);

     SequentialTokenVectorReader reco2utt_reader(reco2utt_rspecifier);
     RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
     BaseFloatMatrixWriter scores_writer(scores_wspecifier);
     int32 num_reco_err = 0,
           num_reco_done = 0;
     for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
       Plda this_plda(plda);
       std::string reco = reco2utt_reader.Key();

       std::vector<std::string> uttlist = reco2utt_reader.Value();
       std::vector<Vector<BaseFloat> > ivectors;

       for (size_t i = 0; i < uttlist.size(); i++) {
         std::string utt = uttlist[i];

         if (!ivector_reader.HasKey(utt)) {
           KALDI_ERR << "No iVector present in input for utterance " << utt;
         }

         Vector<BaseFloat> ivector = ivector_reader.Value(utt);
         ivectors.push_back(ivector);
       }
       if (ivectors.size() == 0) {
         KALDI_WARN << "Not producing output for recording " << reco
                    << " since no segments had iVectors";
         num_reco_err++;
       } else {
         Matrix<BaseFloat> ivector_mat(ivectors.size(), ivectors[0].Dim()),
                           ivector_mat_pca,
                           ivector_mat_plda,
                           pca_transform,
                           scores(ivectors.size(), ivectors.size());

         for (size_t i = 0; i < ivectors.size(); i++) {
           ivector_mat.Row(i).CopyFromVec(ivectors[i]);
         }
         if (EstPca(ivector_mat, target_energy, reco, &pca_transform)) {
           // Apply the PCA transform to the raw i-vectors.
           ApplyPca(ivector_mat, pca_transform, &ivector_mat_pca);

           // Apply the PCA transform to the parameters of the PLDA model.
           this_plda.ApplyTransform(Matrix<double>(pca_transform));

           // Now transform the i-vectors using the reduced PLDA model.
           TransformIvectors(ivector_mat_pca, plda_config, this_plda,
             &ivector_mat_plda);
         } else {
           // If EstPca returns false, we won't apply any PCA.
           TransformIvectors(ivector_mat, plda_config, this_plda,
           &ivector_mat_plda);
         }
         for (int32 i = 0; i < ivector_mat_plda.NumRows(); i++) {
           for (int32 j = 0; j < ivector_mat_plda.NumRows(); j++) {
             scores(i, j) = this_plda.LogLikelihoodRatio(Vector<double>(
               ivector_mat_plda.Row(i)), 1.0,
               Vector<double>(ivector_mat_plda.Row(j)));
           }
         }
         scores_writer.Write(reco, scores);
         num_reco_done++;
       }
     }
     KALDI_LOG << "Processed " << num_reco_done << " recordings, "
               << num_reco_err << " had errors.";
     return (num_reco_done != 0 ? 0 : 1 );
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
   }
 }
kaldi::SpMatrix::AddMat2
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
Definition: sp-matrix.cc:1110

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::SpMatrix
Packed symmetric matrix class.
Definition: matrix-common.h:62

kaldi::Plda
Definition: plda.h:74

stl-utils.h

kaldi::PackedMatrix::Scale
void Scale(Real c)
Definition: packed-matrix.cc:33

rnnlm::j
int j
Definition: mikolov-rnnlm-lib.cc:66

kaldi::VectorBase::AddRowSumMat
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
Definition: kaldi-vector.cc:714

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::ParseOptions::PrintUsage
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
Definition: parse-options.cc:393

kaldi::SequentialTableReader::Key
std::string Key()
Definition: kaldi-table-inl.h:918

kaldi::Plda::TransformIvector
double TransformIvector(const PldaConfig &config, const VectorBase< double > &ivector, int32 num_enroll_examples, VectorBase< double > *transformed_ivector) const
Transforms an iVector into a space where the within-class variance is unit and between-class variance...
Definition: plda.cc:120

kaldi::TableWriter
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::SpMatrix::Eig
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
Definition: qr.cc:433

kaldi::Matrix< BaseFloat >

kaldi::Vector::Resize
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
Definition: kaldi-vector.cc:190

kaldi::MatrixBase::CopyFromMat
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Definition: kaldi-matrix.cc:862

kaldi::TableWriter::Write
void Write(const std::string &key, const T &value) const
Definition: kaldi-table-inl.h:1511

kaldi::ParseOptions::Register
void Register(const std::string &name, bool *ptr, const std::string &doc)
Definition: parse-options.cc:56

kaldi::ReadKaldiObject
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832

kaldi::RandomAccessTableReader
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233

kaldi::kTrans
Definition: matrix-common.h:33

kaldi::SpMatrix::AddVec2
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v'
Definition: sp-matrix.cc:946

kaldi::BaseFloat
float BaseFloat
Definition: kaldi-types.h:29

kaldi::ParseOptions
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36

kaldi::MatrixBase::Row
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188

float

main
int main(int argc, char *argv[])
Definition: ivector-plda-scoring-dense.cc:114

kaldi::RandomAccessTableReader::Value
const T & Value(const std::string &key)
Definition: kaldi-table-inl.h:2561

kaldi::EstPca
bool EstPca(const Matrix< BaseFloat > &ivector_mat, BaseFloat target_energy, const std::string &reco, Matrix< BaseFloat > *mat)
Definition: ivector-plda-scoring-dense.cc:29

kaldi::SequentialTableReader
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287

kaldi::MatrixBase::AddMatMat
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
Definition: kaldi-matrix.cc:171

kaldi::ParseOptions::Read
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Definition: parse-options.cc:311

kaldi::SequentialTableReader::Done
bool Done()
Definition: kaldi-table-inl.h:948

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::kNoTrans
Definition: matrix-common.h:34

kaldi::ParseOptions::GetArg
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
Definition: parse-options.cc:202

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::SequentialTableReader::Next
void Next()
Definition: kaldi-table-inl.h:942

kaldi::Plda::LogLikelihoodRatio
double LogLikelihoodRatio(const VectorBase< double > &transformed_enroll_ivector, int32 num_enroll_utts, const VectorBase< double > &transformed_test_ivector) const
Returns the log-likelihood ratio log (p(test_ivector | same) / p(test_ivector | different)).
Definition: plda.cc:153

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::VectorBase::Scale
void Scale(Real alpha)
Multiplies all elements by this constant.
Definition: kaldi-vector.cc:963

kaldi::PldaConfig::Register
void Register(OptionsItf *opts)
Definition: plda.h:56

kaldi::RandomAccessTableReader::HasKey
bool HasKey(const std::string &key)
Definition: kaldi-table-inl.h:2551

kaldi::kCopyData
Definition: matrix-common.h:40

kaldi::VectorBase::Sum
Real Sum() const
Returns sum of the elements.
Definition: kaldi-vector.cc:688

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::ParseOptions::NumArgs
int NumArgs() const
Number of positional parameters (c.f. argc-1).
Definition: parse-options.cc:198

kaldi::TransformIvectors
void TransformIvectors(const Matrix< BaseFloat > &ivectors_in, const PldaConfig &plda_config, const Plda &plda, Matrix< BaseFloat > *ivectors_out)
Definition: ivector-plda-scoring-dense.cc:87

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::SequentialTableReader::Value
T & Value()
Definition: kaldi-table-inl.h:934

kaldi::Plda::ApplyTransform
void ApplyTransform(const Matrix< double > &in_transform)
Apply a transform to the PLDA model.
Definition: plda.cc:220

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

plda.h

kaldi::Matrix::Resize
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Definition: kaldi-matrix.cc:819

kaldi::ApplyPca
void ApplyPca(const Matrix< BaseFloat > &ivectors_in, const Matrix< BaseFloat > &pca_mat, Matrix< BaseFloat > *ivectors_out)
Definition: ivector-plda-scoring-dense.cc:101

kaldi::SpMatrix::Resize
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81

kaldi::Plda::Dim
int32 Dim() const
Definition: plda.h:140

KALDI_LOG
#define KALDI_LOG
Definition: kaldi-error.h:153

kaldi-common.h

kaldi::ApproxEqual
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265

kaldi::PldaConfig
Definition: plda.h:50

kaldi::SortSvd
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
Definition: kaldi-matrix.cc:2580