nnet3-acc-lda-stats.cc
Go to the documentation of this file.
1 // nnet3bin/nnet3-acc-lda-stats.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "hmm/transition-model.h"
23 #include "nnet3/nnet-nnet.h"
25 #include "nnet3/nnet-optimize.h"
26 #include "transform/lda-estimate.h"
27 
28 
29 namespace kaldi {
30 namespace nnet3 {
31 
33  public:
35  const Nnet &nnet):
36  rand_prune_(rand_prune), nnet_(nnet), compiler_(nnet) { }
37 
38  void AccStats(const NnetExample &eg) {
39  ComputationRequest request;
40  bool need_backprop = false, store_stats = false;
41  GetComputationRequest(nnet_, eg, need_backprop, store_stats, &request);
42  const NnetComputation &computation = *(compiler_.Compile(request));
43  NnetComputeOptions options;
44  if (GetVerboseLevel() >= 3)
45  options.debug = true;
46  NnetComputer computer(options, computation, nnet_, NULL);
47 
48  computer.AcceptInputs(nnet_, eg.io);
49  computer.Run();
50  const CuMatrixBase<BaseFloat> &nnet_output = computer.GetOutput("output");
51  AccStatsFromOutput(eg, nnet_output);
52  }
53 
54  void WriteStats(const std::string &stats_wxfilename, bool binary) {
55  if (lda_stats_.TotCount() == 0) {
56  KALDI_ERR << "Accumulated no stats.";
57  } else {
58  WriteKaldiObject(lda_stats_, stats_wxfilename, binary);
59  KALDI_LOG << "Accumulated stats, soft frame count = "
60  << lda_stats_.TotCount() << ". Wrote to "
61  << stats_wxfilename;
62  }
63  }
64  private:
66  const CuMatrixBase<BaseFloat> &nnet_output) {
67  BaseFloat rand_prune = rand_prune_;
68  const NnetIo *output_supervision = NULL;
69  for (size_t i = 0; i < eg.io.size(); i++)
70  if (eg.io[i].name == "output")
71  output_supervision = &(eg.io[i]);
72  KALDI_ASSERT(output_supervision != NULL && "no output in eg named 'output'");
73  int32 num_rows = output_supervision->features.NumRows(),
74  num_pdfs = output_supervision->features.NumCols();
75  KALDI_ASSERT(num_rows == nnet_output.NumRows());
76  if (lda_stats_.Dim() == 0)
77  lda_stats_.Init(num_pdfs, nnet_output.NumCols());
78  if (output_supervision->features.Type() == kSparseMatrix) {
79  const SparseMatrix<BaseFloat> &smat =
80  output_supervision->features.GetSparseMatrix();
81  for (int32 r = 0; r < num_rows; r++) {
82  // the following, transferring row by row to CPU, would be wasteful
83  // if we actually were using a GPU, but we don't anticipate doing this
84  // in this program.
85  CuSubVector<BaseFloat> cu_row(nnet_output, r);
86  // "row" is actually just a redudant copy, since we're likely on CPU,
87  // but we're about to do an outer product, so this doesn't dominate.
88  Vector<BaseFloat> row(cu_row);
89 
90  const SparseVector<BaseFloat> &post(smat.Row(r));
91  const std::pair<MatrixIndexT, BaseFloat> *post_data = post.Data(),
92  *post_end = post_data + post.NumElements();
93  for (; post_data != post_end; ++post_data) {
94  MatrixIndexT pdf = post_data->first;
95  BaseFloat weight = post_data->second;
96  BaseFloat pruned_weight = RandPrune(weight, rand_prune);
97  if (pruned_weight != 0.0)
98  lda_stats_.Accumulate(row, pdf, pruned_weight);
99  }
100  }
101  } else {
102  Matrix<BaseFloat> output_mat;
103  output_supervision->features.GetMatrix(&output_mat);
104  for (int32 r = 0; r < num_rows; r++) {
105  // the following, transferring row by row to CPU, would be wasteful
106  // if we actually were using a GPU, but we don't anticipate doing this
107  // in this program.
108  CuSubVector<BaseFloat> cu_row(nnet_output, r);
109  // "row" is actually just a redudant copy, since we're likely on CPU,
110  // but we're about to do an outer product, so this doesn't dominate.
111  Vector<BaseFloat> row(cu_row);
112 
113  SubVector<BaseFloat> post(output_mat, r);
114  int32 num_pdfs = post.Dim();
115  for (int32 pdf = 0; pdf < num_pdfs; pdf++) {
116  BaseFloat weight = post(pdf);
117  BaseFloat pruned_weight = RandPrune(weight, rand_prune);
118  if (pruned_weight != 0.0)
119  lda_stats_.Accumulate(row, pdf, pruned_weight);
120  }
121  }
122  }
123  }
124 
126  const Nnet &nnet_;
129 
130 };
131 
132 }
133 }
134 
135 int main(int argc, char *argv[]) {
136  try {
137  using namespace kaldi;
138  using namespace kaldi::nnet3;
139  typedef kaldi::int32 int32;
140  typedef kaldi::int64 int64;
141 
142  const char *usage =
143  "Accumulate statistics in the same format as acc-lda (i.e. stats for\n"
144  "estimation of LDA and similar types of transform), starting from nnet\n"
145  "training examples. This program puts the features through the network,\n"
146  "and the network output will be the features; the supervision in the\n"
147  "training examples is used for the class labels. Used in obtaining\n"
148  "feature transforms that help nnet training work better.\n"
149  "\n"
150  "Usage: nnet3-acc-lda-stats [options] <raw-nnet-in> <training-examples-in> <lda-stats-out>\n"
151  "e.g.:\n"
152  "nnet3-acc-lda-stats 0.raw ark:1.egs 1.acc\n"
153  "See also: nnet-get-feature-transform\n";
154 
155  bool binary_write = true;
156  BaseFloat rand_prune = 0.0;
157 
158  ParseOptions po(usage);
159  po.Register("binary", &binary_write, "Write output in binary mode");
160  po.Register("rand-prune", &rand_prune,
161  "Randomized pruning threshold for posteriors");
162 
163  po.Read(argc, argv);
164 
165  if (po.NumArgs() != 3) {
166  po.PrintUsage();
167  exit(1);
168  }
169 
170  std::string nnet_rxfilename = po.GetArg(1),
171  examples_rspecifier = po.GetArg(2),
172  lda_accs_wxfilename = po.GetArg(3);
173 
174  Nnet nnet;
175  ReadKaldiObject(nnet_rxfilename, &nnet);
176 
177  NnetLdaStatsAccumulator accumulator(rand_prune, nnet);
178 
179  int64 num_egs = 0;
180 
181  SequentialNnetExampleReader example_reader(examples_rspecifier);
182  for (; !example_reader.Done(); example_reader.Next(), num_egs++)
183  accumulator.AccStats(example_reader.Value());
184 
185  KALDI_LOG << "Processed " << num_egs << " examples.";
186  // the next command will die if we accumulated no stats.
187  accumulator.WriteStats(lda_accs_wxfilename, binary_write);
188 
189  return 0;
190  } catch(const std::exception &e) {
191  std::cerr << e.what() << '\n';
192  return -1;
193  }
194 }
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:111
void Accumulate(const VectorBase< BaseFloat > &data, int32 class_id, BaseFloat weight=1.0)
Accumulates data.
Definition: lda-estimate.cc:45
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int main(int argc, char *argv[])
Class for computing linear discriminant analysis (LDA) transform.
Definition: lda-estimate.h:57
void GetMatrix(Matrix< BaseFloat > *mat) const
Outputs the contents as a matrix.
int32 Dim() const
Returns the dimensionality of the feature vectors.
Definition: lda-estimate.h:66
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 GetVerboseLevel()
Get verbosity level, usually set via the command-line '--verbose=' switch.
Definition: kaldi-error.h:60
Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState *state=NULL)
Definition: kaldi-math.h:174
This class enables you to do the compilation and optimization in one call, and also ensures that if t...
kaldi::int32 int32
GeneralMatrix features
The features or labels.
Definition: nnet-example.h:46
void Init(int32 num_classes, int32 dimension)
Allocates memory for accumulators.
Definition: lda-estimate.cc:26
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
MatrixIndexT NumCols() const
void AccStatsFromOutput(const NnetExample &eg, const CuMatrixBase< BaseFloat > &nnet_output)
float BaseFloat
Definition: kaldi-types.h:29
int32 MatrixIndexT
Definition: matrix-common.h:98
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void AccStats(const NnetExample &eg)
GeneralMatrixType Type() const
Returns the type of the matrix: kSparseMatrix, kCompressedMatrix or kFullMatrix.
void AcceptInputs(const Nnet &nnet, const std::vector< NnetIo > &io)
This convenience function calls AcceptInput() in turn on all the inputs in the training example...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
MatrixIndexT NumElements() const
Returns the number of nonzero elements.
Definition: sparse-matrix.h:74
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
int NumArgs() const
Number of positional parameters (c.f. argc-1).
Matrix for CUDA computing.
Definition: matrix-common.h:69
double TotCount()
Return total count of the data.
Definition: lda-estimate.h:72
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
A class representing a vector.
Definition: kaldi-vector.h:406
class NnetComputer is responsible for executing the computation described in the "computation" object...
Definition: nnet-compute.h:59
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
void WriteStats(const std::string &stats_wxfilename, bool binary)
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
NnetLdaStatsAccumulator(BaseFloat rand_prune, const Nnet &nnet)
const SparseMatrix< BaseFloat > & GetSparseMatrix() const
Returns the contents as a SparseMatrix.
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
std::vector< NnetIo > io
"io" contains the input and output.
Definition: nnet-example.h:116
#define KALDI_LOG
Definition: kaldi-error.h:153
std::pair< MatrixIndexT, Real > * Data()
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void GetComputationRequest(const Nnet &nnet, const NnetExample &eg, bool need_model_derivative, bool store_component_stats, ComputationRequest *request)
This function takes a NnetExample (which should already have been frame-selected, if desired...
const SparseVector< Real > & Row(MatrixIndexT r) const