nnet-chain-diagnostics.cc
Go to the documentation of this file.
1 // nnet3/nnet-chain-diagnostics.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
#include "nnet3/nnet-chain-diagnostics.h"
#include "nnet3/nnet-utils.h"
22 
23 namespace kaldi {
24 namespace nnet3 {
25 
27  const NnetComputeProbOptions &nnet_config,
28  const chain::ChainTrainingOptions &chain_config,
29  const fst::StdVectorFst &den_fst,
30  const Nnet &nnet):
31  nnet_config_(nnet_config),
32  chain_config_(chain_config),
33  den_graph_(den_fst, nnet.OutputDim("output")),
34  nnet_(nnet),
35  compiler_(nnet, nnet_config_.optimize_config, nnet_config_.compiler_config),
36  deriv_nnet_owned_(true),
37  deriv_nnet_(NULL),
38  num_minibatches_processed_(0) {
40  deriv_nnet_ = new Nnet(nnet_);
41  ScaleNnet(0.0, deriv_nnet_);
42  SetNnetAsGradient(deriv_nnet_); // force simple update
44  KALDI_ERR << "If you set store_component_stats == true and "
45  << "compute_deriv == false, use the other constructor.";
46  }
47 }
48 
49 
51  const NnetComputeProbOptions &nnet_config,
52  const chain::ChainTrainingOptions &chain_config,
53  const fst::StdVectorFst &den_fst,
54  Nnet *nnet):
55  nnet_config_(nnet_config),
56  chain_config_(chain_config),
57  den_graph_(den_fst, nnet->OutputDim("output")),
58  nnet_(*nnet),
59  compiler_(*nnet, nnet_config_.optimize_config, nnet_config_.compiler_config),
60  deriv_nnet_owned_(false),
61  deriv_nnet_(nnet),
63  KALDI_ASSERT(den_graph_.NumPdfs() > 0);
64  KALDI_ASSERT(nnet_config.store_component_stats && !nnet_config.compute_deriv);
65 }
66 
67 
70  KALDI_ERR << "GetDeriv() called when no derivatives were requested.";
71  return *deriv_nnet_;
72 }
73 
76  delete deriv_nnet_; // delete does nothing if pointer is NULL.
77 }
78 
81  objf_info_.clear();
82  if (deriv_nnet_) {
83  ScaleNnet(0.0, deriv_nnet_);
85  }
86 }
87 
89  bool need_model_derivative = nnet_config_.compute_deriv,
90  store_component_stats = nnet_config_.store_component_stats;
91  ComputationRequest request;
92  // if the options specify cross-entropy regularization, we'll be computing
93  // this objective (not interpolated with the regular objective-- we give it a
94  // separate name), but currently we won't make it contribute to the
95  // derivative-- we just compute the derivative of the regular output.
96  // This is because in the place where we use the derivative (the
97  // model-combination code) we decided to keep it simple and just use the
98  // regular objective.
99  bool use_xent_regularization = (chain_config_.xent_regularize != 0.0),
100  use_xent_derivative = false;
101  GetChainComputationRequest(nnet_, chain_eg, need_model_derivative,
102  store_component_stats, use_xent_regularization,
103  use_xent_derivative, &request);
104  std::shared_ptr<const NnetComputation> computation = compiler_.Compile(request);
105  NnetComputer computer(nnet_config_.compute_config, *computation,
106  nnet_, deriv_nnet_);
107  // give the inputs to the computer object.
108  computer.AcceptInputs(nnet_, chain_eg.inputs);
109  computer.Run();
110  this->ProcessOutputs(chain_eg, &computer);
112  computer.Run();
113 }
114 
116  NnetComputer *computer) {
117  // There will normally be just one output here, named 'output',
118  // but the code is more general than this.
119  std::vector<NnetChainSupervision>::const_iterator iter = eg.outputs.begin(),
120  end = eg.outputs.end();
121  for (; iter != end; ++iter) {
122  const NnetChainSupervision &sup = *iter;
123  int32 node_index = nnet_.GetNodeIndex(sup.name);
124  if (node_index < 0 ||
125  !nnet_.IsOutputNode(node_index))
126  KALDI_ERR << "Network has no output named " << sup.name;
127 
128  const CuMatrixBase<BaseFloat> &nnet_output = computer->GetOutput(sup.name);
129  bool use_xent = (chain_config_.xent_regularize != 0.0);
130  std::string xent_name = sup.name + "-xent"; // typically "output-xent".
131  CuMatrix<BaseFloat> nnet_output_deriv, xent_deriv;
133  nnet_output_deriv.Resize(nnet_output.NumRows(), nnet_output.NumCols(),
134  kUndefined);
135  if (use_xent)
136  xent_deriv.Resize(nnet_output.NumRows(), nnet_output.NumCols(),
137  kUndefined);
138 
139  BaseFloat tot_like, tot_l2_term, tot_weight;
140 
141  ComputeChainObjfAndDeriv(chain_config_, den_graph_,
142  sup.supervision, nnet_output,
143  &tot_like, &tot_l2_term, &tot_weight,
144  (nnet_config_.compute_deriv ? &nnet_output_deriv :
145  NULL), (use_xent ? &xent_deriv : NULL));
146 
147  // note: in this context we don't want to apply 'sup.deriv_weights' because
148  // this code is used only in combination, where it's part of an L-BFGS
149  // optimization algorithm, and in that case if there is a mismatch between
150  // the computed objective function and the derivatives, it may cause errors
151  // in the optimization procedure such as early termination. (line search
152  // and conjugate gradient descent both rely on the derivatives being
153  // accurate, and don't fail gracefully if the derivatives are not accurate).
154 
155  ChainObjectiveInfo &totals = objf_info_[sup.name];
156  totals.tot_weight += tot_weight;
157  totals.tot_like += tot_like;
158  totals.tot_l2_term += tot_l2_term;
159 
161  computer->AcceptInput(sup.name, &nnet_output_deriv);
162 
163  if (use_xent) {
164  ChainObjectiveInfo &xent_totals = objf_info_[xent_name];
165  // this block computes the cross-entropy objective.
166  const CuMatrixBase<BaseFloat> &xent_output = computer->GetOutput(
167  xent_name);
168  // at this point, xent_deriv is posteriors derived from the numerator
169  // computation. note, xent_deriv has a factor of '.supervision.weight',
170  // but so does tot_weight.
171  BaseFloat xent_objf = TraceMatMat(xent_output, xent_deriv, kTrans);
172  xent_totals.tot_weight += tot_weight;
173  xent_totals.tot_like += xent_objf;
174  }
176  }
177 }
178 
180  bool ans = false;
181  unordered_map<std::string, ChainObjectiveInfo, StringHasher>::const_iterator
182  iter, end;
183  iter = objf_info_.begin();
184  end = objf_info_.end();
185  for (; iter != end; ++iter) {
186  const std::string &name = iter->first;
187  int32 node_index = nnet_.GetNodeIndex(name);
188  KALDI_ASSERT(node_index >= 0);
189  const ChainObjectiveInfo &info = iter->second;
190  BaseFloat like = (info.tot_like / info.tot_weight),
191  l2_term = (info.tot_l2_term / info.tot_weight),
192  tot_objf = like + l2_term;
193  if (info.tot_l2_term == 0.0) {
194  KALDI_LOG << "Overall log-probability for '"
195  << name << "' is "
196  << like << " per frame"
197  << ", over " << info.tot_weight << " frames.";
198  } else {
199  KALDI_LOG << "Overall log-probability for '"
200  << name << "' is "
201  << like << " + " << l2_term << " = " << tot_objf << " per frame"
202  << ", over " << info.tot_weight << " frames.";
203  }
204  if (info.tot_weight > 0)
205  ans = true;
206  }
207  return ans;
208 }
209 
210 
212  const std::string &output_name) const {
213  unordered_map<std::string, ChainObjectiveInfo, StringHasher>::const_iterator
214  iter = objf_info_.find(output_name);
215  if (iter != objf_info_.end())
216  return &(iter->second);
217  else
218  return NULL;
219 }
220 
221 double NnetChainComputeProb::GetTotalObjective(double *total_weight) const {
222  double tot_objectives = 0.0;
223  double tot_weight = 0.0;
224  unordered_map<std::string, ChainObjectiveInfo, StringHasher>::const_iterator
225  iter = objf_info_.begin(), end = objf_info_.end();
226  for (; iter != end; ++iter) {
227  tot_objectives += iter->second.tot_like + iter->second.tot_l2_term;
228  tot_weight += iter->second.tot_weight;
229  }
230 
231  if (total_weight) *total_weight = tot_weight;
232  return tot_objectives;
233 }
234 
235 static bool HasXentOutputs(const Nnet &nnet) {
236  const std::vector<std::string> node_names = nnet.GetNodeNames();
237  for (std::vector<std::string>::const_iterator it = node_names.begin();
238  it != node_names.end(); ++it) {
239  int32 node_index = nnet.GetNodeIndex(*it);
240  if (nnet.IsOutputNode(node_index) &&
241  it->find("-xent") != std::string::npos) {
242  return true;
243  }
244  }
245  return false;
246 }
247 
248 void RecomputeStats(const std::vector<NnetChainExample> &egs,
249  const chain::ChainTrainingOptions &chain_config_in,
250  const fst::StdVectorFst &den_fst,
251  Nnet *nnet) {
252  KALDI_LOG << "Recomputing stats on nnet (affects batch-norm)";
253  chain::ChainTrainingOptions chain_config(chain_config_in);
254  if (HasXentOutputs(*nnet) &&
255  chain_config.xent_regularize == 0) {
256  // this forces it to compute the output for xent outputs,
257  // usually 'output-xent', which
258  // means that we'll be computing batch-norm stats for any
259  // components in that branch that have batch-norm.
260  chain_config.xent_regularize = 0.1;
261  }
262 
263  ZeroComponentStats(nnet);
264  NnetComputeProbOptions nnet_config;
265  nnet_config.store_component_stats = true;
266  NnetChainComputeProb prob_computer(nnet_config, chain_config, den_fst, nnet);
267  for (size_t i = 0; i < egs.size(); i++)
268  prob_computer.Compute(egs[i]);
269  prob_computer.PrintTotalStats();
270  KALDI_LOG << "Done recomputing stats.";
271 }
272 
273 
274 
275 } // namespace nnet3
276 } // namespace kaldi
double GetTotalObjective(double *tot_weight) const
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
chain::ChainTrainingOptions chain_config_
void ScaleNnet(BaseFloat scale, Nnet *nnet)
Scales the nnet parameters and stats by this scale.
Definition: nnet-utils.cc:312
This class is for computing objective-function values in a nnet3+chain setup, for diagnostics...
chain::Supervision supervision
The supervision object, containing the FST.
std::vector< NnetIo > inputs
'inputs' contains the input to the network — normally it has just one element called "input"...
kaldi::int32 int32
unordered_map< std::string, ChainObjectiveInfo, StringHasher > objf_info_
This class represents a matrix that&#39;s stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
fst::StdVectorFst StdVectorFst
static bool HasXentOutputs(const Nnet &nnet)
std::string name
the name of the output in the neural net; in simple setups it will just be "output".
This file contains some miscellaneous functions dealing with class Nnet.
void SetNnetAsGradient(Nnet *nnet)
Sets nnet as gradient by Setting is_gradient_ to true and learning_rate_ to 1 for each UpdatableCompo...
Definition: nnet-utils.cc:292
void AcceptInput(const std::string &node_name, CuMatrix< BaseFloat > *input)
e.g.
NnetChainComputeProb(const NnetComputeProbOptions &nnet_config, const chain::ChainTrainingOptions &chain_config, const fst::StdVectorFst &den_fst, const Nnet &nnet)
const CuMatrixBase< BaseFloat > & GetOutput(const std::string &node_name)
bool IsOutputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kDescriptor and is not directly fo...
Definition: nnet-nnet.cc:112
std::vector< NnetChainSupervision > outputs
'outputs' contains the chain output supervision.
void RecomputeStats(const std::vector< NnetChainExample > &egs, const chain::ChainTrainingOptions &chain_config_in, const fst::StdVectorFst &den_fst, Nnet *nnet)
This function zeros the stored component-level stats in the nnet using ZeroComponentStats(), then recomputes them with the supplied egs.
void AcceptInputs(const Nnet &nnet, const std::vector< NnetIo > &io)
This convenience function calls AcceptInput() in turn on all the inputs in the training example...
NnetChainExample is like NnetExample, but specialized for lattice-free (chain) training.
#define KALDI_ERR
Definition: kaldi-error.h:147
void ZeroComponentStats(Nnet *nnet)
Zeroes the component stats in all nonlinear components in the nnet.
Definition: nnet-utils.cc:269
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
Matrix for CUDA computing.
Definition: matrix-common.h:69
class NnetComputer is responsible for executing the computation described in the "computation" object...
Definition: nnet-compute.h:59
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Compute(const NnetChainExample &chain_eg)
int32 GetNodeIndex(const std::string &node_name) const
returns index associated with this node name, or -1 if no such index.
Definition: nnet-nnet.cc:466
void GetChainComputationRequest(const Nnet &nnet, const NnetChainExample &eg, bool need_model_derivative, bool store_component_stats, bool use_xent_regularization, bool use_xent_derivative, ComputationRequest *request)
This function takes a NnetChainExample and produces a ComputationRequest.
#define KALDI_LOG
Definition: kaldi-error.h:153
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50
const ChainObjectiveInfo * GetObjective(const std::string &output_name) const
const std::vector< std::string > & GetNodeNames() const
returns vector of node names (needed by some parsing code, for instance).
Definition: nnet-nnet.cc:63
void ProcessOutputs(const NnetChainExample &chain_eg, NnetComputer *computer)
void Run()
This does either the forward or backward computation, depending when it is called (in a typical compu...