doc/decodable-am-nnet_8h_source.html

 // nnet2/decodable-am-nnet.h

 // Copyright 2012  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_NNET2_DECODABLE_AM_NNET_H_
 #define KALDI_NNET2_DECODABLE_AM_NNET_H_

 #include <vector>
 #include "base/kaldi-common.h"
 #include "gmm/am-diag-gmm.h"
 #include "hmm/transition-model.h"
 #include "itf/decodable-itf.h"
 #include "nnet2/am-nnet.h"
 #include "nnet2/nnet-compute.h"

 namespace kaldi {
 namespace nnet2 {


 class DecodableAmNnet: public DecodableInterface {
  public:
   DecodableAmNnet(const TransitionModel &trans_model,
                   const AmNnet &am_nnet,
                   const CuMatrixBase<BaseFloat> &feats,
                   bool pad_input = true, // if !pad_input, the NumIndices()
                                          // will be < feats.NumRows().
                   BaseFloat prob_scale = 1.0):
       trans_model_(trans_model) {
     // Note: we could make this more memory-efficient by doing the
     // computation in smaller chunks than the whole utterance, and not
     // storing the whole thing.  We'll leave this for later.
     int32 num_rows = feats.NumRows() -
         (pad_input ? 0 : am_nnet.GetNnet().LeftContext() +
                          am_nnet.GetNnet().RightContext());
     if (num_rows <= 0) {
       KALDI_WARN << "Input with " << feats.NumRows()  << " rows will produce "
                  << "empty output.";
       return;
     }
     CuMatrix<BaseFloat> log_probs(num_rows, trans_model.NumPdfs());
     // the following function is declared in nnet-compute.h
     NnetComputation(am_nnet.GetNnet(), feats, pad_input, &log_probs);
     log_probs.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN.
     log_probs.ApplyLog();
     CuVector<BaseFloat> priors(am_nnet.Priors());
     KALDI_ASSERT(priors.Dim() == trans_model.NumPdfs() &&
                  "Priors in neural network not set up.");
     priors.ApplyLog();
     // subtract log-prior (divide by prior)
     log_probs.AddVecToRows(-1.0, priors);
     // apply probability scale.
     log_probs.Scale(prob_scale);
     // Transfer the log-probs to the CPU for faster access by the
     // decoding process.
     log_probs_.Swap(&log_probs);
   }

   // Note, frames are numbered from zero.  But transition_id is numbered
   // from one (this routine is called by FSTs).
   virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id) {
     return log_probs_(frame,
                       trans_model_.TransitionIdToPdfFast(transition_id));
   }

   virtual int32 NumFramesReady() const { return log_probs_.NumRows(); }

   // Indices are one-based!  This is for compatibility with OpenFst.
   virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }

   virtual bool IsLastFrame(int32 frame) const {
     KALDI_ASSERT(frame < NumFramesReady());
     return (frame == NumFramesReady() - 1);
   }

  protected:
   const TransitionModel &trans_model_;
   Matrix<BaseFloat> log_probs_; // actually not really probabilities, since we divide
   // by the prior -> they won't sum to one.

   KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmNnet);
 };


 class DecodableAmNnetParallel: public DecodableInterface {
  public:
   DecodableAmNnetParallel(
       const TransitionModel &trans_model,
       const AmNnet &am_nnet,
       const CuMatrix<BaseFloat> *feats,
       bool pad_input = true,
       BaseFloat prob_scale = 1.0):
       trans_model_(trans_model), am_nnet_(am_nnet), feats_(feats),
       pad_input_(pad_input), prob_scale_(prob_scale) {
     KALDI_ASSERT(feats_ != NULL);
   }

   void Compute() {
     log_probs_.Resize(feats_->NumRows(), trans_model_.NumPdfs());
     // the following function is declared in nnet-compute.h
     NnetComputation(am_nnet_.GetNnet(), *feats_,
                     pad_input_, &log_probs_);
     log_probs_.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN.
     log_probs_.ApplyLog();
     CuVector<BaseFloat> priors(am_nnet_.Priors());
     KALDI_ASSERT(priors.Dim() == trans_model_.NumPdfs() &&
                  "Priors in neural network not set up.");
     priors.ApplyLog();
     // subtract log-prior (divide by prior)
     log_probs_.AddVecToRows(-1.0, priors);
     // apply probability scale.
     log_probs_.Scale(prob_scale_);
     delete feats_;
     feats_ = NULL;
   }

   // Note, frames are numbered from zero.  But state_index is numbered
   // from one (this routine is called by FSTs).
   virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id) {
     if (feats_) Compute(); // this function sets feats_ to NULL.
     return log_probs_(frame,
                       trans_model_.TransitionIdToPdfFast(transition_id));
   }

   int32 NumFramesReady() const {
     if (feats_) {
       if (pad_input_) return feats_->NumRows();
       else {
         int32 ans = feats_->NumRows() - am_nnet_.GetNnet().LeftContext() -
             am_nnet_.GetNnet().RightContext();
         if (ans < 0) ans = 0;
         return ans;
       }
     } else {
       return log_probs_.NumRows();
     }
   }

   // Indices are one-based!  This is for compatibility with OpenFst.
   virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }

   virtual bool IsLastFrame(int32 frame) const {
     KALDI_ASSERT(frame < NumFramesReady());
     return (frame == NumFramesReady() - 1);
   }
   ~DecodableAmNnetParallel() {
     delete feats_;
   }
  protected:
   const TransitionModel &trans_model_;
   const AmNnet &am_nnet_;
   CuMatrix<BaseFloat> log_probs_; // actually not really probabilities, since we divide
   // by the prior -> they won't sum to one.
   const CuMatrix<BaseFloat> *feats_;
   bool pad_input_;
   BaseFloat prob_scale_;
   KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmNnetParallel);
 };


 } // namespace nnet2
 } // namespace kaldi

 #endif  // KALDI_NNET2_DECODABLE_AM_NNET_H_
am-diag-gmm.h

kaldi::MatrixBase::ApplyLog
void ApplyLog()
Definition: kaldi-matrix.h:374

kaldi::nnet2::DecodableAmNnetParallel
This version of DecodableAmNnet is intended for a version of the decoder that processes different utt...
Definition: decodable-am-nnet.h:105

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet2::Nnet::LeftContext
int32 LeftContext() const
Returns the left-context summed over all the Components...
Definition: nnet-nnet.cc:42

kaldi::nnet2::DecodableAmNnetParallel::NumFramesReady
int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
Definition: decodable-am-nnet.h:145

kaldi::CuVector
Definition: matrix-common.h:74

kaldi::DecodableInterface
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82

kaldi::nnet2::DecodableAmNnet::trans_model_
const TransitionModel & trans_model_
Definition: decodable-am-nnet.h:93

kaldi::nnet2::AmNnet
Definition: am-nnet.h:38

kaldi::TransitionModel::TransitionIdToPdfFast
int32 TransitionIdToPdfFast(int32 trans_id) const
Definition: transition-model.h:334

kaldi::nnet2::DecodableAmNnet::DecodableAmNnet
DecodableAmNnet(const TransitionModel &trans_model, const AmNnet &am_nnet, const CuMatrixBase< BaseFloat > &feats, bool pad_input=true, BaseFloat prob_scale=1.0)
Definition: decodable-am-nnet.h:39

kaldi::nnet2::DecodableAmNnetParallel::log_probs_
CuMatrix< BaseFloat > log_probs_
Definition: decodable-am-nnet.h:172

kaldi::nnet2::DecodableAmNnetParallel::~DecodableAmNnetParallel
~DecodableAmNnetParallel()
Definition: decodable-am-nnet.h:166

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::Matrix< BaseFloat >

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::nnet2::NnetComputation
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
Definition: nnet-compute.cc:160

kaldi::nnet2::DecodableAmNnet::log_probs_
Matrix< BaseFloat > log_probs_
Definition: decodable-am-nnet.h:94

kaldi::Matrix::Swap
void Swap(Matrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
Definition: kaldi-matrix.cc:2255

kaldi::nnet2::DecodableAmNnetParallel::feats_
const CuMatrix< BaseFloat > * feats_
Definition: decodable-am-nnet.h:174

kaldi::TransitionModel
Definition: transition-model.h:123

kaldi::nnet2::DecodableAmNnetParallel::IsLastFrame
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
Definition: decodable-am-nnet.h:162

am-nnet.h

kaldi::nnet2::DecodableAmNnetParallel::LogLikelihood
virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id)
Returns the log likelihood, which will be negated in the decoder.
Definition: decodable-am-nnet.h:139

kaldi::nnet2::DecodableAmNnetParallel::DecodableAmNnetParallel
DecodableAmNnetParallel(const TransitionModel &trans_model, const AmNnet &am_nnet, const CuMatrix< BaseFloat > *feats, bool pad_input=true, BaseFloat prob_scale=1.0)
Definition: decodable-am-nnet.h:107

nnet-compute.h

float

kaldi::MatrixBase::Scale
void Scale(Real alpha)
Multiply each element with a scalar value.
Definition: kaldi-matrix.cc:1209

transition-model.h

kaldi::TransitionModel::NumTransitionIds
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
Definition: transition-model.h:175

kaldi::nnet2::DecodableAmNnet::IsLastFrame
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
Definition: decodable-am-nnet.h:87

kaldi::nnet2::AmNnet::Priors
const VectorBase< BaseFloat > & Priors() const
Definition: am-nnet.h:67

kaldi::MatrixBase::AddVecToRows
void AddVecToRows(const Real alpha, const VectorBase< OtherReal > &v)
[each row of *this] += alpha * v
Definition: kaldi-matrix.cc:3030

kaldi::nnet2::Nnet::RightContext
int32 RightContext() const
Returns the right-context summed over all the Components...
Definition: nnet-nnet.cc:56

kaldi::nnet2::DecodableAmNnetParallel::pad_input_
bool pad_input_
Definition: decodable-am-nnet.h:175

kaldi::nnet2::DecodableAmNnetParallel::Compute
void Compute()
Definition: decodable-am-nnet.h:118

kaldi::nnet2::DecodableAmNnetParallel::NumIndices
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
Definition: decodable-am-nnet.h:160

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::nnet2::DecodableAmNnet::LogLikelihood
virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id)
Returns the log likelihood, which will be negated in the decoder.
Definition: decodable-am-nnet.h:77

decodable-itf.h

kaldi::nnet2::DecodableAmNnet::NumFramesReady
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
Definition: decodable-am-nnet.h:82

kaldi::nnet2::DecodableAmNnet::KALDI_DISALLOW_COPY_AND_ASSIGN
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmNnet)

kaldi::nnet2::DecodableAmNnetParallel::trans_model_
const TransitionModel & trans_model_
Definition: decodable-am-nnet.h:170

kaldi::nnet2::DecodableAmNnetParallel::am_nnet_
const AmNnet & am_nnet_
Definition: decodable-am-nnet.h:171

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::nnet2::DecodableAmNnet
DecodableAmNnet is a decodable object that decodes with a neural net acoustic model of type AmNnet...
Definition: decodable-am-nnet.h:37

kaldi::Matrix::Resize
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Definition: kaldi-matrix.cc:819

kaldi::CuMatrixBase::NumRows
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

kaldi::MatrixBase::ApplyFloor
void ApplyFloor(Real floor_val)
Definition: kaldi-matrix.h:354

kaldi::nnet2::DecodableAmNnet::NumIndices
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
Definition: decodable-am-nnet.h:85

kaldi-common.h

kaldi::nnet2::AmNnet::GetNnet
const Nnet & GetNnet() const
Definition: am-nnet.h:61

kaldi::TransitionModel::NumPdfs
int32 NumPdfs() const
Definition: transition-model.h:190

kaldi::nnet2::DecodableAmNnetParallel::prob_scale_
BaseFloat prob_scale_
Definition: decodable-am-nnet.h:176