doc/online-nnet2-decodable_8cc_source.html

 // nnet2/online-nnet2-decodable.cc

 // Copyright  2014  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include "nnet2/online-nnet2-decodable.h"

 namespace kaldi {
 namespace nnet2 {

 // Sets up an online decodable object that reads frames from `input_feats`
 // and scores them with `nnet`.  The feature dimension, the network's
 // left/right context and its output dimension are cached at construction,
 // and the network's priors are stored in log form so they can later be
 // subtracted from the log-posteriors.
 //
 // `input_feats` is dereferenced here (for Dim()), so it must be non-NULL.
 DecodableNnet2Online::DecodableNnet2Online(
     const AmNnet &nnet,
     const TransitionModel &trans_model,
     const DecodableNnet2OnlineOptions &opts,
     OnlineFeatureInterface *input_feats)
     : features_(input_feats),
       nnet_(nnet),
       trans_model_(trans_model),
       opts_(opts),
       feat_dim_(input_feats->Dim()),
       left_context_(nnet.GetNnet().LeftContext()),
       right_context_(nnet.GetNnet().RightContext()),
       num_pdfs_(nnet.GetNnet().OutputDim()),
       begin_frame_(-1) {  // together with an empty cache: nothing computed yet.
   // A non-positive batch size would make ComputeForFrame() produce no output.
   KALDI_ASSERT(opts_.max_nnet_batch_size > 0);

   // Take a copy of the priors, verify that there is one per pdf of the
   // transition model, and only then move to the log domain.
   log_priors_ = nnet_.Priors();
   KALDI_ASSERT(log_priors_.Dim() == trans_model_.NumPdfs() &&
                "Priors in neural network not set up (or mismatch "
                "with transition model).");
   log_priors_.ApplyLog();
 }


 // Returns the acoustically-scaled log-likelihood for output frame `frame`
 // and transition-id `index`.  The scores for the frame are computed (and
 // cached) on demand through ComputeForFrame().
 BaseFloat DecodableNnet2Online::LogLikelihood(int32 frame, int32 index) {
   // Make sure the cache covers this frame.
   ComputeForFrame(frame);
   // The cache is indexed by pdf-id, not by transition-id.
   int32 pdf_id = trans_model_.TransitionIdToPdf(index);
   KALDI_ASSERT(frame >= begin_frame_ &&
                frame < begin_frame_ + scaled_loglikes_.NumRows());
   // Row of the cache holding this frame.
   const int32 cached_row = frame - begin_frame_;
   return scaled_loglikes_(cached_row, pdf_id);
 }


 // Returns true if `frame` is the last output frame of this decodable.
 bool DecodableNnet2Online::IsLastFrame(int32 frame) const {
   // With padding (the normal case) the question is forwarded unchanged to
   // the feature source.  Without padding, the feature source is asked about
   // the frame left_context_ + right_context_ further on instead.
   const int32 input_frame =
       opts_.pad_input ? frame : frame + left_context_ + right_context_;
   return features_->IsLastFrame(input_frame);
 }

 // Returns how many output frames can currently be computed, given the number
 // of input feature frames that are ready and the context the network needs.
 int32 DecodableNnet2Online::NumFramesReady() const {
   const int32 num_feats = features_->NumFramesReady();
   if (num_feats == 0)
     return 0;
   const bool feats_done = features_->IsLastFrame(num_feats - 1);

   // Normal (padded) case with the input finished: the missing context is
   // filled with duplicates of the first and last frames, so every input
   // frame gives an output frame.
   if (opts_.pad_input && feats_done)
     return num_feats;

   // Otherwise we always have to hold back right_context_ frames, and when
   // not padding we additionally lose left_context_ frames.
   int32 frames_held_back = right_context_;
   if (!opts_.pad_input)
     frames_held_back += left_context_;
   return std::max<int32>(0, num_feats - frames_held_back);
 }

 // Ensures that the scaled log-likelihoods for output frame `frame` are in the
 // cache (scaled_loglikes_, whose first row is frame begin_frame_).  If the
 // frame is already cached this returns immediately; otherwise it runs the
 // network on a batch of up to opts_.max_nnet_batch_size output frames
 // starting at `frame` and replaces the cache with the result.
 //
 // `frame` must be >= 0 and, if not cached, < NumFramesReady().
 void DecodableNnet2Online::ComputeForFrame(int32 frame) {
   KALDI_ASSERT(frame >= 0);
   // Cache hit: this function is called from LogLikelihood() for every
   // lookup, so return before touching the feature source at all.
   if (frame >= begin_frame_ &&
       frame < begin_frame_ + scaled_loglikes_.NumRows())
     return;
   KALDI_ASSERT(frame < NumFramesReady());

   // Query the feature source only after the check above.  That check
   // passing with frame >= 0 means NumFramesReady() returned at least 1,
   // which it does only when the source reports at least one frame, so
   // features_ready - 1 should be a valid frame index below.
   int32 features_ready = features_->NumFramesReady();
   bool input_finished = features_->IsLastFrame(features_ready - 1);

   // First input frame needed.  With padding, output frame t is centered on
   // input frame t, so we need left_context_ frames before it (possibly
   // negative indexes, which are clamped below).  Without padding, output
   // frame t starts at input frame t.
   int32 input_frame_begin;
   if (opts_.pad_input)
     input_frame_begin = frame - left_context_;
   else
     input_frame_begin = frame;
   // One past the last input frame we may use.  Only once the input has
   // finished do we pad on the right, with copies of the last frame.
   int32 max_possible_input_frame_end = features_ready;
   if (input_finished && opts_.pad_input)
     max_possible_input_frame_end += right_context_;
   int32 input_frame_end = std::min<int32>(max_possible_input_frame_end,
                                           input_frame_begin +
                                           left_context_ + right_context_ +
                                           opts_.max_nnet_batch_size);
   KALDI_ASSERT(input_frame_end > input_frame_begin);
   Matrix<BaseFloat> features(input_frame_end - input_frame_begin,
                              feat_dim_);
   for (int32 t = input_frame_begin; t < input_frame_end; t++) {
     SubVector<BaseFloat> row(features, t - input_frame_begin);
     int32 t_modified = t;
     // The next two if-statements take care of "pad_input": out-of-range
     // indexes are mapped to the first / last available frame.
     if (t_modified < 0)
       t_modified = 0;
     if (t_modified >= features_ready)
       t_modified = features_ready - 1;
     features_->GetFrame(t_modified, &row);
   }
   CuMatrix<BaseFloat> cu_features;
   cu_features.Swap(&features);  // Copy to GPU, if we're using one.

   // The network consumes left_context_ + right_context_ input frames of
   // context, so that many fewer frames come out than went in.
   int32 num_frames_out = input_frame_end - input_frame_begin -
       left_context_ - right_context_;

   CuMatrix<BaseFloat> cu_posteriors(num_frames_out, num_pdfs_);

   // The "false" below tells it not to pad the input: we've already done
   // any padding that we needed to do.
   NnetComputation(nnet_.GetNnet(), cu_features,
                   false, &cu_posteriors);

   cu_posteriors.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN.
   cu_posteriors.ApplyLog();
   // subtract log-prior (divide by prior)
   cu_posteriors.AddVecToRows(-1.0, log_priors_);
   // apply probability scale.
   cu_posteriors.Scale(opts_.acoustic_scale);

   // Transfer the scores to the CPU for faster access by the
   // decoding process.
   scaled_loglikes_.Resize(0, 0);
   cu_posteriors.Swap(&scaled_loglikes_);

   // Row 0 of the cache now corresponds to the requested frame.
   begin_frame_ = frame;
 }

 } // namespace nnet2
 } // namespace kaldi
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet2::DecodableNnet2Online::num_pdfs_
int32 num_pdfs_
Definition: online-nnet2-decodable.h:100

kaldi::nnet2::AmNnet
Definition: am-nnet.h:38

kaldi::nnet2::DecodableNnet2OnlineOptions::pad_input
bool pad_input
Definition: online-nnet2-decodable.h:39

kaldi::CuMatrixBase::ApplyFloor
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451

kaldi::OnlineFeatureInterface::GetFrame
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.

kaldi::nnet2::DecodableNnet2Online::scaled_loglikes_
Matrix< BaseFloat > scaled_loglikes_
Definition: online-nnet2-decodable.h:114

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::Matrix< BaseFloat >

kaldi::nnet2::DecodableNnet2Online::log_priors_
CuVector< BaseFloat > log_priors_
Definition: online-nnet2-decodable.h:96

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::nnet2::NnetComputation
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
Definition: nnet-compute.cc:160

kaldi::nnet2::DecodableNnet2Online::NumFramesReady
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
Definition: online-nnet2-decodable.cc:66

kaldi::TransitionModel::TransitionIdToPdf
int32 TransitionIdToPdf(int32 trans_id) const
Definition: transition-model.h:327

kaldi::nnet2::DecodableNnet2Online::ComputeForFrame
void ComputeForFrame(int32 frame)
If the neural-network outputs for this frame are not cached, it computes them (and possibly for some ...
Definition: online-nnet2-decodable.cc:81

kaldi::CuMatrixBase::ApplyLog
void ApplyLog()
Definition: cu-matrix.h:480

kaldi::nnet2::DecodableNnet2Online::opts_
DecodableNnet2OnlineOptions opts_
Definition: online-nnet2-decodable.h:95

kaldi::TransitionModel
Definition: transition-model.h:123

kaldi::CuMatrixBase::Scale
void Scale(Real value)
Definition: cu-matrix.cc:644

online-nnet2-decodable.h

kaldi::CuMatrixBase::AddVecToRows
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261

float

kaldi::CuMatrix::Swap
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123

kaldi::nnet2::AmNnet::Priors
const VectorBase< BaseFloat > & Priors() const
Definition: am-nnet.h:67

kaldi::OnlineFeatureInterface::IsLastFrame
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.

kaldi::nnet2::DecodableNnet2Online::nnet_
const AmNnet & nnet_
Definition: online-nnet2-decodable.h:93

kaldi::nnet2::DecodableNnet2OnlineOptions
Definition: online-nnet2-decodable.h:37

kaldi::nnet2::DecodableNnet2Online::begin_frame_
int32 begin_frame_
Definition: online-nnet2-decodable.h:103

kaldi::nnet2::DecodableNnet2Online::feat_dim_
int32 feat_dim_
Definition: online-nnet2-decodable.h:97

kaldi::nnet2::DecodableNnet2Online::IsLastFrame
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
Definition: online-nnet2-decodable.cc:58

kaldi::nnet2::DecodableNnet2Online::LogLikelihood
virtual BaseFloat LogLikelihood(int32 frame, int32 index)
Returns the scaled log likelihood.
Definition: online-nnet2-decodable.cc:49

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::nnet2::DecodableNnet2Online::DecodableNnet2Online
DecodableNnet2Online(const AmNnet &nnet, const TransitionModel &trans_model, const DecodableNnet2OnlineOptions &opts, OnlineFeatureInterface *input_feats)
Definition: online-nnet2-decodable.cc:25

kaldi::Matrix::Resize
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Definition: kaldi-matrix.cc:819

kaldi::OnlineFeatureInterface
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
Definition: online-feature-itf.h:49

kaldi::nnet2::DecodableNnet2Online::features_
OnlineFeatureInterface * features_
Definition: online-nnet2-decodable.h:92

kaldi::nnet2::DecodableNnet2OnlineOptions::acoustic_scale
BaseFloat acoustic_scale
Definition: online-nnet2-decodable.h:38

kaldi::nnet2::DecodableNnet2Online::trans_model_
const TransitionModel & trans_model_
Definition: online-nnet2-decodable.h:94

kaldi::nnet2::DecodableNnet2OnlineOptions::max_nnet_batch_size
int32 max_nnet_batch_size
Definition: online-nnet2-decodable.h:40

kaldi::OnlineFeatureInterface::NumFramesReady
virtual int32 NumFramesReady() const =0
Returns the number of feature frames that are currently available.

kaldi::SubVector
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501

kaldi::nnet2::AmNnet::GetNnet
const Nnet & GetNnet() const
Definition: am-nnet.h:61

kaldi::nnet2::DecodableNnet2Online::left_context_
int32 left_context_
Definition: online-nnet2-decodable.h:98

kaldi::TransitionModel::NumPdfs
int32 NumPdfs() const
Definition: transition-model.h:190

kaldi::nnet2::DecodableNnet2Online::right_context_
int32 right_context_
Definition: online-nnet2-decodable.h:99