This Decodable object for class nnet2::AmNnet takes feature input from class OnlineFeatureInterface, unlike, say, class DecodableAmNnet which takes feature input from a matrix. More...

#include <online-nnet2-decodable.h>

Inheritance diagram for DecodableNnet2Online:

[legend]

Collaboration diagram for DecodableNnet2Online:

[legend]

Public Member Functions
	DecodableNnet2Online (const AmNnet &nnet, const TransitionModel &trans_model, const DecodableNnet2OnlineOptions &opts, OnlineFeatureInterface *input_feats)

virtual BaseFloat	LogLikelihood (int32 frame, int32 index)
	Returns the scaled log likelihood. More...

virtual bool	IsLastFrame (int32 frame) const
	Returns true if this is the last frame. More...

virtual int32	NumFramesReady () const
	The call NumFramesReady() will return the number of frames currently available for this decodable object. More...

virtual int32	NumIndices () const
	Indices are one-based! This is for compatibility with OpenFst. More...

Public Member Functions inherited from DecodableInterface
virtual	~DecodableInterface ()

Private Member Functions
void	ComputeForFrame (int32 frame)
	If the neural-network outputs for this frame are not cached, it computes them (and possibly for some succeeding frames) More...

	KALDI_DISALLOW_COPY_AND_ASSIGN (DecodableNnet2Online)

Private Attributes
OnlineFeatureInterface *	features_

const AmNnet &	nnet_

const TransitionModel &	trans_model_

DecodableNnet2OnlineOptions	opts_

CuVector< BaseFloat >	log_priors_

int32	feat_dim_

int32	left_context_

int32	right_context_

int32	num_pdfs_

int32	begin_frame_

Matrix< BaseFloat >	scaled_loglikes_

Detailed Description

This Decodable object for class nnet2::AmNnet takes feature input from class OnlineFeatureInterface, unlike, say, class DecodableAmNnet which takes feature input from a matrix.

Definition at line 68 of file online-nnet2-decodable.h.

Constructor & Destructor Documentation

◆ DecodableNnet2Online()

DecodableNnet2Online	(	const AmNnet &	nnet,
		const TransitionModel &	trans_model,
		const DecodableNnet2OnlineOptions &	opts,
		OnlineFeatureInterface *	input_feats
	)

Definition at line 25 of file online-nnet2-decodable.cc.

References KALDI_ASSERT, DecodableNnet2Online::log_priors_, DecodableNnet2OnlineOptions::max_nnet_batch_size, DecodableNnet2Online::nnet_, TransitionModel::NumPdfs(), DecodableNnet2Online::opts_, AmNnet::Priors(), and DecodableNnet2Online::trans_model_.

                                         :
     // Keep handles to the feature source, acoustic model and transition
     // model, take a copy of the options, and cache the dimensions/context
     // sizes that the other member functions read.
     features_(input_feats),
     nnet_(nnet),
     trans_model_(trans_model),
     opts_(opts),
     feat_dim_(input_feats->Dim()),
     left_context_(nnet.GetNnet().LeftContext()),
     right_context_(nnet.GetNnet().RightContext()),
     num_pdfs_(nnet.GetNnet().OutputDim()),
     // -1: no block of outputs has been computed yet (ComputeForFrame() sets
     // this to the first frame of the block it caches).
     begin_frame_(-1) {
   // ComputeForFrame() uses max_nnet_batch_size to bound how many frames it
   // evaluates per call, so it must be positive.
   KALDI_ASSERT(opts_.max_nnet_batch_size > 0);
   // Start from the priors stored with the acoustic model; they are converted
   // to logs below, after checking their dimension.
   log_priors_ = nnet_.Priors();
   // Note: the check is against the transition model's pdf count, whereas
   // num_pdfs_ above is taken from the network's output dimension.
   KALDI_ASSERT(log_priors_.Dim() == trans_model_.NumPdfs() &&
                "Priors in neural network not set up (or mismatch "
                "with transition model).");
   // Convert to log-priors; ComputeForFrame() subtracts these from the
   // log-posteriors.
   log_priors_.ApplyLog();
 }

Member Function Documentation

◆ ComputeForFrame()

void ComputeForFrame ( int32 frame )

private

If the neural-network outputs for this frame are not already cached, computes them (and possibly those for some succeeding frames as well).

Definition at line 81 of file online-nnet2-decodable.cc.

Referenced by DecodableNnet2Online::LogLikelihood().

                                                         {
   // Ensures that scaled_loglikes_ holds the scaled log-likelihoods for
   // `frame`.  If `frame` is not inside the currently cached block, a new
   // block starting at `frame` is computed, which replaces the old one.
   //
   // Number of input feature frames currently available, and whether the last
   // of them is the final frame of the input.
   int32 features_ready = features_->NumFramesReady();
   bool input_finished = features_->IsLastFrame(features_ready - 1);
   KALDI_ASSERT(frame >= 0);
   // Nothing to do if `frame` falls inside the cached block
   // [begin_frame_, begin_frame_ + NumRows()).
   if (frame >= begin_frame_ &&
       frame < begin_frame_ + scaled_loglikes_.NumRows())
     return;
   // The caller must only ask for frames this object reports as ready.
   KALDI_ASSERT(frame < NumFramesReady());
 
   // First input frame to feed to the network.  With padding we start
   // left_context_ frames before `frame` (this may be negative; such indices
   // are clamped when the features are read below).
   int32 input_frame_begin;
   if (opts_.pad_input)
     input_frame_begin = frame - left_context_;
   else
     input_frame_begin = frame;
   // One past the last input frame we could use.  Once the input is finished
   // and we are padding, allow right_context_ extra (duplicated) frames.
   int32 max_possible_input_frame_end = features_ready;
   if (input_finished && opts_.pad_input)
     max_possible_input_frame_end += right_context_;
   // Limit the window so that at most max_nnet_batch_size output frames
   // (window length minus left and right context) are computed at once.
   int32 input_frame_end = std::min<int32>(max_possible_input_frame_end,
                                           input_frame_begin +
                                           left_context_ + right_context_ +
                                           opts_.max_nnet_batch_size);
   KALDI_ASSERT(input_frame_end > input_frame_begin);
   // Gather the input window, one row per input frame.
   Matrix<BaseFloat> features(input_frame_end - input_frame_begin,
                              feat_dim_);
   for (int32 t = input_frame_begin; t < input_frame_end; t++) {
     SubVector<BaseFloat> row(features, t - input_frame_begin);
     int32 t_modified = t;
     // The next two if-statements take care of "pad_input": indices outside
     // [0, features_ready) are clamped, so the first or last available frame
     // is used in their place.
     if (t_modified < 0)
       t_modified = 0;
     if (t_modified >= features_ready)
       t_modified = features_ready - 1;
     features_->GetFrame(t_modified, &row);
   }
   CuMatrix<BaseFloat> cu_features;
   cu_features.Swap(&features);  // Copy to GPU, if we're using one.
 
 
   // The network consumes left_context_ + right_context_ frames of context, so
   // the number of output rows is the window length minus that.
   int32 num_frames_out = input_frame_end - input_frame_begin -
       left_context_ - right_context_;
 
   CuMatrix<BaseFloat> cu_posteriors(num_frames_out, num_pdfs_);
 
   // The "false" below tells it not to pad the input: we've already done
   // any padding that we needed to do.
   NnetComputation(nnet_.GetNnet(), cu_features,
                   false, &cu_posteriors);
 
   cu_posteriors.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN.
   cu_posteriors.ApplyLog();
   // subtract log-prior (divide by prior)
   cu_posteriors.AddVecToRows(-1.0, log_priors_);
   // apply probability scale.
   cu_posteriors.Scale(opts_.acoustic_scale);
 
   // Transfer the scores to the CPU for faster access by the
   // decoding process.  The old cached block is discarded first.
   scaled_loglikes_.Resize(0, 0);
   cu_posteriors.Swap(&scaled_loglikes_);
 
   // Row 0 of the cached block now corresponds to `frame`.
   begin_frame_ = frame;
 }

◆ IsLastFrame()

bool IsLastFrame ( int32 frame ) const

virtual

Returns true if this is the last frame.

Frames are zero-based, so the first frame is zero. IsLastFrame(-1) will return false, unless the file is empty (which is a case that I'm not sure all the code will handle, so be careful). Caution: the behavior of this function in an online setting is being changed somewhat. In future it may return false in cases where we haven't yet decided to terminate decoding, but later true if we decide to terminate decoding. The plan in future is to rely more on NumFramesReady(), and in future, IsLastFrame() would always return false in an online-decoding setting, and would only return true in a decoding-from-matrix setting where we want to allow the last delta or LDA features to be flushed out for compatibility with the baseline setup.

Implements DecodableInterface.

Definition at line 58 of file online-nnet2-decodable.cc.

References DecodableNnet2Online::features_, OnlineFeatureInterface::IsLastFrame(), DecodableNnet2Online::left_context_, DecodableNnet2Online::opts_, DecodableNnet2OnlineOptions::pad_input, and DecodableNnet2Online::right_context_.

                                                         {
   if (opts_.pad_input) { // normal case
     return features_->IsLastFrame(frame);
   } else {
     return features_->IsLastFrame(frame + left_context_ + right_context_);
   }
 }

◆ KALDI_DISALLOW_COPY_AND_ASSIGN()

KALDI_DISALLOW_COPY_AND_ASSIGN ( DecodableNnet2Online )

private

◆ LogLikelihood()

BaseFloat LogLikelihood	(	int32	frame,
		int32	index
	)

virtual

Returns the scaled log likelihood.

Implements DecodableInterface.

Definition at line 49 of file online-nnet2-decodable.cc.

References DecodableNnet2Online::begin_frame_, DecodableNnet2Online::ComputeForFrame(), KALDI_ASSERT, MatrixBase< Real >::NumRows(), DecodableNnet2Online::scaled_loglikes_, DecodableNnet2Online::trans_model_, and TransitionModel::TransitionIdToPdf().

Referenced by kaldi::nnet2::UnitTestNnetDecodable().

                                                                       {
   ComputeForFrame(frame);
   int32 pdf_id = trans_model_.TransitionIdToPdf(index);
   KALDI_ASSERT(frame >= begin_frame_ &&
                frame < begin_frame_ + scaled_loglikes_.NumRows());
   return scaled_loglikes_(frame - begin_frame_, pdf_id);
 }

◆ NumFramesReady()

int32 NumFramesReady ( ) const

virtual

The call NumFramesReady() will return the number of frames currently available for this decodable object.

This is for use in setups where you don't want the decoder to block while waiting for input. This is newly added as of Jan 2014, and I hope, going forward, to rely on this mechanism more than IsLastFrame to know when to stop decoding.

Reimplemented from DecodableInterface.

Definition at line 66 of file online-nnet2-decodable.cc.

References DecodableNnet2Online::features_, OnlineFeatureInterface::IsLastFrame(), DecodableNnet2Online::left_context_, OnlineFeatureInterface::NumFramesReady(), DecodableNnet2Online::opts_, DecodableNnet2OnlineOptions::pad_input, and DecodableNnet2Online::right_context_.

Referenced by DecodableNnet2Online::ComputeForFrame(), and kaldi::nnet2::UnitTestNnetDecodable().

                                                  {
   int32 features_ready = features_->NumFramesReady();
   if (features_ready == 0)
     return 0;
   bool input_finished = features_->IsLastFrame(features_ready - 1);
   if (opts_.pad_input) {
     // normal case... we'll pad with duplicates of first + last frame to get the
     // required left and right context.
     if (input_finished) return features_ready;
     else return std::max<int32>(0, features_ready - right_context_);
   } else {
     return std::max<int32>(0, features_ready - right_context_ - left_context_);
   }
 }