doc/nnet-compute-online_8cc_source.html

 // nnet2/nnet-compute-online.cc

 // Copyright 2014   Johns Hopkins University (author: Daniel Povey)
 //                  Guoguo Chen
 //                  Vijayaditya Peddinti

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABILITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include "nnet2/nnet-compute-online.h"
 #include <vector>

 namespace kaldi {
 namespace nnet2 {

 // Constructs an online computer bound to `nnet` (held by reference, so the
 // network must outlive this object).  `pad_input` selects whether Compute()
 // and Flush() pad the input with copies of the first / last frame.
 // The object starts in the "first chunk, not finished" state.
 NnetOnlineComputer::NnetOnlineComputer(const Nnet &nnet, bool pad_input)
     : nnet_(nnet),
       pad_input_(pad_input),
       is_first_chunk_(true),
       finished_(false) {
   // Both per-component buffers are sized to NumComponents() + 1: one slot for
   // the input of each component plus one for the final output.
   const int32 num_buffers = nnet_.NumComponents() + 1;
   data_.resize(num_buffers);
   reusable_component_inputs_.resize(num_buffers);
 }

 // Feeds a new chunk of input frames to the network and writes any output
 // frames that can now be computed to *output.
 //
 //  input:  rows are frames; the number of columns must equal
 //          nnet_.InputDim(), otherwise KALDI_ERR is raised.  An input with
 //          zero rows is accepted and produces an empty output.
 //  output: must be non-NULL.  It is resized to 0 x 0 when there is not yet
 //          enough context to produce any output frame; in that case the
 //          input is kept in unprocessed_buffer_ and consumed by the next call.
 //
 // Must not be called after Flush() (asserts !finished_).
 void NnetOnlineComputer::Compute(const CuMatrixBase<BaseFloat> &input,
                                  CuMatrix<BaseFloat> *output) {
   KALDI_ASSERT(output != NULL);
   KALDI_ASSERT(!finished_);
   int32 dim = input.NumCols();

   // If input is empty, we also set output to zero size.
   if (input.NumRows() == 0) {
     output->Resize(0, 0);
     return;
   }

   // Checking if feature dimension matches that required by the neural network.
   // This is done before any member is modified, so that a call with a bad
   // dimension does not overwrite last_seen_input_frame_.
   if (dim != nnet_.InputDim()) {
     KALDI_ERR << "Feature dimension is " << dim << ", but network expects "
         << nnet_.InputDim();
   }

   // store the last frame as it might be needed for padding when Flush() is
   // called.
   if (last_seen_input_frame_.Dim() != dim)
     last_seen_input_frame_.Resize(dim);
   last_seen_input_frame_.CopyFromVec(input.Row(input.NumRows() - 1));

   // num_effective_input_rows is the effective number of input rows we have, for
   // purposes of computing how much output we will get.  It is the number of
   // actual input rows plus the amount of context stored at intermediate layers
   // of the network (which if we have previously done the computation, will
   // equal nnet_.LeftContext() + nnet_.RightContext()).
   int32 num_effective_input_rows = 0;
   // Initialize the first element of data_, with input
   CuMatrix<BaseFloat> &input_data(data_[0]);
   if (is_first_chunk_)  {
     is_first_chunk_ = false;
     // assert that all the component-wise input buffers are empty
     for (size_t i = 0; i < reusable_component_inputs_.size(); i++)
       KALDI_ASSERT(reusable_component_inputs_[i].NumRows() == 0);
     // Pad at the start of the file if necessary.
     if ((pad_input_) && (nnet_.LeftContext() > 0))  {
       // Prepend LeftContext() copies of the first frame, then the input.
       input_data.Resize(nnet_.LeftContext() + input.NumRows(), dim);
       input_data.Range(0, nnet_.LeftContext(), 0,
                        dim).CopyRowsFromVec(input.Row(0));
       input_data.Range(nnet_.LeftContext(), input.NumRows(),
                        0, dim).CopyFromMat(input);
     } else {
       input_data.Resize(input.NumRows(), input.NumCols());
       input_data.CopyFromMat(input);
     }
     num_effective_input_rows = input_data.NumRows();
   } else {
     int32 extra_input_rows = 0;
     // checking if we did forward pass for any chunks before.
     // if we did a forward pass, component input buffers would be non-empty
     // these buffers store information equivalent to having an nnet_input
     // buffer of (nnet_.LeftContext() + nnet_.RightContext())
     for (size_t i = 0; i < reusable_component_inputs_.size(); i++)  {
       if (reusable_component_inputs_[i].NumRows() > 0) {
         extra_input_rows = nnet_.LeftContext() + nnet_.RightContext();
         break;
       }
     }
     // add unprocessed input from the previous calls
     input_data.Resize(input.NumRows() + unprocessed_buffer_.NumRows(), dim);
     if (unprocessed_buffer_.NumRows() > 0)
       input_data.Range(0, unprocessed_buffer_.NumRows(),
                        0, dim).CopyFromMat(unprocessed_buffer_);
     input_data.Range(unprocessed_buffer_.NumRows(), input.NumRows(),
                      0, dim).CopyFromMat(input);
     unprocessed_buffer_.Resize(0, 0); // clearing the unprocessed buffer
     num_effective_input_rows = input_data.NumRows() + extra_input_rows;
   }
   if (num_effective_input_rows >=
       nnet_.LeftContext() + nnet_.RightContext() + 1) {
     // we have sufficient frames to compute at least one nnet output
     nnet_.ComputeChunkInfo(num_effective_input_rows, 1, &chunk_info_);
     Propagate();
     *output = data_.back();
   } else {
     // store the input in the unprocessed_buffer_
     unprocessed_buffer_ = input_data;
     // not enough input context so just return an empty array
     output->Resize(0, 0);
   }

 }

 // Finishes the computation: pushes nnet_.RightContext() copies of the last
 // seen input frame through the network (only when pad_input_ is true) and
 // writes the resulting output frames to *output.
 //
 //  output: must be non-NULL.  It is resized to 0 x 0 when no padding is
 //          applied (pad_input_ is false or RightContext() is zero), since no
 //          further output can be produced in that case.
 //
 // Requires that Compute() has already accepted at least one non-empty chunk
 // (asserts !is_first_chunk_) and that Flush() has not been called before.
 // On return finished_ is set, so later Compute()/Flush() calls will assert.
 void NnetOnlineComputer::Flush(CuMatrix<BaseFloat> *output) {
   // Same precondition on the output pointer as Compute().
   KALDI_ASSERT(output != NULL);
   KALDI_ASSERT(!finished_ && !is_first_chunk_);
   int32 num_frames_padding = (pad_input_ ? nnet_.RightContext() : 0);
   int32 num_stored_frames = nnet_.LeftContext() + nnet_.RightContext();
   int32 num_effective_input_rows =  num_stored_frames + num_frames_padding;
   // If the amount of output would be empty return at this point.
   if (num_effective_input_rows < nnet_.LeftContext() + nnet_.RightContext() + 1) {
     output->Resize(0, 0);
     finished_ = true;
     return;
   }

   int32 dim = nnet_.InputDim();
   CuMatrix<BaseFloat> &input_data(data_[0]);
   KALDI_ASSERT(num_frames_padding > 0);  // else we would have returned above.
   // The network input for this last pass consists solely of padding frames,
   // each a copy of the last frame given to Compute().
   input_data.Resize(num_frames_padding, dim);
   input_data.CopyRowsFromVec(last_seen_input_frame_);

   // Note, we later modify this chunk-info, it isn't quite correct right now
   // because we add extra data at intermediate layers, and the actual number of
   // input rows doesn't equal num_effective_input_rows.
   nnet_.ComputeChunkInfo(num_effective_input_rows, 1,
                          &chunk_info_);
   Propagate();
   *output = data_.back();
   finished_ = true;
 }

 // Runs the forward pass over all components, reading the network input from
 // data_[0] and leaving the output of component c in data_[c + 1] (so the final
 // network output ends up in data_.back()).
 //
 // Expects chunk_info_ to have been filled in by nnet_.ComputeChunkInfo()
 // before the call.  Updates reusable_component_inputs_ so that the trailing
 // input rows of each context-dependent component are available to the next
 // call.
 void NnetOnlineComputer::Propagate() {
   // This method is like the normal nnet propagate, but we reuse the frames
   // computed from the previous chunk, at each component.

   for (int32 c = 0; c < nnet_.NumComponents(); c++) {
     // we assume that the chunks are always contiguous
     chunk_info_[c].MakeOffsetsContiguous();
     chunk_info_[c + 1].MakeOffsetsContiguous();

     const Component &component = nnet_.GetComponent(c);
     // input_data aliases data_[c]; it may be replaced below by a longer matrix
     // that has the frames saved from the previous call prepended.
     CuMatrix<BaseFloat> &input_data = data_[c], &output_data = data_[c + 1];
     CuMatrix<BaseFloat> input_data_temp;

     // Only components whose Context() has more than one element need frames
     // carried over between calls.
     if (component.Context().size() > 1)  {
       int32 dim = component.InputDim();
       if (reusable_component_inputs_[c].NumRows() > 0) {
         // concatenate any frames computed by previous component
         // in the last call, to the input of the current component
         input_data_temp.Resize(reusable_component_inputs_[c].NumRows()
                                + input_data.NumRows(), dim);
         input_data_temp.Range(0, reusable_component_inputs_[c].NumRows(),
                        0, dim).CopyFromMat(reusable_component_inputs_[c]);
         input_data_temp.Range(reusable_component_inputs_[c].NumRows(),
                               input_data.NumRows(), 0, dim).CopyFromMat(
                                   input_data);
         input_data = input_data_temp;
       }
       // store any frames which can be reused in the next call
       // (the last Context().back() - Context().front() rows of the, possibly
       // extended, input of this component)
       reusable_component_inputs_[c].Resize(component.Context().back() -
                                 component.Context().front(), dim);
       reusable_component_inputs_[c].CopyFromMat(
           input_data.RowRange(input_data.NumRows() -
                               reusable_component_inputs_[c].NumRows(),
                               reusable_component_inputs_[c].NumRows()));
     }

     // chunk_info objects provided assume that we added all the reusable
     // context at the input of the nnet. However we are reusing hidden
     // activations computed in the previous call.
     // Hence we manipulate the chunk_info objects to reflect the state of the
     // actual chunk, each component is computing, in the current Propagate.
     // As before we always assume the chunks are contiguous.

     // modifying the input chunk_info
     // Keep the last offset of the assumed chunk and choose the first offset so
     // that the range spans exactly input_data.NumRows() rows.
     int32 chunk_size_assumed = chunk_info_[c].ChunkSize();
     int32 last_offset = chunk_info_[c].GetOffset(chunk_size_assumed - 1);
     int32 first_offset = last_offset - input_data.NumRows() + 1;
     ChunkInfo input_chunk_info(chunk_info_[c].NumCols(),
                                chunk_info_[c].NumChunks(),
                                first_offset,
                                last_offset);
     // modifying the output chunk_info
     // The output range is shorter than the input by the context span of the
     // component, i.e. Context().back() - Context().front() rows.
     chunk_size_assumed = chunk_info_[c + 1].ChunkSize();
     last_offset = chunk_info_[c + 1].GetOffset(chunk_size_assumed - 1);
     first_offset = last_offset - (input_data.NumRows() -
                                   (component.Context().back() -
                                    component.Context().front())) + 1;
     ChunkInfo output_chunk_info(chunk_info_[c + 1].NumCols(),
                                 chunk_info_[c + 1].NumChunks(),
                                 first_offset,
                                 last_offset);
     component.Propagate(input_chunk_info, output_chunk_info,
                         input_data, &output_data);
   }
 }

 }  // namespace nnet2
 }  // namespace kaldi
kaldi::CuMatrixBase::CopyFromMat
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet2::Nnet::GetComponent
const Component & GetComponent(int32 c) const
Definition: nnet-nnet.cc:141

kaldi::nnet2::Nnet::LeftContext
int32 LeftContext() const
Returns the left-context summed over all the Components...
Definition: nnet-nnet.cc:42

kaldi::nnet2::Component::InputDim
virtual int32 InputDim() const =0
Get size of input vectors.

kaldi::nnet2::NnetOnlineComputer::data_
std::vector< CuMatrix< BaseFloat > > data_
Definition: nnet-compute-online.h:80

kaldi::nnet2::NnetOnlineComputer::reusable_component_inputs_
std::vector< CuMatrix< BaseFloat > > reusable_component_inputs_
Definition: nnet-compute-online.h:85

kaldi::CuMatrixBase::Row
const CuSubVector< Real > Row(MatrixIndexT i) const
Definition: cu-matrix.h:670

kaldi::nnet2::NnetOnlineComputer::pad_input_
bool pad_input_
Definition: nnet-compute-online.h:97

kaldi::nnet2::NnetOnlineComputer::chunk_info_
std::vector< ChunkInfo > chunk_info_
Definition: nnet-compute-online.h:82

kaldi::nnet2::Component
Abstract class, basic element of the network, it is a box with defined inputs, outputs, and transformation functions interface.
Definition: nnet-component.h:157

kaldi::nnet2::NnetOnlineComputer::NnetOnlineComputer
NnetOnlineComputer(const Nnet &nnet, bool pad_input)
Definition: nnet-compute-online.cc:28

kaldi::CuMatrixBase::Range
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:653

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::CuMatrixBase::CopyRowsFromVec
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301

kaldi::nnet2::NnetOnlineComputer::Flush
void Flush(CuMatrix< BaseFloat > *output)
Definition: nnet-compute-online.cc:120

kaldi::nnet2::Component::Context
virtual std::vector< int32 > Context() const
Return a vector describing the temporal context this component requires for each frame of output...
Definition: nnet-component.h:188

kaldi::nnet2::NnetOnlineComputer::last_seen_input_frame_
CuVector< BaseFloat > last_seen_input_frame_
Definition: nnet-compute-online.h:93

kaldi::nnet2::Nnet::NumComponents
int32 NumComponents() const
Returns number of components– think of this as similar to # of layers, but e.g.
Definition: nnet-nnet.h:69

kaldi::nnet2::NnetOnlineComputer::Propagate
void Propagate()
Definition: nnet-compute-online.cc:148

kaldi::nnet2::NnetOnlineComputer::is_first_chunk_
bool is_first_chunk_
Definition: nnet-compute-online.h:99

kaldi::nnet2::NnetOnlineComputer::finished_
bool finished_
Definition: nnet-compute-online.h:101

kaldi::nnet2::Nnet::RightContext
int32 RightContext() const
Returns the right-context summed over all the Components...
Definition: nnet-nnet.cc:56

kaldi::nnet2::ChunkInfo
ChunkInfo is a class whose purpose is to describe the structure of matrices holding features...
Definition: nnet-component.h:72

nnet-compute-online.h

kaldi::nnet2::Nnet
Definition: nnet-nnet.h:63

kaldi::nnet2::NnetOnlineComputer::nnet_
const Nnet & nnet_
Definition: nnet-compute-online.h:76

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::nnet2::NnetOnlineComputer::unprocessed_buffer_
CuMatrix< BaseFloat > unprocessed_buffer_
Definition: nnet-compute-online.h:89

kaldi::CuMatrixBase::RowRange
CuSubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
Definition: cu-matrix.h:660

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::CuMatrixBase::NumCols
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216

kaldi::nnet2::Component::Propagate
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const =0
Perform forward pass propagation Input->Output.

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::nnet2::NnetOnlineComputer::Compute
void Compute(const CuMatrixBase< BaseFloat > &input, CuMatrix< BaseFloat > *output)
Definition: nnet-compute-online.cc:35

kaldi::CuMatrixBase::NumRows
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

kaldi::nnet2::Nnet::ComputeChunkInfo
void ComputeChunkInfo(int32 input_chunk_size, int32 num_chunks, std::vector< ChunkInfo > *chunk_info_out) const
Uses the output of the Context() functions of the network, to compute a vector of size NumComponents(...
Definition: nnet-nnet.cc:65

kaldi::CuMatrix::Resize
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

kaldi::nnet2::Nnet::InputDim
int32 InputDim() const
Dimension of the input features, e.g.
Definition: nnet-nnet.cc:36