nnet-compute-online.cc
// nnet2/nnet-compute-online.cc

// Copyright 2014   Johns Hopkins University (author: Daniel Povey)
//                  Guoguo Chen
//                  Vijayaditya Peddinti

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet2/nnet-compute-online.h"
#include <vector>

namespace kaldi {
namespace nnet2 {

NnetOnlineComputer::NnetOnlineComputer(const Nnet &nnet, bool pad_input)
    : nnet_(nnet), pad_input_(pad_input),
      is_first_chunk_(true), finished_(false) {
  data_.resize(nnet_.NumComponents() + 1);
  reusable_component_inputs_.resize(nnet_.NumComponents());
}

void NnetOnlineComputer::Compute(const CuMatrixBase<BaseFloat> &input,
                                 CuMatrix<BaseFloat> *output) {
  KALDI_ASSERT(output != NULL);
  int32 dim = input.NumCols();

  // If input is empty, we also set output to zero size.
  if (input.NumRows() == 0) {
    output->Resize(0, 0);
    return;
  } else {
    // store the last frame, as it might be needed for padding when Flush() is
    // called.
    if (last_seen_input_frame_.Dim() != input.NumCols())
      last_seen_input_frame_.Resize(input.NumCols());
    last_seen_input_frame_.CopyFromVec(input.Row(input.NumRows() - 1));
  }

  // Check that the feature dimension matches what the network expects.
  if (dim != nnet_.InputDim()) {
    KALDI_ERR << "Feature dimension is " << dim << ", but network expects "
              << nnet_.InputDim();
  }
  // num_effective_input_rows is the effective number of input rows we have,
  // for purposes of computing how much output we will get.  It is the number
  // of actual input rows plus the amount of context stored at intermediate
  // layers of the network (which, if we have previously done the computation,
  // will equal nnet_.LeftContext() + nnet_.RightContext()).
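  // Illustrative example (hypothetical numbers, not from this file): if
  // nnet_.LeftContext() == 3 and nnet_.RightContext() == 3, then once a first
  // chunk has been processed the intermediate buffers hold the equivalent of
  // 6 input rows, so a new 10-row chunk gives num_effective_input_rows == 16,
  // which is enough for 16 - 6 == 10 output frames.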
  int32 num_effective_input_rows = 0;
  // Initialize the first element of data_ with the input.
  CuMatrix<BaseFloat> &input_data(data_[0]);
  if (is_first_chunk_) {
    is_first_chunk_ = false;
    // Assert that all the component-wise input buffers are empty.
    for (int32 i = 0; i < reusable_component_inputs_.size(); i++)
      KALDI_ASSERT(reusable_component_inputs_[i].NumRows() == 0);
    // Pad at the start of the file if necessary.
    if ((pad_input_) && (nnet_.LeftContext() > 0)) {
      input_data.Resize(nnet_.LeftContext() + input.NumRows(), dim);
      input_data.Range(0, nnet_.LeftContext(), 0,
                       dim).CopyRowsFromVec(input.Row(0));
      input_data.Range(nnet_.LeftContext(), input.NumRows(),
                       0, dim).CopyFromMat(input);
    } else {
      input_data.Resize(input.NumRows(), input.NumCols());
      input_data.CopyFromMat(input);
    }
    num_effective_input_rows = input_data.NumRows();
  } else {
    int32 extra_input_rows = 0;
    // Check whether we did a forward pass for any previous chunks; if so, the
    // component input buffers will be non-empty.  These buffers store
    // information equivalent to having an nnet input buffer of
    // (nnet_.LeftContext() + nnet_.RightContext()) extra rows.
    for (int32 i = 0; i < reusable_component_inputs_.size(); i++) {
      if (reusable_component_inputs_[i].NumRows() > 0) {
        extra_input_rows = nnet_.LeftContext() + nnet_.RightContext();
        break;
      }
    }
    // Add unprocessed input from the previous calls.
    input_data.Resize(input.NumRows() + unprocessed_buffer_.NumRows(), dim);
    if (unprocessed_buffer_.NumRows() > 0)
      input_data.Range(0, unprocessed_buffer_.NumRows(),
                       0, dim).CopyFromMat(unprocessed_buffer_);
    input_data.Range(unprocessed_buffer_.NumRows(), input.NumRows(),
                     0, dim).CopyFromMat(input);
    unprocessed_buffer_.Resize(0, 0);  // clearing the unprocessed buffer
    num_effective_input_rows = input_data.NumRows() + extra_input_rows;
  }
  if (num_effective_input_rows >=
      nnet_.LeftContext() + nnet_.RightContext() + 1) {
    // We have sufficient frames to compute at least one nnet output.
    nnet_.ComputeChunkInfo(num_effective_input_rows, 1, &chunk_info_);
    Propagate();
    *output = data_.back();
  } else {
    // Store the input in the unprocessed_buffer_.
    unprocessed_buffer_ = input_data;
    // Not enough input context, so just return an empty output.
    output->Resize(0, 0);
  }
}
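
// Illustrative sketch: one possible way a caller might drive Compute() and
// Flush() over an utterance in fixed-size chunks.  The function name, the
// chunk size of 50, and the output-collection strategy are arbitrary choices
// made for this example; they are not part of the original file.
static void ExampleChunkedForwardPass(const Nnet &nnet,
                                      const CuMatrix<BaseFloat> &feats,
                                      std::vector<CuMatrix<BaseFloat> > *outputs) {
  NnetOnlineComputer computer(nnet, true);  // pad_input == true.
  int32 chunk_size = 50;
  for (int32 start = 0; start < feats.NumRows(); start += chunk_size) {
    int32 this_chunk = feats.NumRows() - start;
    if (this_chunk > chunk_size)
      this_chunk = chunk_size;
    CuMatrix<BaseFloat> output;
    computer.Compute(feats.RowRange(start, this_chunk), &output);
    // Compute() returns an empty matrix until enough context is available.
    if (output.NumRows() > 0)
      outputs->push_back(output);
  }
  CuMatrix<BaseFloat> last_output;
  computer.Flush(&last_output);  // emits the final (right-padded) frames.
  if (last_output.NumRows() > 0)
    outputs->push_back(last_output);
}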

void NnetOnlineComputer::Flush(CuMatrix<BaseFloat> *output) {
  int32 num_frames_padding = (pad_input_ ? nnet_.RightContext() : 0);
  int32 num_stored_frames = nnet_.LeftContext() + nnet_.RightContext();
  int32 num_effective_input_rows = num_stored_frames + num_frames_padding;
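  // With padding enabled this works out to nnet_.RightContext() output
  // frames, since
  //   num_effective_input_rows - (LeftContext() + RightContext())
  //     == num_frames_padding == RightContext(),
  // i.e. exactly the frames that Compute() could not yet emit for lack of
  // right context.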
  // If the amount of output would be empty, return at this point.
  if (num_effective_input_rows <
      nnet_.LeftContext() + nnet_.RightContext() + 1) {
    output->Resize(0, 0);
    finished_ = true;
    return;
  }

  int32 dim = nnet_.InputDim();
  CuMatrix<BaseFloat> &input_data(data_[0]);
  KALDI_ASSERT(num_frames_padding > 0);  // else we would have returned above.
  input_data.Resize(num_frames_padding, dim);
  // Pad with copies of the last frame seen by Compute().
  input_data.CopyRowsFromVec(last_seen_input_frame_);

  // Note: we later modify this chunk-info; it isn't quite correct right now
  // because we add extra data at intermediate layers, and the actual number of
  // input rows doesn't equal num_effective_input_rows.
  nnet_.ComputeChunkInfo(num_effective_input_rows, 1, &chunk_info_);
  Propagate();
  *output = data_.back();
  finished_ = true;
}

void NnetOnlineComputer::Propagate() {
  // This method is like the normal nnet propagate, but we reuse the frames
  // computed from the previous chunk, at each component.
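  // Concretely: for every component with a nontrivial context, we keep the
  // last (Context().back() - Context().front()) rows of its input from the
  // previous call (in reusable_component_inputs_) and prepend them to this
  // call's input, so the network does not have to recompute those
  // activations from the raw features.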

  for (int32 c = 0; c < nnet_.NumComponents(); c++) {
    // We assume that the chunks are always contiguous.
    chunk_info_[c].MakeOffsetsContiguous();
    chunk_info_[c + 1].MakeOffsetsContiguous();

    const Component &component = nnet_.GetComponent(c);
    CuMatrix<BaseFloat> &input_data = data_[c], &output_data = data_[c + 1];
    CuMatrix<BaseFloat> input_data_temp;

    if (component.Context().size() > 1) {
      int32 dim = component.InputDim();
      if (reusable_component_inputs_[c].NumRows() > 0) {
        // Concatenate any frames computed by the previous component in the
        // last call to the input of the current component.
        input_data_temp.Resize(reusable_component_inputs_[c].NumRows()
                               + input_data.NumRows(), dim);
        input_data_temp.Range(0, reusable_component_inputs_[c].NumRows(),
                              0, dim).CopyFromMat(reusable_component_inputs_[c]);
        input_data_temp.Range(reusable_component_inputs_[c].NumRows(),
                              input_data.NumRows(), 0, dim).CopyFromMat(
                                  input_data);
        input_data = input_data_temp;
      }
      // Store any frames which can be reused in the next call.
      reusable_component_inputs_[c].Resize(component.Context().back() -
                                           component.Context().front(), dim);
      reusable_component_inputs_[c].CopyFromMat(
          input_data.RowRange(input_data.NumRows() -
                              reusable_component_inputs_[c].NumRows(),
                              reusable_component_inputs_[c].NumRows()));
    }

    // The chunk_info objects provided assume that we added all the reusable
    // context at the input of the nnet.  However, we are reusing hidden
    // activations computed in the previous call, so we manipulate the
    // chunk_info objects to reflect the actual chunk each component is
    // computing in the current Propagate call.
    // As before, we always assume the chunks are contiguous.
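    // For instance (hypothetical numbers): if chunk_info_[c] says this
    // component's input ends at offset 19 but input_data actually has only
    // 12 rows, then first_offset = 19 - 12 + 1 = 8, so the input chunk is
    // re-described as offsets 8 through 19.  The output chunk is handled the
    // same way, with its row count reduced by the component's total context.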

    // modifying the input chunk_info
    int32 chunk_size_assumed = chunk_info_[c].ChunkSize();
    int32 last_offset = chunk_info_[c].GetOffset(chunk_size_assumed - 1);
    int32 first_offset = last_offset - input_data.NumRows() + 1;
    ChunkInfo input_chunk_info(chunk_info_[c].NumCols(),
                               chunk_info_[c].NumChunks(),
                               first_offset,
                               last_offset);
    // modifying the output chunk_info
    chunk_size_assumed = chunk_info_[c + 1].ChunkSize();
    last_offset = chunk_info_[c + 1].GetOffset(chunk_size_assumed - 1);
    first_offset = last_offset - (input_data.NumRows() -
                                  (component.Context().back() -
                                   component.Context().front())) + 1;
    ChunkInfo output_chunk_info(chunk_info_[c + 1].NumCols(),
                                chunk_info_[c + 1].NumChunks(),
                                first_offset,
                                last_offset);
    component.Propagate(input_chunk_info, output_chunk_info,
                        input_data, &output_data);
  }
}

} // namespace nnet2
} // namespace kaldi