nnet-compute-online.h
Go to the documentation of this file.
1 // nnet2/nnet-compute-online.h
2 
3 // Copyright 2014 Johns Hopkins University (author: Daniel Povey)
4 // Guoguo Chen
5 // Vijayaditya Peddinti
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_NNET2_NNET_COMPUTE_ONLINE_H_
23 #define KALDI_NNET2_NNET_COMPUTE_ONLINE_H_
24 
25 #include "nnet2/nnet-nnet.h"
26 #include <vector>
27 
28 namespace kaldi {
29 namespace nnet2 {
30 
31 /* This header provides functionality for doing forward computation in a situation
32  where you want to start from the beginning of a file and progressively compute
33  more, while re-using the hidden parts that (due to context) may be shared.
34  (note: this sharing is more of an issue in multi-splice networks where there is
35  splicing over time in the middle layers of the network).
36  Note: this doesn't do the final taking-the-log and correcting for the prior.
37  The current implementation is just an inefficient placeholder implementation;
38  later we'll modify it to properly use previously computed activations.
39 */
40 
42 
43  public:
44  // All the inputs and outputs are of type CuMatrix, in case we're doing the
45  // computation on the GPU (of course, if there is no GPU, it backs off to
46  // using the CPU).
47  // You should initialize an object of this type for each utterance you want
48  // to decode.
49 
50  // Note: pad_input will normally be true; it means that at the start and end
51  // of the file, we pad with repeats of the first/last frame, so that the total
52  // number of frames it outputs is the same as the number of input frames.
53  NnetOnlineComputer(const Nnet &nnet,
54  bool pad_input);
55 
56  // This function works as follows: given a chunk of input (interpreted
57  // as following in time any previously supplied data), do the computation
58  // and produce all the frames of output we can. In the middle of the
59  // file, the dimensions of input and output will be the same, but at
60  // the beginning of the file, output will have fewer frames than input
61  // due to required context.
62  // It is the responsibility of the user to keep track of frame indices, if
63  // required. This class won't output any frame twice.
64  void Compute(const CuMatrixBase<BaseFloat> &input,
65  CuMatrix<BaseFloat> *output);
66 
67  // This flushes out the last frames of output; you call this when all
68  // input has finished. It's invalid to call Compute or Flush after
69  // calling Flush. It's valid to call Flush if no frames have been
70  // input or if no frames have been output; this produces empty output.
71  void Flush(CuMatrix<BaseFloat> *output);
72 
73  private:
74  void Propagate();
75 
76  const Nnet &nnet_;
77 
78  // data_ contains the intermediate stages and the output of the most recent
79  // computation.
80  std::vector<CuMatrix<BaseFloat> > data_;
81 
82  std::vector<ChunkInfo> chunk_info_; // contains chunk_info(s) for the
83  // components
84 
85  std::vector<CuMatrix<BaseFloat> > reusable_component_inputs_;
86  // reusable data from previous chunk, this is a buffer to
87  // store the hidden activations before splice type components
88 
89  CuMatrix<BaseFloat> unprocessed_buffer_; // buffer to store unprocessed input
90  // from previous chunks (as we can have several chunks with insufficient
91  // context)
92 
93  CuVector<BaseFloat> last_seen_input_frame_; // stores the last seen frame
94  // for the sake of right padding the input. This is useful to deal with the
95  // scenario where the initial component is not a splice component.
96 
97  bool pad_input_; // pad input at the beginning of the decode
98 
100 
101  bool finished_; // forward-pass is complete
102 
104 };
105 
106 
107 } // namespace nnet2
108 } // namespace kaldi
109 
110 #endif // KALDI_NNET2_NNET_COMPUTE_ONLINE_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
std::vector< CuMatrix< BaseFloat > > data_
std::vector< CuMatrix< BaseFloat > > reusable_component_inputs_
std::vector< ChunkInfo > chunk_info_
NnetOnlineComputer(const Nnet &nnet, bool pad_input)
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void Flush(CuMatrix< BaseFloat > *output)
CuVector< BaseFloat > last_seen_input_frame_
KALDI_DISALLOW_COPY_AND_ASSIGN(NnetOnlineComputer)
CuMatrix< BaseFloat > unprocessed_buffer_
Matrix for CUDA computing.
Definition: matrix-common.h:69
void Compute(const CuMatrixBase< BaseFloat > &input, CuMatrix< BaseFloat > *output)