decodable-online-looped.h
Go to the documentation of this file.
1 // nnet3/decodable-online-looped.h
2 
3 // Copyright 2014-2017 Johns Hopkins University (author: Daniel Povey)
4 // 2016 Api.ai (Author: Ilya Platonov)
5 
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_NNET3_DECODABLE_ONLINE_LOOPED_H_
23 #define KALDI_NNET3_DECODABLE_ONLINE_LOOPED_H_
24 
25 #include "itf/online-feature-itf.h"
26 #include "itf/decodable-itf.h"
27 #include "nnet3/am-nnet-simple.h"
28 #include "nnet3/nnet-compute.h"
29 #include "nnet3/nnet-optimize.h"
31 #include "hmm/transition-model.h"
32 
33 namespace kaldi {
34 namespace nnet3 {
35 
36 
37 // The Decodable objects that we define in this header do the neural net
38 // computation in a way that's compatible with online feature extraction. It
39 // differs from the one declared in online-nnet3-decodable-simple.h because it
40 // uses the 'looped' network evaluation, which is more efficient because it
41 // re-uses hidden activations (and therefore doesn't have to pad chunks of data
42 // with extra left-context); it is applicable to TDNNs and to forwards-recurrent
43 // topologies like LSTMs, but not to backwards-recurrent topologies such as
44 // BLSTMs.
45 
46 // The options are passed in the same way as in decodable-simple-looped.h,
47 // we use the same options and info class.
48 
49 
50 // This object is used as a base class for DecodableNnetLoopedOnline
51 // and DecodableAmNnetLoopedOnline.
52 // It takes care of the neural net computation and computations related to how
53 // many frames are ready (etc.), but it does not override the LogLikelihood() or
54 // NumIndices() functions so it is not usable as an object of type
55 // DecodableInterface.
57  public:
58  // Constructor. 'input_features' is for the feature that will be given
59  // as 'input' to the neural network; 'ivector_features' is for the iVector
60  // feature, or NULL if iVectors are not being used.
62  OnlineFeatureInterface *input_features,
63  OnlineFeatureInterface *ivector_features);
64 
65  // note: the LogLikelihood function is not overridden; the child
66  // class needs to do this.
67  //virtual BaseFloat LogLikelihood(int32 subsampled_frame, int32 index);
68 
69  // note: the frame argument is on the output of the network, i.e. after any
70  // subsampling, so we call it 'subsampled_frame'.
71  virtual bool IsLastFrame(int32 subsampled_frame) const;
72 
73  virtual int32 NumFramesReady() const;
74 
75  // Note: this function, present in the base-class, is overridden by the child class.
76  // virtual int32 NumIndices() const;
77 
78  // this is not part of the standard Decodable interface but I think is needed for
79  // something.
82  }
83 
91  void SetFrameOffset(int32 frame_offset);
92 
94  int32 GetFrameOffset() const { return frame_offset_; }
95 
96  protected:
97 
103  inline void EnsureFrameIsComputed(int32 subsampled_frame) {
105  "Frames must be accessed in order.");
106  while (subsampled_frame >= current_log_post_subsampled_offset_ +
108  AdvanceChunk();
109  }
110 
111  // The current log-posteriors that we got from the last time we
112  // ran the computation.
114 
115  // The number of chunks we have computed so far.
117 
118  // The time-offset of the current log-posteriors, equals
119  // (num_chunks_computed_ - 1) *
120  // (info_.frames_per_chunk_ / info_.opts_.frame_subsampling_factor).
122 
124 
125  // IsLastFrame(), NumFramesReady() and LogLikelihood() methods take into
126  // account this offset value. We initialize frame_offset_ as 0 and it stays as
127  // 0 unless SetFrameOffset() method is called.
129 
130  private:
131 
132  // This function does the computation for the next chunk. It will change
133  // current_log_post_ and current_log_post_subsampled_offset_, and
134  // increment num_chunks_computed_.
135  void AdvanceChunk();
136 
139 
141 
143 };
144 
145 // This decodable object takes indexes of the form (pdf_id + 1),
146 // or whatever the output-dimension of the neural network represents,
147 // plus one.
148 // It fully implements DecodableInterface.
149 // Note: whether or not division by the prior takes place depends on
150 // whether you supplied class AmNnetSimple (or just Nnet), to the constructor
151 // of the DecodableNnetSimpleLoopedInfo that you initialized this
152 // with.
154  public:
156  const DecodableNnetSimpleLoopedInfo &info,
157  OnlineFeatureInterface *input_features,
158  OnlineFeatureInterface *ivector_features):
159  DecodableNnetLoopedOnlineBase(info, input_features, ivector_features) { }
160 
161 
162  // returns the output-dim of the neural net.
163  virtual int32 NumIndices() const { return info_.output_dim; }
164 
165  // 'subsampled_frame' is a frame, but if frame-subsampling-factor != 1, it's a
166  // reduced-rate output frame (e.g. a 't' index divided by 3). 'index'
167  // represents the pdf-id (or other output of the network) PLUS ONE.
168  virtual BaseFloat LogLikelihood(int32 subsampled_frame, int32 index);
169 
170  private:
172 
173 };
174 
175 
176 // This is for traditional decoding where the graph has transition-ids
177 // on the arcs, and you need the TransitionModel to map those to
178 // pdf-ids.
179 // Note: whether or not division by the prior takes place depends on
180 // whether you supplied class AmNnetSimple (or just Nnet), to the constructor
181 // of the DecodableNnetSimpleLoopedInfo that you initialized this
182 // with.
184  public:
186  const TransitionModel &trans_model,
187  const DecodableNnetSimpleLoopedInfo &info,
188  OnlineFeatureInterface *input_features,
189  OnlineFeatureInterface *ivector_features):
190  DecodableNnetLoopedOnlineBase(info, input_features, ivector_features),
191  trans_model_(trans_model) { }
192 
193 
194  // returns the number of transition-ids in the transition model.
195  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
196 
197  // 'subsampled_frame' is a frame, but if frame-subsampling-factor != 1, it's a
198  // reduced-rate output frame (e.g. a 't' index divided by 3).
199  virtual BaseFloat LogLikelihood(int32 subsampled_frame,
200  int32 transition_id);
201 
202  private:
204 
206 
207 };
208 
209 
210 
211 
212 } // namespace nnet3
213 } // namespace kaldi
214 
215 #endif // KALDI_NNET3_DECODABLE_ONLINE_LOOPED_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableNnetLoopedOnlineBase)
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
kaldi::int32 int32
void EnsureFrameIsComputed(int32 subsampled_frame)
If the neural-network outputs for this frame are not cached, this function computes them (and possibl...
virtual bool IsLastFrame(int32 subsampled_frame) const
Returns true if this is the last frame.
const NnetSimpleLoopedComputationOptions & opts
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
DecodableAmNnetLoopedOnline(const TransitionModel &trans_model, const DecodableNnetSimpleLoopedInfo &info, OnlineFeatureInterface *input_features, OnlineFeatureInterface *ivector_features)
DecodableNnetLoopedOnlineBase(const DecodableNnetSimpleLoopedInfo &info, OnlineFeatureInterface *input_features, OnlineFeatureInterface *ivector_features)
const DecodableNnetSimpleLoopedInfo & info_
int32 GetFrameOffset() const
Returns the frame offset value.
class NnetComputer is responsible for executing the computation described in the "computation" object...
Definition: nnet-compute.h:59
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
DecodableNnetLoopedOnline(const DecodableNnetSimpleLoopedInfo &info, OnlineFeatureInterface *input_features, OnlineFeatureInterface *ivector_features)
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
virtual BaseFloat LogLikelihood(int32 frame, int32 index)=0
Returns the log likelihood, which will be negated in the decoder.
When you instantiate class DecodableNnetSimpleLooped, you should give it a const reference to this cl...
void SetFrameOffset(int32 frame_offset)
Sets the frame offset value.