decodable-online-looped.cc
Go to the documentation of this file.
1 // nnet3/decodable-online-looped.cc
2 
3 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
21 #include "nnet3/nnet-utils.h"
22 
23 namespace kaldi {
24 namespace nnet3 {
25 
28  OnlineFeatureInterface *input_features,
29  OnlineFeatureInterface *ivector_features):
30  num_chunks_computed_(0),
31  current_log_post_subsampled_offset_(-1),
32  info_(info),
33  frame_offset_(0),
34  input_features_(input_features),
35  ivector_features_(ivector_features),
36  computer_(info_.opts.compute_config, info_.computation,
37  info_.nnet, NULL) { // NULL is 'nnet_to_update'
38  // Check that feature dimensions match.
40  int32 nnet_input_dim = info_.nnet.InputDim("input"),
41  nnet_ivector_dim = info_.nnet.InputDim("ivector"),
42  feat_input_dim = input_features_->Dim(),
43  feat_ivector_dim = (ivector_features_ != NULL ?
44  ivector_features_->Dim() : -1);
45  if (nnet_input_dim != feat_input_dim) {
46  KALDI_ERR << "Input feature dimension mismatch: got " << feat_input_dim
47  << " but network expects " << nnet_input_dim;
48  }
49  if (nnet_ivector_dim != feat_ivector_dim) {
50  KALDI_ERR << "Ivector feature dimension mismatch: got " << feat_ivector_dim
51  << " but network expects " << nnet_ivector_dim;
52  }
53 }
54 
55 
57  // note: the ivector_features_ may have 2 or 3 fewer frames ready than
58  // input_features_, but we don't wait for them; we just use the most recent
59  // iVector we can.
60  int32 features_ready = input_features_->NumFramesReady();
61  if (features_ready == 0)
62  return 0;
63  bool input_finished = input_features_->IsLastFrame(features_ready - 1);
64 
66 
67  if (input_finished) {
68  // if the input has finished, we'll pad with duplicates of the last frame
69  // as needed to get the required right context.
70  return (features_ready + sf - 1) / sf - frame_offset_;
71  } else {
72  // note: info_.frames_right_context includes both the model context and any
73  // extra right context.
74  int32 non_subsampled_output_frames_ready =
75  std::max<int32>(0, features_ready - info_.frames_right_context);
76  int32 num_chunks_ready = non_subsampled_output_frames_ready /
78  // note: the division by the frame subsampling factor 'sf' below
79  // doesn't need any attention to rounding because info_.frames_per_chunk
80  // is always a multiple of 'sf' (see 'frames_per_chunk = GetChunksize...'
81  // in decodable-simple-looped.cc).
82  return num_chunks_ready * info_.frames_per_chunk / sf - frame_offset_;
83  }
84 }
85 
86 
87 // note: the frame-index argument is on the output of the network, i.e. after any
88 // subsampling, so we call it 'subsampled_frame'.
90  int32 subsampled_frame) const {
91  // To understand this code, compare it with the code of NumFramesReady(),
92  // it follows the same structure.
93  int32 features_ready = input_features_->NumFramesReady();
94  if (features_ready == 0) {
95  if (subsampled_frame == -1 && input_features_->IsLastFrame(-1)) {
96  // the attempt to handle this rather pathological case (input finished
97  // but no frames ready) is a little quixotic as we have not properly
98  // tested this and other parts of the code may die.
99  return true;
100  } else {
101  return false;
102  }
103  }
104  bool input_finished = input_features_->IsLastFrame(features_ready - 1);
105  if (!input_finished)
106  return false;
108  num_subsampled_frames_ready = (features_ready + sf - 1) / sf;
109  return (subsampled_frame + frame_offset_ == num_subsampled_frames_ready - 1);
110 }
111 
113  KALDI_ASSERT(0 <= frame_offset &&
114  frame_offset <= frame_offset_ + NumFramesReady());
115  frame_offset_ = frame_offset;
116 }
117 
119  // Prepare the input data for the next chunk of features.
120  // note: 'end' means one past the last.
121  int32 begin_input_frame, end_input_frame;
122  if (num_chunks_computed_ == 0) {
123  begin_input_frame = -info_.frames_left_context;
124  // note: end is last plus one.
125  end_input_frame = info_.frames_per_chunk + info_.frames_right_context;
126  } else {
127  // note: begin_input_frame will be the same as the previous end_input_frame.
128  // you can verify this directly if num_chunks_computed_ == 0, and then by
129  // induction.
130  begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk +
132  end_input_frame = begin_input_frame + info_.frames_per_chunk;
133  }
134 
135  int32 num_feature_frames_ready = input_features_->NumFramesReady();
136  bool is_finished = input_features_->IsLastFrame(num_feature_frames_ready - 1);
137 
138  if (end_input_frame > num_feature_frames_ready && !is_finished) {
139  // we shouldn't be attempting to read past the end of the available features
140  // until we have reached the end of the input (i.e. the end-user called
141  // InputFinished(), announcing that there is no more waveform); at that point
142  // we pad as needed with copies of the last frame, to flush out the last of
143  // the output.
144  // If the following error happens, it likely indicates a bug in this
145  // decodable code somewhere (although it could possibly indicate the
146  // user asking for a frame that was not ready, which would be a misuse
147  // of this class); it can be figured out from gdb, as in either case it
148  // would be a bug in the code.
149  KALDI_ERR << "Attempt to access frame past the end of the available input";
150  }
151 
152 
153  CuMatrix<BaseFloat> feats_chunk;
154  { // this block sets 'feats_chunk'.
155  Matrix<BaseFloat> this_feats(end_input_frame - begin_input_frame,
156  input_features_->Dim());
157  for (int32 i = begin_input_frame; i < end_input_frame; i++) {
158  SubVector<BaseFloat> this_row(this_feats, i - begin_input_frame);
159  int32 input_frame = i;
160  if (input_frame < 0) input_frame = 0;
161  if (input_frame >= num_feature_frames_ready)
162  input_frame = num_feature_frames_ready - 1;
163  input_features_->GetFrame(input_frame, &this_row);
164  }
165  feats_chunk.Swap(&this_feats);
166  }
167  computer_.AcceptInput("input", &feats_chunk);
168 
169  if (info_.has_ivectors) {
171  KALDI_ASSERT(info_.request1.inputs.size() == 2);
172  // all but the 1st chunk should have 1 iVector, but there is no need to
173  // assume this.
174  int32 num_ivectors = (num_chunks_computed_ == 0 ?
175  info_.request1.inputs[1].indexes.size() :
176  info_.request2.inputs[1].indexes.size());
177  KALDI_ASSERT(num_ivectors > 0);
178 
180  // we just get the iVector from the last input frame we needed,
181  // reduced as necessary
182  // we don't bother trying to be 'accurate' in getting the iVectors
183  // for their 'correct' frames, because in general using the
184  // iVector from as large 't' as possible will be better.
185 
186  int32 most_recent_input_frame = num_feature_frames_ready - 1,
187  num_ivector_frames_ready = ivector_features_->NumFramesReady();
188 
189  if (num_ivector_frames_ready > 0) {
190  int32 ivector_frame_to_use = std::min<int32>(
191  most_recent_input_frame, num_ivector_frames_ready - 1);
192  ivector_features_->GetFrame(ivector_frame_to_use,
193  &ivector);
194  }
195  // else just leave the iVector zero (would only happen with very small
196  // chunk-size, like a chunk size of 2 which would be very inefficient; and
197  // only at file begin).
198 
199  // note: we expect num_ivectors to be 1 in practice.
200  Matrix<BaseFloat> ivectors(num_ivectors,
201  ivector.Dim());
202  ivectors.CopyRowsFromVec(ivector);
203  CuMatrix<BaseFloat> cu_ivectors;
204  cu_ivectors.Swap(&ivectors);
205  computer_.AcceptInput("ivector", &cu_ivectors);
206  }
207  computer_.Run();
208 
209  {
210  // Note: it's possible in theory that if you had weird recurrence that went
211  // directly from the output, the call to GetOutputDestructive() would cause
212  // a crash on the next chunk. If that happens, GetOutput() should be used
213  // instead of GetOutputDestructive(). But we don't anticipate this will
214  // happen in practice.
215  CuMatrix<BaseFloat> output;
216  computer_.GetOutputDestructive("output", &output);
217 
218  if (info_.log_priors.Dim() != 0) {
219  // subtract log-prior (divide by prior)
220  output.AddVecToRows(-1.0, info_.log_priors);
221  }
222  // apply the acoustic scale
223  output.Scale(info_.opts.acoustic_scale);
225  current_log_post_.Swap(&output);
226  }
230 
232 
234  (num_chunks_computed_ - 1) *
236 }
237 
239  int32 index) {
240  subsampled_frame += frame_offset_;
241  EnsureFrameIsComputed(subsampled_frame);
242  // note: we index the column by 'index - 1'.
243  return current_log_post_(
244  subsampled_frame - current_log_post_subsampled_offset_,
245  index - 1);
246 }
247 
248 
250  int32 index) {
251  subsampled_frame += frame_offset_;
252  EnsureFrameIsComputed(subsampled_frame);
253  return current_log_post_(
254  subsampled_frame - current_log_post_subsampled_offset_,
255  trans_model_.TransitionIdToPdfFast(index));
256 }
257 
258 
259 } // namespace nnet3
260 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 InputDim(const std::string &input_name) const
Definition: nnet-nnet.cc:669
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
kaldi::int32 int32
virtual BaseFloat LogLikelihood(int32 subsampled_frame, int32 index)
Returns the log likelihood, which will be negated in the decoder.
std::vector< IoSpecification > inputs
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void EnsureFrameIsComputed(int32 subsampled_frame)
If the neural-network outputs for this frame are not cached, this function computes them (and possibl...
void Swap(Matrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
virtual bool IsLastFrame(int32 subsampled_frame) const
Returns true if this is the last frame.
This file contains some miscellaneous functions dealing with class Nnet.
const NnetSimpleLoopedComputationOptions & opts
void Scale(Real value)
Definition: cu-matrix.cc:644
void AcceptInput(const std::string &node_name, CuMatrix< BaseFloat > *input)
e.g. AcceptInput("input", &feats_chunk) supplies the matrix for the input node named "input".
virtual BaseFloat LogLikelihood(int32 subsampled_frame, int32 transition_id)
Returns the log likelihood, which will be negated in the decoder.
DecodableNnetLoopedOnlineBase(const DecodableNnetSimpleLoopedInfo &info, OnlineFeatureInterface *input_features, OnlineFeatureInterface *ivector_features)
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
const DecodableNnetSimpleLoopedInfo & info_
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
#define KALDI_ERR
Definition: kaldi-error.h:147
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
void GetOutputDestructive(const std::string &output_name, CuMatrix< BaseFloat > *output)
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
When you instantiate class DecodableNnetSimpleLooped, you should give it a const reference to this cl...
virtual int32 NumFramesReady() const =0
returns the feature dimension.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
virtual int32 Dim() const =0
void SetFrameOffset(int32 frame_offset)
Sets the frame offset value.
void Run()
This does either the forward or backward computation, depending when it is called (in a typical compu...