nnet-am-decodable-simple.cc
// nnet3/nnet-am-decodable-simple.cc

// Copyright 2015  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet3/nnet-am-decodable-simple.h"
#include "nnet3/nnet-utils.h"

namespace kaldi {
namespace nnet3 {

DecodableNnetSimple::DecodableNnetSimple(
    const NnetSimpleComputationOptions &opts,
    const Nnet &nnet,
    const VectorBase<BaseFloat> &priors,
    const MatrixBase<BaseFloat> &feats,
    CachingOptimizingCompiler *compiler,
    const VectorBase<BaseFloat> *ivector,
    const MatrixBase<BaseFloat> *online_ivectors,
    int32 online_ivector_period):
    opts_(opts),
    nnet_(nnet),
    output_dim_(nnet_.OutputDim("output")),
    log_priors_(priors),
    feats_(feats),
    ivector_(ivector), online_ivector_feats_(online_ivectors),
    online_ivector_period_(online_ivector_period),
    compiler_(*compiler),
    current_log_post_subsampled_offset_(0) {
  num_subsampled_frames_ =
      (feats_.NumRows() + opts_.frame_subsampling_factor - 1) /
      opts_.frame_subsampling_factor;
  KALDI_ASSERT(IsSimpleNnet(nnet));
  compiler_.GetSimpleNnetContext(&nnet_left_context_, &nnet_right_context_);
  KALDI_ASSERT(!(ivector != NULL && online_ivectors != NULL));
  KALDI_ASSERT(!(online_ivectors != NULL && online_ivector_period <= 0 &&
                 "You need to set the --online-ivector-period option!"));
  log_priors_.ApplyLog();
  CheckAndFixConfigs();
}
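// Worked example (illustrative, not from the original file): with a
// 100-frame utterance and --frame-subsampling-factor=3, the ceiling division
// above gives num_subsampled_frames_ = (100 + 3 - 1) / 3 = 34, i.e. one
// output is produced for every third input frame, rounding up at the end.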
DecodableAmNnetSimple::DecodableAmNnetSimple(
    const NnetSimpleComputationOptions &opts,
    const TransitionModel &trans_model,
    const AmNnetSimple &am_nnet,
    const MatrixBase<BaseFloat> &feats,
    const VectorBase<BaseFloat> *ivector,
    const MatrixBase<BaseFloat> *online_ivectors,
    int32 online_ivector_period,
    CachingOptimizingCompiler *compiler):
    compiler_(am_nnet.GetNnet(), opts.optimize_config, opts.compiler_config),
    decodable_nnet_(opts, am_nnet.GetNnet(), am_nnet.Priors(),
                    feats, compiler != NULL ? compiler : &compiler_,
                    ivector, online_ivectors,
                    online_ivector_period),
    trans_model_(trans_model) {
  // note: we only use compiler_ if the passed-in 'compiler' is NULL.
}
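// Sketch (illustrative, not from the original file): a program that decodes
// many utterances can construct a single CachingOptimizingCompiler and pass
// its address in as 'compiler', so compiled computations are cached and
// reused across utterances, e.g.:
//
//   CachingOptimizingCompiler compiler(am_nnet.GetNnet(),
//                                      opts.optimize_config,
//                                      opts.compiler_config);
//   // ... then, for each utterance, with its own 'feats':
//   DecodableAmNnetSimple decodable(opts, trans_model, am_nnet, feats,
//                                   NULL, NULL, 1, &compiler);
//
// Here 'opts', 'trans_model', 'am_nnet' and 'feats' are assumed to already
// exist in the caller; the argument order follows the constructor above.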
BaseFloat DecodableAmNnetSimple::LogLikelihood(int32 frame,
                                               int32 transition_id) {
  int32 pdf_id = trans_model_.TransitionIdToPdfFast(transition_id);
  return decodable_nnet_.GetOutput(frame, pdf_id);
}

int32 DecodableNnetSimple::GetIvectorDim() const {
  if (ivector_ != NULL)
    return ivector_->Dim();
  else if (online_ivector_feats_ != NULL)
    return online_ivector_feats_->NumCols();
  else
    return 0;
}
void DecodableNnetSimple::EnsureFrameIsComputed(int32 subsampled_frame) {
  KALDI_ASSERT(subsampled_frame >= 0 &&
               subsampled_frame < num_subsampled_frames_);
  int32 feature_dim = feats_.NumCols(),
      ivector_dim = GetIvectorDim(),
      nnet_input_dim = nnet_.InputDim("input"),
      nnet_ivector_dim = std::max<int32>(0, nnet_.InputDim("ivector"));
  if (feature_dim != nnet_input_dim)
    KALDI_ERR << "Neural net expects 'input' features with dimension "
              << nnet_input_dim << " but you provided " << feature_dim;
  if (ivector_dim != nnet_ivector_dim)
    KALDI_ERR << "Neural net expects 'ivector' features with dimension "
              << nnet_ivector_dim << " but you provided " << ivector_dim;

  int32 current_subsampled_frames_computed = current_log_post_.NumRows(),
      current_subsampled_offset = current_log_post_subsampled_offset_;
  // This function is only called when the requested frame is not already
  // cached in current_log_post_.
  KALDI_ASSERT(subsampled_frame < current_subsampled_offset ||
               subsampled_frame >= current_subsampled_offset +
                                   current_subsampled_frames_computed);

  // all subsampled frames pertain to the output of the network; they are the
  // output-frame numbers divided by opts_.frame_subsampling_factor.
  int32 subsampling_factor = opts_.frame_subsampling_factor,
      subsampled_frames_per_chunk = opts_.frames_per_chunk / subsampling_factor,
      start_subsampled_frame = subsampled_frame,
      num_subsampled_frames = std::min<int32>(num_subsampled_frames_ -
                                              start_subsampled_frame,
                                              subsampled_frames_per_chunk),
      last_subsampled_frame = start_subsampled_frame + num_subsampled_frames - 1;
  KALDI_ASSERT(num_subsampled_frames > 0);
  // the output-frame numbers are the subsampled-frame numbers times the
  // subsampling factor.
  int32 first_output_frame = start_subsampled_frame * subsampling_factor,
      last_output_frame = last_subsampled_frame * subsampling_factor;

  KALDI_ASSERT(opts_.extra_left_context >= 0 && opts_.extra_right_context >= 0);
  int32 extra_left_context = opts_.extra_left_context,
      extra_right_context = opts_.extra_right_context;
  if (first_output_frame == 0 && opts_.extra_left_context_initial >= 0)
    extra_left_context = opts_.extra_left_context_initial;
  if (last_subsampled_frame == num_subsampled_frames_ - 1 &&
      opts_.extra_right_context_final >= 0)
    extra_right_context = opts_.extra_right_context_final;
  int32 left_context = nnet_left_context_ + extra_left_context,
      right_context = nnet_right_context_ + extra_right_context;
  int32 first_input_frame = first_output_frame - left_context,
      last_input_frame = last_output_frame + right_context,
      num_input_frames = last_input_frame + 1 - first_input_frame;
  Vector<BaseFloat> ivector;
  GetCurrentIvector(first_output_frame,
                    last_output_frame - first_output_frame,
                    &ivector);

  if (first_input_frame >= 0 &&
      last_input_frame < feats_.NumRows()) {
    SubMatrix<BaseFloat> input_feats(feats_.RowRange(first_input_frame,
                                                     num_input_frames));
    DoNnetComputation(first_input_frame, input_feats, ivector,
                      first_output_frame, num_subsampled_frames);
  } else {
    // The chunk overlaps the start or end of the utterance, so build a
    // temporary feature block, repeating the first or last frame as needed.
    Matrix<BaseFloat> feats_block(num_input_frames, feats_.NumCols());
    int32 tot_input_feats = feats_.NumRows();
    for (int32 i = 0; i < num_input_frames; i++) {
      SubVector<BaseFloat> dest(feats_block, i);
      int32 t = i + first_input_frame;
      if (t < 0) t = 0;
      if (t >= tot_input_feats) t = tot_input_feats - 1;
      const SubVector<BaseFloat> src(feats_, t);
      dest.CopyFromVec(src);
    }
    DoNnetComputation(first_input_frame, feats_block, ivector,
                      first_output_frame, num_subsampled_frames);
  }
}
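// Worked example (illustrative, not from the original file), assuming
// --frames-per-chunk=150, --frame-subsampling-factor=3, no extra-context
// options set, and a model with left-context 16 and right-context 11:
// subsampled_frames_per_chunk = 50, so a request for subsampled_frame = 0 in
// a long utterance computes output frames 0..147, which need input frames
// -16..158; since -16 < 0, the "else" branch above builds a feature block
// that repeats frame 0 for the negative t values.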
// note: in the normal case (with no frame subsampling) you can ignore the
// 'subsampled_' in the variable name.
void DecodableNnetSimple::GetOutputForFrame(int32 subsampled_frame,
                                            VectorBase<BaseFloat> *output) {
  if (subsampled_frame < current_log_post_subsampled_offset_ ||
      subsampled_frame >= current_log_post_subsampled_offset_ +
                          current_log_post_.NumRows())
    EnsureFrameIsComputed(subsampled_frame);
  output->CopyFromVec(current_log_post_.Row(
      subsampled_frame - current_log_post_subsampled_offset_));
}
void DecodableNnetSimple::GetCurrentIvector(int32 output_t_start,
                                            int32 num_output_frames,
                                            Vector<BaseFloat> *ivector) {
  if (ivector_ != NULL) {
    *ivector = *ivector_;
    return;
  } else if (online_ivector_feats_ == NULL) {
    return;
  }
  KALDI_ASSERT(online_ivector_period_ > 0);
  // frame_to_search is the frame that we want to get the most recent iVector
  // for.  We choose a point near the middle of the current window, the concept
  // being that this is the fairest comparison to nnet2.  Obviously we could do
  // better by always taking the last frame's iVector, but decoding with
  // 'online' ivectors is only really a mechanism to simulate online operation.
  int32 frame_to_search = output_t_start + num_output_frames / 2;
  int32 ivector_frame = frame_to_search / online_ivector_period_;
  KALDI_ASSERT(ivector_frame >= 0);
  if (ivector_frame >= online_ivector_feats_->NumRows()) {
    int32 margin = ivector_frame - (online_ivector_feats_->NumRows() - 1);
    if (margin * online_ivector_period_ > 50) {
      // Half a second seems like too long to be explainable as edge effects.
      KALDI_ERR << "Could not get iVector for frame " << frame_to_search
                << ", only available till frame "
                << online_ivector_feats_->NumRows()
                << " * ivector-period=" << online_ivector_period_
                << " (mismatched --online-ivector-period?)";
    }
    ivector_frame = online_ivector_feats_->NumRows() - 1;
  }
  *ivector = online_ivector_feats_->Row(ivector_frame);
}
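// Worked example (illustrative, not from the original file): with
// --online-ivector-period=10 and a chunk whose output starts at t = 40 and
// spans 147 output frames, frame_to_search = 40 + 147 / 2 = 113, so
// ivector_frame = 113 / 10 = 11 and row 11 of the online-iVector matrix is
// used for the whole chunk (or the last row, if fewer rows are available).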
void DecodableNnetSimple::DoNnetComputation(
    int32 input_t_start,
    const MatrixBase<BaseFloat> &input_feats,
    const VectorBase<BaseFloat> &ivector,
    int32 output_t_start,
    int32 num_subsampled_frames) {
  ComputationRequest request;
  request.need_model_derivative = false;
  request.store_component_stats = false;

  bool shift_time = true;  // shift the 'input' and 'output' to a consistent
                           // time, to take advantage of caching in the
                           // compiler.  An optimization.
  int32 time_offset = (shift_time ? -output_t_start : 0);

  // First add the regular features -- named "input".
  request.inputs.reserve(2);
  request.inputs.push_back(
      IoSpecification("input", time_offset + input_t_start,
                      time_offset + input_t_start + input_feats.NumRows()));
  if (ivector.Dim() != 0) {
    std::vector<Index> indexes;
    indexes.push_back(Index(0, 0, 0));
    request.inputs.push_back(IoSpecification("ivector", indexes));
  }
  IoSpecification output_spec;
  output_spec.name = "output";
  output_spec.has_deriv = false;
  int32 subsample = opts_.frame_subsampling_factor;
  output_spec.indexes.resize(num_subsampled_frames);
  // leave n and x values at 0 (the constructor sets these).
  for (int32 i = 0; i < num_subsampled_frames; i++)
    output_spec.indexes[i].t = time_offset + output_t_start + i * subsample;
  request.outputs.resize(1);
  request.outputs[0].Swap(&output_spec);

  std::shared_ptr<const NnetComputation> computation = compiler_.Compile(request);
  Nnet *nnet_to_update = NULL;  // we're not doing any update.
  NnetComputer computer(opts_.compute_config, *computation,
                        nnet_, nnet_to_update);

  CuMatrix<BaseFloat> input_feats_cu(input_feats);
  computer.AcceptInput("input", &input_feats_cu);
  CuMatrix<BaseFloat> ivector_feats_cu;
  if (ivector.Dim() > 0) {
    ivector_feats_cu.Resize(1, ivector.Dim());
    ivector_feats_cu.Row(0).CopyFromVec(ivector);
    computer.AcceptInput("ivector", &ivector_feats_cu);
  }
  computer.Run();
  CuMatrix<BaseFloat> cu_output;
  computer.GetOutputDestructive("output", &cu_output);
  // subtract the log-prior (divide by the prior)
  if (log_priors_.Dim() != 0)
    cu_output.AddVecToRows(-1.0, log_priors_);
  // apply the acoustic scale
  cu_output.Scale(opts_.acoustic_scale);
  current_log_post_.Resize(0, 0);
  // the following statement just swaps the pointers if we're not using a GPU.
  cu_output.Swap(&current_log_post_);
  current_log_post_subsampled_offset_ = output_t_start / subsample;
}
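// Note (editorial, not from the original file): because shift_time is true,
// every chunk's ComputationRequest is expressed with 'output' t-values
// starting at 0 (a chunk with output_t_start = 150 uses time_offset = -150),
// so structurally identical chunks map to the same request and
// CachingOptimizingCompiler::Compile() can return a previously compiled
// computation instead of recompiling.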
void DecodableNnetSimple::CheckAndFixConfigs() {
  static bool warned_frames_per_chunk = false;
  int32 nnet_modulus = nnet_.Modulus();
  if (opts_.frame_subsampling_factor < 1 ||
      opts_.frames_per_chunk < 1)
    KALDI_ERR << "--frame-subsampling-factor and --frames-per-chunk must be > 0";
  KALDI_ASSERT(nnet_modulus > 0);
  int32 n = Lcm(opts_.frame_subsampling_factor, nnet_modulus);

  if (opts_.frames_per_chunk % n != 0) {
    // round up to the nearest multiple of n.
    int32 frames_per_chunk = n * ((opts_.frames_per_chunk + n - 1) / n);
    if (!warned_frames_per_chunk) {
      warned_frames_per_chunk = true;
      if (nnet_modulus == 1) {
        // simpler message for the common case.
        KALDI_LOG << "Increasing --frames-per-chunk from "
                  << opts_.frames_per_chunk << " to "
                  << frames_per_chunk << " to make it a multiple of "
                  << "--frame-subsampling-factor="
                  << opts_.frame_subsampling_factor;
      } else {
        KALDI_LOG << "Increasing --frames-per-chunk from "
                  << opts_.frames_per_chunk << " to "
                  << frames_per_chunk << " due to "
                  << "--frame-subsampling-factor="
                  << opts_.frame_subsampling_factor << " and "
                  << "nnet shift-invariance modulus = " << nnet_modulus;
      }
    }
    opts_.frames_per_chunk = frames_per_chunk;
  }
}
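// Worked example (illustrative, not from the original file): with
// --frames-per-chunk=50, --frame-subsampling-factor=3 and an nnet modulus of
// 1, n = Lcm(3, 1) = 3 and frames_per_chunk is rounded up to
// 3 * ((50 + 3 - 1) / 3) = 51.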
DecodableAmNnetSimpleParallel::DecodableAmNnetSimpleParallel(
    const NnetSimpleComputationOptions &opts,
    const TransitionModel &trans_model,
    const AmNnetSimple &am_nnet,
    const MatrixBase<BaseFloat> &feats,
    const VectorBase<BaseFloat> *ivector,
    const MatrixBase<BaseFloat> *online_ivectors,
    int32 online_ivector_period):
    compiler_(am_nnet.GetNnet(), opts.optimize_config, opts.compiler_config),
    trans_model_(trans_model),
    feats_copy_(NULL),
    ivector_copy_(NULL),
    online_ivectors_copy_(NULL),
    decodable_nnet_(NULL) {
  try {
    feats_copy_ = new Matrix<BaseFloat>(feats);
    if (ivector != NULL)
      ivector_copy_ = new Vector<BaseFloat>(*ivector);
    if (online_ivectors != NULL)
      online_ivectors_copy_ = new Matrix<BaseFloat>(*online_ivectors);
    decodable_nnet_ = new DecodableNnetSimple(opts, am_nnet.GetNnet(),
                                              am_nnet.Priors(), *feats_copy_,
                                              &compiler_, ivector_copy_,
                                              online_ivectors_copy_,
                                              online_ivector_period);
  } catch (...) {
    DeletePointers();
    KALDI_ERR << "Error occurred in constructor (see above)";
  }
}
void DecodableAmNnetSimpleParallel::DeletePointers() {
  // 'delete' does nothing for NULL pointers, so no checks are needed.
  delete decodable_nnet_;
  decodable_nnet_ = NULL;
  delete feats_copy_;
  feats_copy_ = NULL;
  delete ivector_copy_;
  ivector_copy_ = NULL;
  delete online_ivectors_copy_;
  online_ivectors_copy_ = NULL;
}


BaseFloat DecodableAmNnetSimpleParallel::LogLikelihood(int32 frame,
                                                       int32 transition_id) {
  int32 pdf_id = trans_model_.TransitionIdToPdfFast(transition_id);
  return decodable_nnet_->GetOutput(frame, pdf_id);
}


} // namespace nnet3
} // namespace kaldi
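A minimal usage sketch (an editorial addition, not part of the original file): it assumes a "final.mdl" file containing a TransitionModel followed by an AmNnetSimple, and a per-utterance feature matrix; the file name, the acoustic-scale value and the single LogLikelihood() call are illustrative placeholders only.

// example-decodable-usage.cc (hypothetical)
#include "hmm/transition-model.h"
#include "nnet3/am-nnet-simple.h"
#include "nnet3/nnet-am-decodable-simple.h"
#include "util/kaldi-io.h"

void ExampleDecodableAmNnetSimpleUsage(
    const kaldi::Matrix<kaldi::BaseFloat> &feats) {
  using namespace kaldi;
  using namespace kaldi::nnet3;
  TransitionModel trans_model;
  AmNnetSimple am_nnet;
  {
    bool binary;
    Input ki("final.mdl", &binary);  // placeholder model path
    trans_model.Read(ki.Stream(), binary);
    am_nnet.Read(ki.Stream(), binary);
  }
  NnetSimpleComputationOptions opts;
  opts.acoustic_scale = 0.1;  // typical for conventional (non-chain) systems
  // No iVectors in this sketch: the iVector arguments default to NULL.
  DecodableAmNnetSimple decodable(opts, trans_model, am_nnet, feats);
  // A decoder would call LogLikelihood(frame, transition_id) repeatedly;
  // here we just query frame 0 with transition-id 1 as an illustration.
  BaseFloat loglike = decodable.LogLikelihood(0, 1);
  KALDI_LOG << "Example log-likelihood: " << loglike;
}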