decodable-simple-looped.cc
// nnet3/decodable-simple-looped.cc

// Copyright 2016 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet3/decodable-simple-looped.h"
#include "nnet3/nnet-utils.h"
#include "nnet3/nnet-compile-looped.h"

namespace kaldi {
namespace nnet3 {

DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo(
    const NnetSimpleLoopedComputationOptions &opts,
    Nnet *nnet):
    opts(opts), nnet(*nnet) {
  Init(opts, nnet);
}

DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo(
    const NnetSimpleLoopedComputationOptions &opts,
    const Vector<BaseFloat> &priors,
    Nnet *nnet):
    opts(opts), nnet(*nnet), log_priors(priors) {
  if (log_priors.Dim() != 0)
    log_priors.ApplyLog();
  Init(opts, nnet);
}


DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo(
    const NnetSimpleLoopedComputationOptions &opts,
    AmNnetSimple *am_nnet):
    opts(opts), nnet(am_nnet->GetNnet()), log_priors(am_nnet->Priors()) {
  if (log_priors.Dim() != 0)
    log_priors.ApplyLog();
  Init(opts, &(am_nnet->GetNnet()));
}

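// Initializes the fields of this info object that depend on the model:
// the amount of left/right context, the chunk size, the output dimension,
// and the pre-compiled 'looped' computation that is re-used for every
// utterance decoded with this model.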
void DecodableNnetSimpleLoopedInfo::Init(
    const NnetSimpleLoopedComputationOptions &opts,
    Nnet *nnet) {
  opts.Check();
  KALDI_ASSERT(IsSimpleNnet(*nnet));
  has_ivectors = (nnet->InputDim("ivector") > 0);
  int32 left_context, right_context;
  int32 extra_right_context = 0;
  ComputeSimpleNnetContext(*nnet, &left_context, &right_context);
  frames_left_context = left_context + opts.extra_left_context_initial;
  frames_right_context = right_context + extra_right_context;
  frames_per_chunk = GetChunkSize(*nnet, opts.frame_subsampling_factor,
                                  opts.frames_per_chunk);
  output_dim = nnet->OutputDim("output");
  KALDI_ASSERT(output_dim > 0);
  // note, ivector_period is hardcoded to the same as frames_per_chunk_.
  int32 ivector_period = frames_per_chunk;
  if (has_ivectors)
    ModifyNnetIvectorPeriod(ivector_period, nnet);

  int32 num_sequences = 1;  // we're processing one utterance at a time.

  CreateLoopedComputationRequest(*nnet, frames_per_chunk,
                                 opts.frame_subsampling_factor,
                                 ivector_period,
                                 frames_left_context,
                                 frames_right_context,
                                 num_sequences,
                                 &request1, &request2, &request3);

  CompileLooped(*nnet, opts.optimize_config, request1, request2, request3,
                &computation);
  computation.ComputeCudaIndexes();
  KALDI_VLOG(3) << "Computation is:\n"
                << NnetComputationPrintInserter{computation, *nnet};
}


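// The per-utterance decodable object: it owns a NnetComputer initialized from
// the pre-compiled looped computation in 'info', and keeps references to this
// utterance's features and (optional) iVector input.  No nnet evaluation is
// done until output frames are requested.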
DecodableNnetSimpleLooped::DecodableNnetSimpleLooped(
    const DecodableNnetSimpleLoopedInfo &info,
    const MatrixBase<BaseFloat> &feats,
    const VectorBase<BaseFloat> *ivector,
    const MatrixBase<BaseFloat> *online_ivectors,
    int32 online_ivector_period):
    info_(info),
    computer_(info_.opts.compute_config, info_.computation,
              info_.nnet, NULL),  // NULL is 'nnet_to_update'
    feats_(feats),
    ivector_(ivector), online_ivector_feats_(online_ivectors),
    online_ivector_period_(online_ivector_period),
    num_chunks_computed_(0),
    current_log_post_subsampled_offset_(-1) {
  num_subsampled_frames_ =
      (feats_.NumRows() + info_.opts.frame_subsampling_factor - 1) /
      info_.opts.frame_subsampling_factor;
  KALDI_ASSERT(!(ivector != NULL && online_ivectors != NULL));
  KALDI_ASSERT(!(online_ivectors != NULL && online_ivector_period <= 0 &&
                 "You need to set the --online-ivector-period option!"));
}

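// Copies the (already acoustically-scaled) output for one subsampled frame
// into 'output', advancing through further chunks as needed.  Frames must be
// accessed in order, since chunks are evaluated sequentially.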
void DecodableNnetSimpleLooped::GetOutputForFrame(
    int32 subsampled_frame, VectorBase<BaseFloat> *output) {
  KALDI_ASSERT(subsampled_frame >= current_log_post_subsampled_offset_ &&
               "Frames must be accessed in order.");
  while (subsampled_frame >= current_log_post_subsampled_offset_ +
                             current_log_post_.NumRows())
    AdvanceChunk();
  output->CopyFromVec(current_log_post_.Row(
      subsampled_frame - current_log_post_subsampled_offset_));
}

int32 DecodableNnetSimpleLooped::GetIvectorDim() const {
  if (ivector_ != NULL)
    return ivector_->Dim();
  else if (online_ivector_feats_ != NULL)
    return online_ivector_feats_->NumCols();
  else
    return 0;
}

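// Feeds the next chunk of features (and, if needed, an iVector) to the nnet
// computer, runs the computation, and caches the scaled output for that chunk
// in current_log_post_.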
void DecodableNnetSimpleLooped::AdvanceChunk() {
  int32 begin_input_frame, end_input_frame;
  if (num_chunks_computed_ == 0) {
    begin_input_frame = -info_.frames_left_context;
    // note: end is last plus one.
    end_input_frame = info_.frames_per_chunk + info_.frames_right_context;
  } else {
    begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk +
        info_.frames_right_context;
    end_input_frame = begin_input_frame + info_.frames_per_chunk;
  }
  CuMatrix<BaseFloat> feats_chunk(end_input_frame - begin_input_frame,
                                  feats_.NumCols(), kUndefined);

  int32 num_features = feats_.NumRows();
  if (begin_input_frame >= 0 && end_input_frame <= num_features) {
    SubMatrix<BaseFloat> this_feats(feats_,
                                    begin_input_frame,
                                    end_input_frame - begin_input_frame,
                                    0, feats_.NumCols());
    feats_chunk.CopyFromMat(this_feats);
  } else {
    // The requested chunk extends past the ends of the feature matrix;
    // pad by repeating the first or last frame as needed.
    Matrix<BaseFloat> this_feats(end_input_frame - begin_input_frame,
                                 feats_.NumCols());
    for (int32 r = begin_input_frame; r < end_input_frame; r++) {
      int32 input_frame = r;
      if (input_frame < 0) input_frame = 0;
      if (input_frame >= num_features) input_frame = num_features - 1;
      this_feats.Row(r - begin_input_frame).CopyFromVec(
          feats_.Row(input_frame));
    }
    feats_chunk.CopyFromMat(this_feats);
  }
  computer_.AcceptInput("input", &feats_chunk);

  if (info_.has_ivectors) {
    KALDI_ASSERT(info_.request1.inputs.size() == 2);
    // all but the 1st chunk should have 1 iVector, but no need
    // to assume this.
    int32 num_ivectors = (num_chunks_computed_ == 0 ?
                          info_.request1.inputs[1].indexes.size() :
                          info_.request2.inputs[1].indexes.size());
    KALDI_ASSERT(num_ivectors > 0);

    Vector<BaseFloat> ivector;
    // we just get the iVector from the last input frame we needed...
    // we don't bother trying to be 'accurate' in getting the iVectors
    // for their 'correct' frames, because in general using the
    // iVector from as large 't' as possible will be better.
    GetCurrentIvector(end_input_frame, &ivector);
    Matrix<BaseFloat> ivectors(num_ivectors,
                               ivector.Dim());
    ivectors.CopyRowsFromVec(ivector);
    CuMatrix<BaseFloat> cu_ivectors(ivectors);
    computer_.AcceptInput("ivector", &cu_ivectors);
  }
  computer_.Run();

  {
    // Note: it's possible in theory that if you had weird recurrence that went
    // directly from the output, the call to GetOutputDestructive() would cause
    // a crash on the next chunk.  If that happens, GetOutput() should be used
    // instead of GetOutputDestructive().  But we don't anticipate this will
    // happen in practice.
    CuMatrix<BaseFloat> output;
    computer_.GetOutputDestructive("output", &output);

    if (info_.log_priors.Dim() != 0) {
      // subtract log-prior (divide by prior)
      output.AddVecToRows(-1.0, info_.log_priors);
    }
    // apply the acoustic scale
    output.Scale(info_.opts.acoustic_scale);
    current_log_post_.Resize(0, 0);
    current_log_post_.Swap(&output);
  }
  KALDI_ASSERT(current_log_post_.NumRows() == info_.frames_per_chunk /
               info_.opts.frame_subsampling_factor &&
               current_log_post_.NumCols() == info_.output_dim);

  num_chunks_computed_++;

  current_log_post_subsampled_offset_ =
      (num_chunks_computed_ - 1) *
      (info_.frames_per_chunk / info_.opts.frame_subsampling_factor);
}

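// Sets 'ivector' to the iVector to use for the chunk of input ending at
// 'input_frame': either the single utterance-level iVector, or the row of the
// online-iVector matrix closest to that frame.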
void DecodableNnetSimpleLooped::GetCurrentIvector(int32 input_frame,
                                                  Vector<BaseFloat> *ivector) {
  if (!info_.has_ivectors)
    return;
  if (ivector_ != NULL) {
    *ivector = *ivector_;
    return;
  } else if (online_ivector_feats_ == NULL) {
    KALDI_ERR << "Neural net expects iVectors but none provided.";
  }
  KALDI_ASSERT(online_ivector_period_ > 0);
  int32 ivector_frame = input_frame / online_ivector_period_;
  KALDI_ASSERT(ivector_frame >= 0);
  if (ivector_frame >= online_ivector_feats_->NumRows())
    ivector_frame = online_ivector_feats_->NumRows() - 1;
  KALDI_ASSERT(ivector_frame >= 0 && "ivector matrix cannot be empty.");
  *ivector = online_ivector_feats_->Row(ivector_frame);
}

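// DecodableAmNnetSimpleLooped adapts DecodableNnetSimpleLooped to the
// DecodableInterface used by the decoders: LogLikelihood() maps a
// transition-id to a pdf-id and returns the cached output for that pdf.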
DecodableAmNnetSimpleLooped::DecodableAmNnetSimpleLooped(
    const DecodableNnetSimpleLoopedInfo &info,
    const TransitionModel &trans_model,
    const MatrixBase<BaseFloat> &feats,
    const VectorBase<BaseFloat> *ivector,
    const MatrixBase<BaseFloat> *online_ivectors,
    int32 online_ivector_period):
    decodable_nnet_(info, feats, ivector, online_ivectors, online_ivector_period),
    trans_model_(trans_model) { }

BaseFloat DecodableAmNnetSimpleLooped::LogLikelihood(int32 frame,
                                                     int32 transition_id) {
  int32 pdf_id = trans_model_.TransitionIdToPdfFast(transition_id);
  return decodable_nnet_.GetOutput(frame, pdf_id);
}


} // namespace nnet3
} // namespace kaldi
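
Below is a minimal usage sketch (not part of the file above). It assumes a TransitionModel, an AmNnetSimple model and a per-utterance feature matrix have already been loaded elsewhere; the names trans_model, am_nnet, decodable_opts, feats and ExampleUsage are placeholders for illustration. It shows the intended pattern: the DecodableNnetSimpleLoopedInfo is built once per model and shared, while a fresh decodable object is created for each utterance.

#include "hmm/transition-model.h"
#include "nnet3/am-nnet-simple.h"
#include "nnet3/decodable-simple-looped.h"

// Sketch: decode-side use of the classes defined above.  'decodable_opts'
// would normally be configured from the command line; here it is passed in.
void ExampleUsage(const kaldi::TransitionModel &trans_model,
                  kaldi::nnet3::AmNnetSimple *am_nnet,
                  const kaldi::nnet3::NnetSimpleLoopedComputationOptions &decodable_opts,
                  const kaldi::Matrix<kaldi::BaseFloat> &feats) {
  using namespace kaldi;
  using namespace kaldi::nnet3;
  // Built once per model: compiles the looped computation and takes the
  // log of the priors stored with the acoustic model.
  DecodableNnetSimpleLoopedInfo info(decodable_opts, am_nnet);
  // Built once per utterance (this sketch passes no iVectors).
  DecodableAmNnetSimpleLooped decodable(info, trans_model, feats);
  // A decoder would query LogLikelihood() for (frame, transition-id) pairs;
  // for illustration, look up transition-id 1 on frame 0.
  BaseFloat loglike = decodable.LogLikelihood(0, 1);
  KALDI_LOG << "Scaled pseudo-log-likelihood: " << loglike;
}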