online-nnet2-decodable.cc
Go to the documentation of this file.
1 // nnet2/online-nnet2-decodable.cc
2 
3 // Copyright 2014 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
21 
22 namespace kaldi {
23 namespace nnet2 {
24 
26  const AmNnet &nnet,
27  const TransitionModel &trans_model,
28  const DecodableNnet2OnlineOptions &opts,
29  OnlineFeatureInterface *input_feats):
30  features_(input_feats),
31  nnet_(nnet),
32  trans_model_(trans_model),
33  opts_(opts),
34  feat_dim_(input_feats->Dim()),
35  left_context_(nnet.GetNnet().LeftContext()),
36  right_context_(nnet.GetNnet().RightContext()),
37  num_pdfs_(nnet.GetNnet().OutputDim()),
38  begin_frame_(-1) {
42  "Priors in neural network not set up (or mismatch "
43  "with transition model).");
44  log_priors_.ApplyLog();
45 }
46 
47 
48 
50  ComputeForFrame(frame);
51  int32 pdf_id = trans_model_.TransitionIdToPdf(index);
52  KALDI_ASSERT(frame >= begin_frame_ &&
54  return scaled_loglikes_(frame - begin_frame_, pdf_id);
55 }
56 
57 
59  if (opts_.pad_input) { // normal case
60  return features_->IsLastFrame(frame);
61  } else {
63  }
64 }
65 
67  int32 features_ready = features_->NumFramesReady();
68  if (features_ready == 0)
69  return 0;
70  bool input_finished = features_->IsLastFrame(features_ready - 1);
71  if (opts_.pad_input) {
72  // normal case... we'll pad with duplicates of first + last frame to get the
73  // required left and right context.
74  if (input_finished) return features_ready;
75  else return std::max<int32>(0, features_ready - right_context_);
76  } else {
77  return std::max<int32>(0, features_ready - right_context_ - left_context_);
78  }
79 }
80 
82  int32 features_ready = features_->NumFramesReady();
83  bool input_finished = features_->IsLastFrame(features_ready - 1);
84  KALDI_ASSERT(frame >= 0);
85  if (frame >= begin_frame_ &&
87  return;
88  KALDI_ASSERT(frame < NumFramesReady());
89 
90  int32 input_frame_begin;
91  if (opts_.pad_input)
92  input_frame_begin = frame - left_context_;
93  else
94  input_frame_begin = frame;
95  int32 max_possible_input_frame_end = features_ready;
96  if (input_finished && opts_.pad_input)
97  max_possible_input_frame_end += right_context_;
98  int32 input_frame_end = std::min<int32>(max_possible_input_frame_end,
99  input_frame_begin +
100  left_context_ + right_context_ +
102  KALDI_ASSERT(input_frame_end > input_frame_begin);
103  Matrix<BaseFloat> features(input_frame_end - input_frame_begin,
104  feat_dim_);
105  for (int32 t = input_frame_begin; t < input_frame_end; t++) {
106  SubVector<BaseFloat> row(features, t - input_frame_begin);
107  int32 t_modified = t;
108  // The next two if-statements take care of "pad_input"
109  if (t_modified < 0)
110  t_modified = 0;
111  if (t_modified >= features_ready)
112  t_modified = features_ready - 1;
113  features_->GetFrame(t_modified, &row);
114  }
115  CuMatrix<BaseFloat> cu_features;
116  cu_features.Swap(&features); // Copy to GPU, if we're using one.
117 
118 
119  int32 num_frames_out = input_frame_end - input_frame_begin -
120  left_context_ - right_context_;
121 
122  CuMatrix<BaseFloat> cu_posteriors(num_frames_out, num_pdfs_);
123 
124  // The "false" below tells it not to pad the input: we've already done
125  // any padding that we needed to do.
126  NnetComputation(nnet_.GetNnet(), cu_features,
127  false, &cu_posteriors);
128 
129  cu_posteriors.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN.
130  cu_posteriors.ApplyLog();
131  // subtract log-prior (divide by prior)
132  cu_posteriors.AddVecToRows(-1.0, log_priors_);
133  // apply probability scale.
134  cu_posteriors.Scale(opts_.acoustic_scale);
135 
136  // Transfer the scores to the CPU for faster access by the
137  // decoding process.
138  scaled_loglikes_.Resize(0, 0);
139  cu_posteriors.Swap(&scaled_loglikes_);
140 
141  begin_frame_ = frame;
142 }
143 
144 } // namespace nnet2
145 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g. an utterance).
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
int32 TransitionIdToPdf(int32 trans_id) const
void ComputeForFrame(int32 frame)
If the neural-network outputs for this frame are not cached, it computes them (and possibly for some ...
void Scale(Real value)
Definition: cu-matrix.cc:644
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
const VectorBase< BaseFloat > & Priors() const
Definition: am-nnet.h:67
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
virtual BaseFloat LogLikelihood(int32 frame, int32 index)
Returns the scaled log likelihood.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
DecodableNnet2Online(const AmNnet &nnet, const TransitionModel &trans_model, const DecodableNnet2OnlineOptions &opts, OnlineFeatureInterface *input_feats)
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
virtual int32 NumFramesReady() const =0
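Returns the number of feature frames currently ready. (The extracted text here, "returns the feature dimension.", appears to belong to Dim() rather than NumFramesReady().)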
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
const Nnet & GetNnet() const
Definition: am-nnet.h:61