decodable-matrix.h
Go to the documentation of this file.
1 // decoder/decodable-matrix.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_DECODER_DECODABLE_MATRIX_H_
22 #define KALDI_DECODER_DECODABLE_MATRIX_H_
23 
24 #include <vector>
25 
26 #include "base/kaldi-common.h"
27 #include "hmm/transition-model.h"
28 #include "itf/decodable-itf.h"
29 #include "matrix/kaldi-matrix.h"
30 
31 namespace kaldi {
32 
33 
35  public:
36  // This constructor creates an object that will not delete "likes" when done.
38  const Matrix<BaseFloat> &likes,
39  BaseFloat scale): trans_model_(tm), likes_(&likes),
40  scale_(scale), delete_likes_(false) {
41  if (likes.NumCols() != tm.NumPdfs())
42  KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has "
43  << likes.NumCols() << " cols but transition-model has "
44  << tm.NumPdfs() << " pdf-ids.";
45  }
46 
47  // This constructor creates an object that will delete "likes"
48  // when done.
50  BaseFloat scale,
51  const Matrix<BaseFloat> *likes):
52  trans_model_(tm), likes_(likes),
53  scale_(scale), delete_likes_(true) {
54  if (likes->NumCols() != tm.NumPdfs())
55  KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has "
56  << likes->NumCols() << " cols but transition-model has "
57  << tm.NumPdfs() << " pdf-ids.";
58  }
59 
60  virtual int32 NumFramesReady() const { return likes_->NumRows(); }
61 
62  virtual bool IsLastFrame(int32 frame) const {
63  KALDI_ASSERT(frame < NumFramesReady());
64  return (frame == NumFramesReady() - 1);
65  }
66 
67  // Note, frames are numbered from zero.
68  virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
69  return scale_ * (*likes_)(frame, trans_model_.TransitionIdToPdfFast(tid));
70  }
71 
72  // Indices are one-based! This is for compatibility with OpenFst.
73  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
74 
76  if (delete_likes_) delete likes_;
77  }
78  private:
79  const TransitionModel &trans_model_; // for tid to pdf mapping
84 };
85 
99  public:
100  // This constructor creates an object that will not delete "likes" when done.
101  // frame_offset is the frame index that row 0 of 'likes' corresponds to; it is
102  // greater than zero if this is not the first chunk of likelihoods.
104  const MatrixBase<BaseFloat> &likes,
105  int32 frame_offset = 0);
106 
107  // This constructor creates an object that will delete "likes"
108  // when done.
110  const Matrix<BaseFloat> *likes,
111  int32 frame_offset = 0);
112 
113  virtual int32 NumFramesReady() const;
114 
115  virtual bool IsLastFrame(int32 frame) const;
116 
117  virtual BaseFloat LogLikelihood(int32 frame, int32 tid);
118 
119  // Note: these indices are 1-based.
120  virtual int32 NumIndices() const;
121 
122  virtual ~DecodableMatrixMapped();
123 
124  private:
125  const TransitionModel &trans_model_; // for tid to pdf mapping
129 
130  // raw_data_ and stride_ are a kind of fast look-aside for 'likes_', to be
131  // used when KALDI_PARANOID is false.
134 
136 };
137 
138 
153  public:
155  trans_model_(tm), frame_offset_(0), input_is_finished_(false) { }
156 
157  // this is not part of the generic Decodable interface.
158  int32 FirstAvailableFrame() const { return frame_offset_; }
159 
160  // Logically, this function appends 'loglikes' (interpreted as newly available
161  // frames) to the log-likelihoods stored in the class.
162  //
163  // This function is destructive of the input "loglikes" because it may
164  // under some circumstances do a shallow copy using Swap(). This function
165  // appends loglikes to any existing likelihoods you've previously supplied.
166  void AcceptLoglikes(Matrix<BaseFloat> *loglikes,
167  int32 frames_to_discard);
168 
169  void InputIsFinished() { input_is_finished_ = true; }
170 
171  virtual int32 NumFramesReady() const {
172  return loglikes_.NumRows() + frame_offset_;
173  }
174 
175  virtual bool IsLastFrame(int32 frame) const {
176  KALDI_ASSERT(frame < NumFramesReady());
177  return (frame == NumFramesReady() - 1 && input_is_finished_);
178  }
179 
180  virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
182 #ifdef KALDI_PARANOID
183  return loglikes_(frame - frame_offset_, pdf_id);
184 #else
185  // This does no checking, so will be faster.
186  return raw_data_[frame * stride_ + pdf_id];
187 #endif
188  }
189 
190  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
191 
192  // nothing special to do in destructor.
194  private:
195  const TransitionModel &trans_model_; // for tid to pdf mapping
199 
200  // 'raw_data_' and 'stride_' are intended as a fast look-aside which is an
201  // alternative to accessing data_. raw_data_ is a faked version of
202  // data_->Data() as if it started from frame zero rather than frame_offset_.
203  // This simplifies the code of LogLikelihood(), in cases where KALDI_PARANOID
204  // is not defined.
207 
209 };
210 
211 
213  public:
215  BaseFloat scale):
216  likes_(likes), scale_(scale) { }
217 
218  virtual int32 NumFramesReady() const { return likes_.NumRows(); }
219 
220  virtual bool IsLastFrame(int32 frame) const {
221  KALDI_ASSERT(frame < NumFramesReady());
222  return (frame == NumFramesReady() - 1);
223  }
224 
225  // Note, frames are numbered from zero.
226  virtual BaseFloat LogLikelihood(int32 frame, int32 index) {
227  if (index > likes_.NumCols() || index <= 0 ||
228  frame < 0 || frame >= likes_.NumRows())
229  KALDI_ERR << "Invalid (frame, index - 1) = ("
230  << frame << ", " << index - 1 << ") for matrix of size "
231  << likes_.NumRows() << " x " << likes_.NumCols();
232  return scale_ * likes_(frame, index - 1);
233  }
234 
235  // Indices are one-based! This is for compatibility with OpenFst.
236  virtual int32 NumIndices() const { return likes_.NumCols(); }
237 
238  private:
242 };
243 } // namespace kaldi
244 
245 #endif // KALDI_DECODER_DECODABLE_MATRIX_H_
const TransitionModel & trans_model_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
virtual BaseFloat LogLikelihood(int32 frame, int32 index)
Returns the log likelihood, which will be negated in the decoder.
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixScaledMapped)
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
DecodableMatrixScaled(const Matrix< BaseFloat > &likes, BaseFloat scale)
const TransitionModel & trans_model_
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
int32 TransitionIdToPdfFast(int32 trans_id) const
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
kaldi::int32 int32
const Matrix< BaseFloat > * likes_to_delete_
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
This decodable class returns log-likes stored in a matrix; it supports repeatedly writing to the matr...
DecodableMatrixScaledMapped(const TransitionModel &tm, BaseFloat scale, const Matrix< BaseFloat > *likes)
DecodableMatrixScaledMapped(const TransitionModel &tm, const Matrix< BaseFloat > &likes, BaseFloat scale)
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
const Matrix< BaseFloat > & likes_
#define KALDI_ERR
Definition: kaldi-error.h:147
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
virtual BaseFloat LogLikelihood(int32 frame, int32 tid)
Returns the log likelihood, which will be negated in the decoder.
const Matrix< BaseFloat > * likes_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
const MatrixBase< BaseFloat > * likes_
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
const TransitionModel & trans_model_
This is like DecodableMatrixScaledMapped, but it doesn't support an acoustic scale, and it does support a frame offset, whereby you can state that the first row of 'likes' is actually the n'th row of the matrix of available log-likelihoods.
DecodableMatrixMappedOffset(const TransitionModel &tm)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
virtual BaseFloat LogLikelihood(int32 frame, int32 tid)
Returns the log likelihood, which will be negated in the decoder.
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.