decodable-am-diag-gmm.h
Go to the documentation of this file.
1 // gmm/decodable-am-diag-gmm.h
2 
3 // Copyright 2009-2011 Saarland University; Microsoft Corporation;
4 // Lukas Burget
5 // 2013 Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_GMM_DECODABLE_AM_DIAG_GMM_H_
23 #define KALDI_GMM_DECODABLE_AM_DIAG_GMM_H_
24 
25 #include <vector>
26 
27 #include "base/kaldi-common.h"
28 #include "gmm/am-diag-gmm.h"
29 #include "hmm/transition-model.h"
30 #include "itf/decodable-itf.h"
34 
35 namespace kaldi {
36 
44 
46  public:
52  const Matrix<BaseFloat> &feats,
53  BaseFloat log_sum_exp_prune = -1.0):
54  acoustic_model_(am), feature_matrix_(feats),
55  previous_frame_(-1), log_sum_exp_prune_(log_sum_exp_prune),
56  data_squared_(feats.NumCols()) {
58  }
59 
60  // Note, frames are numbered from zero. But state_index is numbered
61  // from one (this routine is called by FSTs).
62  virtual BaseFloat LogLikelihood(int32 frame, int32 state_index) {
    // state_index arrives one-based; subtract one to get the index that
    // LogLikelihoodZeroBased() takes. The frame index is passed through as is.
63  return LogLikelihoodZeroBased(frame, state_index - 1);
64  }
// Returns the number of frames available, i.e. the row count of
// feature_matrix_.
65  virtual int32 NumFramesReady() const { return feature_matrix_.NumRows(); }
66 
67  // Indices are one-based! This is for compatibility with OpenFst.
// Returns acoustic_model_.NumPdfs(), the number of pdfs in the acoustic model.
68  virtual int32 NumIndices() const { return acoustic_model_.NumPdfs(); }
69 
70  virtual bool IsLastFrame(int32 frame) const {
    // The queried frame must lie below NumFramesReady(); this is asserted
    // rather than reported through the return value.
71  KALDI_ASSERT(frame < NumFramesReady());
    // True exactly when `frame` is the highest available frame index.
72  return (frame == NumFramesReady() - 1);
73  }
74 
75  protected:
76  void ResetLogLikeCache();
77  virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index);
78 
83 
88  };
89  std::vector<LikelihoodCacheRecord> log_like_cache_;
90  private:
92 
93 
95 };
96 
97 
99  public:
101  const TransitionModel &tm,
102  const Matrix<BaseFloat> &feats,
103  BaseFloat log_sum_exp_prune = -1.0)
104  : DecodableAmDiagGmmUnmapped(am, feats, log_sum_exp_prune),
105  trans_model_(tm) {}
106 
107  // Note, frames are numbered from zero but transition-ids from one.
108  virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
     // Map the transition-id to its pdf through trans_model_, then evaluate
     // that pdf on the given frame. The mapped value is handed directly to
     // LogLikelihoodZeroBased(), so it is presumably already zero-based --
     // confirm against TransitionModel::TransitionIdToPdf().
109  return LogLikelihoodZeroBased(frame,
110  trans_model_.TransitionIdToPdf(tid));
111  }
112  // Indices are one-based! This is for compatibility with OpenFst.
// Returns trans_model_.NumTransitionIds(): this object is indexed by
// transition-id, not by pdf.
113  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
114 
// Returns the address of the transition model this object refers to.
// trans_model_ is a reference member, so the pointee is not owned here.
115  const TransitionModel *TransModel() { return &trans_model_; }
116  private: // want to access public to have pdf id information
117  const TransitionModel &trans_model_; // for tid to pdf mapping
119 };
120 
122  public:
124  const TransitionModel &tm,
125  const Matrix<BaseFloat> &feats,
126  BaseFloat scale,
127  BaseFloat log_sum_exp_prune = -1.0):
128  DecodableAmDiagGmmUnmapped(am, feats, log_sum_exp_prune), trans_model_(tm),
129  scale_(scale), delete_feats_(NULL) {}
130 
131  // This version of the initializer takes ownership of the pointer
132  // "feats" and will delete it when this class is destroyed.
134  const TransitionModel &tm,
135  BaseFloat scale,
136  BaseFloat log_sum_exp_prune,
137  Matrix<BaseFloat> *feats):
138  DecodableAmDiagGmmUnmapped(am, *feats, log_sum_exp_prune),
139  trans_model_(tm), scale_(scale), delete_feats_(feats) {}
140 
141  // Note, frames are numbered from zero but transition-ids from one.
142  virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
     // Map the transition-id to its pdf through trans_model_, evaluate that
     // pdf on the given frame, and multiply the result by scale_.
143  return scale_*LogLikelihoodZeroBased(frame,
144  trans_model_.TransitionIdToPdf(tid));
145  }
146  // Indices are one-based! This is for compatibility with OpenFst.
// Returns trans_model_.NumTransitionIds(): this object is indexed by
// transition-id, not by pdf.
147  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
148 
// Returns the address of the transition model this object refers to.
// trans_model_ is a reference member, so the pointee is not owned here.
149  const TransitionModel *TransModel() { return &trans_model_; }
150 
152  delete delete_feats_;
153  }
154 
155  private: // want to access it public to have pdf id information
156  const TransitionModel &trans_model_; // for transition-id to pdf mapping
160 };
161 
162 } // namespace kaldi
163 
164 #endif // KALDI_GMM_DECODABLE_AM_DIAG_GMM_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
DecodableAmDiagGmmUnmapped is a decodable object that takes indices that correspond to pdf-id's plus ...
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
Definition: decodable-itf.h:82
virtual BaseFloat LogLikelihood(int32 frame, int32 tid)
Returns the log likelihood, which will be negated in the decoder.
kaldi::int32 int32
int32 hit_time
Frame for which this value is relevant.
virtual BaseFloat LogLikelihood(int32 frame, int32 tid)
Returns the log likelihood, which will be negated in the decoder.
Vector< BaseFloat > data_squared_
Cache for fast likelihood calculation.
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
const TransitionModel & trans_model_
const TransitionModel & trans_model_
const TransitionModel * TransModel()
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
DecodableAmDiagGmmScaled(const AmDiagGmm &am, const TransitionModel &tm, BaseFloat scale, BaseFloat log_sum_exp_prune, Matrix< BaseFloat > *feats)
DecodableAmDiagGmmScaled(const AmDiagGmm &am, const TransitionModel &tm, const Matrix< BaseFloat > &feats, BaseFloat scale, BaseFloat log_sum_exp_prune=-1.0)
const Matrix< BaseFloat > & feature_matrix_
std::vector< LikelihoodCacheRecord > log_like_cache_
int32 NumPdfs() const
Definition: am-diag-gmm.h:82
A class representing a vector.
Definition: kaldi-vector.h:406
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmDiagGmmUnmapped)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
virtual BaseFloat LogLikelihood(int32 frame, int32 state_index)
Returns the log likelihood, which will be negated in the decoder.
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
DecodableAmDiagGmmUnmapped(const AmDiagGmm &am, const Matrix< BaseFloat > &feats, BaseFloat log_sum_exp_prune=-1.0)
If you set log_sum_exp_prune to a value greater than 0 it will prune in the LogSumExp operation (larg...
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
const TransitionModel * TransModel()
DecodableAmDiagGmm(const AmDiagGmm &am, const TransitionModel &tm, const Matrix< BaseFloat > &feats, BaseFloat log_sum_exp_prune=-1.0)
virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index)