decodable-am-diag-gmm-regtree.h
Go to the documentation of this file.
1 // transform/decodable-am-diag-gmm-regtree.h
2 
3 // Copyright 2009-2011 Saarland University; Microsoft Corporation;
4 // Lukas Burget
5 // 2013 Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_TRANSFORM_DECODABLE_AM_DIAG_GMM_REGTREE_H_
23 #define KALDI_TRANSFORM_DECODABLE_AM_DIAG_GMM_REGTREE_H_
24 
25 #include <vector>
26 
27 #include "base/kaldi-common.h"
28 #include "gmm/am-diag-gmm.h"
29 #include "hmm/transition-model.h"
30 #include "itf/decodable-itf.h"
35 
36 namespace kaldi {
37 
39  public:
41  const TransitionModel &tm,
42  const Matrix<BaseFloat> &feats,
43  const RegtreeFmllrDiagGmm &fmllr_xform,
44  const RegressionTree &regtree,
45  BaseFloat scale,
46  BaseFloat log_sum_exp_prune = -1.0)
47  : DecodableAmDiagGmmUnmapped(am, feats, log_sum_exp_prune), trans_model_(tm),
48  scale_(scale), fmllr_xform_(fmllr_xform), regtree_(regtree),
49  valid_logdets_(false) {}
50 
51  // Note, frames are numbered from zero but transition-ids (tid) from one.
52  virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
53  return scale_*LogLikelihoodZeroBased(frame,
55  }
56 
57 virtual int32 NumFramesReady() const { return feature_matrix_.NumRows(); } // frames ready == row count of feature_matrix_
58 
59 // Indices are one-based (for compatibility with OpenFst). Returns the number of transition-ids reported by trans_model_.
60 virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
61 
62  protected:
63  virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index);
64 
65 const TransitionModel *TransModel() { return &trans_model_; } // address of the referenced transition model; no copy is made
66 
67  private:
68  const TransitionModel &trans_model_; // for transition-id to pdf mapping
72  std::vector< Vector<BaseFloat> > xformed_data_;
73  std::vector< Vector<BaseFloat> > xformed_data_squared_;
76 
78 };
79 
81  public:
83  const TransitionModel &tm,
84  const Matrix<BaseFloat> &feats,
85  const RegtreeMllrDiagGmm &mllr_xform,
86  const RegressionTree &regtree,
87  BaseFloat scale,
88  BaseFloat log_sum_exp_prune = -1.0):
89  DecodableAmDiagGmmUnmapped(am, feats, log_sum_exp_prune),
90  trans_model_(tm), scale_(scale), mllr_xform_(mllr_xform),
91  regtree_(regtree), data_squared_(feats.NumCols()) { InitCache(); }
93 
94  // Note, frames are numbered from zero but transition-ids (tid) from one.
95  virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
96  return scale_*LogLikelihoodZeroBased(frame,
98  }
99 
100 virtual int32 NumFramesReady() const { return feature_matrix_.NumRows(); } // frames ready == row count of feature_matrix_
101 
102 // Indices are one-based (for compatibility with OpenFst). Returns the number of transition-ids reported by trans_model_.
103 virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
104 
106 
107  protected:
108  virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index);
109 
110  private:
112  void InitCache();
115  const Matrix<BaseFloat>& GetXformedMeanInvVars(int32 state_index);
118  const Vector<BaseFloat>& GetXformedGconsts(int32 state_index);
119 
120  const TransitionModel &trans_model_; // for transition-id to pdf mapping
124  // we want it public to have access to the pdf ids
125 
127  std::vector< Matrix<BaseFloat>* > xformed_mean_invvars_;
129  std::vector< Vector<BaseFloat>* > xformed_gconsts_;
132  std::vector<bool> is_cached_;
133 
135 
137 };
138 
139 } // namespace kaldi
140 
141 #endif // KALDI_TRANSFORM_DECODABLE_AM_DIAG_GMM_REGTREE_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmDiagGmmRegtreeFmllr)
DecodableAmDiagGmmUnmapped is a decodable object that takes indices that correspond to pdf-id's plus ...
std::vector< Vector< BaseFloat > > xformed_data_
An MLLR mean transformation is an affine transformation of Gaussian means.
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
int32 TransitionIdToPdfFast(int32 trans_id) const
virtual BaseFloat LogLikelihood(int32 frame, int32 tid)
Returns the log likelihood, which will be negated in the decoder.
kaldi::int32 int32
Vector< BaseFloat > data_squared_
Cache for fast likelihood calculation.
std::vector< Vector< BaseFloat > *> xformed_gconsts_
Cache of transformed gconsts for each state.
An FMLLR (feature-space MLLR) transformation, also called CMLLR (constrained MLLR) is an affine trans...
Vector< BaseFloat > data_squared_
Cached for fast likelihood calculation.
DecodableAmDiagGmmRegtreeFmllr(const AmDiagGmm &am, const TransitionModel &tm, const Matrix< BaseFloat > &feats, const RegtreeFmllrDiagGmm &fmllr_xform, const RegressionTree &regtree, BaseFloat scale, BaseFloat log_sum_exp_prune=-1.0)
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
std::vector< Vector< BaseFloat > > xformed_data_squared_
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
A regression tree is a clustering of Gaussian densities in an acoustic model, such that the group of ...
std::vector< Matrix< BaseFloat > *> xformed_mean_invvars_
Cache of transformed means times inverse variances for each state.
virtual BaseFloat LogLikelihood(int32 frame, int32 tid)
Returns the log likelihood, which will be negated in the decoder.
const Matrix< BaseFloat > & feature_matrix_
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
A class representing a vector.
Definition: kaldi-vector.h:406
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
DecodableAmDiagGmmRegtreeMllr(const AmDiagGmm &am, const TransitionModel &tm, const Matrix< BaseFloat > &feats, const RegtreeMllrDiagGmm &mllr_xform, const RegressionTree &regtree, BaseFloat scale, BaseFloat log_sum_exp_prune=-1.0)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 state_index)
std::vector< bool > is_cached_
Boolean variable per state to indicate whether the transformed means for that state are cached...