fmllr-diag-gmm.h
Go to the documentation of this file.
1 // transform/fmllr-diag-gmm.h
2 
3 // Copyright 2009-2011 Microsoft Corporation; Saarland University
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #ifndef KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_
23 #define KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_
24 
25 #include <vector>
26 
27 #include "base/kaldi-common.h"
28 #include "gmm/am-diag-gmm.h"
29 #include "gmm/mle-full-gmm.h"
31 #include "util/kaldi-table.h"
32 #include "util/kaldi-holder.h"
33 
34 namespace kaldi {
35 
36 /* This header contains routines for performing global CMLLR,
37  without a regression tree (however, you can down-weight silence
38  in training using the program weight-silence-post on the
39  state-level posteriors). For regression-tree CMLLR, see
40  regtree-fmllr-diag-gmm.h
41 */
42 
43 struct FmllrOptions {
44  std::string update_type;
47  FmllrOptions(): update_type("full"), min_count(500.0), num_iters(40) { }
48  void Register(OptionsItf *opts) {
49  opts->Register("fmllr-update-type", &update_type,
50  "Update type for fMLLR (\"full\"|\"diag\"|\"offset\"|\"none\")");
51  opts->Register("fmllr-min-count", &min_count,
52  "Minimum count required to update fMLLR");
53  opts->Register("fmllr-num-iters", &num_iters,
54  "Number of iterations in fMLLR update phase.");
55  }
56 };
57 
58 
60 
62  public:
63  // If supplied, the "opts" will only be used to limit the
64  // stats that are accumulated, to the parts we'll need in the
65  // update.
67  opts_(opts) { }
68  explicit FmllrDiagGmmAccs(const FmllrDiagGmmAccs &other):
69  AffineXformStats(other), single_frame_stats_(other.single_frame_stats_),
70  opts_(other.opts_) {}
71  explicit FmllrDiagGmmAccs(int32 dim, const FmllrOptions &opts = FmllrOptions()):
72  opts_(opts) { Init(dim); }
73 
74  // The following initializer gives us an efficient way to
75  // compute these stats from full-cov Gaussian statistics
76  // (accumulated from a *diagonal* model, e.g. using
77  // AccumFullGmm::AccumulateFromPosteriors or
78  // AccumulateFromDiag).
79  FmllrDiagGmmAccs(const DiagGmm &gmm, const AccumFullGmm &fgmm_accs);
80 
81  void Init(size_t dim) {
82  AffineXformStats::Init(dim, dim); single_frame_stats_.Init(dim);
83  }
84  void Read(std::istream &in, bool binary, bool add) {
85  AffineXformStats::Read(in, binary, add);
86  single_frame_stats_.Init(Dim());
87  }
89  BaseFloat AccumulateForGmm(const DiagGmm &gmm,
90  const VectorBase<BaseFloat> &data,
91  BaseFloat weight);
92 
95  BaseFloat AccumulateForGmmPreselect(const DiagGmm &gmm,
96  const std::vector<int32> &gselect,
97  const VectorBase<BaseFloat> &data,
98  BaseFloat weight);
99 
101  void AccumulateFromPosteriors(const DiagGmm &gmm,
102  const VectorBase<BaseFloat> &data,
103  const VectorBase<BaseFloat> &posteriors);
104 
107  void AccumulateFromPosteriorsPreselect(
108  const DiagGmm &gmm,
109  const std::vector<int32> &gselect,
110  const VectorBase<BaseFloat> &data,
111  const VectorBase<BaseFloat> &posteriors);
112 
113 
115  void Update(const FmllrOptions &opts,
116  MatrixBase<BaseFloat> *fmllr_mat,
117  BaseFloat *objf_impr,
118  BaseFloat *count);
119 
120  // Note: we allow copy and assignment for this class.
121 
122  // Note: you can use the inherited AffineXformStats::Read
123  // and AffineXformStats::Write methods for writing/reading
124  // of the object. It is not necessary to store the other
125  // private variables of this class
126 
127  private:
128  // The things below, added in 2013, relate to an optimization that lets us
129  // speed up accumulation if there are multiple active pdfs per frame
130  // (e.g. when accumulating from lattices), or if we don't anticipate
131  // doing a "full" update.
132 
134  Vector<BaseFloat> x; // dim-dimensional features.
135  Vector<BaseFloat> a; // linear term in per-frame auxf; dim is model-dim.
136  Vector<BaseFloat> b; // quadratic term in per-frame auxf; dim is model-dim.
137  double count;
138  SingleFrameStats(int32 dim = 0) { Init(dim); }
139  SingleFrameStats(const SingleFrameStats &s): x(s.x), a(s.a), b(s.b),
140  count(s.count) {}
141  void Init(int32 dim);
142  };
143 
144  void CommitSingleFrameStats();
145 
146  void InitSingleFrameStats(const VectorBase<BaseFloat> &data);
147 
148  bool DataHasChanged(const VectorBase<BaseFloat> &data) const; // compares it to the
149  // data in single_frame_stats_, returns true if it's different.
150 
152 
153  // We only use the opts_ variable for its "update_type" data member,
154  // which limits what parts of the G matrix we accumulate.
156 
157 };
158 
159 
160 // Initializes the FMLLR matrix to its default value: resizes *out_fmllr to dim rows by (dim+1) columns and sets it to the unit matrix (ones on the diagonal, zeros elsewhere, including the extra last column).
161 inline void InitFmllr(int32 dim,
162  Matrix<BaseFloat> *out_fmllr) {
163  out_fmllr->Resize(dim, dim+1);
164  out_fmllr->SetUnit(); // zeros everywhere except ones on the diagonal (works for non-square matrices too).
165 }
166 
167 // ComputeFmllrDiagGmm optimizes the FMLLR matrix, controlled by the options.
168 // It starts the optimization from the current value of the matrix (e.g. use
169 // InitFmllr to get this).
170 // Returns auxf improvement.
172  const FmllrOptions &opts,
173  Matrix<BaseFloat> *out_fmllr,
174  BaseFloat *logdet); // add this to likelihoods
175 
177  KALDI_ASSERT(fmllr_mat.NumRows() != 0 && fmllr_mat.NumCols() == fmllr_mat.NumRows()+1);
178  SubMatrix<BaseFloat> tmp(fmllr_mat,
179  0, fmllr_mat.NumRows(),
180  0, fmllr_mat.NumRows());
181  return tmp.LogDet();
182 }
183 
184 
189  const AffineXformStats &stats,
191  MatrixBase<BaseFloat> *out_xform);
192 
198  const AffineXformStats &stats,
199  MatrixBase<BaseFloat> *out_xform);
200 // Simpler implementation I am testing.
202  const AffineXformStats &stats,
203  MatrixBase<BaseFloat> *out_xform);
204 
207  const AffineXformStats &stats,
208  MatrixBase<BaseFloat> *out_xform);
209 
210 
214  const AffineXformStats &stats,
215  std::string fmllr_type, // "none", "offset", "diag", "full"
216  int32 num_iters,
217  MatrixBase<BaseFloat> *out_xform);
218 
221 float FmllrAuxFuncDiagGmm(const MatrixBase<float> &xform,
222  const AffineXformStats &stats);
223 double FmllrAuxFuncDiagGmm(const MatrixBase<double> &xform,
224  const AffineXformStats &stats);
225 
226 
227 
231  const AffineXformStats &stats,
232  MatrixBase<BaseFloat> *grad_out);
233 
234 
242  AffineXformStats *stats);
243 
254  AffineXformStats *stats);
255 
256 
263  double beta,
264  int32 row,
265  MatrixBase<double> *transform);
266 
267 
268 
269 
270 
271 } // namespace kaldi
272 
273 #endif // KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_
void ApplyModelTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
ApplyModelTransformToStats takes a transform "xform", which must be diagonal (i.e.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
FmllrDiagGmmAccs(int32 dim, const FmllrOptions &opts=FmllrOptions())
BaseFloat ComputeFmllrMatrixDiagGmmFull(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
Updates the FMLLR matrix using Mark Gales' row-by-row update.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void InitFmllr(int32 dim, Matrix< BaseFloat > *out_fmllr)
This does not work with multiple feature transforms.
SingleFrameStats single_frame_stats_
std::string update_type
"full", "diag", "offset", "none"
kaldi::int32 int32
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
BaseFloat FmllrAuxfGradient(const MatrixBase< BaseFloat > &xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *grad_out)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
void ApplyFeatureTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
This function applies a feature-level transform to stats (useful for certain techniques based on fMLL...
BaseFloat ComputeFmllrMatrixDiagGmmOffset(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does offset-only fMLLR, i.e. it only estimates an offset.
const size_t count
FmllrDiagGmmAccs(const FmllrOptions &opts=FmllrOptions())
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
Definition: mle-full-gmm.h:74
SingleFrameStats(const SingleFrameStats &s)
void Register(OptionsItf *opts)
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does diagonal fMLLR (i.e.
void Read(std::istream &in, bool binary, bool add)
BaseFloat ComputeFmllrMatrixDiagGmm(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, std::string fmllr_type, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
This function internally calls ComputeFmllrMatrixDiagGmm{Full, Diagonal, Offset}, depending on "fmllr...
void Init(int32 dim, int32 num_gs)
void FmllrInnerUpdate(SpMatrix< double > &inv_G, VectorBase< double > &k, double beta, int32 row, MatrixBase< double > *transform)
This function does one row of the inner-loop fMLLR transform update.
A class representing a vector.
Definition: kaldi-vector.h:406
BaseFloat ComputeFmllrDiagGmm(const FmllrDiagGmmAccs &accs, const FmllrOptions &opts, Matrix< BaseFloat > *out_fmllr, BaseFloat *logdet)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
float FmllrAuxFuncDiagGmm(const MatrixBase< float > &xform, const AffineXformStats &stats)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal2(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void Init(size_t dim)
BaseFloat ComputeFmllrLogDet(const Matrix< BaseFloat > &fmllr_mat)
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
FmllrDiagGmmAccs(const FmllrDiagGmmAccs &other)
Sub-matrix representation.
Definition: kaldi-matrix.h:988
void Read(std::istream &in, bool binary, bool add)