fmllr-raw.h
Go to the documentation of this file.
1 // transform/fmllr-raw.h
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_TRANSFORM_FMLLR_RAW_H_
22 #define KALDI_TRANSFORM_FMLLR_RAW_H_
23 
24 #include <vector>
25 
26 #include "base/kaldi-common.h"
27 #include "gmm/am-diag-gmm.h"
28 #include "gmm/mle-full-gmm.h"
30 #include "util/kaldi-table.h"
31 #include "util/kaldi-holder.h"
32 
33 namespace kaldi {
34 
35 
36 /*
37  This header contains classes and functions related to computing Constrained
38  MLLR (equivalently, fMLLR) on the raw MFCCs or similar, when they have been
39  spliced and projected with something like LDA+MLLT, but where our model is
40  built on top of the spliced and projected features. We back-project the
41  model estimation back to the original MFCCs so our transform optimizes the
42  data likelihood given our model in the projected space. We have to include
43  the rejected dimensions in this likelihood, too. The objective function
44  includes N times the log-determinant of the square part of the transform,
45  where N is the number of times we spliced consecutive features (e.g. N = 9,
46  if we spliced +- 4 frames of context).
47 
48  For concreteness (but without loss of generality), assume that we spliced
49  13-dimensional MFCCs across 9 frames to get 117-dimensional features.
50 
51  Each of the 117-dim features is a linear function of the 13(13+1) transform
52  parameters. We have a particular vectorization of these parameters, from
53  which (with the transform) we work out the full quadratic auxiliary function
54  w.r.t. the parameters.
55 
56  This gives us a generic quadratic scalar function of the 13(13+1) parameters.
57  How do we get this quadratic w.r.t. one row? We always keep the current
58  derivative w.r.t. one row up to date; the quadratic w.r.t. that row can then be read off.
59  The log-determinant is easy to work out from the cofactor.
60 
61  So the full stats will be a (13(13+1)) by (13(13+1)) SpMatrix, plus
62  a bias term.
63 
64  The update will iterate row by row, and work out the quadratic function
65  of the row.
66 */
67 
68 
// Default constructor: initializes min_count to 100.0 and num_iters to 20.
// NOTE(review): the enclosing struct header and the declarations of min_count
// and num_iters are not visible in this listing (original lines 69-71 are
// absent) -- confirm their types against the upstream header.
72  FmllrRawOptions(): min_count(100.0), num_iters(20) { }
// Registers both configuration values with the given options object:
//   "fmllr-min-count" is bound to min_count,
//   "fmllr-num-iters" is bound to num_iters.
// `opts` is dereferenced unconditionally, so it must not be null.
73  void Register(OptionsItf *opts) {
74  opts->Register("fmllr-min-count", &min_count,
75  "Minimum count required to update fMLLR");
76  opts->Register("fmllr-num-iters", &num_iters,
77  "Number of iterations in fMLLR update phase.");
78  }
79 };
80 
// Statistics-accumulation class for the raw-feature fMLLR estimation described
// in the overview comment near the top of this header.
// NOTE(review): this is a documentation-page listing and its embedded line
// numbers have gaps (e.g. original lines 83, 130, 188 and 199 are absent).  The
// index at the end of the page refers to `SingleFrameStats single_frame_stats_`
// (line 188) and to KALDI_DISALLOW_COPY_AND_ASSIGN, so those declarations
// presumably sit on the omitted lines -- confirm against the upstream header.
81 class FmllrRawAccs {
82  public:
84 
 /// Dimension of raw MFCC (etc.) features.
86  int32 RawDim() const { return raw_dim_; }
 /// Full feature dimension after splicing (the number of rows of
 /// full_transform_).
88  int32 FullDim() const { return full_transform_.NumRows(); }
 /// Number of frames that are spliced together each time; computed as the
 /// integer quotient FullDim() / RawDim().
90  int32 SpliceWidth() const { return FullDim() / RawDim(); }
 /// Dimension of the model.
92  int32 ModelDim() const { return model_dim_; }
93 
94  // Initializer takes the raw dimension of the features (e.g. 13 for typical
95  // MFCC features), and the full transform (e.g. an LDA+MLLT transform). This
96  // full transform is the transform extended with the "rejected rows" that
97  // we would normally discard; we need them for this type of estimation.
98  FmllrRawAccs(int32 raw_dim,
99  int32 model_dim,
100  const Matrix<BaseFloat> &full_transform);
101 
102 
 // NOTE(review): judging by the name and signature, this accumulates
 // statistics for one data vector against one diagonal GMM with the given
 // weight; the meaning of the BaseFloat return value is not shown here --
 // confirm against the implementation file.
108  BaseFloat AccumulateForGmm(const DiagGmm &gmm,
109  const VectorBase<BaseFloat> &data,
110  BaseFloat weight);
111 
 // NOTE(review): presumably the same accumulation, but driven by
 // caller-supplied posteriors (likely one per Gaussian of `gmm`) -- confirm.
114  void AccumulateFromPosteriors(const DiagGmm &gmm,
115  const VectorBase<BaseFloat> &data,
116  const VectorBase<BaseFloat> &posteriors);
117 
 // NOTE(review): presumably estimates the raw fMLLR matrix from the
 // accumulated statistics, writing it to *raw_fmllr_mat and reporting the
 // objective-function improvement and count through the two output
 // pointers; whether the pointers may be null is not visible here -- confirm.
123  void Update(const FmllrRawOptions &opts,
124  MatrixBase<BaseFloat> *raw_fmllr_mat,
125  BaseFloat *objf_impr,
126  BaseFloat *count);
127 
128  void SetZero();
129  private:
 // NOTE(review): original line 130 is missing from this listing.  The members
 // from here down to the `};` on line 136 look like the body of a nested
 // struct (presumably SingleFrameStats, given the index entry for
 // single_frame_stats_) rather than direct members of FmllrRawAccs -- confirm.
131  Vector<BaseFloat> s; // [FullDim() + 1]-dimensional spliced data, plus 1.0
132  Vector<BaseFloat> transformed_data; // [FullDim()] Data times full transform, with offset.
133  double count;
134  Vector<double> a; // linear term in per-frame auxf; dim is model-dim.
135  Vector<double> b; // quadratic term in per-frame auxf; dim is model-dim.
136  };
137 
138  void CommitSingleFrameStats();
139 
140  void InitSingleFrameStats(const VectorBase<BaseFloat> &data);
141 
142  bool DataHasChanged(const VectorBase<BaseFloat> &data) const; // compares it to the
143  // data in single_frame_stats_, returns true if it's different.
144 
145 
 // NOTE(review): presumably evaluates the auxiliary function for fmllr_mat
 // given the "simple" linear and quadratic statistics -- confirm.
147  double GetAuxf(const Vector<double> &simple_linear_stats,
148  const SpMatrix<double> &simple_quadratic_stats,
149  const Matrix<double> &fmllr_mat) const;
150 
 // Outputs are written through the two pointers; their expected dimensions
 // are not visible in this header.
154  void ConvertToSimpleStats(
155  Vector<double> *simple_linear_stats,
156  SpMatrix<double> *simple_quadratic_stats) const;
157 
160  void ComputeM(
161  std::vector<Matrix<double> > *M) const;
162 
 // Takes the "simple" linear/quadratic statistics as input and writes
 // per-row linear, diagonal and off-diagonal statistics to the three output
 // pointers.
173  void ConvertToPerRowStats(
174  const Vector<double> &simple_linear_stats,
175  const SpMatrix<double> &simple_quadratic_stats_sp,
176  Matrix<double> *linear_stats,
177  std::vector<SpMatrix<double> > *diag_stats,
178  std::vector<std::vector<Matrix<double> > > *off_diag_stats) const;
179 
180  int32 raw_dim_; // Raw MFCC dimension.
181  int32 model_dim_; // Model dimension
182 
183  Matrix<BaseFloat> full_transform_; // Does not include any offset term
184  // (last column).
185  Vector<BaseFloat> transform_offset_; // The offset term (or zero).
186 
187 
189 
190  double count_; // The data-count. Note: in accounting for the determinant, we will
191  // have to multiply this by the number of times the data is spliced
192  // together on each frame.
193 
194  SpMatrix<double> temp_; // [full_dim + 1][full_dim + 1], outer product of s.
195  Matrix<double> Q_; // linear stats, indexed [model_dim + 1][full_dim + 1]
196  Matrix<double> S_; // quadratic stats, indexed
197  // [model_dim + 1][((full_dim+1)*(full_dim+2))/2]
198 
200 };
201 
202 
203 
204 } // namespace kaldi
205 
206 #endif // KALDI_TRANSFORM_FMLLR_RAW_H_
Matrix< double > S_
Definition: fmllr-raw.h:196
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Vector< BaseFloat > transform_offset_
Definition: fmllr-raw.h:185
SingleFrameStats single_frame_stats_
Definition: fmllr-raw.h:188
int32 ModelDim() const
Dimension of the model.
Definition: fmllr-raw.h:92
int32 SpliceWidth() const
Number of frames that are spliced together each time.
Definition: fmllr-raw.h:90
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
kaldi::int32 int32
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
Matrix< BaseFloat > full_transform_
Definition: fmllr-raw.h:183
const size_t count
int32 FullDim() const
Full feature dimension after splicing.
Definition: fmllr-raw.h:88
Matrix< double > Q_
Definition: fmllr-raw.h:195
void Register(OptionsItf *opts)
Definition: fmllr-raw.h:73
Vector< BaseFloat > transformed_data
Definition: fmllr-raw.h:132
A class representing a vector.
Definition: kaldi-vector.h:406
SpMatrix< double > temp_
Definition: fmllr-raw.h:194
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
int32 RawDim() const
Dimension of raw MFCC (etc.) features.
Definition: fmllr-raw.h:86
Provides a vector abstraction class.
Definition: kaldi-vector.h:41