fmllr-sgmm2.h
Go to the documentation of this file.
1 // sgmm2/fmllr-sgmm2.h
2 
3 // Copyright 2009-2012 Saarland University (author: Arnab Ghoshal)
4 // Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #ifndef KALDI_SGMM2_FMLLR_SGMM2_H_
23 #define KALDI_SGMM2_FMLLR_SGMM2_H_
24 
25 #include <string>
26 #include <vector>
27 
28 #include "base/kaldi-common.h"
29 #include "sgmm2/am-sgmm2.h"
31 #include "util/kaldi-table.h"
32 #include "util/kaldi-holder.h"
33 #include "itf/options-itf.h"
34 
35 namespace kaldi {
36 
55 
// Default option values. NOTE(review): this looks like the body of the
// Sgmm2FmllrConfig constructor; its header line is not part of this listing.
57  fmllr_iters = 5;  // Number of iterations in FMLLR estimation.
58  step_iters = 10;  // Iterations to find the optimal FMLLR step size.
59  fmllr_min_count_basis = 100.0;  // Min. occupancy count to estimate FMLLR using basis matrices.
60  fmllr_min_count = 1000.0;  // Min. occupancy count to estimate FMLLR without basis matrices.
61  fmllr_min_count_full = 5000.0;  // Min. occupancy count to stop using bases and switch to regular FMLLR.
62  num_fmllr_bases = 50;  // Number of basis matrices to use for FMLLR estimation.
63  bases_occ_scale = 0.2;  // Scale on the per-speaker count used to determine the number of bases.
64  }
65 
/// Registers every field of this configuration as an option on `opts`.
/// The option names and help strings are in the inline definition further
/// down in this header.
66  void Register(OptionsItf *opts);
67 };
68 
// Body of Sgmm2FmllrConfig::Register (the signature line is not part of this
// listing). Each call binds one option name to one configuration field and
// supplies that option's help text.
70  std::string module = "Sgmm2FmllrConfig: ";  // prefix prepended to every help string below
71  opts->Register("fmllr-iters", &fmllr_iters, module+
72  "Number of iterations in FMLLR estimation.");
73  opts->Register("fmllr-step-iters", &step_iters, module+
74  "Number of iterations to find optimal FMLLR step size.");
// Note: the option name is spelled "...-bases" while the field it sets is
// fmllr_min_count_basis.
75  opts->Register("fmllr-min-count-bases", &fmllr_min_count_basis, module+
76  "Minimum occupancy count to estimate FMLLR using basis matrices.");
77  opts->Register("fmllr-min-count", &fmllr_min_count, module+
78  "Minimum occupancy count to estimate FMLLR (without bases).");
79  opts->Register("fmllr-min-count-full", &fmllr_min_count_full, module+
80  "Minimum occupancy count to stop using basis matrices for FMLLR.");
81  opts->Register("fmllr-num-bases", &num_fmllr_bases, module+
82  "Number of FMLLR basis matrices.");
83  opts->Register("fmllr-bases-occ-scale", &bases_occ_scale, module+
84  "Scale per-speaker count to determine number of CMLLR bases.");
85 }
86 
87 
92  public:
/// Computes the pre-transform, its inverse and the mean scatter from the
/// model `sgmm` and the occupancies `state_occs`. The inline definition later
/// in this header forwards to AmSgmm2::ComputeFmllrPreXform.
93  void Init(const AmSgmm2 &sgmm, const Vector<BaseFloat> &state_occs);
/// Writes this object to `out_stream` (const; implementation is not in this
/// header). NOTE(review): `binary` presumably selects binary vs. text
/// output -- confirm against the implementation.
94  void Write(std::ostream &out_stream, bool binary) const;
/// Reads this object from `in_stream` (implementation is not in this header).
/// NOTE(review): `binary` presumably has to match the mode the data was
/// written in -- confirm against the implementation.
95  void Read(std::istream &in_stream, bool binary);
/// Returns true if any one of pre_xform_ (zero rows), inv_xform_ (zero rows)
/// or mean_scatter_ (zero dimension) is empty; returns false only when all
/// three have a non-zero size. fmllr_bases_ is not consulted here.
96  bool IsEmpty() const {
97  return (pre_xform_.NumRows() == 0 || inv_xform_.NumRows() == 0 ||
98  mean_scatter_.Dim() == 0);
99  }
/// Returns true when at least one basis matrix is stored in fmllr_bases_.
100  bool HasBasis() const { return fmllr_bases_.size() != 0; }
101 
/// FMLLR basis matrices, one Matrix<BaseFloat> per basis. The Doxygen index
/// for this file gives the dimensions as [B][D][D+1].
109  std::vector< Matrix<BaseFloat> > fmllr_bases_;
110 };
111 
/// Initializes the global FMLLR parameters from the model by delegating to
/// AmSgmm2::ComputeFmllrPreXform, which (per its index entry) computes the
/// LDA-like pre-transform and its inverse along with the diagonal of the
/// mean scatter. Results are stored in pre_xform_, inv_xform_ and
/// mean_scatter_; fmllr_bases_ is not modified by this function.
112 inline void Sgmm2FmllrGlobalParams::Init(const AmSgmm2 &sgmm,
113  const Vector<BaseFloat> &state_occs) {
114  sgmm.ComputeFmllrPreXform(state_occs, &pre_xform_, &inv_xform_,
115  &mean_scatter_);
116 }
117 
123  public:
/// Default constructor: sets dim_ to -1.
/// NOTE(review): -1 presumably marks "not initialized until Init() is
/// called" -- confirm in the implementation file.
124  FmllrSgmm2Accs() : dim_(-1) {}
126 
/// Declaration only; defined outside this header.
/// NOTE(review): presumably records `dim` as the feature dimension and sizes
/// the statistics for `num_gaussians` Gaussians -- confirm against the
/// implementation.
127  void Init(int32 dim, int32 num_gaussians);
/// Resets the accumulated statistics by calling stats_.SetZero(); dim_ is
/// left unchanged.
128  void SetZero() { stats_.SetZero(); }
129 
/// Writes the accumulator to `out_stream` (const; implementation is not in
/// this header). NOTE(review): `binary` presumably selects binary vs. text
/// output -- confirm against the implementation.
130  void Write(std::ostream &out_stream, bool binary) const;
/// Reads accumulator contents from `in_stream` (implementation is not in
/// this header).
/// NOTE(review): `add` presumably means "add the stats read to the existing
/// ones instead of replacing them" -- confirm against the implementation.
131  void Read(std::istream &in_stream, bool binary, bool add);
132 
138  BaseFloat Accumulate(const AmSgmm2 &sgmm,
139  const VectorBase<BaseFloat> &data,
140  const Sgmm2PerFrameDerivedVars &frame_vars,
141  int32 state_index,
142  BaseFloat weight,
144 
/// Declaration only; defined outside this header.
/// NOTE(review): judging by the name and parameters, this accumulates
/// statistics for the frame `data` from posteriors that the caller has
/// already computed, with `gauss_select` listing the selected Gaussian
/// indices and `posteriors` holding the matching values for `state_index`.
/// Confirm the expected shapes of `gauss_select` and `posteriors` in the
/// implementation.
145  void AccumulateFromPosteriors(const AmSgmm2 &sgmm,
146  const Sgmm2PerSpkDerivedVars &spk,
147  const VectorBase<BaseFloat> &data,
148  const std::vector<int32> &gauss_select,
149  const Matrix<BaseFloat> &posteriors,
150  int32 state_index);
151 
/// Declaration only; defined outside this header. `grad_scatter` is a
/// non-const output pointer.
/// NOTE(review): presumably this is the gradient scatter that
/// EstimateSgmm2FmllrSubspace() later receives as `fmllr_grad_scatter` (both
/// are SpMatrix<double>) -- confirm against the implementation.
152  void AccumulateForFmllrSubspace(const AmSgmm2 &sgmm,
153  const Sgmm2FmllrGlobalParams &fmllr_globals,
154  SpMatrix<double> *grad_scatter);
155 
/// Declaration only (const member); defined outside this header. Takes the
/// transform `xform` and has two non-const output matrices, `grad_out` and
/// `G_out`.
/// NOTE(review): presumably the return value is the FMLLR objective at
/// `xform` and `grad_out` its gradient; the meaning of `G_out` is not
/// visible here -- confirm against the implementation.
156  BaseFloat FmllrObjGradient(const AmSgmm2 &sgmm,
157  const Matrix<BaseFloat> &xform,
158  Matrix<BaseFloat> *grad_out,
159  Matrix<BaseFloat> *G_out) const;
160 
/// Declaration only (const member); defined outside this header. Results are
/// returned through the pointers `out_xform`, `frame_count` and
/// `auxf_improv`.
/// NOTE(review): presumably estimates the FMLLR transform from the
/// accumulated stats using `fmllr_globals` and `opts`, with the bool return
/// reporting whether a transform was actually estimated -- confirm against
/// the implementation.
165  bool Update(const AmSgmm2 &model,
166  const Sgmm2FmllrGlobalParams &fmllr_globals,
167  const Sgmm2FmllrConfig &opts, Matrix<BaseFloat> *out_xform,
168  BaseFloat *frame_count, BaseFloat *auxf_improv) const;
169 
/// Accessor: returns dim_, the dimension of the feature vectors (-1 after
/// default construction).
171  int32 Dim() const { return dim_; }
/// Accessor: read-only reference to the accumulated statistics (stats_).
172  const AffineXformStats &stats() const { return stats_; }
173 
174  private:
177 
178  // Cannot have copy constructor and assignment operator
180 };
181 
/// Computes the fMLLR basis matrices given the scatter of the vectorized
/// gradients, `fmllr_grad_scatter`. Summary taken from the Doxygen index
/// entry for this function; the definition is in fmllr-sgmm2.cc, where the
/// fourth parameter is named `globals`.
/// `fmllr_globals` is a non-const pointer; `min_eig` defaults to 0.0.
/// NOTE(review): presumably `num_fmllr_bases` is the number of bases to
/// compute, `feat_dim` the feature dimension, the bases are stored in
/// `*fmllr_globals`, and `min_eig` is an eigenvalue threshold -- confirm
/// against the definition.
186 void EstimateSgmm2FmllrSubspace(const SpMatrix<double> &fmllr_grad_scatter,
187  int32 num_fmllr_bases, int32 feat_dim,
188  Sgmm2FmllrGlobalParams *fmllr_globals,
189  double min_eig = 0.0);
190 
191 } // namespace kaldi
192 
193 #endif // KALDI_SGMM2_FMLLR_SGMM2_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
Class for computing the accumulators needed for the maximum-likelihood estimate of FMLLR transforms f...
Definition: fmllr-sgmm2.h:122
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:103
BaseFloat fmllr_min_count_full
Minimum occupancy count to stop using FMLLR bases and switch to regular FMLLR estimation.
Definition: fmllr-sgmm2.h:49
kaldi::int32 int32
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121
void Register(OptionsItf *opts)
Definition: fmllr-sgmm2.h:69
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
Definition: fmllr-sgmm2.cc:506
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
BaseFloat bases_occ_scale
Scale per-speaker count to determine number of CMLLR bases.
Definition: fmllr-sgmm2.h:54
int32 Dim() const
Accessors.
Definition: fmllr-sgmm2.h:171
int32 fmllr_iters
Number of iterations in FMLLR estimation.
Definition: fmllr-sgmm2.h:41
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:105
void Init(const AmSgmm2 &sgmm, const Vector< BaseFloat > &state_occs)
Definition: fmllr-sgmm2.h:112
Configuration variables needed in the estimation of FMLLR for SGMMs.
Definition: fmllr-sgmm2.h:40
int32 step_iters
Iterations to find optimal FMLLR step size.
Definition: fmllr-sgmm2.h:42
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
Definition: fmllr-sgmm2.h:52
A class representing a vector.
Definition: kaldi-vector.h:406
BaseFloat fmllr_min_count_basis
Minimum occupancy count to estimate FMLLR using basis matrices.
Definition: fmllr-sgmm2.h:44
const AffineXformStats & stats() const
Definition: fmllr-sgmm2.h:172
int32 dim_
Dimension of feature vectors.
Definition: fmllr-sgmm2.h:176
Global adaptation parameters.
Definition: fmllr-sgmm2.h:91
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
Definition: fmllr-sgmm2.h:107
std::vector< Matrix< BaseFloat > > fmllr_bases_
{W}_b. [b][d][d], dim is [B][D][D+1].
Definition: fmllr-sgmm2.h:109
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142
AffineXformStats stats_
Accumulated stats.
Definition: fmllr-sgmm2.h:175
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
Definition: fmllr-sgmm2.h:46
void ComputeFmllrPreXform(const Vector< BaseFloat > &pdf_occs, Matrix< BaseFloat > *xform, Matrix< BaseFloat > *inv_xform, Vector< BaseFloat > *diag_mean_scatter) const
Computes the LDA-like pre-transform and its inverse as well as the eigenvalues of the scatter of the ...
Definition: am-sgmm2.cc:965