basis-fmllr-diag-gmm.h
Go to the documentation of this file.
1 // transform/basis-fmllr-diag-gmm.h
2 
3 // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
4 // 2014 Johns Hopkins University (author: Daniel Povey)
5 // 2014 IMSL, PKU-HKUST (Author: Wei Shi)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #ifndef KALDI_TRANSFORM_BASIS_FMLLR_DIAG_GMM_H_
24 #define KALDI_TRANSFORM_BASIS_FMLLR_DIAG_GMM_H_
25 
26 #include <vector>
27 #include <string>
28 
29 #include "base/kaldi-common.h"
30 #include "gmm/am-diag-gmm.h"
31 #include "gmm/mle-full-gmm.h"
32 #include "gmm/mle-am-diag-gmm.h"
34 #include "util/kaldi-table.h"
35 #include "util/kaldi-holder.h"
36 
37 namespace kaldi {
38 
39 /* This header contains routines for performing subspace CMLLR
40  (without a regression tree) for diagonal GMM acoustic model.
41 
42  Refer to Dan Povey's paper for derivations:
43  Daniel Povey, Kaisheng Yao. A basis representation of constrained
44  MLLR transforms for robust adaptation. Computer Speech and Language,
45  volume 26:35–51, 2012.
46 */
47 
50  BaseFloat size_scale; // how many basis elements we add for each new frame.
// Default constructor: initializes num_iters to 10, size_scale to 0.2,
// min_count to 50.0 and step_size_iters to 3.
53  BasisFmllrOptions(): num_iters(10), size_scale(0.2), min_count(50.0), step_size_iters(3) { }
// Registers the four option fields with `opts` so they can be set by name:
// "num-iters" -> num_iters, "size-scale" -> size_scale,
// "fmllr-min-count" -> min_count, "step-size-iters" -> step_size_iters.
// `opts` is dereferenced without a null check, so it must be non-null.
// NOTE(review): only the min_count option name carries an "fmllr-" prefix;
// the other three do not. Confirm this asymmetry is intentional.
54  void Register(OptionsItf *opts) {
55  opts->Register("num-iters", &num_iters,
56  "Number of iterations in basis fMLLR update during testing");
57  opts->Register("size-scale", &size_scale,
58  "Scale (< 1.0) on speaker occupancy that gives number of "
59  "basis elements.");
60  opts->Register("fmllr-min-count", &min_count,
61  "Minimum count required to update fMLLR");
62  opts->Register("step-size-iters", &step_size_iters,
63  "Number of iterations in computing step size");
64  }
65 };
66 
74 
75  public:
// Constructs an accumulator for feature dimension `dim`: stores the
// dimension in dim_, resets the occupancy count beta_ to zero, and then
// calls ResizeAccus(dim). `explicit` prevents implicit conversion from int32.
77  explicit BasisFmllrAccus(int32 dim) {
78  dim_ = dim;
79  beta_ = 0;
80  ResizeAccus(dim);
81  }
82 
// Declared here; the definition is not in this header. The constructor
// calls it with the feature dimension.
// NOTE(review): presumably (re)sizes the gradient-scatter accumulator for
// dimension `dim` -- confirm against the implementation.
83  void ResizeAccus(int32 dim);
84 
// Stream I/O for the accumulator; definitions are not in this header.
// Write is const and takes the output stream plus a `binary` flag.
// Read takes the input stream, a `binary` flag, and `add` (default false).
// NOTE(review): `binary` presumably selects binary vs. text format, and
// `add` presumably sums the read stats into the existing ones instead of
// overwriting them -- confirm against the implementation.
86  void Write(std::ostream &out_stream, bool binary) const;
87  void Read(std::istream &in_stream, bool binary, bool add = false);
88 
// Takes one speaker's AffineXformStats by const reference; the definition
// is not in this header.
// NOTE(review): presumably accumulates that speaker's contribution into the
// gradient scatter and the occupancy count beta_ -- confirm against the
// implementation.
94  void AccuGradientScatter(const AffineXformStats &spk_stats);
95 
101  double beta_;
102 };
103 
108 
109  public:
// Default constructor: feature dimension and basis size both start at 0.
110  BasisFmllrEstimate(): dim_(0), basis_size_(0) { }
// Constructs an estimator for feature dimension `dim`. The basis size is
// set to dim * (dim + 1). This constructor does not populate fmllr_basis_.
111  explicit BasisFmllrEstimate(int32 dim) {
112  dim_ = dim; basis_size_ = dim * (dim + 1);
113  }
114 
// Stream I/O for the estimator; definitions are not in this header.
// Write is const; both take a stream and a `binary` flag.
// NOTE(review): `binary` presumably selects binary vs. text format --
// confirm against the implementation.
116  void Write(std::ostream &out_stream, bool binary) const;
117  void Read(std::istream &in_stream, bool binary);
118 
119 
// Non-const member taking the acoustic model and the accumulated basis
// statistics, both by const reference; the definition is not in this header.
// NOTE(review): presumably estimates the basis matrices (fmllr_basis_) from
// `basis_accus` -- confirm against the implementation.
126  void EstimateFmllrBasis(const AmDiagGmm &am_gmm,
127  const BasisFmllrAccus &basis_accus);
128 
// Takes the acoustic model by const reference and a pointer to a
// double-precision SpMatrix; the definition is not in this header.
// NOTE(review): `pre_cond` is presumably an output parameter that receives
// a preconditioner computed from `am_gmm`, and presumably must be non-null
// -- confirm against the implementation.
134  void ComputeAmDiagPrecond(const AmDiagGmm &am_gmm,
135  SpMatrix<double> *pre_cond);
136 
// Returns the stored feature dimension (dim_).
137  int32 Dim() const { return dim_; }
138 
// Returns the stored basis size (basis_size_), which the dimension-taking
// constructor sets to dim * (dim + 1).
139  int32 BasisSize() const { return basis_size_; }
140 
// Const member returning a double; the definition is not in this header.
// Takes one speaker's stats by const reference, pointers to a transform
// matrix and a coefficient vector, and the options struct BY VALUE (it is
// copied on every call).
// NOTE(review): `out_xform` and `coefficients` are presumably output
// parameters, and the return value is presumably an objective-function
// figure -- confirm against the implementation.
150  double ComputeTransform(const AffineXformStats &spk_stats,
151  Matrix<BaseFloat> *out_xform,
152  Vector<BaseFloat> *coefficients,
153  BasisFmllrOptions options) const;
154 
155  private:
156 
159  std::vector< Matrix<BaseFloat> > fmllr_basis_;
164 };
165 
166 
167 } // namespace kaldi
168 
169 #endif // KALDI_TRANSFORM_BASIS_FMLLR_DIAG_GMM_H_
double beta_
Occupancy count.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Packed symmetric matrix class.
Definition: matrix-common.h:62
kaldi::int32 int32
std::vector< Matrix< BaseFloat > > fmllr_basis_
Basis matrices.
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
Stats for fMLLR subspace estimation.
void Register(OptionsItf *opts)
int32 dim_
Feature dimension.
SpMatrix< BaseFloat > grad_scatter_
Gradient scatter. Dim is [(D+1)*D] x [(D+1)*D].
int32 dim_
Feature dimension.
A class representing a vector.
Definition: kaldi-vector.h:406
int32 basis_size_
Number of bases, D*(D+1).
Estimation functions for basis fMLLR.