lvtln.h
Go to the documentation of this file.
1 // transform/lvtln.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_TRANSFORM_LVTLN_H_
22 #define KALDI_TRANSFORM_LVTLN_H_
23 
24 #include <vector>
25 
26 #include "base/kaldi-common.h"
27 #include "util/common-utils.h"
30 
31 
32 namespace kaldi {
33 
34 /*
35  Class for applying linear approximations to VTLN transforms;
36  see \ref transform_lvtln.
37 */
38 
39 
40 class LinearVtln {
41  public:
42  LinearVtln(): default_class_(0) { } // Default constructor; will probably be used prior to calling
43  // Read(). default_class_ is zeroed here so it never holds an indeterminate value.
44 
45  LinearVtln(int32 dim, int32 num_classes, int32 default_class);
46  // This initializer sets up the
47  // model; the transforms will initially all be the same.
48 
49  // SetTransform is used when we initialize it as "normal" VTLN.
50  // It's not necessary to ever call this function. "transform" is "A",
51  // the square part of the transform matrix.
52  void SetTransform(int32 i, const MatrixBase<BaseFloat> &transform);
53  // Sets the warp factor for class i (presumably stored in warps_; defined in lvtln.cc).
54  void SetWarp(int32 i, BaseFloat warp);
55  // Returns the warp factor for class i (presumably read from warps_; defined in lvtln.cc).
56  BaseFloat GetWarp(int32 i) const;
57 
58  // GetTransform gets the transform for class i. The caller must
59  // make sure the output matrix is sized Dim() by Dim().
60  void GetTransform(int32 i, MatrixBase<BaseFloat> *transform) const;
61 
62  // Compute the transform for the speaker; see the per-parameter comments below.
64  void ComputeTransform(const FmllrDiagGmmAccs &accs,
65  std::string norm_type, // type of regular fMLLR computation: "none", "offset", "diag"
66  BaseFloat logdet_scale, // scale on logdet (1.0 is "correct" but less may work better)
67  MatrixBase<BaseFloat> *Ws, // output fMLLR transform, should be size dim x dim+1
68  int32 *class_idx, // the transform that was chosen...
69  BaseFloat *logdet_out,
70  BaseFloat *objf_impr = NULL, // versus no transform
71  BaseFloat *count = NULL);
72  // Read/Write: (de)serialize this object from/to a stream, in binary or text mode.
73  void Read(std::istream &is, bool binary);
74 
75  void Write(std::ostream &os, bool binary) const;
76  // Dim() is the row count of the first stored matrix; it asserts that at least one class exists.
77  int32 Dim() const { KALDI_ASSERT(!A_.empty()); return A_[0].NumRows(); }
78  int32 NumClasses() const { return static_cast<int32>(A_.size()); } // one matrix in A_ per class
79  // This computes the offset term for this class given these
80  // stats.
81  void GetOffset(const FmllrDiagGmmAccs &speaker_stats,
82  int32 class_idx,
83  VectorBase<BaseFloat> *offset) const;
84 
85  friend class LinearVtlnStats;
86  protected:
87  int32 default_class_; // transform we return if we have no data.
88  std::vector<Matrix<BaseFloat> > A_; // Square parts of the FMLLR matrices.
89  std::vector<BaseFloat> logdets_; // presumably one log-determinant per matrix in A_ -- confirm in lvtln.cc
90  std::vector<BaseFloat> warps_; // This variable can be used to store the
91  // warp factor that each transform corresponds to.
92 
93 
94 };
95 
96 
97 
98 } // namespace kaldi
99 
100 #endif // KALDI_TRANSFORM_LVTLN_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
BaseFloat GetWarp(int32 i) const
Definition: lvtln.cc:180
friend class LinearVtlnStats
Definition: lvtln.h:85
void SetTransform(int32 i, const MatrixBase< BaseFloat > &transform)
Definition: lvtln.cc:166
std::vector< Matrix< BaseFloat > > A_
Definition: lvtln.h:88
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
int32 default_class_
Definition: lvtln.h:87
This does not work with multiple feature transforms.
kaldi::int32 int32
int32 NumClasses() const
Definition: lvtln.h:78
int32 Dim() const
Definition: lvtln.h:77
const size_t count
void GetTransform(int32 i, MatrixBase< BaseFloat > *transform) const
Definition: lvtln.cc:185
void Read(std::istream &is, bool binary)
Definition: lvtln.cc:45
void ComputeTransform(const FmllrDiagGmmAccs &accs, std::string norm_type, BaseFloat logdet_scale, MatrixBase< BaseFloat > *Ws, int32 *class_idx, BaseFloat *logdet_out, BaseFloat *objf_impr=NULL, BaseFloat *count=NULL)
Compute the transform for the speaker.
Definition: lvtln.cc:97
void GetOffset(const FmllrDiagGmmAccs &speaker_stats, int32 class_idx, VectorBase< BaseFloat > *offset) const
void SetWarp(int32 i, BaseFloat warp)
Definition: lvtln.cc:174
std::vector< BaseFloat > warps_
Definition: lvtln.h:90
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Write(std::ostream &os, bool binary) const
Definition: lvtln.cc:74
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
std::vector< BaseFloat > logdets_
Definition: lvtln.h:89