doc/lvtln_8h_source.html

 // transform/lvtln.h

 // Copyright 2009-2011 Microsoft Corporation

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #ifndef KALDI_TRANSFORM_LVTLN_H_
 #define KALDI_TRANSFORM_LVTLN_H_

 #include <vector>

 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "transform/transform-common.h"
 #include "transform/fmllr-diag-gmm.h"


 namespace kaldi {

 /*
   Class for applying linear approximations to VTLN transforms;
   see \ref transform_lvtln.
 */


 class LinearVtln {
  public:
   LinearVtln() { } // This initializer will probably be used prior to calling
   // Read().

   LinearVtln(int32 dim, int32 num_classes, int32 default_class);
   // This initializer sets up the
   // model; the transforms will initially all be the same.

   // SetTransform is used when we initialize it as "normal" VTLN.
   // It's not necessary to ever call this function.  "transform" is "A",
   // the square part of the transform matrix.
   void SetTransform(int32 i, const MatrixBase<BaseFloat> &transform);

   void SetWarp(int32 i, BaseFloat warp);

   BaseFloat GetWarp(int32 i) const;

   // GetTransform gets the transform for class i.  The caller must
   // make sure the output matrix is sized Dim() by Dim().
   void GetTransform(int32 i, MatrixBase<BaseFloat> *transform) const;


   void ComputeTransform(const FmllrDiagGmmAccs &accs,
                         std::string norm_type,  // type of regular fMLLR computation: "none", "offset", "diag"
                         BaseFloat logdet_scale,  // scale on logdet (1.0 is "correct" but less may work better)
                         MatrixBase<BaseFloat> *Ws,  // output fMLLR transform, should be size dim x dim+1
                         int32 *class_idx,  // the transform that was chosen...
                         BaseFloat *logdet_out,
                         BaseFloat *objf_impr = NULL,  // versus no transform
                         BaseFloat *count = NULL);

   void Read(std::istream &is, bool binary);

   void Write(std::ostream &os, bool binary) const;

   int32 Dim() const { KALDI_ASSERT(!A_.empty()); return A_[0].NumRows(); }
   int32 NumClasses() const { return A_.size(); }
   // This computes the offset term for this class given these
   // stats.
   void GetOffset(const FmllrDiagGmmAccs &speaker_stats,
                  int32 class_idx,
                  VectorBase<BaseFloat> *offset) const;

   friend class LinearVtlnStats;
  protected:
   int32 default_class_;  // transform we return if we have no data.
   std::vector<Matrix<BaseFloat> > A_;  // Square parts of the FMLLR matrices.
   std::vector<BaseFloat> logdets_;
   std::vector<BaseFloat> warps_; // This variable can be used to store the
                                  // warp factors that each transform correspond to.


 };


 }  // namespace kaldi

 #endif  // KALDI_TRANSFORM_LVTLN_H_
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::LinearVtln::GetWarp
BaseFloat GetWarp(int32 i) const
Definition: lvtln.cc:180

kaldi::LinearVtln::LinearVtlnStats
friend class LinearVtlnStats
Definition: lvtln.h:85

kaldi::LinearVtln::LinearVtln
LinearVtln()
Definition: lvtln.h:42

kaldi::LinearVtln::SetTransform
void SetTransform(int32 i, const MatrixBase< BaseFloat > &transform)
Definition: lvtln.cc:166

kaldi::LinearVtln::A_
std::vector< Matrix< BaseFloat > > A_
Definition: lvtln.h:88

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::LinearVtln::default_class_
int32 default_class_
Definition: lvtln.h:87

kaldi::FmllrDiagGmmAccs
This does not work with multiple feature transforms.
Definition: fmllr-diag-gmm.h:61

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

kaldi::LinearVtln
Definition: lvtln.h:40

kaldi::LinearVtln::NumClasses
int32 NumClasses() const
Definition: lvtln.h:78

kaldi::LinearVtln::Dim
int32 Dim() const
Definition: lvtln.h:77

count
const size_t count
Definition: arpa-file-parser-test.cc:66

kaldi::LinearVtln::GetTransform
void GetTransform(int32 i, MatrixBase< BaseFloat > *transform) const
Definition: lvtln.cc:185

float

kaldi::LinearVtln::Read
void Read(std::istream &is, bool binary)
Definition: lvtln.cc:45

kaldi::LinearVtln::ComputeTransform
void ComputeTransform(const FmllrDiagGmmAccs &accs, std::string norm_type, BaseFloat logdet_scale, MatrixBase< BaseFloat > *Ws, int32 *class_idx, BaseFloat *logdet_out, BaseFloat *objf_impr=NULL, BaseFloat *count=NULL)
Compute the transform for the speaker.
Definition: lvtln.cc:97

kaldi::LinearVtln::GetOffset
void GetOffset(const FmllrDiagGmmAccs &speaker_stats, int32 class_idx, VectorBase< BaseFloat > *offset) const

kaldi::LinearVtln::SetWarp
void SetWarp(int32 i, BaseFloat warp)
Definition: lvtln.cc:174

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::LinearVtln::warps_
std::vector< BaseFloat > warps_
Definition: lvtln.h:90

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::LinearVtln::Write
void Write(std::ostream &os, bool binary) const
Definition: lvtln.cc:74

transform-common.h

fmllr-diag-gmm.h

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi-common.h

kaldi::LinearVtln::logdets_
std::vector< BaseFloat > logdets_
Definition: lvtln.h:89