nnet-kl-hmm.h
Go to the documentation of this file.
1 // nnet/nnet-kl-hmm.h
2 
3 // Copyright 2013 Idiap Research Institute (Author: David Imseng)
4 // Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
5 // Brno University of Technology (Author: Karel Vesely)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #ifndef KALDI_NNET_NNET_KL_HMM_H_
24 #define KALDI_NNET_NNET_KL_HMM_H_
25 
26 #include <vector>
27 
28 #include "nnet/nnet-component.h"
29 #include "cudamatrix/cu-math.h"
30 #include "cudamatrix/cu-rand.h"
31 #include "matrix/kaldi-vector.h"
32 #include "matrix/kaldi-matrix.h"
33 
34 namespace kaldi {
35 namespace nnet1 {
36 
37 class KlHmm : public Component {
38  public:
39  KlHmm(int32 dim_in, int32 dim_out):
40  Component(dim_in, dim_out),
41  kl_stats_(dim_out, dim_in, kSetZero)
42  { }
43 
45  { }
46 
47  Component* Copy() const { return new KlHmm(*this); }
48  ComponentType GetType() const { return kKlHmm; }
49 
52  if (kl_inv_q_.NumRows() == 0) {
53  // Copy the CudaMatrix to a Matrix
54  Matrix<BaseFloat> in_tmp(in.NumRows(), in.NumCols());
55  in.CopyToMat(&in_tmp);
56  // Check if there are posteriors in the Matrix (check on first row),
57  BaseFloat post_sum = in_tmp.Row(0).Sum();
58  KALDI_ASSERT(ApproxEqual(post_sum, 1.0));
59  // Get a tmp Matrix of the stats
60  Matrix<BaseFloat> kl_stats_tmp(kl_stats_);
61  // Init a vector to get the sum of the rows (for normalization)
63  // Get the sum of the posteriors for normalization
64  row_sum.AddColSumMat(1, kl_stats_tmp);
65  // Apply floor to make sure there is no zero
66  row_sum.ApplyFloor(1e-20);
67  // Invert the sum (to normalize)
68  row_sum.InvertElements();
69  // Normalizing the statistics vector
70  kl_stats_tmp.MulRowsVec(row_sum);
71  // Apply floor before inversion and logarithm
72  kl_stats_tmp.ApplyFloor(1e-20);
73  // Apply inversion
74  kl_stats_tmp.InvertElements();
75  // Apply logarithm
76  kl_stats_tmp.ApplyLog();
77  // Inverted and logged values
79  // Holds now log (1/Q)
80  kl_inv_q_.CopyFromMat(kl_stats_tmp);
81  }
82  // Get the logarithm of the features for the Entropy calculation
83  // Copy the CudaMatrix to a Matrix
84  Matrix<BaseFloat> in_log_tmp(in.NumRows(), in.NumCols());
85  in.CopyToMat(&in_log_tmp);
86  // Flooring and log
87  in_log_tmp.ApplyFloor(1e-20);
88  in_log_tmp.ApplyLog();
89  CuMatrix<BaseFloat> log_in(in.NumRows(), in.NumCols());
90  log_in.CopyFromMat(in_log_tmp);
91  // P*logP
92  CuMatrix<BaseFloat> tmp_entropy(in);
93  tmp_entropy.MulElements(log_in);
94  // Getting the entropy (sum P*logP)
95  CuVector<BaseFloat> in_entropy(in.NumRows(), kSetZero);
96  in_entropy.AddColSumMat(1, tmp_entropy);
97  // sum P*log (1/Q)
98  out->AddMatMat(1, in, kNoTrans, kl_inv_q_, kTrans, 0);
99  // (sum P*logP) + (sum P*log(1/Q))
100  out->AddVecToCols(1, in_entropy);
101  // return the negative KL-divergence
102  out->Scale(-1);
103  }
104 
106  const CuMatrixBase<BaseFloat> &out,
107  const CuMatrixBase<BaseFloat> &out_diff,
108  CuMatrixBase<BaseFloat> *in_diff) {
109  KALDI_ERR << "Unimplemented";
110  }
111 
113  void ReadData(std::istream &is, bool binary) {
114  kl_stats_.Read(is, binary);
117  }
118 
120  void WriteData(std::ostream &os, bool binary) const {
121  kl_stats_.Write(os, binary);
122  }
123 
125  void SetStats(const Matrix<BaseFloat> mat) {
126  KALDI_ASSERT(mat.NumRows() == output_dim_);
127  KALDI_ASSERT(mat.NumCols() == input_dim_);
128  kl_stats_.Resize(mat.NumRows(), mat.NumCols());
129  kl_stats_.CopyFromMat(mat);
130  }
131 
133  void Accumulate(const Matrix<BaseFloat> &posteriors,
134  const std::vector<int32> &alignment) {
135  KALDI_ASSERT(posteriors.NumRows() == alignment.size());
136  KALDI_ASSERT(posteriors.NumCols() == kl_stats_.NumCols());
137  int32 num_frames = alignment.size();
138  for (int32 i = 0; i < num_frames; i++) {
139  // Casting float posterior to double (fixing numerical issue),
140  Vector<double> temp(posteriors.Row(i));
141  // Sum the postiors grouped by states from the alignment,
142  kl_stats_.Row(alignment[i]).AddVec(1, temp);
143  }
144  }
145 
146  private:
149 };
150 
151 } // namespace nnet1
152 } // namespace kaldi
153 
154 #endif // KALDI_NNET_NNET_KL_HMM_H_
155 
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
Definition: nnet-kl-hmm.h:50
void Write(std::ostream &out, bool binary) const
write to stream.
void SetStats(const Matrix< BaseFloat > mat)
Set the statistics matrix.
Definition: nnet-kl-hmm.h:125
int32 input_dim_
Data members,.
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
Definition: cu-matrix.cc:447
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
CuMatrix< BaseFloat > kl_inv_q_
Definition: nnet-kl-hmm.h:148
void AddVecToCols(Real alpha, const CuVectorBase< Real > &col, Real beta=1.0)
(for each column c of *this), c = alpha * col + beta * c
Definition: cu-matrix.cc:1232
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
ComponentType GetType() const
Get Type Identification of the component,.
Definition: nnet-kl-hmm.h:48
ComponentType
Component type identification mechanism,.
void ReadData(std::istream &is, bool binary)
Reads the component content.
Definition: nnet-kl-hmm.h:113
void Scale(Real value)
Definition: cu-matrix.cc:644
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
Definition: cu-vector.cc:1298
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
#define KALDI_ERR
Definition: kaldi-error.h:147
void Accumulate(const Matrix< BaseFloat > &posteriors, const std::vector< int32 > &alignment)
Accumulate the statistics for KL-HMM parameter estimation.
Definition: nnet-kl-hmm.h:133
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
Definition: cu-matrix.cc:1291
int32 output_dim_
Dimension of the output of the Component,.
void MulRowsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = diag(scale) * (*this).
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
void InvertElements()
Inverts all the elements of the matrix.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
Definition: nnet-kl-hmm.h:105
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Abstract class, building block of the network.
KlHmm(int32 dim_in, int32 dim_out)
Definition: nnet-kl-hmm.h:39
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
void AddColSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of columns of M) + beta * *this.
void ApplyFloor(Real floor_val)
Definition: kaldi-matrix.h:354
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
Definition: nnet-kl-hmm.h:120
Matrix< double > kl_stats_
Definition: nnet-kl-hmm.h:147
Component * Copy() const
Copy component (deep copy),.
Definition: nnet-kl-hmm.h:47