compressed-transform-stats.cc
Go to the documentation of this file.
1 // transform/compressed-transform-stats.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <utility>
21 #include <vector>
22 using std::vector;
23 
25 
26 namespace kaldi {
27 
29  const AffineXformStats &input) {
    // Purpose: fill this object's beta_, K_ and G_ from the uncompressed
    // statistics in `input`.
    // NOTE(review): the opening line of this definition (return type and
    // function name) does not appear in this listing; presumably this is
    // CompressedAffineXformStats::CopyFromAffineXformStats -- confirm against
    // the class header.
    // @param input  Source statistics; its Dim(), beta_, K_ and G_ are read.
30  int32 dim = input.Dim();
31  beta_ = input.beta_;
32  if (beta_ == 0.0) { // empty; no stats.
33  K_.Resize(dim, dim+1); // Will set to zero.
34  // This stores the dimension. Inefficient but this shouldn't happen often.
35  Matrix<float> empty;
36  G_.CopyFromMat(empty); // Sets G empty.
37  return;
38  }
    // Sanity-check the shapes: dim matrices in G_, each with dim+1 rows, and
    // a dim by (dim+1) K_. Only input.G_[0] is checked for its row count.
39  KALDI_ASSERT(input.G_.size() == dim && input.K_.NumCols() == dim+1
40  && input.K_.NumRows() == dim && input.G_[0].NumRows() == dim+1);
41  // OK, we have valid, nonempty stats.
42  // We first slightly change the format of G.
43  Matrix<double> Gtmp(dim, 1 + (((dim+1)*(dim+2))/2));
44  // Gtmp will be compressed into G_. The first element of each
45  // row of Gtmp is the trace of the corresponding G[i], divided
46  // by (beta * dim). [this division is so we expect it to be
47  // approximately 1, to keep things in a good range so they
48  // can be more easily compressed.] The next (((dim+1)*(dim+2))/2))
49  // elements are the linearized form of the symmetric (d+1) by (d+1) matrix
50  // input.G_[i], normalized appropriately using that trace.
51 
52  Matrix<double> K_corrected(input.K_); // This K_corrected matrix is a version of the
53  // K_ matrix that we will correct to ensure that the derivative of the
54  // objective function around the default matrix stays the same after
55  // compression.
56 
    // Scratch matrix reused on every iteration to hold the reconstructed G_i.
57  SpMatrix<double> Gi_tmp(dim+1);
58  for (int32 i = 0; i < dim; i++) {
59  SubVector<double> this_row(Gtmp, i);
    // Linearize G_i into row i of Gtmp, then immediately rebuild it into
    // Gi_tmp so the rebuilt matrix can be compared against the original.
60  PrepareOneG(input.G_[i], beta_, &this_row);
61  ExtractOneG(this_row, beta_, &Gi_tmp);
    // NOTE(review): Gi_tmp is rebuilt from this_row, a row of the
    // double-precision Gtmp; G_.CopyFromMat(Gtmp) only runs after this loop,
    // so any loss introduced by that final copy is not reflected in Gi_tmp.
    // Confirm that this is the intended source for the correction below.
62 
63  // At this stage we use the difference between Gi and Gi_tmp to
64  // make a correction to K_.
65  Vector<double> old_g_row(dim+1), new_g_row(dim+1);
66  old_g_row.CopyRowFromSp(input.G_[i], i); // i'th row of old G_i.
67  new_g_row.CopyRowFromSp(Gi_tmp, i); // i'th row of compressed+reconstructed G_i.
68  // The auxiliary function for the i'th row of the transform, v_i, is as follows
69  // [ignoring the determinant], where k_i is the i'th row of K:
70  // v_i . k_i - 0.5 v_i^T G_i v_i.
71  // Let u_i be the unit vector in the i'th dimension. This is the "default" value
72  // of v_i. The derivative of the auxf w.r.t. v_i, taken around this point, is:
73  // k_i - G_i u_i
74  // which is the same as k_i minus the i'th row (or column) of G_i
75  // we want the derivative to be unchanged after compression:
76  // new_ki - new_G_i u_i = old_ki - old_G_i u_i
77  // new_ki = old_ki - old_G_i u_i + new_G_i u_i.
78  // new_ki = old_ki - (i'th row of old G_i) + (i'th row of new G_i).
79 
    // Apply that formula in place to row i of K_corrected.
80  SubVector<double> Ki(K_corrected, i);
81  Ki.AddVec(-1.0, old_g_row);
82  Ki.AddVec(+1.0, new_g_row);
83  }
    // Store the corrected K and the linearized G rows in the members.
84  K_.Resize(dim, dim+1);
85  K_.CopyFromMat(K_corrected);
86  G_.CopyFromMat(Gtmp);
87 }
88 
90  AffineXformStats *output) const {
    // Purpose: expand the stored beta_, K_ and G_ back into `output`.
    // NOTE(review): the opening line of this definition (return type and
    // function name) does not appear in this listing; presumably this is
    // CompressedAffineXformStats::CopyToAffineXformStats -- confirm against
    // the class header.
    // @param output  Destination statistics; re-initialized via Init() when
    //                its dimension or number of G matrices does not match,
    //                or when there are no stats (beta_ == 0.0).
91  int32 dim = K_.NumRows();
    // The dimension is recovered from K_; zero rows means nothing is stored.
92  if (dim == 0) {
93  output->Init(0, 0);
94  return;
95  }
96  if (output->Dim() != dim || output->G_.size() != dim || beta_ == 0.0)
97  output->Init(dim, dim);
98  if (beta_ == 0.0) return; // Init() will have cleared it.
99  output->beta_ = beta_;
100  output->K_.CopyFromMat(K_);
101  Matrix<double> Gtmp(G_.NumRows(), G_.NumCols()); // CopyToMat no longer
102  // resizes, we have to provide correctly-sized matrix
103  G_.CopyToMat(&Gtmp);
    // Each row of Gtmp holds one linearized G_i; rebuild it directly into
    // the corresponding matrix of the output.
104  for (int32 i = 0; i < dim; i++) {
105  SubVector<double> this_row(Gtmp, i);
106  ExtractOneG(this_row, beta_, &(output->G_[i]));
107  }
108 }
109 
// Serializes this object to `os`: an opening token, then beta_, K_ and G_
// in that order, then a closing token. Read() consumes the same sequence.
// @param os      Stream to write to.
// @param binary  Passed through unchanged to every write call below.
110 void CompressedAffineXformStats::Write(std::ostream &os, bool binary) const {
111  WriteToken(os, binary, "<CompressedAffineXformStats>");
112  WriteBasicType(os, binary, beta_);
113  K_.Write(os, binary);
114  G_.Write(os, binary);
115  WriteToken(os, binary, "</CompressedAffineXformStats>");
116 }
117 
// Deserializes this object from `is`, expecting exactly what Write() emits:
// an opening token, then beta_, K_ and G_ in that order, then a closing
// token. The two ExpectToken calls check the opening and closing tokens.
// @param is      Stream to read from.
// @param binary  Passed through unchanged to every read call below.
118 void CompressedAffineXformStats::Read(std::istream &is, bool binary) {
119  ExpectToken(is, binary, "<CompressedAffineXformStats>");
120  ReadBasicType(is, binary, &beta_);
121  K_.Read(is, binary);
122  G_.Read(is, binary);
123  ExpectToken(is, binary, "</CompressedAffineXformStats>");
124 }
125 
126 // Convert one G matrix into linearized, normalized form ready
127 // for compression. A static function.
    // NOTE(review): the opening line of this definition (return type, name
    // and first parameter) does not appear in this listing; the body reads a
    // symmetric matrix named Gi, presumably `const SpMatrix<double> &Gi` --
    // confirm against the class header.
    // Output layout: (*linearized)(0) is trace(Gi) / (beta * dim); the next
    // ((dim+1)*(dim+2))/2 elements are the packed Cholesky factor of Gi
    // scaled by sqrt(dim / trace(Gi)). Here dim is Gi.NumRows() - 1.
129  double beta,
130  SubVector<double> *linearized) {
131  KALDI_ASSERT(beta != 0.0);
132  int32 dim = Gi.NumRows() - 1;
133  double raw_trace = Gi.Trace();
    // NOTE(review): nothing here guards against dim == 0 or a non-positive
    // raw_trace; both would feed a division (and the sqrt below) with an
    // invalid value. Presumably callers only pass valid, nonempty stats.
134  double norm_trace = (raw_trace / (beta * dim));
135  (*linearized)(0) = norm_trace; // should be around 1.
    // View onto the part of the output that follows the leading trace element.
136  SubVector<double> linearized_matrix((*linearized), 1, ((dim+1)*(dim+2))/2);
137  TpMatrix<double> C(dim+1);
138  C.Cholesky(Gi); // Get the Cholesky factor: after we compress and uncompress
139  // this and re-create Gi, it's bound to be +ve semidefinite, which is a Good Thing.
140  C.Scale(sqrt(dim / raw_trace)); // This is the scaling that is equivalent
141  // to scaling Gi by dim / raw_trace, which would make the diagonals
142  // of Gi average to 1. We can reverse this when we decompress.
143  linearized_matrix.CopyFromPacked(C);
144 }
145 
146 // Reverse the process of PrepareOneG. A static function.
    // NOTE(review): the opening line of this definition (return type, name
    // and first parameter) does not appear in this listing; the body reads a
    // vector named linearized, presumably `const SubVector<double> &linearized`
    // -- confirm against the class header.
    // *Gi must already be sized (dim+1) by (dim+1): dim is taken from it, and
    // its previous contents are overwritten.
148  double beta,
149  SpMatrix<double> *Gi) {
150  int32 dim = Gi->NumRows() - 1;
151  KALDI_ASSERT(dim > 0);
    // Undo the trace normalization: element 0 holds trace / (beta * dim).
152  double norm_trace = linearized(0);
153  double raw_trace = norm_trace * beta * dim;
154  TpMatrix<double> C(dim+1);
    // The remaining elements are the packed, scaled Cholesky factor.
155  C.CopyFromVec(linearized.Range(1, ((dim+1)*(dim+2))/2));
    // Gi = (raw_trace / dim) * C * C^T; the trailing 0.0 discards whatever
    // Gi held before, and the factor reverses PrepareOneG's sqrt scaling.
156  Gi->AddTp2(raw_trace / dim, C, kNoTrans, 0.0);
157 }
158 
159 
160 
161 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void CopyFromVec(const SubVector< OtherReal > &orig)
CopyFromVec just interprets the vector as having the same layout as the packed matrix.
void Write(std::ostream &out, bool binary) const
write to stream.
Matrix< double > K_
K_ is the summed outer product of [mean times inverse variance] with [extended data], scaled by the occupation counts; dimension is dim by (dim+1)
void Scale(Real c)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
Real Trace() const
Definition: sp-matrix.cc:171
kaldi::int32 int32
void Write(std::ostream &os, bool binary) const
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT NumRows() const
void Read(std::istream &is, bool binary)
static void PrepareOneG(const SpMatrix< double > &Gi, double beta, SubVector< double > *linearized)
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void AddTp2(const Real alpha, const TpMatrix< Real > &T, MatrixTransposeType transM, const Real beta=0.0)
The following function does: this <-- beta*this + alpha * T * T^T.
Definition: sp-matrix.cc:1156
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
void CopyFromAffineXformStats(const AffineXformStats &input)
std::vector< SpMatrix< double > > G_
G_ is the outer product of extended-data, scaled by inverse variance, for each dimension.
void CopyToAffineXformStats(AffineXformStats *output) const
void Init(int32 dim, int32 num_gs)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Write(std::ostream &os, bool binary) const
static void ExtractOneG(const SubVector< double > &linearized, double beta, SpMatrix< double > *Gi)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void CopyToMat(MatrixBase< Real > *mat, MatrixTransposeType trans=kNoTrans) const
Copies contents to matrix.
double beta_
beta_ is the occupation count.
void Read(std::istream &is, bool binary)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void CopyFromMat(const MatrixBase< Real > &mat, CompressionMethod method=kAutomaticMethod)
This will resize *this and copy the contents of mat to *this.
void CopyRowFromSp(const SpMatrix< OtherReal > &S, MatrixIndexT row)
Extracts a row of the symmetric matrix S.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94