transform-common.cc
Go to the documentation of this file.
1 // transform/transform-common.cc
2 
3 // Copyright 2009-2011 Saarland University; Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <vector>
21 
22 #include "base/kaldi-common.h"
24 
25 namespace kaldi {
26 
27 
28 void AffineXformStats::Init(int32 dim, int32 num_gs) {
29  if (dim == 0) {
30  if (num_gs != 0) {
31  KALDI_WARN << "Ignoring 'num_gs' (=" << num_gs << ") argument since "
32  << "dim = 0.";
33  }
34  beta_ = 0.0;
35  K_.Resize(0, 0);
36  G_.clear();
37  dim_ = 0;
38  } else {
39  beta_ = 0.0;
40  K_.Resize(dim, dim + 1, kSetZero);
41  G_.resize(num_gs);
42  for (int32 i = 0; i < num_gs; i++)
43  G_[i].Resize(dim + 1, kSetZero);
44  dim_ = dim;
45  }
46 }
47 
48 void AffineXformStats::Write(std::ostream &out, bool binary) const {
49  WriteToken(out, binary, "<DIMENSION>");
50  WriteBasicType(out, binary, dim_);
51  if (!binary) out << '\n';
52  WriteToken(out, binary, "<BETA>");
53  WriteBasicType(out, binary, beta_);
54  if (!binary) out << '\n';
55  WriteToken(out, binary, "<K>");
56  Matrix<BaseFloat> tmp_k(K_);
57  tmp_k.Write(out, binary);
58  WriteToken(out, binary, "<G>");
59  int32 g_size = static_cast<int32>(G_.size());
60  WriteBasicType(out, binary, g_size);
61  if (!binary) out << '\n';
62  for (std::vector< SpMatrix<double> >::const_iterator itr = G_.begin(),
63  end = G_.end(); itr != end; ++itr) {
64  SpMatrix<BaseFloat> tmp_g(*itr);
65  tmp_g.Write(out, binary);
66  }
67 }
68 
69 void AffineXformStats::Read(std::istream &in, bool binary, bool add) {
70  ExpectToken(in, binary, "<DIMENSION>");
71  ReadBasicType(in, binary, &dim_);
72  ExpectToken(in, binary, "<BETA>");
73  ReadBasicType(in, binary, &beta_);
74  ExpectToken(in, binary, "<K>");
75  Matrix<BaseFloat> tmp_k;
76  tmp_k.Read(in, binary);
77  K_.Resize(tmp_k.NumRows(), tmp_k.NumCols());
78  if (add) {
79  Matrix<double> tmp_k_d(tmp_k);
80  K_.AddMat(1.0, tmp_k_d, kNoTrans);
81  } else {
82  K_.CopyFromMat(tmp_k, kNoTrans);
83  }
84  ExpectToken(in, binary, "<G>");
85  int32 g_size;
86  ReadBasicType(in, binary, &g_size);
87  G_.resize(g_size);
88  SpMatrix<BaseFloat> tmp_g;
89  SpMatrix<double> tmp_g_d;
90  if (add) { tmp_g_d.Resize(tmp_g.NumRows()); }
91  for (size_t i = 0; i < G_.size(); i++) {
92  tmp_g.Read(in, binary, false /*no add*/);
93  G_[i].Resize(tmp_g.NumRows());
94  if (add) {
95  tmp_g_d.CopyFromSp(tmp_g);
96  G_[i].AddSp(1.0, tmp_g_d);
97  } else {
98  G_[i].CopyFromSp(tmp_g);
99  }
100  }
101 }
102 
103 
104 
106  beta_ = 0.0;
107  K_.SetZero();
108  for (std::vector< SpMatrix<double> >::iterator it = G_.begin(),
109  end = G_.end(); it != end; ++it) {
110  it->SetZero();
111  }
112 }
113 
115  KALDI_ASSERT(G_.size() == other.G_.size());
116  KALDI_ASSERT(dim_ == other.dim_);
117  beta_ = other.beta_;
118  K_.CopyFromMat(other.K_, kNoTrans);
119  for (size_t i = 0; i < G_.size(); i++)
120  G_[i].CopyFromSp(other.G_[i]);
121 }
122 
124  KALDI_ASSERT(G_.size() == other.G_.size());
125  KALDI_ASSERT(dim_ == other.dim_);
126  beta_ += other.beta_;
127  K_.AddMat(1.0, other.K_, kNoTrans);
128  for (size_t i = 0; i < G_.size(); i++)
129  G_[i].AddSp(1.0, other.G_[i]);
130 }
131 
133  bool b_is_affine,
134  Matrix<BaseFloat> *c) {
135  if (b.NumRows() == 0 || a.NumCols() == 0) {
136  KALDI_WARN << "Empty matrix in ComposeTransforms";
137  return false;
138  }
139  if (a.NumCols() == b.NumRows()) {
140  c->Resize(a.NumRows(), b.NumCols());
141  c->AddMatMat(1.0, a, kNoTrans, b, kNoTrans, 0.0); // c = a * b.
142  return true;
143  } else if (a.NumCols() == b.NumRows()+1) { // a is affine.
144  if (b_is_affine) { // append 0 0 0 0 ... 1 to b and multiply.
145  Matrix<BaseFloat> b_ext(b.NumRows()+1, b.NumCols());
146  SubMatrix<BaseFloat> b_part(b_ext, 0, b.NumRows(), 0, b.NumCols());
147  b_part.CopyFromMat(b);
148  b_ext(b.NumRows(), b.NumCols()-1) = 1.0; // so the last row is 0 0 0 0 ... 0 1
149  c->Resize(a.NumRows(), b.NumCols());
150  c->AddMatMat(1.0, a, kNoTrans, b_ext, kNoTrans, 0.0); // c = a * b_ext.
151  } else { // extend b by 1 row and column with all zeros except a 1 on diagonal.
152  Matrix<BaseFloat> b_ext(b.NumRows()+1, b.NumCols()+1);
153  SubMatrix<BaseFloat> b_part(b_ext, 0, b.NumRows(), 0, b.NumCols());
154  b_part.CopyFromMat(b);
155  b_ext(b.NumRows(), b.NumCols()) = 1.0; // so the last row is 0 0 0 0 ... 0 1;
156  // rest of last column is zero (this is the offset term)
157  c->Resize(a.NumRows(), b.NumCols()+1);
158  c->AddMatMat(1.0, a, kNoTrans, b_ext, kNoTrans, 0.0); // c = a * b_ext.
159  }
160  return true;
161  } else {
162  KALDI_ERR << "ComposeTransforms: mismatched dimensions, a has " << a.NumCols()
163  << " columns and b has " << b.NumRows() << " rows."; // this is fatal.
164  return false;
165  }
166 }
167 
169  VectorBase<BaseFloat> *vec) {
170  int32 dim = xform.NumRows();
171  KALDI_ASSERT(dim > 0 && xform.NumCols() == dim+1 && vec->Dim() == dim);
172  Vector<BaseFloat> tmp(dim+1);
173  SubVector<BaseFloat> tmp_part(tmp, 0, dim);
174  tmp_part.CopyFromVec(*vec);
175  tmp(dim) = 1.0;
176  // next line is: vec = 1.0 * xform * tmp + 0.0 * vec
177  vec->AddMatVec(1.0, xform, kNoTrans, tmp, 0.0);
178 }
179 
180 } // namespace kaldi
181 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Write(std::ostream &out, bool binary) const
write to stream.
Matrix< double > K_
K_ is the summed outer product of [mean times inverse variance] with [extended data], scaled by the occupation counts; dimension is dim by (dim+1)
void Read(std::istream &in, bool binary, bool add=false)
void Write(std::ostream &out, bool binary) const
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
kaldi::int32 int32
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT NumRows() const
int32 dim_
dim_ is the feature dimension.
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
void Write(std::ostream &out, bool binary) const
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetZero()
Sets matrix to zero.
void Add(const AffineXformStats &other)
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
std::vector< SpMatrix< double > > G_
G_ is the outer product of extended-data, scaled by inverse variance, for each dimension.
void Read(std::istream &in, bool binary, bool add)
void Init(int32 dim, int32 num_gs)
A class representing a vector.
Definition: kaldi-vector.h:406
bool ComposeTransforms(const Matrix< BaseFloat > &a, const Matrix< BaseFloat > &b, bool b_is_affine, Matrix< BaseFloat > *c)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void CopyStats(const AffineXformStats &other)
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void ApplyAffineTransform(const MatrixBase< BaseFloat > &xform, VectorBase< BaseFloat > *vec)
Applies the affine transform 'xform' to the vector 'vec' and overwrites the contents of 'vec'...
double beta_
beta_ is the occupation count.
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501