packed-matrix.cc
Go to the documentation of this file.
1 // matrix/packed-matrix.cc
2 
3 // Copyright 2009-2012 Microsoft Corporation Saarland University
4 // Johns Hopkins University (Author: Daniel Povey);
5 // Haihua Xu
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 
13 // http://www.apache.org/licenses/LICENSE-2.0
14 
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
26 #include "matrix/cblas-wrappers.h"
27 #include "matrix/packed-matrix.h"
28 #include "matrix/kaldi-vector.h"
29 
30 namespace kaldi {
31 
32 template<typename Real>
33 void PackedMatrix<Real>::Scale(Real alpha) {
34  size_t nr = num_rows_,
35  sz = (nr * (nr + 1)) / 2;
36  cblas_Xscal(sz, alpha, data_, 1);
37 }
38 
39 template<typename Real>
40 void PackedMatrix<Real>::AddPacked(const Real alpha, const PackedMatrix<Real> &rMa) {
41  KALDI_ASSERT(num_rows_ == rMa.NumRows());
42  size_t nr = num_rows_,
43  sz = (nr * (nr + 1)) / 2;
44  cblas_Xaxpy(sz, alpha, rMa.Data(), 1, data_, 1);
45 }
46 
47 template<typename Real>
49  Real *data = data_;
50  size_t dim = num_rows_, size = ((dim*(dim+1))/2);
51  for (size_t i = 0; i < size; i++)
52  data[i] = RandGauss();
53 }
54 
55 template<typename Real>
57  if (r == 0) {
58  num_rows_ = 0;
59  data_ = 0;
60  return;
61  }
62  size_t size = ((static_cast<size_t>(r) * static_cast<size_t>(r + 1)) / 2);
63 
64  if (static_cast<size_t>(static_cast<MatrixIndexT>(size)) != size) {
65  KALDI_WARN << "Allocating packed matrix whose full dimension does not fit "
66  << "in MatrixIndexT: not all code is tested for this case.";
67  }
68 
69  void *data; // aligned memory block
70  void *temp;
71 
72  if ((data = KALDI_MEMALIGN(16, size * sizeof(Real), &temp)) != NULL) {
73  this->data_ = static_cast<Real *> (data);
74  this->num_rows_ = r;
75  } else {
76  throw std::bad_alloc();
77  }
78 }
79 
80 template<typename Real>
82  std::swap(data_, other->data_);
83  std::swap(num_rows_, other->num_rows_);
84 }
85 
86 template<typename Real>
88  std::swap(data_, other->data_);
89  std::swap(num_rows_, other->num_rows_);
90 }
91 
92 
93 template<typename Real>
95  // the next block uses recursion to handle what we have to do if
96  // resize_type == kCopyData.
97  if (resize_type == kCopyData) {
98  if (this->data_ == NULL || r == 0) resize_type = kSetZero; // nothing to copy.
99  else if (this->num_rows_ == r) { return; } // nothing to do.
100  else {
101  // set tmp to a packed matrix of the desired size.
103  size_t r_min = std::min(r, num_rows_);
104  size_t mem_size_min = sizeof(Real) * (r_min*(r_min+1))/2,
105  mem_size_full = sizeof(Real) * (r*(r+1))/2;
106  // Copy the contents to tmp.
107  memcpy(tmp.data_, data_, mem_size_min);
108  char *ptr = static_cast<char*>(static_cast<void*>(tmp.data_));
109  // Set the rest of the contents of tmp to zero.
110  memset(static_cast<void*>(ptr + mem_size_min), 0, mem_size_full-mem_size_min);
111  tmp.Swap(this);
112  return;
113  }
114  }
115  if (data_ != NULL) Destroy();
116  Init(r);
117  if (resize_type == kSetZero) SetZero();
118 }
119 
120 
121 
122 template<typename Real>
124  Real *ptr = data_;
125  for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
126  *ptr += r;
127  ptr += i;
128  }
129 }
130 
131 template<typename Real>
133  Real *ptr = data_;
134  for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
135  *ptr *= alpha;
136  ptr += i;
137  }
138 }
139 
140 template<typename Real>
141 void PackedMatrix<Real>::SetDiag(Real alpha) {
142  Real *ptr = data_;
143  for (MatrixIndexT i = 2; i <= num_rows_+1; i++) {
144  *ptr = alpha;
145  ptr += i;
146  }
147 }
148 
149 
150 
151 template<typename Real>
152 template<typename OtherReal>
154  KALDI_ASSERT(NumRows() == orig.NumRows());
155  if (sizeof(Real) == sizeof(OtherReal)) {
156  memcpy(data_, orig.Data(), SizeInBytes());
157  } else {
158  Real *dst = data_;
159  const OtherReal *src = orig.Data();
160  size_t nr = NumRows(),
161  size = (nr * (nr + 1)) / 2;
162  for (size_t i = 0; i < size; i++, dst++, src++)
163  *dst = *src;
164  }
165 }
166 
167 // template instantiations.
168 template
170 template
172 template
174 template
176 
177 
178 
179 template<typename Real>
180 template<typename OtherReal>
182  MatrixIndexT size = (NumRows()*(NumRows()+1)) / 2;
183  KALDI_ASSERT(vec.Dim() == size);
184  if (sizeof(Real) == sizeof(OtherReal)) {
185  memcpy(data_, vec.Data(), size * sizeof(Real));
186  } else {
187  Real *dst = data_;
188  const OtherReal *src = vec.Data();
189  for (MatrixIndexT i = 0; i < size; i++, dst++, src++)
190  *dst = *src;
191  }
192 }
193 
194 // template instantiations.
195 template
197 template
199 template
201 template
203 
204 
205 
206 template<typename Real>
208  memset(data_, 0, SizeInBytes());
209 }
210 
211 template<typename Real>
213  memset(data_, 0, SizeInBytes());
214  for (MatrixIndexT row = 0;row < num_rows_;row++)
215  (*this)(row, row) = 1.0;
216 }
217 
218 template<typename Real>
220  Real ans = 0.0;
221  for (MatrixIndexT row = 0;row < num_rows_;row++)
222  ans += (*this)(row, row);
223  return ans;
224 }
225 
226 template<typename Real>
228  // we need to free the data block if it was defined
229  if (data_ != NULL) KALDI_MEMALIGN_FREE(data_);
230  data_ = NULL;
231  num_rows_ = 0;
232 }
233 
234 
235 template<typename Real>
236 void PackedMatrix<Real>::Write(std::ostream &os, bool binary) const {
237  if (!os.good()) {
238  KALDI_ERR << "Failed to write vector to stream: stream not good";
239  }
240 
241  int32 size = this->NumRows(); // make the size 32-bit on disk.
242  KALDI_ASSERT(this->NumRows() == (MatrixIndexT) size);
243  MatrixIndexT num_elems = ((size+1)*(MatrixIndexT)size)/2;
244 
245  if(binary) {
246  std::string my_token = (sizeof(Real) == 4 ? "FP" : "DP");
247  WriteToken(os, binary, my_token);
248  WriteBasicType(os, binary, size);
249  // We don't use the built-in Kaldi write routines for the floats, as they are
250  // not efficient enough.
251  os.write((const char*) data_, sizeof(Real) * num_elems);
252  }
253  else {
254  if(size == 0)
255  os<<"[ ]\n";
256  else {
257  os<<"[\n";
258  MatrixIndexT i = 0;
259  for (int32 j = 0; j < size; j++) {
260  for (int32 k = 0; k < j + 1; k++) {
261  WriteBasicType(os, binary, data_[i++]);
262  }
263  os << ( (j==size-1)? "]\n" : "\n");
264  }
265  KALDI_ASSERT(i == num_elems);
266  }
267  }
268  if (os.fail()) {
269  KALDI_ERR << "Failed to write packed matrix to stream";
270  }
271 }
272 
273 // template<typename Real>
274 // void Save (std::ostream & os, const PackedMatrix<Real>& rM)
275 // {
276 // const Real* p_elem = rM.data();
277 // for (MatrixIndexT i = 0; i < rM.NumRows(); i++) {
278 // for (MatrixIndexT j = 0; j <= i ; j++) {
279 // os << *p_elem;
280 // p_elem++;
281 // if (j == i) {
282 // os << '\n';
283 // }
284 // else {
285 // os << ' ';
286 // }
287 // }
288 // }
289 // if (os.fail())
290 // KALDI_ERR("Failed to write packed matrix to stream");
291 // }
292 
293 
294 
295 
296 
297 template<typename Real>
298 void PackedMatrix<Real>::Read(std::istream& is, bool binary, bool add) {
299  if (add) {
300  PackedMatrix<Real> tmp;
301  tmp.Read(is, binary, false); // read without adding.
302  if (this->NumRows() == 0) this->Resize(tmp.NumRows());
303  else {
304  if (this->NumRows() != tmp.NumRows()) {
305  if (tmp.NumRows() == 0) return; // do nothing in this case.
306  else KALDI_ERR << "PackedMatrix::Read, size mismatch " << this->NumRows()
307  << " vs. " << tmp.NumRows();
308  }
309  }
310  this->AddPacked(1.0, tmp);
311  return;
312  } // now assume add == false.
313 
314  std::ostringstream specific_error;
315  MatrixIndexT pos_at_start = is.tellg();
316  int peekval = Peek(is, binary);
317  const char *my_token = (sizeof(Real) == 4 ? "FP" : "DP");
318  const char *new_format_token = "[";
319  bool is_new_format = false;//added by hxu
320  char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
321  int32 size;
322  MatrixIndexT num_elems;
323 
324  if (peekval == other_token_start) { // need to instantiate the other type to read it.
325  typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa.
326  PackedMatrix<OtherType> other(this->NumRows());
327  other.Read(is, binary, false); // add is false at this point.
328  this->Resize(other.NumRows());
329  this->CopyFromPacked(other);
330  return;
331  }
332  std::string token;
333  ReadToken(is, binary, &token);
334  if (token != my_token) {
335  if(token != new_format_token) {
336  specific_error << ": Expected token " << my_token << ", got " << token;
337  goto bad;
338  }
339  //new format it is
340  is_new_format = true;
341  }
342  if(!is_new_format) {
343  ReadBasicType(is, binary, &size); // throws on error.
344  if ((MatrixIndexT)size != this->NumRows()) {
345  KALDI_ASSERT(size>=0);
346  this->Resize(size);
347  }
348  num_elems = ((size+1)*(MatrixIndexT)size)/2;
349  if (!binary) {
350  for (MatrixIndexT i = 0; i < num_elems; i++) {
351  ReadBasicType(is, false, data_+i); // will throw on error.
352  }
353  } else {
354  if (num_elems)
355  is.read(reinterpret_cast<char*>(data_), sizeof(Real)*num_elems);
356  }
357  if (is.fail()) goto bad;
358  return;
359  }
360  else {
361  std::vector<Real> data;
362  while(1) {
363  int32 num_lines = 0;
364  int i = is.peek();
365  if (i == -1) { specific_error << "Got EOF while reading matrix data"; goto bad; }
366  else if (static_cast<char>(i) == ']') { // Finished reading matrix.
367  is.get(); // eat the "]".
368  i = is.peek();
369  if (static_cast<char>(i) == '\r') {
370  is.get();
371  is.get(); // get \r\n (must eat what we wrote)
372  }// I don't actually understand what it's doing here
373  else if (static_cast<char>(i) == '\n') { is.get(); } // get \n (must eat what we wrote)
374 
375  if (is.fail()) {
376  KALDI_WARN << "After end of matrix data, read error.";
377  // we got the data we needed, so just warn for this error.
378  }
379  //now process the data:
380  num_lines = int32(sqrt(data.size()*2));
381 
382  KALDI_ASSERT(data.size() == num_lines*(num_lines+1)/2);
383 
384  this->Resize(num_lines);
385 
386  //std::cout<<data.size()<<' '<<num_lines<<'\n';
387 
388  for(int32 i = 0; i < data.size(); i++) {
389  data_[i] = data[i];
390  }
391  return;
392  //std::cout<<"here!!!!!hxu!!!!!"<<std::endl;
393  }
394  else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
395  Real r;
396  is >> r;
397  if (is.fail()) {
398  specific_error << "Stream failure/EOF while reading matrix data.";
399  goto bad;
400  }
401  data.push_back(r);
402  }
403  else if (isspace(i)) {
404  is.get(); // eat the space and do nothing.
405  } else { // NaN or inf or error.
406  std::string str;
407  is >> str;
408  if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
409  !KALDI_STRCASECMP(str.c_str(), "infinity")) {
410  data.push_back(std::numeric_limits<Real>::infinity());
411  KALDI_WARN << "Reading infinite value into matrix.";
412  } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
413  data.push_back(std::numeric_limits<Real>::quiet_NaN());
414  KALDI_WARN << "Reading NaN value into matrix.";
415  } else {
416  specific_error << "Expecting numeric matrix data, got " << str;
417  goto bad;
418  }
419  }
420  }
421  }
422 bad:
423  KALDI_ERR << "Failed to read packed matrix from stream. " << specific_error.str()
424  << " File position at start is "
425  << pos_at_start << ", currently " << is.tellg();
426 }
427 
428 
429 // Instantiate PackedMatrix for float and double.
430 template
431 class PackedMatrix<float>;
432 
433 template
435 
436 
437 } // namespace kaldi
438 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void CopyFromVec(const SubVector< OtherReal > &orig)
CopyFromVec just interprets the vector as having the same layout as the packed matrix.
This class provides a way for switching between double and float types.
Definition: matrix-common.h:84
MatrixResizeType
Definition: matrix-common.h:37
Real Trace() const
< Set to random values of a normal distribution
void Scale(Real c)
void Read(std::istream &in, bool binary, bool add=false)
void Write(std::ostream &out, bool binary) const
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
Real * data_
data memory area
Definition: kaldi-matrix.h:808
void AddPacked(const Real alpha, const PackedMatrix< Real > &M)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
A class for storing matrices.
Definition: kaldi-matrix.h:823
void ScaleDiag(const Real alpha)
void SetUnit()
< Set to zero
MatrixIndexT NumRows() const
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
Definition: io-funcs.cc:145
uint64 data_
MatrixIndexT num_rows_
void SetRandn()
< Set to unit matrix.
void AddToDiag(const Real r)
int32 MatrixIndexT
Definition: matrix-common.h:98
Packed matrix: base class for triangular and symmetric matrices.
Definition: matrix-common.h:64
void cblas_Xscal(const int N, const float alpha, float *data, const int inc)
MatrixIndexT num_rows_
< Number of columns
Definition: kaldi-matrix.h:813
#define KALDI_MEMALIGN(align, size, pp_orig)
Definition: kaldi-utils.h:58
void Swap(PackedMatrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
void SetDiag(const Real alpha)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_MEMALIGN_FREE(x)
Definition: kaldi-utils.h:60
#define KALDI_WARN
Definition: kaldi-error.h:150
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: kaldi-vector.h:70
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Init(MatrixIndexT dim)
Init assumes the current contents of the class are is invalid (i.e.
void CopyFromPacked(const PackedMatrix< OtherReal > &orig)
void cblas_Xaxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_STRCASECMP
Definition: kaldi-utils.h:147
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero).