compressed-matrix.h
Go to the documentation of this file.
1 // matrix/compressed-matrix.h
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 // Frantisek Skala, Wei Shi
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_
22 #define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1
23 
24 #include "matrix/kaldi-matrix.h"
25 
26 namespace kaldi {
27 
30 
31 
32 
33 /*
34  The enum CompressionMethod is used when creating a CompressedMatrix (a lossily
35  compressed matrix) from a regular Matrix. It dictates how we choose the
36  compressed format and how we choose the ranges of floats that are represented
37  by particular integers.
38 
39  kAutomaticMethod = 1 This is the default when you don't specify the
40  compression method. It is a shorthand for using
41  kSpeechFeature if the num-rows is more than 8, and
42  kTwoByteAuto otherwise.
43  kSpeechFeature = 2 This is the most complicated of the compression methods,
44  and was designed for speech features which have a roughly
45  Gaussian distribution with different ranges for each
46  dimension. Each element is stored in one byte, but there
47  is an 8-byte header per column; the spacing of the
48  integer values is not uniform but is in 3 ranges.
49  kTwoByteAuto = 3 Each element is stored in two bytes as a uint16, with
50  the representable range of values chosen automatically
51  with the minimum and maximum elements of the matrix as
52  its edges.
53  kTwoByteSignedInteger = 4
54  Each element is stored in two bytes as a uint16, with
55  the representable range of values chosen to coincide with
56  what you'd get if you stored signed integers, i.e.
57  [-32768.0, 32767.0]. Suitable for waveform data that
58  was previously stored as 16-bit PCM.
59  kOneByteAuto = 5 Each element is stored in one byte as a uint8, with the
60  representable range of values chosen automatically with
61  the minimum and maximum elements of the matrix as its
62  edges.
63  kOneByteUnsignedInteger = 6 Each element is stored in
64  one byte as a uint8, with the representable range of
65  values equal to [0.0, 255.0].
66  kOneByteZeroOne = 7 Each element is stored in
67  one byte as a uint8, with the representable range of
68  values equal to [0.0, 1.0]. Suitable for image data
69  that has previously been compressed as int8.
70 
71  // We can add new methods here as needed: if they just imply different ways
72  // of selecting the min_value and range, and a num-bytes = 1 or 2, they will
73  // be trivial to implement.
74 */
83 };
84 
85 
86 /*
87  This class does lossy compression of a matrix. It supports various compression
88  methods, see enum CompressionMethod.
89 */
90 
92  public:
93  CompressedMatrix(): data_(NULL) { }  // Default constructor: no buffer is allocated (data_ is NULL).
94 
96 
97  template<typename Real>
98  explicit CompressedMatrix(const MatrixBase<Real> &mat,
100  data_(NULL) { CopyFromMat(mat, method); }
101 
115  const MatrixIndexT row_offset,
116  const MatrixIndexT num_rows,
117  const MatrixIndexT col_offset,
118  const MatrixIndexT num_cols,
119  bool allow_padding = false);
120 
121  void *Data() const { return this->data_; }  // Returns the internal data_ pointer unchanged (may be NULL).
122 
124  template<typename Real>
125  void CopyFromMat(const MatrixBase<Real> &mat,
127 
129 
130  CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator.
131 
132  template<typename Real>
133  CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator.
134 
137  template<typename Real>
138  void CopyToMat(MatrixBase<Real> *mat,
139  MatrixTransposeType trans = kNoTrans) const;
140 
141  void Write(std::ostream &os, bool binary) const;
142 
143  void Read(std::istream &is, bool binary);
144 
146  inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 :  // 0 when no data is held;
147  (*reinterpret_cast<GlobalHeader*>(data_)).num_rows; }  // otherwise num_rows is read from the GlobalHeader at data_.
148 
150  inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 :  // 0 when no data is held;
151  (*reinterpret_cast<GlobalHeader*>(data_)).num_cols; }  // otherwise num_cols is read from the GlobalHeader at data_.
152 
155  template<typename Real>
156  void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const;  // Copies row #row of the matrix into vector v.
157 
160  template<typename Real>
161  void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const;  // Copies column #col of the matrix into vector v.
162 
166  template<typename Real>
167  void CopyToMat(int32 row_offset,
168  int32 column_offset,
169  MatrixBase<Real> *dest) const;
170 
171  void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); }  // Exchanges only the data_ pointers of *this and *other.
172 
173  void Clear();
174 
177  void Scale(float alpha);  // Scales all elements of the matrix by alpha.
178 
179  friend class Matrix<float>;
180  friend class Matrix<double>;
181  private:
182 
183  // This enum describes the different compressed-data formats: these are
184  // distinct from the compression methods although all of the methods apart
185  // from kAutomaticMethod dictate a particular compressed-data format.
186  //
187  // kOneByteWithColHeaders means there is a GlobalHeader and each
188  // column has a PerColHeader; the actual data is stored in
189  // one byte per element, in column-major order (the mapping
190  // from integers to floats is a little complicated).
191  // kTwoByte means there is a global header but no PerColHeader;
192  // the actual data is stored in two bytes per element in
193  // row-major order; it's decompressed as:
194  // uint16 i; GlobalHeader g;
195  // float f = g.min_value + i * (g.range / 65535.0)
196  // kOneByte means there is a global header but no PerColHeader;
197  // the data is stored in one byte per element in row-major
198  // order and is decompressed as:
199  // uint8 i; GlobalHeader g;
200  // float f = g.min_value + i * (g.range / 255.0)
201  enum DataFormat {
203  kTwoByte = 2,
205  };
206 
207 
208  // allocates data using new [], ensures byte alignment
209  // sufficient for float.
210  static void *AllocateData(int32 num_bytes);
211 
212  struct GlobalHeader {  // Header stored first in the data_ buffer.
213  int32 format; // Represents the enum DataFormat.
214  float min_value; // min_value and range represent the ranges of the integer
215  // data in the kTwoByte and kOneByte formats, and the
216  // range of the PerColHeader uint16's in the
217  // kOneByteWithColHeaders format.
218  float range;
221  };
222 
223  // This function computes the global header for compressing this data.
224  template<typename Real>
225  static inline void ComputeGlobalHeader(const MatrixBase<Real> &mat,
226  CompressionMethod method,
227  GlobalHeader *header);
228 
229 
230  // The number of bytes we need to request when allocating 'data_'.
231  static MatrixIndexT DataSize(const GlobalHeader &header);
232 
233  // This struct is only used in format kOneByteWithColHeaders.
234  struct PerColHeader {
235  uint16 percentile_0;
239  };
240 
241  template<typename Real>
242  static void CompressColumn(const GlobalHeader &global_header,
243  const Real *data, MatrixIndexT stride,
244  int32 num_rows, PerColHeader *header,
245  uint8 *byte_data);
246  template<typename Real>
247  static void ComputeColHeader(const GlobalHeader &global_header,
248  const Real *data, MatrixIndexT stride,
249  int32 num_rows, PerColHeader *header);
250 
251  static inline uint16 FloatToUint16(const GlobalHeader &global_header,
252  float value);
253 
254  // this is used only in the kOneByte compression format.
255  static inline uint8 FloatToUint8(const GlobalHeader &global_header,
256  float value);
257 
258  static inline float Uint16ToFloat(const GlobalHeader &global_header,
259  uint16 value);
260 
261  // this is used only in the kOneByteWithColHeaders compression format.
262  static inline uint8 FloatToChar(float p0, float p25,
263  float p75, float p100,
264  float value);
265 
266  // this is used only in the kOneByteWithColHeaders compression format.
267  static inline float CharToFloat(float p0, float p25,
268  float p75, float p100,
269  uint8 value);
270 
271  void *data_; // first GlobalHeader, then PerColHeader (repeated), then
272  // the byte data for each column (repeated). Note: don't intersperse
273  // the byte data with the PerColHeaders, because of alignment issues.
274 
275 };
276 
278 
279 
280 } // namespace kaldi
281 
282 
283 #endif // KALDI_MATRIX_COMPRESSED_MATRIX_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void CopyRowToVec(MatrixIndexT row, VectorBase< Real > *v) const
Copies row #row of the matrix into vector v.
void CopyColToVec(MatrixIndexT col, VectorBase< Real > *v) const
Copies column #col of the matrix into vector v.
CompressedMatrix(const MatrixBase< Real > &mat, CompressionMethod method=kAutomaticMethod)
void Swap(CompressedMatrix *other)
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
A class for storing matrices.
Definition: kaldi-matrix.h:823
CompressedMatrix & operator=(const CompressedMatrix &mat)
void Write(std::ostream &os, bool binary) const
void Read(std::istream &is, bool binary)
static uint16 FloatToUint16(const GlobalHeader &global_header, float value)
int32 MatrixIndexT
Definition: matrix-common.h:98
void Scale(float alpha)
scales all elements of matrix by alpha.
static float Uint16ToFloat(const GlobalHeader &global_header, uint16 value)
static void ComputeGlobalHeader(const MatrixBase< Real > &mat, CompressionMethod method, GlobalHeader *header)
static uint8 FloatToUint8(const GlobalHeader &global_header, float value)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
static void CompressColumn(const GlobalHeader &global_header, const Real *data, MatrixIndexT stride, int32 num_rows, PerColHeader *header, uint8 *byte_data)
static float CharToFloat(float p0, float p25, float p75, float p100, uint8 value)
MatrixTransposeType
Definition: matrix-common.h:32
static uint8 FloatToChar(float p0, float p25, float p75, float p100, float value)
static void * AllocateData(int32 num_bytes)
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
static void ComputeColHeader(const GlobalHeader &global_header, const Real *data, MatrixIndexT stride, int32 num_rows, PerColHeader *header)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void CopyToMat(MatrixBase< Real > *mat, MatrixTransposeType trans=kNoTrans) const
Copies contents to matrix.
void CopyFromMat(const MatrixBase< Real > &mat, CompressionMethod method=kAutomaticMethod)
This will resize *this and copy the contents of mat to *this.
static MatrixIndexT DataSize(const GlobalHeader &header)