cu-compressed-matrix.cc
Go to the documentation of this file.
1 // cudamatrix/cu-compressed-matrix.cc
2 
3 // Copyright 2018 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #if HAVE_CUDA == 1
22 #include <cuda_runtime_api.h>
23 #include <cublas_v2.h>
24 #endif
25 
26 #include "base/timer.h"
27 #include "cudamatrix/cu-common.h"
28 #include "cudamatrix/cu-vector.h"
29 #include "cudamatrix/cu-device.h"
30 #include "cudamatrix/cu-kernels.h"
31 #include "cudamatrix/cu-array.h"
33 
34 namespace kaldi {
35 
36 
37 template <typename I>
39  data_(NULL), scale_(range / std::numeric_limits<I>::max()),
40  truncate_(truncate), num_rows_(0), num_cols_(0), stride_(0) {
41 #if HAVE_CUDA == 1
42  KALDI_ASSERT(CuDevice::Instantiate().Enabled());
43 #else
44  KALDI_ERR << "You instantiated CuCompressedMatrix while GPU use "
45  "was not compiled in.";
46 #endif
47 }
48 
// Releases the device buffer held in data_ (if any) and resets the stored
// dimensions and stride to zero, leaving the object empty.  Without CUDA
// support the body is compiled out and this does nothing.
// NOTE(review): the line carrying this member function's signature (listing
// line 50) is missing from this extract.  The body matches the Destroy() call
// made by CopyFromMat() before it reallocates -- confirm against the header.
49 template <typename I>
51 #if HAVE_CUDA == 1
52  if (data_ != NULL) {
53  // we don't bother timing this because Free() won't normally have to
54  // access the GPU at all (due to caching).
55  CuDevice::Instantiate().Free(data_);
56  data_ = NULL;
57  num_rows_ = 0;
58  num_cols_ = 0;
59  stride_ = 0;
60  }
61 #endif
62 }
63 
64 template <typename I>
66  const CuMatrixBase<BaseFloat> &mat) {
67 #if HAVE_CUDA == 1
68  KALDI_ASSERT(CuDevice::Instantiate().Enabled());
69  if (mat.NumRows() == 0)
70  return;
71  if (num_rows_ != mat.NumRows() || num_cols_ != mat.NumCols()) {
72  Destroy();
73  num_rows_ = mat.NumRows();
74  num_cols_ = mat.NumCols();
75  data_ = static_cast<I*>(
76  CuDevice::Instantiate().Malloc(sizeof(I) * num_rows_ * num_cols_));
78  }
79 
80  {
81  CuTimer tim;
82  dim3 dimGrid, dimBlock;
83  GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(),
84  &dimGrid, &dimBlock);
85 
86  if (scale_ == 0.0) { // scale == 0 calls a different kernel from the others.
87  cuda_mat_compress_sign(dimGrid, dimBlock, mat.Data(), mat.Dim(),
88  data_, stride_);
89  } else {
90  cuda_mat_compress(dimGrid, dimBlock, mat.Data(), mat.Dim(),
91  data_, stride_, float(1.0 / scale_),
92  truncate_);
93  }
94  CU_SAFE_CALL(cudaGetLastError());
95 
96  CuDevice::Instantiate().AccuProfile(__func__, tim);
97  }
98 #endif
99 }
100 
101 template <typename I>
103 #if HAVE_CUDA == 1
104  KALDI_ASSERT(CuDevice::Instantiate().Enabled());
105  KALDI_ASSERT(mat->NumRows() == num_rows_ && mat->NumCols() == num_cols_);
106  {
107  CuTimer tim;
108  dim3 dimGrid, dimBlock;
109  GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(),
110  &dimGrid, &dimBlock);
111  BaseFloat scale = (scale_ == 0.0 ? 1.0 : scale_);
112  cuda_mat_uncompress(dimGrid, dimBlock, mat->Data(), mat->Dim(),
113  data_, stride_, float(scale));
114  }
115 #endif
116 }
117 
118 
120  BaseFloat range,
121  bool truncat) {
122  if (t == kCompressedMatrixUint8) {
123  KALDI_ASSERT(range >= 0);
124  return new CuCompressedMatrix<uint8>(range);
125  } else if (t == kCompressedMatrixInt8) {
126  KALDI_ASSERT(range >= 0);
127  return new CuCompressedMatrix<int8>(range);
128  } else if (t == kCompressedMatrixUint16) {
129  KALDI_ASSERT(range > 0);
130  return new CuCompressedMatrix<uint16>(range);
131  } else if (t == kCompressedMatrixInt16) {
132  KALDI_ASSERT(range > 0);
133  return new CuCompressedMatrix<int16>(range);
134  } else {
135  KALDI_ERR << "Unknown compressed-matrix type";
136  return NULL;
137  }
138 }
139 
140 
141 
142 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void CopyToMat(CuMatrixBase< BaseFloat > *mat) const
Copies the contents of *this to 'mat', which should be correctly sized beforehand.
uint64 data_
virtual MatrixIndexT NumRows() const
float BaseFloat
Definition: kaldi-types.h:29
CuCompressedMatrix(BaseFloat range, bool truncate=true)
Constructor which sets 'scale_' according to scale_ = range / std::numeric_limits<I>::max().
virtual void CopyFromMat(const CuMatrixBase< BaseFloat > &mat)
Sets *this to an appropriately compressed copy of 'mat', which includes resizing *this.
Class CuCompressedMatrix, templated on an integer type (expected to be one of: int8, uint8, int16, uint16), this provides a way to approximate a CuMatrix in a more memory-efficient format.
#define KALDI_ERR
Definition: kaldi-error.h:147
virtual MatrixIndexT NumCols() const
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
::MatrixDim Dim() const
Definition: cu-matrix.h:221
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Class CuCompressedMatrixBase is an abstract base class that allows you to compress a matrix of type C...
CuCompressedMatrixBase * NewCuCompressedMatrix(CuCompressedMatrixType t, BaseFloat range, bool truncat)
This function allocates a new CuCompressedMatrix with type determined by t, and with the 'range' and ...