cu-common.cc
Go to the documentation of this file.
1 // cudamatrix/cu-common.cc
2 
3 // Copyright 2013 Karel Vesely
4 // 2015 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_CUDAMATRIX_COMMON_H_
22 #define KALDI_CUDAMATRIX_COMMON_H_
23 
24 // This file contains the implementations of the helper functions
25 // declared in cudamatrix/cu-common.h (NVTX tracing, kernel block-size
26 // selection, and CUDA library status-to-string conversion).
27 #include <mutex>
28 #include "base/kaldi-common.h"
29 #include "matrix/kaldi-blas.h"
30 #include "cudamatrix/cu-device.h"
31 #include "cudamatrix/cu-common.h"
33 
34 namespace kaldi {
35 
36 #if HAVE_CUDA == 1
37 
38 #ifdef USE_NVTX
39 NvtxTracer::NvtxTracer(const char* name) {
40  const uint32_t colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff };
41  const int num_colors = sizeof(colors)/sizeof(uint32_t);
42  int color_id = ((int)name[0])%num_colors;
43  nvtxEventAttributes_t eventAttrib = {0};
44  eventAttrib.version = NVTX_VERSION;
45  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
46  eventAttrib.colorType = NVTX_COLOR_ARGB;
47  eventAttrib.color = colors[color_id];
48  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
49  eventAttrib.message.ascii = name;
50  nvtxRangePushEx(&eventAttrib);
51  // nvtxRangePushA(name);
52 }
53 NvtxTracer::~NvtxTracer() {
54  nvtxRangePop();
55 }
56 #endif
57 
58 cublasOperation_t KaldiTransToCuTrans(MatrixTransposeType kaldi_trans) {
59  cublasOperation_t cublas_trans;
60 
61  if (kaldi_trans == kNoTrans)
62  cublas_trans = CUBLAS_OP_N;
63  else if (kaldi_trans == kTrans)
64  cublas_trans = CUBLAS_OP_T;
65  else
66  cublas_trans = CUBLAS_OP_C;
67  return cublas_trans;
68 }
69 
70 void GetBlockSizesForSimpleMatrixOperation(int32 num_rows,
71  int32 num_cols,
72  dim3 *dimGrid,
73  dim3 *dimBlock) {
74  KALDI_ASSERT(num_rows > 0 && num_cols > 0);
75  int32 col_blocksize = 64, row_blocksize = 4;
76  while (col_blocksize > 1 &&
77  (num_cols + (num_cols / 2) <= col_blocksize ||
78  num_rows > 65535 * row_blocksize)) {
79  col_blocksize /= 2;
80  row_blocksize *= 2;
81  }
82 
83  dimBlock->x = col_blocksize;
84  dimBlock->y = row_blocksize;
85  dimBlock->z = 1;
86  dimGrid->x = n_blocks(num_cols, col_blocksize);
87  dimGrid->y = n_blocks(num_rows, row_blocksize);
88  KALDI_ASSERT(dimGrid->y <= 65535 &&
89  "Matrix has too many rows to process");
90  dimGrid->z = 1;
91 }
92 
93 const char* cublasGetStatusString(cublasStatus_t status) {
94  switch(status) {
95  case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS";
96  case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED";
97  case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED";
98  case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE";
99  case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH";
100  case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR";
101  case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
102  case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR";
103  case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED";
104  case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR";
105  }
106  return "CUBLAS_STATUS_UNKNOWN_ERROR";
107 }
108 
109 const char* cusparseGetStatusString(cusparseStatus_t status) {
110  // detail info come from http://docs.nvidia.com/cuda/cusparse/index.html#cusparsestatust
111  switch(status) {
112  case CUSPARSE_STATUS_SUCCESS: return "CUSPARSE_STATUS_SUCCESS";
113  case CUSPARSE_STATUS_NOT_INITIALIZED: return "CUSPARSE_STATUS_NOT_INITIALIZED";
114  case CUSPARSE_STATUS_ALLOC_FAILED: return "CUSPARSE_STATUS_ALLOC_FAILED";
115  case CUSPARSE_STATUS_INVALID_VALUE: return "CUSPARSE_STATUS_INVALID_VALUE";
116  case CUSPARSE_STATUS_ARCH_MISMATCH: return "CUSPARSE_STATUS_ARCH_MISMATCH";
117  case CUSPARSE_STATUS_MAPPING_ERROR: return "CUSPARSE_STATUS_MAPPING_ERROR";
118  case CUSPARSE_STATUS_EXECUTION_FAILED: return "CUSPARSE_STATUS_EXECUTION_FAILED";
119  case CUSPARSE_STATUS_INTERNAL_ERROR: return "CUSPARSE_STATUS_INTERNAL_ERROR";
120  case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
121  case CUSPARSE_STATUS_ZERO_PIVOT: return "CUSPARSE_STATUS_ZERO_PIVOT";
122  }
123  return "CUSPARSE_STATUS_UNKNOWN_ERROR";
124 }
125 
126 const char* curandGetStatusString(curandStatus_t status) {
127  // detail info come from http://docs.nvidia.com/cuda/curand/group__HOST.html
128  switch(status) {
129  case CURAND_STATUS_SUCCESS: return "CURAND_STATUS_SUCCESS";
130  case CURAND_STATUS_VERSION_MISMATCH: return "CURAND_STATUS_VERSION_MISMATCH";
131  case CURAND_STATUS_NOT_INITIALIZED: return "CURAND_STATUS_NOT_INITIALIZED";
132  case CURAND_STATUS_ALLOCATION_FAILED: return "CURAND_STATUS_ALLOCATION_FAILED";
133  case CURAND_STATUS_TYPE_ERROR: return "CURAND_STATUS_TYPE_ERROR";
134  case CURAND_STATUS_OUT_OF_RANGE: return "CURAND_STATUS_OUT_OF_RANGE";
135  case CURAND_STATUS_LENGTH_NOT_MULTIPLE: return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
136  case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
137  case CURAND_STATUS_LAUNCH_FAILURE: return "CURAND_STATUS_LAUNCH_FAILURE";
138  case CURAND_STATUS_PREEXISTING_FAILURE: return "CURAND_STATUS_PREEXISTING_FAILURE";
139  case CURAND_STATUS_INITIALIZATION_FAILED: return "CURAND_STATUS_INITIALIZATION_FAILED";
140  case CURAND_STATUS_ARCH_MISMATCH: return "CURAND_STATUS_ARCH_MISMATCH";
141  case CURAND_STATUS_INTERNAL_ERROR: return "CURAND_STATUS_INTERNAL_ERROR";
142  }
143  return "CURAND_STATUS_UNKNOWN_ERROR";
144 }
145 #endif
146 
147 } // namespace
148 
149 
150 #endif // KALDI_CUDAMATRIX_COMMON_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
kaldi::int32 int32
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixTransposeType
Definition: matrix-common.h:32