cuda-gpu-available.cc File Reference
#include <unistd.h>
#include <errno.h>
#include "base/kaldi-common.h"
#include "cudamatrix/cu-device.h"
#include "cudamatrix/cu-matrix.h"
Include dependency graph for cuda-gpu-available.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main(int argc, char *argv[])

Definition at line 42 of file cuda-gpu-available.cc.

References KALDI_LOG and KALDI_WARN.

42 int main(int argc, char *argv[]) try {
43 
44  /* only for Doxygen documentation, never shown in command line */
45  const char *usage =
46  "Test if there is a GPU available, and if the GPU setup is correct.\n"
47  "A GPU is acquired and a small computation is done\n"
48  "(generating a random matrix and computing softmax for its rows).\n"
49  "\n"
50  "exit-code: 0 = success, 1 = compiled without GPU support, -1 = error\n"
51  "\n"
52  "Usage: cuda-gpu-available\n";
53 
54  char hostname[100] = "UNKNOWN-HOSTNAME";
55 #if !defined(_MSC_VER) && !defined(__CYGWIN__)
56  if (gethostname(hostname, 100)) {
57  KALDI_WARN << "Cannot get hostname, " << strerror(errno);
58  }
59 #endif
60  KALDI_LOG << "\n\n### IS CUDA GPU AVAILABLE? '" << hostname << "' ###";
61 #if HAVE_CUDA == 1
62  CuDevice::Instantiate().SelectGpuId("yes");
63  fprintf(stderr, "### HURRAY, WE GOT A CUDA GPU FOR COMPUTATION!!! ##\n\n");
64  fprintf(stderr, "### Testing CUDA setup with a small computation "
65  "(setup = cuda-toolkit + gpu-driver + kaldi):\n");
66  // the test of setup by computation,
67  try {
68  TestGpuComputation();
69  } catch (const std::exception &e) {
70  fprintf(stderr, "%s\n", e.what());
71  KALDI_LOG << "...\n"
72  << "### The CUDA setup is wrong! "
73  << "(\"invalid device function\" == problem with 'compute capability' "
74  << "in compiled kaldi)\n"
75  << "### Before posting the error to forum, please try following:\n"
76  << "### 1) update kaldi & cuda-toolkit (& GPU driver),\n"
77  << "### 2) re-run 'src/configure',\n"
78  << "### 3) re-compile kaldi by 'make clean; make -j depend; make -j'\n"
79  << "###\n"
80  << "### If the problem persists, please send us your:\n"
81  << "### - GPU model name, cuda-toolkit version, driver version "
82  << "(run nvidia-smi), variable $(CUDA_ARCH) from src/kaldi.mk";
83  return -1;
84  }
85  fprintf(stderr, "### Test OK!\n");
86  return 0;
87 #else
88  std::cerr
89  << "### CUDA WAS NOT COMPILED IN! ###\n"
90  << "To support CUDA, you must run 'configure' on a machine "
91  << "that has the CUDA compiler 'nvcc' available.\n";
92  return 1;
93 #endif
94 } catch (const std::exception &e) {
95  fprintf(stderr, "%s\n", e.what());
96  KALDI_LOG << "...\n"
97  << "### WE DID NOT GET A CUDA GPU!!! ###\n"
98  << "### If your system has a 'free' CUDA GPU, try re-installing "
99  << "latest 'CUDA toolkit' from NVidia (this updates GPU drivers too).\n"
100  << "### Otherwise 'nvidia-smi' shows the status of GPUs:\n"
101  << "### - The versions should match ('NVIDIA-SMI' and 'Driver Version'), "
102  << "otherwise reboot or reload kernel module,\n"
103  << "### - The GPU should be unused "
104  << "(no 'process' in list, low 'memory-usage' (<100MB), low 'gpu-fan' (<30%)),\n"
105  << "### - You should see your GPU (burnt GPUs may disappear from the list until reboot),";
106  return -1;
107 }
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_LOG
Definition: kaldi-error.h:153