fml  0.1-0
Fused Matrix Library
card.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_CARD_H
6 #define FML_GPU_CARD_H
7 #pragma once
8 
9 
10 #include <memory>
11 #include <stdexcept>
12 
13 #include "arch/arch.hh"
14 
15 
16 namespace fml
17 {
19  inline int get_device_count()
20  {
21  int ngpus;
22  auto ret = fml::gpuprims::get_device_count(&ngpus);
23  if (ret != GPU_SUCCESS)
24  {
25  std::string s = fml::gpuprims::gpu_error_string(ret);
26  throw std::runtime_error(s);
27  }
28 
29  return ngpus;
30  }
31 
32 
43  class card
44  {
45  public:
46  card();
47  card(const int id=0);
48  card(const card &x);
49  ~card();
50 
51  void set(const int id);
52 
53  void info() const;
54 
55  void* mem_alloc(const size_t len);
56  void mem_set(void *ptr, const int value, const size_t len);
57  void mem_free(void *ptr);
58  void mem_cpu2gpu(void *dst, const void *src, const size_t len);
59  void mem_gpu2cpu(void *dst, const void *src, const size_t len);
60  void mem_gpu2gpu(void *dst, const void *src, const size_t len);
61 
62  void synch();
63  void check();
64 
65  void set_math_mode(gpublas_mathmode_t mode);
66 
69  int get_id() {return _id;};
70  int get_id() const {return _id;};
72  gpublas_handle_t blas_handle() {return _blas_handle;};
73  gpublas_handle_t blas_handle() const {return _blas_handle;};
75  gpulapack_handle_t lapack_handle() {return _lapack_handle;};
76  gpulapack_handle_t lapack_handle() const {return _lapack_handle;};
78  bool valid_card() const {return (_id!=UNINITIALIZED_CARD && _id!=DESTROYED_CARD);};
80 
81  protected:
82  int _id;
83  gpublas_handle_t _blas_handle;
84  gpulapack_handle_t _lapack_handle;
85 
86  private:
87  static const int UNINITIALIZED_CARD = -1;
88  static const int DESTROYED_CARD = -11;
89 
90  void init();
91  void cleanup();
92  gpu_error_t err;
93  void check_gpu_error();
94  };
95 
96 
97 
98  typedef std::shared_ptr<fml::card> card_sp_t;
99 
109  inline card_sp_t new_card(int id=0)
110  {
111  return std::make_shared<fml::card>(id);
112  }
113 }
114 
115 
116 
117 // -----------------------------------------------------------------------------
118 // public
119 // -----------------------------------------------------------------------------
120 
121 // constructors/destructor
122 
125 {
126  _id = UNINITIALIZED_CARD;
127  _blas_handle = NULL;
128  _lapack_handle = NULL;
129 }
130 
131 
132 
144 inline fml::card::card(const int id)
145 {
146  _id = id;
147  init();
148 
149  gpublas_status_t blas_status = fml::gpuprims::gpu_blas_init(&_blas_handle);
150  if (blas_status != GPUBLAS_STATUS_SUCCESS)
151  throw std::runtime_error("unable to initialize GPU BLAS");
152 
153  gpulapack_status_t lapack_status = fml::gpuprims::gpu_lapack_init(&_lapack_handle);
154  if (lapack_status != GPULAPACK_STATUS_SUCCESS)
155  throw std::runtime_error("unable to initialize GPU LAPACK");
156 }
157 
158 
159 
160 inline fml::card::card(const card &x)
161 {
162  _id = x.get_id();
163  _blas_handle = x.blas_handle();
164  _lapack_handle = x.lapack_handle();
165 }
166 
167 
168 
169 inline fml::card::~card()
170 {
171  cleanup();
172 }
173 
174 
175 
188 inline void fml::card::set(const int id)
189 {
190  if (id == _id)
191  return;
192 
193  cleanup();
194 
195  _id = id;
196  init();
197 
198  gpublas_status_t blas_status = fml::gpuprims::gpu_blas_init(&_blas_handle);
199  if (blas_status != GPUBLAS_STATUS_SUCCESS)
200  throw std::runtime_error("unable to initialize GPU BLAS");
201 
202  gpulapack_status_t lapack_status = fml::gpuprims::gpu_lapack_init(&_lapack_handle);
203  if (lapack_status != GPULAPACK_STATUS_SUCCESS)
204  throw std::runtime_error("unable to initialize GPU LAPACK");
205 }
206 
207 
208 
209 // printers
210 
216 inline void fml::card::info() const
217 {
218  fml::nvml::init();
219 
220 #ifdef FML_USE_CUDA
222  int version_major = version / 1000;
223  int version_minor = (version % 1000) / 10;
224 
225  nvmlDevice_t device = fml::nvml::device::get_handle_by_index(_id);
226  std::string name = fml::nvml::device::get_name(device);
227  double mem_used, mem_total;
228  fml::nvml::device::get_memory_info(device, &mem_used, &mem_total);
229 
230  std::string math_mode = gpublas::get_math_mode_string(_blas_handle);
231 
232  printf("## GPU %d ", _id);
233  printf("(%s) ", name.c_str());
234  printf("%.0f/%.0f MB ", mem_used/1024/1024, mem_total/1024/1024);
235  printf("- CUDA %d.%d ", version_major, version_minor);
236  printf("(math mode: %s)", math_mode.c_str());
237  printf("\n\n");
238 #else // FML_USE_HIP
239  printf("## GPU %d ", _id);
240 #endif
241 
243 }
244 
245 
246 
247 // gpu memory management
248 
259 inline void* fml::card::mem_alloc(const size_t len)
260 {
261  init();
262  void *ptr;
263  err = fml::gpuprims::gpu_malloc(&ptr, len);
264  check_gpu_error();
265  return ptr;
266 }
267 
268 
269 
284 inline void fml::card::mem_set(void *ptr, const int value, const size_t len)
285 {
286  init();
287  err = fml::gpuprims::gpu_memset(ptr, value, len);
288  check_gpu_error();
289 }
290 
291 
292 
303 inline void fml::card::mem_free(void *ptr)
304 {
305  init();
306  if (ptr)
307  {
308  err = fml::gpuprims::gpu_free(ptr);
309  check_gpu_error();
310  }
311 }
312 
313 
314 
327 inline void fml::card::mem_cpu2gpu(void *dst, const void *src, const size_t len)
328 {
329  init();
330  err = fml::gpuprims::gpu_memcpy(dst, src, len, GPU_MEMCPY_HOST_TO_DEVICE);
331  check_gpu_error();
332 }
333 
334 
335 
348 inline void fml::card::mem_gpu2cpu(void *dst, const void *src, const size_t len)
349 {
350  init();
351  err = fml::gpuprims::gpu_memcpy(dst, src, len, GPU_MEMCPY_DEVICE_TO_HOST);
352  check_gpu_error();
353 }
354 
355 
356 
369 inline void fml::card::mem_gpu2gpu(void *dst, const void *src, const size_t len)
370 {
371  init();
372  err = fml::gpuprims::gpu_memcpy(dst, src, len, GPU_MEMCPY_DEVICE_TO_DEVICE);
373  check_gpu_error();
374 }
375 
376 
377 
388 inline void fml::card::synch()
389 {
390  init();
391  err = fml::gpuprims::gpu_synch();
392  check_gpu_error();
393 }
394 
395 
396 
404 inline void fml::card::check()
405 {
406  err = fml::gpuprims::gpu_last_error();
407  check_gpu_error();
408 }
409 
410 
411 
412 
430 inline void fml::card::set_math_mode(gpublas_mathmode_t mode)
431 {
432  gpublas_status_t check = gpublas::set_math_mode(_blas_handle, mode);
433  gpublas::err::get_cublas_error_msg(check);
434 }
435 
436 
437 
438 // -----------------------------------------------------------------------------
439 // private
440 // -----------------------------------------------------------------------------
441 
442 inline void fml::card::init()
443 {
444  if (_id == UNINITIALIZED_CARD)
445  throw std::runtime_error("invalid card (uninitialized)");
446  else if (_id == DESTROYED_CARD)
447  throw std::runtime_error("invalid card (destroyed)");
448 
449  err = fml::gpuprims::gpu_set_device(_id);
450  check_gpu_error();
451 }
452 
453 
454 
455 inline void fml::card::cleanup()
456 {
457  init();
458 
459  if (_lapack_handle)
460  {
461  fml::gpuprims::gpu_lapack_free(_lapack_handle);
462  _lapack_handle = NULL;
463  }
464 
465  if (_blas_handle)
466  {
467  fml::gpuprims::gpu_blas_free(_blas_handle);
468  _blas_handle = NULL;
469  }
470 
471  err = fml::gpuprims::gpu_device_reset();
472 
473  _id = DESTROYED_CARD;
474 }
475 
476 
477 
478 inline void fml::card::check_gpu_error()
479 {
480  if (err != GPU_SUCCESS)
481  {
482  cleanup();
483 
484  std::string s = fml::gpuprims::gpu_error_string(err);
485  throw std::runtime_error(s);
486  }
487 }
488 
489 
490 #endif
fml::card::get_id
int get_id()
Definition: card.hh:69
fml::card::mem_gpu2gpu
void mem_gpu2gpu(void *dst, const void *src, const size_t len)
Copy device (GPU) data to other device (GPU) memory.
Definition: card.hh:369
fml::card::info
void info() const
Print some brief information about the GPU.
Definition: card.hh:216
fml::card::blas_handle
gpublas_handle_t blas_handle()
GPU BLAS handle.
Definition: card.hh:72
fml::card
GPU data and methods.
Definition: card.hh:43
fml::card::mem_cpu2gpu
void mem_cpu2gpu(void *dst, const void *src, const size_t len)
Copy host (CPU) data to device (GPU) memory.
Definition: card.hh:327
fml::card::valid_card
bool valid_card() const
Is the gpu data valid?
Definition: card.hh:78
fml::card::mem_free
void mem_free(void *ptr)
Free device memory.
Definition: card.hh:303
fml::new_card
card_sp_t new_card(int id=0)
Initialize a new card.
Definition: card.hh:109
fml::card::set_math_mode
void set_math_mode(gpublas_mathmode_t mode)
Manually set the GPU BLAS math mode (as supported by hardware).
Definition: card.hh:430
fml::card::mem_alloc
void * mem_alloc(const size_t len)
Allocate device memory.
Definition: card.hh:259
fml::nvml::init
void init()
Initialize NVML.
Definition: nvml.hh:93
fml::nvml::shutdown
void shutdown()
Shut down NVML.
Definition: nvml.hh:102
fml::card::mem_gpu2cpu
void mem_gpu2cpu(void *dst, const void *src, const size_t len)
Copy device (GPU) data to host (CPU) memory.
Definition: card.hh:348
fml
Core namespace.
Definition: dimops.hh:10
fml::card::mem_set
void mem_set(void *ptr, const int value, const size_t len)
Set device memory.
Definition: card.hh:284
fml::card::lapack_handle
gpulapack_handle_t lapack_handle()
GPU LAPACK handle.
Definition: card.hh:75
fml::card::set
void set(const int id)
Sets up the existing card object.
Definition: card.hh:188
fml::get_device_count
int get_device_count()
Return number of GPU devices.
Definition: card.hh:19
fml::card::synch
void synch()
Synchronize device.
Definition: card.hh:388
fml::card::card
card()
Create a new card object. Does not initialize any GPU data.
Definition: card.hh:124
fml::card::check
void check()
Check for (and throw if found) a CUDA error.
Definition: card.hh:404
fml::nvml::system::get_cuda_driver_version
int get_cuda_driver_version()
System CUDA driver version.
Definition: nvml.hh:119