13 #include "arch/arch.hh"
22 auto ret = fml::gpuprims::get_device_count(&ngpus);
23 if (ret != GPU_SUCCESS)
25 std::string s = fml::gpuprims::gpu_error_string(ret);
26 throw std::runtime_error(s);
// Declaration only; the definition is not part of this excerpt.
// Takes an integer device id. Presumably re-targets this card object at
// the GPU with that id -- TODO confirm against the definition.
51 void set(
const int id);
// Declaration only. The parameters mirror a memset-style call: a pointer,
// an int fill value and a length.
// A later line in this file forwards (ptr, value, len) unchanged to
// fml::gpuprims::gpu_memset; presumably that line belongs to this
// function's definition -- confirm. The unit of `len` is not shown here.
56 void mem_set(
void *ptr,
const int value,
const size_t len);
// Declaration only. Copy of `len` units from `src` to `dst`.
// A gpu_memcpy call tagged GPU_MEMCPY_HOST_TO_DEVICE appears later in this
// file; presumably it is this function's body (host source, device
// destination) -- confirm against the definition.
58 void mem_cpu2gpu(
void *dst,
const void *src,
const size_t len);
// Declaration only. Copy of `len` units from `src` to `dst`.
// A gpu_memcpy call tagged GPU_MEMCPY_DEVICE_TO_HOST appears later in this
// file; presumably it is this function's body (device source, host
// destination) -- confirm against the definition.
59 void mem_gpu2cpu(
void *dst,
const void *src,
const size_t len);
// Declaration only. Copy of `len` units from `src` to `dst`.
// A gpu_memcpy call tagged GPU_MEMCPY_DEVICE_TO_DEVICE appears later in
// this file; presumably it is this function's body (both pointers on the
// device) -- confirm against the definition.
60 void mem_gpu2gpu(
void *dst,
const void *src,
const size_t len);
70 int get_id()
const {
return _id;};
73 gpublas_handle_t
blas_handle()
const {
return _blas_handle;};
76 gpulapack_handle_t
lapack_handle()
const {
return _lapack_handle;};
78 bool valid_card()
const {
return (_id!=UNINITIALIZED_CARD && _id!=DESTROYED_CARD);};
// Library handles stored per card. Elsewhere in this file they are filled
// in by gpu_blas_init / gpu_lapack_init and passed to gpu_blas_free /
// gpu_lapack_free inside cleanup().
83 gpublas_handle_t _blas_handle;
84 gpulapack_handle_t _lapack_handle;
// Sentinel values for _id. init() throws std::runtime_error for either
// one and valid_card() returns false for either one. cleanup() assigns
// DESTROYED_CARD to _id as its last visible step.
87 static const int UNINITIALIZED_CARD = -1;
88 static const int DESTROYED_CARD = -11;
// Declaration. The definition in this file tests `err` against
// GPU_SUCCESS and throws std::runtime_error built from
// gpu_error_string(err) when they differ.
93 void check_gpu_error();
98 typedef std::shared_ptr<fml::card> card_sp_t;
111 return std::make_shared<fml::card>(
id);
126 _id = UNINITIALIZED_CARD;
128 _lapack_handle = NULL;
149 gpublas_status_t blas_status = fml::gpuprims::gpu_blas_init(&_blas_handle);
150 if (blas_status != GPUBLAS_STATUS_SUCCESS)
151 throw std::runtime_error(
"unable to initialize GPU BLAS");
153 gpulapack_status_t lapack_status = fml::gpuprims::gpu_lapack_init(&_lapack_handle);
154 if (lapack_status != GPULAPACK_STATUS_SUCCESS)
155 throw std::runtime_error(
"unable to initialize GPU LAPACK");
169 inline fml::card::~card()
198 gpublas_status_t blas_status = fml::gpuprims::gpu_blas_init(&_blas_handle);
199 if (blas_status != GPUBLAS_STATUS_SUCCESS)
200 throw std::runtime_error(
"unable to initialize GPU BLAS");
202 gpulapack_status_t lapack_status = fml::gpuprims::gpu_lapack_init(&_lapack_handle);
203 if (lapack_status != GPULAPACK_STATUS_SUCCESS)
204 throw std::runtime_error(
"unable to initialize GPU LAPACK");
222 int version_major = version / 1000;
223 int version_minor = (version % 1000) / 10;
225 nvmlDevice_t device = fml::nvml::device::get_handle_by_index(_id);
226 std::string name = fml::nvml::device::get_name(device);
227 double mem_used, mem_total;
228 fml::nvml::device::get_memory_info(device, &mem_used, &mem_total);
230 std::string math_mode = gpublas::get_math_mode_string(_blas_handle);
232 printf(
"## GPU %d ", _id);
233 printf(
"(%s) ", name.c_str());
234 printf(
"%.0f/%.0f MB ", mem_used/1024/1024, mem_total/1024/1024);
235 printf(
"- CUDA %d.%d ", version_major, version_minor);
236 printf(
"(math mode: %s)", math_mode.c_str());
239 printf(
"## GPU %d ", _id);
263 err = fml::gpuprims::gpu_malloc(&ptr, len);
287 err = fml::gpuprims::gpu_memset(ptr, value, len);
308 err = fml::gpuprims::gpu_free(ptr);
330 err = fml::gpuprims::gpu_memcpy(dst, src, len, GPU_MEMCPY_HOST_TO_DEVICE);
351 err = fml::gpuprims::gpu_memcpy(dst, src, len, GPU_MEMCPY_DEVICE_TO_HOST);
372 err = fml::gpuprims::gpu_memcpy(dst, src, len, GPU_MEMCPY_DEVICE_TO_DEVICE);
391 err = fml::gpuprims::gpu_synch();
406 err = fml::gpuprims::gpu_last_error();
432 gpublas_status_t check = gpublas::set_math_mode(_blas_handle, mode);
433 gpublas::err::get_cublas_error_msg(check);
// Validates the stored id, then selects that device.
// Throws std::runtime_error when _id equals UNINITIALIZED_CARD or
// DESTROYED_CARD. Otherwise the result of gpu_set_device(_id) is stored
// in `err`.
// NOTE(review): braces and some lines of this definition are missing from
// this excerpt, so how `err` is checked afterwards is not shown.
442 inline void fml::card::init()
444 if (_id == UNINITIALIZED_CARD)
445 throw std::runtime_error(
"invalid card (uninitialized)");
446 else if (_id == DESTROYED_CARD)
447 throw std::runtime_error(
"invalid card (destroyed)");
449 err = fml::gpuprims::gpu_set_device(_id);
// Tears the card down. Visible steps, in order: free the LAPACK handle and
// null it, free the BLAS handle, call gpu_device_reset() (result stored in
// `err`), and finally mark the object as destroyed by setting
// _id = DESTROYED_CARD.
// NOTE(review): lines are missing between these fragments, so any guards
// around each step and any check of `err` are not visible here.
455 inline void fml::card::cleanup()
461 fml::gpuprims::gpu_lapack_free(_lapack_handle);
462 _lapack_handle = NULL;
467 fml::gpuprims::gpu_blas_free(_blas_handle);
471 err = fml::gpuprims::gpu_device_reset();
473 _id = DESTROYED_CARD;
// Converts a failed GPU status into an exception: when `err` is not
// GPU_SUCCESS, its text from gpu_error_string(err) is thrown as
// std::runtime_error.
// NOTE(review): a few lines inside the `if` are missing from this excerpt;
// there may be extra work before the throw -- confirm against the full file.
478 inline void fml::card::check_gpu_error()
480 if (err != GPU_SUCCESS)
484 std::string s = fml::gpuprims::gpu_error_string(err);
485 throw std::runtime_error(s);