5 #ifndef FML_GPU_GPUMAT_H
6 #define FML_GPU_GPUMAT_H
12 #include "../_internals/print.hh"
13 #include "../_internals/rand.hh"
14 #include "../_internals/types.hh"
15 #include "../_internals/unimat.hh"
17 #include "arch/arch.hh"
19 #include "internals/gpuscalar.hh"
20 #include "internals/kernelfuns.hh"
21 #include "internals/launcher.hh"
34 template <
typename REAL>
39 gpumat(std::shared_ptr<card> gpu);
41 gpumat(std::shared_ptr<card> gpu, REAL *data, len_t
nrows, len_t
ncols,
bool free_on_destruct=
false);
47 void inherit(std::shared_ptr<card> gpu, REAL *data, len_t
nrows, len_t
ncols,
bool free_on_destruct=
false);
50 void print(uint8_t ndigits=4,
bool add_final_blank=
true)
const;
59 void fill_runif(
const uint32_t seed,
const REAL min=0,
const REAL max=1);
60 void fill_runif(
const REAL min=0,
const REAL max=1);
61 void fill_rnorm(
const uint32_t seed,
const REAL mean=0,
const REAL sd=1);
62 void fill_rnorm(
const REAL mean=0,
const REAL sd=1);
66 void scale(
const REAL s);
73 REAL
get(
const len_t i)
const;
74 REAL
get(
const len_t i,
const len_t j)
const;
75 void set(
const len_t i,
const REAL v);
76 void set(
const len_t i,
const len_t j,
const REAL v);
86 std::shared_ptr<card> get_card()
const {
return c;};
87 dim3 get_blockdim()
const {
return dim_block;};
88 dim3 get_griddim()
const {
return dim_grid;};
91 std::shared_ptr<card> c;
99 void check_gpu(std::shared_ptr<card> gpu);
111 template <
typename REAL>
118 this->free_data =
false;
131 template <
typename REAL>
142 this->free_data =
true;
161 template <
typename REAL>
164 check_params(nrows, ncols);
169 const size_t len = (size_t) nrows * ncols *
sizeof(REAL);
170 this->data = (REAL*) this->c->mem_alloc(len);
175 dim_block = fml::kernel_launcher::dim_block2();
176 dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
178 this->free_data =
true;
197 template <
typename REAL>
200 check_params(nrows, ncols);
209 dim_block = fml::kernel_launcher::dim_block2();
210 dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
212 this->free_data = free_on_destruct;
217 template <
typename REAL>
222 this->data = x.data_ptr();
224 dim_block = fml::kernel_launcher::dim_block2();
225 dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
227 this->c = x.get_card();
229 this->free_data =
false;
234 template <
typename REAL>
255 template <
typename REAL>
258 check_params(nrows, ncols);
260 const size_t len = (size_t) nrows * ncols *
sizeof(REAL);
261 const size_t oldlen = (size_t) this->m * this->n *
sizeof(REAL);
271 realloc_ptr = (REAL*) this->c->mem_alloc(len);
273 const size_t copylen = std::min(len, oldlen);
274 this->c->mem_gpu2gpu(realloc_ptr, this->data, copylen);
275 this->c->mem_free(this->data);
276 this->data = realloc_ptr;
281 dim_block = fml::kernel_launcher::dim_block2();
282 dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
298 template <
typename REAL>
304 this->resize(nrows, ncols);
322 template <
typename REAL>
325 check_params(nrows, ncols);
336 dim_block = fml::kernel_launcher::dim_block2();
337 dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
339 this->free_data = free_on_destruct;
345 template <
typename REAL>
350 const size_t len = (size_t) this->m * this->n *
sizeof(REAL);
351 this->c->mem_gpu2gpu(cpy.
data_ptr(), this->data, len);
366 template <
typename REAL>
369 for (
int i=0; i<this->m; i++)
371 for (
int j=0; j<this->n; j++)
374 this->c->mem_gpu2cpu(&tmp, this->data + (i + this->m*j),
sizeof(REAL));
375 this->printval(tmp, ndigits);
378 fml::print::putchar(
'\n');
382 fml::print::putchar(
'\n');
388 template <
typename REAL>
391 fml::print::printf(
"# gpumat ");
392 fml::print::printf(
"%dx%d ", this->m, this->n);
393 fml::print::printf(
"type=%s ",
typeid(REAL).name());
394 fml::print::printf(
"\n");
402 template <
typename REAL>
405 const size_t len = (size_t) this->m * this->n *
sizeof(REAL);
406 this->c->mem_set(this->data, 0, len);
416 template <
typename REAL>
419 fml::kernelfuns::kernel_fill_val<<<dim_grid, dim_block>>>(v, this->m, this->n, this->data);
431 template <
typename T>
435 T stop = (T) (this->m * this->n);
436 this->fill_linspace(start, stop);
439 template <
typename REAL>
446 fml::kernelfuns::kernel_fill_linspace<<<dim_grid, dim_block>>>(start, stop, this->m, this->n, this->data);
454 template <
typename REAL>
457 fml::kernelfuns::kernel_fill_eye<<<dim_grid, dim_block>>>(this->m, this->n, this->data);
472 template <
typename REAL>
475 fml::kernelfuns::kernel_fill_diag<<<dim_grid, dim_block>>>(v.
size(), v.
data_ptr(), this->m, this->n, this->data);
492 template <
typename REAL>
495 const size_t len = (size_t) this->m * this->n;
496 gpurand::gen_runif(seed, len, this->data);
497 fml::kernelfuns::kernel_fill_runif_update<<<dim_grid, dim_block>>>(min, max, this->m, this->n, this->data);
502 template <
typename REAL>
505 uint32_t seed = fml::rand::get_seed();
506 this->fill_runif(seed, min, max);
522 template <
typename REAL>
525 const size_t len = (size_t) this->m * this->n;
526 gpurand::gen_rnorm(seed, mean, sd, len, this->data);
530 template <
typename REAL>
533 uint32_t seed = fml::rand::get_seed();
534 this->fill_rnorm(mean, sd);
552 template <
typename REAL>
555 const len_t minmn = std::min(this->m, this->n);
558 fml::kernelfuns::kernel_diag<<<dim_grid, dim_block>>>(this->m, this->n, this->data, v.
data_ptr());
578 template <
typename REAL>
581 const len_t minmn = std::min(this->m, this->n);
584 fml::kernelfuns::kernel_antidiag<<<dim_grid, dim_block>>>(this->m, this->n, this->data, v.
data_ptr());
595 template <
typename REAL>
598 fml::kernelfuns::kernel_scale<<<dim_grid, dim_block>>>(s, this->m, this->n, this->data);
605 template <
typename REAL>
608 fml::kernelfuns::kernel_rev_rows<<<dim_grid, dim_block>>>(this->m, this->n, this->data);
615 template <
typename REAL>
618 fml::kernelfuns::kernel_rev_cols<<<dim_grid, dim_block>>>(this->m, this->n, this->data);
625 template <
typename REAL>
631 fml::kernelfuns::kernel_any_inf<<<dim_grid, dim_block>>>(this->m, this->n, this->data, has_inf_gpu.data_ptr());
633 has_inf_gpu.get_val(&has_inf);
636 return (
bool) has_inf;
641 template <
typename REAL>
647 fml::kernelfuns::kernel_any_nan<<<dim_grid, dim_block>>>(this->m, this->n, this->data, has_nan_gpu.data_ptr());
649 has_nan_gpu.get_val(&has_nan);
652 return (
bool) has_nan;
668 template <
typename REAL>
671 this->check_index(i);
674 this->c->mem_gpu2cpu(&ret, this->data + i,
sizeof(REAL));
686 template <
typename REAL>
689 this->check_index(i, j);
692 this->c->mem_gpu2cpu(&ret, this->data + (i + this->m*j),
sizeof(REAL));
706 template <
typename REAL>
709 this->check_index(i);
710 this->c->mem_cpu2gpu(this->data + i, &v,
sizeof(REAL));
722 template <
typename REAL>
725 this->check_index(i, j);
726 this->c->mem_cpu2gpu(this->data + (i + this->m*j), &v,
sizeof(REAL));
744 template <
typename REAL>
747 if (i < 0 || i >= this->m)
748 throw std::logic_error(
"invalid matrix row");
752 fml::kernelfuns::kernel_get_row<<<dim_grid, dim_block>>>(i, this->m, this->n, this->data, v.
data_ptr());
768 template <
typename REAL>
771 if (i < 0 || i >= this->m)
772 throw std::logic_error(
"invalid matrix row");
773 if (v.
size() != this->n)
774 throw std::runtime_error(
"non-conformable arguments");
776 fml::kernelfuns::kernel_set_row<<<dim_grid, dim_block>>>(i, this->m, this->n, this->data, v.
data_ptr());
795 template <
typename REAL>
798 if (j < 0 || j >= this->n)
799 throw std::logic_error(
"invalid matrix column");
803 fml::kernelfuns::kernel_get_col<<<dim_grid, dim_block>>>(j, this->m, this->n, this->data, v.
data_ptr());
819 template <
typename REAL>
822 if (i < 0 || i >= this->n)
823 throw std::logic_error(
"invalid matrix row");
824 if (v.
size() != this->m)
825 throw std::runtime_error(
"non-conformable arguments");
827 fml::kernelfuns::kernel_set_col<<<dim_grid, dim_block>>>(i, this->m, this->n, this->data, v.
data_ptr());
842 template <
typename T>
845 if (this->m != x.
nrows() || this->n != x.
ncols())
847 else if (this->c->get_id() != x.get_card()->get_id())
849 else if (this->data == x.
data_ptr())
855 fml::kernelfuns::kernel_all_eq<<<dim_grid, dim_block>>>(this->m, this->n, this->data, x.
data_ptr(), all_eq_gpu.data_ptr());
857 all_eq_gpu.get_val(&all_eq);
860 return (
bool) all_eq;
869 template <
typename T>
872 return !(*
this == x);
883 template <
typename REAL>
886 this->c = x.get_card();
892 this->free_data =
false;
902 template <
typename REAL>
905 if (this->free_data && this->data)
907 this->c->mem_free(this->data);
914 template <
typename REAL>
917 if (nrows < 0 || ncols < 0)
918 throw std::runtime_error(
"invalid dimensions");
923 template <
typename REAL>
926 if (!gpu->valid_card())
927 throw std::runtime_error(
"GPU card object is invalid");