fml  0.1-0
Fused Matrix Library
gpumat.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_GPUMAT_H
6 #define FML_GPU_GPUMAT_H
7 #pragma once
8 
9 
10 #include <cstdint>
11 
12 #include "../_internals/print.hh"
13 #include "../_internals/rand.hh"
14 #include "../_internals/types.hh"
15 #include "../_internals/unimat.hh"
16 
17 #include "arch/arch.hh"
18 
19 #include "internals/gpuscalar.hh"
20 #include "internals/kernelfuns.hh"
21 #include "internals/launcher.hh"
22 
23 #include "card.hh"
24 #include "gpuvec.hh"
25 
26 
27 namespace fml
28 {
34  template <typename REAL>
35  class gpumat : public fml::unimat<REAL>
36  {
37  public:
38  gpumat();
39  gpumat(std::shared_ptr<card> gpu);
40  gpumat(std::shared_ptr<card> gpu, len_t nrows, len_t ncols);
41  gpumat(std::shared_ptr<card> gpu, REAL *data, len_t nrows, len_t ncols, bool free_on_destruct=false);
42  gpumat(const gpumat &x);
43  ~gpumat();
44 
45  void resize(len_t nrows, len_t ncols);
46  void resize(std::shared_ptr<card> gpu, len_t nrows, len_t ncols);
47  void inherit(std::shared_ptr<card> gpu, REAL *data, len_t nrows, len_t ncols, bool free_on_destruct=false);
48  gpumat<REAL> dupe() const;
49 
50  void print(uint8_t ndigits=4, bool add_final_blank=true) const;
51  void info() const;
52 
53  void fill_zero();
54  void fill_val(const REAL v);
55  void fill_linspace();
56  void fill_linspace(const REAL start, const REAL stop);
57  void fill_eye();
58  void fill_diag(const gpuvec<REAL> &v);
59  void fill_runif(const uint32_t seed, const REAL min=0, const REAL max=1);
60  void fill_runif(const REAL min=0, const REAL max=1);
61  void fill_rnorm(const uint32_t seed, const REAL mean=0, const REAL sd=1);
62  void fill_rnorm(const REAL mean=0, const REAL sd=1);
63 
64  void diag(gpuvec<REAL> &v);
65  void antidiag(gpuvec<REAL> &v);
66  void scale(const REAL s);
67  void rev_rows();
68  void rev_cols();
69 
70  bool any_inf() const;
71  bool any_nan() const;
72 
73  REAL get(const len_t i) const;
74  REAL get(const len_t i, const len_t j) const;
75  void set(const len_t i, const REAL v);
76  void set(const len_t i, const len_t j, const REAL v);
77  void get_row(const len_t i, gpuvec<REAL> &v) const;
78  void set_row(const len_t i, const gpuvec<REAL> &v);
79  void get_col(const len_t j, gpuvec<REAL> &v) const;
80  void set_col(const len_t j, const gpuvec<REAL> &v);
81 
82  bool operator==(const gpumat<REAL> &x) const;
83  bool operator!=(const gpumat<REAL> &x) const;
85 
86  std::shared_ptr<card> get_card() const {return c;};
87  dim3 get_blockdim() const {return dim_block;};
88  dim3 get_griddim() const {return dim_grid;};
89 
90  protected:
91  std::shared_ptr<card> c;
92 
93  private:
94  dim3 dim_block;
95  dim3 dim_grid;
96 
97  void free();
98  void check_params(len_t nrows, len_t ncols);
99  void check_gpu(std::shared_ptr<card> gpu);
100  };
101 }
102 
103 
104 
105 // -----------------------------------------------------------------------------
106 // public
107 // -----------------------------------------------------------------------------
108 
109 // constructors/destructor
110 
111 template <typename REAL>
113 {
114  this->c=NULL;
115  this->m=0;
116  this->n=0;
117  this->data=NULL;
118  this->free_data = false;
119 }
120 
131 template <typename REAL>
132 fml::gpumat<REAL>::gpumat(std::shared_ptr<fml::card> gpu)
133 {
134  check_gpu(gpu);
135 
136  this->c = gpu;
137 
138  this->m = 0;
139  this->n = 0;
140  this->data = NULL;
141 
142  this->free_data = true;
143 }
144 
145 
146 
161 template <typename REAL>
162 fml::gpumat<REAL>::gpumat(std::shared_ptr<fml::card> gpu, len_t nrows, len_t ncols)
163 {
164  check_params(nrows, ncols);
165  check_gpu(gpu);
166 
167  this->c = gpu;
168 
169  const size_t len = (size_t) nrows * ncols * sizeof(REAL);
170  this->data = (REAL*) this->c->mem_alloc(len);
171 
172  this->m = nrows;
173  this->n = ncols;
174 
175  dim_block = fml::kernel_launcher::dim_block2();
176  dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
177 
178  this->free_data = true;
179 }
180 
181 
182 
197 template <typename REAL>
198 fml::gpumat<REAL>::gpumat(std::shared_ptr<fml::card> gpu, REAL *data_, len_t nrows, len_t ncols, bool free_on_destruct)
199 {
200  check_params(nrows, ncols);
201  check_gpu(gpu);
202 
203  this->c = gpu;
204 
205  this->m = nrows;
206  this->n = ncols;
207  this->data = data_;
208 
209  dim_block = fml::kernel_launcher::dim_block2();
210  dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
211 
212  this->free_data = free_on_destruct;
213 }
214 
215 
216 
217 template <typename REAL>
218 fml::gpumat<REAL>::gpumat(const gpumat<REAL> &x)
219 {
220  this->m = x.nrows();
221  this->n = x.ncols();
222  this->data = x.data_ptr();
223 
224  dim_block = fml::kernel_launcher::dim_block2();
225  dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
226 
227  this->c = x.get_card();
228 
229  this->free_data = false;
230 }
231 
232 
233 
234 template <typename REAL>
236 {
237  this->free();
238  c = NULL;
239 }
240 
241 
242 
243 // memory management
244 
255 template <typename REAL>
256 void fml::gpumat<REAL>::resize(len_t nrows, len_t ncols)
257 {
258  check_params(nrows, ncols);
259 
260  const size_t len = (size_t) nrows * ncols * sizeof(REAL);
261  const size_t oldlen = (size_t) this->m * this->n * sizeof(REAL);
262 
263  if (len == oldlen)
264  {
265  this->m = nrows;
266  this->n = ncols;
267  return;
268  }
269 
270  REAL *realloc_ptr;
271  realloc_ptr = (REAL*) this->c->mem_alloc(len);
272 
273  const size_t copylen = std::min(len, oldlen);
274  this->c->mem_gpu2gpu(realloc_ptr, this->data, copylen);
275  this->c->mem_free(this->data);
276  this->data = realloc_ptr;
277 
278  this->m = nrows;
279  this->n = ncols;
280 
281  dim_block = fml::kernel_launcher::dim_block2();
282  dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
283 }
284 
285 
286 
298 template <typename REAL>
299 void fml::gpumat<REAL>::resize(std::shared_ptr<fml::card> gpu, len_t nrows, len_t ncols)
300 {
301  check_gpu(gpu);
302 
303  this->c = gpu;
304  this->resize(nrows, ncols);
305 }
306 
307 
308 
322 template <typename REAL>
323 void fml::gpumat<REAL>::inherit(std::shared_ptr<fml::card> gpu, REAL *data, len_t nrows, len_t ncols, bool free_on_destruct)
324 {
325  check_params(nrows, ncols);
326  check_gpu(gpu);
327 
328  this->free();
329 
330  this->c = gpu;
331 
332  this->m = nrows;
333  this->n = ncols;
334  this->data = data;
335 
336  dim_block = fml::kernel_launcher::dim_block2();
337  dim_grid = fml::kernel_launcher::dim_grid(this->m, this->n);
338 
339  this->free_data = free_on_destruct;
340 }
341 
342 
343 
345 template <typename REAL>
347 {
348  fml::gpumat<REAL> cpy(this->c, this->m, this->n);
349 
350  const size_t len = (size_t) this->m * this->n * sizeof(REAL);
351  this->c->mem_gpu2gpu(cpy.data_ptr(), this->data, len);
352 
353  return cpy;
354 }
355 
356 
357 
358 // printers
359 
366 template <typename REAL>
367 void fml::gpumat<REAL>::print(uint8_t ndigits, bool add_final_blank) const
368 {
369  for (int i=0; i<this->m; i++)
370  {
371  for (int j=0; j<this->n; j++)
372  {
373  REAL tmp;
374  this->c->mem_gpu2cpu(&tmp, this->data + (i + this->m*j), sizeof(REAL));
375  this->printval(tmp, ndigits);
376  }
377 
378  fml::print::putchar('\n');
379  }
380 
381  if (add_final_blank)
382  fml::print::putchar('\n');
383 }
384 
385 
386 
388 template <typename REAL>
390 {
391  fml::print::printf("# gpumat ");
392  fml::print::printf("%dx%d ", this->m, this->n);
393  fml::print::printf("type=%s ", typeid(REAL).name());
394  fml::print::printf("\n");
395 }
396 
397 
398 
399 // fillers
400 
402 template <typename REAL>
404 {
405  const size_t len = (size_t) this->m * this->n * sizeof(REAL);
406  this->c->mem_set(this->data, 0, len);
407 }
408 
409 
410 
416 template <typename REAL>
417 void fml::gpumat<REAL>::fill_val(const REAL v)
418 {
419  fml::kernelfuns::kernel_fill_val<<<dim_grid, dim_block>>>(v, this->m, this->n, this->data);
420  this->c->check();
421 }
422 
423 
424 
431 template <typename T>
433 {
434  T start = 1;
435  T stop = (T) (this->m * this->n);
436  this->fill_linspace(start, stop);
437 }
438 
439 template <typename REAL>
440 void fml::gpumat<REAL>::fill_linspace(REAL start, REAL stop)
441 {
442  // if (start == stop)
443  // this->fill_val(start);
444  // else
445  // {
446  fml::kernelfuns::kernel_fill_linspace<<<dim_grid, dim_block>>>(start, stop, this->m, this->n, this->data);
447  this->c->check();
448  // }
449 }
450 
451 
452 
454 template <typename REAL>
456 {
457  fml::kernelfuns::kernel_fill_eye<<<dim_grid, dim_block>>>(this->m, this->n, this->data);
458  this->c->check();
459 }
460 
461 
462 
472 template <typename REAL>
474 {
475  fml::kernelfuns::kernel_fill_diag<<<dim_grid, dim_block>>>(v.size(), v.data_ptr(), this->m, this->n, this->data);
476  this->c->check();
477 }
478 
479 
480 
492 template <typename REAL>
493 void fml::gpumat<REAL>::fill_runif(const uint32_t seed, const REAL min, const REAL max)
494 {
495  const size_t len = (size_t) this->m * this->n;
496  gpurand::gen_runif(seed, len, this->data);
497  fml::kernelfuns::kernel_fill_runif_update<<<dim_grid, dim_block>>>(min, max, this->m, this->n, this->data);
498  this->c->check();
499 }
500 
502 template <typename REAL>
503 void fml::gpumat<REAL>::fill_runif(const REAL min, const REAL max)
504 {
505  uint32_t seed = fml::rand::get_seed();
506  this->fill_runif(seed, min, max);
507 }
508 
509 
510 
522 template <typename REAL>
523 void fml::gpumat<REAL>::fill_rnorm(const uint32_t seed, const REAL mean, const REAL sd)
524 {
525  const size_t len = (size_t) this->m * this->n;
526  gpurand::gen_rnorm(seed, mean, sd, len, this->data);
527 }
528 
530 template <typename REAL>
531 void fml::gpumat<REAL>::fill_rnorm(const REAL mean, const REAL sd)
532 {
533  uint32_t seed = fml::rand::get_seed();
534  this->fill_rnorm(mean, sd);
535 }
536 
537 
538 
552 template <typename REAL>
554 {
555  const len_t minmn = std::min(this->m, this->n);
556  v.resize(minmn);
557 
558  fml::kernelfuns::kernel_diag<<<dim_grid, dim_block>>>(this->m, this->n, this->data, v.data_ptr());
559  this->c->check();
560 }
561 
562 
563 
578 template <typename REAL>
580 {
581  const len_t minmn = std::min(this->m, this->n);
582  v.resize(minmn);
583 
584  fml::kernelfuns::kernel_antidiag<<<dim_grid, dim_block>>>(this->m, this->n, this->data, v.data_ptr());
585  this->c->check();
586 }
587 
588 
589 
595 template <typename REAL>
596 void fml::gpumat<REAL>::scale(const REAL s)
597 {
598  fml::kernelfuns::kernel_scale<<<dim_grid, dim_block>>>(s, this->m, this->n, this->data);
599  this->c->check();
600 }
601 
602 
603 
605 template <typename REAL>
607 {
608  fml::kernelfuns::kernel_rev_rows<<<dim_grid, dim_block>>>(this->m, this->n, this->data);
609  this->c->check();
610 }
611 
612 
613 
615 template <typename REAL>
617 {
618  fml::kernelfuns::kernel_rev_cols<<<dim_grid, dim_block>>>(this->m, this->n, this->data);
619  this->c->check();
620 }
621 
622 
623 
625 template <typename REAL>
627 {
628  int has_inf = 0;
629  gpuscalar<int> has_inf_gpu(c, has_inf);
630 
631  fml::kernelfuns::kernel_any_inf<<<dim_grid, dim_block>>>(this->m, this->n, this->data, has_inf_gpu.data_ptr());
632 
633  has_inf_gpu.get_val(&has_inf);
634  this->c->check();
635 
636  return (bool) has_inf;
637 }
638 
639 
640 
641 template <typename REAL>
642 bool fml::gpumat<REAL>::any_nan() const
643 {
644  int has_nan = 0;
645  gpuscalar<int> has_nan_gpu(c, has_nan);
646 
647  fml::kernelfuns::kernel_any_nan<<<dim_grid, dim_block>>>(this->m, this->n, this->data, has_nan_gpu.data_ptr());
648 
649  has_nan_gpu.get_val(&has_nan);
650  this->c->check();
651 
652  return (bool) has_nan;
653 }
654 
655 
656 
657 // operators
658 
668 template <typename REAL>
669 REAL fml::gpumat<REAL>::get(const len_t i) const
670 {
671  this->check_index(i);
672 
673  REAL ret;
674  this->c->mem_gpu2cpu(&ret, this->data + i, sizeof(REAL));
675  return ret;
676 }
677 
686 template <typename REAL>
687 REAL fml::gpumat<REAL>::get(const len_t i, const len_t j) const
688 {
689  this->check_index(i, j);
690 
691  REAL ret;
692  this->c->mem_gpu2cpu(&ret, this->data + (i + this->m*j), sizeof(REAL));
693  return ret;
694 }
695 
706 template <typename REAL>
707 void fml::gpumat<REAL>::set(const len_t i, const REAL v)
708 {
709  this->check_index(i);
710  this->c->mem_cpu2gpu(this->data + i, &v, sizeof(REAL));
711 }
712 
722 template <typename REAL>
723 void fml::gpumat<REAL>::set(const len_t i, const len_t j, const REAL v)
724 {
725  this->check_index(i, j);
726  this->c->mem_cpu2gpu(this->data + (i + this->m*j), &v, sizeof(REAL));
727 }
728 
729 
730 
744 template <typename REAL>
745 void fml::gpumat<REAL>::get_row(const len_t i, fml::gpuvec<REAL> &v) const
746 {
747  if (i < 0 || i >= this->m)
748  throw std::logic_error("invalid matrix row");
749 
750  v.resize(this->n);
751 
752  fml::kernelfuns::kernel_get_row<<<dim_grid, dim_block>>>(i, this->m, this->n, this->data, v.data_ptr());
753  this->c->check();
754 }
755 
756 
757 
768 template <typename REAL>
769 void fml::gpumat<REAL>::set_row(const len_t i, const fml::gpuvec<REAL> &v)
770 {
771  if (i < 0 || i >= this->m)
772  throw std::logic_error("invalid matrix row");
773  if (v.size() != this->n)
774  throw std::runtime_error("non-conformable arguments");
775 
776  fml::kernelfuns::kernel_set_row<<<dim_grid, dim_block>>>(i, this->m, this->n, this->data, v.data_ptr());
777  this->c->check();
778 }
779 
780 
781 
795 template <typename REAL>
796 void fml::gpumat<REAL>::get_col(const len_t j, fml::gpuvec<REAL> &v) const
797 {
798  if (j < 0 || j >= this->n)
799  throw std::logic_error("invalid matrix column");
800 
801  v.resize(this->m);
802 
803  fml::kernelfuns::kernel_get_col<<<dim_grid, dim_block>>>(j, this->m, this->n, this->data, v.data_ptr());
804  this->c->check();
805 }
806 
807 
808 
819 template <typename REAL>
820 void fml::gpumat<REAL>::set_col(const len_t i, const fml::gpuvec<REAL> &v)
821 {
822  if (i < 0 || i >= this->n)
823  throw std::logic_error("invalid matrix row");
824  if (v.size() != this->m)
825  throw std::runtime_error("non-conformable arguments");
826 
827  fml::kernelfuns::kernel_set_col<<<dim_grid, dim_block>>>(i, this->m, this->n, this->data, v.data_ptr());
828  this->c->check();
829 }
830 
831 
832 
842 template <typename T>
844 {
845  if (this->m != x.nrows() || this->n != x.ncols())
846  return false;
847  else if (this->c->get_id() != x.get_card()->get_id())
848  return false;
849  else if (this->data == x.data_ptr())
850  return true;
851 
852  int all_eq = 1;
853  fml::gpuscalar<int> all_eq_gpu(c, all_eq);
854 
855  fml::kernelfuns::kernel_all_eq<<<dim_grid, dim_block>>>(this->m, this->n, this->data, x.data_ptr(), all_eq_gpu.data_ptr());
856 
857  all_eq_gpu.get_val(&all_eq);
858  this->c->check();
859 
860  return (bool) all_eq;
861 }
862 
869 template <typename T>
871 {
872  return !(*this == x);
873 }
874 
875 
876 
883 template <typename REAL>
885 {
886  this->c = x.get_card();
887 
888  this->m = x.nrows();
889  this->n = x.ncols();
890  this->data = x.data_ptr();
891 
892  this->free_data = false;
893  return *this;
894 }
895 
896 
897 
898 // -----------------------------------------------------------------------------
899 // private
900 // -----------------------------------------------------------------------------
901 
902 template <typename REAL>
904 {
905  if (this->free_data && this->data)
906  {
907  this->c->mem_free(this->data);
908  this->data = NULL;
909  }
910 }
911 
912 
913 
914 template <typename REAL>
915 void fml::gpumat<REAL>::check_params(len_t nrows, len_t ncols)
916 {
917  if (nrows < 0 || ncols < 0)
918  throw std::runtime_error("invalid dimensions");
919 }
920 
921 
922 
923 template <typename REAL>
924 void fml::gpumat<REAL>::check_gpu(std::shared_ptr<fml::card> gpu)
925 {
926  if (!gpu->valid_card())
927  throw std::runtime_error("GPU card object is invalid");
928 }
929 
930 
931 #endif
fml::gpumat::fill_diag
void fill_diag(const gpuvec< REAL > &v)
Set diagonal entries of the matrix to those in the vector.
Definition: gpumat.hh:473
fml::unimat
Base matrix class. Not meant for direct use. Instead see cpumat, gpumat, and mpimat.
Definition: unimat.hh:30
fml::gpumat::print
void print(uint8_t ndigits=4, bool add_final_blank=true) const
Print all values in the object.
Definition: gpumat.hh:367
fml::gpumat::set_row
void set_row(const len_t i, const gpuvec< REAL > &v)
Set the specified row.
Definition: gpumat.hh:769
fml::gpumat::set
void set(const len_t i, const REAL v)
Set the storage at the specified index with the provided value.
Definition: gpumat.hh:707
fml::gpumat::fill_val
void fill_val(const REAL v)
Set all values to input value.
Definition: gpumat.hh:417
fml::gpumat::get
REAL get(const len_t i) const
Get the specified value.
Definition: gpumat.hh:669
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::gpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: gpuvec.hh:225
fml::gpumat::operator=
gpumat< REAL > & operator=(const gpumat< REAL > &x)
Operator that sets the LHS to a shallow copy of the input. Desctruction of the LHS object will not re...
Definition: gpumat.hh:884
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::gpumat::set_col
void set_col(const len_t j, const gpuvec< REAL > &v)
Set the specified column.
Definition: gpumat.hh:820
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::gpumat::operator!=
bool operator!=(const gpumat< REAL > &x) const
See if the two objects are not the same. Uses same internal logic as the == method.
Definition: gpumat.hh:870
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::gpumat::inherit
void inherit(std::shared_ptr< card > gpu, REAL *data, len_t nrows, len_t ncols, bool free_on_destruct=false)
Set the internal object storage to the specified array.
Definition: gpumat.hh:323
fml::gpumat::any_inf
bool any_inf() const
Are any values infinite?
Definition: gpumat.hh:626
fml::gpumat::dupe
gpumat< REAL > dupe() const
Duplicate the object in a deep copy.
Definition: gpumat.hh:346
fml::gpumat::fill_eye
void fill_eye()
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: gpumat.hh:455
fml::gpumat::get_row
void get_row(const len_t i, gpuvec< REAL > &v) const
Get the specified row.
Definition: gpumat.hh:745
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::gpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: gpumat.hh:256
fml::gpumat::scale
void scale(const REAL s)
Multiply all values by the input value.
Definition: gpumat.hh:596
fml::gpumat::fill_zero
void fill_zero()
Set all values to zero.
Definition: gpumat.hh:403
fml::gpumat::fill_rnorm
void fill_rnorm(const uint32_t seed, const REAL mean=0, const REAL sd=1)
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: gpumat.hh:523
fml::gpumat::get_col
void get_col(const len_t j, gpuvec< REAL > &v) const
Get the specified column.
Definition: gpumat.hh:796
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpumat::fill_linspace
void fill_linspace()
Set values to linearly spaced numbers.
Definition: gpumat.hh:432
fml::gpuscalar
Definition: gpuscalar.hh:16
fml::gpumat::fill_runif
void fill_runif(const uint32_t seed, const REAL min=0, const REAL max=1)
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: gpumat.hh:493
fml::gpumat::rev_rows
void rev_rows()
Reverse the rows of the matrix.
Definition: gpumat.hh:606
fml::gpumat::diag
void diag(gpuvec< REAL > &v)
Get the diagonal entries.
Definition: gpumat.hh:553
fml::gpumat::antidiag
void antidiag(gpuvec< REAL > &v)
Get the anti-diagonal entries, i.e. those on the bottom-left to top-right.
Definition: gpumat.hh:579
fml::gpumat::operator==
bool operator==(const gpumat< REAL > &x) const
See if the two objects are the same.
Definition: gpumat.hh:843
fml::gpumat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: gpumat.hh:616
fml::gpumat::info
void info() const
Print some brief information about the object.
Definition: gpumat.hh:389