fml  0.1-0
Fused Matrix Library
gpuvec.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_GPUVEC_H
6 #define FML_GPU_GPUVEC_H
7 #pragma once
8 
9 
10 #include <cstdint>
11 
12 #include "../_internals/print.hh"
13 #include "../_internals/univec.hh"
14 
15 #include "arch/arch.hh"
16 
17 #include "internals/gpuscalar.hh"
18 #include "internals/kernelfuns.hh"
19 #include "internals/launcher.hh"
20 
21 #include "card.hh"
22 
23 
24 namespace fml
25 {
31  template <typename T>
32  class gpuvec : public fml::univec<T>
33  {
34  public:
35  gpuvec(std::shared_ptr<card> gpu);
36  gpuvec(std::shared_ptr<card> gpu, len_t size);
37  gpuvec(std::shared_ptr<card> gpu, T *data, len_t size, bool free_on_destruct=false);
38  gpuvec(const gpuvec &x);
39  ~gpuvec();
40 
41  void resize(len_t size);
42  void resize(std::shared_ptr<card> gpu, len_t size);
43  void inherit(std::shared_ptr<card> gpu);
44  void inherit(std::shared_ptr<card> gpu, T *data, len_t size, bool free_on_destruct=false);
45  gpuvec<T> dupe() const;
46 
47  void print(uint8_t ndigits=4, bool add_final_blank=true) const;
48  void info() const;
49 
50  void fill_zero();
51  void fill_val(const T v);
52  void fill_linspace();
53  void fill_linspace(const T start, const T stop);
54 
55  void scale(const T s);
56  void pow(const T p);
57  void rev();
58 
59  T sum() const;
60  T max() const;
61  T min() const;
62 
63  T get(const len_t i) const;
64  void set(const len_t i, const T v);
65 
66  bool operator==(const gpuvec<T> &x) const;
67  bool operator!=(const gpuvec<T> &x) const;
68  gpuvec<T>& operator=(const gpuvec<T> &x);
69 
70  std::shared_ptr<card> get_card() const {return c;};
71  dim3 get_blockdim() const {return dim_block;};
72  dim3 get_griddim() const {return dim_grid;};
73 
74  protected:
75  std::shared_ptr<card> c;
76 
77  private:
78  dim3 dim_block;
79  dim3 dim_grid;
80 
81  void free();
82  void check_params(len_t size);
83  void check_gpu(std::shared_ptr<card> gpu);
84  };
85 }
86 
87 
88 
89 // -----------------------------------------------------------------------------
90 // public
91 // -----------------------------------------------------------------------------
92 
93 // constructors/destructor
94 
105 template <typename T>
106 fml::gpuvec<T>::gpuvec(std::shared_ptr<fml::card> gpu)
107 {
108  check_gpu(gpu);
109 
110  this->c = gpu;
111 
112  this->_size = 0;
113  this->data = NULL;
114 
115  this->free_data = true;
116 }
117 
118 
119 
134 template <typename T>
135 fml::gpuvec<T>::gpuvec(std::shared_ptr<fml::card> gpu, len_t size)
136 {
137  check_params(size);
138  check_gpu(gpu);
139 
140  this->c = gpu;
141 
142  size_t len = (size_t) size * sizeof(T);
143  this->data = (T*) this->c->mem_alloc(len);
144 
145  this->_size = size;
146 
147  dim_block = fml::kernel_launcher::dim_block1();
148  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
149 
150  this->free_data = true;
151 }
152 
153 
154 
168 template <typename T>
169 fml::gpuvec<T>::gpuvec(std::shared_ptr<fml::card> gpu, T *data_, len_t size, bool free_on_destruct)
170 {
171  check_params(size);
172  check_gpu(gpu);
173 
174  this->c = gpu;
175 
176  this->_size = size;
177  this->data = data_;
178 
179  dim_block = fml::kernel_launcher::dim_block1();
180  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
181 
182  this->free_data = free_on_destruct;
183 }
184 
185 
186 
187 template <typename REAL>
189 {
190  this->_size = x.size();
191  this->data = x.data_ptr();
192 
193  dim_block = fml::kernel_launcher::dim_block1();
194  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
195 
196  this->c = x.get_card();
197 
198  this->free_data = false;
199 }
200 
201 
202 
203 template <typename T>
205 {
206  this->free();
207  c = NULL;
208 }
209 
210 
211 
212 // memory management
213 
224 template <typename T>
225 void fml::gpuvec<T>::resize(len_t size)
226 {
227  check_params(size);
228 
229  if (this->_size == size)
230  return;
231 
232  size_t len = (size_t) size * sizeof(T);
233 
234  T *realloc_ptr;
235  realloc_ptr = (T*) this->c->mem_alloc(len);
236 
237  size_t oldlen = (size_t) this->_size * sizeof(T);
238  size_t copylen = std::min(len, oldlen);
239  this->c->mem_gpu2gpu(realloc_ptr, this->data, copylen);
240  this->c->mem_free(this->data);
241  this->data = realloc_ptr;
242 
243  this->_size = size;
244 
245  dim_block = fml::kernel_launcher::dim_block1();
246  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
247 }
248 
249 
250 
262 template <typename T>
263 void fml::gpuvec<T>::resize(std::shared_ptr<fml::card> gpu, len_t size)
264 {
265  check_gpu(gpu);
266 
267  this->free();
268 
269  this->c = gpu;
270  this->resize(size);
271 }
272 
273 
274 
283 template <typename T>
284 void fml::gpuvec<T>::inherit(std::shared_ptr<fml::card> gpu)
285 {
286  check_gpu(gpu);
287 
288  this->c = gpu;
289 
290  this->_size = 0;
291  this->data = NULL;
292 
293  this->free_data = true;
294 }
295 
296 
297 
311 template <typename T>
312 void fml::gpuvec<T>::inherit(std::shared_ptr<fml::card> gpu, T *data, len_t size, bool free_on_destruct)
313 {
314  check_params(size);
315  check_gpu(gpu);
316 
317  this->free();
318 
319  this->c = gpu;
320 
321  this->_size = size;
322  this->data = data;
323 
324  dim_block = fml::kernel_launcher::dim_block1();
325  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
326 
327  this->free_data = free_on_destruct;
328 }
329 
330 
331 
333 template <typename T>
335 {
336  fml::gpuvec<T> cpy(this->c, this->_size);
337 
338  size_t len = (size_t) this->_size * sizeof(T);
339  this->c->mem_gpu2gpu(cpy.data_ptr(), this->data, len);
340 
341  return cpy;
342 }
343 
344 
345 
346 // printers
347 
354 template <typename REAL>
355 void fml::gpuvec<REAL>::print(uint8_t ndigits, bool add_final_blank) const
356 {
357  for (int i=0; i<this->_size; i++)
358  {
359  REAL tmp;
360  this->c->mem_gpu2cpu(&tmp, this->data + i, sizeof(REAL));
361  this->printval(tmp, ndigits);
362  }
363 
364  fml::print::putchar('\n');
365  if (add_final_blank)
366  fml::print::putchar('\n');
367 }
368 
369 
370 
372 template <typename T>
374 {
375  fml::print::printf("# gpuvec ");
376  fml::print::printf("%d ", this->_size);
377  fml::print::printf("type=%s ", typeid(T).name());
378  fml::print::printf("\n");
379 }
380 
381 
382 
383 // fillers
384 
386 template <typename T>
388 {
389  size_t len = (size_t) this->_size * sizeof(T);
390  this->c->mem_set(this->data, 0, len);
391 }
392 
393 
394 
400 template <typename T>
402 {
403  fml::kernelfuns::kernel_fill_val<<<dim_grid, dim_block>>>(v, this->_size, 1, this->data);
404  this->c->check();
405 }
406 
407 
408 
415 template <typename T>
417 {
418  T start = 1;
419  T stop = (T) (this->_size);
420  this->fill_linspace(start, stop);
421 }
422 
423 template <typename T>
424 void fml::gpuvec<T>::fill_linspace(const T start, const T stop)
425 {
426  fml::kernelfuns::kernel_fill_linspace<<<dim_grid, dim_block>>>(start, stop, this->_size, 1, this->data);
427  this->c->check();
428 }
429 
430 
431 
437 template <typename T>
438 void fml::gpuvec<T>::scale(const T s)
439 {
440  fml::kernelfuns::kernel_scale<<<dim_grid, dim_block>>>(s, this->_size, 1, this->data);
441  this->c->check();
442 }
443 
444 
445 
451 template <typename T>
452 void fml::gpuvec<T>::pow(const T p)
453 {
454  fml::kernelfuns::kernel_pow<<<dim_grid, dim_block>>>(p, this->_size, 1, this->data);
455  this->c->check();
456 }
457 
458 
459 
461 template <typename T>
463 {
464  fml::kernelfuns::kernel_rev_rows<<<dim_grid, dim_block>>>(this->_size, 1, this->data);
465  this->c->check();
466 }
467 
468 
469 
471 template <typename T>
473 {
474  T s = 0;
475  fml::gpuscalar<T> s_gpu(c, s);
476 
477  fml::kernelfuns::kernel_sum<<<dim_grid, dim_block>>>(this->_size, this->data, s_gpu.data_ptr());
478  s_gpu.get_val(&s);
479  this->c->check();
480 
481  return s;
482 }
483 
484 
485 
487 template <typename T>
489 {
490  T mx = 0;
491  fml::gpuscalar<T> mx_gpu(c, mx);
492 
493  fml::kernelfuns::kernel_max<<<dim_grid, dim_block>>>(this->_size, this->data, mx_gpu.data_ptr());
494  mx_gpu.get_val(&mx);
495  this->c->check();
496 
497  return mx;
498 }
499 
500 
501 
503 template <typename T>
505 {
506  T mn;
507  fml::gpuscalar<T> mn_gpu(c);
508 
509  fml::kernelfuns::kernel_min<<<dim_grid, dim_block>>>(this->_size, this->data, mn_gpu.data_ptr());
510  mn_gpu.get_val(&mn);
511  this->c->check();
512 
513  return mn;
514 }
515 
516 
517 
518 // operators
519 
528 template <typename T>
529 T fml::gpuvec<T>::get(const len_t i) const
530 {
531  this->check_index(i);
532 
533  T ret;
534  this->c->mem_gpu2cpu(&ret, this->data + i, sizeof(T));
535  return ret;
536 }
537 
547 template <typename T>
548 void fml::gpuvec<T>::set(const len_t i, const T v)
549 {
550  this->check_index(i);
551  this->c->mem_cpu2gpu(this->data + i, &v, sizeof(T));
552 }
553 
554 
555 
564 template <typename T>
566 {
567  if (this->_size != x.size())
568  return false;
569  else if (this->c->get_id() != x.get_card()->get_id())
570  return false;
571  else if (this->data == x.data_ptr())
572  return true;
573 
574  int all_eq = 1;
575  fml::gpuscalar<int> all_eq_gpu(c, all_eq);
576 
577  fml::kernelfuns::kernel_all_eq<<<dim_grid, dim_block>>>(this->_size, 1, this->data, x.data_ptr(), all_eq_gpu.data_ptr());
578 
579  all_eq_gpu.get_val(&all_eq);
580  this->c->check();
581 
582  return (bool) all_eq;
583 }
584 
591 template <typename T>
593 {
594  return !(*this == x);
595 }
596 
597 
598 
605 template <typename T>
607 {
608  this->c = x.get_card();
609  this->_size = x.size();
610  this->data = x.data_ptr();
611 
612  this->free_data = false;
613  return *this;
614 }
615 
616 
617 
618 // -----------------------------------------------------------------------------
619 // private
620 // -----------------------------------------------------------------------------
621 
622 template <typename T>
624 {
625  if (this->free_data && this->data)
626  {
627  this->c->mem_free(this->data);
628  this->data = NULL;
629  }
630 }
631 
632 
633 
634 template <typename T>
635 void fml::gpuvec<T>::check_params(len_t size)
636 {
637  if (size < 0)
638  throw std::runtime_error("invalid dimensions");
639 }
640 
641 
642 
643 template <typename T>
644 void fml::gpuvec<T>::check_gpu(std::shared_ptr<fml::card> gpu)
645 {
646  if (!gpu->valid_card())
647  throw std::runtime_error("GPU card object is invalid");
648 }
649 
650 
651 #endif
fml::gpuvec::fill_val
void fill_val(const T v)
Set all values to input value.
Definition: gpuvec.hh:401
fml::univec
Base vector class. Not meant for direct use. Instead see cpuvec and gpuvec.
Definition: univec.hh:22
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::gpuvec::fill_zero
void fill_zero()
Set all values to zero.
Definition: gpuvec.hh:387
fml::gpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: gpuvec.hh:225
fml::gpuvec::operator==
bool operator==(const gpuvec< T > &x) const
See if the two objects are the same.
Definition: gpuvec.hh:565
fml::gpuvec::max
T max() const
Maximum value of the vector.
Definition: gpuvec.hh:488
fml::gpuvec::scale
void scale(const T s)
Multiply all values by the input value.
Definition: gpuvec.hh:438
fml::gpuvec::get
T get(const len_t i) const
Get the specified value.
Definition: gpuvec.hh:529
fml::gpuvec::dupe
gpuvec< T > dupe() const
Duplicate the object in a deep copy.
Definition: gpuvec.hh:334
fml::gpuvec::rev
void rev()
Reverse the vector.
Definition: gpuvec.hh:462
fml
Core namespace.
Definition: dimops.hh:10
fml::gpuvec::info
void info() const
Print some brief information about the object.
Definition: gpuvec.hh:373
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::gpuvec::print
void print(uint8_t ndigits=4, bool add_final_blank=true) const
Print all values in the object.
Definition: gpuvec.hh:355
fml::gpuvec::fill_linspace
void fill_linspace()
Set values to linearly spaced numbers.
Definition: gpuvec.hh:416
fml::gpuvec::min
T min() const
Minimum value of the vector.
Definition: gpuvec.hh:504
fml::gpuvec::operator!=
bool operator!=(const gpuvec< T > &x) const
See if the two objects are not the same. Uses same internal logic as the == method.
Definition: gpuvec.hh:592
fml::gpuvec::sum
T sum() const
Sum the vector.
Definition: gpuvec.hh:472
fml::gpuscalar
Definition: gpuscalar.hh:16
fml::gpuvec::operator=
gpuvec< T > & operator=(const gpuvec< T > &x)
Operator that sets the LHS to a shallow copy of the input. Destruction of the LHS object will not release the internal array.
Definition: gpuvec.hh:606
fml::gpuvec::pow
void pow(const T p)
Raise every value of the vector to the given power.
Definition: gpuvec.hh:452
fml::gpuvec::set
void set(const len_t i, const T v)
Set the storage at the specified index with the provided value.
Definition: gpuvec.hh:548