5 #ifndef FML_GPU_GPUVEC_H
6 #define FML_GPU_GPUVEC_H
12 #include "../_internals/print.hh"
13 #include "../_internals/univec.hh"
15 #include "arch/arch.hh"
17 #include "internals/gpuscalar.hh"
18 #include "internals/kernelfuns.hh"
19 #include "internals/launcher.hh"
// NOTE(review): only part of the class declaration is visible in this excerpt;
// the class header, the data members other than `c`, and several methods are
// not shown.

// Construct from a GPU card handle alone.
35 gpuvec(std::shared_ptr<card> gpu);
// Construct from a card handle plus an existing buffer `data` of length
// `size`; `free_on_destruct` defaults to false.
37 gpuvec(std::shared_ptr<card> gpu, T *data, len_t
size,
bool free_on_destruct=
false);
// Resize overload that also takes a card handle.
42 void resize(std::shared_ptr<card> gpu, len_t
size);
// inherit() overloads take the same argument lists as the two constructors above.
43 void inherit(std::shared_ptr<card> gpu);
44 void inherit(std::shared_ptr<card> gpu, T *data, len_t
size,
bool free_on_destruct=
false);
// Print the vector; defaults are 4 digits and add_final_blank = true.
47 void print(uint8_t ndigits=4,
bool add_final_blank=
true)
const;
// Takes a scalar `s` of the element type.
55 void scale(
const T s);
// Element accessors: read / write the value at index `i`.
63 T
get(
const len_t i)
const;
64 void set(
const len_t i,
const T v);
// Inline accessors returning the stored card handle and the stored
// block / grid launch dimensions.
70 std::shared_ptr<card> get_card()
const {
return c;};
71 dim3 get_blockdim()
const {
return dim_block;};
72 dim3 get_griddim()
const {
return dim_grid;};
// Shared handle to the card object used for all memory operations in this file.
75 std::shared_ptr<card> c;
// Validation helpers. NOTE(review): the definitions that appear to match
// these (near the end of the file) throw std::runtime_error.
82 void check_params(len_t
size);
83 void check_gpu(std::shared_ptr<card> gpu);
// NOTE(review): signature and most of the body are not visible in this
// excerpt; presumably the constructor taking only a card handle.
105 template <
typename T>
// Mark the buffer as owned; free_data is the flag tested before mem_free
// later in this file.
115 this->free_data =
true;
// NOTE(review): signature not visible; presumably a constructor that
// allocates storage for `size` elements on the card.
134 template <
typename T>
// Byte count of the buffer; `size` is cast to size_t before the multiply.
142 size_t len = (size_t) size *
sizeof(T);
// Allocate through the card object and store the result as the data pointer.
143 this->data = (T*) this->c->mem_alloc(len);
// Kernel launch configuration: the grid dimension is derived from _size.
147 dim_block = fml::kernel_launcher::dim_block1();
148 dim_grid = fml::kernel_launcher::dim_grid(this->_size);
// This object allocated the buffer, so it is flagged for freeing.
150 this->free_data =
true;
// NOTE(review): signature not visible; presumably the constructor that wraps
// a caller-supplied buffer (it reads a `free_on_destruct` argument).
168 template <
typename T>
// Kernel launch configuration: the grid dimension is derived from _size.
179 dim_block = fml::kernel_launcher::dim_block1();
180 dim_grid = fml::kernel_launcher::dim_grid(this->_size);
// Ownership of the buffer is decided by the caller.
182 this->free_data = free_on_destruct;
// NOTE(review): signature not visible; presumably a copy-style constructor
// from another vector `x`. The line that sets the data pointer is not shown.
187 template <
typename REAL>
// Take the length from the source object.
190 this->_size = x.
size();
// Kernel launch configuration: the grid dimension is derived from _size.
193 dim_block = fml::kernel_launcher::dim_block1();
194 dim_grid = fml::kernel_launcher::dim_grid(this->_size);
// Share the source object's card handle.
196 this->c = x.get_card();
// This object is flagged as not owning the buffer.
198 this->free_data =
false;
203 template <
typename T>
// NOTE(review): signature not visible; presumably resize(size). The visible
// statements reallocate the buffer and carry over as much data as fits.
224 template <
typename T>
// Same-size case; the branch body is not visible in this excerpt.
229 if (this->_size == size)
// Byte count of the new buffer.
232 size_t len = (size_t) size *
sizeof(T);
// Allocate the replacement buffer on the card.
235 realloc_ptr = (T*) this->c->mem_alloc(len);
// Copy min(new, old) bytes: shrinking truncates, growing leaves the new tail
// unwritten by this copy.
237 size_t oldlen = (size_t) this->_size *
sizeof(T);
238 size_t copylen = std::min(len, oldlen);
239 this->c->mem_gpu2gpu(realloc_ptr, this->data, copylen);
// Release the old buffer and adopt the new one.
// NOTE(review): no free_data check is visible before this mem_free — confirm
// against the hidden lines that non-owned buffers are not freed here.
240 this->c->mem_free(this->data);
241 this->data = realloc_ptr;
// Recompute the kernel launch configuration.
245 dim_block = fml::kernel_launcher::dim_block1();
246 dim_grid = fml::kernel_launcher::dim_grid(this->_size);
262 template <
typename T>
// NOTE(review): signature and most of the body are not visible; presumably
// inherit(gpu).
283 template <
typename T>
// Flag the buffer as owned by this object.
293 this->free_data =
true;
// NOTE(review): signature not visible; presumably inherit(gpu, data, size,
// free_on_destruct), since it reads a `free_on_destruct` argument.
311 template <
typename T>
// Kernel launch configuration: the grid dimension is derived from _size.
324 dim_block = fml::kernel_launcher::dim_block1();
325 dim_grid = fml::kernel_launcher::dim_grid(this->_size);
// Ownership of the buffer is decided by the caller.
327 this->free_data = free_on_destruct;
// NOTE(review): signature not visible; the visible statements copy this
// vector's contents into another object `cpy`.
333 template <
typename T>
// Byte count of this vector's buffer.
338 size_t len = (size_t) this->_size *
sizeof(T);
// Card-side copy; `cpy`'s buffer is passed first and this buffer second, the
// same argument order used with the freshly allocated buffer in the
// reallocation code above.
339 this->c->mem_gpu2gpu(cpy.
data_ptr(), this->data, len);
// NOTE(review): signature not visible; presumably print(ndigits,
// add_final_blank). Elements are fetched from the card one at a time.
354 template <
typename REAL>
// NOTE(review): the index is an int while _size's type is declared elsewhere
// (len_t in the visible declarations) — confirm the comparison is safe.
357 for (
int i=0; i<this->_size; i++)
// Copy a single element to host memory, then print it with `ndigits` digits.
360 this->c->mem_gpu2cpu(&tmp, this->data + i,
sizeof(REAL));
361 this->printval(tmp, ndigits);
// Newline after the values.
364 fml::print::putchar(
'\n');
// A second newline; presumably guarded by add_final_blank on a hidden line.
366 fml::print::putchar(
'\n');
// NOTE(review): signature not visible. Prints a one-line summary: the
// "# gpuvec" tag, the length, and the element type name.
372 template <
typename T>
375 fml::print::printf(
"# gpuvec ");
// NOTE(review): "%d" assumes _size is int-compatible — confirm len_t.
376 fml::print::printf(
"%d ", this->_size);
// typeid(T).name() is implementation-defined and may be a mangled name.
377 fml::print::printf(
"type=%s ",
typeid(T).name());
378 fml::print::printf(
"\n");
// NOTE(review): signature not visible. Sets every byte of the buffer to 0
// through the card's mem_set.
386 template <
typename T>
// Byte count of the whole buffer.
389 size_t len = (size_t) this->_size *
sizeof(T);
390 this->c->mem_set(this->data, 0, len);
// NOTE(review): signature not visible. Launches kernel_fill_val with the
// stored grid/block dimensions; `_size, 1` are presumably the row and column
// counts the kernel expects (vector treated as a one-column matrix) — confirm
// against kernelfuns.hh.
400 template <
typename T>
403 fml::kernelfuns::kernel_fill_val<<<dim_grid, dim_block>>>(v, this->_size, 1, this->data);
// NOTE(review): signature not visible; the definition of `start` is on a
// hidden line. Delegates to the two-argument fill_linspace.
415 template <
typename T>
// The upper end of the range is the vector length, converted to T.
419 T stop = (T) (this->_size);
420 this->fill_linspace(start, stop);
// NOTE(review): signature not visible; presumably fill_linspace(start, stop).
// Launches kernel_fill_linspace with the stored grid/block dimensions.
423 template <
typename T>
426 fml::kernelfuns::kernel_fill_linspace<<<dim_grid, dim_block>>>(start, stop, this->_size, 1, this->data);
// NOTE(review): signature not visible; presumably scale(s). Launches
// kernel_scale on the buffer with the stored grid/block dimensions.
437 template <
typename T>
440 fml::kernelfuns::kernel_scale<<<dim_grid, dim_block>>>(s, this->_size, 1, this->data);
// NOTE(review): signature not visible; presumably pow(p). Launches kernel_pow
// on the buffer with the stored grid/block dimensions.
451 template <
typename T>
454 fml::kernelfuns::kernel_pow<<<dim_grid, dim_block>>>(p, this->_size, 1, this->data);
// NOTE(review): signature not visible. Launches kernel_rev_rows on the buffer,
// passing `_size, 1` as the two size arguments; presumably this reverses the
// element order — confirm against kernelfuns.hh.
461 template <
typename T>
464 fml::kernelfuns::kernel_rev_rows<<<dim_grid, dim_block>>>(this->_size, 1, this->data);
// NOTE(review): signature, the declaration of `s_gpu` and the return are not
// visible. Launches kernel_sum, which is given s_gpu's device pointer as its
// output argument.
471 template <
typename T>
477 fml::kernelfuns::kernel_sum<<<dim_grid, dim_block>>>(this->_size, this->data, s_gpu.data_ptr());
// NOTE(review): signature, the declaration of `mx_gpu` and the return are not
// visible. Launches kernel_max, which is given mx_gpu's device pointer as its
// output argument.
487 template <
typename T>
493 fml::kernelfuns::kernel_max<<<dim_grid, dim_block>>>(this->_size, this->data, mx_gpu.data_ptr());
// NOTE(review): signature, the declaration of `mn_gpu` and the return are not
// visible. Launches kernel_min, which is given mn_gpu's device pointer as its
// output argument.
503 template <
typename T>
509 fml::kernelfuns::kernel_min<<<dim_grid, dim_block>>>(this->_size, this->data, mn_gpu.data_ptr());
// NOTE(review): signature and return statement not visible; presumably
// get(i). Reads one element back from the card.
528 template <
typename T>
// Validate the index first (check_index is defined outside this excerpt).
531 this->check_index(i);
// Copy exactly one element from card memory into the host variable `ret`.
534 this->c->mem_gpu2cpu(&ret, this->data + i,
sizeof(T));
// NOTE(review): signature not visible; presumably set(i, v). Writes one
// element to the card.
547 template <
typename T>
// Validate the index first (check_index is defined outside this excerpt).
550 this->check_index(i);
// Copy exactly one element from the host value `v` into card memory.
551 this->c->mem_cpu2gpu(this->data + i, &v,
sizeof(T));
// NOTE(review): signature not visible; presumably operator==(x). The result
// of each early branch below is on hidden lines.
564 template <
typename T>
// Checks before any kernel launch: differing lengths, differing card ids,
// and identical data pointers.
567 if (this->_size != x.
size())
569 else if (this->c->get_id() != x.get_card()->get_id())
571 else if (this->data == x.
data_ptr())
// Otherwise launch kernel_all_eq over both buffers, passing all_eq_gpu's
// device pointer for the result flag.
577 fml::kernelfuns::kernel_all_eq<<<dim_grid, dim_block>>>(this->_size, 1, this->data, x.
data_ptr(), all_eq_gpu.data_ptr());
// Fetch the flag into the host variable and return it as a bool.
579 all_eq_gpu.get_val(&all_eq);
582 return (
bool) all_eq;
// NOTE(review): signature not visible; presumably operator!=(x). Defined as
// the negation of operator==.
591 template <
typename T>
594 return !(*
this == x);
// NOTE(review): signature not visible; presumably operator=(x). The line that
// assigns the data pointer is not shown.
605 template <
typename T>
// Take the card handle and length from the source object.
608 this->c = x.get_card();
609 this->_size = x.
size();
// The assigned-to object is flagged as not owning the buffer.
612 this->free_data =
false;
// NOTE(review): signature not visible. Releases the buffer through the card
// object, but only when this object is flagged as owning it and the pointer
// is non-null.
622 template <
typename T>
625 if (this->free_data && this->data)
627 this->c->mem_free(this->data);
// NOTE(review): signature and the guarding condition are not visible;
// presumably check_params(size). Throws std::runtime_error for a rejected size.
634 template <
typename T>
638 throw std::runtime_error(
"invalid dimensions");
// NOTE(review): signature not visible; presumably check_gpu(gpu). Throws
// std::runtime_error when the card object reports itself as invalid.
643 template <
typename T>
646 if (!gpu->valid_card())
647 throw std::runtime_error(
"GPU card object is invalid");