// fml/html/gpuvec_8hh_source.html

// This file is part of fml which is released under the Boost Software

// License, Version 1.0. See accompanying file LICENSE or copy at

// https://www.boost.org/LICENSE_1_0.txt


#ifndef FML_GPU_GPUVEC_H

#define FML_GPU_GPUVEC_H

#pragma once


#include <cstdint>


#include "../_internals/print.hh"

#include "../_internals/univec.hh"


#include "arch/arch.hh"


#include "internals/gpuscalar.hh"

#include "internals/kernelfuns.hh"

#include "internals/launcher.hh"


#include "card.hh"


namespace fml

{

  /// Vector of `T` whose storage is accessed through a `card` object
  /// (allocation, copies and frees all go through the card's `mem_*` calls).
  ///
  /// Ownership of the buffer is tracked by the inherited `free_data` flag:
  /// the buffer is released by this object only when that flag is set.
  /// The member definitions follow the class in this header.
  template <typename T>

  class gpuvec : public fml::univec<T>

  {

    public:

      /// Create an empty vector (size 0, null data) bound to `gpu`.
      gpuvec(std::shared_ptr<card> gpu);

      /// Allocate storage for `size` elements on `gpu`; the vector owns it.
      gpuvec(std::shared_ptr<card> gpu, len_t size);

      /// Wrap an existing pointer of `size` elements. The pointer is freed by
      /// this object only if `free_on_destruct` is true.
      gpuvec(std::shared_ptr<card> gpu, T *data, len_t size, bool free_on_destruct=false);

      /// Shallow copy: shares `x`'s data pointer and card, never frees the data.
      gpuvec(const gpuvec &x);

      /// Releases the buffer if it is owned, then drops the card handle.
      ~gpuvec();


      /// Change the length to `size`; the first min(old, new) elements are
      /// carried over by a copy through the card. No-op if the size is unchanged.
      void resize(len_t size);

      /// Release owned storage, rebind to `gpu`, then resize to `size`.
      void resize(std::shared_ptr<card> gpu, len_t size);

      /// Reset to an empty vector (size 0, null data) bound to `gpu`.
      void inherit(std::shared_ptr<card> gpu);

      /// Release owned storage and adopt an existing pointer of `size` elements.
      void inherit(std::shared_ptr<card> gpu, T *data, len_t size, bool free_on_destruct=false);

      /// Return a new vector on the same card holding a copy of the elements.
      gpuvec<T> dupe() const;


      /// Copy each element to the host and print it with `ndigits` digits,
      /// followed by a newline (and a second one if `add_final_blank`).
      void print(uint8_t ndigits=4, bool add_final_blank=true) const;

      /// Print a one-line summary: size and `typeid(T).name()`.
      void info() const;


      /// Set all bytes of the buffer to 0 via the card's `mem_set`.
      void fill_zero();

      /// Fill using `kernel_fill_val` with value `v`.
      void fill_val(const T v);

      /// Same as `fill_linspace(1, size)`.
      void fill_linspace();

      /// Fill using `kernel_fill_linspace` with endpoints `start` and `stop`.
      void fill_linspace(const T start, const T stop);


      /// Apply `kernel_scale` with factor `s` in place.
      void scale(const T s);

      /// Apply `kernel_pow` with exponent `p` in place.
      void pow(const T p);

      /// Apply `kernel_rev_rows` in place.
      void rev();


      /// Reduce with `kernel_sum` and return the result on the host.
      T sum() const;

      /// Reduce with `kernel_max` and return the result on the host.
      T max() const;

      /// Reduce with `kernel_min` and return the result on the host.
      T min() const;


      /// Copy element `i` to the host and return it (index is checked by `check_index`).
      T get(const len_t i) const;

      /// Copy `v` from the host into element `i` (index is checked by `check_index`).
      void set(const len_t i, const T v);


      /// False if sizes or card ids differ; true if both share the same data
      /// pointer; otherwise decided by `kernel_all_eq`.
      bool operator==(const gpuvec<T> &x) const;

      /// Negation of `operator==`.
      bool operator!=(const gpuvec<T> &x) const;

      /// Shallow assignment: rebinds to `x`'s card, size and data pointer
      /// without copying elements.
      gpuvec<T>& operator=(const gpuvec<T> &x);


      /// Card this vector is bound to.
      std::shared_ptr<card> get_card() const {return c;};

      /// Block dimensions used for kernel launches.
      dim3 get_blockdim() const {return dim_block;};

      /// Grid dimensions used for kernel launches.
      dim3 get_griddim() const {return dim_grid;};


    protected:

      // Handle to the card through which all memory operations are issued.
      std::shared_ptr<card> c;


    private:

      // Launch configuration, obtained from fml::kernel_launcher whenever the
      // size is (re)established.
      dim3 dim_block;

      dim3 dim_grid;


      // Free the buffer through the card if it is owned and non-null.
      void free();

      // Throws std::runtime_error if `size` is negative.
      void check_params(len_t size);

      // Throws std::runtime_error if `gpu->valid_card()` is false.
      void check_gpu(std::shared_ptr<card> gpu);

  };

}


// -----------------------------------------------------------------------------

// public

// -----------------------------------------------------------------------------


// constructors/destructor


/// Construct an empty vector bound to `gpu`.
///
/// The vector has size 0 and a null data pointer; it is flagged as owning
/// whatever storage it later acquires.
/// @throws std::runtime_error (from check_gpu) if the card is not valid.
template <typename T>
fml::gpuvec<T>::gpuvec(std::shared_ptr<fml::card> gpu)
{
  check_gpu(gpu);
  
  // Nothing is allocated until the vector is resized.
  this->data = nullptr;
  this->_size = 0;
  this->free_data = true;
  
  this->c = gpu;
}


/// Construct a vector of `size` elements with freshly allocated storage.
///
/// The buffer of `size * sizeof(T)` bytes is obtained from the card's
/// `mem_alloc` and is owned (freed on destruction) by this object.
/// @throws std::runtime_error if `size` is negative or the card is invalid.
template <typename T>
fml::gpuvec<T>::gpuvec(std::shared_ptr<fml::card> gpu, len_t size)
{
  check_params(size);
  check_gpu(gpu);
  
  this->c = gpu;
  
  // Widen before multiplying so the byte count is computed in size_t.
  const size_t nbytes = static_cast<size_t>(size) * sizeof(T);
  this->data = (T*) this->c->mem_alloc(nbytes);
  this->_size = size;
  this->free_data = true;
  
  // Launch configuration for kernels operating on this vector.
  dim_block = fml::kernel_launcher::dim_block1();
  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
}


/// Construct a vector around an existing buffer.
///
/// No allocation or copy is performed; `data_` is stored as-is.
/// @param gpu              Card the vector is bound to.
/// @param data_            Pointer to `size` elements (assumed to live on
///                         `gpu` - the caller must guarantee this).
/// @param size             Number of elements.
/// @param free_on_destruct If true, this object frees `data_` when it is
///                         destroyed.
/// @throws std::runtime_error if `size` is negative or the card is invalid.
template <typename T>
fml::gpuvec<T>::gpuvec(std::shared_ptr<fml::card> gpu, T *data_, len_t size, bool free_on_destruct)
{
  check_params(size);
  check_gpu(gpu);
  
  this->c = gpu;
  this->data = data_;
  this->_size = size;
  this->free_data = free_on_destruct;
  
  dim_block = fml::kernel_launcher::dim_block1();
  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
}


/// Copy constructor: creates a non-owning view of `x`.
///
/// The new object shares `x`'s card and data pointer; elements are not
/// copied and the buffer is never freed by the new object.
template <typename T>
fml::gpuvec<T>::gpuvec(const fml::gpuvec<T> &x)
{
  this->c = x.get_card();
  
  this->data = x.data_ptr();
  this->_size = x.size();
  
  // The buffer belongs to someone else, so never release it from here.
  this->free_data = false;
  
  dim_block = fml::kernel_launcher::dim_block1();
  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
}


/// Destructor: releases the buffer if it is owned, then drops the card handle.
template <typename T>
fml::gpuvec<T>::~gpuvec()
{
  // free() needs the card, so it must run before the handle is cleared.
  this->free();
  this->c.reset();
}


// memory management


/// Resize the vector to `size` elements.
///
/// A new buffer is allocated on the current card and the first
/// min(old size, new size) elements are copied into it. If the size is
/// unchanged nothing happens. After a successful resize the vector owns its
/// buffer.
///
/// @param size New number of elements.
/// @throws std::runtime_error if `size` is negative.
template <typename T>
void fml::gpuvec<T>::resize(len_t size)
{
  check_params(size);
  
  if (this->_size == size)
    return;
  
  const size_t len = (size_t) size * sizeof(T);
  const size_t oldlen = (size_t) this->_size * sizeof(T);
  const size_t copylen = std::min(len, oldlen);
  
  T *realloc_ptr = (T*) this->c->mem_alloc(len);
  
  // Skip the copy when there is nothing to carry over; this also avoids
  // handing a null source pointer to the card for an empty vector.
  if (copylen > 0)
    this->c->mem_gpu2gpu(realloc_ptr, this->data, copylen);
  
  // Release the old buffer only if this object owns it. A borrowed buffer
  // (free_data == false) still belongs to its original owner.
  this->free();
  
  this->data = realloc_ptr;
  this->_size = size;
  
  // The new allocation was made here, so it must be released here as well.
  this->free_data = true;
  
  dim_block = fml::kernel_launcher::dim_block1();
  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
}


/// Rebind the vector to `gpu` and give it `size` elements.
///
/// Any storage owned on the previous card is released first. The existing
/// contents are NOT carried over; the new buffer is allocated on `gpu` and
/// owned by this object.
///
/// @param gpu  Card to bind to.
/// @param size New number of elements.
/// @throws std::runtime_error if the card is invalid or `size` is negative.
template <typename T>
void fml::gpuvec<T>::resize(std::shared_ptr<fml::card> gpu, len_t size)
{
  // Validate everything before touching the current state.
  check_gpu(gpu);
  check_params(size);
  
  // Release owned storage while the old card is still attached.
  this->free();
  
  // Forget the old buffer and size entirely. Otherwise the size-only resize
  // below would either return early with no storage (same size) or try to
  // copy from a pointer that was just freed or belongs to another card.
  this->data = NULL;
  this->_size = 0;
  
  this->c = gpu;
  this->resize(size);
  
  // Whatever was allocated above belongs to this object.
  this->free_data = true;
}


/// Reset to an empty vector bound to `gpu`.
///
/// Storage owned by this object is released first (through the card it was
/// allocated on); afterwards the vector has size 0 and a null data pointer.
///
/// @param gpu Card to bind to.
/// @throws std::runtime_error if the card is invalid.
template <typename T>
void fml::gpuvec<T>::inherit(std::shared_ptr<fml::card> gpu)
{
  check_gpu(gpu);
  
  // Release owned storage before the pointer is overwritten, matching the
  // pointer-adopting overload of inherit(). Must happen before `c` changes.
  this->free();
  
  this->c = gpu;
  
  this->_size = 0;
  this->data = NULL;
  
  this->free_data = true;
}


/// Replace the vector's storage with an existing buffer.
///
/// Currently owned storage is released, then `data` is adopted without any
/// copy.
/// @param gpu              Card to bind to.
/// @param data             Pointer to `size` elements.
/// @param size             Number of elements.
/// @param free_on_destruct If true, this object frees `data` on destruction.
/// @throws std::runtime_error if `size` is negative or the card is invalid.
template <typename T>
void fml::gpuvec<T>::inherit(std::shared_ptr<fml::card> gpu, T *data, len_t size, bool free_on_destruct)
{
  check_params(size);
  check_gpu(gpu);
  
  // Let go of the old buffer while the old card is still attached.
  this->free();
  
  this->c = gpu;
  this->data = data;
  this->_size = size;
  this->free_data = free_on_destruct;
  
  dim_block = fml::kernel_launcher::dim_block1();
  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
}


/// Return a new vector on the same card containing a copy of the elements.
///
/// NOTE(review): the result is returned by value from a named local. The copy
/// constructor of this class is shallow and non-owning, so this relies on the
/// compiler eliding that copy - confirm for the toolchains in use.
template <typename T>
fml::gpuvec<T> fml::gpuvec<T>::dupe() const
{
  const size_t nbytes = static_cast<size_t>(this->_size) * sizeof(T);
  
  fml::gpuvec<T> copy(this->c, this->_size);
  this->c->mem_gpu2gpu(copy.data_ptr(), this->data, nbytes);
  
  return copy;
}


// printers


/// Print the vector's elements on one line.
///
/// Each element is copied to the host individually and passed to printval().
/// @param ndigits         Forwarded to printval().
/// @param add_final_blank If true, an extra newline is emitted at the end.
template <typename T>
void fml::gpuvec<T>::print(uint8_t ndigits, bool add_final_blank) const
{
  T host_val;
  for (int idx=0; idx<this->_size; idx++)
  {
    // One element-sized transfer per iteration.
    this->c->mem_gpu2cpu(&host_val, this->data + idx, sizeof(T));
    this->printval(host_val, ndigits);
  }
  
  fml::print::putchar('\n');
  
  if (add_final_blank)
  {
    fml::print::putchar('\n');
  }
}


/// Print a one-line summary: the tag "# gpuvec", the size, and the
/// implementation-defined name of the element type.
template <typename T>
void fml::gpuvec<T>::info() const
{
  const char *type_name = typeid(T).name();
  
  fml::print::printf("# gpuvec ");
  fml::print::printf("%d ", this->_size);
  fml::print::printf("type=%s ", type_name);
  fml::print::printf("\n");
}


// fillers


/// Zero the whole buffer by asking the card to set every byte to 0.
template <typename T>
void fml::gpuvec<T>::fill_zero()
{
  const size_t nbytes = static_cast<size_t>(this->_size) * sizeof(T);
  this->c->mem_set(this->data, 0, nbytes);
}


/// Fill the vector with `v` by launching kernel_fill_val.
///
/// The kernel receives the dimensions (size, 1) - presumably the vector is
/// treated as a single-column matrix; confirm against kernelfuns.hh.
template <typename T>
void fml::gpuvec<T>::fill_val(const T v)
{
  const auto nrows = this->_size;
  
  fml::kernelfuns::kernel_fill_val<<<dim_grid, dim_block>>>(v, nrows, 1, this->data);
  
  // Card-level check after the launch.
  this->c->check();
}


/// Convenience overload: equivalent to fill_linspace(1, size).
template <typename T>
void fml::gpuvec<T>::fill_linspace()
{
  const T first = 1;
  const T last = (T) (this->_size);
  
  this->fill_linspace(first, last);
}


/// Fill the vector by launching kernel_fill_linspace with the endpoints
/// `start` and `stop`.
///
/// The kernel receives the dimensions (size, 1) - presumably the vector is
/// treated as a single-column matrix; confirm against kernelfuns.hh.
template <typename T>
void fml::gpuvec<T>::fill_linspace(const T start, const T stop)
{
  const auto nrows = this->_size;
  
  fml::kernelfuns::kernel_fill_linspace<<<dim_grid, dim_block>>>(start, stop, nrows, 1, this->data);
  
  // Card-level check after the launch.
  this->c->check();
}


/// Scale the vector in place by launching kernel_scale with factor `s`.
///
/// The kernel receives the dimensions (size, 1).
template <typename T>
void fml::gpuvec<T>::scale(const T s)
{
  const auto nrows = this->_size;
  
  fml::kernelfuns::kernel_scale<<<dim_grid, dim_block>>>(s, nrows, 1, this->data);
  
  // Card-level check after the launch.
  this->c->check();
}


/// Apply kernel_pow with exponent `p` to the vector in place.
///
/// The kernel receives the dimensions (size, 1).
template <typename T>
void fml::gpuvec<T>::pow(const T p)
{
  const auto nrows = this->_size;
  
  fml::kernelfuns::kernel_pow<<<dim_grid, dim_block>>>(p, nrows, 1, this->data);
  
  // Card-level check after the launch.
  this->c->check();
}


/// Reverse the vector in place by launching kernel_rev_rows on it with the
/// dimensions (size, 1).
template <typename T>
void fml::gpuvec<T>::rev()
{
  const auto nrows = this->_size;
  
  fml::kernelfuns::kernel_rev_rows<<<dim_grid, dim_block>>>(nrows, 1, this->data);
  
  // Card-level check after the launch.
  this->c->check();
}


/// Reduce the vector with kernel_sum and return the result on the host.
///
/// A device-side scalar seeded with 0 is handed to the kernel as the output
/// slot and read back afterwards.
template <typename T>
T fml::gpuvec<T>::sum() const
{
  T total = 0;
  fml::gpuscalar<T> total_gpu(c, total);
  
  const auto len = this->_size;
  fml::kernelfuns::kernel_sum<<<dim_grid, dim_block>>>(len, this->data, total_gpu.data_ptr());
  
  // Bring the result back to the host, then run the card-level check.
  total_gpu.get_val(&total);
  this->c->check();
  
  return total;
}


/// Reduce the vector with kernel_max and return the result on the host.
///
/// The device-side output slot is seeded with the first element of the
/// vector (or 0 when the vector is empty).
///
/// NOTE(review): kernel_max is not visible from this file. If it folds each
/// element into the value already stored in the output slot, a constant seed
/// of 0 would make every all-negative vector report 0; seeding with an actual
/// element is correct in that case and harmless if the kernel overwrites the
/// slot instead.
template <typename T>
T fml::gpuvec<T>::max() const
{
  T mx = 0;
  if (this->_size > 0)
    mx = this->get(0);
  
  fml::gpuscalar<T> mx_gpu(c, mx);
  
  fml::kernelfuns::kernel_max<<<dim_grid, dim_block>>>(this->_size, this->data, mx_gpu.data_ptr());
  
  // Bring the result back to the host, then run the card-level check.
  mx_gpu.get_val(&mx);
  this->c->check();
  
  return mx;
}


/// Reduce the vector with kernel_min and return the result on the host.
///
/// The device-side output slot is seeded with the first element of the
/// vector (or 0 when the vector is empty), mirroring how sum() and max()
/// hand an initialised slot to their kernels.
///
/// NOTE(review): kernel_min is not visible from this file. If it folds each
/// element into the value already stored in the output slot, an unseeded slot
/// would make the result depend on indeterminate memory; seeding with an
/// actual element is correct in that case and harmless if the kernel
/// overwrites the slot instead.
template <typename T>
T fml::gpuvec<T>::min() const
{
  T mn = 0;
  if (this->_size > 0)
    mn = this->get(0);
  
  fml::gpuscalar<T> mn_gpu(c, mn);
  
  fml::kernelfuns::kernel_min<<<dim_grid, dim_block>>>(this->_size, this->data, mn_gpu.data_ptr());
  
  // Bring the result back to the host, then run the card-level check.
  mn_gpu.get_val(&mn);
  this->c->check();
  
  return mn;
}


// operators


/// Return element `i`, copied to the host.
///
/// The index is validated by check_index() before the transfer.
template <typename T>
T fml::gpuvec<T>::get(const len_t i) const
{
  this->check_index(i);
  
  T host_val;
  const T *src = this->data + i;
  this->c->mem_gpu2cpu(&host_val, src, sizeof(T));
  
  return host_val;
}


/// Store the host value `v` into element `i`.
///
/// The index is validated by check_index() before the transfer.
template <typename T>
void fml::gpuvec<T>::set(const len_t i, const T v)
{
  this->check_index(i);
  
  T *dst = this->data + i;
  this->c->mem_cpu2gpu(dst, &v, sizeof(T));
}


/// Compare two vectors.
///
/// Returns false when the sizes or the card ids differ, and true when both
/// vectors refer to the same data pointer. Otherwise kernel_all_eq is
/// launched with an int flag seeded with 1, and the flag read back from the
/// device decides the result.
template <typename T>
bool fml::gpuvec<T>::operator==(const fml::gpuvec<T> &x) const
{
  // Cheap host-side checks first; the card id is only consulted once the
  // sizes are known to match.
  if (this->_size != x.size() || this->c->get_id() != x.get_card()->get_id())
    return false;
  
  if (this->data == x.data_ptr())
    return true;
  
  int flag = 1;
  fml::gpuscalar<int> flag_gpu(c, flag);
  
  fml::kernelfuns::kernel_all_eq<<<dim_grid, dim_block>>>(this->_size, 1, this->data, x.data_ptr(), flag_gpu.data_ptr());
  
  flag_gpu.get_val(&flag);
  this->c->check();
  
  return (bool) flag;
}


/// Logical negation of operator==.
template <typename T>
bool fml::gpuvec<T>::operator!=(const fml::gpuvec<T> &x) const
{
  const bool same = (*this == x);
  return !same;
}


/// Shallow assignment: make this vector refer to `x`'s card, size and data.
///
/// No elements are copied. Storage previously owned by this object is
/// released (as inherit() does) unless it is the very buffer being assigned,
/// in which case the current ownership flag is kept. When a different buffer
/// is adopted, this object does not take ownership of it.
///
/// @param x Vector to alias.
/// @return Reference to this object.
template <typename T>
fml::gpuvec<T>& fml::gpuvec<T>::operator=(const fml::gpuvec<T> &x)
{
  // Self-assignment must not change anything (in particular not ownership).
  if (this == &x)
    return *this;
  
  if (this->data != x.data_ptr())
  {
    // Release our own buffer while the old card is still attached, then mark
    // the adopted buffer as borrowed.
    this->free();
    this->free_data = false;
  }
  
  this->c = x.get_card();
  this->_size = x.size();
  this->data = x.data_ptr();
  
  // Keep the launch configuration in step with the new size, as the copy
  // constructor does; otherwise kernels would run with stale dimensions.
  dim_block = fml::kernel_launcher::dim_block1();
  dim_grid = fml::kernel_launcher::dim_grid(this->_size);
  
  return *this;
}


// -----------------------------------------------------------------------------

// private

// -----------------------------------------------------------------------------


/// Release the buffer through the card, but only when this object owns it
/// and the pointer is non-null. The pointer is nulled afterwards.
template <typename T>
void fml::gpuvec<T>::free()
{
  // Borrowed or absent storage: nothing to do.
  if (!this->free_data || !this->data)
    return;
  
  this->c->mem_free(this->data);
  this->data = nullptr;
}


/// Validate a requested size.
/// @throws std::runtime_error if `size` is negative.
template <typename T>
void fml::gpuvec<T>::check_params(len_t size)
{
  if (size >= 0)
    return;
  
  throw std::runtime_error("invalid dimensions");
}


/// Validate a card handle.
///
/// @param gpu Card handle to check.
/// @throws std::runtime_error if the handle is empty or `valid_card()`
///         reports false.
template <typename T>
void fml::gpuvec<T>::check_gpu(std::shared_ptr<fml::card> gpu)
{
  // An empty shared_ptr must be rejected before it is dereferenced.
  if (!gpu || !gpu->valid_card())
    throw std::runtime_error("GPU card object is invalid");
}


#endif