fml  0.1-0
Fused Matrix Library
parmat.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_PAR_GPU_PARMAT_H
6 #define FML_PAR_GPU_PARMAT_H
7 #pragma once
8 
9 
10 #include "../../gpu/card.hh"
11 #include "../../gpu/gpumat.hh"
12 #include "../../gpu/gpuvec.hh"
13 
14 #include "../internals/parmat.hh"
15 
16 
17 namespace fml
18 {
19  template <typename REAL>
20  class parmat_gpu : public parmat<gpumat<REAL>, gpuvec<REAL>, REAL>
21  {
23 
24  public:
25  parmat_gpu(comm mpi_comm, card_sp_t gpu_card, const len_global_t nrows, const len_t ncols);
26  parmat_gpu(comm mpi_comm, card_sp_t gpu_card, const len_global_t nrows, const len_t ncols, const len_global_t nb4_);
27 
28  void print(uint8_t ndigits=4, bool add_final_blank=true);
29 
30  void fill_linspace(const REAL start, const REAL stop);
31  void fill_eye();
32  void fill_diag(const gpuvec<REAL> &d);
33 
34  card_sp_t get_card() const {return this->data.get_card();};
35  };
36 }
37 
38 
39 
40 template <typename REAL>
41 fml::parmat_gpu<REAL>::parmat_gpu(fml::comm mpi_comm, fml::card_sp_t gpu_card,
42  const len_global_t nrows, const len_t ncols)
43 {
44  this->r = mpi_comm;
45 
46  this->m_global = nrows;
47  len_t nrows_local = this->get_local_dim();
48  this->data.resize(gpu_card, nrows_local, ncols);
49 
50  this->num_preceding_rows();
51 }
52 
53 
54 
55 template <typename REAL>
56 fml::parmat_gpu<REAL>::parmat_gpu(fml::comm mpi_comm, fml::card_sp_t gpu_card,
57  const len_global_t nrows, const len_t ncols, const len_global_t nb4_)
58 {
59  this->r = mpi_comm;
60 
61  this->m_global = nrows;
62  len_t nrows_local = this->get_local_dim();
63  this->data.resize(gpu_card, nrows_local, ncols);
64 
65  this->nb4 = nb4_;
66 }
67 
68 
69 
70 template <typename REAL>
71 void fml::parmat_gpu<REAL>::print(uint8_t ndigits, bool add_final_blank)
72 {
73  len_t n = this->data.ncols();
74  fml::gpuvec<REAL> pv(this->data.get_card(), n);
75 
76  int myrank = this->r.rank();
77  if (myrank == 0)
78  this->data.print(ndigits, false);
79 
80  for (int rank=1; rank<this->r.size(); rank++)
81  {
82  if (rank == myrank)
83  {
84  len_t m = this->data.nrows();
85  this->r.send(1, &m, 0);
86 
87  for (int i=0; i<m; i++)
88  {
89  this->data.get_row(i, pv);
90  this->r.send(n, pv.data_ptr(), 0);
91  }
92  }
93  else if (myrank == 0)
94  {
95  len_t m;
96  this->r.recv(1, &m, rank);
97 
98  for (int i=0; i<m; i++)
99  {
100  this->r.recv(n, pv.data_ptr(), rank);
101  pv.print(ndigits, false);
102  }
103  }
104 
105  this->r.barrier();
106  }
107 
108  if (add_final_blank)
109  {
110  this->r.printf(0, "\n");
111  this->r.barrier();
112  }
113 }
114 
115 
116 
117 template <typename REAL>
118 void fml::parmat_gpu<REAL>::fill_linspace(const REAL start, const REAL stop)
119 {
120  if (start == stop)
121  this->fill_val(start);
122  else
123  {
124  const len_t m_local = this->data.nrows();
125  const len_t n = this->data.ncols();
126 
127  const REAL v = (stop-start)/((REAL) this->m_global*n - 1);
128 
129  // TODO
130  // kernelfuns::kernel_fill_linspace<<<dim_grid, dim_block>>>(start, stop, this->m, this->n, this->data);
131 
132  this->data.c->check();
133  }
134 }
135 
136 
137 
138 template <typename REAL>
140 {
141  fml::gpuvec<REAL> v(1);
142  v.fill_val(1);
143  this->fill_diag(v);
144 }
145 
146 
147 
148 template <typename REAL>
150 {
151 
152 }
153 
154 
155 #endif
fml::parmat_gpu
Definition: parmat.hh:20
fml::parmat
Definition: parmat.hh:23
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::comm
MPI communicator data and helpers.
Definition: comm.hh:24
fml
Core namespace.
Definition: dimops.hh:10