fml  0.1-0
Fused Matrix Library
parmat.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_PAR_CPU_PARMAT_H
6 #define FML_PAR_CPU_PARMAT_H
7 #pragma once
8 
9 
10 #include "../../_internals/omp.hh"
11 
12 #include "../../cpu/cpumat.hh"
13 #include "../../cpu/cpuvec.hh"
14 
15 #include "../internals/parmat.hh"
16 
17 
18 namespace fml
19 {
20  template <typename REAL>
21  class parmat_cpu : public parmat<cpumat<REAL>, cpuvec<REAL>, REAL>
22  {
24 
25  public:
26  parmat_cpu(comm mpi_comm);
27  parmat_cpu(comm mpi_comm, const len_global_t nrows, const len_t ncols);
28  parmat_cpu(comm mpi_comm, const len_global_t nrows, const len_t ncols, const len_global_t nb4_);
29 
30  void print(uint8_t ndigits=4, bool add_final_blank=true);
31 
32  // void resize(len_global_t nrows, len_t ncols);
33  // void inherit(cpumat<REAL> &data_);
34 
35  void fill_linspace(const REAL start, const REAL stop);
36  void fill_eye();
37  void fill_diag(const cpuvec<REAL> &d);
38  };
39 }
40 
41 
42 
43 template <typename REAL>
45 {
46  this->r = mpi_comm;
47  this->m_global = 0;
48  this->nb4 = 0;
49 }
50 
51 
52 
53 template <typename REAL>
54 fml::parmat_cpu<REAL>::parmat_cpu(fml::comm mpi_comm, const len_global_t nrows, const len_t ncols)
55 {
56  this->r = mpi_comm;
57 
58  this->m_global = nrows;
59  len_t nrows_local = this->get_local_dim();
60  this->data.resize(nrows_local, ncols);
61 
62  this->num_preceding_rows();
63 }
64 
65 
66 
67 template <typename REAL>
68 fml::parmat_cpu<REAL>::parmat_cpu(fml::comm mpi_comm, const len_global_t nrows, const len_t ncols, const len_global_t nb4_)
69 {
70  this->r = mpi_comm;
71 
72  this->m_global = nrows;
73  len_t nrows_local = this->get_local_dim();
74  this->data.resize(nrows_local, ncols);
75 
76  this->nb4 = nb4_;
77 }
78 
79 
80 
81 // template <typename REAL>
82 // void fml::parmat_cpu<REAL>::resize(len_global_t nrows, len_t ncols)
83 // {
84 //
85 // }
86 //
87 //
88 //
89 // template <typename REAL>
90 // void fml::parmat_cpu<REAL>::inherit(cpumat<REAL> &data_, bool free_on_destruct)
91 // {
92 //
93 // }
94 
95 
96 
97 template <typename REAL>
98 void fml::parmat_cpu<REAL>::print(uint8_t ndigits, bool add_final_blank)
99 {
100  len_t n = this->data.ncols();
101  cpuvec<REAL> pv(n);
102 
103  int myrank = this->r.rank();
104  if (myrank == 0)
105  this->data.print(ndigits, false);
106 
107  for (int rank=1; rank<this->r.size(); rank++)
108  {
109  if (rank == myrank)
110  {
111  len_t m = this->data.nrows();
112  this->r.send(1, &m, 0);
113 
114  for (int i=0; i<m; i++)
115  {
116  this->data.get_row(i, pv);
117  this->r.send(n, pv.data_ptr(), 0);
118  }
119  }
120  else if (myrank == 0)
121  {
122  len_t m;
123  this->r.recv(1, &m, rank);
124 
125  for (int i=0; i<m; i++)
126  {
127  this->r.recv(n, pv.data_ptr(), rank);
128  pv.print(ndigits, false);
129  }
130  }
131 
132  this->r.barrier();
133  }
134 
135  if (add_final_blank)
136  {
137  this->r.printf(0, "\n");
138  this->r.barrier();
139  }
140 }
141 
142 
143 
144 template <typename REAL>
145 void fml::parmat_cpu<REAL>::fill_linspace(const REAL start, const REAL stop)
146 {
147  if (start == stop)
148  this->fill_val(start);
149  else
150  {
151  const len_t m_local = this->data.nrows();
152  const len_t n = this->data.ncols();
153 
154  const REAL v = (stop-start)/((REAL) this->m_global*n - 1);
155  REAL *d_p = this->data.data_ptr();
156 
157  #pragma omp parallel for if(m_local*n > fml::omp::OMP_MIN_SIZE)
158  for (len_t j=0; j<n; j++)
159  {
160  #pragma omp simd
161  for (len_t i=0; i<m_local; i++)
162  {
163  d_p[i + m_local*j] = v*((REAL) i + this->nb4 + this->m_global*j) + start;
164  }
165  }
166  }
167 }
168 
169 
170 
171 template <typename REAL>
173 {
174  fml::cpuvec<REAL> v(1);
175  v(0) = (REAL) 1;
176  this->fill_diag(v);
177 }
178 
179 
180 
181 template <typename REAL>
183 {
184  const len_t m_local = this->data.nrows();
185  const len_t n = this->data.ncols();
186  REAL *x_p = this->data.data_ptr();
187  const REAL *d_p = d.data_ptr();
188 
189  #pragma omp for simd
190  for (len_t j=0; j<n; j++)
191  {
192  for (len_t i=0; i<m_local; i++)
193  {
194  const len_global_t gi = i + this->nb4;
195  if (gi == j)
196  x_p[i + m_local*j] = d_p[gi % d.size()];
197  else
198  x_p[i + m_local*j] = 0;
199  }
200  }
201 }
202 
203 
204 #endif
fml::parmat_cpu
Definition: parmat.hh:21
fml::parmat
Definition: parmat.hh:23
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::comm
MPI communicator data and helpers.
Definition: comm.hh:24
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26