fml  0.1-0
Fused Matrix Library
dimops.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_CPU_DIMOPS_H
6 #define FML_CPU_DIMOPS_H
7 #pragma once
8 
9 
10 #include <cmath>
11 
12 #include "../_internals/dimops.hh"
13 #include "../_internals/omp.hh"
14 
15 #include "cpumat.hh"
16 #include "cpuvec.hh"
17 
18 
19 namespace fml
20 {
22 namespace dimops
23 {
24  namespace internals
25  {
26  template <typename REAL>
27  static inline void col_sum(const len_t j, const len_t m, const REAL *x, REAL &sum)
28  {
29  sum = 0;
30 
31  #pragma omp simd reduction(+:sum)
32  for (len_t i=0; i<m; i++)
33  sum += x[i + m*j];
34  }
35 
36  template <typename REAL>
37  static inline void col_mean(const len_t j, const len_t m, const REAL *x, REAL &mean)
38  {
39  mean = 0;
40  col_sum(j, m, x, mean);
41  mean /= (REAL) m;
42  }
43 
44  template <typename REAL>
45  static inline void col_mean_and_var(const len_t j, const len_t m, const REAL *x, REAL &mean, REAL &var)
46  {
47  mean = 0;
48  var = 0;
49 
50  for (len_t i=0; i<m; i++)
51  {
52  REAL dt = x[i + m*j] - mean;
53  mean += dt/((REAL) i+1);
54  var += dt * (x[i + m*j] - mean);
55  }
56 
57  var = sqrt(var / ((REAL) m-1));
58  }
59  }
60 
61 
62 
71  template <typename REAL>
72  void rowsums(const cpumat<REAL> &x, cpuvec<REAL> &s)
73  {
74  const REAL *x_d = x.data_ptr();
75  const len_t m = x.nrows();
76  const len_t n = x.ncols();
77 
78  s.resize(m);
79  s.fill_zero();
80  REAL *s_d = s.data_ptr();
81 
82  for (len_t j=0; j<n; j++)
83  {
84  #pragma omp for simd
85  for (len_t i=0; i<m; i++)
86  s_d[i] += x_d[i + m*j];
87  }
88  }
89 
90 
91 
100  template <typename REAL>
101  void rowmeans(const cpumat<REAL> &x, cpuvec<REAL> &s)
102  {
103  rowsums(x, s);
104 
105  const len_t m = x.nrows();
106  const len_t n = x.ncols();
107  REAL *s_d = s.data_ptr();
108 
109  #pragma omp for simd
110  for (len_t i=0; i<m; i++)
111  s_d[i] /= (REAL) n;
112  }
113 
114 
115 
124  template <typename REAL>
125  void colsums(const cpumat<REAL> &x, cpuvec<REAL> &s)
126  {
127  const REAL *x_d = x.data_ptr();
128  const len_t m = x.nrows();
129  const len_t n = x.ncols();
130 
131  s.resize(n);
132  s.fill_zero();
133  REAL *s_d = s.data_ptr();
134 
135  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
136  for (len_t j=0; j<n; j++)
137  internals::col_sum(j, m, x_d, s_d[j]);
138  }
139 
140 
141 
150  template <typename REAL>
151  void colmeans(const cpumat<REAL> &x, cpuvec<REAL> &s)
152  {
153  const REAL *x_d = x.data_ptr();
154  const len_t m = x.nrows();
155  const len_t n = x.ncols();
156 
157  s.resize(n);
158  s.fill_zero();
159  REAL *s_d = s.data_ptr();
160 
161  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
162  for (len_t j=0; j<n; j++)
163  internals::col_mean(j, m, x_d, s_d[j]);
164  }
165 
166 
167 
179  template <typename REAL>
180  static inline void rowsweep(cpumat<REAL> &x, const cpuvec<REAL> &s, const sweep_op op)
181  {
182  const len_t m = x.nrows();
183  const len_t n = x.ncols();
184 
185  if (s.size() != m)
186  throw std::runtime_error("non-conformal arguments");
187 
188  REAL *x_d = x.data_ptr();
189  const REAL *s_d = s.data_ptr();
190 
191  if (op == SWEEP_ADD)
192  {
193  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
194  for (len_t j=0; j<n; j++)
195  {
196  #pragma omp simd
197  for (len_t i=0; i<m; i++)
198  x_d[i + m*j] += s_d[i];
199  }
200  }
201  else if (op == SWEEP_SUB)
202  {
203  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
204  for (len_t j=0; j<n; j++)
205  {
206  #pragma omp simd
207  for (len_t i=0; i<m; i++)
208  x_d[i + m*j] -= s_d[i];
209  }
210  }
211  else if (op == SWEEP_MUL)
212  {
213  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
214  for (len_t j=0; j<n; j++)
215  {
216  #pragma omp simd
217  for (len_t i=0; i<m; i++)
218  x_d[i + m*j] *= s_d[i];
219  }
220  }
221  else if (op == SWEEP_DIV)
222  {
223  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
224  for (len_t j=0; j<n; j++)
225  {
226  #pragma omp simd
227  for (len_t i=0; i<m; i++)
228  x_d[i + m*j] /= s_d[i];
229  }
230  }
231  }
232 
233 
234 
246  template <typename REAL>
247  static inline void colsweep(cpumat<REAL> &x, const cpuvec<REAL> &s, const sweep_op op)
248  {
249  const len_t m = x.nrows();
250  const len_t n = x.ncols();
251 
252  if (s.size() != n)
253  throw std::runtime_error("non-conformal arguments");
254 
255  REAL *x_d = x.data_ptr();
256  const REAL *s_d = s.data_ptr();
257 
258  if (op == SWEEP_ADD)
259  {
260  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
261  for (len_t j=0; j<n; j++)
262  {
263  #pragma omp simd
264  for (len_t i=0; i<m; i++)
265  x_d[i + m*j] += s_d[j];
266  }
267  }
268  else if (op == SWEEP_SUB)
269  {
270  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
271  for (len_t j=0; j<n; j++)
272  {
273  #pragma omp simd
274  for (len_t i=0; i<m; i++)
275  x_d[i + m*j] -= s_d[j];
276  }
277  }
278  else if (op == SWEEP_MUL)
279  {
280  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
281  for (len_t j=0; j<n; j++)
282  {
283  #pragma omp simd
284  for (len_t i=0; i<m; i++)
285  x_d[i + m*j] *= s_d[j];
286  }
287  }
288  else if (op == SWEEP_DIV)
289  {
290  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
291  for (len_t j=0; j<n; j++)
292  {
293  #pragma omp simd
294  for (len_t i=0; i<m; i++)
295  x_d[i + m*j] /= s_d[j];
296  }
297  }
298  }
299 
300 
301 
302  namespace internals
303  {
304  template <typename REAL>
305  static inline void center(cpumat<REAL> &x)
306  {
307  REAL *x_d = x.data_ptr();
308  const len_t m = x.nrows();
309  const len_t n = x.ncols();
310 
311  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
312  for (len_t j=0; j<n; j++)
313  {
314  REAL mean = 0;
315  internals::col_mean(j, m, x_d, mean);
316 
317  #pragma omp simd
318  for (len_t i=0; i<m; i++)
319  x_d[i + m*j] -= mean;
320  }
321  }
322 
323  template <typename REAL>
324  static inline void scale(cpumat<REAL> &x)
325  {
326  REAL *x_d = x.data_ptr();
327  const len_t m = x.nrows();
328  const len_t n = x.ncols();
329 
330  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
331  for (len_t j=0; j<n; j++)
332  {
333  REAL mean = 0;
334  REAL var = 0;
335  internals::col_mean_and_var(j, m, x_d, mean, var);
336 
337  #pragma omp simd
338  for (len_t i=0; i<m; i++)
339  x_d[i + m*j] /= var;
340  }
341  }
342 
343  template <typename REAL>
344  static inline void center_and_scale(cpumat<REAL> &x)
345  {
346  REAL *x_d = x.data_ptr();
347  const len_t m = x.nrows();
348  const len_t n = x.ncols();
349 
350  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
351  for (len_t j=0; j<n; j++)
352  {
353  REAL mean = 0;
354  REAL var = 0;
355  internals::col_mean_and_var(j, m, x_d, mean, var);
356 
357  #pragma omp simd
358  for (len_t i=0; i<m; i++)
359  x_d[i + m*j] = (x_d[i + m*j] - mean) / var;
360  }
361  }
362  }
363 
364 
365 
375  template <typename REAL>
376  void scale(const bool rm_mean, const bool rm_sd, cpumat<REAL> &x)
377  {
378  if (rm_mean && rm_sd)
379  internals::center_and_scale(x);
380  else if (rm_mean)
381  internals::center(x);
382  else if (rm_sd)
383  internals::scale(x);
384  }
385 }
386 }
387 
388 
389 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:210
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::dimops::scale
void scale(const bool rm_mean, const bool rm_sd, cpumat< REAL > &x)
Remove the mean and/or the sd from a matrix.
Definition: dimops.hh:376
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::dimops::SWEEP_ADD
@ SWEEP_ADD
Definition: dimops.hh:22
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::cpuvec::fill_zero
void fill_zero()
Set all values to zero.
Definition: cpuvec.hh:317
fml::dimops::sweep_op
sweep_op
Definition: dimops.hh:14
fml::dimops::rowsums
void rowsums(const cpumat< REAL > &x, cpuvec< REAL > &s)
Compute the row sums.
Definition: dimops.hh:72
fml::dimops::colsums
void colsums(const cpumat< REAL > &x, cpuvec< REAL > &s)
Compute the column sums.
Definition: dimops.hh:125
fml::dimops::colmeans
void colmeans(const cpumat< REAL > &x, cpuvec< REAL > &s)
Compute the column means.
Definition: dimops.hh:151
fml::dimops::rowmeans
void rowmeans(const cpumat< REAL > &x, cpuvec< REAL > &s)
Compute the row means.
Definition: dimops.hh:101