5 #ifndef FML_CPU_DIMOPS_H
6 #define FML_CPU_DIMOPS_H
12 #include "../_internals/dimops.hh"
13 #include "../_internals/omp.hh"
26 template <
typename REAL>
27 static inline void col_sum(
const len_t j,
const len_t m,
const REAL *x, REAL &sum)
31 #pragma omp simd reduction(+:sum)
32 for (len_t i=0; i<m; i++)
36 template <
typename REAL>
37 static inline void col_mean(
const len_t j,
const len_t m,
const REAL *x, REAL &mean)
40 col_sum(j, m, x, mean);
44 template <
typename REAL>
45 static inline void col_mean_and_var(
const len_t j,
const len_t m,
const REAL *x, REAL &mean, REAL &var)
50 for (len_t i=0; i<m; i++)
52 REAL dt = x[i + m*j] - mean;
53 mean += dt/((REAL) i+1);
54 var += dt * (x[i + m*j] - mean);
57 var = sqrt(var / ((REAL) m-1));
71 template <
typename REAL>
75 const len_t m = x.
nrows();
76 const len_t n = x.
ncols();
82 for (len_t j=0; j<n; j++)
85 for (len_t i=0; i<m; i++)
86 s_d[i] += x_d[i + m*j];
100 template <
typename REAL>
105 const len_t m = x.
nrows();
106 const len_t n = x.
ncols();
110 for (len_t i=0; i<m; i++)
124 template <
typename REAL>
128 const len_t m = x.
nrows();
129 const len_t n = x.
ncols();
135 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
136 for (len_t j=0; j<n; j++)
137 internals::col_sum(j, m, x_d, s_d[j]);
150 template <
typename REAL>
154 const len_t m = x.
nrows();
155 const len_t n = x.
ncols();
161 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
162 for (len_t j=0; j<n; j++)
163 internals::col_mean(j, m, x_d, s_d[j]);
179 template <
typename REAL>
182 const len_t m = x.
nrows();
183 const len_t n = x.
ncols();
186 throw std::runtime_error(
"non-conformal arguments");
193 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
194 for (len_t j=0; j<n; j++)
197 for (len_t i=0; i<m; i++)
198 x_d[i + m*j] += s_d[i];
201 else if (op == SWEEP_SUB)
203 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
204 for (len_t j=0; j<n; j++)
207 for (len_t i=0; i<m; i++)
208 x_d[i + m*j] -= s_d[i];
211 else if (op == SWEEP_MUL)
213 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
214 for (len_t j=0; j<n; j++)
217 for (len_t i=0; i<m; i++)
218 x_d[i + m*j] *= s_d[i];
221 else if (op == SWEEP_DIV)
223 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
224 for (len_t j=0; j<n; j++)
227 for (len_t i=0; i<m; i++)
228 x_d[i + m*j] /= s_d[i];
246 template <
typename REAL>
247 static inline void colsweep(cpumat<REAL> &x,
const cpuvec<REAL> &s,
const sweep_op op)
249 const len_t m = x.nrows();
250 const len_t n = x.ncols();
253 throw std::runtime_error(
"non-conformal arguments");
255 REAL *x_d = x.data_ptr();
256 const REAL *s_d = s.data_ptr();
260 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
261 for (len_t j=0; j<n; j++)
264 for (len_t i=0; i<m; i++)
265 x_d[i + m*j] += s_d[j];
268 else if (op == SWEEP_SUB)
270 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
271 for (len_t j=0; j<n; j++)
274 for (len_t i=0; i<m; i++)
275 x_d[i + m*j] -= s_d[j];
278 else if (op == SWEEP_MUL)
280 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
281 for (len_t j=0; j<n; j++)
284 for (len_t i=0; i<m; i++)
285 x_d[i + m*j] *= s_d[j];
288 else if (op == SWEEP_DIV)
290 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
291 for (len_t j=0; j<n; j++)
294 for (len_t i=0; i<m; i++)
295 x_d[i + m*j] /= s_d[j];
304 template <
typename REAL>
305 static inline void center(cpumat<REAL> &x)
307 REAL *x_d = x.data_ptr();
308 const len_t m = x.nrows();
309 const len_t n = x.ncols();
311 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
312 for (len_t j=0; j<n; j++)
315 internals::col_mean(j, m, x_d, mean);
318 for (len_t i=0; i<m; i++)
319 x_d[i + m*j] -= mean;
323 template <
typename REAL>
324 static inline void scale(cpumat<REAL> &x)
326 REAL *x_d = x.data_ptr();
327 const len_t m = x.nrows();
328 const len_t n = x.ncols();
330 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
331 for (len_t j=0; j<n; j++)
335 internals::col_mean_and_var(j, m, x_d, mean, var);
338 for (len_t i=0; i<m; i++)
343 template <
typename REAL>
344 static inline void center_and_scale(cpumat<REAL> &x)
346 REAL *x_d = x.data_ptr();
347 const len_t m = x.nrows();
348 const len_t n = x.ncols();
350 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
351 for (len_t j=0; j<n; j++)
355 internals::col_mean_and_var(j, m, x_d, mean, var);
358 for (len_t i=0; i<m; i++)
359 x_d[i + m*j] = (x_d[i + m*j] - mean) / var;
375 template <
typename REAL>
378 if (rm_mean && rm_sd)
379 internals::center_and_scale(x);
381 internals::center(x);