fml  0.1-0
Fused Matrix Library
mpimat.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_MPI_MPIMAT_H
6 #define FML_MPI_MPIMAT_H
7 #pragma once
8 
9 
10 #include <algorithm>
11 #include <cmath>
12 #include <cstdlib>
13 #include <cstring>
14 #include <random>
15 #include <stdexcept>
16 
17 #include "grid.hh"
18 #include "internals/bcutils.hh"
19 
20 #include "../_internals/arraytools/src/arraytools.hpp"
21 
22 #include "../_internals/print.hh"
23 #include "../_internals/rand.hh"
24 #include "../_internals/omp.hh"
25 #include "../_internals/types.hh"
26 #include "../_internals/unimat.hh"
27 
28 #include "../cpu/cpuvec.hh"
29 
30 
31 namespace fml
32 {
39  template <typename REAL>
40  class mpimat : public fml::unimat<REAL>
41  {
42  public:
43  mpimat(const grid &blacs_grid);
44  mpimat(const grid &blacs_grid, int bf_rows, int bf_cols);
45  mpimat(const grid &blacs_grid, len_t nrows, len_t ncols, int bf_rows, int bf_cols);
46  mpimat(const grid &blacs_grid, REAL *data_, len_t nrows, len_t ncols, int bf_rows, int bf_cols, bool free_on_destruct=false);
47  mpimat(const mpimat &x);
48  ~mpimat();
49 
50  void resize(len_t nrows, len_t ncols);
51  void resize(len_t nrows, len_t ncols, int bf_rows, int bf_cols);
52  void inherit(grid &blacs_grid, REAL *data_, len_t nrows, len_t ncols, int bf_rows, int bf_cols, bool free_on_destruct=false);
53  mpimat<REAL> dupe() const;
54 
55  void print(uint8_t ndigits=4, bool add_final_blank=true) const;
56  void info() const;
57 
58  void fill_zero();
59  void fill_val(const REAL v);
60  void fill_linspace();
61  void fill_linspace(const REAL start, const REAL stop);
62  void fill_eye();
63  void fill_diag(const cpuvec<REAL> &v);
64  void fill_runif(const uint32_t seed, const REAL min=0, const REAL max=1);
65  void fill_runif(const REAL min=0, const REAL max=1);
66  void fill_rnorm(const uint32_t seed, const REAL mean=0, const REAL sd=1);
67  void fill_rnorm(const REAL mean=0, const REAL sd=1);
68 
69  void diag(cpuvec<REAL> &v);
70  void antidiag(cpuvec<REAL> &v);
71  void scale(const REAL s);
72  void rev_rows();
73  void rev_cols();
74 
75  bool any_inf() const;
76  bool any_nan() const;
77 
78  REAL get(const len_t i) const;
79  REAL get(const len_t i, const len_t j) const;
80  void set(const len_t i, const REAL v);
81  void set(const len_t i, const len_t j, const REAL v);
82  void get_row(const len_t i, cpuvec<REAL> &v) const;
83  void set_row(const len_t i, const cpuvec<REAL> &v);
84  void get_col(const len_t j, cpuvec<REAL> &v) const;
85  void set_col(const len_t j, const cpuvec<REAL> &v);
86 
87  bool operator==(const mpimat<REAL> &x) const;
88  bool operator!=(const mpimat<REAL> &x) const;
90 
91  len_local_t nrows_local() const {return m_local;};
92  len_local_t ncols_local() const {return n_local;};
93  int bf_rows() const {return mb;};
94  int bf_cols() const {return nb;};
95  int* desc_ptr() {return desc;};
96  const int* desc_ptr() const {return desc;};
97  const grid get_grid() const {return g;};
98 
99  protected:
100  len_local_t m_local;
101  len_local_t n_local;
102  int mb;
103  int nb;
104  int desc[9];
105  grid g;
106 
107  private:
108  void free();
109  void check_params(len_t nrows, len_t ncols, int bf_rows, int bf_cols);
110  void check_grid(const grid &blacs_grid);
111  REAL get_val_from_global_index(len_t gi, len_t gj) const;
112  };
113 }
114 
115 
116 
117 // -----------------------------------------------------------------------------
118 // public
119 // -----------------------------------------------------------------------------
120 
121 // constructors/destructor
122 
135 template <typename REAL>
137 {
138  check_grid(blacs_grid);
139 
140  this->m = 0;
141  this->n = 0;
142  this->m_local = 0;
143  this->n_local = 0;
144  this->data = NULL;
145 
146  this->mb = 0;
147  this->nb = 0;
148 
149  this->g = blacs_grid;
150 
151  this->free_data = true;
152 }
153 
154 
155 
173 template <typename REAL>
174 fml::mpimat<REAL>::mpimat(const fml::grid &blacs_grid, int bf_rows, int bf_cols)
175 {
176  check_grid(blacs_grid);
177 
178  this->m = 0;
179  this->n = 0;
180  this->m_local = 0;
181  this->n_local = 0;
182  this->data = NULL;
183 
184  this->mb = bf_rows;
185  this->nb = bf_cols;
186 
187  this->g = blacs_grid;
188 
189  this->free_data = true;
190 }
191 
192 
193 
212 template <typename REAL>
213 fml::mpimat<REAL>::mpimat(const fml::grid &blacs_grid, len_t nrows, len_t ncols, int bf_rows, int bf_cols)
214 {
215  check_params(nrows, ncols, bf_rows, bf_cols);
216  check_grid(blacs_grid);
217 
218  this->m_local = fml::bcutils::numroc(nrows, bf_rows, blacs_grid.myrow(), 0, blacs_grid.nprow());
219  this->n_local = fml::bcutils::numroc(ncols, bf_cols, blacs_grid.mycol(), 0, blacs_grid.npcol());
220 
221  fml::bcutils::descinit(this->desc, blacs_grid.ictxt(), nrows, ncols, bf_rows, bf_cols, this->m_local);
222 
223  const size_t len = (size_t) this->m_local * this->n_local * sizeof(REAL);
224  this->data = (REAL*) std::malloc(len);
225  if (this->data == NULL)
226  throw std::bad_alloc();
227 
228  this->m = nrows;
229  this->n = ncols;
230  this->mb = bf_rows;
231  this->nb = bf_cols;
232  this->g = blacs_grid;
233 
234  this->free_data = true;
235 }
236 
237 
238 
256 template <typename REAL>
257 fml::mpimat<REAL>::mpimat(const fml::grid &blacs_grid, REAL *data_, len_t nrows, len_t ncols, int bf_rows, int bf_cols, bool free_on_destruct)
258 {
259  check_params(nrows, ncols, bf_rows, bf_cols);
260  check_grid(blacs_grid);
261 
262  this->m_local = fml::bcutils::numroc(nrows, bf_rows, blacs_grid.myrow(), 0, blacs_grid.nprow());
263  this->n_local = fml::bcutils::numroc(ncols, bf_cols, blacs_grid.mycol(), 0, blacs_grid.npcol());
264 
265  fml::bcutils::descinit(this->desc, blacs_grid.ictxt(), nrows, ncols, bf_rows, bf_cols, this->m_local);
266 
267  this->m = nrows;
268  this->n = ncols;
269  this->mb = bf_rows;
270  this->nb = bf_cols;
271  this->g = blacs_grid;
272 
273  this->data = data_;
274 
275  this->free_data = free_on_destruct;
276 }
277 
278 
279 
280 template <typename REAL>
282 {
283  this->m = x.nrows();
284  this->n = x.ncols();
285 
286  this->m_local = x.nrows_local();
287  this->n_local = x.ncols_local();
288  this->mb = x.bf_rows();
289  this->nb = x.bf_cols();
290 
291  memcpy(this->desc, x.desc_ptr(), 9*sizeof(int));
292 
293  fml::grid g = x.get_grid();
294  this->g = g;
295 
296  this->data = x.data_ptr();
297 
298  this->free_data = false;
299 }
300 
301 
302 
303 template <typename REAL>
305 {
306  this->free();
307 }
308 
309 
310 
311 // memory management
312 
325 template <typename REAL>
326 void fml::mpimat<REAL>::resize(len_t nrows, len_t ncols)
327 {
328  check_params(nrows, ncols, this->mb, this->nb);
329 
330  const size_t len = (size_t) nrows * ncols * sizeof(REAL);
331  const size_t oldlen = (size_t) this->m * this->n * sizeof(REAL);
332 
333  if (len == oldlen)
334  {
335  this->m = nrows;
336  this->n = ncols;
337 
338  this->m_local = fml::bcutils::numroc(nrows, this->mb, this->g.myrow(), 0, this->g.nprow());
339  this->n_local = fml::bcutils::numroc(ncols, this->nb, this->g.mycol(), 0, this->g.npcol());
340 
341  fml::bcutils::descinit(this->desc, this->g.ictxt(), nrows, ncols, this->mb, this->nb, this->m_local);
342 
343  return;
344  }
345 
346  this->m_local = fml::bcutils::numroc(nrows, this->mb, this->g.myrow(), 0, this->g.nprow());
347  this->n_local = fml::bcutils::numroc(ncols, this->nb, this->g.mycol(), 0, this->g.npcol());
348 
349  void *realloc_ptr;
350  if (oldlen == 0)
351  realloc_ptr = malloc(len);
352  else
353  realloc_ptr = realloc(this->data, len);
354 
355  if (realloc_ptr == NULL)
356  throw std::bad_alloc();
357 
358  this->data = (REAL*) realloc_ptr;
359 
360  fml::bcutils::descinit(this->desc, this->g.ictxt(), nrows, ncols, this->mb, this->nb, this->m_local);
361 
362  this->m = nrows;
363  this->n = ncols;
364 }
365 
366 
367 
381 template <typename REAL>
382 void fml::mpimat<REAL>::resize(len_t nrows, len_t ncols, int bf_rows, int bf_cols)
383 {
384  check_params(nrows, ncols, bf_rows, bf_cols);
385 
386  const size_t len = (size_t) nrows * ncols * sizeof(REAL);
387  const size_t oldlen = (size_t) this->m * this->n * sizeof(REAL);
388 
389  if (len == oldlen && this->mb == bf_rows && this->nb == bf_cols)
390  {
391  this->m = nrows;
392  this->n = ncols;
393 
394  this->m_local = fml::bcutils::numroc(nrows, bf_rows, this->g.myrow(), 0, this->g.nprow());
395  this->n_local = fml::bcutils::numroc(ncols, bf_cols, this->g.mycol(), 0, this->g.npcol());
396 
397  fml::bcutils::descinit(this->desc, this->g.ictxt(), nrows, ncols, bf_rows, bf_cols, this->m_local);
398 
399  return;
400  }
401 
402  this->mb = bf_rows;
403  this->nb = bf_cols;
404 
405  this->m_local = fml::bcutils::numroc(nrows, this->mb, this->g.myrow(), 0, this->g.nprow());
406  this->n_local = fml::bcutils::numroc(ncols, this->nb, this->g.mycol(), 0, this->g.npcol());
407 
408  void *realloc_ptr;
409  if (oldlen == 0)
410  realloc_ptr = malloc(len);
411  else
412  realloc_ptr = realloc(this->data, len);
413 
414  if (realloc_ptr == NULL)
415  throw std::bad_alloc();
416 
417  this->data = (REAL*) realloc_ptr;
418 
419  fml::bcutils::descinit(this->desc, this->g.ictxt(), nrows, ncols, this->mb, this->nb, this->m_local);
420 
421  this->m = nrows;
422  this->n = ncols;
423 }
424 
425 
426 
442 template <typename REAL>
443 void fml::mpimat<REAL>::inherit(fml::grid &blacs_grid, REAL *data_, len_t nrows, len_t ncols, int bf_rows, int bf_cols, bool free_on_destruct)
444 {
445  check_params(nrows, ncols, bf_rows, bf_cols);
446  check_grid(blacs_grid);
447 
448  this->free();
449 
450  m_local = fml::bcutils::numroc(nrows, bf_rows, blacs_grid.myrow(), 0, blacs_grid.nprow());
451  n_local = fml::bcutils::numroc(ncols, bf_cols, blacs_grid.mycol(), 0, blacs_grid.npcol());
452  fml::bcutils::descinit(this->desc, blacs_grid.ictxt(), nrows, ncols, bf_rows, bf_cols, m_local);
453 
454  this->m = nrows;
455  this->n = ncols;
456  this->mb = bf_rows;
457  this->nb = bf_cols;
458  this->g = blacs_grid;
459 
460  this->data = data_;
461 
462  this->free_data = free_on_destruct;
463 }
464 
465 
466 
468 template <typename REAL>
470 {
471  fml::mpimat<REAL> dup(this->g, this->m, this->n, this->mb, this->nb);
472 
473  const size_t len = (size_t) this->m_local * this->n_local * sizeof(REAL);
474 
475  memcpy(dup.data_ptr(), this->data, len);
476  memcpy(dup.desc_ptr(), this->desc, 9*sizeof(int));
477 
478  return dup;
479 }
480 
481 
482 
483 // printers
484 
495 template <typename REAL>
496 void fml::mpimat<REAL>::print(uint8_t ndigits, bool add_final_blank) const
497 {
498  for (len_t gi=0; gi<this->m; gi++)
499  {
500  for (len_t gj=0; gj<this->n; gj++)
501  {
502  const int pr = fml::bcutils::g2p(gi, this->mb, this->g.nprow());
503  const int pc = fml::bcutils::g2p(gj, this->nb, this->g.npcol());
504 
505  const int i = fml::bcutils::g2l(gi, this->mb, this->g.nprow());
506  const int j = fml::bcutils::g2l(gj, this->nb, this->g.npcol());
507 
508  REAL d;
509  if (this->g.rank0())
510  {
511  if (pr == 0 && pc == 0)
512  d = this->data[i + this->m_local*j];
513  else
514  this->g.recv(1, 1, &d, pr, pc);
515 
516  this->printval(d, ndigits);
517  }
518  else if (pr == this->g.myrow() && pc == this->g.mycol())
519  {
520  d = this->data[i + this->m_local*j];
521  this->g.send(1, 1, &d, 0, 0);
522  }
523  }
524 
525  this->g.printf(0, 0, "\n");
526  }
527 
528  if (add_final_blank)
529  this->g.printf(0, 0, "\n");
530 }
531 
532 
533 
541 template <typename REAL>
543 {
544  if (this->g.rank0())
545  {
546  fml::print::printf("# mpimat");
547  fml::print::printf(" %dx%d", this->m, this->n);
548  fml::print::printf(" with %dx%d blocking", this->mb, this->nb);
549  fml::print::printf(" on %dx%d grid", this->g.nprow(), this->g.npcol());
550  fml::print::printf(" type=%s", typeid(REAL).name());
551  fml::print::printf("\n");
552  }
553 }
554 
555 
556 
557 // fillers
558 
564 template <typename REAL>
566 {
567  const size_t len = (size_t) m_local * n_local * sizeof(REAL);
568  memset(this->data, 0, len);
569 }
570 
571 
572 
580 template <typename REAL>
581 void fml::mpimat<REAL>::fill_val(const REAL v)
582 {
583  #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
584  for (len_t j=0; j<this->n_local; j++)
585  {
586  #pragma omp simd
587  for (len_t i=0; i<this->m_local; i++)
588  this->data[i + this->m_local*j] = v;
589  }
590 }
591 
592 
593 
602 template <typename T>
604 {
605  T start = 1;
606  T stop = (T) (this->m * this->n);
607  this->fill_linspace(start, stop);
608 }
609 
610 template <typename REAL>
611 void fml::mpimat<REAL>::fill_linspace(const REAL start, const REAL stop)
612 {
613  if (start == stop)
614  this->fill_val(start);
615  else
616  {
617  const REAL v = (stop-start)/((REAL) this->m*this->n - 1);
618 
619  #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
620  for (len_t j=0; j<this->n_local; j++)
621  {
622  #pragma omp simd
623  for (len_t i=0; i<this->m_local; i++)
624  {
625  const int gi = fml::bcutils::l2g(i, this->mb, this->g.nprow(), this->g.myrow());
626  const int gj = fml::bcutils::l2g(j, this->nb, this->g.npcol(), this->g.mycol());
627 
628  this->data[i + this->m_local*j] = v*((REAL) gi + this->m*gj) + start;
629  }
630  }
631  }
632 }
633 
634 template <>
635 inline void fml::mpimat<int>::fill_linspace(const int start, const int stop)
636 {
637  if (start == stop)
638  this->fill_val(start);
639  else
640  {
641  const float v = (stop-start)/((float) this->m*this->n - 1);
642 
643  #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
644  for (len_t j=0; j<this->n_local; j++)
645  {
646  #pragma omp simd
647  for (len_t i=0; i<this->m_local; i++)
648  {
649  const int gi = fml::bcutils::l2g(i, this->mb, this->g.nprow(), this->g.myrow());
650  const int gj = fml::bcutils::l2g(j, this->nb, this->g.npcol(), this->g.mycol());
651 
652  this->data[i + this->m_local*j] = (int) roundf(v*((float) gi + this->m*gj) + start);
653  }
654  }
655  }
656 }
657 
658 
659 
665 template <typename REAL>
667 {
668  fml::cpuvec<REAL> v(1);
669  v.set(0, 1);
670  this->fill_diag(v);
671 }
672 
673 
674 
686 template <typename REAL>
688 {
689  REAL *v_d = v.data_ptr();
690 
691  #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
692  for (len_local_t j=0; j<n_local; j++)
693  {
694  for (len_local_t i=0; i<m_local; i++)
695  {
696  const int gi = fml::bcutils::l2g(i, this->mb, this->g.nprow(), this->g.myrow());
697  const int gj = fml::bcutils::l2g(j, this->nb, this->g.npcol(), this->g.mycol());
698 
699  if (gi == gj)
700  this->data[i + this->m_local*j] = v_d[gi % v.size()];
701  else
702  this->data[i + this->m_local*j] = 0;
703  }
704  }
705 }
706 
707 
708 
717 template <typename REAL>
718 void fml::mpimat<REAL>::fill_runif(const uint32_t seed, const REAL min, const REAL max)
719 {
720  std::mt19937 mt(seed + g.myrow() + g.nprow()*g.mycol());
721  static std::uniform_real_distribution<REAL> dist(min, max);
722 
723  for (len_t j=0; j<this->n_local; j++)
724  {
725  for (len_t i=0; i<this->m_local; i++)
726  this->data[i + this->m_local*j] = dist(mt);
727  }
728 }
729 
731 template <typename REAL>
732 void fml::mpimat<REAL>::fill_runif(const REAL min, const REAL max)
733 {
734  uint32_t seed = fml::rand::get_seed() + (g.myrow() + g.nprow()*g.mycol());
735  this->fill_runif(seed, min, max);
736 }
737 
738 
739 
748 template <typename REAL>
749 void fml::mpimat<REAL>::fill_rnorm(const uint32_t seed, const REAL mean, const REAL sd)
750 {
751  std::mt19937 mt(seed + g.myrow() + g.nprow()*g.mycol());
752  static std::normal_distribution<REAL> dist(mean, sd);
753 
754  for (len_t j=0; j<this->n_local; j++)
755  {
756  for (len_t i=0; i<this->m_local; i++)
757  this->data[i + this->m_local*j] = dist(mt);
758  }
759 }
760 
762 template <typename REAL>
763 void fml::mpimat<REAL>::fill_rnorm(const REAL mean, const REAL sd)
764 {
765  uint32_t seed = fml::rand::get_seed() + (g.myrow() + g.nprow()*g.mycol());
766  this->fill_rnorm(seed, mean, sd);
767 }
768 
769 
770 
786 template <typename REAL>
788 {
789  const len_t minmn = std::min(this->m, this->n);
790  v.resize(minmn);
791  v.fill_zero();
792  REAL *v_ptr = v.data_ptr();
793 
794  #pragma omp parallel for if(minmn > fml::omp::OMP_MIN_SIZE)
795  for (len_t gi=0; gi<minmn; gi++)
796  {
797  const len_local_t i = fml::bcutils::g2l(gi, this->mb, this->g.nprow());
798  const len_local_t j = fml::bcutils::g2l(gi, this->nb, this->g.npcol());
799 
800  const int pr = fml::bcutils::g2p(gi, this->mb, this->g.nprow());
801  const int pc = fml::bcutils::g2p(gi, this->nb, this->g.npcol());
802 
803  if (pr == this->g.myrow() && pc == this->g.mycol())
804  v_ptr[gi] = this->data[i + this->m_local*j];
805  }
806 
807 
808  this->g.allreduce(minmn, 1, v_ptr, 'A');
809 }
810 
811 
812 
829 template <typename REAL>
831 {
832  const len_t minmn = std::min(this->m, this->n);
833  v.resize(minmn);
834  v.fill_zero();
835  REAL *v_ptr = v.data_ptr();
836 
837  #pragma omp parallel for if(minmn > fml::omp::OMP_MIN_SIZE)
838  for (len_t gi=0; gi<minmn; gi++)
839  {
840  const len_local_t i = fml::bcutils::g2l(this->m-1-gi, this->mb, this->g.nprow());
841  const len_local_t j = fml::bcutils::g2l(gi, this->nb, this->g.npcol());
842 
843  const int pr = fml::bcutils::g2p(this->m-1-gi, this->mb, this->g.nprow());
844  const int pc = fml::bcutils::g2p(gi, this->nb, this->g.npcol());
845 
846  if (pr == this->g.myrow() && pc == this->g.mycol())
847  v_ptr[gi] = this->data[i + this->m_local*j];
848  }
849 
850 
851  this->g.allreduce(minmn, 1, v_ptr, 'A');
852 }
853 
854 
855 
863 template <typename REAL>
864 void fml::mpimat<REAL>::scale(const REAL s)
865 {
866  #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
867  for (len_local_t j=0; j<this->n_local; j++)
868  {
869  #pragma omp simd
870  for (len_local_t i=0; i<this->m_local; i++)
871  this->data[i + this->m_local*j] *= s;
872  }
873 }
874 
875 
876 
882 template <typename REAL>
884 {
885  cpuvec<REAL> tmp(this->nb);
886  REAL *tmp_d = tmp.data_ptr();
887 
888  const int myrow = this->g.myrow();
889  const int mycol = this->g.mycol();
890 
891  for (len_t gj=0; gj<this->n; gj+=this->nb)
892  {
893  const len_t j = fml::bcutils::g2l(gj, this->nb, this->g.npcol());
894  const int pc = fml::bcutils::g2p(gj, this->nb, this->g.npcol());
895 
896  for (len_t gi=0; gi<this->m/2; gi++)
897  {
898  const len_t i = fml::bcutils::g2l(gi, this->mb, this->g.nprow());
899  const len_t gi_rev = this->m - gi - 1;
900 
901  const int pr = fml::bcutils::g2p(gi, this->mb, this->g.nprow());
902  const int pr_rev = fml::bcutils::g2p(gi_rev, this->mb, this->g.nprow());
903 
904  if ((pr == myrow || pr_rev == myrow) && pc == mycol)
905  {
906  const len_t i_rev = fml::bcutils::g2l(gi_rev, this->mb, this->g.nprow());
907  const len_t cplen = std::min(this->nb, this->n - gj);
908 
909  if (pr == pr_rev)
910  {
911  if (i != i_rev)
912  {
913  #pragma omp for simd
914  for (len_t jj=0; jj<cplen; jj++)
915  tmp_d[jj] = this->data[i + this->m_local*(j+jj)];
916 
917  #pragma omp for simd
918  for (len_t jj=0; jj<cplen; jj++)
919  this->data[i + this->m_local*(j+jj)] = this->data[i_rev + this->m_local*(j+jj)];
920 
921  #pragma omp for simd
922  for (len_t jj=0; jj<cplen; jj++)
923  this->data[i_rev + this->m_local*(j+jj)] = tmp_d[jj];
924  }
925  }
926  else
927  {
928  // oroginal indexed process sends/recvs and higher recvs/sends
929  if (pr == myrow)
930  {
931  len_t idx = i + this->m_local*j;
932  this->g.send(1, cplen, this->m_local, this->data + idx, pr_rev, pc);
933  this->g.recv(1, cplen, 1, tmp_d, pr_rev, pc);
934 
935  #pragma omp for simd
936  for (len_t jj=0; jj<cplen; jj++)
937  this->data[idx + this->m_local*jj] = tmp_d[jj];
938  }
939  else
940  {
941  len_t idx = i_rev + this->m_local*j;
942  this->g.recv(1, cplen, 1, tmp_d, pr, pc);
943  this->g.send(1, cplen, this->m_local, this->data + idx, pr, pc);
944 
945  #pragma omp for simd
946  for (len_t jj=0; jj<cplen; jj++)
947  this->data[idx + this->m_local*jj] = tmp_d[jj];
948  }
949  }
950  }
951 
952  this->g.barrier('R');
953  }
954  }
955 }
956 
957 
958 
964 template <typename REAL>
966 {
967  cpuvec<REAL> tmp(this->mb);
968  REAL *tmp_d = tmp.data_ptr();
969 
970  const int myrow = this->g.myrow();
971  const int mycol = this->g.mycol();
972 
973  for (len_t gj=0; gj<this->n/2; gj++)
974  {
975  const len_t j = fml::bcutils::g2l(gj, this->nb, this->g.npcol());
976  const len_t gj_rev = this->n - gj - 1;
977  const len_t j_rev = fml::bcutils::g2l(gj_rev, this->nb, this->g.npcol());
978 
979  const int pc = fml::bcutils::g2p(gj, this->nb, this->g.npcol());
980  const int pc_rev = fml::bcutils::g2p(gj_rev, this->nb, this->g.npcol());
981 
982  for (len_t gi=0; gi<this->m; gi+=this->mb)
983  {
984  const len_t i = fml::bcutils::g2l(gi, this->mb, this->g.nprow());
985  const int pr = fml::bcutils::g2p(gi, this->mb, this->g.nprow());
986 
987  if (pr == myrow && (pc == mycol || pc_rev == mycol))
988  {
989  const len_t cplen = std::min(this->mb, this->m - gi);
990 
991  if (pc == pc_rev)
992  {
993  if (j != j_rev)
994  {
995  #pragma omp for simd
996  for (len_t ii=0; ii<cplen; ii++)
997  tmp_d[ii] = this->data[i+ii + this->m_local*j];
998 
999  #pragma omp for simd
1000  for (len_t ii=0; ii<cplen; ii++)
1001  this->data[i+ii + this->m_local*j] = this->data[i+ii + this->m_local*j_rev];
1002 
1003  #pragma omp for simd
1004  for (len_t ii=0; ii<cplen; ii++)
1005  this->data[i+ii + this->m_local*j_rev] = tmp_d[ii];
1006  }
1007  }
1008  else
1009  {
1010  // oroginal indexed process sends/recvs and higher recvs/sends
1011  if (pc == mycol)
1012  {
1013  len_t idx = i + this->m_local*j;
1014  this->g.send(cplen, 1, this->m_local, this->data + idx, pr, pc_rev);
1015  this->g.recv(cplen, 1, 1, tmp_d, pr, pc_rev);
1016 
1017  #pragma omp for simd
1018  for (len_t ii=0; ii<cplen; ii++)
1019  this->data[idx+ii] = tmp_d[ii];
1020  }
1021  else
1022  {
1023  len_t idx = i + this->m_local*j_rev;
1024  this->g.recv(cplen, 1, 1, tmp_d, pr, pc);
1025  this->g.send(cplen, 1, this->m_local, this->data + idx, pr, pc);
1026 
1027  #pragma omp for simd
1028  for (len_t ii=0; ii<cplen; ii++)
1029  this->data[idx+ii] = tmp_d[ii];
1030  }
1031  }
1032  }
1033 
1034  this->g.barrier('C');
1035  }
1036  }
1037 }
1038 
1039 
1040 
1046 template <typename REAL>
1048 {
1049  int found_inf = 0;
1050  for (len_local_t j=0; j<n_local; j++)
1051  {
1052  for (len_local_t i=0; i<m_local; i++)
1053  {
1054  if (isinf(this->data[i + this->m_local*j]))
1055  {
1056  found_inf = 1;
1057  break;
1058  }
1059  }
1060  }
1061 
1062  this->g.allreduce(1, 1, &found_inf, 'A');
1063 
1064  return ((bool) found_inf);
1065 }
1066 
1067 
1068 
1074 template <typename REAL>
1076 {
1077  int found_nan = 0;
1078  for (len_local_t j=0; j<n_local; j++)
1079  {
1080  for (len_local_t i=0; i<m_local; i++)
1081  {
1082  if (isnan(this->data[i + this->m_local*j]))
1083  {
1084  found_nan = 1;
1085  break;
1086  }
1087  }
1088  }
1089 
1090  this->g.allreduce(1, 1, &found_nan, 'A');
1091 
1092  return ((bool) found_nan);
1093 }
1094 
1095 
1096 
1097 // operators
1098 
1112 template <typename REAL>
1113 REAL fml::mpimat<REAL>::get(const len_t i) const
1114 {
1115  this->check_index(i);
1116 
1117  int gi = i % this->m;
1118  int gj = i / this->m;
1119 
1120  REAL ret = this->get_val_from_global_index(gi, gj);
1121  return ret;
1122 }
1123 
1136 template <typename REAL>
1137 REAL fml::mpimat<REAL>::get(const len_t i, const len_t j) const
1138 {
1139  this->check_index(i, j);
1140 
1141  REAL ret = this->get_val_from_global_index(i, j);
1142  return ret;
1143 }
1144 
1157 template <typename REAL>
1158 void fml::mpimat<REAL>::set(const len_t i, const REAL v)
1159 {
1160  this->check_index(i);
1161 
1162  int gi = i % this->m;
1163  int gj = i / this->m;
1164 
1165  int pr = fml::bcutils::g2p(gi, this->mb, this->g.nprow());
1166  int pc = fml::bcutils::g2p(gj, this->nb, this->g.npcol());
1167 
1168  int li = fml::bcutils::g2l(gi, this->mb, this->g.nprow());
1169  int lj = fml::bcutils::g2l(gj, this->nb, this->g.npcol());
1170 
1171  if (pr == this->g.myrow() && pc == this->g.mycol())
1172  this->data[li + (this->m_local)*lj] = v;
1173 }
1174 
1186 template <typename REAL>
1187 void fml::mpimat<REAL>::set(const len_t i, const len_t j, const REAL v)
1188 {
1189  this->check_index(i, j);
1190 
1191  int pr = fml::bcutils::g2p(i, this->mb, this->g.nprow());
1192  int pc = fml::bcutils::g2p(j, this->nb, this->g.npcol());
1193 
1194  int li = fml::bcutils::g2l(i, this->mb, this->g.nprow());
1195  int lj = fml::bcutils::g2l(j, this->nb, this->g.npcol());
1196 
1197  if (pr == this->g.myrow() && pc == this->g.mycol())
1198  this->data[li + (this->m_local)*lj] = v;
1199 }
1200 
1201 
1202 
1220 template <typename REAL>
1221 void fml::mpimat<REAL>::get_row(const len_t i, fml::cpuvec<REAL> &v) const
1222 {
1223  if (i < 0 || i >= this->m)
1224  throw std::logic_error("invalid matrix row");
1225 
1226  v.resize(this->n);
1227  v.fill_zero();
1228  REAL *v_ptr = v.data_ptr();
1229 
1230  #pragma omp parallel for if(this->n > fml::omp::OMP_MIN_SIZE)
1231  for (len_t j=0; j<this->n; j++)
1232  {
1233  const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.nprow());
1234  const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.npcol());
1235 
1236  const int pr = fml::bcutils::g2p(i, this->mb, this->g.nprow());
1237  const int pc = fml::bcutils::g2p(j, this->nb, this->g.npcol());
1238 
1239  if (pr == this->g.myrow() && pc == this->g.mycol())
1240  v_ptr[j] = this->data[i_local + this->m_local*j_local];
1241  }
1242 
1243  this->g.allreduce(this->n, 1, v_ptr, 'A');
1244 }
1245 
1246 
1247 
1260 template <typename REAL>
1261 void fml::mpimat<REAL>::set_row(const len_t i, const fml::cpuvec<REAL> &v)
1262 {
1263  if (i < 0 || i >= this->m)
1264  throw std::logic_error("invalid matrix row");
1265  if (v.size() != this->n)
1266  throw std::runtime_error("non-conformable arguments");
1267 
1268  REAL *v_ptr = v.data_ptr();
1269  #pragma omp parallel for if(this->n > fml::omp::OMP_MIN_SIZE)
1270  for (len_t j=0; j<this->n; j++)
1271  {
1272  const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.nprow());
1273  const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.npcol());
1274 
1275  const int pr = fml::bcutils::g2p(i, this->mb, this->g.nprow());
1276  const int pc = fml::bcutils::g2p(j, this->nb, this->g.npcol());
1277 
1278  if (pr == this->g.myrow() && pc == this->g.mycol())
1279  this->data[i_local + this->m_local*j_local] = v_ptr[j];
1280  }
1281 }
1282 
1283 
1284 
1302 template <typename REAL>
1303 void fml::mpimat<REAL>::get_col(const len_t j, fml::cpuvec<REAL> &v) const
1304 {
1305  if (j < 0 || j >= this->n)
1306  throw std::logic_error("invalid matrix column");
1307 
1308  v.resize(this->m);
1309  v.fill_zero();
1310  REAL *v_ptr = v.data_ptr();
1311 
1312  #pragma omp parallel for if(this->m > fml::omp::OMP_MIN_SIZE)
1313  for (len_t i=0; i<this->m; i++)
1314  {
1315  const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.nprow());
1316  const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.npcol());
1317 
1318  const int pr = fml::bcutils::g2p(i, this->mb, this->g.nprow());
1319  const int pc = fml::bcutils::g2p(j, this->nb, this->g.npcol());
1320 
1321  if (pr == this->g.myrow() && pc == this->g.mycol())
1322  v_ptr[i] = this->data[i_local + this->m_local*j_local];
1323  }
1324 
1325  this->g.allreduce(this->m, 1, v_ptr, 'A');
1326 }
1327 
1328 
1329 
1347 template <typename REAL>
1348 void fml::mpimat<REAL>::set_col(const len_t j, const fml::cpuvec<REAL> &v)
1349 {
1350  if (j < 0 || j >= this->n)
1351  throw std::logic_error("invalid matrix column");
1352  if (v.size() != this->m)
1353  throw std::runtime_error("non-conformable arguments");
1354 
1355  REAL *v_ptr = v.data_ptr();
1356  #pragma omp parallel for if(this->m > fml::omp::OMP_MIN_SIZE)
1357  for (len_t i=0; i<this->m; i++)
1358  {
1359  const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.nprow());
1360  const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.npcol());
1361 
1362  const int pr = fml::bcutils::g2p(i, this->mb, this->g.nprow());
1363  const int pc = fml::bcutils::g2p(j, this->nb, this->g.npcol());
1364 
1365  if (pr == this->g.myrow() && pc == this->g.mycol())
1366  this->data[i_local + this->m_local*j_local] = v_ptr[i];
1367  }
1368 }
1369 
1370 
1371 
1386 template <typename REAL>
1388 {
1389  // same dim, same blocking, same grid
1390  if (this->m != x.nrows() || this->n != x.ncols())
1391  return false;
1392  else if (this->mb != x.bf_rows() || this->nb != x.bf_cols())
1393  return false;
1394  else if (this->g.ictxt() != x.g.ictxt())
1395  return false;
1396 
1397  const REAL *x_d = x.data_ptr();
1398  if (this->data == x_d)
1399  return true;
1400 
1401  int negation_ret = 0;
1402  for (len_t j=0; j<this->n_local; j++)
1403  {
1404  for (len_t i=0; i<this->m_local; i++)
1405  {
1406  const REAL a = this->data[i + this->m_local*j];
1407  const REAL b = x_d[i + this->m_local*j];
1408  if (!arraytools::fltcmp::eq(a, b))
1409  {
1410  negation_ret = 1;
1411  break;
1412  }
1413  }
1414  }
1415 
1416  g.allreduce(1, 1, &negation_ret, 'A');
1417 
1418  return !((bool) negation_ret);
1419 }
1420 
1429 template <typename REAL>
1431 {
1432  return !(*this == x);
1433 }
1434 
1435 
1436 
1445 template <typename REAL>
1447 {
1448  this->g = x.get_grid();
1449 
1450  this->m = x.nrows();
1451  this->n = x.ncols();
1452  this->data = x.data_ptr();
1453 
1454  this->m_local = x.nrows_local();
1455  this->n_local = x.ncols_local();
1456  this->mb = x.bf_rows();
1457  this->nb = x.bf_cols();
1458 
1459  memcpy(this->desc, x.desc_ptr(), 9*sizeof(int));
1460 
1461  this->free_data = false;
1462  return *this;
1463 }
1464 
1465 
1466 
1467 // -----------------------------------------------------------------------------
1468 // private
1469 // -----------------------------------------------------------------------------
1470 
1471 template <typename REAL>
1473 {
1474  if (this->free_data && this->data)
1475  {
1476  std::free(this->data);
1477  this->data = NULL;
1478  }
1479 }
1480 
1481 
1482 
1483 template <typename REAL>
1484 void fml::mpimat<REAL>::check_params(len_t nrows, len_t ncols, int bf_rows, int bf_cols)
1485 {
1486  if (nrows < 0 || ncols < 0)
1487  throw std::runtime_error("invalid dimensions");
1488 
1489  if (bf_rows <= 0 || bf_cols <= 0)
1490  throw std::runtime_error("invalid blocking factor");
1491 }
1492 
1493 
1494 
1495 template <typename REAL>
1496 void fml::mpimat<REAL>::check_grid(const fml::grid &blacs_grid)
1497 {
1498  if (!blacs_grid.valid_grid())
1499  throw std::runtime_error("invalid blacs grid");
1500 }
1501 
1502 
1503 
1504 template <typename REAL>
1505 REAL fml::mpimat<REAL>::get_val_from_global_index(len_t gi, len_t gj) const
1506 {
1507  REAL ret;
1508 
1509  int pr = fml::bcutils::g2p(gi, this->mb, this->g.nprow());
1510  int pc = fml::bcutils::g2p(gj, this->nb, this->g.npcol());
1511 
1512  int li = fml::bcutils::g2l(gi, this->mb, this->g.nprow());
1513  int lj = fml::bcutils::g2l(gj, this->nb, this->g.npcol());
1514 
1515  if (pr == this->g.myrow() && pc == this->g.mycol())
1516  ret = this->data[li + (this->m_local)*lj];
1517  else
1518  ret = (REAL) 0;
1519 
1520  g.allreduce(1, 1, &ret, 'A');
1521 
1522  return ret;
1523 }
1524 
1525 
1526 #endif
fml::mpimat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: mpimat.hh:326
fml::mpimat::inherit
void inherit(grid &blacs_grid, REAL *data_, len_t nrows, len_t ncols, int bf_rows, int bf_cols, bool free_on_destruct=false)
Set the internal object storage to the specified array.
Definition: mpimat.hh:443
fml::grid::ictxt
int ictxt() const
Definition: grid.hh:119
fml::mpimat::info
void info() const
Print some brief information about the object.
Definition: mpimat.hh:542
fml::unimat
Base matrix class. Not meant for direct use. Instead see cpumat, gpumat, and mpimat.
Definition: unimat.hh:30
fml::grid::allreduce
void allreduce(const int m, const int n, int *x, const char scope='A', const blacsops op=BLACS_SUM) const
Sum reduce operation across all processes in the grid.
Definition: grid.hh:420
fml::grid
2-dimensional MPI process grid.
Definition: grid.hh:70
fml::grid::send
void send(const int m, const int n, const int *x, const int rdest=0, const int cdest=0) const
Point-to-point send. Should be matched by a corresponding 'recv' call.
Definition: grid.hh:321
fml::mpimat::set
void set(const len_t i, const REAL v)
Set the storage at the specified index with the provided value.
Definition: mpimat.hh:1158
fml::grid::barrier
void barrier(const char scope='A') const
Execute a barrier across the specified scope of the BLACS grid.
Definition: grid.hh:404
fml::mpimat
Matrix class for data distributed over MPI in the 2-d block cyclic format.
Definition: mpimat.hh:40
fml::grid::mycol
int mycol() const
The process column (0-based index) of the calling process.
Definition: grid.hh:129
fml::mpimat::fill_rnorm
void fill_rnorm(const uint32_t seed, const REAL mean=0, const REAL sd=1)
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: mpimat.hh:749
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::mpimat::set_row
void set_row(const len_t i, const cpuvec< REAL > &v)
Set the specified row.
Definition: mpimat.hh:1261
fml::mpimat::set_col
void set_col(const len_t j, const cpuvec< REAL > &v)
Set the specified column.
Definition: mpimat.hh:1348
fml::grid::printf
void printf(const int row, const int col, const char *fmt,...) const
Helper wrapper around the C standard I/O 'printf()' function. Conceptually similar to guarding a norm...
Definition: grid.hh:263
fml::mpimat::dupe
mpimat< REAL > dupe() const
Duplicate the object in a deep copy.
Definition: mpimat.hh:469
fml::mpimat::operator=
mpimat< REAL > & operator=(const mpimat< REAL > &x)
Operator that sets the LHS to a shallow copy of the input. Desctruction of the LHS object will not re...
Definition: mpimat.hh:1446
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:210
fml::mpimat::get_row
void get_row(const len_t i, cpuvec< REAL > &v) const
Get the specified row.
Definition: mpimat.hh:1221
fml::mpimat::get
REAL get(const len_t i) const
Get the specified value.
Definition: mpimat.hh:1113
fml::mpimat::fill_diag
void fill_diag(const cpuvec< REAL > &v)
Set diagonal entries of the matrix to those in the vector.
Definition: mpimat.hh:687
fml::mpimat::fill_runif
void fill_runif(const uint32_t seed, const REAL min=0, const REAL max=1)
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: mpimat.hh:718
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::mpimat::operator!=
bool operator!=(const mpimat< REAL > &x) const
See if the two objects are not the same. Uses same internal logic as the == method.
Definition: mpimat.hh:1430
fml::mpimat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: mpimat.hh:965
fml::mpimat::fill_linspace
void fill_linspace()
Set values to linearly spaced numbers.
Definition: mpimat.hh:603
fml::mpimat::scale
void scale(const REAL s)
Multiply all values by the input value.
Definition: mpimat.hh:864
fml::mpimat::mpimat
mpimat(const grid &blacs_grid)
Construct matrix object with no internal allocated storage.
Definition: mpimat.hh:136
fml::mpimat::rev_rows
void rev_rows()
Reverse the rows of the matrix.
Definition: mpimat.hh:883
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::mpimat::fill_eye
void fill_eye()
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: mpimat.hh:666
fml::mpimat::any_nan
bool any_nan() const
Are any values NaN?
Definition: mpimat.hh:1075
fml::mpimat::any_inf
bool any_inf() const
Are any values infinite?
Definition: mpimat.hh:1047
fml::mpimat::diag
void diag(cpuvec< REAL > &v)
Get the diagonal entries.
Definition: mpimat.hh:787
fml::grid::recv
void recv(const int m, const int n, int *x, const int rsrc=0, const int csrc=0) const
Point-to-point receive. Should be matched by a corresponding 'send' call.
Definition: grid.hh:363
fml
Core namespace.
Definition: dimops.hh:10
fml::mpimat::fill_zero
void fill_zero()
Set all values to zero.
Definition: mpimat.hh:565
fml::mpimat::antidiag
void antidiag(cpuvec< REAL > &v)
Get the anti-diagonal entries, i.e. those on the bottom-left to top-right.
Definition: mpimat.hh:830
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::cpuvec::set
void set(const len_t i, const T v)
Set the storage at the specified index with the provided value.
Definition: cpuvec.hh:540
fml::mpimat::get_col
void get_col(const len_t j, cpuvec< REAL > &v) const
Get the specified column.
Definition: mpimat.hh:1303
fml::cpuvec::fill_zero
void fill_zero()
Set all values to zero.
Definition: cpuvec.hh:317
fml::mpimat::fill_val
void fill_val(const REAL v)
Set all values to input value.
Definition: mpimat.hh:581
fml::mpimat::operator==
bool operator==(const mpimat< REAL > &x) const
See if the two objects are the same.
Definition: mpimat.hh:1387
fml::grid::rank0
bool rank0() const
Check if the executing process is rank 0, i.e., if the process row and column are 0.
Definition: grid.hh:292
fml::grid::valid_grid
bool valid_grid() const
Is the BLACS grid valid?
Definition: grid.hh:133
fml::grid::npcol
int npcol() const
The number of processes columns in the BLACS context.
Definition: grid.hh:125
fml::mpimat::print
void print(uint8_t ndigits=4, bool add_final_blank=true) const
Print all values in the object.
Definition: mpimat.hh:496
fml::grid::myrow
int myrow() const
The process row (0-based index) of the calling process.
Definition: grid.hh:127
fml::grid::nprow
int nprow() const
The number of processes rows in the BLACS context.
Definition: grid.hh:123