5 #ifndef FML_MPI_MPIMAT_H
6 #define FML_MPI_MPIMAT_H
18 #include "internals/bcutils.hh"
20 #include "../_internals/arraytools/src/arraytools.hpp"
22 #include "../_internals/print.hh"
23 #include "../_internals/rand.hh"
24 #include "../_internals/omp.hh"
25 #include "../_internals/types.hh"
26 #include "../_internals/unimat.hh"
28 #include "../cpu/cpuvec.hh"
// Template header for the declarations that follow; the `class` line it
// introduces is not visible in this view.
39 template <
typename REAL>
// Constructor taking a BLACS grid plus row/column blocking factors.
44 mpimat(
const grid &blacs_grid,
int bf_rows,
int bf_cols);
// Constructor taking a grid, a caller-supplied buffer `data_`, the matrix
// dimensions and the blocking factors. `free_on_destruct` defaults to false;
// presumably it decides whether this object later frees `data_` -- confirm
// against the definition.
46 mpimat(
const grid &blacs_grid, REAL *data_, len_t
nrows, len_t
ncols,
int bf_rows,
int bf_cols,
bool free_on_destruct=
false);
// Takes the same arguments as the buffer-wrapping constructor (grid, buffer,
// dimensions, blocking factors, optional ownership flag defaulting to false).
// Note the grid is taken by non-const reference here, unlike the
// constructors, which take `const grid &`.
52 void inherit(
grid &blacs_grid, REAL *data_, len_t
nrows, len_t
ncols,
int bf_rows,
int bf_cols,
bool free_on_destruct=
false);
// Prints the matrix without modifying it (const member).
// @param ndigits          passed on to the value printer; defaults to 4.
// @param add_final_blank  defaults to true; presumably emits a trailing
//                         blank line -- confirm against the definition.
55 void print(uint8_t ndigits=4,
bool add_final_blank=
true)
const;
// Random fills. Each generator comes in two overloads: one taking an explicit
// 32-bit seed and one without a seed argument.
// Uniform fill; `min` defaults to 0 and `max` to 1.
64 void fill_runif(
const uint32_t seed,
const REAL min=0,
const REAL max=1);
65 void fill_runif(
const REAL min=0,
const REAL max=1);
// Normal fill; `mean` defaults to 0 and `sd` to 1.
66 void fill_rnorm(
const uint32_t seed,
const REAL mean=0,
const REAL sd=1);
67 void fill_rnorm(
const REAL mean=0,
const REAL sd=1);
// Scales the matrix by the scalar `s` (in place; returns nothing).
71 void scale(
const REAL s);
// Element accessors. Each has a single-index form and a (row, column) form.
// The getters are const and return the element by value.
78 REAL
get(
const len_t i)
const;
79 REAL
get(
const len_t i,
const len_t j)
const;
// The setters store `v` at the addressed element.
80 void set(
const len_t i,
const REAL v);
81 void set(
const len_t i,
const len_t j,
const REAL v);
// Inline accessors returning the corresponding data members unchanged.
// Local row count (m_local).
91 len_local_t nrows_local()
const {
return m_local;};
// Local column count (n_local).
92 len_local_t ncols_local()
const {
return n_local;};
// Row blocking factor (mb).
93 int bf_rows()
const {
return mb;};
// Column blocking factor (nb).
94 int bf_cols()
const {
return nb;};
// Pointer to the descriptor array; mutable and const overloads.
95 int* desc_ptr() {
return desc;};
96 const int* desc_ptr()
const {
return desc;};
// Returns a copy of the grid object (returned by value).
97 const grid get_grid()
const {
return g;};
// Helper declarations (the access specifier above them is not visible in
// this view).
// Validates matrix dimensions and blocking factors.
109 void check_params(len_t
nrows, len_t
ncols,
int bf_rows,
int bf_cols);
// Validates a grid object.
110 void check_grid(
const grid &blacs_grid);
// Looks up a value by global (row, column) index; const member.
111 REAL get_val_from_global_index(len_t gi, len_t gj)
const;
// Fragment of a member definition that receives `blacs_grid`; its signature
// and braces are not visible in this view (looks like a constructor --
// confirm).
135 template <
typename REAL>
// Reject an unusable grid before storing it.
138 check_grid(blacs_grid);
149 this->g = blacs_grid;
// Flag the data buffer as one this object should free.
151 this->free_data =
true;
// Fragment of a second member definition that receives `blacs_grid`; its
// signature and braces are not visible in this view (looks like a
// constructor -- confirm).
173 template <
typename REAL>
// Reject an unusable grid before storing it.
176 check_grid(blacs_grid);
187 this->g = blacs_grid;
// Flag the data buffer as one this object should free.
189 this->free_data =
true;
// Fragment of a member definition that validates its arguments, computes the
// local extents, builds the descriptor and allocates the local buffer. The
// signature and braces are not visible in this view (looks like the
// allocating constructor -- confirm).
// Throws std::bad_alloc when the allocation returns NULL.
212 template <
typename REAL>
215 check_params(nrows, ncols, bf_rows, bf_cols);
216 check_grid(blacs_grid);
// Local row/column counts for this process, from the global size, the
// blocking factor and this process's grid coordinate.
218 this->m_local = fml::bcutils::numroc(nrows, bf_rows, blacs_grid.
myrow(), 0, blacs_grid.
nprow());
219 this->n_local = fml::bcutils::numroc(ncols, bf_cols, blacs_grid.
mycol(), 0, blacs_grid.
npcol());
// Fill the descriptor array; m_local is passed last (presumably the local
// leading dimension -- confirm against bcutils::descinit).
221 fml::bcutils::descinit(this->desc, blacs_grid.
ictxt(), nrows, ncols, bf_rows, bf_cols, this->m_local);
// Size of the local block in bytes.
223 const size_t len = (size_t) this->m_local * this->n_local *
sizeof(REAL);
224 this->data = (REAL*) std::malloc(len);
// NOTE(review): malloc(0) is allowed to return NULL, so a process whose local
// block is empty (len == 0) could throw here without any real allocation
// failure -- confirm whether empty local blocks can occur.
225 if (this->data == NULL)
226 throw std::bad_alloc();
232 this->g = blacs_grid;
// The buffer was allocated here, so this object should free it.
234 this->free_data =
true;
// Fragment of a member definition that validates its arguments, computes the
// local extents and builds the descriptor without allocating. The signature
// and braces are not visible in this view (looks like the buffer-wrapping
// constructor -- confirm).
256 template <
typename REAL>
259 check_params(nrows, ncols, bf_rows, bf_cols);
260 check_grid(blacs_grid);
// Local row/column counts for this process.
262 this->m_local = fml::bcutils::numroc(nrows, bf_rows, blacs_grid.
myrow(), 0, blacs_grid.
nprow());
263 this->n_local = fml::bcutils::numroc(ncols, bf_cols, blacs_grid.
mycol(), 0, blacs_grid.
npcol());
265 fml::bcutils::descinit(this->desc, blacs_grid.
ictxt(), nrows, ncols, bf_rows, bf_cols, this->m_local);
271 this->g = blacs_grid;
// Ownership of the buffer follows the caller's flag.
275 this->free_data = free_on_destruct;
// Fragment of a member definition that copies layout information from another
// matrix `x`. The signature and braces are not visible in this view (looks
// like the copy constructor -- confirm).
280 template <
typename REAL>
// Copy local extents and blocking factors through x's accessors.
286 this->m_local = x.nrows_local();
287 this->n_local = x.ncols_local();
288 this->mb = x.bf_rows();
289 this->nb = x.bf_cols();
// Copy the descriptor as nine ints.
291 memcpy(this->desc, x.desc_ptr(), 9*
sizeof(
int));
// This object will not free the buffer. NOTE(review): presumably because the
// data pointer is shared with `x` on a line not visible here -- confirm.
298 this->free_data =
false;
303 template <
typename REAL>
// Fragment of a routine that changes the matrix dimensions while keeping the
// current blocking factors (looks like resize(nrows, ncols) -- confirm). The
// signature, braces and branch conditions are not visible in this view.
// Throws std::bad_alloc when (re)allocation returns NULL.
325 template <
typename REAL>
328 check_params(nrows, ncols, this->mb, this->nb);
// Byte sizes derived from the requested and the current *global* dimensions.
// NOTE(review): the buffer is indexed with the local stride m_local, so sizing
// it by nrows*ncols on every process looks larger than needed -- confirm that
// the global-size allocation is intended.
330 const size_t len = (size_t) nrows * ncols *
sizeof(REAL);
331 const size_t oldlen = (size_t) this->m * this->n *
sizeof(REAL);
// First visible path: refresh local extents and descriptor only.
338 this->m_local = fml::bcutils::numroc(nrows, this->mb, this->g.
myrow(), 0, this->g.nprow());
339 this->n_local = fml::bcutils::numroc(ncols, this->nb, this->g.
mycol(), 0, this->g.npcol());
341 fml::bcutils::descinit(this->desc, this->g.
ictxt(), nrows, ncols, this->mb, this->nb, this->m_local);
// Second visible path: refresh local extents, then (re)allocate.
346 this->m_local = fml::bcutils::numroc(nrows, this->mb, this->g.
myrow(), 0, this->g.nprow());
347 this->n_local = fml::bcutils::numroc(ncols, this->nb, this->g.
mycol(), 0, this->g.npcol());
// Fresh allocation versus growing/shrinking the existing buffer; the
// selecting condition is not visible here.
351 realloc_ptr = malloc(len);
353 realloc_ptr = realloc(this->data, len);
// The result is held in a temporary, so this->data is left untouched when the
// allocation fails.
355 if (realloc_ptr == NULL)
356 throw std::bad_alloc();
358 this->data = (REAL*) realloc_ptr;
360 fml::bcutils::descinit(this->desc, this->g.
ictxt(), nrows, ncols, this->mb, this->nb, this->m_local);
// Fragment of a routine that changes the matrix dimensions and blocking
// factors (looks like resize(nrows, ncols, bf_rows, bf_cols) -- confirm). The
// signature, braces and some statements are not visible in this view.
// Throws std::bad_alloc when (re)allocation returns NULL.
381 template <
typename REAL>
384 check_params(nrows, ncols, bf_rows, bf_cols);
// Byte sizes derived from the requested and the current *global* dimensions.
386 const size_t len = (size_t) nrows * ncols *
sizeof(REAL);
387 const size_t oldlen = (size_t) this->m * this->n *
sizeof(REAL);
// Nothing about size or blocking changes when all three comparisons hold.
389 if (len == oldlen && this->mb == bf_rows && this->nb == bf_cols)
// This path uses the *new* blocking factors bf_rows/bf_cols.
394 this->m_local = fml::bcutils::numroc(nrows, bf_rows, this->g.
myrow(), 0, this->g.nprow());
395 this->n_local = fml::bcutils::numroc(ncols, bf_cols, this->g.
mycol(), 0, this->g.npcol());
397 fml::bcutils::descinit(this->desc, this->g.
ictxt(), nrows, ncols, bf_rows, bf_cols, this->m_local);
// NOTE(review): from here on the code uses this->mb / this->nb rather than
// bf_rows / bf_cols. Unless mb/nb were updated on a line not visible here,
// the local extents and descriptor are built from the old blocking factors
// -- confirm.
405 this->m_local = fml::bcutils::numroc(nrows, this->mb, this->g.
myrow(), 0, this->g.nprow());
406 this->n_local = fml::bcutils::numroc(ncols, this->nb, this->g.
mycol(), 0, this->g.npcol());
// Fresh allocation versus resizing the existing buffer; the selecting
// condition is not visible here.
410 realloc_ptr = malloc(len);
412 realloc_ptr = realloc(this->data, len);
// The result is held in a temporary, so this->data is left untouched when the
// allocation fails.
414 if (realloc_ptr == NULL)
415 throw std::bad_alloc();
417 this->data = (REAL*) realloc_ptr;
419 fml::bcutils::descinit(this->desc, this->g.
ictxt(), nrows, ncols, this->mb, this->nb, this->m_local);
// Fragment of a routine that re-targets this object at a new grid, dimensions
// and blocking factors, with caller-controlled buffer ownership (parameter
// names match the `inherit` declaration -- confirm). The signature, braces and
// some statements are not visible in this view.
442 template <
typename REAL>
445 check_params(nrows, ncols, bf_rows, bf_cols);
446 check_grid(blacs_grid);
// Local extents and descriptor for the new layout.
450 m_local = fml::bcutils::numroc(nrows, bf_rows, blacs_grid.
myrow(), 0, blacs_grid.
nprow());
451 n_local = fml::bcutils::numroc(ncols, bf_cols, blacs_grid.
mycol(), 0, blacs_grid.
npcol());
452 fml::bcutils::descinit(this->desc, blacs_grid.
ictxt(), nrows, ncols, bf_rows, bf_cols, m_local);
458 this->g = blacs_grid;
// Ownership of the buffer follows the caller's flag.
462 this->free_data = free_on_destruct;
// Fragment of a routine that copies this matrix's local data and descriptor
// into another matrix `dup` (looks like a duplicate/deep-copy member --
// confirm). The signature, braces and the creation of `dup` are not visible
// in this view.
468 template <
typename REAL>
// Size of the local block in bytes.
473 const size_t len = (size_t) this->m_local * this->n_local *
sizeof(REAL);
// Raw byte copy of the local block, then of the nine-int descriptor.
475 memcpy(dup.
data_ptr(), this->data, len);
476 memcpy(dup.desc_ptr(), this->desc, 9*
sizeof(
int));
// Fragment of the element-by-element printing routine (uses `ndigits`, so it
// looks like print() -- confirm). The signature, braces and some conditions
// are not visible in this view.
// The loops walk every global (row, column) pair; each value ends up in `d`
// and is handed to printval.
495 template <
typename REAL>
498 for (len_t gi=0; gi<this->m; gi++)
500 for (len_t gj=0; gj<this->n; gj++)
// Grid coordinates of the process holding element (gi, gj).
502 const int pr = fml::bcutils::g2p(gi, this->mb, this->g.
nprow());
503 const int pc = fml::bcutils::g2p(gj, this->nb, this->g.
npcol());
// Position of the element inside the holder's local block.
505 const int i = fml::bcutils::g2l(gi, this->mb, this->g.
nprow());
506 const int j = fml::bcutils::g2l(gj, this->nb, this->g.
npcol());
// Element held by process (0, 0): read it directly.
511 if (pr == 0 && pc == 0)
512 d = this->data[i + this->m_local*j];
// Otherwise receive the single value from its holder (pr, pc).
514 this->g.
recv(1, 1, &d, pr, pc);
516 this->printval(d, ndigits);
// On the holding process: send the single value to process (0, 0).
518 else if (pr == this->g.
myrow() && pc == this->g.mycol())
520 d = this->data[i + this->m_local*j];
521 this->g.
send(1, 1, &d, 0, 0);
// Newlines are issued through the grid's printf with arguments (0, 0).
525 this->g.
printf(0, 0,
"\n");
529 this->g.
printf(0, 0,
"\n");
// Fragment of a routine that prints a one-line summary: global dimensions,
// blocking factors, grid shape and the element type name. The signature and
// braces are not visible in this view.
// NOTE(review): "%d" is used for this->m / this->n -- assumes len_t is
// int-sized; confirm.
541 template <
typename REAL>
546 fml::print::printf(
"# mpimat");
547 fml::print::printf(
" %dx%d", this->m, this->n);
548 fml::print::printf(
" with %dx%d blocking", this->mb, this->nb);
549 fml::print::printf(
" on %dx%d grid", this->g.
nprow(), this->g.npcol());
// typeid(...).name() is implementation-defined (often a mangled name).
550 fml::print::printf(
" type=%s",
typeid(REAL).name());
551 fml::print::printf(
"\n");
// Fragment of a routine that zeroes the local block byte-wise. The signature
// and braces are not visible in this view.
564 template <
typename REAL>
// Size of the local block in bytes.
567 const size_t len = (size_t) m_local * n_local *
sizeof(REAL);
568 memset(this->data, 0, len);
// Fragment of a routine that assigns the value `v` to every element of the
// local block. The signature and braces are not visible in this view.
580 template <
typename REAL>
// Threads are used only when the local element count exceeds OMP_MIN_SIZE.
583 #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
584 for (len_t j=0; j<this->n_local; j++)
// Inner loop runs down a column; element (i, j) lives at i + m_local*j.
587 for (len_t i=0; i<this->m_local; i++)
588 this->data[i + this->m_local*j] = v;
// Fragment of a convenience overload that forwards to
// fill_linspace(start, stop) with stop equal to the global element count.
// The signature, braces and the definition of `start` are not visible in this
// view. Note the template parameter is named T here, not REAL.
602 template <
typename T>
606 T stop = (T) (this->m * this->n);
607 this->fill_linspace(start, stop);
// Fragment of a routine that fills the matrix with evenly spaced values from
// `start` to `stop`, ordered by global column-major position. The signature,
// braces and the condition guarding the fill_val call are not visible in this
// view.
610 template <
typename REAL>
// Constant fill; presumably taken when start == stop -- confirm.
614 this->fill_val(start);
// Step between consecutive elements over m*n points.
// NOTE(review): the divisor is zero for a 1x1 matrix unless the guard above
// covers that case -- confirm.
617 const REAL v = (stop-start)/((REAL) this->m*this->n - 1);
619 #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
620 for (len_t j=0; j<this->n_local; j++)
623 for (len_t i=0; i<this->m_local; i++)
// Global indices of local element (i, j).
625 const int gi = fml::bcutils::l2g(i, this->mb, this->g.
nprow(), this->g.myrow());
626 const int gj = fml::bcutils::l2g(j, this->nb, this->g.
npcol(), this->g.mycol());
// gi + m*gj is the element's global column-major position.
628 this->data[i + this->m_local*j] = v*((REAL) gi + this->m*gj) + start;
// Fragment of a second linspace fill that computes in float and stores rounded
// ints (looks like an integer specialization of fill_linspace -- confirm).
// The template header, signature, braces and the condition guarding the
// fill_val call are not visible in this view.
// Constant fill; presumably taken when start == stop -- confirm.
638 this->fill_val(start);
// Step between consecutive elements, computed in single precision.
641 const float v = (stop-start)/((
float) this->m*this->n - 1);
643 #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
644 for (len_t j=0; j<this->n_local; j++)
647 for (len_t i=0; i<this->m_local; i++)
// Global indices of local element (i, j).
649 const int gi = fml::bcutils::l2g(i, this->mb, this->g.
nprow(), this->g.myrow());
650 const int gj = fml::bcutils::l2g(j, this->nb, this->g.
npcol(), this->g.mycol());
// Round to nearest before converting to int.
652 this->data[i + this->m_local*j] = (int) roundf(v*((
float) gi + this->m*gj) + start);
665 template <
typename REAL>
// Fragment of a routine that writes values taken from a vector `v` (through
// `v_d`) into some elements and zero into the others. The signature, braces
// and the condition choosing between the two stores are not visible in this
// view (presumably gi == gj, i.e. a diagonal fill -- confirm).
686 template <
typename REAL>
691 #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
692 for (len_local_t j=0; j<n_local; j++)
694 for (len_local_t i=0; i<m_local; i++)
// Global indices of local element (i, j).
696 const int gi = fml::bcutils::l2g(i, this->mb, this->g.
nprow(), this->g.myrow());
697 const int gj = fml::bcutils::l2g(j, this->nb, this->g.
npcol(), this->g.mycol());
// The modulo reuses the vector's entries cyclically when it is shorter than
// the index range.
700 this->data[i + this->m_local*j] = v_d[gi % v.
size()];
702 this->data[i + this->m_local*j] = 0;
// Fragment of a routine that fills the local block with draws from a uniform
// real distribution over (min, max) using a generator `mt` (parameter names
// match fill_runif(seed, min, max) -- confirm). The signature, braces and the
// construction of `mt` are not visible in this view.
717 template <
typename REAL>
// NOTE(review): a function-local `static` is initialized only once, on the
// first call. Later calls with different min/max would still draw from the
// first call's range -- confirm whether `static` is intended.
721 static std::uniform_real_distribution<REAL> dist(min, max);
723 for (len_t j=0; j<this->n_local; j++)
725 for (len_t i=0; i<this->m_local; i++)
726 this->data[i + this->m_local*j] = dist(mt);
// Fragment of the seedless uniform fill: derives a seed and forwards to the
// seeded overload. The signature and braces are not visible in this view.
731 template <
typename REAL>
// Offset the base seed by this process's position in the grid
// (myrow + nprow*mycol) so each process gets a different seed.
734 uint32_t seed = fml::rand::get_seed() + (g.
myrow() + g.
nprow()*g.
mycol());
735 this->fill_runif(seed, min, max);
// Fragment of a routine that fills the local block with draws from a normal
// distribution with the given mean and sd using a generator `mt` (parameter
// names match fill_rnorm(seed, mean, sd) -- confirm). The signature, braces
// and the construction of `mt` are not visible in this view.
748 template <
typename REAL>
// NOTE(review): a function-local `static` is initialized only once, on the
// first call. Later calls with different mean/sd would still use the first
// call's parameters -- confirm whether `static` is intended.
752 static std::normal_distribution<REAL> dist(mean, sd);
754 for (len_t j=0; j<this->n_local; j++)
756 for (len_t i=0; i<this->m_local; i++)
757 this->data[i + this->m_local*j] = dist(mt);
// Fragment of the seedless normal fill: derives a seed and forwards to the
// seeded overload. The signature and braces are not visible in this view.
762 template <
typename REAL>
// Offset the base seed by this process's position in the grid
// (myrow + nprow*mycol) so each process gets a different seed.
765 uint32_t seed = fml::rand::get_seed() + (g.
myrow() + g.
nprow()*g.
mycol());
766 this->fill_rnorm(seed, mean, sd);
// Fragment of a routine that copies the main-diagonal elements held by this
// process into `v_ptr`, indexed by global position. The signature, braces,
// the setup of `v_ptr` and any later combining step are not visible in this
// view.
786 template <
typename REAL>
// The diagonal has min(m, n) entries.
789 const len_t minmn = std::min(this->m, this->n);
794 #pragma omp parallel for if(minmn > fml::omp::OMP_MIN_SIZE)
795 for (len_t gi=0; gi<minmn; gi++)
// Local position of global element (gi, gi).
797 const len_local_t i = fml::bcutils::g2l(gi, this->mb, this->g.
nprow());
798 const len_local_t j = fml::bcutils::g2l(gi, this->nb, this->g.
npcol());
// Grid coordinates of the process holding (gi, gi).
800 const int pr = fml::bcutils::g2p(gi, this->mb, this->g.
nprow());
801 const int pc = fml::bcutils::g2p(gi, this->nb, this->g.
npcol());
// Only the holding process writes its entry.
803 if (pr == this->g.
myrow() && pc == this->g.mycol())
804 v_ptr[gi] = this->data[i + this->m_local*j];
// Fragment of a routine that copies the anti-diagonal elements held by this
// process into `v_ptr`: entry gi comes from global row m-1-gi, column gi. The
// signature, braces, the setup of `v_ptr` and any later combining step are
// not visible in this view.
829 template <
typename REAL>
832 const len_t minmn = std::min(this->m, this->n);
837 #pragma omp parallel for if(minmn > fml::omp::OMP_MIN_SIZE)
838 for (len_t gi=0; gi<minmn; gi++)
// Local position of global element (m-1-gi, gi).
840 const len_local_t i = fml::bcutils::g2l(this->m-1-gi, this->mb, this->g.
nprow());
841 const len_local_t j = fml::bcutils::g2l(gi, this->nb, this->g.
npcol());
// Grid coordinates of the process holding that element.
843 const int pr = fml::bcutils::g2p(this->m-1-gi, this->mb, this->g.
nprow());
844 const int pc = fml::bcutils::g2p(gi, this->nb, this->g.
npcol());
// Only the holding process writes its entry.
846 if (pr == this->g.
myrow() && pc == this->g.mycol())
847 v_ptr[gi] = this->data[i + this->m_local*j];
// Fragment of a routine that multiplies every element of the local block by
// `s` in place (parameter name matches the `scale` declaration). The
// signature and braces are not visible in this view.
863 template <
typename REAL>
// Threads are used only when the local element count exceeds OMP_MIN_SIZE.
866 #pragma omp parallel for if((this->m_local)*(this->n_local) > fml::omp::OMP_MIN_SIZE)
867 for (len_local_t j=0; j<this->n_local; j++)
870 for (len_local_t i=0; i<this->m_local; i++)
871 this->data[i + this->m_local*j] *= s;
// Fragment of a routine that reverses the row order of the matrix: global row
// gi is exchanged with row m-gi-1, one column block at a time. The signature,
// braces, the setup of `tmp_d` and the conditions selecting between the three
// exchange paths are not visible in this view.
882 template <
typename REAL>
888 const int myrow = this->g.
myrow();
889 const int mycol = this->g.
mycol();
// Walk the columns one blocking-factor-wide strip at a time.
891 for (len_t gj=0; gj<this->n; gj+=this->nb)
893 const len_t j = fml::bcutils::g2l(gj, this->nb, this->g.
npcol());
894 const int pc = fml::bcutils::g2p(gj, this->nb, this->g.
npcol());
// Only the top half of the rows needs visiting; each is paired with its mirror.
896 for (len_t gi=0; gi<this->m/2; gi++)
898 const len_t i = fml::bcutils::g2l(gi, this->mb, this->g.
nprow());
899 const len_t gi_rev = this->m - gi - 1;
// Process rows holding the row and its mirror.
901 const int pr = fml::bcutils::g2p(gi, this->mb, this->g.
nprow());
902 const int pr_rev = fml::bcutils::g2p(gi_rev, this->mb, this->g.
nprow());
// This process takes part only if it holds one of the two rows in this strip.
904 if ((pr == myrow || pr_rev == myrow) && pc == mycol)
906 const len_t i_rev = fml::bcutils::g2l(gi_rev, this->mb, this->g.
nprow());
// Width of the strip, clipped at the right edge of the matrix.
907 const len_t cplen = std::min(this->nb, this->n - gj);
// Local three-step swap through tmp_d (presumably when both rows are held
// by this process -- confirm).
914 for (len_t jj=0; jj<cplen; jj++)
915 tmp_d[jj] = this->data[i + this->m_local*(j+jj)];
918 for (len_t jj=0; jj<cplen; jj++)
919 this->data[i + this->m_local*(j+jj)] = this->data[i_rev + this->m_local*(j+jj)];
922 for (len_t jj=0; jj<cplen; jj++)
923 this->data[i_rev + this->m_local*(j+jj)] = tmp_d[jj];
// Holder of row gi: send its strip to the mirror's holder, then receive the
// mirror strip. (Stride m_local on send steps along a row of column-major data.)
931 len_t idx = i + this->m_local*j;
932 this->g.
send(1, cplen, this->m_local, this->data + idx, pr_rev, pc);
933 this->g.
recv(1, cplen, 1, tmp_d, pr_rev, pc);
936 for (len_t jj=0; jj<cplen; jj++)
937 this->data[idx + this->m_local*jj] = tmp_d[jj];
// Holder of the mirror row: receive first, then send, the opposite order to
// the path above.
941 len_t idx = i_rev + this->m_local*j;
942 this->g.
recv(1, cplen, 1, tmp_d, pr, pc);
943 this->g.
send(1, cplen, this->m_local, this->data + idx, pr, pc);
946 for (len_t jj=0; jj<cplen; jj++)
947 this->data[idx + this->m_local*jj] = tmp_d[jj];
// Fragment of a routine that reverses the column order of the matrix: global
// column gj is exchanged with column n-gj-1, one row block at a time. The
// signature, braces, the setup of `tmp_d` and the conditions selecting between
// the three exchange paths are not visible in this view.
964 template <
typename REAL>
970 const int myrow = this->g.
myrow();
971 const int mycol = this->g.
mycol();
// Only the left half of the columns needs visiting; each is paired with its
// mirror.
973 for (len_t gj=0; gj<this->n/2; gj++)
975 const len_t j = fml::bcutils::g2l(gj, this->nb, this->g.
npcol());
976 const len_t gj_rev = this->n - gj - 1;
977 const len_t j_rev = fml::bcutils::g2l(gj_rev, this->nb, this->g.
npcol());
// Process columns holding the column and its mirror.
979 const int pc = fml::bcutils::g2p(gj, this->nb, this->g.
npcol());
980 const int pc_rev = fml::bcutils::g2p(gj_rev, this->nb, this->g.
npcol());
// Walk the rows one blocking-factor-tall strip at a time.
982 for (len_t gi=0; gi<this->m; gi+=this->mb)
984 const len_t i = fml::bcutils::g2l(gi, this->mb, this->g.
nprow());
985 const int pr = fml::bcutils::g2p(gi, this->mb, this->g.
nprow());
// This process takes part only if it holds one of the two columns in this
// strip.
987 if (pr == myrow && (pc == mycol || pc_rev == mycol))
// Height of the strip, clipped at the bottom edge of the matrix.
989 const len_t cplen = std::min(this->mb, this->m - gi);
// Local three-step swap through tmp_d (presumably when both columns are held
// by this process -- confirm).
996 for (len_t ii=0; ii<cplen; ii++)
997 tmp_d[ii] = this->data[i+ii + this->m_local*j];
1000 for (len_t ii=0; ii<cplen; ii++)
1001 this->data[i+ii + this->m_local*j] = this->data[i+ii + this->m_local*j_rev];
// NOTE(review): `omp for` only shares work inside an enclosing parallel
// region; none is visible in this view -- confirm.
1003 #pragma omp for simd
1004 for (len_t ii=0; ii<cplen; ii++)
1005 this->data[i+ii + this->m_local*j_rev] = tmp_d[ii];
// Holder of column gj: send its strip to the mirror's holder, then receive
// the mirror strip.
1013 len_t idx = i + this->m_local*j;
1014 this->g.
send(cplen, 1, this->m_local, this->data + idx, pr, pc_rev);
1015 this->g.
recv(cplen, 1, 1, tmp_d, pr, pc_rev);
1017 #pragma omp for simd
1018 for (len_t ii=0; ii<cplen; ii++)
1019 this->data[idx+ii] = tmp_d[ii];
// Holder of the mirror column: receive first, then send, the opposite order
// to the path above.
1023 len_t idx = i + this->m_local*j_rev;
1024 this->g.
recv(cplen, 1, 1, tmp_d, pr, pc);
1025 this->g.
send(cplen, 1, this->m_local, this->data + idx, pr, pc);
1027 #pragma omp for simd
1028 for (len_t ii=0; ii<cplen; ii++)
1029 this->data[idx+ii] = tmp_d[ii];
// Fragment of a routine that reports whether any element is infinite. The
// signature, braces, the declaration of `found_inf` and the statements run
// when an infinity is found are not visible in this view.
1046 template <
typename REAL>
// Scan the local block.
1050 for (len_local_t j=0; j<n_local; j++)
1052 for (len_local_t i=0; i<m_local; i++)
1054 if (isinf(this->data[i + this->m_local*j]))
// Combine the per-process flag across the grid ('A' presumably selects all
// processes -- confirm against grid::allreduce).
1062 this->g.
allreduce(1, 1, &found_inf,
'A');
// Any non-zero combined flag converts to true.
1064 return ((
bool) found_inf);
// Fragment of a routine that reports whether any element is NaN. The
// signature, braces, the declaration of `found_nan` and the statements run
// when a NaN is found are not visible in this view.
1074 template <
typename REAL>
// Scan the local block.
1078 for (len_local_t j=0; j<n_local; j++)
1080 for (len_local_t i=0; i<m_local; i++)
1082 if (isnan(this->data[i + this->m_local*j]))
// Combine the per-process flag across the grid ('A' presumably selects all
// processes -- confirm against grid::allreduce).
1090 this->g.
allreduce(1, 1, &found_nan,
'A');
// Any non-zero combined flag converts to true.
1092 return ((
bool) found_nan);
// Fragment of the single-index element getter. The signature, braces and the
// return statement are not visible in this view.
1112 template <
typename REAL>
1115 this->check_index(i);
// Split the linear index into global (row, column) in column-major order.
1117 int gi = i % this->m;
1118 int gj = i / this->m;
1120 REAL ret = this->get_val_from_global_index(gi, gj);
// Fragment of the (row, column) element getter. The signature, braces and the
// return statement are not visible in this view.
1136 template <
typename REAL>
1139 this->check_index(i, j);
1141 REAL ret = this->get_val_from_global_index(i, j);
// Fragment of the single-index element setter: stores `v` at linear index `i`
// on the process that holds that element; other processes do nothing in the
// visible code. The signature and braces are not visible in this view.
1157 template <
typename REAL>
1160 this->check_index(i);
// Split the linear index into global (row, column) in column-major order.
1162 int gi = i % this->m;
1163 int gj = i / this->m;
// Grid coordinates of the holding process.
1165 int pr = fml::bcutils::g2p(gi, this->mb, this->g.
nprow());
1166 int pc = fml::bcutils::g2p(gj, this->nb, this->g.
npcol());
// Position inside the holder's local block.
1168 int li = fml::bcutils::g2l(gi, this->mb, this->g.
nprow());
1169 int lj = fml::bcutils::g2l(gj, this->nb, this->g.
npcol());
1171 if (pr == this->g.
myrow() && pc == this->g.mycol())
1172 this->data[li + (this->m_local)*lj] = v;
// Fragment of the (row, column) element setter: stores `v` at global (i, j)
// on the process that holds that element; other processes do nothing in the
// visible code. The signature and braces are not visible in this view.
1186 template <
typename REAL>
1189 this->check_index(i, j);
// Grid coordinates of the holding process.
1191 int pr = fml::bcutils::g2p(i, this->mb, this->g.
nprow());
1192 int pc = fml::bcutils::g2p(j, this->nb, this->g.
npcol());
// Position inside the holder's local block.
1194 int li = fml::bcutils::g2l(i, this->mb, this->g.
nprow());
1195 int lj = fml::bcutils::g2l(j, this->nb, this->g.
npcol());
1197 if (pr == this->g.
myrow() && pc == this->g.mycol())
1198 this->data[li + (this->m_local)*lj] = v;
// Fragment of a routine that gathers global row `i` into `v_ptr` on every
// process. The signature, braces and the setup of `v_ptr` are not visible in
// this view.
// Throws std::logic_error when `i` is outside [0, m).
1220 template <
typename REAL>
1223 if (i < 0 || i >= this->m)
1224 throw std::logic_error(
"invalid matrix row");
1230 #pragma omp parallel for if(this->n > fml::omp::OMP_MIN_SIZE)
1231 for (len_t j=0; j<this->n; j++)
// Local position and holder of global element (i, j).
1233 const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.
nprow());
1234 const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.
npcol());
1236 const int pr = fml::bcutils::g2p(i, this->mb, this->g.
nprow());
1237 const int pc = fml::bcutils::g2p(j, this->nb, this->g.
npcol());
// Only the holder writes its entry.
1239 if (pr == this->g.
myrow() && pc == this->g.mycol())
1240 v_ptr[j] = this->data[i_local + this->m_local*j_local];
// Combine the n entries across the grid. NOTE(review): this gives the full
// row only if entries not held locally start at zero -- presumably v is
// zeroed on a line not visible here; confirm.
1243 this->g.
allreduce(this->n, 1, v_ptr,
'A');
// Fragment of a routine that overwrites global row `i` with the contents of a
// vector `v` (read through `v_ptr`). The signature, braces and the setup of
// `v_ptr` are not visible in this view.
// Throws std::logic_error when `i` is outside [0, m), and std::runtime_error
// when v.size() differs from the column count n.
1260 template <
typename REAL>
1263 if (i < 0 || i >= this->m)
1264 throw std::logic_error(
"invalid matrix row");
1265 if (v.
size() != this->n)
1266 throw std::runtime_error(
"non-conformable arguments");
1269 #pragma omp parallel for if(this->n > fml::omp::OMP_MIN_SIZE)
1270 for (len_t j=0; j<this->n; j++)
// Local position and holder of global element (i, j).
1272 const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.
nprow());
1273 const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.
npcol());
1275 const int pr = fml::bcutils::g2p(i, this->mb, this->g.
nprow());
1276 const int pc = fml::bcutils::g2p(j, this->nb, this->g.
npcol());
// Only the holder stores its entry.
1278 if (pr == this->g.
myrow() && pc == this->g.mycol())
1279 this->data[i_local + this->m_local*j_local] = v_ptr[j];
// Fragment of a routine that gathers global column `j` into `v_ptr` on every
// process. The signature, braces and the setup of `v_ptr` are not visible in
// this view.
// Throws std::logic_error when `j` is outside [0, n).
1302 template <
typename REAL>
1305 if (j < 0 || j >= this->n)
1306 throw std::logic_error(
"invalid matrix column");
1312 #pragma omp parallel for if(this->m > fml::omp::OMP_MIN_SIZE)
1313 for (len_t i=0; i<this->m; i++)
// Local position and holder of global element (i, j).
1315 const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.
nprow());
1316 const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.
npcol());
1318 const int pr = fml::bcutils::g2p(i, this->mb, this->g.
nprow());
1319 const int pc = fml::bcutils::g2p(j, this->nb, this->g.
npcol());
// Only the holder writes its entry.
1321 if (pr == this->g.
myrow() && pc == this->g.mycol())
1322 v_ptr[i] = this->data[i_local + this->m_local*j_local];
// Combine the m entries across the grid. NOTE(review): this gives the full
// column only if entries not held locally start at zero -- presumably v is
// zeroed on a line not visible here; confirm.
1325 this->g.
allreduce(this->m, 1, v_ptr,
'A');
// Fragment of a routine that overwrites global column `j` with the contents of
// a vector `v` (read through `v_ptr`). The signature, braces and the setup of
// `v_ptr` are not visible in this view.
// Throws std::logic_error when `j` is outside [0, n), and std::runtime_error
// when v.size() differs from the row count m.
1347 template <
typename REAL>
1350 if (j < 0 || j >= this->n)
1351 throw std::logic_error(
"invalid matrix column");
1352 if (v.
size() != this->m)
1353 throw std::runtime_error(
"non-conformable arguments");
1356 #pragma omp parallel for if(this->m > fml::omp::OMP_MIN_SIZE)
1357 for (len_t i=0; i<this->m; i++)
// Local position and holder of global element (i, j).
1359 const len_local_t i_local = fml::bcutils::g2l(i, this->mb, this->g.
nprow());
1360 const len_local_t j_local = fml::bcutils::g2l(j, this->nb, this->g.
npcol());
1362 const int pr = fml::bcutils::g2p(i, this->mb, this->g.
nprow());
1363 const int pc = fml::bcutils::g2p(j, this->nb, this->g.
npcol());
// Only the holder stores its entry.
1365 if (pr == this->g.
myrow() && pc == this->g.mycol())
1366 this->data[i_local + this->m_local*j_local] = v_ptr[i];
// Fragment of a comparison against another matrix `x` (a later fragment
// negates `*this == x`, so this looks like operator== -- confirm). The
// signature, braces, the outcome of each early check, the setup of `x_d` and
// any cross-process combining step are not visible in this view.
1386 template <
typename REAL>
// Layout checks: global dimensions, blocking factors, then BLACS context.
1390 if (this->m != x.
nrows() || this->n != x.
ncols())
1392 else if (this->mb != x.bf_rows() || this->nb != x.bf_cols())
1394 else if (this->g.
ictxt() != x.g.ictxt())
// Same underlying buffer on both sides.
1398 if (this->data == x_d)
// Element-wise comparison of the local blocks; negation_ret starts at 0 and
// the final result is its logical negation.
1401 int negation_ret = 0;
1402 for (len_t j=0; j<this->n_local; j++)
1404 for (len_t i=0; i<this->m_local; i++)
1406 const REAL a = this->data[i + this->m_local*j];
1407 const REAL b = x_d[i + this->m_local*j];
// Values are compared with arraytools' floating-point equality helper.
1408 if (!arraytools::fltcmp::eq(a, b))
1418 return !((bool) negation_ret);
// Fragment of the inequality comparison: the exact negation of `*this == x`.
// The signature and braces are not visible in this view.
1429 template <
typename REAL>
1432 return !(*
this == x);
// Fragment of a routine that copies grid, dimensions, blocking factors and
// descriptor from another matrix `x` (looks like the assignment operator --
// confirm). The signature, braces, the handling of the data pointer and the
// return statement are not visible in this view.
1445 template <
typename REAL>
1448 this->g = x.get_grid();
// Global dimensions.
1450 this->m = x.
nrows();
1451 this->n = x.
ncols();
// Local extents and blocking factors.
1454 this->m_local = x.nrows_local();
1455 this->n_local = x.ncols_local();
1456 this->mb = x.bf_rows();
1457 this->nb = x.bf_cols();
// Copy the descriptor as nine ints.
1459 memcpy(this->desc, x.desc_ptr(), 9*
sizeof(
int));
// This object will not free the buffer. NOTE(review): presumably because the
// data pointer is shared with `x` on a line not visible here -- confirm.
1461 this->free_data =
false;
// Fragment of the buffer-release routine. The signature, braces and any
// statements after the free are not visible in this view.
1471 template <
typename REAL>
// Release the buffer only when this object is flagged to free it and the
// pointer is non-null.
1474 if (this->free_data && this->data)
1476 std::free(this->data);
// Fragment of the argument validator (parameter names match the
// `check_params` declaration). The signature and braces are not visible in
// this view.
// Throws std::runtime_error for a negative dimension or a non-positive
// blocking factor. Zero-sized dimensions are accepted.
1483 template <
typename REAL>
1486 if (nrows < 0 || ncols < 0)
1487 throw std::runtime_error(
"invalid dimensions");
1489 if (bf_rows <= 0 || bf_cols <= 0)
1490 throw std::runtime_error(
"invalid blocking factor");
// Fragment of the grid validator. The signature, braces and the condition
// guarding the throw are not visible in this view.
// Throws std::runtime_error("invalid blacs grid") when that condition holds.
1495 template <
typename REAL>
1499 throw std::runtime_error(
"invalid blacs grid");
// Fragment of the lookup by global (gi, gj) index (parameter names match the
// `get_val_from_global_index` declaration). The signature, braces, the
// declaration of `ret`, any cross-process step and the return statement are
// not visible in this view.
1504 template <
typename REAL>
// Grid coordinates of the process holding element (gi, gj).
1509 int pr = fml::bcutils::g2p(gi, this->mb, this->g.
nprow());
1510 int pc = fml::bcutils::g2p(gj, this->nb, this->g.
npcol());
// Position inside the holder's local block.
1512 int li = fml::bcutils::g2l(gi, this->mb, this->g.
nprow());
1513 int lj = fml::bcutils::g2l(gj, this->nb, this->g.
npcol());
// Only the holder reads the value into `ret`.
1515 if (pr == this->g.
myrow() && pc == this->g.mycol())
1516 ret = this->data[li + (this->m_local)*lj];