47template <
class IU,
class NU>
50template <
class IU,
class NU>
53template <
class IU,
class NU>
63template <
typename SR,
typename IU,
typename NU,
typename RHS,
typename LHS>
68 for(
IU j =0;
j<
A.dcsc->nzc; ++
j)
70 IU colid =
A.dcsc->jc[
j];
71 for(
IU i =
A.dcsc->cp[
j]; i<
A.dcsc->cp[
j+1]; ++i)
73 IU rowid =
A.dcsc->ir[i];
74 SR::axpy(
A.dcsc->numx[i],
x[colid],
y[rowid]);
81template <
typename SR,
typename IU,
typename NU,
typename RHS,
typename LHS>
103 #pragma omp parallel for
104 for(
IU j =0;
j<
A.dcsc->nzc; ++
j)
113 IU colid =
A.dcsc->jc[
j];
114 for(
IU i =
A.dcsc->cp[
j]; i<
A.dcsc->cp[
j+1]; ++i)
116 IU rowid =
A.dcsc->ir[i];
117 SR::axpy(
A.dcsc->numx[i],
x[colid],
loc2merge[rowid]);
121 #pragma omp parallel for
138template <
typename SR,
typename IU,
typename NUM,
typename DER,
typename IVT,
typename OVT>
147 if(
A.getnnz() > 0 &&
nnzx > 0)
149 int splits =
A.getnsplit();
154 std::vector< std::vector< int32_t > >
indy(splits);
155 std::vector< std::vector< OVT > >
numy(splits);
159 #pragma omp parallel for
161 for(
int i=0; i<splits; ++i)
166 SpMXSpV_ForThreading<SR>(*(
A.GetInternal(i)),
perpiece,
indx, numx,
nnzx,
indy[i],
numy[i], i*
perpiece,
SPA.V_localy[i],
SPA.V_isthere[i],
SPA.V_inds[i]);
168 SpMXSpV_ForThreading<SR>(*(
A.GetInternal(i)),
nlocrows -
perpiece*i,
indx, numx,
nnzx,
indy[i],
numy[i], i*
perpiece,
SPA.V_localy[i],
SPA.V_isthere[i],
SPA.V_inds[i]);
179 std::vector<int>
accum(splits+1, 0);
180 for(
int i=0; i<splits; ++i)
190 std::vector<int32_t>
end_recs(splits);
191 for(
int i=0; i<splits; ++i)
199 #pragma omp parallel for
201 for(
int i=0; i<splits; ++i)
214 while (k >= 0 &&
end_recs[k] == -1) k--;
232 int end =
indy[i].size();
244 std::vector<int32_t>().swap(
indy[i]);
245 std::vector<OVT>().swap(
numy[i]);
247 for(
int k=i+1; k < splits; ++k)
256 return accum[splits];
260 std::cout <<
"Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
279template <
typename SR,
typename IU,
typename NUM,
typename DER,
typename IVT,
typename OVT>
283 if(
A.getnnz() > 0 &&
nnzx > 0)
285 int splits =
A.getnsplit();
288 std::vector< std::vector<int32_t> >
indy(splits);
289 std::vector< std::vector< OVT > >
numy(splits);
294 #pragma omp parallel for
296 for(
int i=0; i<splits; ++i)
309 std::vector<int32_t>
end_recs(splits);
310 for(
int i=0; i<splits; ++i)
320 #pragma omp parallel for
322 for(
int i=0; i<splits; ++i)
329 for(
typename std::vector<int32_t>::iterator
it =
indy[i].begin();
it !=
indy[i].end(); ++
it)
342 #pragma omp parallel for
344 for(
int i=0; i<splits; ++i)
365 for(
typename std::vector<int32_t>::iterator
it =
indy[i].begin();
it !=
indy[i].end(); ++
it)
383 for(
int i=0; i< splits; ++i)
385 for(
int j=0;
j< p_c; ++
j)
393 std::cout <<
"Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
401template <
typename SR,
typename MIND,
typename VIND,
typename DER,
typename NUM,
typename IVT,
typename OVT>
404 if(
A.getnnz() > 0 &&
nnzx > 0)
406 if(
A.getnsplit() > 0)
408 std::cout <<
"Call dcsc_gespmv_threaded instead" << std::endl;
420template <
typename SR,
typename IU,
typename DER,
typename NUM,
typename IVT,
typename OVT>
424 if(
A.getnnz() > 0 &&
nnzx > 0)
426 if(
A.getnsplit() > 0)
444 std::vector<IU>
nzcs(
A.splits, 0);
445 std::vector<IU>
nnzs(
A.splits, 0);
446 std::vector < std::vector < std::pair<IU,IU> > >
colrowpairs(
A.splits);
447 if(
A.nnz > 0 &&
A.dcsc !=
NULL)
449 for(
IU i=0; i<
A.dcsc->nzc; ++i)
451 for(
IU j =
A.dcsc->cp[i];
j<
A.dcsc->cp[i+1]; ++
j)
453 IU colid =
A.dcsc->jc[i];
454 IU rowid =
A.dcsc->ir[
j];
473 for(
int i=0; i<
A.splits; ++i)
477 std::fill(
A.dcscarr[i]->numx,
A.dcscarr[i]->numx+
nnzs[i],
static_cast<bool>(1));
484 A.dcscarr[i]->cp[
curnzc++] = 0;
509template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
518 if(
A.isZero() ||
B.isZero())
552template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
560 if(
A.isZero() ||
B.isZero())
576template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
584 std::cout <<
"Tuples_AtXBt function has not been implemented yet !" << std::endl;
589template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
597 std::cout <<
"Tuples_AtXBn function has not been implemented yet !" << std::endl;
604template<
class SR,
class IU,
class NU>
617 for(
int i=1; i<
hsize; ++i)
621 std::cerr <<
"Dimensions do not match on MergeAll()" << std::endl;
628 std::tuple<IU, IU, int> *
heap =
new std::tuple<IU, IU, int> [
hsize];
633 for(
int i=0; i<
hsize; ++i)
640 std::tuple<IU, IU, NU> *
ntuples =
new std::tuple<IU,IU,NU>[
estnnz];
693template <
typename IU,
typename NU1,
typename NU2>
721 if(
A.jc[i] >
B->jc[
j]) ++
j;
722 else if(
A.jc[i] <
B->jc[
j]) ++i;
731 else if (
A.ir[
ii] >
B->ir[
jj]) ++
jj;
752 if(
A.jc[i] >
B->jc[
j]) ++
j;
753 else if(
A.jc[i] <
B->jc[
j])
756 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; k++)
771 else if (
A.ir[
ii] <
B->ir[
jj])
782 while (
ii <
A.cp[i+1])
800 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; ++k)
813template <
typename N_promote,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation>
841 if(
A.jc[i] >
B->jc[
j]) ++
j;
842 else if(
A.jc[i] <
B->jc[
j]) ++i;
851 else if (
A.ir[
ii] >
B->ir[
jj]) ++
jj;
872 if(
A.jc[i] >
B->jc[
j]) ++
j;
873 else if(
A.jc[i] <
B->jc[
j])
876 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; k++)
891 else if (
A.ir[
ii] <
B->ir[
jj])
902 while (
ii <
A.cp[i+1])
920 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; ++k)
934template<
typename IU,
typename NU1,
typename NU2>
942 if(
A.nnz > 0 &&
B.nnz > 0)
959template<
typename N_promote,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation>
967 if(
A.nnz > 0 &&
B.nnz > 0)
972 else if (
A.nnz > 0 &&
notB)
992template <
typename RETT,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation,
typename _BinaryPredicate>
993Dcsc<IU, RETT> EWiseApply(
const Dcsc<IU,NU1> *
Ap,
const Dcsc<IU,NU2> *
Bp,
_BinaryOperation __binary_op,
_BinaryPredicate do_op,
bool allowANulls,
bool allowBNulls,
const NU1&
ANullVal,
const NU2&
BNullVal,
const bool allowIntersect)
1019 for(
IU k =
B.cp[
j-1]; k<
B.cp[
j]; ++k)
1054 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; k++)
1082 while(i<
A.nzc &&
j<
B.nzc)
1084 if(
A.jc[i] >
B.jc[
j])
1091 for(
IU k =
B.cp[
j-1]; k<
B.cp[
j]; ++k)
1103 else if(
A.jc[i] <
B.jc[
j])
1110 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; k++)
1128 while (
ii <
A.cp[i+1] &&
jj <
B.cp[
j+1])
1140 else if (
A.ir[
ii] >
B.ir[
jj])
1164 while (
ii <
A.cp[i+1])
1174 while (
jj <
B.cp[
j+1])
1193 for(
IU k =
A.cp[i-1]; k<
A.cp[i]; ++k)
1208 for(
IU k =
B.cp[
j-1]; k<
B.cp[
j]; ++k)
1223template <
typename RETT,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation,
typename _BinaryPredicate>
1224SpDCCols<IU,RETT> EWiseApply (
const SpDCCols<IU,NU1> &
A,
const SpDCCols<IU,NU2> &
B,
_BinaryOperation __binary_op,
_BinaryPredicate do_op,
bool allowANulls,
bool allowBNulls,
const NU1&
ANullVal,
const NU2&
BNullVal,
const bool allowIntersect)
1229 Dcsc<IU, RETT> *
tdcsc =
new Dcsc<IU, RETT>(
EWiseApply<RETT>(
A.dcsc,
B.dcsc,
__binary_op,
do_op,
allowANulls,
allowBNulls,
ANullVal,
BNullVal,
allowIntersect));
static void SpIntersect(const Dcsc< IT, NT1 > &Adcsc, const Dcsc< IT, NT2 > &Bdcsc, Isect< IT > *&cols, Isect< IT > *&rows, Isect< IT > *&isect1, Isect< IT > *&isect2, Isect< IT > *&itr1, Isect< IT > *&itr2)
static void deallocate2D(T **array, I m)
static void ShrinkArray(NT *&array, IT newsize)
static void Print(const std::string &s)
void generic_gespmv_threaded_setbuffers(const SpMat< IU, NUM, DER > &A, const int32_t *indx, const IVT *numx, int32_t nnzx, int32_t *sendindbuf, OVT *sendnumbuf, int *cnts, int *dspls, int p_c)
void dcsc_gespmv_threaded(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
SpMV with dense vector (multithreaded version)
SpTuples< IU, NUO > * Tuples_AtXBt(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
void BooleanRowSplit(SpDCCols< IU, bool > &A, int numsplits)
SpTuples< IU, NUO > * Tuples_AtXBn(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
SpTuples< IU, NUO > * Tuples_AnXBn(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
int generic_gespmv_threaded(const SpMat< IU, NUM, DER > &A, const int32_t *indx, const IVT *numx, int32_t nnzx, int32_t *&sendindbuf, OVT *&sendnumbuf, int *&sdispls, int p_c, PreAllocatedSPA< OVT > &SPA)
Dcsc< IU, N_promote > EWiseApply(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, _BinaryOperation __binary_op, bool notB, const NU2 &defaultBVal)
SpTuples< IU, NUO > * Tuples_AnXBt(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
void generic_gespmv(const SpMat< MIND, NUM, DER > &A, const VIND *indx, const IVT *numx, VIND nnzx, std::vector< VIND > &indy, std::vector< OVT > &numy, PreAllocatedSPA< OVT > &SPA)
SpTuples< IU, NU > MergeAll(const std::vector< SpTuples< IU, NU > * > &ArrSpTups, IU mstar=0, IU nstar=0, bool delarrs=false)
void dcsc_gespmv(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
SpMV with dense vector.
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)