9#ifndef ApproxWeightPerfectMatching_h
10#define ApproxWeightPerfectMatching_h
15#include <parallel/algorithm>
16#include <parallel/numeric>
42template <
class IT,
class NT>
54 int *
sendcnt =
new int[nprocs];
55 int *
recvcnt =
new int[nprocs];
56 int *
sdispls =
new int[nprocs]();
57 int *
rdispls =
new int[nprocs]();
61 for(
IT i=0; i<nprocs; ++i)
75 for(
int i=0; i<nprocs; ++i)
90template <
class IT,
class NT>
102 int *
sendcnt =
new int[nprocs];
103 int *
recvcnt =
new int[nprocs];
104 int *
sdispls =
new int[nprocs]();
105 int *
rdispls =
new int[nprocs]();
109 for(
IT i=0; i<nprocs; ++i)
122 for(
int i=0; i<nprocs; ++i)
139template <
class IT,
class NT,
class DER>
142 auto commGrid =
A.getcommgrid();
143 int procrows = commGrid->GetGridRows();
144 int proccols = commGrid->GetGridCols();
146 IT m_perproc =
nrows / procrows;
147 IT n_perproc =
ncols / proccols;
150 pr = std::min(
static_cast<int>(
grow / m_perproc), procrows-1);
154 pc = std::min(
static_cast<int>(
gcol / n_perproc), proccols-1);
157 return commGrid->GetRank(pr, pc);
196 int *
recvcnt =
new int[nprocs];
197 int *
rdispls =
new int[nprocs]();
224template <
class IT,
class NT>
234#pragma omp parallel for
236 for(
int k=0; k<
param.lncol; ++k)
244 for(
IT cp = colptr[k]; cp < colptr[k+1]; ++cp)
246 IT li = dcsc->ir[cp];
271template <
class IT,
class NT,
class DER>
277 auto commGrid =
A.getcommgrid();
279 int myrank=commGrid->GetRank();
280 int pr = commGrid->GetGridRows();
281 int pc = commGrid->GetGridCols();
287 int rowrank = commGrid->GetRankInProcRow();
288 int colrank = commGrid->GetRankInProcCol();
291 DER* spSeq =
A.seqptr();
292 IT localRowStart = colrank * m_perproc;
301 IT j =
lj + localColStart;
307 IT i =
li + localRowStart;
333 minw = 99999999999999.0;
360 int rowroot = commGrid->GetDiagOfProcRow();
361 int pc = commGrid->GetGridCols();
375 std::vector <int>
dpls(pc);
377 for(
int i=1; i<pc; i++)
395#define L2_CACHE_SIZE 256000
411template <
class IT,
class NT>
435 for(
int k=0; k<
param.lncol; ++k)
440 for(
IT cp = colptr[k]; cp < colptr[k+1]; ++cp)
442 IT li = dcsc->ir[cp];
455 for(
int i=0; i<
param.nprocs; i++)
483 for(
int k=0; k<
param.lncol; ++k)
489 for(
IT cp = colptr[k]; cp < colptr[k+1]; ++cp)
491 IT li = dcsc->ir[cp];
551template <
class IT,
class NT>
552std::vector< std::tuple<IT,IT,IT,NT> >
Phase2(
const AWPM_param<IT>&
param, std::vector<std::tuple<IT,IT,NT>>&
recvTuples,
Dcsc<IT, NT>* dcsc,
const std::vector<IT>& colptr,
const std::vector<IT>&
RepMateR2C,
const std::vector<IT>&
RepMateC2R,
const std::vector<NT>&
RepMateWR2C,
const std::vector<NT>&
RepMateWC2R )
561 std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
tempTuples1 (
param.nprocs);
572 nBins = omp_get_num_threads() * 4;
577#pragma omp parallel for
579 for(
int i=0; i<
nBins; i++)
628 for(
int i=0; i<
param.nprocs; i++)
648#pragma omp parallel for
650 for(
int i=0; i<
nBins; i++)
746template <
class IT,
class NT>
747std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
Phase2_old(
const AWPM_param<IT>&
param, std::vector<std::tuple<IT,IT,NT>>&
recvTuples,
Dcsc<IT, NT>* dcsc,
const std::vector<IT>& colptr,
const std::vector<IT>&
RepMateR2C,
const std::vector<IT>&
RepMateC2R,
const std::vector<NT>&
RepMateWR2C,
const std::vector<NT>&
RepMateWC2R )
750 std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
tempTuples1 (
param.nprocs);
758 if(colptr[
mi-
param.localColStart+1] > colptr[
mi-
param.localColStart] )
763 if (
ele != dcsc->ir+colptr[
mi -
param.localColStart+1])
781template <
class IT,
class NT,
class DER>
787 auto commGrid =
A.getcommgrid();
788 int myrank=commGrid->GetRank();
792 int nprocs = commGrid->GetSize();
793 int pr = commGrid->GetGridRows();
794 int pc = commGrid->GetGridCols();
795 int rowrank = commGrid->GetRankInProcRow();
796 int colrank = commGrid->GetRankInProcCol();
797 int diagneigh = commGrid->GetComplementRank();
807 DER* spSeq =
A.seqptr();
809 IT lnrow = spSeq->getnrow();
810 IT lncol = spSeq->getncol();
811 IT localRowStart = colrank * m_perproc;
815 param.nprocs = nprocs;
820 param.m_perproc = m_perproc;
821 param.n_perproc = n_perproc;
822 param.localRowStart = localRowStart;
823 param.localColStart = localColStart;
824 param.myrank = myrank;
835 MPI_Sendrecv(&
xsize, 1,
MPI_INT, diagneigh,
TRX, &
trxsize, 1,
MPI_INT, diagneigh,
TRX,
World, &
status);
837 MPI_Sendrecv(
mateCol2Row.GetLocArr(),
xsize,
MPIType<IT>(), diagneigh,
TRX,
trxnums.data(),
trxsize,
MPIType<IT>(), diagneigh,
TRX,
World, &
status);
843 std::vector<int>
dpls(pc,0);
863 std::vector<int>
rdpls(pr,0);
873 std::vector<IT> colptr (lncol+1,-1);
880 colptr[lncol] = spSeq->getnnz();
881 for(
IT k=lncol-1; k>=0; k--)
885 colptr[k] = colptr[k+1];
907 if(myrank==0) std::cout <<
"Iteration " <<
iterations <<
". matching weight: sum = "<<
weightCur <<
" min = " <<
minw << std::endl;
914 std::vector<std::tuple<IT,IT,IT,NT>>
recvTuples1 =
Phase2(
param,
recvTuples, dcsc, colptr,
RepMateR2C,
RepMateC2R,
RepMateWR2C,
RepMateWC2R );
923#pragma omp parallel for
925 for(
int k=0; k<lncol; ++k)
937 IT lj =
j - localColStart;
946 std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
tempTuples1 (nprocs);
947 for(
int k=0; k<lncol; ++k)
973#pragma omp parallel for
975 for(
int k=0; k<lncol; ++k)
1000 std::vector<std::vector<std::tuple<IT,IT,IT, IT>>>
winnerTuples (nprocs);
1003 for(
int k=0; k<lncol; ++k)
1034#pragma omp parallel for
1051#pragma omp parallel for
1061#pragma omp parallel for
1093 template <
class IT,
class NT,
class DER>
1100 A.Apply([](
NT val){
return (
fabs(val));});
1111 A.Apply([](
NT val){
return log(val);});
1115 template <
class IT,
class NT>
1144 SpParHelper::Print(
"Maximal is not better that the natural ordering. Hence, keeping the natural ordering.\n");
1176 SpParHelper::Print(
"AWPM is not better that the natural ordering. Hence, keeping the natural ordering.\n");
std::shared_ptr< CommGrid > commGrid
static void Print(const std::string &s)
bool CheckMatching(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row)
int OwnerProcs(SpParMat< IT, NT, DER > &A, IT grow, IT gcol, IT nrows, IT ncols)
std::vector< std::tuple< IT, IT, NT > > Phase1(const AWPM_param< IT > ¶m, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, const std::vector< IT > &RepMateR2C, const std::vector< IT > &RepMateC2R, const std::vector< NT > &RepMateWR2C, const std::vector< NT > &RepMateWC2R)
void AWPM(SpParMat< IT, NT, SpDCCols< IT, NT > > &A1, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool optimizeProd=true)
std::vector< std::tuple< IT, IT, IT, NT > > Phase2(const AWPM_param< IT > ¶m, std::vector< std::tuple< IT, IT, NT > > &recvTuples, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, const std::vector< IT > &RepMateR2C, const std::vector< IT > &RepMateC2R, const std::vector< NT > &RepMateWR2C, const std::vector< NT > &RepMateWC2R)
NT MatchingWeight(std::vector< NT > &RepMateWC2R, MPI_Comm RowWorld, NT &minw)
void TransformWeight(SpParMat< IT, NT, DER > &A, bool applylog)
std::vector< std::tuple< IT, IT, NT > > ExchangeData(std::vector< std::vector< std::tuple< IT, IT, NT > > > &tempTuples, MPI_Comm World)
std::vector< std::tuple< IT, IT, IT, NT > > ExchangeData1(std::vector< std::vector< std::tuple< IT, IT, IT, NT > > > &tempTuples, MPI_Comm World)
NT Trace(SpParMat< IT, NT, DER > &A, IT &rettrnnz=0)
void UpdateMatching(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, std::vector< IT > &RepMateR2C, std::vector< IT > &RepMateC2R)
void maximumMatching(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool prune=true, bool randMM=false, bool maximizeWeight=false)
int ThreadBuffLenForBinning(int itemsize, int nbins)
std::vector< std::tuple< IT, IT > > MateBcast(std::vector< std::tuple< IT, IT > > sendTuples, MPI_Comm World)
void ReplicateMateWeights(const AWPM_param< IT > ¶m, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, std::vector< IT > &RepMateC2R, std::vector< NT > &RepMateWR2C, std::vector< NT > &RepMateWC2R)
std::vector< std::vector< std::tuple< IT, IT, IT, NT > > > Phase2_old(const AWPM_param< IT > ¶m, std::vector< std::tuple< IT, IT, NT > > &recvTuples, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, const std::vector< IT > &RepMateR2C, const std::vector< IT > &RepMateC2R, const std::vector< NT > &RepMateWR2C, const std::vector< NT > &RepMateWC2R)
void TwoThirdApprox(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row)
void WeightedGreedy(Par_MAT_Double &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > °Col)
std::shared_ptr< CommGrid > commGrid