COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
BPMaximumMatching.h
Go to the documentation of this file.
1#ifndef BP_MAXIMUM_MATCHING_H
2#define BP_MAXIMUM_MATCHING_H
3
4#include "CombBLAS.h"
5#include <mpi.h>
6#include <sys/time.h>
7#include <iostream>
8#include <functional>
9#include <algorithm>
10#include <vector>
11#include <string>
12#include <sstream>
13#include "MatchingDefs.h"
14
15namespace combblas {
16
// PermMat: builds a distributed boolean matrix from the index vector ri.
// From the visible loop, each entry ri[i] with 0 <= ri[i] < ncol contributes
// one nonzero whose row id is the vector position (i + roffset) and whose
// column id is ri[i], localized to the owning processor column.
// NOTE(review): this Doxygen-rendered listing omits several original source
// lines (26, 34-36, 73-74, 86, 93, 101, 110, 114) -- including the function
// signature, the sdispls/rdispls prefix sums, the MPI_Alltoallv exchanges of
// row/column ids, and the final distributed-matrix construction. Comments
// below describe only what the visible lines establish; consult the original
// BPMaximumMatching.h before editing.
25template <class IT, class DER>
27{
28
29 IT procsPerRow = ri.commGrid->GetGridCols(); // the number of processor in a row of processor grid
30 IT procsPerCol = ri.commGrid->GetGridRows(); // the number of processor in a column of processor grid
31
32
33 IT global_nrow = ri.TotalLength();
// NOTE(review): lines 34-36 (presumably global_ncol and the per-processor
// block sizes such as n_perproccol, used below) are missing from this listing.
37
38
39 // The indices for FullyDistVec are offset'd to 1/p pieces
40 // The matrix indices are offset'd to 1/sqrt(p) pieces
41 // Add the corresponding offset before sending the data
42
43 std::vector< std::vector<IT> > rowid(procsPerRow); // rowid in the local matrix of each vector entry
44 std::vector< std::vector<IT> > colid(procsPerRow); // colid in the local matrix of each vector entry
45
46 IT locvec = ri.arr.size(); // nnz in local vector
47 IT roffset = ri.RowLenUntil(); // the number of vector elements in this processor row before the current processor
// Bucket each valid entry by the processor column owning matrix column
// ri.arr[i]; entries outside [0, ncol) are skipped (matching sentinel -1).
48 for(typename std::vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
49 {
50 if(ri.arr[i]>=0 && ri.arr[i]<ncol) // this specialized for matching. TODO: make it general purpose by passing a function
51 {
52 IT rowrec = (n_perproccol!=0) ? std::min(ri.arr[i] / n_perproccol, procsPerRow-1) : (procsPerRow-1);
53 // ri's numerical values give the colids and its local indices give rowids
54 rowid[rowrec].push_back( i + roffset);
55 colid[rowrec].push_back(ri.arr[i] - (rowrec * n_perproccol));
56 }
57
58 }
59
60
61
// Exchange per-destination counts along the processor row.
62 int * sendcnt = new int[procsPerRow];
63 int * recvcnt = new int[procsPerRow];
64 for(IT i=0; i<procsPerRow; ++i)
65 {
66 sendcnt[i] = rowid[i].size();
67 }
68
69 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, ri.commGrid->GetRowWorld()); // share the counts
70
71 int * sdispls = new int[procsPerRow]();
72 int * rdispls = new int[procsPerRow]();
// NOTE(review): lines 73-74 (the prefix sums that populate sdispls/rdispls)
// are missing from this listing; sdispls is used below as if populated.
75 IT p_nnz = accumulate(recvcnt,recvcnt+procsPerRow, static_cast<IT>(0));
76
77
78 IT * p_rows = new IT[p_nnz];
79 IT * p_cols = new IT[p_nnz];
80 IT * senddata = new IT[locvec];
// Pack the row ids contiguously into senddata, releasing each bucket's
// memory via the swap-with-temporary idiom as we go.
81 for(int i=0; i<procsPerRow; ++i)
82 {
83 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
84 std::vector<IT>().swap(rowid[i]); // clear memory of rowid
85 }
// NOTE(review): line 86 (presumably the MPI_Alltoallv scattering senddata
// into p_rows) is missing from this listing. senddata is then reused below
// for the column ids.
87
88 for(int i=0; i<procsPerRow; ++i)
89 {
90 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
91 std::vector<IT>().swap(colid[i]); // clear memory of colid
92 }
// NOTE(review): line 93 (presumably the MPI_Alltoallv scattering senddata
// into p_cols) is missing from this listing.
94 delete [] senddata;
95
// Convert the received (row, col) pairs into boolean tuples for the local
// sparse matrix; the stored value is always true (1).
96 std::tuple<IT,IT,bool> * p_tuples = new std::tuple<IT,IT,bool>[p_nnz];
97 for(IT i=0; i< p_nnz; ++i)
98 {
99 p_tuples[i] = make_tuple(p_rows[i], p_cols[i], 1);
100 }
// NOTE(review): line 101 (presumably DeleteAll on the remaining heap
// buffers p_rows/p_cols/sendcnt/recvcnt/sdispls/rdispls) is missing.
102
103
104 // Now create the local matrix
105 IT local_nrow = ri.MyRowLength();
106 int my_proccol = ri.commGrid->GetRankInProcRow();
107 IT local_ncol = (my_proccol<(procsPerCol-1))? (n_perproccol) : (global_ncol - (n_perproccol*(procsPerCol-1)));
108
109 // infer the concrete type SpMat<IT,IT>
// NOTE(review): line 110 (the typedef/using that defines DER_IT) is missing
// from this listing.
111 DER_IT * PSeq = new DER_IT();
112 PSeq->Create( p_nnz, local_nrow, local_ncol, p_tuples); // deletion of tuples[] is handled by SpMat::Create
113
// NOTE(review): line 114 (constructing the distributed matrix P from PSeq
// and ri.commGrid, as hinted by the commented-out line below) is missing.
115 //Par_DCSC_Bool P (PSeq, ri.commGrid);
116 return P;
117}
118
119
120
121
122/***************************************************************************
123// Augment a matching by a set of vertex-disjoint augmenting paths.
124// The paths are explored level-by-level similar to the level-synchronous BFS
125// This approach is more efficient when we have many short augmenting paths
126***************************************************************************/
127
// AugmentLevel: flips the matched/unmatched status of the edges along the
// vertex-disjoint augmenting paths recorded in parentsRow, walking level by
// level from the leaves (columns where paths ended, leaf != -1) back toward
// the roots, updating both mate vectors in lockstep each level.
// NOTE(review): this Doxygen-rendered listing omits original lines 129 (the
// function signature), 135, 142 and 148 (the call heads whose lambda
// arguments appear on the following lines -- presumably SelectApply/
// EWiseApply against parentsRow and mateCol2Row). Comments below describe
// only what the visible lines establish; consult the original header before
// editing.
128template <typename IT>
130{
131
132 IT nrow = mateRow2Col.TotalLength();
133 IT ncol = mateCol2Row.TotalLength();
// Start from the leaves: sparse vector over columns whose leaf value is set.
134 FullyDistSpVec<IT, IT> col(leaves, [](IT leaf){return leaf!=-1;});
136 FullyDistSpVec<IT, IT> nextcol(col.getcommgrid(), ncol);
137
// Keep walking until no column vertex remains on any augmenting path.
138 while(col.getnnz()!=0)
139 {
140
141 row = col.Invert(nrow);
143 [](IT root, IT parent){return parent;},
144 [](IT root, IT parent){return true;},
145 false, (IT)-1);
146
147 col = row.Invert(ncol); // children array
149 [](IT child, IT mate){return mate;},
150 [](IT child, IT mate){return mate!=-1;},
151 false, (IT)-1);
// Commit this level's new (row, col) pairings into both mate vectors.
152 mateRow2Col.Set(row);
153 mateCol2Row.Set(col);
154 col = nextcol;
155 }
156}
157
158
159/***************************************************************************
160// Augment a matching by a set of vertex-disjoint augmenting paths.
161// An MPI processor is responsible for a complete path.
162// This approach is more efficient when we have few long augmenting paths
163// We used one-sided MPI. Any PGAS language should be fine as well.
164// This function is not thread safe, hence multithreading is not used here
165 ***************************************************************************/
166
// AugmentPath: flips matched/unmatched edges along augmenting paths, with
// each MPI process walking the complete paths that end at its locally owned
// leaves, using MPI one-sided access through windows over the two mate
// vectors and the parent array. Not thread safe (see the comment above the
// function in the original header).
// NOTE(review): this Doxygen-rendered listing omits original lines 168 (the
// function signature), 170 (the MPI_Win declarations), 183, 185-186, and
// 196-207 (the per-step MPI_Win_lock/get/put operations that follow the
// parent pointer and write the mate entries), as well as 218-220 (presumably
// MPI_Win_free for the three windows). Comments below describe only what the
// visible lines establish; consult the original header before editing.
167template <typename IT>
169{
171 MPI_Win_create((IT*)mateRow2Col.GetLocArr(), mateRow2Col.LocArrSize() * sizeof(IT), sizeof(IT), MPI_INFO_NULL, mateRow2Col.commGrid->GetWorld(), &win_mateRow2Col);
172 MPI_Win_create((IT*)mateCol2Row.GetLocArr(), mateCol2Row.LocArrSize() * sizeof(IT), sizeof(IT), MPI_INFO_NULL, mateCol2Row.commGrid->GetWorld(), &win_mateCol2Row);
173 MPI_Win_create((IT*)parentsRow.GetLocArr(), parentsRow.LocArrSize() * sizeof(IT), sizeof(IT), MPI_INFO_NULL, parentsRow.commGrid->GetWorld(), &win_parentsRow);
174
175
176 IT* leaves_ptr = (IT*) leaves.GetLocArr();
177 //MPI_Win_fence(0, win_mateRow2Col);
178 //MPI_Win_fence(0, win_mateCol2Row);
179 //MPI_Win_fence(0, win_parentsRow);
180
181 IT row, col=100, nextrow;
182 int owner_row, owner_col;
184 int myrank;
187
188
// Each process walks every augmenting path that ends at one of its local
// leaves, following parent pointers toward the root and flipping mates.
189 for(IT i=0; i<leaves.LocArrSize(); i++)
190 {
191 int depth=0;
192 row = *(leaves_ptr+i);
// -1 is the "no leaf" sentinel; it also terminates the walk at the root.
193 while(row != - 1)
194 {
195
200
205
// NOTE(review): the remote reads/writes of this step (original lines
// 196-207) are missing from this listing.
208 MPI_Win_unlock(owner_row, win_mateRow2Col); // we need this otherwise col might get overwritten before communication!
209 row = nextrow;
210
211 }
212 }
213
214 //MPI_Win_fence(0, win_mateRow2Col);
215 //MPI_Win_fence(0, win_mateCol2Row);
216 //MPI_Win_fence(0, win_parentsRow);
217
221}
222
223
224
225
226
227// Maximum cardinality matching
228// Output: mateRow2Col and mateCol2Row
// maximumMatching: computes a maximum-cardinality matching on the bipartite
// graph of A by repeated phases. Each phase runs a level-synchronous BFS
// from all currently-unmatched columns, recording parent pointers
// (parentsRow) and path endpoints (leaves), then augments along the
// discovered vertex-disjoint paths. Results go into mateRow2Col/mateCol2Row;
// rank 0 prints per-phase timing statistics at the end.
// NOTE(review): this Doxygen-rendered listing omits many original lines --
// 230 (start of the signature), 247-248 (MPI rank/size queries), 253-254,
// 268, 279-280, 307-310 (the SpMV), 318, 330, 367 (EWiseApply call heads),
// 404/406 (the AugmentPath vs. AugmentLevel dispatch), 411-412, 420, etc.
// Comments below describe only what the visible lines establish; consult
// the original BPMaximumMatching.h before editing.
229template <typename IT, typename NT,typename DER>
231 FullyDistVec<IT, IT>& mateCol2Row, bool prune=true, bool randMM = false, bool maximizeWeight = false)
232{
233 static MTRand GlobalMT(123); // for reproducible result
235
236 int nthreads=1;
237#ifdef THREADED
238#pragma omp parallel
239 {
240 nthreads = omp_get_num_threads();
241 }
242#endif
244
245 double tstart = MPI_Wtime();
246 int nprocs, myrank;
// NOTE(review): lines 247-248 (MPI_Comm_size/MPI_Comm_rank filling nprocs
// and myrank, used below) are missing from this listing.
249
250 IT nrow = A.getnrow();
251 IT ncol = A.getncol();
252
// leaves has one slot per column; a value != -1 marks a column reached as
// the endpoint of an augmenting path (set from umFringeRow roots below).
255 FullyDistVec<IT, IT> leaves ( A.getcommgrid(), ncol, (IT) -1);
256
// Per-phase statistics collected for the report printed at the end.
257 std::vector<std::vector<double> > timing;
258 std::vector<int> layers;
259 std::vector<int64_t> phaseMatched;
261
262 bool matched = true;
263 int phase = 0;
264 int totalLayer = 0;
266
267
// Expose leaves through an MPI window (used by the commented-out GSet
// fast path inside the BFS loop below).
269 MPI_Win_create((IT*)leaves.GetLocArr(), leaves.LocArrSize() * sizeof(IT), sizeof(IT), MPI_INFO_NULL, A.getcommgrid()->GetWorld(), &winLeaves);
270
271
// One phase per iteration; terminates when a phase matches no new column.
272 while(matched)
273 {
275
276 std::vector<double> phase_timing(8,0);
277 leaves.Apply ( [](IT val){return (IT) -1;});
278 FullyDistVec<IT, IT> parentsRow ( A.getcommgrid(), nrow, (IT) -1);
// NOTE(review): lines 279-280 (declaring fringeCol and selecting columns
// from mateCol2Row) are missing; the visible lambdas keep only columns
// whose mate is -1, i.e. the unmatched columns that seed this phase's BFS.
281 [](VertexType vtx, IT mate){return vtx;},
282 [](VertexType vtx, IT mate){return mate==-1;},
283 true, VertexType());
284
285
// Seed the BFS roots: each unmatched column becomes its own root/parent;
// with randMM a random value is attached (random visit order tiebreak).
286 if(randMM) //select rand
287 {
288 fringeCol.ApplyInd([](VertexType vtx, IT idx){return VertexType(idx,idx,GlobalMT.rand());});
289 }
290 else
291 {
292 fringeCol.ApplyInd([](VertexType vtx, IT idx){return VertexType(idx,idx);});
293 }
294
295 ++phase;
296 numUnmatchedCol = fringeCol.getnnz();
297 int layer = 0;
298
299
// Level-synchronous BFS: advance the column fringe until it empties.
301 while(fringeCol.getnnz() > 0)
302 {
303 layer++;
304 t1 = MPI_Wtime();
305
306 //TODO: think about this semiring
// NOTE(review): lines 307-310 (the SpMV moving fringeCol to the row side,
// with prune and non-prune variants around this 'else') are missing.
309 else
311 phase_timing[0] += MPI_Wtime()-t1;
312
313
314
315 // remove vertices already having parents
316
317 t1 = MPI_Wtime();
// NOTE(review): line 318 (the EWiseApply of fringeRow against parentsRow
// whose lambdas follow -- keeping rows with parent == -1) is missing.
319 [](VertexType vtx, IT parent){return vtx;},
320 [](VertexType vtx, IT parent){return parent==-1;},
321 false, VertexType());
322
323 // Set parent pointer
324 parentsRow.EWiseApply(fringeRow,
325 [](IT dval, VertexType svtx){return svtx.parent;},
326 [](IT dval, VertexType svtx){return true;},
327 false, VertexType());
328
329
// NOTE(review): line 330 (building umFringeRow -- the unmatched rows in
// the fringe, keeping their BFS roots -- from mateRow2Col) is missing.
331 [](VertexType vtx, IT mate){return vtx.root;},
332 [](VertexType vtx, IT mate){return mate==-1;},
333 false, VertexType());
334
335 phase_timing[1] += MPI_Wtime()-t1;
336
337
338 IT nnz_umFringeRow = umFringeRow.getnnz(); // careful about this timing
339
340 t1 = MPI_Wtime();
// An unmatched row ends an augmenting path: record its root in leaves.
341 if(nnz_umFringeRow >0)
342 {
343 /*
344 if(nnz_umFringeRow < 25*nprocs)
345 {
346 leaves.GSet(umFringeRow,
347 [](IT valRoot, IT idxLeaf){return valRoot;},
348 [](IT valRoot, IT idxLeaf){return idxLeaf;},
349 winLeaves);
350 // There might be a bug here. It does not return the same output for different number of processes
351 // e.g., check with g7jac200sc.mtx matrix
352 }
353 else*/
354 {
355 FullyDistSpVec<IT, IT> temp1(A.getcommgrid(), ncol);
356 temp1 = umFringeRow.Invert(ncol);
357 leaves.Set(temp1);
358 }
359 }
360
361 phase_timing[2] += MPI_Wtime()-t1;
362
363
364
365
366 // matched row vertices in the the fringe
// NOTE(review): line 367 (rebuilding fringeRow from the matched rows via
// mateRow2Col, whose lambdas follow) is missing from this listing.
368 [](VertexType vtx, IT mate){return VertexType(mate, vtx.root);},
369 [](VertexType vtx, IT mate){return mate!=-1;},
370 false, VertexType());
371
372 t1 = MPI_Wtime();
// Optionally prune rows whose root already completed a path this phase.
373 if(nnz_umFringeRow>0 && prune)
374 {
375 fringeRow.FilterByVal (umFringeRow,[](VertexType vtx){return vtx.root;}, false);
376 }
377 double tprune = MPI_Wtime()-t1;
378 phase_timing[3] += tprune;
379
380
381 // Go to matched column from matched row in the fringe. parent is automatically set to itself.
382 t1 = MPI_Wtime();
383 fringeCol = fringeRow.Invert(ncol,
384 [](VertexType& vtx, const IT & index){return vtx.parent;},
385 [](VertexType& vtx, const IT & index){return vtx;},
386 [](VertexType& vtx1, VertexType& vtx2){return vtx1;});
387 phase_timing[4] += MPI_Wtime()-t1;
388
389
390
391
392 }
395
396 IT numMatchedCol = leaves.Count([](IT leaf){return leaf!=-1;});
397 phaseMatched.push_back(numMatchedCol);
399 if (numMatchedCol== 0) matched = false;
400 else
401 {
402
// Dispatch on path count (threshold 2*p^2 columns matched this phase).
// NOTE(review): lines 404 and 406 (the AugmentPath and AugmentLevel
// calls selected by this if/else) are missing from this listing.
403 if(numMatchedCol < (2* nprocs * nprocs))
405 else
407 }
410
413 timing.push_back(phase_timing);
414 totalLayer += layer;
415 layers.push_back(layer);
416
417 }
418
419
421
422 //isMaximalmatching(A, mateRow2Col, mateCol2Row, unmatchedRow, unmatchedCol);
423 //isMatching(mateCol2Row, mateRow2Col); //todo there is a better way to check this
424
425
426 // print statistics
427 double combTime;
428 if(myrank == 0)
429 {
430 std::cout << "****** maximum matching runtime ********\n";
431 std::cout << std::endl;
432 std::cout << "========================================================================\n";
433 std::cout << " BFS Search \n";
434 std::cout << "===================== ==================================================\n";
435 std::cout << "Phase Layer Match SpMV EWOpp CmUqL Prun CmMC BFS Aug Total\n";
436 std::cout << "===================== ===================================================\n";
437
// Per-phase rows of the timing table, accumulating column totals as we go.
438 std::vector<double> totalTimes(timing[0].size(),0);
439 int nphases = timing.size();
440 for(int i=0; i<timing.size(); i++)
441 {
442 printf(" %3d %3d %8lld ", i+1, layers[i], phaseMatched[i]);
443 for(int j=0; j<timing[i].size(); j++)
444 {
445 totalTimes[j] += timing[i][j];
446 //timing[i][j] /= timing[i].back();
447 printf("%.2lf ", timing[i][j]);
448 }
449
450 printf("\n");
451 }
452
453 std::cout << "-----------------------------------------------------------------------\n";
454 std::cout << "Phase Layer UnMat SpMV EWOpp CmUqL Prun CmMC BFS Aug Total \n";
455 std::cout << "-----------------------------------------------------------------------\n";
456
457 combTime = totalTimes.back();
458 printf(" %3d %3d %8lld ", nphases, totalLayer/nphases, numUnmatchedCol);
459 for(int j=0; j<totalTimes.size()-1; j++)
460 {
461 printf("%.2lf ", totalTimes[j]);
462 }
463 printf("%.2lf\n", combTime);
464 }
465
466 IT nrows=A.getnrow();
467 IT matchedRow = mateRow2Col.Count([](IT mate){return mate!=-1;});
468 if(myrank==0)
469 {
470 std::cout << "***Final Maximum Matching***\n";
471 std::cout << "***Total-Rows Matched-Rows Total Time***\n";
472 printf("%lld %lld %lf \n",nrows, matchedRow, combTime);
473 printf("matched rows: %lld , which is: %lf percent \n",matchedRow, 100*(double)matchedRow/(nrows));
474 std::cout << "-------------------------------------------------------\n\n";
475 }
476
477}
478
479}
480
481#endif
482
MTRand GlobalMT
double rand()
std::shared_ptr< CommGrid > commGrid
int size
Definition common.h:20
void AugmentPath(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
void maximumMatching(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool prune=true, bool randMM=false, bool maximizeWeight=false)
void DeleteAll(A arr1)
Definition Deleter.h:48
void AugmentLevel(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
SpParMat< IT, bool, DER > PermMat(const FullyDistVec< IT, IT > &ri, const IT ncol)
double A