COMBINATORIAL_BLAS 1.6
 
TopDownBFS.cpp
/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.6 -------------------------------------------------*/
/* date: 6/15/2017 ---------------------------------------------*/
/* authors: Ariful Azad, Aydin Buluc ---------------------------*/
/****************************************************************/
/*
 Copyright (c) 2010-2017, The Regents of the University of California

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

#define DETERMINISTIC
#include "CombBLAS/CombBLAS.h"
#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <iterator>
#include <numeric>
#include <cmath>

#ifdef _OPENMP
#include <omp.h>
#endif

// Communication/computation timers accumulated inside CombBLAS kernels;
// reset per trial and reported per BFS iteration below.
double cblas_alltoalltime;
double cblas_allgathertime;
double cblas_mergeconttime;
double cblas_transvectime;
double cblas_localspmvtime;

#define ITERS 16
#define EDGEFACTOR 16
using namespace std;
using namespace combblas;

// 64-bit floor(log2(x)) function
// note: least significant bit is the "zeroth" bit
// pre: v > 0
unsigned int highestbitset(uint64_t v)
{
    // b in binary is {10, 1100, 11110000, 1111111100000000, ...}
    const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
    const unsigned int S[] = {1, 2, 4, 8, 16, 32};
    int i;

    unsigned int r = 0; // result of log2(v) will go here
    for (i = 5; i >= 0; i--)
    {
        if (v & b[i])   // the highest set bit is in the upper half (i.e. v >= 2^S[i] for sure)
        {
            v >>= S[i];
            r |= S[i];
        }
    }
    return r;
}

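// Illustrative self-check (not part of the original file): highestbitset(v)
// returns floor(log2(v)), so a power of two maps to its exponent and every
// value in [2^k, 2^(k+1)) maps to k.
static bool highestbitset_demo()
{
    return highestbitset(1) == 0                    // 2^0
        && highestbitset(2) == 1                    // 2^1
        && highestbitset(3) == 1                    // still below 2^2
        && highestbitset(1024) == 10                // 2^10
        && highestbitset((1ULL << 40) + 123) == 40; // in [2^40, 2^41)
}
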
template <class T>
bool from_string(T & t, const string& s, std::ios_base& (*f)(std::ios_base&))
{
    istringstream iss(s);
    return !(iss >> f >> t).fail();
}

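// Illustrative usage (not part of the original file): from_string parses a
// string through the given stream manipulator (std::dec, std::hex, ...) and
// reports success. The Binary input path below uses it to read the vertex
// and edge counts from the command line.
static bool from_string_demo()
{
    uint64_t n = 0;
    return from_string(n, string("12345"), std::dec) && n == 12345;
}
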
template <typename PARMAT>
void Symmetricize(PARMAT & A)
{
    // boolean addition is practically a "logical or",
    // therefore this doesn't destruct any links
    PARMAT AT = A;
    AT.Transpose();
    A += AT;
}

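// Illustrative sketch (not part of the original file): the same OR-style
// symmetrization on a plain dense boolean adjacency matrix. With boolean
// (saturating) addition, A + A^T keeps every existing edge and adds its
// reverse, which is why Symmetricize above destroys no links.
static void symmetricize_dense(std::vector<std::vector<bool>> & A)
{
    const size_t n = A.size();
    for (size_t i = 0; i < n; ++i)
        for (size_t j = 0; j < i; ++j)
        {
            bool either = A[i][j] || A[j][i]; // boolean "+" is logical OR
            A[i][j] = either;
            A[j][i] = either;
        }
}
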
struct prunediscovered : public std::binary_function<int64_t, int64_t, int64_t>
{
    int64_t operator()(int64_t x, const int64_t & y) const
    {
        return (y == -1) ? x : -1;
    }
};

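// Illustrative sketch (not part of the original file): applied elementwise
// with the fringe as x and the parents array as y, prunediscovered keeps a
// candidate only if its vertex is still undiscovered (parent == -1) and
// collapses everything else to -1. This mirrors the EWiseMult-based
// clean-up step in the BFS loop inside main().
static std::vector<int64_t> prune_demo(const std::vector<int64_t> & fringe,
                                       const std::vector<int64_t> & parents)
{
    std::vector<int64_t> kept(fringe.size());
    std::transform(fringe.begin(), fringe.end(), parents.begin(),
                   kept.begin(), prunediscovered());
    return kept;
}
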
int main(int argc, char* argv[])
{
    int nprocs, myrank;
#ifdef _OPENMP
    int cblas_splits = omp_get_max_threads();
    int provided, flag, claimed;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    MPI_Is_thread_main(&flag);
    if (!flag)
        SpParHelper::Print("This thread called init_thread but Is_thread_main gave false\n");
    MPI_Query_thread(&claimed);
    if (claimed != provided)
        SpParHelper::Print("Query thread gave a different thread level than requested\n");
#else
    MPI_Init(&argc, &argv);
    int cblas_splits = 1;
#endif

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if(argc < 3)
    {
        if(myrank == 0)
        {
            cout << "Usage: ./tdbfs <Force|Input|Binary> <Scale | Input Name> {FastGen}" << endl;
            cout << "Example: ./tdbfs Force 25 FastGen" << endl;
        }
        MPI_Finalize();
        return -1;
    }
    {
        typedef SelectMaxSRing<bool, int64_t> SR;
        typedef SpParMat < int64_t, bool, SpDCCols<int64_t,bool> > PSpMat_Bool;
        typedef SpParMat < int64_t, bool, SpDCCols<int32_t,bool> > PSpMat_s32p64;     // sequentially use 32-bits for local matrices, but parallel semantics are 64-bits
        typedef SpParMat < int64_t, int, SpDCCols<int32_t,int> > PSpMat_s32p64_Int;   // similarly mixed, but holds integers as opposed to booleans
        typedef SpParMat < int64_t, int64_t, SpDCCols<int64_t,int64_t> > PSpMat_Int64;
        shared_ptr<CommGrid> fullWorld;
        fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );

        // Declare objects
        PSpMat_Bool A(fullWorld);
        PSpMat_s32p64 Aeff(fullWorld);
        FullyDistVec<int64_t, int64_t> degrees(fullWorld);  // degrees of vertices (including multi-edges and self-loops)
        FullyDistVec<int64_t, int64_t> nonisov(fullWorld);  // ids of non-isolated (connected) vertices
        unsigned scale;
        OptBuf<int32_t, int64_t> optbuf;    // let indices be 32-bits
        bool scramble = false;

        if(string(argv[1]) == string("Input"))  // input option
        {
            A.ReadDistribute(string(argv[2]), 0);   // read it from file
            SpParHelper::Print("Read input\n");

            PSpMat_Int64 * G = new PSpMat_Int64(A);
            G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0));  // identity is 0
            delete G;

            Symmetricize(A);    // A += A';
            FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));   // plus<int64_t> matches the type of the output vector
            nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0));    // only the indices of non-isolated vertices
            delete ColSums;
            A = A(nonisov, nonisov);
            Aeff = PSpMat_s32p64(A);
            A.FreeMemory();
            SpParHelper::Print("Symmetricized and pruned\n");

            Aeff.OptimizeForGraph500(optbuf);   // should be called before threading is activated
            #ifdef THREADED
            ostringstream tinfo;
            tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
            SpParHelper::Print(tinfo.str());
            Aeff.ActivateThreading(cblas_splits);
            #endif
        }
        else if(string(argv[1]) == string("Binary"))
        {
            uint64_t n, m;
            from_string(n, string(argv[3]), std::dec);
            from_string(m, string(argv[4]), std::dec);

            ostringstream outs;
            outs << "Reading " << argv[2] << " with " << n << " vertices and " << m << " edges" << endl;
            SpParHelper::Print(outs.str());
            DistEdgeList<int64_t> * DEL = new DistEdgeList<int64_t>(argv[2], n, m);
            SpParHelper::Print("Read binary input to distributed edge list\n");

            PermEdges(*DEL);
            SpParHelper::Print("Permuted Edges\n");

            RenameVertices(*DEL);
            //DEL->Dump32bit("graph_permuted");
            SpParHelper::Print("Renamed Vertices\n");

            // conversion from distributed edge list; keeps self-loops, sums duplicates
            PSpMat_Int64 * G = new PSpMat_Int64(*DEL, false);
            delete DEL;     // free memory before symmetricizing
            SpParHelper::Print("Created Int64 Sparse Matrix\n");

            G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0));  // identity is 0

            A = PSpMat_Bool(*G);    // convert to Boolean
            delete G;
            int64_t removed = A.RemoveLoops();

            ostringstream loopinfo;
            loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
            SpParHelper::Print(loopinfo.str());
            A.PrintInfo();

            FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            FullyDistVec<int64_t, int64_t> * RowSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
            A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
            ColSums->EWiseApply(*RowSums, plus<int64_t>());
            delete RowSums;

            nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0));    // only the indices of non-isolated vertices
            delete ColSums;

            SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
            nonisov.RandPerm();     // so that A(v,v) is load-balanced (both memory- and time-wise)
            A.PrintInfo();
            #ifndef NOPERMUTE
            A(nonisov, nonisov, true);  // in-place permute to save memory
            SpParHelper::Print("Dropped isolated vertices from input\n");
            A.PrintInfo();
            #endif
            Aeff = PSpMat_s32p64(A);    // convert to 32-bit local indices
            A.FreeMemory();

            Symmetricize(Aeff);     // A += A';
            SpParHelper::Print("Symmetricized\n");
            //A.Dump("graph_symmetric");

            Aeff.OptimizeForGraph500(optbuf);   // should be called before threading is activated
            #ifdef THREADED
            ostringstream tinfo;
            tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
            SpParHelper::Print(tinfo.str());
            Aeff.ActivateThreading(cblas_splits);
            #endif
        }
        else
        {
            if(string(argv[1]) == string("Force"))
            {
                scale = static_cast<unsigned>(atoi(argv[2]));
                ostringstream outs;
                outs << "Forcing scale to: " << scale << endl;
                SpParHelper::Print(outs.str());

                if(argc > 3 && string(argv[3]) == string("FastGen"))
                {
                    SpParHelper::Print("Using fast vertex permutations; skipping edge permutations (like v2.1)\n");
                    scramble = true;
                }
            }
            else
            {
                SpParHelper::Print("Unknown option\n");
                MPI_Finalize();
                return -1;
            }
            // this is an undirected graph, so A*x does indeed BFS
            double initiator[4] = {.57, .19, .19, .05};

            double t01 = MPI_Wtime();
            double t02;
            DistEdgeList<int64_t> * DEL = new DistEdgeList<int64_t>();
            if(!scramble)
            {
                DEL->GenGraph500Data(initiator, scale, EDGEFACTOR);
                SpParHelper::Print("Generated edge lists\n");
                t02 = MPI_Wtime();
                ostringstream tinfo;
                tinfo << "Generation took " << t02-t01 << " seconds" << endl;
                SpParHelper::Print(tinfo.str());

                PermEdges(*DEL);
                SpParHelper::Print("Permuted Edges\n");
                //DEL->Dump64bit("edges_permuted");
                //SpParHelper::Print("Dumped\n");

                RenameVertices(*DEL);   // intermediate: generates RandPerm vector, using MemoryEfficientPSort
                SpParHelper::Print("Renamed Vertices\n");
            }
            else    // fast generation
            {
                DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, true);     // generate packed edges
                SpParHelper::Print("Generated renamed edge lists\n");
                t02 = MPI_Wtime();
                ostringstream tinfo;
                tinfo << "Generation took " << t02-t01 << " seconds" << endl;
                SpParHelper::Print(tinfo.str());
            }

            // Start Kernel #1
            MPI_Barrier(MPI_COMM_WORLD);
            double t1 = MPI_Wtime();

            // conversion from distributed edge list; keeps self-loops, sums duplicates
            PSpMat_s32p64_Int * G = new PSpMat_s32p64_Int(*DEL, false);
            delete DEL;     // free memory before symmetricizing
            SpParHelper::Print("Created Sparse Matrix (with int32 local indices and values)\n");

            MPI_Barrier(MPI_COMM_WORLD);
            double redts = MPI_Wtime();
            G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0));  // identity is 0
            MPI_Barrier(MPI_COMM_WORLD);
            double redtf = MPI_Wtime();

            ostringstream redtimeinfo;
            redtimeinfo << "Calculated degrees in " << redtf-redts << " seconds" << endl;
            SpParHelper::Print(redtimeinfo.str());
            A = PSpMat_Bool(*G);    // convert to Boolean
            delete G;
            int64_t removed = A.RemoveLoops();

            ostringstream loopinfo;
            loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
            SpParHelper::Print(loopinfo.str());
            A.PrintInfo();

            FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            FullyDistVec<int64_t, int64_t> * RowSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
            A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
            SpParHelper::Print("Reductions done\n");
            ColSums->EWiseApply(*RowSums, plus<int64_t>());
            delete RowSums;
            SpParHelper::Print("Intersection of colsums and rowsums found\n");

            nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0));    // only the indices of non-isolated vertices
            delete ColSums;

            SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
            nonisov.RandPerm();     // so that A(v,v) is load-balanced (both memory- and time-wise)
            A.PrintInfo();
            #ifndef NOPERMUTE
            A(nonisov, nonisov, true);  // in-place permute to save memory
            SpParHelper::Print("Dropped isolated vertices from input\n");
            A.PrintInfo();
            #endif

            Aeff = PSpMat_s32p64(A);    // convert to 32-bit local indices
            A.FreeMemory();
            Symmetricize(Aeff);     // A += A';
            SpParHelper::Print("Symmetricized\n");

            Aeff.OptimizeForGraph500(optbuf);   // should be called before threading is activated
            #ifdef THREADED
            ostringstream tinfo;
            tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
            SpParHelper::Print(tinfo.str());
            Aeff.ActivateThreading(cblas_splits);
            #endif
            Aeff.PrintInfo();

            MPI_Barrier(MPI_COMM_WORLD);
            double t2 = MPI_Wtime();

            ostringstream k1timeinfo;
            k1timeinfo << (t2-t1) - (redtf-redts) << " seconds elapsed for Kernel #1" << endl;
            SpParHelper::Print(k1timeinfo.str());
        }
        Aeff.PrintInfo();
        float balance = Aeff.LoadImbalance();
        ostringstream outs;
        outs << "Load balance: " << balance << endl;
        SpParHelper::Print(outs.str());

        MPI_Barrier(MPI_COMM_WORLD);
        double t1 = MPI_Wtime();

        // Now that every remaining vertex is non-isolated, randomly pick ITERS many of them as starting vertices
        #ifndef NOPERMUTE
        degrees = degrees(nonisov);     // fix the degrees array too
        degrees.PrintInfo("Degrees array");
        #endif
        // degrees.DebugPrint();
        FullyDistVec<int64_t, int64_t> Cands(fullWorld, ITERS, 0);  // the ITERS starting vertices
        double nver = (double) degrees.TotalLength();

#ifdef DETERMINISTIC
        MTRand M(1);
#else
        MTRand M;   // generate random numbers with Mersenne Twister
#endif
        vector<double> loccands(ITERS);
        vector<int64_t> loccandints(ITERS);
        if(myrank == 0)
        {
            for(int i=0; i<ITERS; ++i)
                loccands[i] = M.rand();
            copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout," ")); cout << endl;
            transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd(multiplies<double>(), nver));

            for(int i=0; i<ITERS; ++i)
                loccandints[i] = static_cast<int64_t>(loccands[i]);
            copy(loccandints.begin(), loccandints.end(), ostream_iterator<int64_t>(cout," ")); cout << endl;
        }
        MPI_Bcast(&(loccandints[0]), ITERS, MPIType<int64_t>(), 0, MPI_COMM_WORLD);
        for(int i=0; i<ITERS; ++i)
        {
            Cands.SetElement(i, loccandints[i]);
        }

        #define MAXTRIALS 1
        for(int trials = 0; trials < MAXTRIALS; trials++)   // try different algorithms for BFS
        {
            // reset the global communication/computation timers before each trial
            cblas_allgathertime = 0;
            cblas_alltoalltime = 0;
            cblas_mergeconttime = 0;
            cblas_transvectime = 0;
            cblas_localspmvtime = 0;
            MPI_Pcontrol(1,"BFS");

            double MTEPS[ITERS]; double INVMTEPS[ITERS]; double TIMES[ITERS]; double EDGES[ITERS];
            for(int i=0; i<ITERS; ++i)
            {
                // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
                FullyDistVec<int64_t, int64_t> parents(Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1);   // identity is -1

                // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
                FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol());    // numerical values are stored 0-based

                MPI_Barrier(MPI_COMM_WORLD);
                double t1 = MPI_Wtime();

                fringe.SetElement(Cands[i], Cands[i]);
                int iterations = 0;
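                // Level-synchronous loop: fringe holds the vertices discovered
                // in the previous level. setNumToInd() keys each entry by its
                // own index, one SpMV over the boolean matrix expands the
                // frontier to all neighbors, EWiseMult discards neighbors that
                // already have a parent, and Set() records the surviving
                // (vertex, parent) pairs.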
                while(fringe.getnnz() > 0)
                {
                    fringe.setNumToInd();
                    fringe = SpMV(Aeff, fringe, optbuf);    // SpMV with sparse vector (with indexisvalue flag preset), optimization enabled
                    fringe = EWiseMult(fringe, parents, true, (int64_t) -1);    // clean up vertices that already have parents
                    parents.Set(fringe);
                    iterations++;
                }
                MPI_Barrier(MPI_COMM_WORLD);
                double t2 = MPI_Wtime();

                FullyDistSpVec<int64_t, int64_t> parentsp = parents.Find(bind2nd(greater<int64_t>(), -1));
                parentsp.Apply(myset<int64_t>(1));

                // we use degrees on the directed graph so that we don't count the reverse edges in the TEPS score
                int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);

                ostringstream outnew;
                outnew << i << "th starting vertex was " << Cands[i] << endl;
                outnew << "Number of iterations: " << iterations << endl;
                outnew << "Number of vertices found: " << parentsp.Reduce(plus<int64_t>(), (int64_t) 0) << endl;
                outnew << "Number of edges traversed: " << nedges << endl;
                outnew << "BFS time: " << t2-t1 << " seconds" << endl;
                outnew << "MTEPS: " << static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
                outnew << "Total communication (average so far): " << (cblas_allgathertime + cblas_alltoalltime) / (i+1) << endl;
                TIMES[i] = t2-t1;
                EDGES[i] = nedges;
                MTEPS[i] = static_cast<double>(nedges) / (t2-t1) / 1000000.0;
                SpParHelper::Print(outnew.str());
            }
            SpParHelper::Print("Finished\n");
            ostringstream os;
            MPI_Pcontrol(-1,"BFS");

            os << "Per iteration communication times: " << endl;
            os << "AllGatherv: " << cblas_allgathertime / ITERS << endl;
            os << "AlltoAllv: " << cblas_alltoalltime / ITERS << endl;
            os << "Transvec: " << cblas_transvectime / ITERS << endl;

            os << "Per iteration computation times: " << endl;
            os << "MergeCont: " << cblas_mergeconttime / ITERS << endl;
            os << "LocalSpmv: " << cblas_localspmvtime / ITERS << endl;

            sort(EDGES, EDGES+ITERS);
            os << "--------------------------" << endl;
            os << "Min nedges: " << EDGES[0] << endl;
            os << "First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
            os << "Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
            os << "Third Quartile nedges: " << (EDGES[(3*ITERS/4)-1] + EDGES[3*ITERS/4])/2 << endl;
            os << "Max nedges: " << EDGES[ITERS-1] << endl;
            double mean = accumulate(EDGES, EDGES+ITERS, 0.0) / ITERS;
            vector<double> zero_mean(ITERS);    // distances to the mean
            transform(EDGES, EDGES+ITERS, zero_mean.begin(), bind2nd(minus<double>(), mean));
            // self inner-product is the sum of squares
            double deviation = inner_product(zero_mean.begin(), zero_mean.end(), zero_mean.begin(), 0.0);
            deviation = sqrt(deviation / (ITERS-1));
            os << "Mean nedges: " << mean << endl;
            os << "STDDEV nedges: " << deviation << endl;
            os << "--------------------------" << endl;

            sort(TIMES, TIMES+ITERS);
            os << "Min time: " << TIMES[0] << " seconds" << endl;
            os << "First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 << " seconds" << endl;
            os << "Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 << " seconds" << endl;
            os << "Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 << " seconds" << endl;
            os << "Max time: " << TIMES[ITERS-1] << " seconds" << endl;
            mean = accumulate(TIMES, TIMES+ITERS, 0.0) / ITERS;
            transform(TIMES, TIMES+ITERS, zero_mean.begin(), bind2nd(minus<double>(), mean));
            deviation = inner_product(zero_mean.begin(), zero_mean.end(), zero_mean.begin(), 0.0);
            deviation = sqrt(deviation / (ITERS-1));
            os << "Mean time: " << mean << " seconds" << endl;
            os << "STDDEV time: " << deviation << " seconds" << endl;
            os << "--------------------------" << endl;

            sort(MTEPS, MTEPS+ITERS);
            os << "Min MTEPS: " << MTEPS[0] << endl;
            os << "First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
            os << "Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
            os << "Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
            os << "Max MTEPS: " << MTEPS[ITERS-1] << endl;
            transform(MTEPS, MTEPS+ITERS, INVMTEPS, safemultinv<double>());     // returns inf for zero TEPS
            double hteps = static_cast<double>(ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
            os << "Harmonic mean of MTEPS: " << hteps << endl;
            transform(INVMTEPS, INVMTEPS+ITERS, zero_mean.begin(), bind2nd(minus<double>(), 1/hteps));
            deviation = inner_product(zero_mean.begin(), zero_mean.end(), zero_mean.begin(), 0.0);
            deviation = sqrt(deviation / (ITERS-1)) * (hteps*hteps);    // harmonic standard deviation
            os << "Harmonic standard deviation of MTEPS: " << deviation << endl;
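            // Formula sketch: with per-run rates x_i and harmonic mean
            // h = ITERS / sum(1/x_i), the deviation above is the sample
            // standard deviation of the reciprocals 1/x_i scaled by h^2,
            // a first-order (delta method) estimate of the stddev of h.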
            SpParHelper::Print(os.str());
        }
    }
    MPI_Finalize();
    return 0;
}
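
// ----------------------------------------------------------------------
// Minimal sequential sketch (not part of the original file): the same
// top-down BFS as the SpMV loop in main(), written with an explicit
// frontier over plain adjacency lists. "frontier" plays the role of the
// sparse vector fringe, the parents test plays the role of the EWiseMult
// prune, and the parent assignment plays the role of parents.Set().
// Assumes a simple undirected graph given as neighbor lists.
static std::vector<int64_t> topdown_bfs_sketch(
    const std::vector<std::vector<int64_t>> & adj, int64_t root)
{
    std::vector<int64_t> parents(adj.size(), -1);
    std::vector<int64_t> frontier = {root};
    parents[root] = root;                   // the root is its own parent
    while (!frontier.empty())
    {
        std::vector<int64_t> next;
        for (int64_t u : frontier)          // "SpMV": expand every frontier vertex
            for (int64_t v : adj[u])
                if (parents[v] == -1)       // "EWiseMult": keep only undiscovered
                {
                    parents[v] = u;         // "Set": record the parent
                    next.push_back(v);
                }
        frontier.swap(next);                // the next level becomes the new fringe
    }
    return parents;
}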