COMBINATORIAL_BLAS
1.6
Loading...
Searching...
No Matches
mpipspgemm.cpp
Go to the documentation of this file.
1
#include <mpi.h>
#include <omp.h>    // omp_get_num_threads() is called in main(); do not rely on it arriving transitively
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <stdint.h>
#include <cmath>
#include "CombBLAS/CombBLAS.h"
#include "Glue.h"
#include "CCGrid.h"
#include "Reductions.h"
#include "Multiplier.h"
#include "SplitMatDist.h"
17
18
19
using namespace
std;
20
using namespace
combblas
;
21
22
// Global timing accumulators for the 3D SpGEMM phases.
// Presumably written by the kernels in Multiplier.h / Reductions.h and
// read when printing the per-phase performance table — confirm there,
// since this translation unit only declares them.
double comm_bcast;          // communication: SUMMA broadcasts
double comm_reduce;         // communication: layer-wise reduction
double comp_summa;          // computation: local SUMMA multiplies
double comp_reduce;         // computation: merging partial results
double comp_result;         // computation: assembling the final product
double comp_reduce_layer;   // computation: reduction across layers
double comp_split;          // computation: splitting matrices among layers
double comp_trans;          // computation: local transposition
double comm_split;          // communication: redistribution during split

// Number of multiplication rounds executed per algorithm (for timing).
#define ITERS 5
33
34
int
main
(
int
argc
,
char
*
argv
[])
35
{
36
int
provided
;
37
38
MPI_Init_thread
(&
argc
, &
argv
,
MPI_THREAD_SERIALIZED
, &
provided
);
39
if
(
provided
<
MPI_THREAD_SERIALIZED
)
40
{
41
printf
(
"ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n"
);
42
MPI_Abort
(
MPI_COMM_WORLD
, 1);
43
}
44
45
46
47
int
nprocs, myrank;
48
MPI_Comm_size
(
MPI_COMM_WORLD
,&nprocs);
49
MPI_Comm_rank
(
MPI_COMM_WORLD
,&myrank);
50
51
if
(
argc
< 8)
52
{
53
if
(myrank == 0)
54
{
55
printf
(
"Usage (random): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR> <algo>\n"
);
56
printf
(
"Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type=input> <matA> <matB> <algo>\n"
);
57
printf
(
"Example: ./mpipspgemm 4 4 2 ER 19 16 outer\n"
);
58
printf
(
"Example: ./mpipspgemm 4 4 2 Input matA.mtx matB.mtx column\n"
);
59
printf
(
"Type ER: Erdos-Renyi\n"
);
60
printf
(
"Type SSCA: R-MAT with SSCA benchmark parameters\n"
);
61
printf
(
"Type G500: R-MAT with Graph500 benchmark parameters\n"
);
62
printf
(
"algo: outer | column \n"
);
63
}
64
return
-1;
65
}
66
67
68
unsigned
GRROWS
= (
unsigned
)
atoi
(
argv
[1]);
69
unsigned
GRCOLS
= (
unsigned
)
atoi
(
argv
[2]);
70
unsigned
C_FACTOR
= (
unsigned
)
atoi
(
argv
[3]);
71
CCGrid
CMG
(
C_FACTOR
,
GRCOLS
);
72
int
nthreads
;
73
#pragma omp parallel
74
{
75
nthreads
= omp_get_num_threads();
76
}
77
78
79
if
(
GRROWS
!=
GRCOLS
)
80
{
81
SpParHelper::Print
(
"This version of the Combinatorial BLAS only works on a square logical processor grid\n"
);
82
MPI_Barrier
(
MPI_COMM_WORLD
);
83
MPI_Abort
(
MPI_COMM_WORLD
, 1);
84
}
85
86
int
layer_length
=
GRROWS
*
GRCOLS
;
87
if
(
layer_length
*
C_FACTOR
!= nprocs)
88
{
89
SpParHelper::Print
(
"The product of <GridRows> <GridCols> <Replicas> does not match the number of processes\n"
);
90
MPI_Barrier
(
MPI_COMM_WORLD
);
91
MPI_Abort
(
MPI_COMM_WORLD
, 1);
92
}
93
94
95
{
96
SpDCCols<int64_t, double>
splitA
,
splitB
;
97
SpDCCols<int64_t, double>
*
splitC
;
98
string
type;
99
shared_ptr<CommGrid>
layerGrid
;
100
layerGrid
.reset(
new
CommGrid
(
CMG
.layerWorld, 0, 0) );
101
FullyDistVec<int64_t, int64_t>
p(
layerGrid
);
// permutation vector defined on layers
102
103
if
(
string
(
argv
[4]) ==
string
(
"input"
))
// input option
104
{
105
string
fileA
(
argv
[5]);
106
string
fileB
(
argv
[6]);
107
108
double
t01
=
MPI_Wtime
();
109
SpDCCols<int64_t, double>
*
A
=
ReadMat<double>
(
fileA
,
CMG
,
true
, p);
110
SpDCCols<int64_t, double>
*
B
=
ReadMat<double>
(
fileB
,
CMG
,
true
, p);
111
SplitMat
(
CMG
,
A
,
splitA
,
false
);
112
SplitMat
(
CMG
,
B
,
splitB
,
true
);
//row-split
113
if
(myrank == 0)
cout
<<
"Matrices read and replicated along layers : time "
<<
MPI_Wtime
() -
t01
<<
endl
;
114
}
115
else
116
{
117
unsigned
scale
= (
unsigned
)
atoi
(
argv
[5]);
118
unsigned
EDGEFACTOR
= (
unsigned
)
atoi
(
argv
[6]);
119
double
initiator
[4];
120
if
(
string
(
argv
[4]) ==
string
(
"ER"
))
121
{
122
initiator
[0] = .25;
123
initiator
[1] = .25;
124
initiator
[2] = .25;
125
initiator
[3] = .25;
126
}
127
else
if
(
string
(
argv
[4]) ==
string
(
"G500"
))
128
{
129
initiator
[0] = .57;
130
initiator
[1] = .19;
131
initiator
[2] = .19;
132
initiator
[3] = .05;
133
EDGEFACTOR
= 16;
134
}
135
else
if
(
string
(
argv
[4]) ==
string
(
"SSCA"
))
136
{
137
initiator
[0] = .6;
138
initiator
[1] = .4/3;
139
initiator
[2] = .4/3;
140
initiator
[3] = .4/3;
141
EDGEFACTOR
= 8;
142
}
143
else
{
144
if
(myrank == 0)
145
printf
(
"The initiator parameter - %s - is not recognized.\n"
,
argv
[5]);
146
MPI_Abort
(
MPI_COMM_WORLD
, 1);
147
}
148
149
150
double
t01
=
MPI_Wtime
();
151
SpDCCols<int64_t, double>
*
A
=
GenMat<int64_t,double>
(
CMG
,
scale
,
EDGEFACTOR
,
initiator
,
true
);
152
SpDCCols<int64_t, double>
*
B
=
GenMat<int64_t,double>
(
CMG
,
scale
,
EDGEFACTOR
,
initiator
,
true
);
153
154
SplitMat
(
CMG
,
A
,
splitA
,
false
);
155
SplitMat
(
CMG
,
B
,
splitB
,
true
);
//row-split
156
if
(myrank == 0)
cout
<<
"RMATs Generated and replicated along layers : time "
<<
MPI_Wtime
() -
t01
<<
endl
;
157
158
}
159
160
int64_t
globalnnzA
=0,
globalnnzB
=0;
161
int64_t
localnnzA
=
splitA
.getnnz();
162
int64_t
localnnzB
=
splitB
.getnnz();
163
MPI_Allreduce
( &
localnnzA
, &
globalnnzA
, 1,
MPIType<int64_t>
(),
MPI_SUM
,
MPI_COMM_WORLD
);
164
MPI_Allreduce
( &
localnnzB
, &
globalnnzB
, 1,
MPIType<int64_t>
(),
MPI_SUM
,
MPI_COMM_WORLD
);
165
if
(myrank == 0)
cout
<<
"After split: nnzA= "
<<
globalnnzA
<<
" & nnzB= "
<<
globalnnzB
;
166
167
168
169
type =
string
(
argv
[7]);
170
if
(myrank == 0)
171
{
172
printf
(
"\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n"
,
CMG
.GridRows,
CMG
.GridCols,
CMG
.GridLayers,
nthreads
);
173
printf
(
" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n"
);
174
}
175
if
(type ==
string
(
"outer"
))
176
{
177
splitB
.Transpose();
//locally transpose for outer product
178
for
(
int
k=0; k<
ITERS
; k++)
179
{
180
splitC
=
multiply
(
splitA
,
splitB
,
CMG
,
true
,
false
);
// outer product
181
delete
splitC
;
182
}
183
}
184
185
else
// default column-threaded
186
{
187
for
(
int
k=0;
ITERS
>0 && k<
ITERS
-1; k++)
188
{
189
splitC
=
multiply
(
splitA
,
splitB
,
CMG
,
false
,
true
);
190
delete
splitC
;
191
}
192
splitC
=
multiply
(
splitA
,
splitB
,
CMG
,
false
,
true
);
193
int64_t
nnzC
=0;
194
int64_t
localnnzC
=
splitC
->getnnz();
195
MPI_Allreduce
( &
localnnzC
, &
nnzC
, 1,
MPIType<int64_t>
(),
MPI_SUM
,
MPI_COMM_WORLD
);
196
if
(myrank == 0)
cout
<<
"\n After multiplication: nnzC= "
<<
nnzC
<<
endl
<<
endl
;
197
delete
splitC
;
198
199
}
200
}
201
202
MPI_Finalize
();
203
return
0;
204
}
205
206
CCGrid.h
EDGEFACTOR
#define EDGEFACTOR
Definition
DirOptBFS.cpp:81
Glue.h
Multiplier.h
Reductions.h
SplitMatDist.h
combblas::CCGrid
Definition
CCGrid.h:7
combblas::CommGrid
Definition
CommGrid.h:45
combblas::DistEdgeList
Definition
DistEdgeList.h:82
combblas::SpParHelper::Print
static void Print(const std::string &s)
Definition
SpParHelper.cpp:811
main
int main(int argc, char *argv[])
Definition
mpipspgemm.cpp:34
comm_reduce
double comm_reduce
Definition
mpipspgemm.cpp:23
comp_split
double comp_split
Definition
mpipspgemm.cpp:28
ITERS
#define ITERS
Definition
mpipspgemm.cpp:32
comm_bcast
double comm_bcast
Definition
mpipspgemm.cpp:22
comm_split
double comm_split
Definition
mpipspgemm.cpp:30
comp_reduce
double comp_reduce
Definition
mpipspgemm.cpp:25
comp_result
double comp_result
Definition
mpipspgemm.cpp:26
comp_trans
double comp_trans
Definition
mpipspgemm.cpp:29
comp_summa
double comp_summa
Definition
mpipspgemm.cpp:24
comp_reduce_layer
double comp_reduce_layer
Definition
mpipspgemm.cpp:27
combblas
Definition
CCGrid.h:4
combblas::SplitMat
void SplitMat(CCGrid &CMG, SpDCCols< IT, NT > *localmat, SpDCCols< IT, NT > &splitmat, bool rowsplit=false)
Definition
SplitMatDist.h:144
combblas::multiply
SpDCCols< IT, NT > * multiply(SpDCCols< IT, NT > &splitA, SpDCCols< IT, NT > &splitB, CCGrid &CMG, bool isBT, bool threaded)
Definition
Multiplier.h:11
A
double A
B
double B
Definition
options.h:15
stdint.h
time.h
3DSpGEMM
mpipspgemm.cpp
Generated by
1.9.8