-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount-2paths-sparseVector.C
147 lines (128 loc) · 4.3 KB
/
count-2paths-sparseVector.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#include "gettime.h"
#include "graphIO.h"
#include "myVector.h"
#include "parallel.h"
#include "parseCommandLine.h"
#include "sparseSet.h"
// Takes as input a file in SNAP format
//(http://snap.stanford.edu/data/index.html).
// Currently assumes a directed graph where each directed edge
// appears once as a pair (u,v).
int parallel_main(int argc, char *argv[]) {
commandLine P(
argc, argv,
"-batch <batchSize> -totalEdges <totalEdges> <input SNAP file>");
char *iFile = P.getArgument(0);
long batchSize = P.getOptionLongValue("-batch", 10000);
long totalEdges = P.getOptionLongValue("-totalEdges", 1000000);
edgeArray<uintT> G = readSNAP<uintT>(iFile);
// minimum of totalEdges and number of edges in the graph
totalEdges = min(totalEdges, (long)G.nonZeros);
cout << "starting timer\n";
timer t;
t.start();
long n = max(G.numRows, G.numCols); // number of vertices
myVector *inEdges = newA(myVector, n);
myVector *outEdges = newA(myVector, n);
for (long i = 0; i < n; i++) {
inEdges[i].init();
outEdges[i].init();
}
edgeArray<uintT> TwoHopG;
long numBatches = 1 + (totalEdges - 1) / batchSize;
long listCount = 0;
// sparseSets to store the in/out edges in a batch
sparseSet batchInEdges = sparseSet(batchSize, 1);
sparseSet batchOutEdges = sparseSet(batchSize, 1);
for (long i = 0; i < numBatches; i++) {
// clear sparseSets
batchInEdges.clearA();
batchOutEdges.clearA();
// add edges in this batch to sparseSet
for (long j = i * batchSize;
j < min((long)(i + 1) * batchSize, (long)totalEdges); j++) {
uintT src = G.E[j].u;
uintT dst = G.E[j].v;
batchOutEdges.insert(src, dst);
batchInEdges.insert(dst, src);
}
// extract the entries from the sparseSets. In.A is an array of
// pairs (a,b) where a is the vertex id and b is a pointer to its
// myVector of in-edges. Out.A is an array of pairs (a,b) where a
// is the vertex id and b is a pointer to its myVector of
// out-edges.
_seq<kvPair> In = batchInEdges.entries();
_seq<kvPair> Out = batchOutEdges.entries();
// loop through all vertices in batch with in-neighbors
for (long k = 0; k < In.n; k++) {
uintE v = In.A[k].first;
myVector *vIn = In.A[k].second;
// intersect vertex v's in-neighbors with its
// out-neighbors from batch
myVector *vOut = batchOutEdges.find(v);
if (vOut != NULL) {
for (long g = 0; g < vIn->size(); g++) {
for (long h = 0; h < vOut->size(); h++) {
listCount++;
}
}
}
// intersect vertex v's in-neighbors with its
// out-neighbors from existing graph
for (long h = 0; h < outEdges[v].size(); h++) {
for (long g = 0; g < vIn->size(); g++) {
listCount++;
}
}
}
for (long k = 0; k < Out.n; k++) {
uintE v = Out.A[k].first;
myVector *vOut = Out.A[k].second;
// intersect vertex v's out-neighbors with its
// in-neighbors from existing graph
for (long h = 0; h < inEdges[v].size(); h++) {
for (long g = 0; g < vOut->size(); g++) {
listCount++;
}
}
}
// add batch edges to graph
for (long k = 0; k < In.n; k++) {
uintE v = In.A[k].first;
myVector *vIn = In.A[k].second;
for (long g = 0; g < vIn->size(); g++) {
inEdges[v].add(vIn->get(g));
}
}
for (long k = 0; k < Out.n; k++) {
uintE v = Out.A[k].first;
myVector *vOut = Out.A[k].second;
for (long g = 0; g < vOut->size(); g++) {
outEdges[v].add(vOut->get(g));
}
}
In.del();
Out.del();
// another way to add batch edges to graph. cache locality seems to be
// worse.
// for (long j = i * batchSize;
// j < min((long)(i + 1) * batchSize, (long)totalEdges); j++) {
// uintT src = G.E[j].u;
// uintT dst = G.E[j].v;
// outEdges[src].add(dst);
// inEdges[dst].add(src);
// }
}
cout << "total count via listing = " << listCount << endl;
t.reportTotal("total time");
// check answer
long checkCount = 0;
for (long i = 0; i < n; i++) {
checkCount += outEdges[i].size() * inEdges[i].size();
}
cout << "expected count = " << checkCount << endl;
batchInEdges.del();
batchOutEdges.del();
free(inEdges);
free(outEdges);
}