-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount-2paths-vector.C
124 lines (106 loc) · 3.74 KB
/
count-2paths-vector.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include "gettime.h"
#include "graphIO.h"
#include "parallel.h"
#include "parseCommandLine.h"
#include "myVector.h"
// Takes as input a file in SNAP format
//(http://snap.stanford.edu/data/index.html).
// Currently assumes a directed graph where each directed edge
// appears once as a pair (u,v).
int parallel_main(int argc, char *argv[]) {
commandLine P(
argc, argv,
"-batch <batchSize> -totalEdges <totalEdges> <input SNAP file>");
char *iFile = P.getArgument(0);
long batchSize = P.getOptionLongValue("-batch", 10000);
long totalEdges = P.getOptionLongValue("-totalEdges", 1000000);
edgeArray<uintT> G = readSNAP<uintT>(iFile);
// minimum of totalEdges and number of edges in the graph
totalEdges = min(totalEdges, (long)G.nonZeros);
cout << "starting timer\n";
timer t;
t.start();
long n = max(G.numRows, G.numCols); // number of vertices
// memoryPool M(1 << 30);
myVector *inEdges = newA(myVector, n);
myVector *outEdges = newA(myVector, n);
uintT *degrees = newA(uintT, n); // so we can skip nodes with degree < 2
for (long i = 0; i < n; i++) {
inEdges[i].init();
outEdges[i].init();
degrees[i] = 0;
}
long numBatches = 1 + (totalEdges - 1) / batchSize;
long listCount = 0;
for (long i = 0; i < numBatches; i++) {
myVector *batchInEdges = newA(myVector, n);
myVector *batchOutEdges = newA(myVector, n);
// needed because malloc call from newA doesn't call default ctor.
// we may consider adding "placement new" call to newA:
// https://stackoverflow.com/questions/2995099/malloc-and-constructors
for (long j = 0; j < n; j++) { batchInEdges[j].A = NULL; }
for (long j = 0; j < n; j++) { batchOutEdges[j].A = NULL; }
// edges seen in this batch
for (long j = i * batchSize;
j < min((long)(i + 1) * batchSize, (long)totalEdges); j++) {
uintT src = G.E[j].u;
uintT dst = G.E[j].v;
// init if this is the first time we get an out-edge for this vertex
if (batchOutEdges[src].A == NULL) {
batchOutEdges[src].init();
}
batchOutEdges[src].add(dst);
// init if this is the first time we get an in-edge for this vertex
if (batchInEdges[dst].A == NULL) {
batchInEdges[dst].init();
}
batchInEdges[dst].add(src);
degrees[src]++;
degrees[dst]++;
}
for (long k = 0; k < n; k++) {
// skip vertices with degree smaller than 2, as it isn't possible to have
// it in the center of a 2-hop path
if (degrees[k] < 2) {
continue;
}
// new incoming edges generate 2 hop paths with new outgoing
// edges and with existing outgoing edges
if (batchInEdges[k].A != NULL) {
for (long g = 0; g < batchInEdges[k].size(); g++) {
if (batchOutEdges[k].A != NULL) {
for (long h = 0; h < batchOutEdges[k].size(); h++) {
listCount++;
}
}
for (long h = 0; h < outEdges[k].size(); h++) {
listCount++;
}
}
}
// new outgoing edges generate 2 hop paths with existing
// incoming edges
if (batchOutEdges[k].A != NULL) {
for (long g = 0; g < batchOutEdges[k].size(); g++) {
for (long h = 0; h < inEdges[k].size(); h++) {
listCount++;
}
}
}
}
// store edges processed so far for future passes
for (long j = i * batchSize;
j < min((long)(i + 1) * batchSize, (long)totalEdges); j++) {
uintT src = G.E[j].u;
uintT dst = G.E[j].v;
outEdges[src].add(dst);
inEdges[dst].add(src);
}
free(batchInEdges);
free(batchOutEdges);
}
cout << "total count via listing = " << listCount << endl;
t.reportTotal("total time");
free(inEdges);
free(outEdges);
}