-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcuckoo.cpp
452 lines (420 loc) · 20.5 KB
/
cuckoo.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
// Copyright 2017 Akamai Technologies, Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
//
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cuckoo.h"
#include <cassert>
#include <math.h>
#include <cstring>
#include <cstdlib>
#include <openssl/sha.h>
// ### Creating a digest {#creating}
Cuckoo::Cuckoo(unsigned probability, unsigned entries, int maxCount) {
// Given the following inputs:
// * `P`, an integer smaller than 256, that indicates the probability of a false positive that is acceptable, expressed as `1/2\*\*P`.
// * `N`, an integer that represents the number of entries - a prime number smaller than 2\*\*32
assert(probability < 256);
assert(entries < pow(2,32));
// 1. Let `f` be the number of bits per fingerprint, calculated as `P + 3`
unsigned fingerprintSize = probability + 3;
// 3. Let `allocated` be the closest power of 2 that is larger than `N`.
unsigned logEntries = ceil(log2(entries));
unsigned allocationEntries = (unsigned)pow(2, logEntries);
// 4. Let `bytes` be `f`\*`allocated`\*`b`/8 rounded up to the nearest integer
unsigned bytes = ceil((float)(fingerprintSize * allocationEntries * BucketSize) / 8.0);
// 5. Add 5 to `bytes`
bytes += 5;
// 6. Allocate memory of `bytes` and set it to zero. Assign it to `digest-value`.
m_digest = new unsigned char[bytes];
memset(m_digest, 0, bytes);
// 7. Set the first byte to `P`
m_digest[0] = (char)fingerprintSize;
// 8. Set the second till fifth bytes to `N` in big endian form
bigEndianWrite(m_digest, 1, 4, entries);
m_digestSize = bytes;
m_maxCount = maxCount;
}
Cuckoo::~Cuckoo() {
delete m_digest;
}
// ### Adding a URL to the Digest-Value {#adding}
unsigned Cuckoo::add(std::string URL, std::string ETag) {
//Given the following inputs:
//
//* `URL` a string corresponding to the Effective Request URI ({{RFC7230}}, Section 5.5) of a cached
//response {{RFC7234}}
//* `ETag` a string corresponding to the entity-tag {{RFC7232}} of a cached response {{RFC7234}} (if
//the ETag is available; otherwise, null);
//* `maxcount` - max number of cuckoo hops
//* `digest-value`
int maxCount = m_maxCount;
unsigned char* digest = m_digest;
size_t digestSize = m_digestSize;
// 1. Let `f` be the value of the first byte of `digest-value`.
unsigned fingerprintSize = digest[0];
// 3. Let `N` be the value of the second to fifth bytes of `digest-value` in big endian form.
unsigned long entries = bigEndianRead(digest, 1, 4);
// 4. Let `key` be the return value of {{key}} with `URL` and `ETag` as inputs.
std::string keyStr = key(URL, ETag);
// 5. Let `h1` be the return value of {{hash}} with `key` and `N` as inputs.
unsigned long h1 = hash(keyStr, entries);
// 6. Let `dest_fingerprint` be the return value of {{fingerprint}} with `key` and `f` as inputs.
unsigned long destinationFingerprintValue = fingerprint(keyStr, fingerprintSize);
// 7. Let `h2` be the return value of {{hash2}} with `h1`, `dest_fingerprint` and `N` as inputs.
unsigned long h2 = alternateHash(h1, destinationFingerprintValue, entries);
// 8. Let `h` be either `h1` or `h2`, picked in random.
int randomNumber = rand() % 2;
unsigned long h = (randomNumber != 0) ? h1 : h2;
// 9. While `maxcount` is larger than zero:
while (maxCount) {
// 1. Let `position_start` be 40 + `h` * `f` \* `b`.
unsigned positionStart = 40 + h * fingerprintSize * BucketSize;
// 2. Let `position_end` be `position_start` + `f` \* `b`.
unsigned positionEnd = positionStart + fingerprintSize * BucketSize;
// Make sure we're not writing outside the table.
assert(ceil(positionEnd / 8) <= digestSize);
unsigned long fingerprintValue;
// 3. While `position_start` < `position_end`:
while (positionStart < positionEnd) {
// 1. Let `bits` be `f` bits from `digest_value` starting at `position_start`.
fingerprintValue = readFingerprint(digest, positionStart, fingerprintSize);
// 2. If `bits` is all zeros, set `bits` to `dest_fingerprint` and terminate these steps.
if (fingerprintValue == 0) {
writeFingerprint(digest, positionStart, fingerprintSize, destinationFingerprintValue);
return maxCount;
}
// 3. Add `f` to `position_start`.
positionStart += fingerprintSize;
}
// 4. Let `e` be a random number from 0 to `b`.
int elementToThrow = rand() % BucketSize;
unsigned positionStartBefore = positionStart;
// 5. Substract `f` * (`b` - `e`) from `position_start`.
positionStart -= fingerprintSize * (BucketSize - elementToThrow);
// 6. Let `bits` be `f` bits from `digest_value` starting at `position_start`.
// 7. Let `fingerprint` be the value of bits, read as big endian.
fingerprintValue = readFingerprint(digest, positionStart, fingerprintSize);
// 8. Set `bits` to `dest_fingerprint`.
writeFingerprint(digest, positionStart, fingerprintSize, destinationFingerprintValue);
// 9. Set `dest_fingerprint` to `fingerprint`.
destinationFingerprintValue = fingerprintValue;
// 10. Let `h` be {{hash2}} with `h`, `dest_fingerprint` and `N` as inputs.
h = alternateHash(h, destinationFingerprintValue, entries);
// 11. Substract 1 from `maxcount`.
--maxCount;
}
// 10. Return an error.
printf("ERROR - maxcount reached\n");
return 0;
}
// ### Removing a URL to the Digest-Value {#removing}
void Cuckoo::remove(std::string URL, std::string ETag) {
// Given the following inputs:
//
// * `URL` a string corresponding to the Effective Request URI ({{RFC7230}}, Section 5.5) of a cached
// response {{RFC7234}}
// * `ETag` a string corresponding to the entity-tag {{RFC7232}} of a cached response {{RFC7234}} (if
// the ETag is available; otherwise, null);
// * `digest-value`
unsigned char* digest = m_digest;
size_t digestSize = m_digestSize;
// 1. Let `f` be the value of the first byte of `digest-value`.
unsigned fingerprintSize = digest[0];
// 3. Let `N` be the value of the second to fifth bytes of `digest-value` in big endian form.
unsigned long entries = bigEndianRead(digest, 1, 4);
// 4. Let `key` be the return value of {{key}} with `URL` and `ETag` as inputs.
std::string keyStr = key(URL, ETag);
// 5. Let `h1` be the return value of {{hash}} with `key` and `N` as inputs.
unsigned long h1 = hash(keyStr, entries);
// 6. Let `fingerprint` be the return value of {{fingerprint}} with `key` and `f` as inputs.
unsigned long destinationFingerprintValue = fingerprint(keyStr, fingerprintSize);
// 7. Let `h2` be the return value of {{hash2}} with `h1`, `fingerprint` and `N` as inputs.
unsigned long h2 = alternateHash(h1, destinationFingerprintValue, entries);
// 8. Let `hashes` be an array containing `h1` and `h2`.
unsigned long hashes[2];
hashes[0] = h1;
hashes[1] = h2;
// 9. For each `h` in `hashes`:
for (int i = 0; i < 2; ++i) {
unsigned long h = hashes[i];
// 1. Let `position_start` be 40 + `h` \* `f` \* `b`.
unsigned positionStart = 40 + h * fingerprintSize * BucketSize;
// 2. Let `position_end` be `position_start` + `f` \* `b`.
unsigned positionEnd = positionStart + fingerprintSize * BucketSize;
unsigned long fingerprintValue;
// Make sure we're not reading outside the table.
assert(ceil(positionStart / 8) <= digestSize);
// 3. While `position_start` < `position_end`:
while (positionStart < positionEnd) {
// 1. Let `bits` be `f` bits from `digest_value` starting at `position_start`.
fingerprintValue = readFingerprint(digest, positionStart, fingerprintSize);
// 2. If `bits` is `fingerprint`, set `bits` to all zeros and terminate these steps.
if (fingerprintValue == destinationFingerprintValue) {
writeFingerprint(digest, positionStart, fingerprintSize, 0);
return;
}
// 3. Add `f` to `position_start`.
positionStart += fingerprintSize;
}
}
}
// ### Querying the Digest for a Value {#querying}
bool Cuckoo::query(std::string URL, std::string ETag) {
//Given the following inputs:
//
//* `URL` a string corresponding to the Effective Request URI ({{RFC7230}}, Section 5.5) of a cached
//response {{RFC7234}}.
//* `ETag` a string corresponding to the entity-tag {{RFC7232}} of a cached response {{RFC7234}} (if
//the ETag is available; otherwise, null).
//* `digest-value`, an array of bits.
const unsigned char* digest = m_digest;
size_t digestSize = m_digestSize;
// 1. Let `f` be the value of the first byte of `digest-value`.
unsigned fingerprintSize = digest[0];
// 3. Let `N` be the value of the second to fifth bytes of `digest-value` in big endian form.
unsigned long entries = bigEndianRead(digest, 1, 4);
// 4. Let `key` be the return value of {{key}} with `URL` and `ETag` as inputs.
std::string keyStr = key(URL, ETag);
// 5. Let `h1` be the return value of {{hash}} with `key` and `N` as inputs.
unsigned long h1 = hash(keyStr, entries);
// 6. Let `fingerprint` be the return value of {{fingerprint}} with `key` and `f` as inputs.
unsigned long destinationFingerprintValue = fingerprint(keyStr, fingerprintSize);
// 7. Let `h2` be the return value of {{hash2}} with `h1`, `fingerprint` and `N` as inputs.
unsigned long h2 = alternateHash(h1, destinationFingerprintValue, entries);
// 8. Let `hashes` be an array containing `h1` and `h2`.
unsigned long hashes[2];
hashes[0] = h1;
hashes[1] = h2;
// 9. For each `h` in `hashes`:
for (int i = 0; i < 2; ++i) {
unsigned long h = hashes[i];
// 1. Let `position_start` be 40 + `h` \* `f` \* `b`.
unsigned positionStart = 40 + h * fingerprintSize * BucketSize;
// 2. Let `position_end` be `position_start` + `f` \* `b`.
unsigned positionEnd = positionStart + fingerprintSize * BucketSize;
unsigned long fingerprintValue;
// Make sure we're not reading beyond the table.
assert(ceil(positionStart / 8) <= digestSize);
// 3. While `position_start` < `position_end`:
while (positionStart < positionEnd) {
// 1. Let `bits` be `f` bits from `digest_value` starting at `position_start`.
fingerprintValue = readFingerprint(digest, positionStart, fingerprintSize);
// 2. If `bits` is `fingerprint`, return true
if (fingerprintValue == destinationFingerprintValue) {
return true;
}
// 3. Add `f` to `position_start`.
positionStart += fingerprintSize;
}
}
// 14. Return false.
return false;
}
// ### Computing the key {#key}
std::string Cuckoo::key(std::string URL, std::string ETag) {
// Given the following inputs:
//
// * `URL`, an array of characters
// * `ETag`, an array of characters
// 1. Let `key` be `URL` converted to an ASCII string by percent-encoding as appropriate {{RFC3986}}.
// TODO(yoav): Convert to ascii.
std::string keyString = URL;
// 2. If `ETag` is not null:
if (!ETag.empty()) {
// 1. Append `ETag` to `key` as an ASCII string, including both the `weak` indicator (if present) and double quotes, as per {{RFC7232}}, Section 2.3.
// TODO(yoav): Only add the double quotes if they are not already there.
keyString += "\"" + ETag + "\"";
}
return keyString;
}
// ### Computing a Hash Value {#hash}
unsigned Cuckoo::hash(std::string key, unsigned entries) {
// Given the following inputs:
//
// * `key`, an array of characters.
// * `N`, an integer
// 1. Let `hash-value` be the SHA-256 message digest {{RFC6234}} of `key`, truncated to 32 bits, expressed as an integer.
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256, key.c_str(), key.size());
SHA256_Final(hash, &sha256);
// Truncate to 32 bits
unsigned long hashValue = bigEndianRead((const unsigned char *)hash, SHA256_DIGEST_LENGTH - 4, 4);
// 2. Return `hash-value` modulo N.
return hashValue % entries;
}
// ### Computing an Alternative Hash Value {#hash2}
unsigned Cuckoo::alternateHash(unsigned hash1, unsigned long fingerprintValue, unsigned entries) {
// Given the following inputs:
//
// * `hash1`, an integer indicating the previous hash
// * `fingerprint`, an integer indicating the fingerprint value
// * `N`, an integer indicating the number of entries in the digest
// Note: This is not thread safe (but a bit faster)!!!
static char fingerprintString[20];
// 1. Let `fingerprint-string` be the value of `fingerprint` in base 10, expressed as a string.
sprintf((char*)fingerprintString, "%lu", fingerprintValue);
// 2. Let `h2` be the return value of {{hash}} with `fingerprint-string` and `N` as inputs, XORed with `h1`.
unsigned long hash2 = hash(std::string(fingerprintString), entries);
hash2 ^= hash1;
// 3. Return `hash2`.
return hash2;
}
// ### Computing a fingerprint value {#fingerprint}
unsigned long Cuckoo::fingerprint(std::string key, unsigned fingerprintSize) {
// Given the following inputs:
//
// * `key`, an array of characters
// * `f`, an integer indicating the number of output bits
assert(fingerprintSize > 0);
// 1. Let `hash-value` be the SHA-256 message digest {{RFC6234}} of `key`, expressed as an integer.
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256, key.c_str(), key.size());
SHA256_Final(hash, &sha256);
unsigned long fingerprintValue;
// Note: the implementation below diverges from the literal specified steps, but achieves the same goals.
// 2. Let `h` be the number of bits in `hash-value`
// 3. Let `fingerprint-value` be 0
// 4. While `fingerprint-value` is 0 and `h` > `f`:
for (unsigned i = SHA256_DIGEST_LENGTH * 8 - fingerprintSize; i >= fingerprintSize; i -= fingerprintSize) {
// 1. Let `fingerprint-value` be the `f` least significant bits of `hash-value`.
// 2. Let `hash-value` be the `h`-`f` most significant bits of `hash-value`.
// 3. Substract `f` from `h`.
fingerprintValue = readFingerprint(hash, i, fingerprintSize);
if (fingerprintValue != 0)
break;
}
// 5. If `fingerprint-value` is 0, let `fingerprint-value` be 1.
if (fingerprintValue == 0)
fingerprintValue = 1;
// 6. Return `fingerprint-value`.
return fingerprintValue;
}
// Utility functions below
void Cuckoo::bigEndianWrite(unsigned char* digest, unsigned startPosition, size_t length, unsigned long number) {
const unsigned long digit = 0xff;
for (int i = length - 1; i >= 0; --i) {
unsigned long temp = number & digit;
digest[startPosition + i] = (unsigned char)temp;
number >>= 8;
}
}
unsigned long Cuckoo::bigEndianRead(const unsigned char* digest, unsigned startPosition, size_t length) {
unsigned long readNumber = 0;
for (unsigned i = 0; i < length; ++i) {
readNumber <<= 8;
readNumber += digest[startPosition + i];
}
return readNumber;
}
unsigned long Cuckoo::readFingerprint(const unsigned char* hash, unsigned positionInBits, unsigned fingerprintSizeInBits) {
const float Bits = 8.0;
unsigned endPositionInBits = positionInBits + fingerprintSizeInBits;
unsigned startPositionInBytes = floor((float)positionInBits / Bits);
unsigned endPositionInBytes = ceil((float)endPositionInBits / Bits);
unsigned lengthInBytes = endPositionInBytes - startPositionInBytes;
unsigned long hashValue = bigEndianRead((const unsigned char *)(hash), startPositionInBytes, lengthInBytes);
unsigned extraBitsAtStart = positionInBits - startPositionInBytes * Bits;
unsigned extraBitsAtEnd = endPositionInBytes * Bits - endPositionInBits;
unsigned extraBytes = sizeof(hashValue) - lengthInBytes;
// TODO(yoav): There's probably a better way to do this, but I'm too jetlagged to think.
hashValue <<= extraBytes * (unsigned)Bits + extraBitsAtStart;
hashValue >>= extraBytes * (unsigned)Bits + extraBitsAtStart + extraBitsAtEnd;
return hashValue;
}
void Cuckoo::writeFingerprint(unsigned char* hash, unsigned positionInBits, unsigned fingerprintSizeInBits, unsigned long fingerprintValue) {
const float Bits = 8.0;
unsigned endPositionInBits = positionInBits + fingerprintSizeInBits;
unsigned startPositionInBytes = floor((float)positionInBits / Bits);
unsigned endPositionInBytes = ceil((float)endPositionInBits / Bits);
unsigned lengthInBytes = endPositionInBytes - startPositionInBytes;
unsigned long hashValue = bigEndianRead((const unsigned char *)(hash), startPositionInBytes, lengthInBytes);
unsigned extraBitsAtStart = positionInBits - startPositionInBytes * Bits;
unsigned extraBitsAtEnd = endPositionInBytes * Bits - endPositionInBits;
unsigned extraBytes = sizeof(hashValue) - lengthInBytes;
unsigned long bitmap = -1;
bitmap >>= extraBitsAtEnd + fingerprintSizeInBits;
bitmap <<= extraBitsAtEnd + fingerprintSizeInBits;
bitmap += (unsigned)(pow(2, extraBitsAtEnd)) - 1;
hashValue &= bitmap;
fingerprintValue <<= extraBitsAtEnd;
hashValue |= fingerprintValue;
bigEndianWrite(hash, startPositionInBytes, lengthInBytes, hashValue);
}
// !!! Test only code from here on out !!!
unsigned char hex(unsigned char input) {
if (input >= '0' && input <= '9')
return input - '0';
if (input >= 'a' && input <= 'f') {
return 10 + input - 'a';
}
if (input >= 'A' && input <= 'F') {
return 10 + input - 'A';
}
return 0;
}
void Cuckoo::runTests() {
// Test bigEndianRead
std::string hashStr = "1234567";
unsigned long hashNumber = bigEndianRead((const unsigned char*)hashStr.c_str(), 1, 4);
if (hashNumber != 842216501) {
printf("Fail - hashNumber %lu", hashNumber);
}
// Test bigEndianWrite
unsigned char testDigest[10];
testDigest[0] = 49;
bigEndianWrite(testDigest, 1, 4, 842216501);
testDigest[5] = 0;
if ((char*)testDigest != std::string("12345")) {
printf("Fail - bigEndianWrite got %s\n", testDigest);
}
// Test readFingerprint
hashStr = "0123456789abcdef";
unsigned char fingerprintHash[17];
fingerprintHash[16] = 0;
strncpy((char*)fingerprintHash, hashStr.c_str(), 16);
unsigned long fingerprintValue = readFingerprint(fingerprintHash, 121, 7);
if (fingerprintValue != 102)
printf("FAIL - readFingerprint %lu\n", fingerprintValue);
fingerprintValue = readFingerprint(fingerprintHash, 117, 7);
if (fingerprintValue!= 86)
printf("FAIL - readFingerprint %lu\n", fingerprintValue);
char shaInStr[65] = "2442a9b40768c5ccff9366514374eeca86fd6a3156b11d5f7aaad7b1e3fbbb08";
unsigned char fingerprintHash2[32];
for (int i = 0; i < 32; ++i) {
unsigned char letter = hex(shaInStr[2*i]) * 16 + hex(shaInStr[2*i+1]);
fingerprintHash2[i] = letter;
}
fingerprintValue = readFingerprint(fingerprintHash2, 247, 9);
if (fingerprintValue!= 264)
printf("FAIL - readFingerprint %lu\n", fingerprintValue);
// Test writeFingerprint
writeFingerprint(fingerprintHash, 8, 8, 90);
writeFingerprint(fingerprintHash, 20, 4, 5);
writeFingerprint(fingerprintHash, 28, 3, 6);
if (strcmp((const char*)fingerprintHash, "0Z5=456789abcdef"))
printf("FAIL - writeFingerprint %s\n", fingerprintHash);
// Test hash calculation
std::string keyStr = key("https://example.com/bla.gif", "34bfac");
unsigned long hashValue = hash(keyStr, 2513);
if (hashValue != 1233)
printf("FAIL - hash calculation %lu\n", hashValue);
// Test fingerprint
keyStr = key("https://example.com/bla.gif", "34bfac");
fingerprintValue = fingerprint(keyStr, 9);
if (fingerprintValue != 264)
printf("FAIL - fingerprint calculation %lu\n", fingerprintValue);
}