Skip to content

Commit

Permalink
added guid hasher
Browse files Browse the repository at this point in the history
  • Loading branch information
SteveRuble committed Sep 27, 2021
1 parent ec818fe commit f13d406
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 18 deletions.
12 changes: 9 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Bloomn provides a modern, high performance bloom filter implementation.
- Supports scalable bloom filters for scenarios where set size is unknown.
- Thread safe.
- High test coverage.
- Default key hasher handles string, byte[], and numeric types.
- Default key hasher handles `string`, `byte[]`, `Guid`, and numeric types.


## Examples
Expand All @@ -19,6 +19,11 @@ See [tests/Bloomn.Tests/Examples](tests/Bloomn.Tests/Examples)

### Using Service Provider
```c#
using Bloomn;
using Bloomn.Extensions;

// ...
Directory.CreateDirectory("./Data");
const string filePath = "./Data/filter.json";

Expand Down Expand Up @@ -86,8 +91,9 @@ var filter = BloomFilter.Builder<int>()

For maximum flexibility and performance you can check whether a key is present
and defer the add until later (so you only need to calculate the hash once).
This is at least 15% faster than checking and adding as separate operations,
although it will be more if your key is large.
This is only marginally faster than checking and adding as separate operations.
This API was designed before later performance improvements to the filter,
which left the deferred add with very little benefit.

It's important to dispose of the check result to avoid memory leaks.

Expand Down
15 changes: 14 additions & 1 deletion src/Bloomn/DefaultHasherFactoryV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ public class DefaultHasherFactoryV1 :
IKeyHasherFactory<long>,
IKeyHasherFactory<float>,
IKeyHasherFactory<double>,
IKeyHasherFactory<decimal>
IKeyHasherFactory<decimal>,
IKeyHasherFactory<Guid>
{
Func<int, uint> IKeyHasherFactory<int>.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus);

Expand All @@ -26,6 +27,18 @@ public class DefaultHasherFactoryV1 :

Func<double, uint> IKeyHasherFactory<double>.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus);

/// <summary>
/// Creates a hasher for <see cref="Guid"/> keys.
/// </summary>
/// <param name="seed">Seed passed (reinterpreted as <c>uint</c>) to the underlying hash computation.</param>
/// <param name="modulus">Value the raw hash is reduced by before it is returned.</param>
/// <returns>
/// A delegate that writes the key's 16 bytes to a stack buffer, hashes them with
/// <c>Compute</c>, and returns the absolute value of the hash modulo <paramref name="modulus"/>.
/// </returns>
Func<Guid, uint> IKeyHasherFactory<Guid>.CreateHasher(int seed, int modulus)
{
    // Convert the seed once, outside the per-key path.
    var unsignedSeed = (uint) seed;

    uint HashGuid(Guid key)
    {
        // A Guid is exactly 16 bytes, so a stack buffer avoids a heap allocation per key.
        Span<byte> buffer = stackalloc byte[16];
        key.TryWriteBytes(buffer);

        var rawHash = Compute(buffer, (uint) buffer.Length, unsignedSeed);
        var remainder = rawHash % modulus;
        return (uint) Math.Abs(remainder);
    }

    return HashGuid;
}

Func<byte[], uint> IKeyHasherFactory<byte[]>.CreateHasher(int seed, int modulus)
{
Expand Down
70 changes: 56 additions & 14 deletions tests/Bloomn.Benchmarks/DefaultHasherBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -1,37 +1,79 @@
using System;
using System.Collections.Generic;
using System.Linq;
using BenchmarkDotNet.Attributes;

namespace Bloomn.Benchmarks
{
[SimpleJob(launchCount: 1, warmupCount: 1, targetCount: 5, invocationCount: 100)]
[MaxIterationCount(100)]
[SimpleJob(launchCount: 1, warmupCount: 1, targetCount: 5, invocationCount: 1)]
[MemoryDiagnoser]
public class DefaultHasherBenchmarks
{

public const int OperationsPerInvoke = 100;
public const int OperationsPerInvoke = 10000;

public readonly List<int> Ints = Enumerable.Range(0, OperationsPerInvoke).ToList();

/// <summary>
/// Benchmark entry point for <c>int</c> keys: runs <c>HashBenchmark&lt;int&gt;.Execute</c>
/// over the pre-built <c>Ints</c> list.
/// </summary>
[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public void IntHasherBenchmark()
{
var b = new HashBenchmark<int>();
b.Execute(Ints);
}

public readonly List<double> Doubles = Enumerable.Range(0, OperationsPerInvoke).Select(x => x * (double)123.456).ToList();

public Func<int, uint> IntHasher { get; set; }
/// <summary>
/// Benchmark entry point for <c>double</c> keys: runs <c>HashBenchmark&lt;double&gt;.Execute</c>
/// over the pre-built <c>Doubles</c> list.
/// </summary>
[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public void DoublesHasherBenchmark()
{
var b = new HashBenchmark<double>();
b.Execute(Doubles);
}

public DefaultHasherBenchmarks()
public readonly List<Guid> Guids = Enumerable.Range(0, OperationsPerInvoke).Select(x => Guid.NewGuid()).ToList();

[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public void GuidHasherBenchmark()
{
IntHasher = CreateHasher<int>();
var b = new HashBenchmark<Guid>();
b.Execute(Guids);
}

public Func<T, uint> CreateHasher<T>()
public readonly List<string> Strings = Enumerable.Range(0, OperationsPerInvoke).Select(x => Guid.NewGuid().ToString()).ToList();

[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public void StringHasherBenchmark()
{
var factory = (IKeyHasherFactory<T>) new DefaultHasherFactoryV1();
return factory.CreateHasher(1, 1234567);
var b = new HashBenchmark<string>();
b.Execute(Strings);
}

private static readonly Random Random = new Random();
public readonly List<byte[]> Bytes = Enumerable.Range(0, OperationsPerInvoke).Select(x =>
{
var b = new byte[64];
Random.NextBytes(b);
return b;
}).ToList();

[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public void IntHasherBenchmark()
public void BytesHasherBenchmark()
{
var b = new HashBenchmark<byte[]>();
b.Execute(Bytes);
}

public class HashBenchmark<T>
{
var hasher = IntHasher;
uint total = 0;
for (int i = 0; i < OperationsPerInvoke; i++)
public void Execute(List<T> keys)
{
total += hasher(i);
var factory = (IKeyHasherFactory<T>) new DefaultHasherFactoryV1();
var hasher = factory.CreateHasher(0, 12345);
uint total = 0;
foreach (var key in keys)
{
total += hasher(key);
}
}
}
}
Expand Down

0 comments on commit f13d406

Please sign in to comment.