From aad7a7bdea5bcff4cea97368e99d1d001f675482 Mon Sep 17 00:00:00 2001 From: SteveRuble Date: Sun, 26 Sep 2021 11:31:15 -0400 Subject: [PATCH] add benchmarks, reduce allocations --- .gitignore | 5 +- Bloomn.sln | 6 + readme.md | 9 +- src/Bloomn/BloomFilter.cs | 7 + src/Bloomn/BloomFilterBuilder.cs | 63 ++-- src/Bloomn/BloomFilterDimensions.cs | 81 ----- src/Bloomn/BloomFilterDimensionsBuilder.cs | 97 +++++ .../{Callbacks.cs => BloomFilterEvents.cs} | 24 +- src/Bloomn/BloomFilterOptions.cs | 12 +- src/Bloomn/BloomFilterState.cs | 22 +- src/Bloomn/DefaultHasherFactoryV1.cs | 342 ++++++++++++++++++ src/Bloomn/FixedSizeBloomFilter.cs | 45 +-- src/Bloomn/IBloomFilterBuilder.cs | 13 + src/Bloomn/IBloomFilterOptionsBuilder.cs | 12 + src/Bloomn/MathHelpers.cs | 36 ++ src/Bloomn/Murmur3HasherFactory.cs | 118 ------ src/Bloomn/OptionsValidator.cs | 2 +- src/Bloomn/PreparedAdd.cs | 2 +- src/Bloomn/ScalingBloomFilter.cs | 2 +- .../Bloomn.Benchmarks.csproj | 16 + .../DefaultHasherBenchmarks.cs | 38 ++ tests/Bloomn.Benchmarks/Program.cs | 13 + .../Bloomn.Benchmarks/SingleItemBenchmarks.cs | 112 ++++++ ...gerTests.cs => BloomFilterBuilderTests.cs} | 2 +- tests/Bloomn.Tests/BloomFilterTests.cs | 51 +-- tests/Bloomn.Tests/Examples/ExampleProgram.cs | 19 + .../Examples/NewBuilderExample.cs | 66 ++++ .../Examples/ServiceProviderExample.cs | 70 ++++ .../Extensions/DependencyInjectionTests.cs | 6 +- tests/Bloomn.Tests/PerformanceExperiments.cs | 2 + tests/Bloomn.Tests/ScalingFilterTests.cs | 2 +- 31 files changed, 983 insertions(+), 312 deletions(-) create mode 100644 src/Bloomn/BloomFilter.cs create mode 100644 src/Bloomn/BloomFilterDimensionsBuilder.cs rename src/Bloomn/{Callbacks.cs => BloomFilterEvents.cs} (73%) create mode 100644 src/Bloomn/DefaultHasherFactoryV1.cs create mode 100644 src/Bloomn/IBloomFilterBuilder.cs create mode 100644 src/Bloomn/IBloomFilterOptionsBuilder.cs create mode 100644 src/Bloomn/MathHelpers.cs delete mode 100644 src/Bloomn/Murmur3HasherFactory.cs create 
mode 100644 tests/Bloomn.Benchmarks/Bloomn.Benchmarks.csproj create mode 100644 tests/Bloomn.Benchmarks/DefaultHasherBenchmarks.cs create mode 100644 tests/Bloomn.Benchmarks/Program.cs create mode 100644 tests/Bloomn.Benchmarks/SingleItemBenchmarks.cs rename tests/Bloomn.Tests/{BloomFilterManagerTests.cs => BloomFilterBuilderTests.cs} (92%) create mode 100644 tests/Bloomn.Tests/Examples/ExampleProgram.cs create mode 100644 tests/Bloomn.Tests/Examples/NewBuilderExample.cs create mode 100644 tests/Bloomn.Tests/Examples/ServiceProviderExample.cs diff --git a/.gitignore b/.gitignore index 769e20b..375244d 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,7 @@ coverage.json .vscode/ # Snapshotter mismatches -**/__mismatch__/ \ No newline at end of file +**/__mismatch__/ + +# Benchmark results +tests/Bloomn.Benchmarks/BenchmarkDotNet.Artifacts diff --git a/Bloomn.sln b/Bloomn.sln index 4079333..2bc48d8 100644 --- a/Bloomn.sln +++ b/Bloomn.sln @@ -4,6 +4,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Bloomn", "src\Bloomn\Bloomn EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Bloomn.Tests", "tests\Bloomn.Tests\Bloomn.Tests.csproj", "{171BBD20-CB99-41CD-9F23-EA7BF99241E4}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Bloomn.Benchmarks", "tests\Bloomn.Benchmarks\Bloomn.Benchmarks.csproj", "{D00EC957-A24D-4061-907C-F7E9E24D489C}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -18,5 +20,9 @@ Global {171BBD20-CB99-41CD-9F23-EA7BF99241E4}.Debug|Any CPU.Build.0 = Debug|Any CPU {171BBD20-CB99-41CD-9F23-EA7BF99241E4}.Release|Any CPU.ActiveCfg = Release|Any CPU {171BBD20-CB99-41CD-9F23-EA7BF99241E4}.Release|Any CPU.Build.0 = Release|Any CPU + {D00EC957-A24D-4061-907C-F7E9E24D489C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D00EC957-A24D-4061-907C-F7E9E24D489C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D00EC957-A24D-4061-907C-F7E9E24D489C}.Release|Any CPU.ActiveCfg = 
Release|Any CPU + {D00EC957-A24D-4061-907C-F7E9E24D489C}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal diff --git a/readme.md b/readme.md index 6e6370d..0d935e1 100644 --- a/readme.md +++ b/readme.md @@ -2,9 +2,14 @@ Bloomn provides a modern, high performance bloom filter implementation. -### Features +## Features - Provides a very low allocation API for demanding scenarios - Provides a simpler API for simpler scenarios - Bloom filter state can be exported, serialized, and imported -- Integrates with standard .NET dependency injection framework. +- Integrates with standard .NET dependency injection framework + + +## Examples + + diff --git a/src/Bloomn/BloomFilter.cs b/src/Bloomn/BloomFilter.cs new file mode 100644 index 0000000..c7db0c8 --- /dev/null +++ b/src/Bloomn/BloomFilter.cs @@ -0,0 +1,7 @@ +namespace Bloomn +{ + public static class BloomFilter + { + public static IBloomFilterBuilder Builder() => new BloomFilterBuilder(new BloomFilterOptions()); + } +} \ No newline at end of file diff --git a/src/Bloomn/BloomFilterBuilder.cs b/src/Bloomn/BloomFilterBuilder.cs index f76c983..7a75c46 100644 --- a/src/Bloomn/BloomFilterBuilder.cs +++ b/src/Bloomn/BloomFilterBuilder.cs @@ -5,28 +5,14 @@ namespace Bloomn { - public interface IBloomFilterOptionsBuilder - { - IBloomFilterBuilder WithCapacityAndErrorRate(int capacity, double errorRate); - IBloomFilterBuilder WithDimensions(BloomFilterDimensions dimensions); - IBloomFilterBuilder WithScaling(double capacityScaling = 2, double errorRateScaling = 0.8); - IBloomFilterBuilder WithHasher(IKeyHasherFactory hasherFactory); - } - - public interface IBloomFilterBuilder : IBloomFilterOptionsBuilder - { - IBloomFilterBuilder WithOptions(BloomFilterOptions options); - IBloomFilterBuilder WithProfile(string profile); - IBloomFilterBuilder WithState(BloomFilterState state); - IBloomFilter Build(); - } - - internal class BloomFilterBuilder : IBloomFilterBuilder + internal class BloomFilterBuilder : 
IBloomFilterBuilder, IBloomFilterOptionsBuilder { + private bool _validateStateAgainstOptions; private readonly IOptionsSnapshot>? _optionsSnapshot; public BloomFilterBuilder(IOptionsSnapshot> options) { + _validateStateAgainstOptions = true; _optionsSnapshot = options; Options = options.Value; } @@ -40,13 +26,15 @@ public BloomFilterBuilder(BloomFilterOptions options) internal BloomFilterState? State { get; set; } - public IBloomFilterBuilder WithCapacityAndErrorRate(int capacity, double errorRate) + IBloomFilterOptionsBuilder IBloomFilterOptionsBuilder.WithCapacityAndFalsePositiveProbability(int capacity, double falsePositiveProbability) { - return WithDimensions(BloomFilterDimensions.ForCapacityAndErrorRate(capacity, errorRate)); + _validateStateAgainstOptions = true; + return WithDimensions(BloomFilterDimensions.ForCapacityAndErrorRate(capacity, falsePositiveProbability)); } - public IBloomFilterBuilder WithDimensions(BloomFilterDimensions dimensions) + public IBloomFilterOptionsBuilder WithDimensions(BloomFilterDimensions dimensions) { + _validateStateAgainstOptions = true; Options.Dimensions = new BloomFilterDimensionsBuilder { FalsePositiveProbability = dimensions.FalsePositiveProbability, @@ -58,8 +46,9 @@ public IBloomFilterBuilder WithDimensions(BloomFilterDimensions dimensions return this; } - public IBloomFilterBuilder WithScaling(double capacityScaling = 2, double errorRateScaling = 0.8) + public IBloomFilterOptionsBuilder WithScaling(double capacityScaling = 2, double errorRateScaling = 0.8) { + _validateStateAgainstOptions = true; Options.Scaling = new BloomFilterScaling { MaxCapacityBehavior = MaxCapacityBehavior.Scale, @@ -69,23 +58,47 @@ public IBloomFilterBuilder WithScaling(double capacityScaling = 2, double return this; } - public IBloomFilterBuilder WithHasher(IKeyHasherFactory hasherFactory) + public IBloomFilterOptionsBuilder WithHasher(IKeyHasherFactory hasherFactory) { + _validateStateAgainstOptions = true; 
Options.SetHasher(hasherFactory); return this; } public IBloomFilterBuilder WithOptions(BloomFilterOptions options) { + _validateStateAgainstOptions = true; Options = options; return this; } + public IBloomFilterBuilder WithOptions(Action> configure) + { + _validateStateAgainstOptions = true; + configure(this); + return this; + } + + public IBloomFilterOptionsBuilder WithCallbacks(BloomFilterEvents events) + { + _validateStateAgainstOptions = true; + Options.Events = events; + return this; + } + + public IBloomFilterOptionsBuilder IgnoreCapacityLimits() + { + _validateStateAgainstOptions = true; + Options.Scaling = Options.Scaling with {MaxCapacityBehavior = MaxCapacityBehavior.Ignore}; + return this; + } + public IBloomFilterBuilder WithProfile(string profile) { + _validateStateAgainstOptions = true; if (_optionsSnapshot == null) { - throw new InvalidOperationException("This builder was not "); + throw new BloomFilterException(BloomFilterExceptionCode.InvalidOptions, "This builder was not acquired from a service provider that could inject options."); } Options = _optionsSnapshot.Get(profile); @@ -106,11 +119,11 @@ public IBloomFilter Build() { Dimensions = Options.GetDimensions(), Scaling = Options.Scaling, - HashAlgorithm = Options.GetHasher().Algorithm + HashAlgorithm = Options.GetHasherFactory().Algorithm }; var state = State; - if (state != null) + if (state != null && _validateStateAgainstOptions) { var parametersFromState = state?.Parameters; diff --git a/src/Bloomn/BloomFilterDimensions.cs b/src/Bloomn/BloomFilterDimensions.cs index 827341c..6397c12 100644 --- a/src/Bloomn/BloomFilterDimensions.cs +++ b/src/Bloomn/BloomFilterDimensions.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Diagnostics.CodeAnalysis; namespace Bloomn { @@ -135,84 +134,4 @@ public static int k(double p) // ReSharper restore InconsistentNaming } } - - public class BloomFilterDimensionsBuilder - { - public double? 
FalsePositiveProbability { get; set; } - public int? Capacity { get; set; } - public int? BitCount { get; set; } - public int? HashCount { get; set; } - - [MemberNotNullWhen(true, nameof(FalsePositiveProbability))] - [MemberNotNullWhen(true, nameof(Capacity))] - [MemberNotNullWhen(true, nameof(BitCount))] - [MemberNotNullWhen(true, nameof(HashCount))] - public bool FullySpecified => FalsePositiveProbability != null && Capacity != null && BitCount != null && HashCount != null; - - public bool Buildable => - Capacity.HasValue && FalsePositiveProbability.HasValue - || FalsePositiveProbability.HasValue && BitCount.HasValue - || Capacity.HasValue && FalsePositiveProbability.HasValue - || Capacity.HasValue && BitCount.HasValue - || FalsePositiveProbability.HasValue && BitCount.HasValue; - - public BloomFilterDimensions Build() - { - if (!Buildable) - { - throw new InvalidOperationException("Not enough parameters are set."); - } - - var makingProgress = true; - while (!FullySpecified && makingProgress) - { - makingProgress = false; - if (!HashCount.HasValue && Capacity.HasValue && BitCount.HasValue) - { - HashCount = BloomFilterDimensions.Equations.k(BitCount.Value, Capacity.Value); - makingProgress = true; - continue; - } - - if (!HashCount.HasValue && FalsePositiveProbability.HasValue) - { - HashCount = BloomFilterDimensions.Equations.k(FalsePositiveProbability.Value); - makingProgress = true; - continue; - } - - if (!BitCount.HasValue && Capacity.HasValue && FalsePositiveProbability.HasValue) - { - BitCount = BloomFilterDimensions.Equations.m(Capacity.Value, FalsePositiveProbability.Value); - makingProgress = true; - continue; - } - - if (!Capacity.HasValue && BitCount.HasValue && HashCount.HasValue && FalsePositiveProbability.HasValue) - { - Capacity = BloomFilterDimensions.Equations.n(BitCount.Value, HashCount.Value, FalsePositiveProbability.Value); - makingProgress = true; - continue; - } - - if (!FalsePositiveProbability.HasValue && BitCount.HasValue && 
Capacity.HasValue && HashCount.HasValue) - { - FalsePositiveProbability = BloomFilterDimensions.Equations.p(BitCount.Value, Capacity.Value, HashCount.Value); - makingProgress = true; - } - } - - if (!FullySpecified) - { - throw new InvalidOperationException($"Could not compute dimensions using provided values: {this}"); - } - - return new BloomFilterDimensions(FalsePositiveProbability.Value, Capacity.Value, BitCount.Value, HashCount.Value); - } - - public override string ToString() - { - return $"{nameof(FalsePositiveProbability)}: {FalsePositiveProbability}, {nameof(Capacity)}: {Capacity}, {nameof(BitCount)}: {BitCount}, {nameof(HashCount)}: {HashCount}"; - } - } } \ No newline at end of file diff --git a/src/Bloomn/BloomFilterDimensionsBuilder.cs b/src/Bloomn/BloomFilterDimensionsBuilder.cs new file mode 100644 index 0000000..07aa8fa --- /dev/null +++ b/src/Bloomn/BloomFilterDimensionsBuilder.cs @@ -0,0 +1,97 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Bloomn +{ + public class BloomFilterDimensionsBuilder + { + public double? FalsePositiveProbability { get; set; } + public int? Capacity { get; set; } + public int? BitCount { get; set; } + public int? 
HashCount { get; set; } + + [MemberNotNullWhen(true, nameof(FalsePositiveProbability))] + [MemberNotNullWhen(true, nameof(Capacity))] + [MemberNotNullWhen(true, nameof(BitCount))] + [MemberNotNullWhen(true, nameof(HashCount))] + public bool FullySpecified => FalsePositiveProbability != null && Capacity != null && BitCount != null && HashCount != null; + + public bool Buildable => + Capacity.HasValue && FalsePositiveProbability.HasValue + || FalsePositiveProbability.HasValue && BitCount.HasValue + || Capacity.HasValue && FalsePositiveProbability.HasValue + || Capacity.HasValue && BitCount.HasValue + || FalsePositiveProbability.HasValue && BitCount.HasValue; + + public BloomFilterDimensions Build() + { + // Create a copy to mutate during building + return new BloomFilterDimensionsBuilder() + { + FalsePositiveProbability = this.FalsePositiveProbability, + Capacity = this.Capacity, + BitCount = this.BitCount, + HashCount = this.HashCount + }.ReallyBuild(); + } + + private BloomFilterDimensions ReallyBuild() + { + if (!Buildable) + { + throw new InvalidOperationException("Not enough parameters are set."); + } + + var makingProgress = true; + while (!FullySpecified && makingProgress) + { + makingProgress = false; + if (!HashCount.HasValue && Capacity.HasValue && BitCount.HasValue) + { + HashCount = BloomFilterDimensions.Equations.k(BitCount.Value, Capacity.Value); + makingProgress = true; + continue; + } + + if (!HashCount.HasValue && FalsePositiveProbability.HasValue) + { + HashCount = BloomFilterDimensions.Equations.k(FalsePositiveProbability.Value); + makingProgress = true; + continue; + } + + if (!BitCount.HasValue && Capacity.HasValue && FalsePositiveProbability.HasValue) + { + BitCount = BloomFilterDimensions.Equations.m(Capacity.Value, FalsePositiveProbability.Value); + makingProgress = true; + continue; + } + + if (!Capacity.HasValue && BitCount.HasValue && HashCount.HasValue && FalsePositiveProbability.HasValue) + { + Capacity = 
BloomFilterDimensions.Equations.n(BitCount.Value, HashCount.Value, FalsePositiveProbability.Value); + makingProgress = true; + continue; + } + + if (!FalsePositiveProbability.HasValue && BitCount.HasValue && Capacity.HasValue && HashCount.HasValue) + { + FalsePositiveProbability = BloomFilterDimensions.Equations.p(BitCount.Value, Capacity.Value, HashCount.Value); + makingProgress = true; + } + } + + if (!FullySpecified) + { + throw new InvalidOperationException($"Could not compute dimensions using provided values: {this}"); + } + + return new BloomFilterDimensions(FalsePositiveProbability.Value, Capacity.Value, BitCount.Value, HashCount.Value); + } + + public override string ToString() + { + return $"{nameof(FalsePositiveProbability)}: {FalsePositiveProbability}, {nameof(Capacity)}: {Capacity}, {nameof(BitCount)}: {BitCount}, {nameof(HashCount)}: {HashCount}"; + } + } +} \ No newline at end of file diff --git a/src/Bloomn/Callbacks.cs b/src/Bloomn/BloomFilterEvents.cs similarity index 73% rename from src/Bloomn/Callbacks.cs rename to src/Bloomn/BloomFilterEvents.cs index d89e2ba..015dbac 100644 --- a/src/Bloomn/Callbacks.cs +++ b/src/Bloomn/BloomFilterEvents.cs @@ -3,7 +3,7 @@ namespace Bloomn { - public class Callbacks + public class BloomFilterEvents { public Action? OnCapacityChanged { get; set; } public Action? 
OnCountChanged { get; set; } @@ -16,13 +16,13 @@ public class Callbacks internal class StateMetrics : IBloomFilterDimensions { - private readonly Callbacks _callbacks; + private readonly BloomFilterEvents _bloomFilterEvents; private long _count; private int _setBitCount; - public StateMetrics(BloomFilterParameters parameters, Callbacks callbacks) + public StateMetrics(BloomFilterParameters parameters, BloomFilterEvents bloomFilterEvents) { - _callbacks = callbacks; + _bloomFilterEvents = bloomFilterEvents; Id = parameters.Id; FalsePositiveProbability = parameters.Dimensions.FalsePositiveProbability; Capacity = parameters.Dimensions.Capacity; @@ -43,13 +43,13 @@ public StateMetrics(BloomFilterParameters parameters, Callbacks callbacks) public void OnCapacityChanged(int value) { Capacity = value; - _callbacks.OnCapacityChanged?.Invoke(Id, value); + _bloomFilterEvents.OnCapacityChanged?.Invoke(Id, value); } public void IncrementCount(long amount) { var value = Interlocked.Add(ref _count, amount); - _callbacks.OnCountChanged?.Invoke(Id, value); + _bloomFilterEvents.OnCountChanged?.Invoke(Id, value); } public void IncrementSetBitCount(int amount) @@ -60,33 +60,33 @@ public void IncrementSetBitCount(int amount) public void OnCountChanged(long value) { _count = value; - _callbacks.OnCountChanged?.Invoke(Id, _count); + _bloomFilterEvents.OnCountChanged?.Invoke(Id, _count); } public void OnBitCountChanged(int value) { BitCount = value; - _callbacks.OnBitCountChanged?.Invoke(Id, value); + _bloomFilterEvents.OnBitCountChanged?.Invoke(Id, value); } public void OnScaled(BloomFilterParameters parameters) { - _callbacks.OnScaled?.Invoke(Id, parameters); + _bloomFilterEvents.OnScaled?.Invoke(Id, parameters); } public void OnHit() { - _callbacks.OnHit?.Invoke(Id); + _bloomFilterEvents.OnHit?.Invoke(Id); } public void OnMiss() { - _callbacks.OnMiss?.Invoke(Id); + _bloomFilterEvents.OnMiss?.Invoke(Id); } public void OnFalsePositive() { - _callbacks.OnFalsePositive?.Invoke(Id); + 
_bloomFilterEvents.OnFalsePositive?.Invoke(Id); } } } \ No newline at end of file diff --git a/src/Bloomn/BloomFilterOptions.cs b/src/Bloomn/BloomFilterOptions.cs index e9befc2..f5f1e46 100644 --- a/src/Bloomn/BloomFilterOptions.cs +++ b/src/Bloomn/BloomFilterOptions.cs @@ -1,10 +1,12 @@ using System; +using System.Buffers; namespace Bloomn { public class BloomFilterOptions { - public static string DefaultHasherType = typeof(Murmur3HasherFactory).AssemblyQualifiedName!; + public static string DefaultHasherType = typeof(DefaultHasherFactoryV1).AssemblyQualifiedName!; + private IKeyHasherFactory? _keyHasher; public static BloomFilterOptions DefaultOptions { get; set; } = new(); @@ -15,7 +17,7 @@ public class BloomFilterOptions public BloomFilterScaling Scaling { get; set; } = new(); - public Callbacks Callbacks { get; set; } = new(); + public BloomFilterEvents Events { get; set; } = new(); public bool DiscardInconsistentState { get; set; } @@ -30,7 +32,7 @@ public BloomFilterDimensions GetDimensions() return Dimensions?.Build() ?? new BloomFilterDimensions(); } - public IKeyHasherFactory GetHasher() + public IKeyHasherFactory GetHasherFactory() { if (_keyHasher == null) { @@ -42,8 +44,8 @@ public IKeyHasherFactory GetHasher() { if (HasherType == DefaultHasherType) { - throw new BloomFilterException(BloomFilterExceptionCode.InvalidOptions, "The default hasher can handle keys of type string and byte[]. If you " + - $"need to support keys of type {typeof(T)} you will need to implement {typeof(IKeyHasherFactory)} " + + throw new BloomFilterException(BloomFilterExceptionCode.InvalidOptions, "The default hasher can handle keys of type string, byte[], int, long, float, double, and decimal. 
" + + $"If you need to support keys of type {typeof(T)} you will need to implement {typeof(IKeyHasherFactory)} " + "and set HasherType to the assembly-qualified name, or pass an instance to the " + "SetHasher method of the options builder."); } diff --git a/src/Bloomn/BloomFilterState.cs b/src/Bloomn/BloomFilterState.cs index 7a1e343..2124a02 100644 --- a/src/Bloomn/BloomFilterState.cs +++ b/src/Bloomn/BloomFilterState.cs @@ -1,5 +1,7 @@ using System; using System.Collections.Generic; +using System.IO; +using System.IO.Compression; using System.Linq; using System.Text.Json; using System.Text.Json.Serialization; @@ -80,8 +82,12 @@ public override BloomFilterState Read(ref Utf8JsonReader reader, Type typeToConv result.BitArrays = new List(); for (; reader.TokenType != JsonTokenType.EndArray; reader.Read()) { - var bits = reader.GetBytesFromBase64(); - result.BitArrays.Add(bits); + var compressedBits = reader.GetBytesFromBase64(); + using var compressedStream = new MemoryStream(compressedBits); + using var gz = new DeflateStream(compressedStream, CompressionMode.Decompress); + using var bitStream = new MemoryStream(); + gz.CopyTo(bitStream); + result.BitArrays.Add(bitStream.ToArray()); } break; @@ -107,9 +113,17 @@ public override void Write(Utf8JsonWriter writer, BloomFilterState value, JsonSe if (value.BitArrays != null && value.BitArrays.Any()) { writer.WritePropertyName("bits"); - writer.WriteStartArray(); - foreach (var bitArray in value.BitArrays) writer.WriteBase64StringValue(bitArray); + writer.WriteStartArray(); + foreach (var bitArray in value.BitArrays) + { + using var m = new MemoryStream(); + using var gz = new DeflateStream(m, CompressionLevel.Optimal); + gz.Write(bitArray); + gz.Dispose(); /* completes the deflate stream: Flush() alone never writes the final block, and MemoryStream.ToArray() still works once the stream is closed */ + var compressedBits = m.ToArray(); + writer.WriteBase64StringValue(compressedBits); + } writer.WriteEndArray(); } diff --git 
a/src/Bloomn/DefaultHasherFactoryV1.cs b/src/Bloomn/DefaultHasherFactoryV1.cs new file mode 100644 index 0000000..fe352da --- /dev/null +++ 
b/src/Bloomn/DefaultHasherFactoryV1.cs @@ -0,0 +1,342 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Bloomn +{ + public class DefaultHasherFactoryV1 : + IKeyHasherFactory, + IKeyHasherFactory, + IKeyHasherFactory, + IKeyHasherFactory, + IKeyHasherFactory, + IKeyHasherFactory, + IKeyHasherFactory + { + Func IKeyHasherFactory.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus); + + Func IKeyHasherFactory.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus); + + Func IKeyHasherFactory.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus); + + Func IKeyHasherFactory.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus); + + Func IKeyHasherFactory.CreateHasher(int seed, int modulus) => key => CreateDeterministicHash(key, seed, modulus); + + + Func IKeyHasherFactory.CreateHasher(int seed, int modulus) + { + var useed = (uint) seed; + + return key => + { + var h = Compute(key, (uint) key.Length, useed); + return (uint) Math.Abs(h % modulus); + }; + } + + public string Algorithm => "default"; + + private static uint CreateDeterministicHash(T source, int seed, int modulus) + { + return (uint) Math.Abs(DeterministicHashCode.Combine(seed, source) % modulus); + } + + public Func CreateHasher(int seed, int modulus) + { + var useed = (uint) seed; + + return key => + { + var bytes = MemoryMarshal.AsBytes(key.AsSpan()); + var h = Compute(bytes, (uint) bytes.Length, useed); + return (uint) Math.Abs(h % modulus); + }; + } + + public static uint Compute(ReadOnlySpan data, uint length, uint seed) + { + var nblocks = length >> 2; + + var h1 = seed; + + const uint c1 = 0xcc9e2d51; + const uint c2 = 0x1b873593; + + //---------- + // body + + var i = 0; + + for (var j = nblocks; j > 0; --j) + { 
+ var k1l = BitConverter.ToUInt32(data[i..]); + + k1l *= c1; + k1l = Rotl32(k1l, 15); + k1l *= c2; + + h1 ^= k1l; + h1 = Rotl32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + + i += 4; + } + + //---------- + // tail + + nblocks <<= 2; + + uint k1 = 0; + + var tailLength = length & 3; + + if (tailLength == 3) + { + k1 ^= (uint) data[2 + (int) nblocks] << 16; + } + + if (tailLength >= 2) + { + k1 ^= (uint) data[1 + (int) nblocks] << 8; + } + + if (tailLength >= 1) + { + k1 ^= data[(int) nblocks]; + k1 *= c1; + k1 = Rotl32(k1, 15); + k1 *= c2; + h1 ^= k1; + } + + //---------- + // finalization + + h1 ^= length; + + h1 = Fmix32(h1); + + return h1; + } + + private static uint Fmix32(uint h) + { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; + } + + private static uint Rotl32(uint x, byte r) + { + return (x << r) | (x >> (32 - r)); + } + } + + /// + /// This is copied from the framework HashCode type, + /// with the seed set to a constant value rather than being + /// randomly set at startup. + /// + internal struct DeterministicHashCode + { + private const uint Seed = 1234567; + + private const uint Prime1 = 2654435761U; + private const uint Prime2 = 2246822519U; + private const uint Prime3 = 3266489917U; + private const uint Prime4 = 668265263U; + private const uint Prime5 = 374761393U; + + private uint _v1, _v2, _v3, _v4; + private uint _queue1, _queue2, _queue3; + private uint _length; + + public static int Combine(T1 value1) + { + // Provide a way of diffusing bits from something with a limited + // input hash space. For example, many enums only have a few + // possible hashes, only using the bottom few bits of the code. Some + // collections are built on the assumption that hashes are spread + // over a larger space, so diffusing the bits may help the + // collection work more efficiently. + + uint hc1 = (uint) (value1?.GetHashCode() ?? 
0); + + uint hash = MixEmptyState(); + hash += 4; + + hash = QueueRound(hash, hc1); + + hash = MixFinal(hash); + return (int) hash; + } + + public static int Combine(T1 value1, T2 value2) + { + uint hc1 = (uint) (value1?.GetHashCode() ?? 0); + uint hc2 = (uint) (value2?.GetHashCode() ?? 0); + + uint hash = MixEmptyState(); + hash += 8; + + hash = QueueRound(hash, hc1); + hash = QueueRound(hash, hc2); + + hash = MixFinal(hash); + return (int) hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4) + { + v1 = unchecked(Seed + Prime1 + Prime2); + v2 = Seed + Prime2; + v3 = Seed; + v4 = unchecked(Seed - Prime1); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint Round(uint hash, uint input) + { + return BitOperations.RotateLeft(hash + input * Prime2, 13) * Prime1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint QueueRound(uint hash, uint queuedValue) + { + return BitOperations.RotateLeft(hash + queuedValue * Prime3, 17) * Prime4; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint MixState(uint v1, uint v2, uint v3, uint v4) + { + return BitOperations.RotateLeft(v1, 1) + BitOperations.RotateLeft(v2, 7) + BitOperations.RotateLeft(v3, 12) + BitOperations.RotateLeft(v4, 18); + } + + private static uint MixEmptyState() + { + return Seed + Prime5; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint MixFinal(uint hash) + { + hash ^= hash >> 15; + hash *= Prime2; + hash ^= hash >> 13; + hash *= Prime3; + hash ^= hash >> 16; + return hash; + } + + public void Add(T value) + { + Add(value?.GetHashCode() ?? 0); + } + + public void Add(T value, IEqualityComparer? comparer) + { + Add(value is null ? 0 : (comparer?.GetHashCode(value) ?? value.GetHashCode())); + } + + private void Add(int value) + { + // The original xxHash works as follows: + // 0. 
Initialize immediately. We can't do this in a struct (no + // default ctor). + // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators. + // 2. Accumulate remaining blocks of length 4 (1 uint) into the + // hash. + // 3. Accumulate remaining blocks of length 1 into the hash. + + // There is no need for #3 as this type only accepts ints. _queue1, + // _queue2 and _queue3 are basically a buffer so that when + // ToHashCode is called we can execute #2 correctly. + + // We need to initialize the xxHash32 state (_v1 to _v4) lazily (see + // #0) and the last place that can be done if you look at the + // original code is just before the first block of 16 bytes is mixed + // in. The xxHash32 state is never used for streams containing fewer + // than 16 bytes. + + // To see what's really going on here, have a look at the Combine + // methods. + + uint val = (uint) value; + + // Storing the value of _length locally shaves off quite a few bytes + // in the resulting machine code. + uint previousLength = _length++; + uint position = previousLength % 4; + + // Switch can't be inlined. + + if (position == 0) + _queue1 = val; + else if (position == 1) + _queue2 = val; + else if (position == 2) + _queue3 = val; + else // position == 3 + { + if (previousLength == 3) + Initialize(out _v1, out _v2, out _v3, out _v4); + + _v1 = Round(_v1, _queue1); + _v2 = Round(_v2, _queue2); + _v3 = Round(_v3, _queue3); + _v4 = Round(_v4, val); + } + } + + public int ToHashCode() + { + // Storing the value of _length locally shaves off quite a few bytes + // in the resulting machine code. + uint length = _length; + + // position refers to the *next* queue position in this method, so + // position == 1 means that _queue1 is populated; _queue2 would have + // been populated on the next call to Add. + uint position = length % 4; + + // If the length is less than 4, _v1 to _v4 don't contain anything + // yet. xxHash32 treats this differently. + + uint hash = length < 4 ? 
MixEmptyState() : MixState(_v1, _v2, _v3, _v4); + + // _length is incremented once per Add(Int32) and is therefore 4 + // times too small (xxHash length is in bytes, not ints). + + hash += length * 4; + + // Mix what remains in the queue + + // Switch can't be inlined right now, so use as few branches as + // possible by manually excluding impossible scenarios (position > 1 + // is always false if position is not > 0). + if (position > 0) + { + hash = QueueRound(hash, _queue1); + if (position > 1) + { + hash = QueueRound(hash, _queue2); + if (position > 2) + hash = QueueRound(hash, _queue3); + } + } + + hash = MixFinal(hash); + return (int) hash; + } + } +} \ No newline at end of file diff --git a/src/Bloomn/FixedSizeBloomFilter.cs b/src/Bloomn/FixedSizeBloomFilter.cs index 8988a9c..4124a09 100644 --- a/src/Bloomn/FixedSizeBloomFilter.cs +++ b/src/Bloomn/FixedSizeBloomFilter.cs @@ -1,6 +1,7 @@ using System; using System.Buffers; using System.Collections; +using System.Collections.Concurrent; using System.Linq; using System.Threading; @@ -15,9 +16,6 @@ public sealed class FixedSizeBloomFilter : IBloomFilter private readonly int _actualBitCount; private readonly BitArray[] _bitArrays; - - private readonly int _bitCount; - private readonly int _bitsPerSlice; private readonly int _hashCount; private readonly Func _hasher1; @@ -27,6 +25,10 @@ public sealed class FixedSizeBloomFilter : IBloomFilter private readonly ReaderWriterLockSlim _lock = new(); private readonly StateMetrics _metrics; + private readonly Func _applyPreparedAdd; + private readonly Action _release; + + internal MaxCapacityBehavior MaxCapacityBehavior; public FixedSizeBloomFilter(BloomFilterOptions options, BloomFilterState state) @@ -40,9 +42,9 @@ public FixedSizeBloomFilter(BloomFilterOptions options, BloomFilterState stat Parameters = state.Parameters; - var hasherFactory = options.GetHasher(); + var hasherFactory = options.GetHasherFactory(); - _metrics = new StateMetrics(Parameters, 
options.Callbacks); + _metrics = new StateMetrics(Parameters, options.Events); _bitsPerSlice = ComputeBitsPerSlice(state.Parameters.Dimensions.BitCount, state.Parameters.Dimensions.HashCount); @@ -60,13 +62,14 @@ public FixedSizeBloomFilter(BloomFilterOptions options, BloomFilterState stat MaxCapacityBehavior = Parameters.Scaling.MaxCapacityBehavior; - _bitCount = Parameters.Dimensions.BitCount; _hashCount = Parameters.Dimensions.HashCount; _actualBitCount = _bitsPerSlice * _hashCount; _indexPool = ArrayPool.Create(_hashCount, 10); _hasher1 = hasherFactory.CreateHasher(0, _bitsPerSlice); _hasher2 = hasherFactory.CreateHasher(Hash2Seed, _bitsPerSlice); + _applyPreparedAdd = ApplyPreparedAdd; + _release = Release; } @@ -127,31 +130,9 @@ public BloomFilterState GetState() internal static int ComputeBitsPerSlice(int bitCount, int hashCount) { var n = bitCount / hashCount; + // Hash distribution is best when modded by a prime number - if (n % 2 == 0) - { - n++; - } - - // The maximum prime gap at 1,346,294,310,749 is 582 so we should never hit it - var safety = n + 582; - int i, j; - for (i = n; i < safety; i += 2) - { - var limit = Math.Sqrt(i); - for (j = 3; j <= limit; j += 2) - if (i % j == 0) - { - break; - } - - if (j > limit) - { - return i; - } - } - - throw new Exception($"Prime above {n} not found in a reasonable time (your filter must be unreasonably large)."); + return MathHelpers.GetNextPrimeNumber(n); } public bool IsNotPresent(BloomFilterCheckRequest checkRequest) @@ -197,7 +178,7 @@ public bool IsNotPresent(BloomFilterCheckRequest checkRequest) _lock.ExitReadLock(); } } - + private PreparedAdd PrepareAdd(BloomFilterCheckRequest checkRequest) { _lock.EnterReadLock(); @@ -240,7 +221,7 @@ private PreparedAdd PrepareAdd(BloomFilterCheckRequest checkRequest) return PreparedAdd.AlreadyAdded; } - return new PreparedAdd(Id, indexes, ApplyPreparedAdd, Release); + return new PreparedAdd(Id, indexes, _applyPreparedAdd, _release); } finally { diff --git 
a/src/Bloomn/IBloomFilterBuilder.cs b/src/Bloomn/IBloomFilterBuilder.cs new file mode 100644 index 0000000..722cf35 --- /dev/null +++ b/src/Bloomn/IBloomFilterBuilder.cs @@ -0,0 +1,13 @@ +using System; + +namespace Bloomn +{ + public interface IBloomFilterBuilder + { + IBloomFilterBuilder WithOptions(BloomFilterOptions options); + IBloomFilterBuilder WithOptions(Action> configure); + IBloomFilterBuilder WithProfile(string profile); + IBloomFilterBuilder WithState(BloomFilterState state); + IBloomFilter Build(); + } +} \ No newline at end of file diff --git a/src/Bloomn/IBloomFilterOptionsBuilder.cs b/src/Bloomn/IBloomFilterOptionsBuilder.cs new file mode 100644 index 0000000..c6d3c2f --- /dev/null +++ b/src/Bloomn/IBloomFilterOptionsBuilder.cs @@ -0,0 +1,12 @@ +namespace Bloomn +{ + public interface IBloomFilterOptionsBuilder + { + IBloomFilterOptionsBuilder WithCapacityAndFalsePositiveProbability(int capacity, double falsePositiveProbability); + IBloomFilterOptionsBuilder WithDimensions(BloomFilterDimensions dimensions); + IBloomFilterOptionsBuilder WithScaling(double capacityScaling = 2, double errorRateScaling = 0.8); + IBloomFilterOptionsBuilder WithHasher(IKeyHasherFactory hasherFactory); + IBloomFilterOptionsBuilder WithCallbacks(BloomFilterEvents events); + IBloomFilterOptionsBuilder IgnoreCapacityLimits(); + } +} \ No newline at end of file diff --git a/src/Bloomn/MathHelpers.cs b/src/Bloomn/MathHelpers.cs new file mode 100644 index 0000000..2ace216 --- /dev/null +++ b/src/Bloomn/MathHelpers.cs @@ -0,0 +1,36 @@ +using System; + +namespace Bloomn +{ + internal static class MathHelpers + { + public static int GetNextPrimeNumber(int n) + { + if (n % 2 == 0) + { + n++; + } + + // The maximum prime gap at 1,346,294,310,749 is 582 so we should never hit it + var safety = n + 582; + int i, j; + for (i = n; i < safety; i += 2) + { + var limit = Math.Sqrt(i); + for (j = 3; j <= limit; j += 2) + if (i % j == 0) + { + break; + } + + if (j > limit) + { + return i; + } 
+ } + + throw new BloomFilterException( BloomFilterExceptionCode.InvalidParameters,$"Prime above {n} not found in a reasonable time (your filter must be unreasonably large)."); + + } + } +} \ No newline at end of file diff --git a/src/Bloomn/Murmur3HasherFactory.cs b/src/Bloomn/Murmur3HasherFactory.cs deleted file mode 100644 index 30efa73..0000000 --- a/src/Bloomn/Murmur3HasherFactory.cs +++ /dev/null @@ -1,118 +0,0 @@ -using System; -using System.Runtime.InteropServices; - -namespace Bloomn -{ - public class Murmur3HasherFactory : - IKeyHasherFactory, - IKeyHasherFactory - { - Func IKeyHasherFactory.CreateHasher(int seed, int modulus) - { - var useed = (uint) seed; - - return key => - { - var h = Compute(key, (uint) key.Length, useed); - return (uint) (h % modulus); - }; - } - - public string Algorithm => "murmur3"; - - public Func CreateHasher(int seed, int modulus) - { - var useed = (uint) seed; - - return key => - { - var bytes = MemoryMarshal.AsBytes(key.AsSpan()); - var h = Compute(bytes, (uint) bytes.Length, useed); - return (uint) (h % modulus); - }; - } - - public static uint Compute(ReadOnlySpan data, uint length, uint seed) - { - var nblocks = length >> 2; - - var h1 = seed; - - const uint c1 = 0xcc9e2d51; - const uint c2 = 0x1b873593; - - //---------- - // body - - var i = 0; - - for (var j = nblocks; j > 0; --j) - { - var k1l = BitConverter.ToUInt32(data[i..]); - - k1l *= c1; - k1l = Rotl32(k1l, 15); - k1l *= c2; - - h1 ^= k1l; - h1 = Rotl32(h1, 13); - h1 = h1 * 5 + 0xe6546b64; - - i += 4; - } - - //---------- - // tail - - nblocks <<= 2; - - uint k1 = 0; - - var tailLength = length & 3; - - if (tailLength == 3) - { - k1 ^= (uint) data[2 + (int) nblocks] << 16; - } - - if (tailLength >= 2) - { - k1 ^= (uint) data[1 + (int) nblocks] << 8; - } - - if (tailLength >= 1) - { - k1 ^= data[(int) nblocks]; - k1 *= c1; - k1 = Rotl32(k1, 15); - k1 *= c2; - h1 ^= k1; - } - - //---------- - // finalization - - h1 ^= length; - - h1 = Fmix32(h1); - - return h1; - } 
- - private static uint Fmix32(uint h) - { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; - } - - private static uint Rotl32(uint x, byte r) - { - return (x << r) | (x >> (32 - r)); - } - } -} \ No newline at end of file diff --git a/src/Bloomn/OptionsValidator.cs b/src/Bloomn/OptionsValidator.cs index 6d5c41e..2200b34 100644 --- a/src/Bloomn/OptionsValidator.cs +++ b/src/Bloomn/OptionsValidator.cs @@ -30,7 +30,7 @@ public ValidateOptionsResult Validate(string name, BloomFilterOptions options try { - var _ = options.GetHasher(); + var _ = options.GetHasherFactory(); } catch (Exception ex) { diff --git a/src/Bloomn/PreparedAdd.cs b/src/Bloomn/PreparedAdd.cs index f3b9345..33c17d5 100644 --- a/src/Bloomn/PreparedAdd.cs +++ b/src/Bloomn/PreparedAdd.cs @@ -4,11 +4,11 @@ namespace Bloomn { public readonly struct PreparedAdd : IDisposable { - private readonly Func? _add; public readonly string FilterId; internal readonly int[]? Indexes; internal readonly Action? Release; public readonly bool CanAdd; + private readonly Func? _add; public PreparedAdd(string filterId, int[]? indexes, Func? add, Action? 
release) { diff --git a/src/Bloomn/ScalingBloomFilter.cs b/src/Bloomn/ScalingBloomFilter.cs index 0c842c0..6f1d817 100644 --- a/src/Bloomn/ScalingBloomFilter.cs +++ b/src/Bloomn/ScalingBloomFilter.cs @@ -45,7 +45,7 @@ public ScalingBloomFilter(BloomFilterOptions options, BloomFilterState sta Parameters = state.Parameters; _bloomFilterScaling = state.Parameters.Scaling; - _metrics = new StateMetrics(Parameters, options.Callbacks); + _metrics = new StateMetrics(Parameters, options.Events); if (state.Parameters.Id == null) { diff --git a/tests/Bloomn.Benchmarks/Bloomn.Benchmarks.csproj b/tests/Bloomn.Benchmarks/Bloomn.Benchmarks.csproj new file mode 100644 index 0000000..95a730f --- /dev/null +++ b/tests/Bloomn.Benchmarks/Bloomn.Benchmarks.csproj @@ -0,0 +1,16 @@ + + + + Exe + net5.0 + + + + + + + + + + + diff --git a/tests/Bloomn.Benchmarks/DefaultHasherBenchmarks.cs b/tests/Bloomn.Benchmarks/DefaultHasherBenchmarks.cs new file mode 100644 index 0000000..638d802 --- /dev/null +++ b/tests/Bloomn.Benchmarks/DefaultHasherBenchmarks.cs @@ -0,0 +1,38 @@ +using System; +using BenchmarkDotNet.Attributes; + +namespace Bloomn.Benchmarks +{ + [SimpleJob(launchCount: 1, warmupCount: 1, targetCount: 5, invocationCount: 100)] + [MaxIterationCount(100)] + [MemoryDiagnoser] + public class DefaultHasherBenchmarks + { + + public const int OperationsPerInvoke = 100; + + public Func IntHasher { get; set; } + + public DefaultHasherBenchmarks() + { + IntHasher = CreateHasher(); + } + + public Func CreateHasher() + { + var factory = (IKeyHasherFactory) new DefaultHasherFactoryV1(); + return factory.CreateHasher(1, 1234567); + } + + [Benchmark(OperationsPerInvoke = OperationsPerInvoke)] + public void IntHasherBenchmark() + { + var hasher = IntHasher; + uint total = 0; + for (int i = 0; i < OperationsPerInvoke; i++) + { + total += hasher(i); + } + } + } +} \ No newline at end of file diff --git a/tests/Bloomn.Benchmarks/Program.cs b/tests/Bloomn.Benchmarks/Program.cs new file mode 100644 
index 0000000..84f90ae --- /dev/null +++ b/tests/Bloomn.Benchmarks/Program.cs @@ -0,0 +1,13 @@ +using System; +using BenchmarkDotNet.Running; + +namespace Bloomn.Benchmarks +{ + class Program + { + static void Main(string[] args) + { + BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); + } + } +} \ No newline at end of file diff --git a/tests/Bloomn.Benchmarks/SingleItemBenchmarks.cs b/tests/Bloomn.Benchmarks/SingleItemBenchmarks.cs new file mode 100644 index 0000000..b1f3bac --- /dev/null +++ b/tests/Bloomn.Benchmarks/SingleItemBenchmarks.cs @@ -0,0 +1,112 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Security.Cryptography.X509Certificates; +using System.Threading; +using BenchmarkDotNet.Attributes; + +namespace Bloomn.Benchmarks +{ + [SimpleJob(launchCount: 1, warmupCount: 1, targetCount: 5, invocationCount: 100)] + [MaxIterationCount(100)] + [MemoryDiagnoser] + public class SingleItemBenchmarks + { + public const int OperationsPerInvoke = 100; + + public int KeyIndex { get; set; } + public readonly IBloomFilter Fixed; + public readonly IBloomFilter Scaling; + + public SingleItemBenchmarks() + { + Scaling = BloomFilter.Builder() + .WithOptions(x => x.WithCapacityAndFalsePositiveProbability(1000, 0.02) + .WithScaling(4, 0.9)) + .Build(); + Fixed = BloomFilter.Builder() + .WithOptions(x => x.WithCapacityAndFalsePositiveProbability(1000, 0.02) + .WithScaling(4, 0.9)) + .Build(); + + Console.WriteLine("instantiated"); + } + + public IBloomFilter Sut + { + get + + { + switch (Behavior) + { + case MaxCapacityBehavior.Scale: return Scaling; + case MaxCapacityBehavior.Throw: return Fixed; + default: throw new ArgumentOutOfRangeException(nameof(Behavior), Behavior.ToString()); + } + } + } + + [Params(MaxCapacityBehavior.Scale, MaxCapacityBehavior.Throw)] + public MaxCapacityBehavior Behavior { get; set; } + + + [Benchmark(OperationsPerInvoke = OperationsPerInvoke)] + public void CheckOnly() + { + var sut = Sut; + 
for (int i = 0; i < OperationsPerInvoke; i++) + { + KeyIndex++; + var _ = sut.IsNotPresent(KeyIndex); + } + } + + [Benchmark(OperationsPerInvoke = OperationsPerInvoke)] + public void AddAndCheck() + { + var sut = Sut; + for (int i = 0; i < OperationsPerInvoke; i++) + { + KeyIndex++; + + sut.Add(KeyIndex); + + var _ = sut.IsNotPresent(KeyIndex); + } + } + + [Benchmark(OperationsPerInvoke = OperationsPerInvoke)] + public void CheckAndAdd() + { + var sut = Sut; + for (int i = 0; i < OperationsPerInvoke; i++) + { + KeyIndex++; + + if (sut.IsNotPresent(KeyIndex)) + { + sut.Add(KeyIndex); + } + } + } + + [Benchmark(OperationsPerInvoke = OperationsPerInvoke)] + public void PrepareAndCommit() + { + var sut = Sut; + for (int i = 0; i < OperationsPerInvoke; i++) + { + KeyIndex++; + + var prepared = sut.CheckAndPrepareAdd(KeyIndex); + { + if (prepared.IsNotPresent) + { + prepared.Add(); + } + } + prepared.Dispose(); + } + } + } +} \ No newline at end of file diff --git a/tests/Bloomn.Tests/BloomFilterManagerTests.cs b/tests/Bloomn.Tests/BloomFilterBuilderTests.cs similarity index 92% rename from tests/Bloomn.Tests/BloomFilterManagerTests.cs rename to tests/Bloomn.Tests/BloomFilterBuilderTests.cs index c48c29e..343d82c 100644 --- a/tests/Bloomn.Tests/BloomFilterManagerTests.cs +++ b/tests/Bloomn.Tests/BloomFilterBuilderTests.cs @@ -34,7 +34,7 @@ public void BuilderCanCreateDefaultInstance() var state = actual.GetState(); state.Parameters.ShouldNotBeNull(); - state.Parameters.HashAlgorithm.Should().Be(new Murmur3HasherFactory().Algorithm); + state.Parameters.HashAlgorithm.Should().Be(new DefaultHasherFactoryV1().Algorithm); } } } \ No newline at end of file diff --git a/tests/Bloomn.Tests/BloomFilterTests.cs b/tests/Bloomn.Tests/BloomFilterTests.cs index 6082eb6..198dca5 100644 --- a/tests/Bloomn.Tests/BloomFilterTests.cs +++ b/tests/Bloomn.Tests/BloomFilterTests.cs @@ -22,8 +22,8 @@ protected BloomFilterTestsBase(ITestOutputHelper testOutputHelper) Options = new 
BloomFilterOptions { - Callbacks = AddLoggingCallbacks - ? new Callbacks + Events = AddLoggingCallbacks + ? new BloomFilterEvents { OnCapacityChanged = (x, i) => testOutputHelper.WriteLine($"OnCapacityChanged({x}, {i})"), OnCountChanged = (x, i) => testOutputHelper.WriteLine($"OnCountChanged({x}, {i})"), @@ -33,7 +33,7 @@ protected BloomFilterTestsBase(ITestOutputHelper testOutputHelper) OnMiss = x => testOutputHelper.WriteLine($"OnMiss({x})"), OnFalsePositive = x => testOutputHelper.WriteLine($"OnFalsePositive({x})") } - : new Callbacks + : new BloomFilterEvents { // OnHit = (x) => testOutputHelper.WriteLine($"OnHit({x})"), OnScaled = (x, p) => testOutputHelper.WriteLine($"OnScaled({x}, {p})") @@ -81,15 +81,18 @@ public void CanPrepareAndCommitSingleItem() { var sut = Create(Options, new BloomFilterParameters("test").WithCapacityAndErrorRate(100, 0.1)); - var key = "test string"; - using (var entry = sut.CheckAndPrepareAdd(key)) + for (int i = 0; i < 10; i++) { - entry.IsNotPresent.Should().BeTrue("the key has not been added"); - sut.IsNotPresent(key).Should().BeTrue("the key has not been added"); + var key = Guid.NewGuid().ToString(); + using (var entry = sut.CheckAndPrepareAdd(key)) + { + entry.IsNotPresent.Should().BeTrue("the key has not been added"); + sut.IsNotPresent(key).Should().BeTrue("the key has not been added"); - entry.Add().Should().BeTrue("the key had not been added previously"); + entry.Add().Should().BeTrue("the key had not been added previously"); - sut.IsNotPresent(key).Should().BeFalse("the key has been added"); + sut.IsNotPresent(key).Should().BeFalse("the key has been added"); + } } } @@ -299,21 +302,21 @@ int GetFalsePositiveCount() var falsePositiveCount = 0; var count = 0; keyFactory(sampleSize).AsParallel() - .WithDegreeOfParallelism(threads) - .ForAll(s => - { - var c = Interlocked.Increment(ref count); - var f = falsePositiveCount; - if (!sut.IsNotPresent(s)) - { - f = Interlocked.Increment(ref falsePositiveCount); - var runningFpr = f 
/ (double) c; - // _testOutputHelper.WriteLine($"False positive rate @ {c}: {runningFpr}"); - } - - - sut.Add(s); - }); + .WithDegreeOfParallelism(threads) + .ForAll(s => + { + var c = Interlocked.Increment(ref count); + var f = falsePositiveCount; + if (!sut.IsNotPresent(s)) + { + f = Interlocked.Increment(ref falsePositiveCount); + var runningFpr = f / (double) c; + // _testOutputHelper.WriteLine($"False positive rate @ {c}: {runningFpr}"); + } + + + sut.Add(s); + }); return falsePositiveCount; } } diff --git a/tests/Bloomn.Tests/Examples/ExampleProgram.cs b/tests/Bloomn.Tests/Examples/ExampleProgram.cs new file mode 100644 index 0000000..0a2a569 --- /dev/null +++ b/tests/Bloomn.Tests/Examples/ExampleProgram.cs @@ -0,0 +1,19 @@ +using Xunit.Abstractions; + +namespace Bloomn.Tests.Examples +{ + public class ExampleProgram + { + private readonly ITestOutputHelper _testOutputHelper; + + public ExampleProgram(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + + public void WriteLine(string line) + { + _testOutputHelper.WriteLine(line); + } + } +} \ No newline at end of file diff --git a/tests/Bloomn.Tests/Examples/NewBuilderExample.cs b/tests/Bloomn.Tests/Examples/NewBuilderExample.cs new file mode 100644 index 0000000..6100cdd --- /dev/null +++ b/tests/Bloomn.Tests/Examples/NewBuilderExample.cs @@ -0,0 +1,66 @@ +using System.IO; +using Bloomn.Extensions; +using Microsoft.Extensions.DependencyInjection; +using Xunit; +using Xunit.Abstractions; + +namespace Bloomn.Tests.Examples +{ + public class NewBuilderExample : ExampleProgram + { + public NewBuilderExample(ITestOutputHelper testOutputHelper) : base(testOutputHelper) + { + } + + + [Fact] + public void Execute() + { + Directory.CreateDirectory("./Data"); + const string filePath = "./Data/filter.json"; + + IBloomFilter filter; + + if (File.Exists(filePath)) + { + var serializedState = File.ReadAllText(filePath); + var state = BloomFilterState.Deserialize(serializedState); + // You 
can create the filter directly from the state, without configuration. + // The state will be used to configure it. + filter = BloomFilter.Builder() + .WithState(state) + .Build(); + } + else + { + filter = BloomFilter.Builder() + .WithOptions(x => x.WithCapacityAndFalsePositiveProbability(1000, 0.02) + .WithScaling(4, 0.9)) + .Build(); + filter.Add(2); + filter.Add(3); + + for (var i = 3; i < 1000; i = MathHelpers.GetNextPrimeNumber(i + 1)) + { + filter.Add(i); + } + + var state = filter.GetState(); + var serializedState = state.Serialize(); + File.WriteAllText(filePath, serializedState); + } + + for (var i = 0; i < 1000; i++) + { + if (filter.IsNotPresent(i)) + { + WriteLine($"Not prime: {i}"); + } + else + { + WriteLine($"Probably prime: {i}"); + } + } + } + } +} \ No newline at end of file diff --git a/tests/Bloomn.Tests/Examples/ServiceProviderExample.cs b/tests/Bloomn.Tests/Examples/ServiceProviderExample.cs new file mode 100644 index 0000000..3a4216a --- /dev/null +++ b/tests/Bloomn.Tests/Examples/ServiceProviderExample.cs @@ -0,0 +1,70 @@ +using System.IO; +using Bloomn.Extensions; +using Microsoft.Extensions.DependencyInjection; +using Xunit; +using Xunit.Abstractions; + +namespace Bloomn.Tests.Examples +{ + public class ServiceProviderExample : ExampleProgram + { + public ServiceProviderExample(ITestOutputHelper testOutputHelper) : base(testOutputHelper) + { + } + + + [Fact] + public void Execute() + { + Directory.CreateDirectory("./Data"); + const string filePath = "./Data/filter.json"; + + var serviceProvider = new ServiceCollection() + .AddBloomFilters(c => + { + c.WithDefaultProfile(d => + d.WithCapacityAndFalsePositiveProbability(1000, 0.02) + .WithScaling(4, 0.9)); + }) + .BuildServiceProvider(); + + IBloomFilter filter; + + var builder = serviceProvider.GetRequiredService>(); + + if (File.Exists(filePath)) + { + var serializedState = File.ReadAllText(filePath); + var state = BloomFilterState.Deserialize(serializedState); + filter = 
builder.WithState(state).Build(); + } + else + { + filter = builder.Build(); + filter.Add(2); + filter.Add(3); + + for (var i = 3; i < 1000; i = MathHelpers.GetNextPrimeNumber(i + 1)) + { + filter.Add(i); + } + + var state = filter.GetState(); + var serializedState = state.Serialize(); + File.WriteAllText(filePath, serializedState); + } + + for (var i = 0; i < 1000; i++) + { + if (filter.IsNotPresent(i)) + { + WriteLine($"Not prime: {i}"); + } + else + { + WriteLine($"Probably prime: {i}"); + } + } + } + } +} \ No newline at end of file diff --git a/tests/Bloomn.Tests/Extensions/DependencyInjectionTests.cs b/tests/Bloomn.Tests/Extensions/DependencyInjectionTests.cs index ad1362d..d489ad9 100644 --- a/tests/Bloomn.Tests/Extensions/DependencyInjectionTests.cs +++ b/tests/Bloomn.Tests/Extensions/DependencyInjectionTests.cs @@ -30,7 +30,7 @@ public void CanResolveDefaultInstance() public void CanConfigureAndResolveDefaultBuilder() { var svc = new ServiceCollection() - .AddBloomFilters(x => x.WithDefaultProfile(b => b.WithCapacityAndErrorRate(1234, 0.0123))) + .AddBloomFilters(x => x.WithDefaultProfile(b => b.WithCapacityAndFalsePositiveProbability(1234, 0.0123))) .BuildServiceProvider(); var builder = svc.GetRequiredService>(); @@ -103,8 +103,8 @@ public void WhenProvidedConfigurationIsInvalidThenAnErrorIsThrown() public void CanConfigureAndResolveDefaultBuilderUsingCustomProfile() { var svc = new ServiceCollection() - .AddBloomFilters(x => x.WithDefaultProfile(b => b.WithCapacityAndErrorRate(1234, 0.0123)) - .WithProfile("custom", b => b.WithCapacityAndErrorRate(4321, 0.0321))) + .AddBloomFilters(x => x.WithDefaultProfile(b => b.WithCapacityAndFalsePositiveProbability(1234, 0.0123)) + .WithProfile("custom", b => b.WithCapacityAndFalsePositiveProbability(4321, 0.0321))) .BuildServiceProvider(); var builder = svc.GetRequiredService>(); diff --git a/tests/Bloomn.Tests/PerformanceExperiments.cs b/tests/Bloomn.Tests/PerformanceExperiments.cs index 14bf86f..5f2f814 100644 
--- a/tests/Bloomn.Tests/PerformanceExperiments.cs +++ b/tests/Bloomn.Tests/PerformanceExperiments.cs @@ -5,6 +5,8 @@ namespace Bloomn.Tests { + public class TestWithOutput {} + public class PerformanceExperiments { private readonly ITestOutputHelper _testOutputHelper; diff --git a/tests/Bloomn.Tests/ScalingFilterTests.cs b/tests/Bloomn.Tests/ScalingFilterTests.cs index 35731d0..d72b7c4 100644 --- a/tests/Bloomn.Tests/ScalingFilterTests.cs +++ b/tests/Bloomn.Tests/ScalingFilterTests.cs @@ -93,7 +93,7 @@ public void CanExportAndImportState() var first = new ScalingBloomFilter(new BloomFilterOptions { - Callbacks = new Callbacks + Events = new BloomFilterEvents { OnScaled = (id, p) => TestOutputHelper.WriteLine($"{id} {p}") }