// Copyright 2024 The Cockroach Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package swiss is a Go implementation of Swiss Tables as described in
// https://abseil.io/about/design/swisstables. See also:
// https://faultlore.com/blah/hashbrown-tldr/.
//
// Google's C++ implementation:
//
// https://github.com/abseil/abseil-cpp/blob/master/absl/container/internal/raw_hash_set.h
//
// # Swiss Tables
//
// Swiss tables are hash tables that map keys to values, similar to Go's
// builtin map type. Swiss tables use open-addressing rather than chaining to
// handle collisions. If you're not familiar with open-addressing see
// https://en.wikipedia.org/wiki/Open_addressing. A hybrid between linear and
// quadratic probing is used - linear probing within groups of small fixed
// size and quadratic probing at the group level. The key design choice of
// Swiss tables is the usage of a separate metadata array that stores 1 byte
// per slot in the table. 7-bits of this "control byte" are taken from
// hash(key) and the remaining bit is used to indicate whether the slot is
// empty, full, or deleted. The metadata array allows quick probes. The Google
// implementation of Swiss tables uses SIMD on x86 CPUs in order to quickly
// check 16 slots at a time for a match. Neon on arm64 CPUs is apparently too
// high latency, but the generic version is still able to compare 8 bytes at a
// time through bit tricks (SWAR, SIMD Within A Register).
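//
// As a rough sketch (not the exact code in this package), the SWAR trick for
// finding which of 8 control bytes packed into a uint64 equal a given 7-bit
// value h2 looks like:
//
//	func matchH2(ctrls uint64, h2 uint8) uint64 {
//		const lsb = 0x0101010101010101 // replicate a byte into every lane
//		const msb = 0x8080808080808080 // high bit of every lane
//		x := ctrls ^ (lsb * uint64(h2)) // bytes equal to h2 become zero
//		// Zero-byte detection: sets the high bit of each matching lane.
//		// Rare false positives are possible; they are harmless because
//		// candidate slots are confirmed by comparing keys.
//		return (x - lsb) &^ x & msb
//	}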
//
// Google's Swiss Tables layout is N-1 slots where N is a power of 2 and
// N+groupSize control bytes. The [N:N+groupSize] control bytes mirror the
// first groupSize control bytes so that probe operations at the end of the
// control bytes array do not have to perform additional checks. The
// separation of control bytes and slots implies 2 cache misses for a large
// map (larger than L2 cache size) or a cold map. The swiss.Map implementation
// differs from Google's layout: it groups together 8 control bytes and 8
// slots which often results in 1 cache miss for a large or cold map rather
// than separate accesses for the controls and slots. The mirrored control
// bytes are no longer needed and groups no longer start at an arbitrary slot
// index, but only at indexes that are multiples of 8.
//
// Probing is done by taking the top 57 bits of hash(key), modulo the number
// of groups, as the index into the groups slice and then checking the
// groupSize control bytes within that group. Probing walks through groups in
// the table using quadratic
// probing until it finds a group that has at least one empty slot. See the
// comments on probeSeq for more details on the order in which groups are
// probed and the guarantee that every group is examined which means that in
// the worst case probing will end when an empty slot is encountered (the map
// can never be 100% full).
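//
// As a sketch (assuming mask is the number of groups minus 1, where the
// number of groups is a power of 2), the quadratic probe sequence steps
// through group offsets via triangular numbers, which visits every group
// exactly once before the sequence repeats:
//
//	offset := h1 & mask
//	for i := 1; ; i++ {
//		// ... examine the groupSize control bytes of the group at offset ...
//		offset = (offset + i) & mask
//	}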
//
// Deletion is performed using tombstones (ctrlDeleted) with an optimization
// to mark a slot as empty when we can prove that doing so would not violate
// the probing invariant that a group of full slots causes probing to continue
// to the next group. It is invalid to take a group of full slots and mark one
// as empty as doing so would cause subsequent lookups to terminate at that
// group rather than continue to probe. We prove a slot was never part of a
// full group by checking whether any of the groupSize-1 neighbors to the left
// and right of the slot being deleted are empty, which indicates that the
// slot was never part of a full group.
//
// # Extendible Hashing
//
// The Swiss table design has a significant caveat: resizing of the table is
// done all at once rather than incrementally. This can cause long-tail
// latency blips in some use cases. To address this caveat, extendible hashing
// (https://en.wikipedia.org/wiki/Extendible_hashing) is applied on top of the
// Swiss table foundation. In extendible hashing, there is a top-level
// directory containing entries pointing to buckets. In swiss.Map each bucket
// is a Swiss table as described above.
//
// The high bits of hash(key) are used to index into the bucket directory
// which is effectively a trie. The number of bits used is the globalDepth,
// resulting in 2^globalDepth directory entries. Adjacent entries in the
// directory are allowed to point to the same bucket which enables resizing to
// be done incrementally, one bucket at a time. Each bucket has a localDepth
// which is less than or equal to the globalDepth. If the localDepth for a
// bucket equals the globalDepth then only a single directory entry points to
// the bucket. Otherwise, more than one directory entry points to the bucket.
//
// The diagram below shows one possible scenario for the directory and
// buckets. With a globalDepth of 2 the directory contains 4 entries. The
// first 2 entries point to the same bucket which has a localDepth of 1, while
// the last 2 entries point to different buckets.
//
// dir(globalDepth=2)
// +----+
// | 00 | --\
// +----+ +--> bucket[localDepth=1]
// | 01 | --/
// +----+
// | 10 | ------> bucket[localDepth=2]
// +----+
// | 11 | ------> bucket[localDepth=2]
// +----+
//
// The index into the directory is "hash(key) >> (64 - globalDepth)".
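//
// For example, with globalDepth=2 the top 2 bits of the hash select the
// directory entry, so a hash whose top bits are 10 maps to dir[2]:
//
//	idx := hash >> (64 - globalDepth) // here: hash >> 62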
//
// When a bucket gets too large (specified by a configurable threshold) it is
// split. When a bucket is split its localDepth is incremented. If its
// localDepth is less than or equal to the globalDepth then the newly split
// bucket can be installed in the directory. If the bucket's localDepth is
// greater than the globalDepth then the globalDepth is incremented and the
// directory is reallocated at twice its current size. In the diagram above,
// consider what happens if the bucket at dir[3] is split:
//
// dir(globalDepth=3)
// +-----+
// | 000 | --\
// +-----+ \
// | 001 | ----\
// +-----+ +--> bucket[localDepth=1]
// | 010 | ----/
// +-----+ /
// | 011 | --/
// +-----+
// | 100 | --\
// +-----+ +----> bucket[localDepth=2]
// | 101 | --/
// +-----+
// | 110 | --------> bucket[localDepth=3]
// +-----+
// | 111 | --------> bucket[localDepth=3]
// +-----+
//
// Note that a configuration like the diagram above is very unlikely with a
// good hash function as the buckets will tend to fill at a similar rate.
//
// The split operation redistributes the records in a bucket into two buckets.
// This is done by walking over the records in the bucket to be split,
// computing hash(key) and using localDepth to extract the bit which
// determines whether to leave the record in the current bucket or to move it
// to the new bucket.
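//
// A sketch of that bit extraction (assuming localDepth is the bucket's depth
// before the split, so the split consumes the next-highest bit of the hash):
//
//	if (hash>>(63-localDepth))&1 == 0 {
//		// the record stays in the bucket being split
//	} else {
//		// the record moves to the newly created bucket
//	}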
//
// Maps containing only a single bucket are optimized to avoid the directory
// indexing, resulting in performance equivalent to a Swiss table without
// extendible hashing. A single bucket can be guaranteed by
// configuring a very large bucket size threshold via the
// WithMaxBucketCapacity option.
//
// In order to avoid a level of indirection when accessing a bucket, the
// bucket directory points to buckets by value rather than by pointer.
// Adjacent bucket[K,V]'s which are logically the same bucket share the
// bucket.groups slice and have the same values for
// bucket.{groupMask,localDepth,index}. The other fields of a bucket are only
// valid for buckets where &m.dir[bucket.index] == &bucket (i.e. the first
// bucket in the directory with the specified index). During Get operations,
// any of the buckets with the same index may be used for retrieval. During
// Put and Delete operations an additional indirection is performed, though
// the common case is that this indirection is within the same cache line as
// it is to the immediately preceding bucket in the directory.
//
// # Implementation
//
// The implementation follows Google's Abseil implementation of Swiss Tables,
// and is heavily tuned, using unsafe and raw pointer arithmetic rather than
// Go slices to squeeze out every drop of performance. In order to support
// hashing of arbitrary keys, a hack is performed to extract the hash function
// from Go's implementation of map[K]struct{} by reaching into the internals
// of the type. (This might break in future versions of Go, but is likely
// fixable unless the Go runtime does something drastic).
//
// # Performance
//
// A swiss.Map has similar or slightly better performance than Go's builtin
// map for small map sizes, and is much faster at large map sizes. See
// [README.md] for details.
//
// [README.md]: https://github.com/cockroachdb/swiss/blob/main/README.md
package swiss
import (
"fmt"
"io"
"math"
"math/bits"
"strings"
"unsafe"
)
const (
// groupSize is the number of control bytes and slots in a group.
groupSize = 8
// maxAvgGroupLoad is the maximum average number of filled slots per group
// (7 out of groupSize=8 slots), giving a maximum load factor of 7/8.
maxAvgGroupLoad = 7
ctrlEmpty ctrl = 0b10000000
ctrlDeleted ctrl = 0b11111110
bitsetLSB = 0x0101010101010101
bitsetMSB = 0x8080808080808080
bitsetEmpty = bitsetLSB * uint64(ctrlEmpty)
bitsetDeleted = bitsetLSB * uint64(ctrlDeleted)
// The default maximum capacity a bucket is allowed to grow to before it
// will be split.
defaultMaxBucketCapacity uint32 = 4096
)
func init() {
// Don't add fields to the bucket unnecessarily. It is packed for
// efficiency so that we can fit 2 buckets into a 64-byte cache line on
// 64-bit architectures.
const expectedSize = ptrSize + 6*4
if unsafe.Sizeof(bucket[int, int]{}) != expectedSize {
panic(fmt.Sprintf("bucket size is not %d bytes", expectedSize))
}
}
// slot holds a key and value.
type slot[K comparable, V any] struct {
key K
value V
}
// Group holds groupSize control bytes and slots.
type Group[K comparable, V any] struct {
ctrls ctrlGroup
slots slotGroup[K, V]
}
// bucket implements Google's Swiss Tables hash table design. A Map is
// composed of 1 or more buckets that are addressed using extendible hashing.
type bucket[K comparable, V any] struct {
// groups is groupMask+1 in length and holds groupSize key/value slots and
// their control bytes.
groups unsafeSlice[Group[K, V]]
// groupMask is the number of groups minus 1 which is used to quickly
// compute i%N using a bitwise & operation. The groupMask only changes
// when a bucket is resized.
groupMask uint32
// Capacity, used, and growthLeft are only updated on mutation operations
// (Put, Delete). Read operations (Get) only access the groups and
// groupMask fields.
// The total number of slots (always a power of 2). Equal to
// `(groupMask+1)*groupSize` (unless the bucket is empty, when capacity is 0).
capacity uint32
// The number of filled slots (i.e. the number of elements in the bucket).
used uint32
// The number of slots we can still fill without needing to rehash.
//
// This is stored separately due to tombstones: we do not include
// tombstones in the growth capacity because we'd like to rehash when the
// table is filled with tombstones as otherwise probe sequences might get
// unacceptably long without triggering a rehash.
growthLeft uint32
// localDepth is the number of high bits from hash(key) used to generate
// an index for the global directory to locate this bucket. If localDepth
// is 0 this bucket is Map.bucket0. LocalDepth is only updated when a
// bucket splits.
localDepth uint32
// The index of the bucket within Map.dir. The buckets in
// Map.dir[index:index+2^(globalDepth-localDepth)] all share the same
// groups (and are logically the same bucket). Only the bucket at
// Map.dir[index] can be used for mutation operations (Put, Delete). The
// other buckets can be used for Get operations. Index is only updated
// when a bucket splits or the directory grows.
index uint32
}
// Map is an unordered map from keys to values with Put, Get, Delete, and All
// operations. Map is inspired by Google's Swiss Tables design as implemented
// in Abseil's flat_hash_map, combined with extendible hashing. By default, a
// Map[K,V] uses the same hash function as Go's builtin map[K]V, though a
// different hash function can be specified using the WithHash option.
//
// A Map is NOT goroutine-safe.
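//
// Basic usage (a sketch):
//
//	m := swiss.New[string, int](0)
//	m.Put("hello", 1)
//	if v, ok := m.Get("hello"); ok {
//		fmt.Println(v)
//	}
//	m.Delete("hello")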
type Map[K comparable, V any] struct {
// The hash function applied to keys of type K. The hash function is
// extracted from the Go runtime's implementation of map[K]struct{}.
hash hashFn
seed uintptr
// The allocator to use for the ctrls and slots slices.
allocator Allocator[K, V]
// bucket0 is always present and inlined in the Map to avoid a pointer
// indirection during the common case that the map contains a single
// bucket. bucket0 is also used during split operations as a temporary
// bucket to split into before the bucket is installed in the directory.
bucket0 bucket[K, V]
// The directory of buckets. See the comment on bucket.index for details
// on how the physical bucket values map to logical buckets.
dir unsafeSlice[bucket[K, V]]
// The number of filled slots across all buckets (i.e. the number of
// elements in the map).
used int
// globalShift is the number of bits to right shift a hash value to
// generate an index for the global directory. As a special case, if
// globalShift==0 then bucket0 is used and the directory is not accessed.
// Note that globalShift==(64-globalDepth). globalShift is used rather than
// globalDepth because shifting is a more common operation than comparing
// globalDepth to a bucket's localDepth.
globalShift uint32
// The maximum capacity a bucket is allowed to grow to before it will be
// split.
maxBucketCapacity uint32
_ noCopy
}
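// normalizeCapacity rounds capacity up to the nearest power of 2 (e.g. 5
// becomes 8 and 8 stays 8). If the next power of 2 would overflow a uint32
// the result saturates at 1<<31.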
func normalizeCapacity(capacity uint32) uint32 {
v := (uint32(1) << bits.Len32(uint32(capacity-1)))
if v != 0 {
return v
}
return uint32(1) << 31
}
// New constructs a new Map with the specified initial capacity. If
// initialCapacity is 0 the map will start out with zero capacity and will
// grow on the first insert. The zero value for a Map is not usable.
func New[K comparable, V any](initialCapacity int, options ...Option[K, V]) *Map[K, V] {
m := &Map[K, V]{}
m.Init(initialCapacity, options...)
return m
}
// Init initializes a Map with the specified initial capacity. If
// initialCapacity is 0 the map will start out with zero capacity and will
// grow on the first insert. The zero value for a Map is not usable and Init
// must be called before using the map.
//
// Init is intended for usage when a Map is embedded by value in another
// structure.
func (m *Map[K, V]) Init(initialCapacity int, options ...Option[K, V]) {
*m = Map[K, V]{
hash: getRuntimeHasher[K](),
seed: uintptr(fastrand64()),
allocator: defaultAllocator[K, V]{},
bucket0: bucket[K, V]{
// The groups slice for bucket0 in an empty map points to a single
// group where the controls are all marked as empty. This
// simplifies the logic for probing in Get, Put, and Delete. The
// empty controls will never match a probe operation, and if
// insertion is performed growthLeft==0 will trigger a resize of
// the bucket.
groups: makeUnsafeSlice(unsafeConvertSlice[Group[K, V]](emptyCtrls[:])),
},
maxBucketCapacity: defaultMaxBucketCapacity,
}
// Initialize the directory to point to bucket0.
m.dir = makeUnsafeSlice(unsafe.Slice(&m.bucket0, 1))
for _, op := range options {
op.apply(m)
}
if m.maxBucketCapacity < groupSize {
m.maxBucketCapacity = groupSize
}
m.maxBucketCapacity = normalizeCapacity(m.maxBucketCapacity)
if initialCapacity > 0 {
// We consider initialCapacity to be an indication from the caller
// about the number of records the map should hold. The realized
// capacity of a map is 7/8 of the number of slots, so we set the
// target capacity to initialCapacity*8/7.
targetCapacity := uintptr((initialCapacity * groupSize) / maxAvgGroupLoad)
if targetCapacity <= uintptr(m.maxBucketCapacity) {
// Normalize targetCapacity to the smallest value of the form 2^k.
m.bucket0.init(m, normalizeCapacity(uint32(targetCapacity)))
} else {
// If targetCapacity is larger than maxBucketCapacity we need to
// size the directory appropriately. We'll size each bucket to
// maxBucketCapacity and create enough buckets to hold
// initialCapacity.
nBuckets := (targetCapacity + uintptr(m.maxBucketCapacity) - 1) / uintptr(m.maxBucketCapacity)
globalDepth := uint32(bits.Len32(uint32(nBuckets) - 1))
m.growDirectory(globalDepth, 0 /* index */)
n := m.bucketCount()
for i := uint32(0); i < n; i++ {
b := m.dir.At(uintptr(i))
b.init(m, m.maxBucketCapacity)
b.localDepth = globalDepth
b.index = i
}
m.checkInvariants()
}
}
m.buckets(0, func(b *bucket[K, V]) bool {
b.checkInvariants(m)
return true
})
}
// Close closes the map, releasing any memory back to its configured
// allocator. It is unnecessary to close a map using the default allocator. It
// is invalid to use a Map after it has been closed, though Close itself is
// idempotent.
func (m *Map[K, V]) Close() {
m.buckets(0, func(b *bucket[K, V]) bool {
b.close(m.allocator)
return true
})
m.allocator = nil
}
// Put inserts an entry into the map, overwriting an existing value if an
// entry with the same key already exists.
func (m *Map[K, V]) Put(key K, value V) {
// Put is find composed with uncheckedPut. We perform find to see if the
// key is already present. If it is, we overwrite the existing value and are
// done. If the key isn't present we perform an uncheckedPut which
// inserts an entry known not to be in the table (violating this
// requirement will cause the table to behave erratically).
h := m.hash(noescape(unsafe.Pointer(&key)), m.seed)
b := m.mutableBucket(h)
// NB: Unlike the abseil swiss table implementation which uses a common
// find routine for Get, Put, and Delete, we have to manually inline the
// find routine for performance.
seq := makeProbeSeq(h1(h), b.groupMask)
startOffset := seq.offset
for ; ; seq = seq.next() {
g := b.groups.At(uintptr(seq.offset))
match := g.ctrls.matchH2(h2(h))
for match != 0 {
i := match.first()
slot := g.slots.At(i)
if key == slot.key {
slot.value = value
b.checkInvariants(m)
return
}
match = match.remove(i)
}
match = g.ctrls.matchEmpty()
if match != 0 {
// Finding an empty slot means we've reached the end of the probe
// sequence.
// If there is room left to grow in the bucket and we're at the
// start of the probe sequence we can just insert the new entry.
if b.growthLeft > 0 && seq.offset == startOffset {
i := match.first()
slot := g.slots.At(i)
slot.key = key
slot.value = value
g.ctrls.Set(i, ctrl(h2(h)))
b.growthLeft--
b.used++
m.used++
b.checkInvariants(m)
return
}
// Find the first empty or deleted slot in the key's probe
// sequence.
seq := makeProbeSeq(h1(h), b.groupMask)
for ; ; seq = seq.next() {
g := b.groups.At(uintptr(seq.offset))
match = g.ctrls.matchEmptyOrDeleted()
if match != 0 {
i := match.first()
// If there is room left to grow in the table or the slot
// is deleted (and thus we're overwriting it and not
// changing growthLeft) we can insert the entry here.
// Otherwise we need to rehash the bucket.
if b.growthLeft > 0 || g.ctrls.Get(i) == ctrlDeleted {
slot := g.slots.At(i)
slot.key = key
slot.value = value
if g.ctrls.Get(i) == ctrlEmpty {
b.growthLeft--
}
g.ctrls.Set(i, ctrl(h2(h)))
b.used++
m.used++
b.checkInvariants(m)
return
}
break
}
}
if invariants && b.growthLeft != 0 {
panic(fmt.Sprintf("invariant failed: growthLeft is unexpectedly non-zero: %d\n%#v", b.growthLeft, b))
}
b.rehash(m)
// We may have split the bucket in which case we have to
// re-determine which bucket the key resides on. This
// determination is quick in comparison to rehashing, resizing,
// and splitting, so just always do it.
b = m.mutableBucket(h)
// Note that we don't have to restart the entire Put process as we
// know the key doesn't exist in the map.
b.uncheckedPut(h, key, value)
b.used++
m.used++
b.checkInvariants(m)
return
}
}
}
// Get retrieves the value from the map for the specified key, returning
// ok=false if the key is not present.
func (m *Map[K, V]) Get(key K) (value V, ok bool) {
h := m.hash(noescape(unsafe.Pointer(&key)), m.seed)
b := m.bucket(h)
// NB: Unlike the abseil swiss table implementation which uses a common
// find routine for Get, Put, and Delete, we have to manually inline the
// find routine for performance.
// To find the location of a key in the table, we compute hash(key). From
// h1(hash(key)) and the capacity, we construct a probeSeq that visits
// every group of slots in some interesting order.
//
// We walk through these indices. At each index, we select the entire group
// starting with that index and extract potential candidates: occupied slots
// with a control byte equal to h2(hash(key)). If we find an empty slot in the
// group, we stop: the key is not present in the map. The key at each candidate
// slot is compared with key; if they are equal we are done and return the
// slot's value; otherwise we continue to the next probe index. Tombstones
// (ctrlDeleted) effectively behave like full slots that never match the key
// we're looking for.
//
// The h2 bits ensure when we compare a key we are likely to have actually
// found the object. That is, the chance is low that keys compare false. Thus,
// when we search for an object, we are unlikely to call == many times. This
// likelihood can be analyzed as follows (assuming that h2 is a random enough
// hash function).
//
// Let's assume that there are k "wrong" objects that must be examined in a
// probe sequence. For example, when doing a find on an object that is in the
// table, k is the number of objects between the start of the probe sequence
// and the final found object (not including the final found object). The
// expected number of objects with an h2 match is then k/128. Measurements and
// analysis indicate that even at high load factors, k is less than 32,
// meaning that the number of false positive comparisons we must perform is
// less than 1/8 per find.
seq := makeProbeSeq(h1(h), b.groupMask)
for ; ; seq = seq.next() {
g := b.groups.At(uintptr(seq.offset))
match := g.ctrls.matchH2(h2(h))
for match != 0 {
i := match.first()
slot := g.slots.At(i)
if key == slot.key {
return slot.value, true
}
match = match.remove(i)
}
match = g.ctrls.matchEmpty()
if match != 0 {
return value, false
}
}
}
// Delete deletes the entry corresponding to the specified key from the map.
// It is a noop to delete a non-existent key.
func (m *Map[K, V]) Delete(key K) {
// Delete is find composed with "deleted at": we perform find(key), and
// then delete at the resulting slot if found.
h := m.hash(noescape(unsafe.Pointer(&key)), m.seed)
b := m.mutableBucket(h)
// NB: Unlike the abseil swiss table implementation which uses a common
// find routine for Get, Put, and Delete, we have to manually inline the
// find routine for performance.
seq := makeProbeSeq(h1(h), b.groupMask)
for ; ; seq = seq.next() {
g := b.groups.At(uintptr(seq.offset))
match := g.ctrls.matchH2(h2(h))
for match != 0 {
i := match.first()
s := g.slots.At(i)
if key == s.key {
b.used--
m.used--
*s = slot[K, V]{}
// Only a full group can appear in the middle of a probe
// sequence (a group with at least one empty slot terminates
// probing). Once a group becomes full, it stays full until
// rehashing/resizing. So if the group isn't full now, we can
// simply remove the element. Otherwise, we create a tombstone
// to mark the slot as deleted.
if g.ctrls.matchEmpty() != 0 {
g.ctrls.Set(i, ctrlEmpty)
b.growthLeft++
} else {
g.ctrls.Set(i, ctrlDeleted)
}
b.checkInvariants(m)
return
}
match = match.remove(i)
}
match = g.ctrls.matchEmpty()
if match != 0 {
b.checkInvariants(m)
return
}
}
}
// Clear deletes all entries from the map resulting in an empty map.
func (m *Map[K, V]) Clear() {
m.buckets(0, func(b *bucket[K, V]) bool {
for i := uint32(0); i <= b.groupMask; i++ {
g := b.groups.At(uintptr(i))
for j := uint32(0); j < groupSize; j++ {
g.ctrls.Set(j, ctrlEmpty)
*g.slots.At(j) = slot[K, V]{}
}
}
b.used = 0
b.resetGrowthLeft()
return true
})
// Reset the hash seed to make it more difficult for attackers to
// repeatedly trigger hash collisions. See issue
// https://github.com/golang/go/issues/25237.
m.seed = uintptr(fastrand64())
m.used = 0
}
// All calls yield sequentially for each key and value present in the map. If
// yield returns false, All stops the iteration. The map can be mutated
// during iteration, though there is no guarantee that the mutations will be
// visible to the iteration.
//
// TODO(peter): The naming of All and its signature are meant to conform to
// the range-over-function Go proposal. When that proposal is accepted (which
// seems likely), we'll be able to iterate over the map by doing:
//
// for k, v := range m.All {
// fmt.Printf("%v: %v\n", k, v)
// }
//
// See https://github.com/golang/go/issues/61897.
func (m *Map[K, V]) All(yield func(key K, value V) bool) {
// Randomize iteration order by starting iteration at a random bucket and
// within each bucket at a random offset.
offset := uintptr(fastrand64())
m.buckets(offset>>32, func(b *bucket[K, V]) bool {
if b.used == 0 {
return true
}
// Snapshot the groups, and groupMask so that iteration remains valid
// if the map is resized during iteration.
groups := b.groups
groupMask := b.groupMask
offset32 := uint32(offset)
for i := uint32(0); i <= groupMask; i++ {
g := groups.At(uintptr((i + offset32) & groupMask))
// TODO(peter): Skip over groups that are composed of only empty
// or deleted slots using matchEmptyOrDeleted() and counting the
// number of bits set.
for j := uint32(0); j < groupSize; j++ {
k := (j + offset32) & (groupSize - 1)
// Match full entries which have a high-bit of zero.
if (g.ctrls.Get(k) & ctrlEmpty) != ctrlEmpty {
slot := g.slots.At(k)
if !yield(slot.key, slot.value) {
return false
}
}
}
}
return true
})
}
// GoString implements the fmt.GoStringer interface which is used when
// formatting using the "%#v" format specifier.
func (m *Map[K, V]) GoString() string {
var buf strings.Builder
fmt.Fprintf(&buf, "used=%d global-depth=%d bucket-count=%d\n", m.used, m.globalDepth(), m.bucketCount())
m.buckets(0, func(b *bucket[K, V]) bool {
fmt.Fprintf(&buf, "bucket %d (%p): local-depth=%d\n", b.index, b, b.localDepth)
b.goFormat(&buf)
return true
})
return buf.String()
}
// Len returns the number of entries in the map.
func (m *Map[K, V]) Len() int {
return m.used
}
// capacity returns the total capacity of all map buckets.
func (m *Map[K, V]) capacity() int {
var capacity int
m.buckets(0, func(b *bucket[K, V]) bool {
capacity += int(b.capacity)
return true
})
return capacity
}
const (
// ptrSize and shiftMask are used to optimize code generation for
// Map.bucket(), Map.bucketCount(), and bucketStep(). This technique was
// lifted from the Go runtime's runtime/map.go:bucketShift() routine. Note
// that ptrSize will be either 4 on 32-bit archs or 8 on 64-bit archs.
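// On a 64-bit arch ^uintptr(0)>>63 evaluates to 1, so ptrSize is 4<<1 == 8;
// on a 32-bit arch the shift yields 0 and ptrSize is 4<<0 == 4.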
ptrSize = 4 << (^uintptr(0) >> 63)
shiftMask = ptrSize*8 - 1
)
// bucket returns the bucket corresponding to hash value h.
func (m *Map[K, V]) bucket(h uintptr) *bucket[K, V] {
// NB: It is faster to check for the single bucket case using a
// conditional than to index into the directory.
if m.globalShift == 0 {
return &m.bucket0
}
// When shifting by a variable amount the Go compiler inserts overflow
// checks that the shift is less than the maximum allowed (32 or 64).
// Masking the shift amount allows overflow checks to be elided.
return m.dir.At(h >> (m.globalShift & shiftMask))
}
func (m *Map[K, V]) mutableBucket(h uintptr) *bucket[K, V] {
// NB: It is faster to check for the single bucket case using a
// conditional than to index into the directory.
if m.globalShift == 0 {
return &m.bucket0
}
// When shifting by a variable amount the Go compiler inserts overflow
// checks that the shift is less than the maximum allowed (32 or 64).
// Masking the shift amount allows overflow checks to be elided.
b := m.dir.At(h >> (m.globalShift & shiftMask))
// The mutable bucket is the one located at m.dir[b.index]. This will
// usually be either the current bucket b, or the immediately preceding
// bucket which is usually in the same cache line.
return m.dir.At(uintptr(b.index))
}
// buckets calls yield sequentially for each bucket in the map. If yield
// returns false, iteration stops. Offset specifies the bucket to start
// iteration at (used to randomize iteration order).
func (m *Map[K, V]) buckets(offset uintptr, yield func(b *bucket[K, V]) bool) {
b := m.dir.At(offset & uintptr(m.bucketCount()-1))
// We iterate over the first bucket in a logical group of buckets (i.e.
// buckets which share bucket.groups). The first bucket has the accurate
// bucket.used field and those are also the buckets that are stepped
// through using bucketStep().
b = m.dir.At(uintptr(b.index))
// Loop termination is handled by remembering the start bucket index and
// exiting when it is reached again. Note that the startIndex needs to be
// adjusted to account for the directory growing during iteration (i.e.
// due to a mutation), so we remember the starting global depth as well in
// order to perform that adjustment. Whenever the directory grows by
// doubling, every existing bucket index will be doubled.
startIndex := b.index
startGlobalDepth := m.globalDepth()
for {
originalGlobalDepth := m.globalDepth()
originalLocalDepth := b.localDepth
originalIndex := b.index
if !yield(b) {
break
}
// The size of the directory can grow if the yield function mutates
// the map. We want to iterate over each bucket once, and if a bucket
// splits while we're iterating over it we want to skip over all of
// the buckets newly split from the one we're iterating over. We do
// this by snapshotting the bucket's local depth and using the
// snapshotted local depth to compute the bucket step.
//
// Note that b.index will also change if the directory grows. Consider
// the directory below with a globalDepth of 2 containing 4 buckets,
// each of which has a localDepth of 2.
//
// dir b.index b.localDepth
// +-----+---------+--------------+
// | 00 | 0 | 2 |
// +-----+---------+--------------+
// | 01 | 1 | 2 |
// +-----+---------+--------------+
// | 10 | 2 | 2 | <--- iteration point
// +-----+---------+--------------+
// | 11 | 3 | 2 |
// +-----+---------+--------------+
//
// If the directory grows during iteration, the index of the bucket
// we're iterating over will change. If the bucket we're iterating
// over split, then the local depth will have increased. Notice how
// the bucket that was previously at index 1 now is at index 2 and is
// pointed to by 2 directory entries: 010 and 011. The bucket being
// iterated over which was previously at index 2 is now at index 4.
// Iteration within a bucket takes a snapshot of the controls and
// slots to make sure we don't miss keys during iteration or iterate
// over keys more than once. But we also need to take care of the case
// where the bucket we're iterating over splits. In this case, we need
// to skip over the bucket at index 5 which can be done by computing
// the bucketStep using the bucket's depth prior to calling yield
// which in this example will be 1<<(3-2)==2.
//
// dir b.index b.localDepth
// +-----+---------+--------------+
// | 000 | 0 | 2 |
// +-----+ | |
// | 001 | | |
// +-----+---------+--------------+
// | 010 | 2 | 2 |
// +-----+ | |
// | 011 | | |
// +-----+---------+--------------+
// | 100 | 4 | 3 |
// +-----+---------+--------------+
// | 101 | 5 | 3 |
// +-----+---------+--------------+
// | 110 | 6 | 2 |
// +-----+ | |
// | 111 | | |
// +-----+---------+--------------+
// After calling yield, b is no longer valid. We determine the next
// bucket to iterate over using the b.index we cached before calling
// yield and adjusting for any directory growth that happened during
// the yield call.
i := adjustBucketIndex(originalIndex, m.globalDepth(), originalGlobalDepth)
i += bucketStep(m.globalDepth(), originalLocalDepth)
i &= (m.bucketCount() - 1)
// Similar to the adjustment for b's index, we compute the starting
// bucket's new index accounting for directory growth.
adjustedStartIndex := adjustBucketIndex(startIndex, m.globalDepth(), startGlobalDepth)
if i == adjustedStartIndex {
break
}
b = m.dir.At(uintptr(i))
}
}
// globalDepth returns the number of bits from the top of the hash to use for
// indexing in the buckets directory.
func (m *Map[K, V]) globalDepth() uint32 {
if m.globalShift == 0 {
return 0
}
return 64 - m.globalShift
}
// bucketCount returns the number of buckets in the buckets directory.
func (m *Map[K, V]) bucketCount() uint32 {
const shiftMask = 31
return uint32(1) << (m.globalDepth() & shiftMask)
}
// bucketStep is the number of buckets to step over in the buckets directory
// to reach the next different bucket. A bucket occupies 1 or more contiguous
// entries in the buckets directory specified by the range:
//
// [b.index:b.index+bucketStep(m.globalDepth(), b.localDepth)]
func bucketStep(globalDepth, localDepth uint32) uint32 {
const shiftMask = 31
return uint32(1) << ((globalDepth - localDepth) & shiftMask)
}
// adjustBucketIndex adjusts the index of a bucket to account for the growth
// of the directory where index was captured at originalGlobalDepth and we're
// computing where that index will reside in the directory at
// currentGlobalDepth.
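// For example, a bucket that was at index 2 when the global depth was 2 moves
// to index 2*(1<<(3-2)) == 4 after the directory grows to a global depth of 3.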
func adjustBucketIndex(index, currentGlobalDepth, originalGlobalDepth uint32) uint32 {
return index * (1 << (currentGlobalDepth - originalGlobalDepth))
}
// installBucket installs a bucket into the buckets directory, overwriting
// every index in the range of entries the bucket occupies.
func (m *Map[K, V]) installBucket(b *bucket[K, V]) *bucket[K, V] {
step := bucketStep(m.globalDepth(), b.localDepth)
for i := uint32(0); i < step; i++ {
*m.dir.At(uintptr(b.index + i)) = *b
}
return m.dir.At(uintptr(b.index))
}
// growDirectory grows the directory slice to 1<<newGlobalDepth buckets and
// returns the new index location for the bucket specified by index.
func (m *Map[K, V]) growDirectory(newGlobalDepth, index uint32) (newIndex uint32) {
if invariants && newGlobalDepth > 32 {
panic(fmt.Sprintf("invariant failed: unexpectedly large newGlobalDepth %d->%d",
m.globalDepth(), newGlobalDepth))
}
newDir := makeUnsafeSlice(make([]bucket[K, V], 1<<newGlobalDepth))
// NB: It would be more natural to use Map.buckets() here, but that
// routine uses b.index during iteration which we're mutating in the loop
// below.
lastIndex := uint32(math.MaxUint32)
setNewIndex := true
for i, j, n := uint32(0), uint32(0), m.bucketCount(); i < n; i++ {
b := m.dir.At(uintptr(i))
if b.index == lastIndex {
continue
}
lastIndex = b.index
if b.index == index && setNewIndex {
newIndex = j
setNewIndex = false
}
b.index = j
step := bucketStep(newGlobalDepth, b.localDepth)
for k := uint32(0); k < step; k++ {
*newDir.At(uintptr(j + k)) = *b
}
j += step
}
// Zero out bucket0 if we're growing from 1 bucket (which uses bucket0) to
// more than 1 bucket.
if m.globalShift == 0 {
m.bucket0 = bucket[K, V]{}
}
m.dir = newDir
m.globalShift = 64 - newGlobalDepth
m.checkInvariants()
return newIndex
}
// checkInvariants verifies the internal consistency of the map's structure,
// checking conditions that should always be true for a correctly functioning
// map. If any of these invariants are violated, it panics, indicating a bug
// in the map implementation.
func (m *Map[K, V]) checkInvariants() {
if invariants {
if m.globalShift == 0 {
if m.dir.ptr != unsafe.Pointer(&m.bucket0) {
panic(fmt.Sprintf("directory (%p) does not point to bucket0 (%p)", m.dir.ptr, &m.bucket0))
}
if m.bucket0.localDepth != 0 {
panic(fmt.Sprintf("expected local-depth=0, but found %d", m.bucket0.localDepth))
}
} else {
for i, n := uint32(0), m.bucketCount(); i < n; i++ {
b := m.dir.At(uintptr(i))
if b == nil {
panic(fmt.Sprintf("dir[%d]: nil bucket", i))
}
if b.localDepth > m.globalDepth() {
panic(fmt.Sprintf("dir[%d]: local-depth=%d is greater than global-depth=%d",
i, b.localDepth, m.globalDepth()))
}
n := uint32(1) << (m.globalDepth() - b.localDepth)
if i < b.index || i >= b.index+n {
panic(fmt.Sprintf("dir[%d]: out of expected range [%d,%d)", i, b.index, b.index+n))
}
}
}
}
}
func (b *bucket[K, V]) close(allocator Allocator[K, V]) {
if b.capacity > 0 {
allocator.Free(b.groups.Slice(0, uintptr(b.groupMask+1)))
b.capacity = 0
b.used = 0
}
b.groups = makeUnsafeSlice([]Group[K, V](nil))
b.groupMask = 0
}
// tombstones returns the number of deleted (tombstone) entries in the bucket.
// A tombstone is a slot that has been deleted but is still considered
// occupied so as not to violate the probing invariant.
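// The maximum number of filled slots in a bucket is
// capacity*maxAvgGroupLoad/groupSize (the 7/8 load factor); any of those
// slots that are neither currently used nor available for growth must hold
// tombstones.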
func (b *bucket[K, V]) tombstones() uint32 {
return (b.capacity*maxAvgGroupLoad)/groupSize - b.used - b.growthLeft
}