1 | //===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "src/__support/common.h" |
10 | #include "src/__support/endian.h" |
11 | |
12 | namespace LIBC_NAMESPACE { |
13 | namespace internal { |
14 | |
15 | // GPU architectures are 64-bit but use 32-bit general purpose registers. |
16 | #ifdef LIBC_TARGET_ARCH_IS_GPU |
17 | using bitmask_t = uint32_t; |
18 | #else |
19 | using bitmask_t = uintptr_t; |
20 | #endif |
21 | |
22 | // Helper function to spread a byte across the whole word. |
23 | // Accumutively, the procedure looks like: |
24 | // byte = 0x00000000000000ff |
25 | // byte | (byte << 8) = 0x000000000000ffff |
26 | // byte | (byte << 16) = 0x00000000ffffffff |
27 | // byte | (byte << 32) = 0xffffffffffffffff |
28 | LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) { |
29 | size_t shift_amount = 8; |
30 | while (shift_amount < sizeof(bitmask_t) * 8) { |
31 | byte |= byte << shift_amount; |
32 | shift_amount <<= 1; |
33 | } |
34 | return byte; |
35 | } |
36 | |
37 | using BitMask = BitMaskAdaptor<bitmask_t, 0x8ull>; |
38 | using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>; |
39 | |
40 | struct Group { |
41 | bitmask_t data; |
42 | |
43 | // Load a group of control words from an arbitary address. |
44 | LIBC_INLINE static Group load(const void *addr) { |
45 | union { |
46 | bitmask_t value; |
47 | char bytes[sizeof(bitmask_t)]; |
48 | } data; |
49 | for (size_t i = 0; i < sizeof(bitmask_t); ++i) |
50 | data.bytes[i] = static_cast<const char *>(addr)[i]; |
51 | return {.data: data.value}; |
52 | } |
53 | |
54 | // Load a group of control words from an aligned address. |
55 | LIBC_INLINE static Group load_aligned(const void *addr) { |
56 | return *static_cast<const Group *>(addr); |
57 | } |
58 | |
59 | // Find out the lanes equal to the given byte and return the bitmask |
60 | // with corresponding bits set. |
61 | LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const { |
62 | // Given byte = 0x10, suppose the data is: |
63 | // |
64 | // data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] |
65 | // |
66 | // First, we compare the byte using XOR operation: |
67 | // |
68 | // [ 0x10 | 0x10 | 0x10 | 0x10 | ... ] (0) |
69 | // ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] (1) |
70 | // = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2) |
71 | // |
72 | // Notice that the equal positions will now be 0x00, so if we substract 0x01 |
73 | // respective to every byte, it will need to carry the substraction to upper |
74 | // bits (assume no carry from the hidden parts) |
75 | // [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2) |
76 | // - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ] (3) |
77 | // = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4) |
78 | // |
79 | // But there may be some bytes whose highest bit is already set after the |
80 | // xor operation. To rule out these positions, we AND them with the NOT |
81 | // of the XOR result: |
82 | // |
83 | // [ 0xFF | 0xFF | 0xEF | 0x1E | ... ] (5, NOT (2)) |
84 | // & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4) |
85 | // = [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6) |
86 | // |
87 | // To make the bitmask iteratable, only one bit can be set in each stride. |
88 | // So we AND each byte with 0x80 and keep only the highest bit: |
89 | // |
90 | // [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6) |
91 | // & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ] (7) |
92 | // = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ] (8) |
93 | // |
94 | // However, there are possitbilites for false positives. For example, if the |
95 | // data is [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there |
96 | // is a key only differs from the searched by the lowest bit. The claims |
97 | // are: |
98 | // |
99 | // - This never happens for `EMPTY` and `DELETED`, only full entries. |
100 | // - The check for key equality will catch these. |
101 | // - This only happens if there is at least 1 true match. |
102 | // - The chance of this happening is very low (< 1% chance per byte). |
103 | auto cmp = data ^ repeat_byte(byte); |
104 | auto result = LIBC_NAMESPACE::Endian::to_little_endian( |
105 | v: (cmp - repeat_byte(byte: 0x01)) & ~cmp & repeat_byte(byte: 0x80)); |
106 | return {BitMask{.word: result}}; |
107 | } |
108 | |
109 | // Find out the lanes equal to EMPTY or DELETE (highest bit set) and |
110 | // return the bitmask with corresponding bits set. |
111 | LIBC_INLINE BitMask mask_available() const { |
112 | return {LIBC_NAMESPACE::Endian::to_little_endian(v: data) & repeat_byte(byte: 0x80)}; |
113 | } |
114 | |
115 | LIBC_INLINE IteratableBitMask occupied() const { |
116 | return { |
117 | {.word: static_cast<bitmask_t>(mask_available().word ^ repeat_byte(byte: 0x80))}}; |
118 | } |
119 | }; |
120 | } // namespace internal |
121 | } // namespace LIBC_NAMESPACE |
122 | |