1//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/__support/common.h"
10#include "src/__support/endian.h"
11
12namespace LIBC_NAMESPACE {
13namespace internal {
14
15// GPU architectures are 64-bit but use 32-bit general purpose registers.
16#ifdef LIBC_TARGET_ARCH_IS_GPU
17using bitmask_t = uint32_t;
18#else
19using bitmask_t = uintptr_t;
20#endif
21
22// Helper function to spread a byte across the whole word.
23// Accumutively, the procedure looks like:
24// byte = 0x00000000000000ff
25// byte | (byte << 8) = 0x000000000000ffff
26// byte | (byte << 16) = 0x00000000ffffffff
27// byte | (byte << 32) = 0xffffffffffffffff
28LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) {
29 size_t shift_amount = 8;
30 while (shift_amount < sizeof(bitmask_t) * 8) {
31 byte |= byte << shift_amount;
32 shift_amount <<= 1;
33 }
34 return byte;
35}
36
37using BitMask = BitMaskAdaptor<bitmask_t, 0x8ull>;
38using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
39
40struct Group {
41 bitmask_t data;
42
43 // Load a group of control words from an arbitary address.
44 LIBC_INLINE static Group load(const void *addr) {
45 union {
46 bitmask_t value;
47 char bytes[sizeof(bitmask_t)];
48 } data;
49 for (size_t i = 0; i < sizeof(bitmask_t); ++i)
50 data.bytes[i] = static_cast<const char *>(addr)[i];
51 return {.data: data.value};
52 }
53
54 // Load a group of control words from an aligned address.
55 LIBC_INLINE static Group load_aligned(const void *addr) {
56 return *static_cast<const Group *>(addr);
57 }
58
59 // Find out the lanes equal to the given byte and return the bitmask
60 // with corresponding bits set.
61 LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const {
62 // Given byte = 0x10, suppose the data is:
63 //
64 // data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
65 //
66 // First, we compare the byte using XOR operation:
67 //
68 // [ 0x10 | 0x10 | 0x10 | 0x10 | ... ] (0)
69 // ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] (1)
70 // = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2)
71 //
72 // Notice that the equal positions will now be 0x00, so if we substract 0x01
73 // respective to every byte, it will need to carry the substraction to upper
74 // bits (assume no carry from the hidden parts)
75 // [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2)
76 // - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ] (3)
77 // = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4)
78 //
79 // But there may be some bytes whose highest bit is already set after the
80 // xor operation. To rule out these positions, we AND them with the NOT
81 // of the XOR result:
82 //
83 // [ 0xFF | 0xFF | 0xEF | 0x1E | ... ] (5, NOT (2))
84 // & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4)
85 // = [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6)
86 //
87 // To make the bitmask iteratable, only one bit can be set in each stride.
88 // So we AND each byte with 0x80 and keep only the highest bit:
89 //
90 // [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6)
91 // & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ] (7)
92 // = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ] (8)
93 //
94 // However, there are possitbilites for false positives. For example, if the
95 // data is [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there
96 // is a key only differs from the searched by the lowest bit. The claims
97 // are:
98 //
99 // - This never happens for `EMPTY` and `DELETED`, only full entries.
100 // - The check for key equality will catch these.
101 // - This only happens if there is at least 1 true match.
102 // - The chance of this happening is very low (< 1% chance per byte).
103 auto cmp = data ^ repeat_byte(byte);
104 auto result = LIBC_NAMESPACE::Endian::to_little_endian(
105 v: (cmp - repeat_byte(byte: 0x01)) & ~cmp & repeat_byte(byte: 0x80));
106 return {BitMask{.word: result}};
107 }
108
109 // Find out the lanes equal to EMPTY or DELETE (highest bit set) and
110 // return the bitmask with corresponding bits set.
111 LIBC_INLINE BitMask mask_available() const {
112 return {LIBC_NAMESPACE::Endian::to_little_endian(v: data) & repeat_byte(byte: 0x80)};
113 }
114
115 LIBC_INLINE IteratableBitMask occupied() const {
116 return {
117 {.word: static_cast<bitmask_t>(mask_available().word ^ repeat_byte(byte: 0x80))}};
118 }
119};
120} // namespace internal
121} // namespace LIBC_NAMESPACE
122

source code of libc/src/__support/HashTable/generic/bitmask_impl.inc