1//===-- Portable string hash function ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
10#define LLVM_LIBC_SRC___SUPPORT_HASH_H
11
12#include "src/__support/CPP/bit.h" // rotl
13#include "src/__support/CPP/limits.h" // numeric_limits
14#include "src/__support/macros/attributes.h" // LIBC_INLINE
15#include "src/__support/uint128.h" // UInt128
16#include <stdint.h> // For uint64_t
17
18namespace LIBC_NAMESPACE {
19namespace internal {
20
21// Folded multiplication.
22// This function multiplies two 64-bit integers and xor the high and
23// low 64-bit parts of the result.
24LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
25 UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
26 uint64_t low = static_cast<uint64_t>(p);
27 uint64_t high = static_cast<uint64_t>(p >> 64);
28 return low ^ high;
29}
30
31// Read as little endian.
32// Shift-and-or implementation does not give a satisfactory code on aarch64.
33// Therefore, we use a union to read the value.
34template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
35 const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
36 union {
37 T value;
38 uint8_t buffer[sizeof(T)];
39 } data;
40#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
41 // Compiler should able to optimize this as a load followed by a byte swap.
42 // On aarch64 (-mbig-endian), this compiles to the following for int:
43 // ldr w0, [x0]
44 // rev w0, w0
45 // ret
46 for (size_t i = 0; i < sizeof(T); ++i) {
47 data.buffer[i] = bytes[sizeof(T) - i - 1];
48 }
49#else
50 for (size_t i = 0; i < sizeof(T); ++i) {
51 data.buffer[i] = bytes[i];
52 }
53#endif
54 return data.value;
55}
56
57// Specialized read functions for small values. size must be <= 8.
58LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low,
59 uint64_t &high) {
60 const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
61 if (size >= 2) {
62 if (size >= 4) {
63 low = static_cast<uint64_t>(read_little_endian<uint32_t>(ptr: &bytes[0]));
64 high =
65 static_cast<uint64_t>(read_little_endian<uint32_t>(ptr: &bytes[size - 4]));
66 } else {
67 low = static_cast<uint64_t>(read_little_endian<uint16_t>(ptr: &bytes[0]));
68 high = static_cast<uint64_t>(bytes[size - 1]);
69 }
70 } else {
71 if (size > 0) {
72 low = static_cast<uint64_t>(bytes[0]);
73 high = static_cast<uint64_t>(bytes[0]);
74 } else {
75 low = 0;
76 high = 0;
77 }
78 }
79}
80
81// This constant comes from Kunth's prng (it empirically works well).
82LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;
83// Rotation amount for mixing.
84LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;
85
86// Randomly generated values. For now, we use the same values as in aHash as
87// they are widely tested.
88// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
89LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
90 {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
91 0x082efa98ec4e6c89},
92 {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
93 0x3f84d5b5b5470917},
94};
95
96// This is a portable string hasher. It is not cryptographically secure.
97// The quality of the hash is good enough to pass all tests in SMHasher.
98// The implementation is derived from the generic routine of aHash.
99class HashState {
100 uint64_t buffer;
101 uint64_t pad;
102 uint64_t extra_keys[2];
103 LIBC_INLINE void update(uint64_t low, uint64_t high) {
104 uint64_t combined =
105 folded_multiply(x: low ^ extra_keys[0], y: high ^ extra_keys[1]);
106 buffer = (buffer + pad) ^ combined;
107 buffer = cpp::rotl(value: buffer, rotate: ROTATE);
108 }
109 LIBC_INLINE static uint64_t mix(uint64_t seed) {
110 HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
111 RANDOMNESS[0][3]};
112 mixer.update(low: seed, high: 0);
113 return mixer.finish();
114 }
115
116public:
117 LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c,
118 uint64_t d)
119 : buffer(a), pad(b), extra_keys{c, d} {}
120 LIBC_INLINE HashState(uint64_t seed) {
121 // Mix one more round of the seed to make it stronger.
122 uint64_t mixed = mix(seed);
123 buffer = RANDOMNESS[1][0] ^ mixed;
124 pad = RANDOMNESS[1][1] ^ mixed;
125 extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
126 extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
127 }
128 LIBC_INLINE void update(const void *ptr, size_t size) {
129 uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
130 buffer = (buffer + size) * MULTIPLE;
131 uint64_t low, high;
132 if (size > 8) {
133 if (size > 16) {
134 // update tail
135 low = read_little_endian<uint64_t>(ptr: &bytes[size - 16]);
136 high = read_little_endian<uint64_t>(ptr: &bytes[size - 8]);
137 update(low, high);
138 while (size > 16) {
139 low = read_little_endian<uint64_t>(ptr: &bytes[0]);
140 high = read_little_endian<uint64_t>(ptr: &bytes[8]);
141 update(low, high);
142 bytes += 16;
143 size -= 16;
144 }
145 } else {
146 low = read_little_endian<uint64_t>(ptr: &bytes[0]);
147 high = read_little_endian<uint64_t>(ptr: &bytes[size - 8]);
148 update(low, high);
149 }
150 } else {
151 read_small_values(ptr, size, low, high);
152 update(low, high);
153 }
154 }
155 LIBC_INLINE uint64_t finish() {
156 int rot = buffer & 63;
157 uint64_t folded = folded_multiply(x: buffer, y: pad);
158 return cpp::rotl(value: folded, rotate: rot);
159 }
160};
161
162} // namespace internal
163} // namespace LIBC_NAMESPACE
164
165#endif // LLVM_LIBC_SRC___SUPPORT_HASH_H
166

source code of libc/src/__support/hash.h