1 | //===-- Unittests for hash ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "src/__support/CPP/new.h" |
10 | #include "src/__support/hash.h" |
11 | #include "src/stdlib/rand.h" |
12 | #include "src/stdlib/srand.h" |
13 | #include "src/string/memset.h" |
14 | #include "test/UnitTest/Test.h" |
15 | |
16 | template <class T> struct AlignedMemory { |
17 | T *data; |
18 | size_t offset; |
19 | std::align_val_t alignment; |
20 | AlignedMemory(size_t size, size_t alignment, size_t offset) |
21 | : offset(offset), alignment{alignment} { |
22 | size_t sz = size * sizeof(T); |
23 | size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment; |
24 | LIBC_NAMESPACE::AllocChecker ac; |
25 | data = static_cast<T *>(operator new(aligned, this->alignment, ac)); |
26 | data += offset % alignment; |
27 | } |
28 | ~AlignedMemory() { operator delete(data - offset, alignment); } |
29 | }; |
30 | |
31 | size_t sizes[] = {0, 1, 23, 59, 1024, 5261}; |
32 | uint8_t values[] = {0, 1, 23, 59, 102, 255}; |
33 | |
34 | // Hash value should not change with different alignments. |
35 | TEST(LlvmLibcHashTest, SanityCheck) { |
36 | for (size_t sz : sizes) { |
37 | for (uint8_t val : values) { |
38 | uint64_t hash; |
39 | { |
40 | AlignedMemory<char> mem(sz, 64, 0); |
41 | LIBC_NAMESPACE::memset(ptr: mem.data, value: val, count: sz); |
42 | LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef}; |
43 | state.update(ptr: mem.data, size: sz); |
44 | hash = state.finish(); |
45 | } |
46 | for (size_t offset = 1; offset < 64; ++offset) { |
47 | AlignedMemory<char> mem(sz, 64, offset); |
48 | LIBC_NAMESPACE::memset(ptr: mem.data, value: val, count: sz); |
49 | LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef}; |
50 | state.update(ptr: mem.data, size: sz); |
51 | ASSERT_EQ(hash, state.finish()); |
52 | } |
53 | } |
54 | } |
55 | } |
56 | |
57 | static inline size_t popcnt(uint64_t x) { |
58 | size_t count = 0; |
59 | while (x) { |
60 | count += x & 1; |
61 | x >>= 1; |
62 | } |
63 | return count; |
64 | } |
65 | |
66 | // Mutate a single bit in a rather large input. The hash should change |
67 | // significantly. At least one fifth of the bits should not match. |
68 | TEST(LlvmLibcHashTest, Avalanche) { |
69 | for (size_t sz : sizes) { |
70 | for (uint8_t val : values) { |
71 | uint64_t hash; |
72 | AlignedMemory<char> mem(sz, 64, 0); |
73 | LIBC_NAMESPACE::memset(ptr: mem.data, value: val, count: sz); |
74 | { |
75 | LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890}; |
76 | state.update(ptr: mem.data, size: sz); |
77 | hash = state.finish(); |
78 | } |
79 | for (size_t i = 0; i < sz; ++i) { |
80 | for (size_t j = 0; j < 8; ++j) { |
81 | uint8_t mask = 1 << j; |
82 | mem.data[i] ^= mask; |
83 | { |
84 | LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890}; |
85 | state.update(ptr: mem.data, size: sz); |
86 | uint64_t new_hash = state.finish(); |
87 | ASSERT_GE(popcnt(hash ^ new_hash), size_t{13}); |
88 | } |
89 | mem.data[i] ^= mask; |
90 | } |
91 | } |
92 | } |
93 | } |
94 | } |
95 | |
96 | // Hash a random sequence of input. The LSB should be uniform enough such that |
97 | // values spread across the entire range. |
98 | TEST(LlvmLibcHashTest, UniformLSB) { |
99 | LIBC_NAMESPACE::srand(seed: 0xffffffff); |
100 | for (size_t sz : sizes) { |
101 | AlignedMemory<size_t> counters(sz, sizeof(size_t), 0); |
102 | LIBC_NAMESPACE::memset(ptr: counters.data, value: 0, count: sz * sizeof(size_t)); |
103 | for (size_t i = 0; i < 200 * sz; ++i) { |
104 | int randomness[8] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), |
105 | LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), |
106 | LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), |
107 | LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()}; |
108 | { |
109 | LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b}; |
110 | state.update(ptr: randomness, size: sizeof(randomness)); |
111 | uint64_t hash = state.finish(); |
112 | counters.data[hash % sz]++; |
113 | } |
114 | } |
115 | for (size_t i = 0; i < sz; ++i) { |
116 | ASSERT_GE(counters.data[i], size_t{140}); |
117 | ASSERT_LE(counters.data[i], size_t{260}); |
118 | } |
119 | } |
120 | } |
121 | |
122 | // Hash a low entropy sequence. The MSB should be uniform enough such that |
123 | // there is no significant bias even if the value range is small. |
124 | // Top 7 bits is examined because it will be used as a secondary key in |
125 | // the hash table. |
126 | TEST(LlvmLibcHashTest, UniformMSB) { |
127 | size_t sz = 1 << 7; |
128 | AlignedMemory<size_t> counters(sz, sizeof(size_t), 0); |
129 | LIBC_NAMESPACE::memset(ptr: counters.data, value: 0, count: sz * sizeof(size_t)); |
130 | for (size_t i = 0; i < 200 * sz; ++i) { |
131 | LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8}; |
132 | state.update(ptr: &i, size: sizeof(i)); |
133 | uint64_t hash = state.finish(); |
134 | counters.data[hash >> 57]++; |
135 | } |
136 | for (size_t i = 0; i < sz; ++i) { |
137 | ASSERT_GE(counters.data[i], size_t{140}); |
138 | ASSERT_LE(counters.data[i], size_t{260}); |
139 | } |
140 | } |
141 | |