| 1 | //===-- Unittests for hash ------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "src/__support/CPP/new.h" |
| 10 | #include "src/__support/hash.h" |
| 11 | #include "src/stdlib/rand.h" |
| 12 | #include "src/stdlib/srand.h" |
| 13 | #include "src/string/memset.h" |
| 14 | #include "test/UnitTest/Test.h" |
| 15 | |
| 16 | template <class T> struct AlignedMemory { |
| 17 | T *data; |
| 18 | size_t offset; |
| 19 | std::align_val_t alignment; |
| 20 | AlignedMemory(size_t size, size_t alignment, size_t offset) |
| 21 | : offset(offset), alignment{alignment} { |
| 22 | size_t sz = size * sizeof(T); |
| 23 | size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment; |
| 24 | LIBC_NAMESPACE::AllocChecker ac; |
| 25 | data = static_cast<T *>(operator new(aligned, this->alignment, ac)); |
| 26 | data += offset % alignment; |
| 27 | } |
| 28 | ~AlignedMemory() { operator delete(data - offset, alignment); } |
| 29 | }; |
| 30 | |
| 31 | size_t sizes[] = {0, 1, 23, 59, 1024, 5261}; |
| 32 | uint8_t values[] = {0, 1, 23, 59, 102, 255}; |
| 33 | |
| 34 | // Hash value should not change with different alignments. |
| 35 | TEST(LlvmLibcHashTest, SanityCheck) { |
| 36 | for (size_t sz : sizes) { |
| 37 | for (uint8_t val : values) { |
| 38 | uint64_t hash; |
| 39 | { |
| 40 | AlignedMemory<char> mem(sz, 64, 0); |
| 41 | LIBC_NAMESPACE::memset(mem.data, val, sz); |
| 42 | LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef}; |
| 43 | state.update(mem.data, sz); |
| 44 | hash = state.finish(); |
| 45 | } |
| 46 | for (size_t offset = 1; offset < 64; ++offset) { |
| 47 | AlignedMemory<char> mem(sz, 64, offset); |
| 48 | LIBC_NAMESPACE::memset(mem.data, val, sz); |
| 49 | LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef}; |
| 50 | state.update(mem.data, sz); |
| 51 | ASSERT_EQ(hash, state.finish()); |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | static inline size_t popcnt(uint64_t x) { |
| 58 | size_t count = 0; |
| 59 | while (x) { |
| 60 | count += x & 1; |
| 61 | x >>= 1; |
| 62 | } |
| 63 | return count; |
| 64 | } |
| 65 | |
| 66 | // Mutate a single bit in a rather large input. The hash should change |
| 67 | // significantly. At least one fifth of the bits should not match. |
| 68 | TEST(LlvmLibcHashTest, Avalanche) { |
| 69 | for (size_t sz : sizes) { |
| 70 | for (uint8_t val : values) { |
| 71 | uint64_t hash; |
| 72 | AlignedMemory<char> mem(sz, 64, 0); |
| 73 | LIBC_NAMESPACE::memset(mem.data, val, sz); |
| 74 | { |
| 75 | LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890}; |
| 76 | state.update(mem.data, sz); |
| 77 | hash = state.finish(); |
| 78 | } |
| 79 | for (size_t i = 0; i < sz; ++i) { |
| 80 | for (size_t j = 0; j < 8; ++j) { |
| 81 | uint8_t mask = static_cast<uint8_t>(1 << j); |
| 82 | mem.data[i] ^= mask; |
| 83 | { |
| 84 | LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890}; |
| 85 | state.update(mem.data, sz); |
| 86 | uint64_t new_hash = state.finish(); |
| 87 | ASSERT_GE(popcnt(hash ^ new_hash), size_t{13}); |
| 88 | } |
| 89 | mem.data[i] ^= mask; |
| 90 | } |
| 91 | } |
| 92 | } |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | // Hash a random sequence of input. The LSB should be uniform enough such that |
| 97 | // values spread across the entire range. |
| 98 | TEST(LlvmLibcHashTest, UniformLSB) { |
| 99 | LIBC_NAMESPACE::srand(0xffffffff); |
| 100 | for (size_t sz : sizes) { |
| 101 | AlignedMemory<size_t> counters(sz, sizeof(size_t), 0); |
| 102 | LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t)); |
| 103 | for (size_t i = 0; i < 200 * sz; ++i) { |
| 104 | int randomness[8] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), |
| 105 | LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), |
| 106 | LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), |
| 107 | LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()}; |
| 108 | { |
| 109 | LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b}; |
| 110 | state.update(randomness, sizeof(randomness)); |
| 111 | uint64_t hash = state.finish(); |
| 112 | counters.data[hash % sz]++; |
| 113 | } |
| 114 | } |
| 115 | for (size_t i = 0; i < sz; ++i) { |
| 116 | ASSERT_GE(counters.data[i], size_t{140}); |
| 117 | ASSERT_LE(counters.data[i], size_t{260}); |
| 118 | } |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | // Hash a low entropy sequence. The MSB should be uniform enough such that |
| 123 | // there is no significant bias even if the value range is small. |
| 124 | // Top 7 bits is examined because it will be used as a secondary key in |
| 125 | // the hash table. |
| 126 | TEST(LlvmLibcHashTest, UniformMSB) { |
| 127 | size_t sz = 1 << 7; |
| 128 | AlignedMemory<size_t> counters(sz, sizeof(size_t), 0); |
| 129 | LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t)); |
| 130 | for (size_t i = 0; i < 200 * sz; ++i) { |
| 131 | LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8}; |
| 132 | state.update(&i, sizeof(i)); |
| 133 | uint64_t hash = state.finish(); |
| 134 | counters.data[hash >> 57]++; |
| 135 | } |
| 136 | for (size_t i = 0; i < sz; ++i) { |
| 137 | ASSERT_GE(counters.data[i], size_t{140}); |
| 138 | ASSERT_LE(counters.data[i], size_t{260}); |
| 139 | } |
| 140 | } |
| 141 | |