1//===-- Unittests for hash ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/__support/CPP/new.h"
10#include "src/__support/hash.h"
11#include "src/stdlib/rand.h"
12#include "src/stdlib/srand.h"
13#include "src/string/memset.h"
14#include "test/UnitTest/Test.h"
15
16template <class T> struct AlignedMemory {
17 T *data;
18 size_t offset;
19 std::align_val_t alignment;
20 AlignedMemory(size_t size, size_t alignment, size_t offset)
21 : offset(offset), alignment{alignment} {
22 size_t sz = size * sizeof(T);
23 size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment;
24 LIBC_NAMESPACE::AllocChecker ac;
25 data = static_cast<T *>(operator new(aligned, this->alignment, ac));
26 data += offset % alignment;
27 }
28 ~AlignedMemory() { operator delete(data - offset, alignment); }
29};
30
31size_t sizes[] = {0, 1, 23, 59, 1024, 5261};
32uint8_t values[] = {0, 1, 23, 59, 102, 255};
33
34// Hash value should not change with different alignments.
35TEST(LlvmLibcHashTest, SanityCheck) {
36 for (size_t sz : sizes) {
37 for (uint8_t val : values) {
38 uint64_t hash;
39 {
40 AlignedMemory<char> mem(sz, 64, 0);
41 LIBC_NAMESPACE::memset(ptr: mem.data, value: val, count: sz);
42 LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
43 state.update(ptr: mem.data, size: sz);
44 hash = state.finish();
45 }
46 for (size_t offset = 1; offset < 64; ++offset) {
47 AlignedMemory<char> mem(sz, 64, offset);
48 LIBC_NAMESPACE::memset(ptr: mem.data, value: val, count: sz);
49 LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
50 state.update(ptr: mem.data, size: sz);
51 ASSERT_EQ(hash, state.finish());
52 }
53 }
54 }
55}
56
57static inline size_t popcnt(uint64_t x) {
58 size_t count = 0;
59 while (x) {
60 count += x & 1;
61 x >>= 1;
62 }
63 return count;
64}
65
66// Mutate a single bit in a rather large input. The hash should change
67// significantly. At least one fifth of the bits should not match.
68TEST(LlvmLibcHashTest, Avalanche) {
69 for (size_t sz : sizes) {
70 for (uint8_t val : values) {
71 uint64_t hash;
72 AlignedMemory<char> mem(sz, 64, 0);
73 LIBC_NAMESPACE::memset(ptr: mem.data, value: val, count: sz);
74 {
75 LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
76 state.update(ptr: mem.data, size: sz);
77 hash = state.finish();
78 }
79 for (size_t i = 0; i < sz; ++i) {
80 for (size_t j = 0; j < 8; ++j) {
81 uint8_t mask = 1 << j;
82 mem.data[i] ^= mask;
83 {
84 LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
85 state.update(ptr: mem.data, size: sz);
86 uint64_t new_hash = state.finish();
87 ASSERT_GE(popcnt(hash ^ new_hash), size_t{13});
88 }
89 mem.data[i] ^= mask;
90 }
91 }
92 }
93 }
94}
95
96// Hash a random sequence of input. The LSB should be uniform enough such that
97// values spread across the entire range.
98TEST(LlvmLibcHashTest, UniformLSB) {
99 LIBC_NAMESPACE::srand(seed: 0xffffffff);
100 for (size_t sz : sizes) {
101 AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
102 LIBC_NAMESPACE::memset(ptr: counters.data, value: 0, count: sz * sizeof(size_t));
103 for (size_t i = 0; i < 200 * sz; ++i) {
104 int randomness[8] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
105 LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
106 LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
107 LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()};
108 {
109 LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b};
110 state.update(ptr: randomness, size: sizeof(randomness));
111 uint64_t hash = state.finish();
112 counters.data[hash % sz]++;
113 }
114 }
115 for (size_t i = 0; i < sz; ++i) {
116 ASSERT_GE(counters.data[i], size_t{140});
117 ASSERT_LE(counters.data[i], size_t{260});
118 }
119 }
120}
121
122// Hash a low entropy sequence. The MSB should be uniform enough such that
123// there is no significant bias even if the value range is small.
124// Top 7 bits is examined because it will be used as a secondary key in
125// the hash table.
126TEST(LlvmLibcHashTest, UniformMSB) {
127 size_t sz = 1 << 7;
128 AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
129 LIBC_NAMESPACE::memset(ptr: counters.data, value: 0, count: sz * sizeof(size_t));
130 for (size_t i = 0; i < 200 * sz; ++i) {
131 LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8};
132 state.update(ptr: &i, size: sizeof(i));
133 uint64_t hash = state.finish();
134 counters.data[hash >> 57]++;
135 }
136 for (size_t i = 0; i < sz; ++i) {
137 ASSERT_GE(counters.data[i], size_t{140});
138 ASSERT_LE(counters.data[i], size_t{260});
139 }
140}
141

source code of libc/test/src/__support/hash_test.cpp