hash_test.cpp source code [libc/test/src/__support/hash_test.cpp]

1	//===-- Unittests for hash ------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/__support/CPP/new.h"
10	#include "src/__support/hash.h"
11	#include "src/stdlib/rand.h"
12	#include "src/stdlib/srand.h"
13	#include "src/string/memset.h"
14	#include "test/UnitTest/Test.h"
15
16	template <class T> struct AlignedMemory {
17	T *data;
18	size_t offset;
19	std::align_val_t alignment;
20	AlignedMemory(size_t size, size_t alignment, size_t offset)
21	: offset(offset), alignment{alignment} {
22	size_t sz = size * sizeof(T);
23	size_t aligned = sz + ((-sz) & (alignment - `1`)) + alignment;
24	LIBC_NAMESPACE::AllocChecker ac;
25	data = static_cast<T >(operator* new(aligned, this->alignment, ac));
26	data += offset % alignment;
27	}
28	~AlignedMemory() { operator delete(data - offset, alignment); }
29	};
30
// Buffer lengths (in elements) iterated over by the tests in this file.
static constexpr size_t sizes[] = {0, 1, 23, 59, 1024, 5261};
// Byte values used to fill the buffers before hashing.
static constexpr uint8_t values[] = {0, 1, 23, 59, 102, 255};
33
34	// Hash value should not change with different alignments.
35	TEST(LlvmLibcHashTest, SanityCheck) {
36	for (size_t sz : sizes) {
37	for (uint8_t val : values) {
38	uint64_t hash;
39	{
40	AlignedMemory<char> mem(sz, `64`, `0`);
41	LIBC_NAMESPACE::memset(mem.data, val, sz);
42	LIBC_NAMESPACE::internal::HashState state{`0x1234567890abcdef`};
43	state.update(mem.data, sz);
44	hash = state.finish();
45	}
46	for (size_t offset = `1`; offset < `64`; ++offset) {
47	AlignedMemory<char> mem(sz, `64`, offset);
48	LIBC_NAMESPACE::memset(mem.data, val, sz);
49	LIBC_NAMESPACE::internal::HashState state{`0x1234567890abcdef`};
50	state.update(mem.data, sz);
51	ASSERT_EQ(hash, state.finish());
52	}
53	}
54	}
55	}
56
// Returns the number of bits set to 1 in `x` (0 for x == 0, 64 when every
// bit is set).
static inline size_t popcnt(uint64_t x) {
  size_t count = 0;
  // `x & (x - 1)` clears the lowest set bit, so the loop body executes once
  // per set bit rather than once per bit position.
  for (; x != 0; x &= x - 1)
    ++count;
  return count;
}
65
66	// Mutate a single bit in a rather large input. The hash should change
67	// significantly. At least one fifth of the bits should not match.
68	TEST(LlvmLibcHashTest, Avalanche) {
69	for (size_t sz : sizes) {
70	for (uint8_t val : values) {
71	uint64_t hash;
72	AlignedMemory<char> mem(sz, `64`, `0`);
73	LIBC_NAMESPACE::memset(mem.data, val, sz);
74	{
75	LIBC_NAMESPACE::internal::HashState state{`0xabcdef1234567890`};
76	state.update(mem.data, sz);
77	hash = state.finish();
78	}
79	for (size_t i = `0`; i < sz; ++i) {
80	for (size_t j = `0`; j < `8`; ++j) {
81	uint8_t mask = static_cast<uint8_t>(`1` << j);
82	mem.data[i] ^= mask;
83	{
84	LIBC_NAMESPACE::internal::HashState state{`0xabcdef1234567890`};
85	state.update(mem.data, sz);
86	uint64_t new_hash = state.finish();
87	ASSERT_GE(popcnt(hash ^ new_hash), size_t{`13`});
88	}
89	mem.data[i] ^= mask;
90	}
91	}
92	}
93	}
94	}
95
96	// Hash a random sequence of input. The LSB should be uniform enough such that
97	// values spread across the entire range.
98	TEST(LlvmLibcHashTest, UniformLSB) {
99	LIBC_NAMESPACE::srand(`0xffffffff`);
100	for (size_t sz : sizes) {
101	AlignedMemory<size_t> counters(sz, sizeof(size_t), `0`);
102	LIBC_NAMESPACE::memset(counters.data, `0`, sz * sizeof(size_t));
103	for (size_t i = `0`; i < `200` * sz; ++i) {
104	int randomness[`8`] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
105	LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
106	LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
107	LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()};
108	{
109	LIBC_NAMESPACE::internal::HashState state{`0x1a2b3c4d5e6f7a8b`};
110	state.update(randomness, sizeof(randomness));
111	uint64_t hash = state.finish();
112	counters.data[hash % sz]++;
113	}
114	}
115	for (size_t i = `0`; i < sz; ++i) {
116	ASSERT_GE(counters.data[i], size_t{`140`});
117	ASSERT_LE(counters.data[i], size_t{`260`});
118	}
119	}
120	}
121
122	// Hash a low entropy sequence. The MSB should be uniform enough such that
123	// there is no significant bias even if the value range is small.
124	// Top 7 bits is examined because it will be used as a secondary key in
125	// the hash table.
126	TEST(LlvmLibcHashTest, UniformMSB) {
127	size_t sz = `1` << `7`;
128	AlignedMemory<size_t> counters(sz, sizeof(size_t), `0`);
129	LIBC_NAMESPACE::memset(counters.data, `0`, sz * sizeof(size_t));
130	for (size_t i = `0`; i < `200` * sz; ++i) {
131	LIBC_NAMESPACE::internal::HashState state{`0xa1b2c3d4e5f6a7b8`};
132	state.update(&i, sizeof(i));
133	uint64_t hash = state.finish();
134	counters.data[hash >> `57`]++;
135	}
136	for (size_t i = `0`; i < sz; ++i) {
137	ASSERT_GE(counters.data[i], size_t{`140`});
138	ASSERT_LE(counters.data[i], size_t{`260`});
139	}
140	}
141

source code of libc/test/src/__support/hash_test.cpp