//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the newly proposed standard C++ interfaces for hashing
// arbitrary data and building hash functions for user-defined types. This
// interface was originally proposed in N3333[1] and is currently under review
// for inclusion in a future TR and/or standard.
//
// The primary interfaces provided comprise one type and three functions:
//
//  -- 'hash_code' class is an opaque type representing the hash code for some
//     data. It is the intended product of hashing, and can be used to implement
//     hash tables, checksumming, and other common uses of hashes. It is not an
//     integer type (although it can be converted to one) because it is risky
//     to assume much about the internals of a hash_code. In particular, each
//     execution of the program has a high probability of producing a different
//     hash_code for a given input. Thus their values are not stable enough to
//     save or persist, and should only be used during a single execution to
//     build hashing data structures.
//
//  -- 'hash_value' is a function designed to be overloaded for each
//     user-defined type which wishes to be used within a hashing context. It
//     should be overloaded within the user-defined type's namespace and found
//     via ADL. Overloads for primitive types are provided by this library.
//
//  -- 'hash_combine' and 'hash_combine_range' are functions designed to aid
//     programmers in easily and intuitively combining a set of data into
//     a single hash_code for their object. They should only logically be used
//     within the implementation of a 'hash_value' routine or similar context,
//     as in the example below.
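//
//     For example, a user-defined type can opt into this infrastructure with
//     a sketch like the following ('Widget' and its members are hypothetical):
//
//     \code
//       namespace widgets {
//       struct Widget { int id; Widget *parent; };
//       llvm::hash_code hash_value(const Widget &w) {
//         // Combine an integer and a pointer into a single hash_code.
//         return llvm::hash_combine(w.id, w.parent);
//       }
//       } // namespace widgets
//     \endcode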
//
// Note that 'hash_combine_range' contains very special logic for hashing
// a contiguous array of integers or pointers. This logic is *extremely* fast:
// on a modern Intel "Gainestown" Xeon (Nehalem uarch) @2.2 GHz, it was
// benchmarked at over 6.5 GiB/s for large keys, and at under 20 cycles/hash
// for keys under 32 bytes.
//
// [1] http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3333.html
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_HASHING_H
#define LLVM_ADT_HASHING_H

#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include <optional>
#include <string>
#include <tuple>
#include <utility>

namespace llvm {
template <typename T, typename Enable> struct DenseMapInfo;

/// An opaque object representing a hash code.
///
/// This object represents the result of hashing some entity. It is intended to
/// be used to implement hashtables or other hashing-based data structures.
/// While it wraps and exposes a numeric value, this value should not be
/// trusted to be stable or predictable across processes or executions.
///
/// In order to obtain the hash_code for an object 'x':
/// \code
///   using llvm::hash_value;
///   llvm::hash_code code = hash_value(x);
/// \endcode
class hash_code {
  size_t value;

public:
  /// Default construct a hash_code.
  /// Note that this leaves the value uninitialized.
  hash_code() = default;

  /// Form a hash code directly from a numerical value.
  hash_code(size_t value) : value(value) {}

  /// Convert the hash code to its numerical value for use.
  /*explicit*/ operator size_t() const { return value; }

  friend bool operator==(const hash_code &lhs, const hash_code &rhs) {
    return lhs.value == rhs.value;
  }
  friend bool operator!=(const hash_code &lhs, const hash_code &rhs) {
    return lhs.value != rhs.value;
  }

  /// Allow a hash_code to be directly run through hash_value.
  friend size_t hash_value(const hash_code &code) { return code.value; }
};

/// Compute a hash_code for any integer value.
///
/// Note that this function is intended to compute the same hash_code for
/// a particular value without regard to the pre-promotion type. This is in
/// contrast to hash_combine which may produce different hash_codes for
/// differing argument types even if they would implicitly promote to a common
/// type without changing the value.
template <typename T>
std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value);

/// Compute a hash_code for a pointer's address.
///
/// N.B.: This hashes the *address*. Not the value and not the type.
template <typename T> hash_code hash_value(const T *ptr);

/// Compute a hash_code for a pair of objects.
template <typename T, typename U>
hash_code hash_value(const std::pair<T, U> &arg);

/// Compute a hash_code for a tuple.
template <typename... Ts>
hash_code hash_value(const std::tuple<Ts...> &arg);

/// Compute a hash_code for a standard string.
template <typename T>
hash_code hash_value(const std::basic_string<T> &arg);

/// Compute a hash_code for a standard optional.
template <typename T> hash_code hash_value(const std::optional<T> &arg);
/// Override the execution seed with a fixed value.
///
/// This hashing library uses a per-execution seed designed to change on each
/// run with high probability in order to ensure that the hash codes are not
/// attackable and to ensure that output which is intended to be stable does
/// not rely on the particulars of the hash codes produced.
///
/// That said, there are use cases where it is important to be able to
/// reproduce *exactly* a specific behavior. To that end, we provide a function
/// which will forcibly set the seed to a fixed value. This must be done at the
/// start of the program, before any hashes are computed. Also, it cannot be
/// undone. This makes it thread-hostile and very hard to use outside of
/// immediately on start of a simple program designed for reproducible
/// behavior.
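///
/// A minimal usage sketch (the seed value 42 here is arbitrary):
/// \code
///   int main(int argc, char **argv) {
///     // Must run before any hash_code is computed; it cannot be undone.
///     llvm::set_fixed_execution_hash_seed(42);
///     // ... the rest of the program now hashes reproducibly ...
///   }
/// \endcode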
void set_fixed_execution_hash_seed(uint64_t fixed_value);


// All of the implementation details of actually computing the various hash
// code values are held within this namespace. These routines are included in
// the header file mainly to allow inlining and constant propagation.
namespace hashing {
namespace detail {

inline uint64_t fetch64(const char *p) {
  uint64_t result;
  memcpy(&result, p, sizeof(result));
  if (sys::IsBigEndianHost)
    sys::swapByteOrder(result);
  return result;
}

inline uint32_t fetch32(const char *p) {
  uint32_t result;
  memcpy(&result, p, sizeof(result));
  if (sys::IsBigEndianHost)
    sys::swapByteOrder(result);
  return result;
}

/// Some primes between 2^63 and 2^64 for various uses.
static constexpr uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static constexpr uint64_t k1 = 0xb492b66fbe98f273ULL;
static constexpr uint64_t k2 = 0x9ae16a3b2f90404fULL;
static constexpr uint64_t k3 = 0xc949d7c7509e6557ULL;

/// Bitwise right rotate.
/// Normally this will compile to a single instruction, especially if the
/// shift is a manifest constant.
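/// For example, rotate(0x0123456789abcdefULL, 8) yields 0xef0123456789abcdULL:
/// the low 8 bits wrap around to the top.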
inline uint64_t rotate(uint64_t val, size_t shift) {
  // Avoid shifting by 64: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}

inline uint64_t shift_mix(uint64_t val) {
  return val ^ (val >> 47);
}

inline uint64_t hash_16_bytes(uint64_t low, uint64_t high) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (low ^ high) * kMul;
  a ^= (a >> 47);
  uint64_t b = (high ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}

inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) {
  uint8_t a = s[0];
  uint8_t b = s[len >> 1];
  uint8_t c = s[len - 1];
  uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
  uint32_t z = static_cast<uint32_t>(len) + (static_cast<uint32_t>(c) << 2);
  return shift_mix(y * k2 ^ z * k3 ^ seed) * k2;
}

inline uint64_t hash_4to8_bytes(const char *s, size_t len, uint64_t seed) {
  uint64_t a = fetch32(s);
  return hash_16_bytes(len + (a << 3), seed ^ fetch32(s + len - 4));
}

inline uint64_t hash_9to16_bytes(const char *s, size_t len, uint64_t seed) {
  uint64_t a = fetch64(s);
  uint64_t b = fetch64(s + len - 8);
  return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b;
}

inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) {
  uint64_t a = fetch64(s) * k1;
  uint64_t b = fetch64(s + 8);
  uint64_t c = fetch64(s + len - 8) * k2;
  uint64_t d = fetch64(s + len - 16) * k0;
  return hash_16_bytes(llvm::rotr<uint64_t>(a - b, 43) +
                           llvm::rotr<uint64_t>(c ^ seed, 30) + d,
                       a + llvm::rotr<uint64_t>(b ^ k3, 20) - c + len + seed);
}

inline uint64_t hash_33to64_bytes(const char *s, size_t len, uint64_t seed) {
  uint64_t z = fetch64(s + 24);
  uint64_t a = fetch64(s) + (len + fetch64(s + len - 16)) * k0;
  uint64_t b = llvm::rotr<uint64_t>(a + z, 52);
  uint64_t c = llvm::rotr<uint64_t>(a, 37);
  a += fetch64(s + 8);
  c += llvm::rotr<uint64_t>(a, 7);
  a += fetch64(s + 16);
  uint64_t vf = a + z;
  uint64_t vs = b + llvm::rotr<uint64_t>(a, 31) + c;
  a = fetch64(s + 16) + fetch64(s + len - 32);
  z = fetch64(s + len - 8);
  b = llvm::rotr<uint64_t>(a + z, 52);
  c = llvm::rotr<uint64_t>(a, 37);
  a += fetch64(s + len - 24);
  c += llvm::rotr<uint64_t>(a, 7);
  a += fetch64(s + len - 16);
  uint64_t wf = a + z;
  uint64_t ws = b + llvm::rotr<uint64_t>(a, 31) + c;
  uint64_t r = shift_mix((vf + ws) * k2 + (wf + vs) * k0);
  return shift_mix((seed ^ (r * k0)) + vs) * k2;
}

inline uint64_t hash_short(const char *s, size_t length, uint64_t seed) {
  if (length >= 4 && length <= 8)
    return hash_4to8_bytes(s, length, seed);
  if (length > 8 && length <= 16)
    return hash_9to16_bytes(s, length, seed);
  if (length > 16 && length <= 32)
    return hash_17to32_bytes(s, length, seed);
  if (length > 32)
    return hash_33to64_bytes(s, length, seed);
  if (length != 0)
    return hash_1to3_bytes(s, length, seed);

  return k2 ^ seed;
}

/// The intermediate state used during hashing.
/// Currently, the algorithm for computing hash codes is based on CityHash and
/// keeps 56 bytes of arbitrary state.
struct hash_state {
  uint64_t h0 = 0, h1 = 0, h2 = 0, h3 = 0, h4 = 0, h5 = 0, h6 = 0;

  /// Create a new hash_state structure and initialize it based on the
  /// seed and the first 64-byte chunk.
  /// This effectively performs the initial mix.
  static hash_state create(const char *s, uint64_t seed) {
    hash_state state = {0,
                        seed,
                        hash_16_bytes(seed, k1),
                        llvm::rotr<uint64_t>(seed ^ k1, 49),
                        seed * k1,
                        shift_mix(seed),
                        0};
    state.h6 = hash_16_bytes(state.h4, state.h5);
    state.mix(s);
    return state;
  }

  /// Mix 32 bytes from the input sequence into the 16 bytes of 'a'
  /// and 'b', including whatever is already in 'a' and 'b'.
  static void mix_32_bytes(const char *s, uint64_t &a, uint64_t &b) {
    a += fetch64(s);
    uint64_t c = fetch64(s + 24);
    b = llvm::rotr<uint64_t>(b + a + c, 21);
    uint64_t d = a;
    a += fetch64(s + 8) + fetch64(s + 16);
    b += llvm::rotr<uint64_t>(a, 44) + d;
    a += c;
  }

  /// Mix in a 64-byte buffer of data.
  /// We mix all 64 bytes even when the chunk length is smaller, but we
  /// record the actual length.
  void mix(const char *s) {
    h0 = llvm::rotr<uint64_t>(h0 + h1 + h3 + fetch64(s + 8), 37) * k1;
    h1 = llvm::rotr<uint64_t>(h1 + h4 + fetch64(s + 48), 42) * k1;
    h0 ^= h6;
    h1 += h3 + fetch64(s + 40);
    h2 = llvm::rotr<uint64_t>(h2 + h5, 33) * k1;
    h3 = h4 * k1;
    h4 = h0 + h5;
    mix_32_bytes(s, h3, h4);
    h5 = h2 + h6;
    h6 = h1 + fetch64(s + 16);
    mix_32_bytes(s + 32, h5, h6);
    std::swap(h2, h0);
  }

  /// Compute the final 64-bit hash code value based on the current
  /// state and the length of bytes hashed.
  uint64_t finalize(size_t length) {
    return hash_16_bytes(hash_16_bytes(h3, h5) + shift_mix(h1) * k1 + h2,
                         hash_16_bytes(h4, h6) + shift_mix(length) * k1 + h0);
  }
};


/// A global, fixed seed-override variable.
///
/// This variable can be set using the \see llvm::set_fixed_execution_hash_seed
/// function. See that function for details. Do not, under any circumstances,
/// set or read this variable.
extern uint64_t fixed_seed_override;

inline uint64_t get_execution_seed() {
  // FIXME: This needs to be a per-execution seed. This is just a placeholder
  // implementation. Switching to a per-execution seed is likely to flush out
  // instability bugs and so will happen as its own commit.
  //
  // However, if there is a fixed seed override set the first time this is
  // called, return that instead of the per-execution seed.
  const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
  static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
  return seed;
}


/// Trait to indicate whether a type's bits can be hashed directly.
///
/// A type trait which is true if we want to combine values for hashing by
/// reading the underlying data. It is false if values of this type must
/// first be passed to hash_value, and the resulting hash_codes combined.
//
// FIXME: We want to replace is_integral_or_enum and is_pointer here with
// a predicate which asserts that comparing the underlying storage of two
// values of the type for equality is equivalent to comparing the two values
// for equality. For all the platforms we care about, this holds for integers
// and pointers, but there are platforms where it doesn't and we would like to
// support user-defined types which happen to satisfy this property.
template <typename T> struct is_hashable_data
    : std::integral_constant<bool, ((is_integral_or_enum<T>::value ||
                                     std::is_pointer<T>::value) &&
                                    64 % sizeof(T) == 0)> {};
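
// For example, on a typical 64-bit target this trait behaves as sketched
// below (std::string merely illustrates a type that must be routed through
// hash_value rather than hashed bytewise):
//
// \code
//   static_assert(is_hashable_data<int>::value, "hashed by reading its bytes");
//   static_assert(is_hashable_data<void *>::value, "likewise for pointers");
//   static_assert(!is_hashable_data<std::string>::value,
//                 "routed through hash_value instead");
// \endcode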

// Special case std::pair to detect when both types are viable and when there
// is no alignment-derived padding in the pair. This is a bit of a lie because
// std::pair isn't truly POD, but it's close enough in all reasonable
// implementations for our use case of hashing the underlying data.
template <typename T, typename U> struct is_hashable_data<std::pair<T, U>>
    : std::integral_constant<bool, (is_hashable_data<T>::value &&
                                    is_hashable_data<U>::value &&
                                    (sizeof(T) + sizeof(U)) ==
                                        sizeof(std::pair<T, U>))> {};

/// Helper to get the hashable data representation for a type.
/// This variant is enabled when the type itself can be used.
template <typename T>
std::enable_if_t<is_hashable_data<T>::value, T>
get_hashable_data(const T &value) {
  return value;
}
/// Helper to get the hashable data representation for a type.
/// This variant is enabled when we must first call hash_value and use the
/// result as our data.
template <typename T>
std::enable_if_t<!is_hashable_data<T>::value, size_t>
get_hashable_data(const T &value) {
  using ::llvm::hash_value;
  return hash_value(value);
}

/// Helper to store data from a value into a buffer and advance the
/// pointer into that buffer.
///
/// This routine first checks whether there is enough space in the provided
/// buffer, and if not immediately returns false. If there is space, it
/// copies the underlying bytes of value into the buffer, advances the
/// buffer_ptr past the copied bytes, and returns true.
template <typename T>
bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T &value,
                       size_t offset = 0) {
  size_t store_size = sizeof(value) - offset;
  if (buffer_ptr + store_size > buffer_end)
    return false;
  const char *value_data = reinterpret_cast<const char *>(&value);
  memcpy(buffer_ptr, value_data + offset, store_size);
  buffer_ptr += store_size;
  return true;
}

/// Implement the combining of integral values into a hash_code.
///
/// This overload is selected when the value type of the iterator is
/// integral. Rather than computing a hash_code for each object and then
/// combining them, this (as an optimization) directly combines the integers.
template <typename InputIteratorT>
hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
  const uint64_t seed = get_execution_seed();
  char buffer[64], *buffer_ptr = buffer;
  char *const buffer_end = std::end(buffer);
  while (first != last && store_and_advance(buffer_ptr, buffer_end,
                                            get_hashable_data(*first)))
    ++first;
  if (first == last)
    return hash_short(buffer, buffer_ptr - buffer, seed);
  assert(buffer_ptr == buffer_end);

  hash_state state = hash_state::create(buffer, seed);
  size_t length = 64;
  while (first != last) {
    // Fill up the buffer. We don't clear it, which re-mixes the last round
    // when only a partial 64-byte chunk is left.
    buffer_ptr = buffer;
    while (first != last && store_and_advance(buffer_ptr, buffer_end,
                                              get_hashable_data(*first)))
      ++first;

    // Rotate the buffer if we did a partial fill in order to simulate doing
    // a mix of the last 64 bytes. That is how the algorithm works when we
    // have a contiguous byte sequence, and we want to emulate that here.
    std::rotate(buffer, buffer_ptr, buffer_end);

    // Mix this chunk into the current state.
    state.mix(buffer);
    length += buffer_ptr - buffer;
  }

  return state.finalize(length);
}

/// Implement the combining of integral values into a hash_code.
///
/// This overload is selected when the value type of the iterator is integral
/// and when the input iterator is actually a pointer. Rather than computing
/// a hash_code for each object and then combining them, this (as an
/// optimization) directly combines the integers. Also, because the integers
/// are stored in contiguous memory, this routine avoids copying each value
/// and directly reads from the underlying memory.
template <typename ValueT>
std::enable_if_t<is_hashable_data<ValueT>::value, hash_code>
hash_combine_range_impl(ValueT *first, ValueT *last) {
  const uint64_t seed = get_execution_seed();
  const char *s_begin = reinterpret_cast<const char *>(first);
  const char *s_end = reinterpret_cast<const char *>(last);
  const size_t length = std::distance(s_begin, s_end);
  if (length <= 64)
    return hash_short(s_begin, length, seed);

  const char *s_aligned_end = s_begin + (length & ~63);
  hash_state state = hash_state::create(s_begin, seed);
  s_begin += 64;
  while (s_begin != s_aligned_end) {
    state.mix(s_begin);
    s_begin += 64;
  }
  if (length & 63)
    state.mix(s_end - 64);

  return state.finalize(length);
}

} // namespace detail
} // namespace hashing


/// Compute a hash_code for a sequence of values.
///
/// This hashes a sequence of values. It produces the same hash_code as
/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences
/// and is significantly faster given pointers and types which can be hashed as
/// a sequence of bytes.
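///
/// A minimal usage sketch; the pointer form of the arguments is what enables
/// the fast contiguous-memory path:
/// \code
///   std::vector<int> values = {1, 2, 3};
///   llvm::hash_code code =
///       llvm::hash_combine_range(values.data(), values.data() + values.size());
/// \endcode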
template <typename InputIteratorT>
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) {
  return ::llvm::hashing::detail::hash_combine_range_impl(first, last);
}


// Implementation details for hash_combine.
namespace hashing {
namespace detail {

/// Helper class to manage the recursive combining of hash_combine
/// arguments.
///
/// This class exists to manage the state and various calls involved in the
/// recursive combining of arguments used in hash_combine. It is particularly
/// useful for minimizing the code in the recursive calls to ease the pain
/// caused by a lack of variadic functions.
struct hash_combine_recursive_helper {
  char buffer[64] = {};
  hash_state state;
  const uint64_t seed;

public:
  /// Construct a recursive hash combining helper.
  ///
  /// This sets up the state for a recursive hash combine, including getting
  /// the seed and buffer setup.
  hash_combine_recursive_helper()
      : seed(get_execution_seed()) {}

  /// Combine one chunk of data into the current in-flight hash.
  ///
  /// This merges one chunk of data into the hash. First it tries to buffer
  /// the data. If the buffer is full, it hashes the buffer into its
  /// hash_state, empties it, and then merges the new chunk in. This also
  /// handles cases where the data straddles the end of the buffer.
  template <typename T>
  char *combine_data(size_t &length, char *buffer_ptr, char *buffer_end,
                     T data) {
    if (!store_and_advance(buffer_ptr, buffer_end, data)) {
      // Check for skew which prevents the buffer from being packed, and do
      // a partial store into the buffer to fill it. This is only a concern
      // with the variadic combine because that formation can have varying
      // argument types.
      size_t partial_store_size = buffer_end - buffer_ptr;
      memcpy(buffer_ptr, &data, partial_store_size);

      // If the store fails, our buffer is full and ready to hash. We have to
      // either initialize the hash state (on the first full buffer) or mix
      // this buffer into the existing hash state. Length tracks the *hashed*
      // length, not the buffered length.
      if (length == 0) {
        state = hash_state::create(buffer, seed);
        length = 64;
      } else {
        // Mix this chunk into the current state and bump length up by 64.
        state.mix(buffer);
        length += 64;
      }
      // Reset the buffer_ptr to the head of the buffer for the next chunk of
      // data.
      buffer_ptr = buffer;

      // Try again to store into the buffer -- this cannot fail as we only
      // store types smaller than the buffer.
      if (!store_and_advance(buffer_ptr, buffer_end, data,
                             partial_store_size))
        llvm_unreachable("buffer smaller than stored type");
    }
    return buffer_ptr;
  }

  /// Recursive, variadic combining method.
  ///
  /// This function recurses through each argument, combining that argument
  /// into a single hash.
  template <typename T, typename ...Ts>
  hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
                    const T &arg, const Ts &...args) {
    buffer_ptr = combine_data(length, buffer_ptr, buffer_end,
                              get_hashable_data(arg));

    // Recurse to the next argument.
    return combine(length, buffer_ptr, buffer_end, args...);
  }

  /// Base case for recursive, variadic combining.
  ///
  /// The base case when combining arguments recursively is reached when all
  /// arguments have been handled. It flushes the remaining buffer and
  /// constructs a hash_code.
  hash_code combine(size_t length, char *buffer_ptr, char *buffer_end) {
    // Check whether the entire set of values fits in the buffer. If so, we'll
    // use the optimized short hashing routine and skip state entirely.
    if (length == 0)
      return hash_short(buffer, buffer_ptr - buffer, seed);

    // Mix the final buffer, rotating it if we did a partial fill in order to
    // simulate doing a mix of the last 64 bytes. That is how the algorithm
    // works when we have a contiguous byte sequence, and we want to emulate
    // that here.
    std::rotate(buffer, buffer_ptr, buffer_end);

    // Mix this chunk into the current state.
    state.mix(buffer);
    length += buffer_ptr - buffer;

    return state.finalize(length);
  }
};

} // namespace detail
} // namespace hashing

/// Combine values into a single hash_code.
///
/// This routine accepts a varying number of arguments of any type. It will
/// attempt to combine them into a single hash_code. For user-defined types it
/// attempts to call a \see hash_value overload (via ADL) for the type. For
/// integer and pointer types it directly combines their data into the
/// resulting hash_code.
///
/// The result is suitable for returning from a user's hash_value
/// *implementation* for their user-defined type. Consumers of a type should
/// *not* call this routine; they should instead call 'hash_value'.
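///
/// A sketch of a typical 'hash_value' implementation built on this routine
/// ('Symbol', 'getID', and 'getName' are hypothetical):
/// \code
///   hash_code hash_value(const Symbol &s) {
///     // Mixes an integer ID and a std::string name into one hash_code.
///     return hash_combine(s.getID(), s.getName());
///   }
/// \endcode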
template <typename ...Ts> hash_code hash_combine(const Ts &...args) {
  // Recursively hash each argument using a helper class.
  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
  return helper.combine(0, helper.buffer, helper.buffer + 64, args...);
}

// Implementation details for implementations of hash_value overloads provided
// here.
namespace hashing {
namespace detail {

/// Helper to hash the value of a single integer.
///
/// Overloads for smaller integer types are not provided to ensure consistent
/// behavior in the presence of integral promotions. Essentially,
/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
inline hash_code hash_integer_value(uint64_t value) {
  // Similar to hash_4to8_bytes but using a seed instead of length.
  const uint64_t seed = get_execution_seed();
  const char *s = reinterpret_cast<const char *>(&value);
  const uint64_t a = fetch32(s);
  return hash_16_bytes(seed + (a << 3), fetch32(s + 4));
}

} // namespace detail
} // namespace hashing

// Declared and documented above, but defined here so that all of the hashing
// infrastructure is available.
template <typename T>
std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value) {
  return ::llvm::hashing::detail::hash_integer_value(
      static_cast<uint64_t>(value));
}

// Declared and documented above, but defined here so that all of the hashing
// infrastructure is available.
template <typename T> hash_code hash_value(const T *ptr) {
  return ::llvm::hashing::detail::hash_integer_value(
      reinterpret_cast<uintptr_t>(ptr));
}

// Declared and documented above, but defined here so that all of the hashing
// infrastructure is available.
template <typename T, typename U>
hash_code hash_value(const std::pair<T, U> &arg) {
  return hash_combine(arg.first, arg.second);
}

template <typename... Ts> hash_code hash_value(const std::tuple<Ts...> &arg) {
  return std::apply([](const auto &...xs) { return hash_combine(xs...); }, arg);
}

// Declared and documented above, but defined here so that all of the hashing
// infrastructure is available.
template <typename T>
hash_code hash_value(const std::basic_string<T> &arg) {
  return hash_combine_range(arg.begin(), arg.end());
}

template <typename T> hash_code hash_value(const std::optional<T> &arg) {
  return arg ? hash_combine(true, *arg) : hash_value(false);
}

template <> struct DenseMapInfo<hash_code, void> {
  static inline hash_code getEmptyKey() { return hash_code(-1); }
  static inline hash_code getTombstoneKey() { return hash_code(-2); }
  static unsigned getHashValue(hash_code val) { return val; }
  static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
};

} // namespace llvm

#endif // LLVM_ADT_HASHING_H