Hashing.h source code [include/llvm-20/llvm/ADT/Hashing.h]

1	//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the newly proposed standard C++ interfaces for hashing
10	// arbitrary data and building hash functions for user-defined types. This
11	// interface was originally proposed in N3333[1] and is currently under review
12	// for inclusion in a future TR and/or standard.
13	//
14	// The primary interfaces provided comprise one type and three functions:
15	//
16	// -- 'hash_code' class is an opaque type representing the hash code for some
17	// data. It is the intended product of hashing, and can be used to implement
18	// hash tables, checksumming, and other common uses of hashes. It is not an
19	// integer type (although it can be converted to one) because it is risky
20	// to assume much about the internals of a hash_code. In particular, each
21	// execution of the program has a high probability of producing a different
22	// hash_code for a given input. Thus their values are not stable to save or
23	// persist, and should only be used during the execution for the
24	// construction of hashing datastructures.
25	//
26	// -- 'hash_value' is a function designed to be overloaded for each
27	// user-defined type which wishes to be used within a hashing context. It
28	// should be overloaded within the user-defined type's namespace and found
29	// via ADL. Overloads for primitive types are provided by this library.
30	//
31	// -- 'hash_combine' and 'hash_combine_range' are functions designed to aid
32	// programmers in easily and intuitively combining a set of data into
33	// a single hash_code for their object. They should only logically be used
34	// within the implementation of a 'hash_value' routine or similar context.
35	//
36	// Note that 'hash_combine_range' contains very special logic for hashing
37	// a contiguous array of integers or pointers. This logic is *extremely* fast:
38	// on a modern Intel "Gainestown" Xeon (Nehalem uarch) @2.2 GHz, these were
39	// benchmarked at over 6.5 GiB/s for large keys, and <20 cycles/hash for keys
40	// under 32-bytes.
41	//
42	//===----------------------------------------------------------------------===//
43
44	#ifndef LLVM_ADT_HASHING_H
45	#define LLVM_ADT_HASHING_H
46
47	#include "llvm/Config/abi-breaking.h"
48	#include "llvm/Support/DataTypes.h"
49	#include "llvm/Support/ErrorHandling.h"
50	#include "llvm/Support/SwapByteOrder.h"
51	#include "llvm/Support/type_traits.h"
52	#include <algorithm>
53	#include <cassert>
54	#include <cstring>
55	#include <optional>
56	#include <string>
57	#include <tuple>
58	#include <utility>
59
60	namespace llvm {
61	template <typename T, typename Enable> struct DenseMapInfo;
62
/// An opaque object representing a hash code.
///
/// This object represents the result of hashing some entity. It is intended to
/// be used to implement hashtables or other hashing-based data structures.
/// While it wraps and exposes a numeric value, this value should not be
/// trusted to be stable or predictable across processes or executions.
///
/// In order to obtain the hash_code for an object 'x':
/// \code
///   using llvm::hash_value;
///   llvm::hash_code code = hash_value(x);
/// \endcode
class hash_code {
  size_t value;

public:
  /// Default construct a hash_code.
  /// Note that this leaves the value uninitialized.
  hash_code() = default;

  /// Form a hash code directly from a numerical value.
  hash_code(size_t value) : value(value) {}

  /// Convert the hash code to its numerical value for use.
  ///
  /// The conversion is deliberately implicit ('explicit' is commented out).
  /*explicit*/ operator size_t() const { return value; }

  /// Two hash codes are equal exactly when their wrapped values are equal.
  friend bool operator==(const hash_code &lhs, const hash_code &rhs) {
    return lhs.value == rhs.value;
  }
  friend bool operator!=(const hash_code &lhs, const hash_code &rhs) {
    return lhs.value != rhs.value;
  }

  /// Allow a hash_code to be directly run through hash_value.
  /// This returns the wrapped value unchanged.
  friend size_t hash_value(const hash_code &code) { return code.value; }
};
99
100	/// Compute a hash_code for any integer value.
101	///
102	/// Note that this function is intended to compute the same hash_code for
103	/// a particular value without regard to the pre-promotion type. This is in
104	/// contrast to hash_combine which may produce different hash_codes for
105	/// differing argument types even if they would implicit promote to a common
106	/// type without changing the value.
107	template <typename T>
108	std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value);
109
110	/// Compute a hash_code for a pointer's address.
111	///
112	/// N.B.: This hashes the address. Not the value and not the type.
113	template <typename T> hash_code hash_value(const T *ptr);
114
115	/// Compute a hash_code for a pair of objects.
116	template <typename T, typename U>
117	hash_code hash_value(const std::pair<T, U> &arg);
118
119	/// Compute a hash_code for a tuple.
120	template <typename... Ts>
121	hash_code hash_value(const std::tuple<Ts...> &arg);
122
123	/// Compute a hash_code for a standard string.
124	template <typename T>
125	hash_code hash_value(const std::basic_string<T> &arg);
126
127	/// Compute a hash_code for a std::optional.
128	template <typename T> hash_code hash_value(const std::optional<T> &arg);
129
130	// All of the implementation details of actually computing the various hash
131	// code values are held within this namespace. These routines are included in
132	// the header file mainly to allow inlining and constant propagation.
133	namespace hashing {
134	namespace detail {
135
136	inline uint64_t fetch64(const char *p) {
137	uint64_t result;
138	memcpy(dest: &result, src: p, n: sizeof(result));
139	if (sys::IsBigEndianHost)
140	sys::swapByteOrder(Value&: result);
141	return result;
142	}
143
144	inline uint32_t fetch32(const char *p) {
145	uint32_t result;
146	memcpy(dest: &result, src: p, n: sizeof(result));
147	if (sys::IsBigEndianHost)
148	sys::swapByteOrder(Value&: result);
149	return result;
150	}
151
/// Some primes between 2^63 and 2^64 for various uses.
static constexpr uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static constexpr uint64_t k1 = 0xb492b66fbe98f273ULL;
static constexpr uint64_t k2 = 0x9ae16a3b2f90404fULL;
static constexpr uint64_t k3 = 0xc949d7c7509e6557ULL;
157
/// Bitwise right rotate.
/// Normally this will compile to a single instruction, especially if the
/// shift is a manifest constant.
///
/// \p shift must be less than 64; a zero shift returns \p val unchanged.
inline uint64_t rotate(uint64_t val, size_t shift) {
  // Avoid shifting by 64: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}
165
/// Fold the high bits of \p val into its low bits by XOR-ing it with itself
/// shifted right by 47.
inline uint64_t shift_mix(uint64_t val) {
  return val ^ (val >> 47);
}
169
/// Mix two 64-bit words into a single 64-bit hash.
///
/// Each round multiplies by a fixed odd constant and then XORs in the value
/// shifted right by 47.
inline uint64_t hash_16_bytes(uint64_t low, uint64_t high) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (low ^ high) * kMul;
  a ^= (a >> 47);
  uint64_t b = (high ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}
180
181	inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) {
182	uint8_t a = s[`0`];
183	uint8_t b = s[len >> `1`];
184	uint8_t c = s[len - `1`];
185	uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << `8`);
186	uint32_t z = static_cast<uint32_t>(len) + (static_cast<uint32_t>(c) << `2`);
187	return shift_mix(val: y * k2 ^ z * k3 ^ seed) * k2;
188	}
189
190	inline uint64_t hash_4to8_bytes(const char *s, size_t len, uint64_t seed) {
191	uint64_t a = fetch32(p: s);
192	return hash_16_bytes(low: len + (a << `3`), high: seed ^ fetch32(p: s + len - `4`));
193	}
194
195	inline uint64_t hash_9to16_bytes(const char *s, size_t len, uint64_t seed) {
196	uint64_t a = fetch64(p: s);
197	uint64_t b = fetch64(p: s + len - `8`);
198	return hash_16_bytes(low: seed ^ a, high: rotate(val: b + len, shift: len)) ^ b;
199	}
200
201	inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) {
202	uint64_t a = fetch64(p: s) * k1;
203	uint64_t b = fetch64(p: s + `8`);
204	uint64_t c = fetch64(p: s + len - `8`) * k2;
205	uint64_t d = fetch64(p: s + len - `16`) * k0;
206	return hash_16_bytes(low: llvm::rotr<uint64_t>(V: a - b, R: `43`) +
207	llvm::rotr<uint64_t>(V: c ^ seed, R: `30`) + d,
208	high: a + llvm::rotr<uint64_t>(V: b ^ k3, R: `20`) - c + len + seed);
209	}
210
211	inline uint64_t hash_33to64_bytes(const char *s, size_t len, uint64_t seed) {
212	uint64_t z = fetch64(p: s + `24`);
213	uint64_t a = fetch64(p: s) + (len + fetch64(p: s + len - `16`)) * k0;
214	uint64_t b = llvm::rotr<uint64_t>(V: a + z, R: `52`);
215	uint64_t c = llvm::rotr<uint64_t>(V: a, R: `37`);
216	a += fetch64(p: s + `8`);
217	c += llvm::rotr<uint64_t>(V: a, R: `7`);
218	a += fetch64(p: s + `16`);
219	uint64_t vf = a + z;
220	uint64_t vs = b + llvm::rotr<uint64_t>(V: a, R: `31`) + c;
221	a = fetch64(p: s + `16`) + fetch64(p: s + len - `32`);
222	z = fetch64(p: s + len - `8`);
223	b = llvm::rotr<uint64_t>(V: a + z, R: `52`);
224	c = llvm::rotr<uint64_t>(V: a, R: `37`);
225	a += fetch64(p: s + len - `24`);
226	c += llvm::rotr<uint64_t>(V: a, R: `7`);
227	a += fetch64(p: s + len - `16`);
228	uint64_t wf = a + z;
229	uint64_t ws = b + llvm::rotr<uint64_t>(V: a, R: `31`) + c;
230	uint64_t r = shift_mix(val: (vf + ws) * k2 + (wf + vs) * k0);
231	return shift_mix(val: (seed ^ (r * k0)) + vs) * k2;
232	}
233
234	inline uint64_t hash_short(const char *s, size_t length, uint64_t seed) {
235	if (length >= `4` && length <= `8`)
236	return hash_4to8_bytes(s, len: length, seed);
237	if (length > `8` && length <= `16`)
238	return hash_9to16_bytes(s, len: length, seed);
239	if (length > `16` && length <= `32`)
240	return hash_17to32_bytes(s, len: length, seed);
241	if (length > `32`)
242	return hash_33to64_bytes(s, len: length, seed);
243	if (length != `0`)
244	return hash_1to3_bytes(s, len: length, seed);
245
246	return k2 ^ seed;
247	}
248
249	/// The intermediate state used during hashing.
250	/// Currently, the algorithm for computing hash codes is based on CityHash and
251	/// keeps 56 bytes of arbitrary state.
252	struct hash_state {
253	uint64_t h0 = `0`, h1 = `0`, h2 = `0`, h3 = `0`, h4 = `0`, h5 = `0`, h6 = `0`;
254
255	/// Create a new hash_state structure and initialize it based on the
256	/// seed and the first 64-byte chunk.
257	/// This effectively performs the initial mix.
258	static hash_state create(const char *s, uint64_t seed) {
259	hash_state state = {.h0: `0`,
260	.h1: seed,
261	.h2: hash_16_bytes(low: seed, high: k1),
262	.h3: llvm::rotr<uint64_t>(V: seed ^ k1, R: `49`),
263	.h4: seed * k1,
264	.h5: shift_mix(val: seed),
265	.h6: `0`};
266	state.h6 = hash_16_bytes(low: state.h4, high: state.h5);
267	state.mix(s);
268	return state;
269	}
270
271	/// Mix 32-bytes from the input sequence into the 16-bytes of 'a'
272	/// and 'b', including whatever is already in 'a' and 'b'.
273	static void mix_32_bytes(const char *s, uint64_t &a, uint64_t &b) {
274	a += fetch64(p: s);
275	uint64_t c = fetch64(p: s + `24`);
276	b = llvm::rotr<uint64_t>(V: b + a + c, R: `21`);
277	uint64_t d = a;
278	a += fetch64(p: s + `8`) + fetch64(p: s + `16`);
279	b += llvm::rotr<uint64_t>(V: a, R: `44`) + d;
280	a += c;
281	}
282
283	/// Mix in a 64-byte buffer of data.
284	/// We mix all 64 bytes even when the chunk length is smaller, but we
285	/// record the actual length.
286	void mix(const char *s) {
287	h0 = llvm::rotr<uint64_t>(V: h0 + h1 + h3 + fetch64(p: s + `8`), R: `37`) * k1;
288	h1 = llvm::rotr<uint64_t>(V: h1 + h4 + fetch64(p: s + `48`), R: `42`) * k1;
289	h0 ^= h6;
290	h1 += h3 + fetch64(p: s + `40`);
291	h2 = llvm::rotr<uint64_t>(V: h2 + h5, R: `33`) * k1;
292	h3 = h4 * k1;
293	h4 = h0 + h5;
294	mix_32_bytes(s, a&: h3, b&: h4);
295	h5 = h2 + h6;
296	h6 = h1 + fetch64(p: s + `16`);
297	mix_32_bytes(s: s + `32`, a&: h5, b&: h6);
298	std::swap(a&: h2, b&: h0);
299	}
300
301	/// Compute the final 64-bit hash code value based on the current
302	/// state and the length of bytes hashed.
303	uint64_t finalize(size_t length) {
304	return hash_16_bytes(low: hash_16_bytes(low: h3, high: h5) + shift_mix(val: h1) * k1 + h2,
305	high: hash_16_bytes(low: h4, high: h6) + shift_mix(val: length) * k1 + h0);
306	}
307	};
308
/// Return the seed used for all hashing in this process.
///
/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
/// per process (address of a function in LLVMSupport) to prevent having users
/// depend on the particular hash values. On platforms without ASLR, this is
/// still likely non-deterministic per build. Otherwise a fixed constant is
/// returned.
inline uint64_t get_execution_seed() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  return static_cast<uint64_t>(
      reinterpret_cast<uintptr_t>(&install_fatal_error_handler));
#else
  return 0xff51afd7ed558ccdULL;
#endif
}
321
322
323	/// Trait to indicate whether a type's bits can be hashed directly.
324	///
325	/// A type trait which is true if we want to combine values for hashing by
326	/// reading the underlying data. It is false if values of this type must
327	/// first be passed to hash_value, and the resulting hash_codes combined.
328	//
329	// FIXME: We want to replace is_integral_or_enum and is_pointer here with
330	// a predicate which asserts that comparing the underlying storage of two
331	// values of the type for equality is equivalent to comparing the two values
332	// for equality. For all the platforms we care about, this holds for integers
333	// and pointers, but there are platforms where it doesn't and we would like to
334	// support user-defined types which happen to satisfy this property.
335	template <typename T> struct is_hashable_data
336	: std::integral_constant<bool, ((is_integral_or_enum<T>::value \|\|
337	std::is_pointer<T>::value) &&
338	`64` % sizeof(T) == `0`)> {};
339
340	// Special case std::pair to detect when both types are viable and when there
341	// is no alignment-derived padding in the pair. This is a bit of a lie because
342	// std::pair isn't truly POD, but it's close enough in all reasonable
343	// implementations for our use case of hashing the underlying data.
344	template <typename T, typename U> struct is_hashable_data<std::pair<T, U> >
345	: std::integral_constant<bool, (is_hashable_data<T>::value &&
346	is_hashable_data<U>::value &&
347	(sizeof(T) + sizeof(U)) ==
348	sizeof(std::pair<T, U>))> {};
349
350	/// Helper to get the hashable data representation for a type.
351	/// This variant is enabled when the type itself can be used.
352	template <typename T>
353	std::enable_if_t<is_hashable_data<T>::value, T>
354	get_hashable_data(const T &value) {
355	return value;
356	}
357	/// Helper to get the hashable data representation for a type.
358	/// This variant is enabled when we must first call hash_value and use the
359	/// result as our data.
360	template <typename T>
361	std::enable_if_t<!is_hashable_data<T>::value, size_t>
362	get_hashable_data(const T &value) {
363	using ::llvm::hash_value;
364	return hash_value(value);
365	}
366
/// Helper to store data from a value into a buffer and advance the
/// pointer into that buffer.
///
/// This routine first checks whether there is enough space in the provided
/// buffer, and if not immediately returns false. If there is space, it
/// copies the underlying bytes of value into the buffer, advances the
/// buffer_ptr past the copied bytes, and returns true.
///
/// \param buffer_ptr  current write position; advanced on success only.
/// \param buffer_end  one past the last writable byte of the buffer.
/// \param value       object whose bytes are copied.
/// \param offset      number of leading bytes of \p value to skip; must not
///                    exceed sizeof(value).
template <typename T>
bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T &value,
                       size_t offset = 0) {
  const size_t store_size = sizeof(value) - offset;
  // Compare sizes rather than computing buffer_ptr + store_size, which would
  // form a pointer beyond one-past-the-end when the value does not fit.
  const size_t available = static_cast<size_t>(buffer_end - buffer_ptr);
  if (store_size > available)
    return false;
  const char *value_data = reinterpret_cast<const char *>(&value);
  std::memcpy(buffer_ptr, value_data + offset, store_size);
  buffer_ptr += store_size;
  return true;
}
385
386	/// Implement the combining of integral values into a hash_code.
387	///
388	/// This overload is selected when the value type of the iterator is
389	/// integral. Rather than computing a hash_code for each object and then
390	/// combining them, this (as an optimization) directly combines the integers.
391	template <typename InputIteratorT>
392	hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
393	const uint64_t seed = get_execution_seed();
394	char buffer[`64`], *buffer_ptr = buffer;
395	char *const buffer_end = std::end(arr&: buffer);
396	while (first != last && store_and_advance(buffer_ptr, buffer_end,
397	get_hashable_data(*first)))
398	++first;
399	if (first == last)
400	return hash_short(s: buffer, length: buffer_ptr - buffer, seed);
401	assert(buffer_ptr == buffer_end);
402
403	hash_state state = state.create(s: buffer, seed);
404	size_t length = `64`;
405	while (first != last) {
406	// Fill up the buffer. We don't clear it, which re-mixes the last round
407	// when only a partial 64-byte chunk is left.
408	buffer_ptr = buffer;
409	while (first != last && store_and_advance(buffer_ptr, buffer_end,
410	get_hashable_data(*first)))
411	++first;
412
413	// Rotate the buffer if we did a partial fill in order to simulate doing
414	// a mix of the last 64-bytes. That is how the algorithm works when we
415	// have a contiguous byte sequence, and we want to emulate that here.
416	std::rotate(first: buffer, middle: buffer_ptr, last: buffer_end);
417
418	// Mix this chunk into the current state.
419	state.mix(s: buffer);
420	length += buffer_ptr - buffer;
421	};
422
423	return state.finalize(length);
424	}
425
426	/// Implement the combining of integral values into a hash_code.
427	///
428	/// This overload is selected when the value type of the iterator is integral
429	/// and when the input iterator is actually a pointer. Rather than computing
430	/// a hash_code for each object and then combining them, this (as an
431	/// optimization) directly combines the integers. Also, because the integers
432	/// are stored in contiguous memory, this routine avoids copying each value
433	/// and directly reads from the underlying memory.
434	template <typename ValueT>
435	std::enable_if_t<is_hashable_data<ValueT>::value, hash_code>
436	hash_combine_range_impl(ValueT first, ValueT last) {
437	const uint64_t seed = get_execution_seed();
438	const char s_begin = reinterpret_cast<const* char *>(first);
439	const char s_end = reinterpret_cast<const* char *>(last);
440	const size_t length = std::distance(first: s_begin, last: s_end);
441	if (length <= `64`)
442	return hash_short(s: s_begin, length, seed);
443
444	const char *s_aligned_end = s_begin + (length & ~`63`);
445	hash_state state = state.create(s: s_begin, seed);
446	s_begin += `64`;
447	while (s_begin != s_aligned_end) {
448	state.mix(s: s_begin);
449	s_begin += `64`;
450	}
451	if (length & `63`)
452	state.mix(s: s_end - `64`);
453
454	return state.finalize(length);
455	}
456
457	} // namespace detail
458	} // namespace hashing
459
460
461	/// Compute a hash_code for a sequence of values.
462	///
463	/// This hashes a sequence of values. It produces the same hash_code as
464	/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences
465	/// and is significantly faster given pointers and types which can be hashed as
466	/// a sequence of bytes.
467	template <typename InputIteratorT>
468	hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) {
469	return ::llvm::hashing::detail::hash_combine_range_impl(first, last);
470	}
471
472
473	// Implementation details for hash_combine.
474	namespace hashing {
475	namespace detail {
476
477	/// Helper class to manage the recursive combining of hash_combine
478	/// arguments.
479	///
480	/// This class exists to manage the state and various calls involved in the
481	/// recursive combining of arguments used in hash_combine. It is particularly
482	/// useful at minimizing the code in the recursive calls to ease the pain
483	/// caused by a lack of variadic functions.
484	struct hash_combine_recursive_helper {
485	char buffer[`64`] = {};
486	hash_state state;
487	const uint64_t seed;
488
489	public:
490	/// Construct a recursive hash combining helper.
491	///
492	/// This sets up the state for a recursive hash combine, including getting
493	/// the seed and buffer setup.
494	hash_combine_recursive_helper()
495	: seed(get_execution_seed()) {}
496
497	/// Combine one chunk of data into the current in-flight hash.
498	///
499	/// This merges one chunk of data into the hash. First it tries to buffer
500	/// the data. If the buffer is full, it hashes the buffer into its
501	/// hash_state, empties it, and then merges the new chunk in. This also
502	/// handles cases where the data straddles the end of the buffer.
503	template <typename T>
504	char combine_data(size_t &length, char* buffer_ptr, char* *buffer_end, T data) {
505	if (!store_and_advance(buffer_ptr, buffer_end, data)) {
506	// Check for skew which prevents the buffer from being packed, and do
507	// a partial store into the buffer to fill it. This is only a concern
508	// with the variadic combine because that formation can have varying
509	// argument types.
510	size_t partial_store_size = buffer_end - buffer_ptr;
511	memcpy(buffer_ptr, &data, partial_store_size);
512
513	// If the store fails, our buffer is full and ready to hash. We have to
514	// either initialize the hash state (on the first full buffer) or mix
515	// this buffer into the existing hash state. Length tracks the hashed
516	// length, not the buffered length.
517	if (length == `0`) {
518	state = state.create(s: buffer, seed);
519	length = `64`;
520	} else {
521	// Mix this chunk into the current state and bump length up by 64.
522	state.mix(s: buffer);
523	length += `64`;
524	}
525	// Reset the buffer_ptr to the head of the buffer for the next chunk of
526	// data.
527	buffer_ptr = buffer;
528
529	// Try again to store into the buffer -- this cannot fail as we only
530	// store types smaller than the buffer.
531	if (!store_and_advance(buffer_ptr, buffer_end, data,
532	partial_store_size))
533	llvm_unreachable("buffer smaller than stored type");
534	}
535	return buffer_ptr;
536	}
537
538	/// Recursive, variadic combining method.
539	///
540	/// This function recurses through each argument, combining that argument
541	/// into a single hash.
542	template <typename T, typename ...Ts>
543	hash_code combine(size_t length, char buffer_ptr, char* *buffer_end,
544	const T &arg, const Ts &...args) {
545	buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg));
546
547	// Recurse to the next argument.
548	return combine(length, buffer_ptr, buffer_end, args...);
549	}
550
551	/// Base case for recursive, variadic combining.
552	///
553	/// The base case when combining arguments recursively is reached when all
554	/// arguments have been handled. It flushes the remaining buffer and
555	/// constructs a hash_code.
556	hash_code combine(size_t length, char buffer_ptr, char* *buffer_end) {
557	// Check whether the entire set of values fit in the buffer. If so, we'll
558	// use the optimized short hashing routine and skip state entirely.
559	if (length == `0`)
560	return hash_short(s: buffer, length: buffer_ptr - buffer, seed);
561
562	// Mix the final buffer, rotating it if we did a partial fill in order to
563	// simulate doing a mix of the last 64-bytes. That is how the algorithm
564	// works when we have a contiguous byte sequence, and we want to emulate
565	// that here.
566	std::rotate(first: buffer, middle: buffer_ptr, last: buffer_end);
567
568	// Mix this chunk into the current state.
569	state.mix(s: buffer);
570	length += buffer_ptr - buffer;
571
572	return state.finalize(length);
573	}
574	};
575
576	} // namespace detail
577	} // namespace hashing
578
579	/// Combine values into a single hash_code.
580	///
581	/// This routine accepts a varying number of arguments of any type. It will
582	/// attempt to combine them into a single hash_code. For user-defined types it
583	/// attempts to call a \see hash_value overload (via ADL) for the type. For
584	/// integer and pointer types it directly combines their data into the
585	/// resulting hash_code.
586	///
587	/// The result is suitable for returning from a user's hash_value
588	/// implementation* for their user-defined type. Consumers of a type should*
589	/// not* call this routine, they should instead call 'hash_value'.*
590	template <typename ...Ts> hash_code hash_combine(const Ts &...args) {
591	// Recursively hash each argument using a helper class.
592	::llvm::hashing::detail::hash_combine_recursive_helper helper;
593	return helper.combine(`0`, helper.buffer, helper.buffer + `64`, args...);
594	}
595
596	// Implementation details for implementations of hash_value overloads provided
597	// here.
598	namespace hashing {
599	namespace detail {
600
601	/// Helper to hash the value of a single integer.
602	///
603	/// Overloads for smaller integer types are not provided to ensure consistent
604	/// behavior in the presence of integral promotions. Essentially,
605	/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
606	inline hash_code hash_integer_value(uint64_t value) {
607	// Similar to hash_4to8_bytes but using a seed instead of length.
608	const uint64_t seed = get_execution_seed();
609	const char s = reinterpret_cast<const* char *>(&value);
610	const uint64_t a = fetch32(p: s);
611	return hash_16_bytes(low: seed + (a << `3`), high: fetch32(p: s + `4`));
612	}
613
614	} // namespace detail
615	} // namespace hashing
616
617	// Declared and documented above, but defined here so that any of the hashing
618	// infrastructure is available.
619	template <typename T>
620	std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value) {
621	return ::llvm::hashing::detail::hash_integer_value(
622	value: static_cast<uint64_t>(value));
623	}
624
625	// Declared and documented above, but defined here so that any of the hashing
626	// infrastructure is available.
627	template <typename T> hash_code hash_value(const T *ptr) {
628	return ::llvm::hashing::detail::hash_integer_value(
629	value: reinterpret_cast<uintptr_t>(ptr));
630	}
631
632	// Declared and documented above, but defined here so that any of the hashing
633	// infrastructure is available.
634	template <typename T, typename U>
635	hash_code hash_value(const std::pair<T, U> &arg) {
636	return hash_combine(arg.first, arg.second);
637	}
638
639	template <typename... Ts> hash_code hash_value(const std::tuple<Ts...> &arg) {
640	return std::apply([](const auto &...xs) { return hash_combine(xs...); }, arg);
641	}
642
643	// Declared and documented above, but defined here so that any of the hashing
644	// infrastructure is available.
645	template <typename T>
646	hash_code hash_value(const std::basic_string<T> &arg) {
647	return hash_combine_range(arg.begin(), arg.end());
648	}
649
650	template <typename T> hash_code hash_value(const std::optional<T> &arg) {
651	return arg ? hash_combine(true, arg) : hash_value(value: false*);
652	}
653
654	template <> struct DenseMapInfo<hash_code, void> {
655	static inline hash_code getEmptyKey() { return hash_code (-`1`); }
656	static inline hash_code getTombstoneKey() { return hash_code (-`2`); }
657	static unsigned getHashValue(hash_code val) {
658	return static_cast<unsigned>(size_t(val));
659	}
660	static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
661	};
662
663	} // namespace llvm
664
665	/// Implement std::hash so that hash_code can be used in STL containers.
666	namespace std {
667
668	template<>
669	struct hash<llvm::hash_code> {
670	size_t operator()(llvm::hash_code const& Val) const {
671	return Val;
672	}
673	};
674
675	} // namespace std;
676
677	#endif
678

source code of include/llvm-20/llvm/ADT/Hashing.h