1//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an interface allowing to conveniently build hashes of
10// various data types, without relying on the underlying hasher type to know
11// about hashed data types.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_SUPPORT_HASHBUILDER_H
16#define LLVM_SUPPORT_HASHBUILDER_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/Support/Endian.h"
23#include "llvm/Support/type_traits.h"
24
25#include <iterator>
26#include <optional>
27#include <utility>
28
29namespace llvm {
30
31namespace hashbuilder_detail {
32/// Trait to indicate whether a type's bits can be hashed directly (after
33/// endianness correction).
34template <typename U>
35struct IsHashableData
36 : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
37
38} // namespace hashbuilder_detail
39
40/// Declares the hasher member, and functions forwarding directly to the hasher.
41template <typename HasherT> class HashBuilderBase {
42public:
43 template <typename HasherT_ = HasherT>
44 using HashResultTy = decltype(std::declval<HasherT_ &>().final());
45
46 HasherT &getHasher() { return Hasher; }
47
48 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
49 ///
50 /// This may not take the size of `Data` into account.
51 /// Users of this function should pay attention to respect endianness
52 /// contraints.
53 void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
54
55 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
56 ///
57 /// This may not take the size of `Data` into account.
58 /// Users of this function should pay attention to respect endianness
59 /// contraints.
60 void update(StringRef Data) {
61 update(
62 ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
63 }
64
65 /// Forward to `HasherT::final()` if available.
66 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
67 return this->getHasher().final();
68 }
69
70 /// Forward to `HasherT::result()` if available.
71 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
72 return this->getHasher().result();
73 }
74
75protected:
76 explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
77
78 template <typename... ArgTypes>
79 explicit HashBuilderBase(ArgTypes &&...Args)
80 : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...),
81 Hasher(*OptionalHasher) {}
82
83private:
84 std::optional<HasherT> OptionalHasher;
85 HasherT &Hasher;
86};
87
88/// Implementation of the `HashBuilder` interface.
89///
90/// `support::endianness::native` is not supported. `HashBuilder` is
91/// expected to canonicalize `support::endianness::native` to one of
92/// `support::endianness::big` or `support::endianness::little`.
93template <typename HasherT, support::endianness Endianness>
94class HashBuilderImpl : public HashBuilderBase<HasherT> {
95 static_assert(Endianness != support::endianness::native,
96 "HashBuilder should canonicalize endianness");
97
98public:
99 explicit HashBuilderImpl(HasherT &Hasher)
100 : HashBuilderBase<HasherT>(Hasher) {}
101 template <typename... ArgTypes>
102 explicit HashBuilderImpl(ArgTypes &&...Args)
103 : HashBuilderBase<HasherT>(Args...) {}
104
105 /// Implement hashing for hashable data types, e.g. integral or enum values.
106 template <typename T>
107 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
108 HashBuilderImpl &>
109 add(T Value) {
110 return adjustForEndiannessAndAdd(Value);
111 }
112
113 /// Support hashing `ArrayRef`.
114 ///
115 /// `Value.size()` is taken into account to ensure cases like
116 /// ```
117 /// builder.add({1});
118 /// builder.add({2, 3});
119 /// ```
120 /// and
121 /// ```
122 /// builder.add({1, 2});
123 /// builder.add({3});
124 /// ```
125 /// do not collide.
126 template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
127 // As of implementation time, simply calling `addRange(Value)` would also go
128 // through the `update` fast path. But that would rely on the implementation
129 // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
130 // `update` to guarantee the fast path.
131 add(Value.size());
132 if (hashbuilder_detail::IsHashableData<T>::value &&
133 Endianness == support::endian::system_endianness()) {
134 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
135 Value.size() * sizeof(T)));
136 } else {
137 for (auto &V : Value)
138 add(V);
139 }
140 return *this;
141 }
142
143 /// Support hashing `StringRef`.
144 ///
145 /// `Value.size()` is taken into account to ensure cases like
146 /// ```
147 /// builder.add("a");
148 /// builder.add("bc");
149 /// ```
150 /// and
151 /// ```
152 /// builder.add("ab");
153 /// builder.add("c");
154 /// ```
155 /// do not collide.
156 HashBuilderImpl &add(StringRef Value) {
157 // As of implementation time, simply calling `addRange(Value)` would also go
158 // through `update`. But that would rely on the implementation of
159 // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
160 // guarantee the fast path.
161 add(Value.size());
162 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
163 Value.size()));
164 return *this;
165 }
166
167 template <typename T>
168 using HasAddHashT =
169 decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
170 /// Implement hashing for user-defined `struct`s.
171 ///
172 /// Any user-define `struct` can participate in hashing via `HashBuilder` by
173 /// providing a `addHash` templated function.
174 ///
175 /// ```
176 /// template <typename HasherT, support::endianness Endianness>
177 /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
178 /// const UserDefinedStruct &Value);
179 /// ```
180 ///
181 /// For example:
182 /// ```
183 /// struct SimpleStruct {
184 /// char c;
185 /// int i;
186 /// };
187 ///
188 /// template <typename HasherT, support::endianness Endianness>
189 /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
190 /// const SimpleStruct &Value) {
191 /// HBuilder.add(Value.c);
192 /// HBuilder.add(Value.i);
193 /// }
194 /// ```
195 ///
196 /// To avoid endianness issues, specializations of `addHash` should
197 /// generally rely on exising `add`, `addRange`, and `addRangeElements`
198 /// functions. If directly using `update`, an implementation must correctly
199 /// handle endianness.
200 ///
201 /// ```
202 /// struct __attribute__ ((packed)) StructWithFastHash {
203 /// int I;
204 /// char C;
205 ///
206 /// // If possible, we want to hash both `I` and `C` in a single
207 /// // `update` call for performance concerns.
208 /// template <typename HasherT, support::endianness Endianness>
209 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
210 /// const StructWithFastHash &Value) {
211 /// if (Endianness == support::endian::system_endianness()) {
212 /// HBuilder.update(ArrayRef(
213 /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
214 /// } else {
215 /// // Rely on existing `add` methods to handle endianness.
216 /// HBuilder.add(Value.I);
217 /// HBuilder.add(Value.C);
218 /// }
219 /// }
220 /// };
221 /// ```
222 ///
223 /// To avoid collisions, specialization of `addHash` for variable-size
224 /// types must take the size into account.
225 ///
226 /// For example:
227 /// ```
228 /// struct CustomContainer {
229 /// private:
230 /// size_t Size;
231 /// int Elements[100];
232 ///
233 /// public:
234 /// CustomContainer(size_t Size) : Size(Size) {
235 /// for (size_t I = 0; I != Size; ++I)
236 /// Elements[I] = I;
237 /// }
238 /// template <typename HasherT, support::endianness Endianness>
239 /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
240 /// const CustomContainer &Value) {
241 /// if (Endianness == support::endian::system_endianness()) {
242 /// HBuilder.update(ArrayRef(
243 /// reinterpret_cast<const uint8_t *>(&Value.Size),
244 /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
245 /// } else {
246 /// // `addRange` will take care of encoding the size.
247 /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
248 /// Value.Size);
249 /// }
250 /// }
251 /// };
252 /// ```
253 template <typename T>
254 std::enable_if_t<is_detected<HasAddHashT, T>::value &&
255 !hashbuilder_detail::IsHashableData<T>::value,
256 HashBuilderImpl &>
257 add(const T &Value) {
258 addHash(*this, Value);
259 return *this;
260 }
261
262 template <typename T1, typename T2>
263 HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
264 return add(Value.first, Value.second);
265 }
266
267 template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
268 std::apply([this](const auto &...Args) { this->add(Args...); }, Arg);
269 return *this;
270 }
271
272 /// A convenenience variadic helper.
273 /// It simply iterates over its arguments, in order.
274 /// ```
275 /// add(Arg1, Arg2);
276 /// ```
277 /// is equivalent to
278 /// ```
279 /// add(Arg1)
280 /// add(Arg2)
281 /// ```
282 template <typename... Ts>
283 std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &>
284 add(const Ts &...Args) {
285 return (add(Args), ...);
286 }
287
288 template <typename ForwardIteratorT>
289 HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
290 add(std::distance(First, Last));
291 return addRangeElements(First, Last);
292 }
293
294 template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
295 return addRange(adl_begin(Range), adl_end(Range));
296 }
297
298 template <typename ForwardIteratorT>
299 HashBuilderImpl &addRangeElements(ForwardIteratorT First,
300 ForwardIteratorT Last) {
301 return addRangeElementsImpl(
302 First, Last,
303 typename std::iterator_traits<ForwardIteratorT>::iterator_category());
304 }
305
306 template <typename RangeT>
307 HashBuilderImpl &addRangeElements(const RangeT &Range) {
308 return addRangeElements(adl_begin(Range), adl_end(Range));
309 }
310
311 template <typename T>
312 using HasByteSwapT = decltype(support::endian::byte_swap(
313 std::declval<T &>(), support::endianness::little));
314 /// Adjust `Value` for the target endianness and add it to the hash.
315 template <typename T>
316 std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
317 adjustForEndiannessAndAdd(const T &Value) {
318 T SwappedValue = support::endian::byte_swap(Value, Endianness);
319 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
320 sizeof(SwappedValue)));
321 return *this;
322 }
323
324private:
325 // FIXME: Once available, specialize this function for `contiguous_iterator`s,
326 // and use it for `ArrayRef` and `StringRef`.
327 template <typename ForwardIteratorT>
328 HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
329 ForwardIteratorT Last,
330 std::forward_iterator_tag) {
331 for (auto It = First; It != Last; ++It)
332 add(*It);
333 return *this;
334 }
335
336 template <typename T>
337 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
338 Endianness == support::endian::system_endianness(),
339 HashBuilderImpl &>
340 addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
341 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
342 (Last - First) * sizeof(T)));
343 return *this;
344 }
345};
346
347/// Interface to help hash various types through a hasher type.
348///
349/// Via provided specializations of `add`, `addRange`, and `addRangeElements`
350/// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
351/// without requiring any knowledge of hashed types from the hasher type.
352///
353/// The only method expected from the templated hasher type `HasherT` is:
354/// * void update(ArrayRef<uint8_t> Data)
355///
356/// Additionally, the following methods will be forwarded to the hasher type:
357/// * decltype(std::declval<HasherT &>().final()) final()
358/// * decltype(std::declval<HasherT &>().result()) result()
359///
360/// From a user point of view, the interface provides the following:
361/// * `template<typename T> add(const T &Value)`
362/// The `add` function implements hashing of various types.
363/// * `template <typename ItT> void addRange(ItT First, ItT Last)`
364/// The `addRange` function is designed to aid hashing a range of values.
365/// It explicitly adds the size of the range in the hash.
366/// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
367/// The `addRangeElements` function is also designed to aid hashing a range of
368/// values. In contrast to `addRange`, it **ignores** the size of the range,
369/// behaving as if elements were added one at a time with `add`.
370///
371/// User-defined `struct` types can participate in this interface by providing
372/// an `addHash` templated function. See the associated template specialization
373/// for details.
374///
375/// This interface does not impose requirements on the hasher
376/// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
377/// variable-size types; for example for
378/// ```
379/// builder.add({1});
380/// builder.add({2, 3});
381/// ```
382/// and
383/// ```
384/// builder.add({1, 2});
385/// builder.add({3});
386/// ```
387/// . Thus, specializations of `add` and `addHash` for variable-size types must
388/// not assume that the hasher type considers the size as part of the hash; they
389/// must explicitly add the size to the hash. See for example specializations
390/// for `ArrayRef` and `StringRef`.
391///
392/// Additionally, since types are eventually forwarded to the hasher's
393/// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
394/// computation (for example when computing `add((int)123)`).
395/// Specifiying a non-`native` `Endianness` template parameter allows to compute
396/// stable hash across platforms with different endianness.
397template <class HasherT, support::endianness Endianness>
398using HashBuilder =
399 HashBuilderImpl<HasherT, (Endianness == support::endianness::native
400 ? support::endian::system_endianness()
401 : Endianness)>;
402
403namespace hashbuilder_detail {
404class HashCodeHasher {
405public:
406 HashCodeHasher() : Code(0) {}
407 void update(ArrayRef<uint8_t> Data) {
408 hash_code DataCode = hash_value(S: Data);
409 Code = hash_combine(args: Code, args: DataCode);
410 }
411 hash_code Code;
412};
413
414using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
415 support::endianness::native>;
416} // namespace hashbuilder_detail
417
418/// Provide a default implementation of `hash_value` when `addHash(const T &)`
419/// is supported.
420template <typename T>
421std::enable_if_t<
422 is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
423 hash_code>
424hash_value(const T &Value) {
425 hashbuilder_detail::HashCodeHashBuilder HBuilder;
426 HBuilder.add(Value);
427 return HBuilder.getHasher().Code;
428}
429} // end namespace llvm
430
431#endif // LLVM_SUPPORT_HASHBUILDER_H
432

source code of include/llvm-17/llvm/Support/HashBuilder.h