1 | //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements an interface allowing to conveniently build hashes of |
10 | // various data types, without relying on the underlying hasher type to know |
11 | // about hashed data types. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_SUPPORT_HASHBUILDER_H |
16 | #define LLVM_SUPPORT_HASHBUILDER_H |
17 | |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/Hashing.h" |
20 | #include "llvm/ADT/STLExtras.h" |
21 | #include "llvm/ADT/StringRef.h" |
22 | #include "llvm/Support/Endian.h" |
23 | #include "llvm/Support/type_traits.h" |
24 | |
25 | #include <iterator> |
26 | #include <optional> |
27 | #include <utility> |
28 | |
29 | namespace llvm { |
30 | |
31 | namespace hashbuilder_detail { |
32 | /// Trait to indicate whether a type's bits can be hashed directly (after |
33 | /// endianness correction). |
34 | template <typename U> |
35 | struct IsHashableData |
36 | : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; |
37 | |
38 | } // namespace hashbuilder_detail |
39 | |
40 | /// Declares the hasher member, and functions forwarding directly to the hasher. |
41 | template <typename HasherT> class HashBuilderBase { |
42 | public: |
43 | template <typename HasherT_ = HasherT> |
44 | using HashResultTy = decltype(std::declval<HasherT_ &>().final()); |
45 | |
46 | HasherT &getHasher() { return Hasher; } |
47 | |
48 | /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. |
49 | /// |
50 | /// This may not take the size of `Data` into account. |
51 | /// Users of this function should pay attention to respect endianness |
52 | /// contraints. |
53 | void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } |
54 | |
55 | /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. |
56 | /// |
57 | /// This may not take the size of `Data` into account. |
58 | /// Users of this function should pay attention to respect endianness |
59 | /// contraints. |
60 | void update(StringRef Data) { |
61 | update( |
62 | ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size())); |
63 | } |
64 | |
65 | /// Forward to `HasherT::final()` if available. |
66 | template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { |
67 | return this->getHasher().final(); |
68 | } |
69 | |
70 | /// Forward to `HasherT::result()` if available. |
71 | template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { |
72 | return this->getHasher().result(); |
73 | } |
74 | |
75 | protected: |
76 | explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} |
77 | |
78 | template <typename... ArgTypes> |
79 | explicit HashBuilderBase(ArgTypes &&...Args) |
80 | : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...), |
81 | Hasher(*OptionalHasher) {} |
82 | |
83 | private: |
84 | std::optional<HasherT> OptionalHasher; |
85 | HasherT &Hasher; |
86 | }; |
87 | |
88 | /// Interface to help hash various types through a hasher type. |
89 | /// |
90 | /// Via provided specializations of `add`, `addRange`, and `addRangeElements` |
91 | /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed |
92 | /// without requiring any knowledge of hashed types from the hasher type. |
93 | /// |
94 | /// The only method expected from the templated hasher type `HasherT` is: |
95 | /// * void update(ArrayRef<uint8_t> Data) |
96 | /// |
97 | /// Additionally, the following methods will be forwarded to the hasher type: |
98 | /// * decltype(std::declval<HasherT &>().final()) final() |
99 | /// * decltype(std::declval<HasherT &>().result()) result() |
100 | /// |
101 | /// From a user point of view, the interface provides the following: |
102 | /// * `template<typename T> add(const T &Value)` |
103 | /// The `add` function implements hashing of various types. |
104 | /// * `template <typename ItT> void addRange(ItT First, ItT Last)` |
105 | /// The `addRange` function is designed to aid hashing a range of values. |
106 | /// It explicitly adds the size of the range in the hash. |
107 | /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` |
108 | /// The `addRangeElements` function is also designed to aid hashing a range of |
109 | /// values. In contrast to `addRange`, it **ignores** the size of the range, |
110 | /// behaving as if elements were added one at a time with `add`. |
111 | /// |
112 | /// User-defined `struct` types can participate in this interface by providing |
113 | /// an `addHash` templated function. See the associated template specialization |
114 | /// for details. |
115 | /// |
116 | /// This interface does not impose requirements on the hasher |
117 | /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for |
118 | /// variable-size types; for example for |
119 | /// ``` |
120 | /// builder.add({1}); |
121 | /// builder.add({2, 3}); |
122 | /// ``` |
123 | /// and |
124 | /// ``` |
125 | /// builder.add({1, 2}); |
126 | /// builder.add({3}); |
127 | /// ``` |
128 | /// . Thus, specializations of `add` and `addHash` for variable-size types must |
129 | /// not assume that the hasher type considers the size as part of the hash; they |
130 | /// must explicitly add the size to the hash. See for example specializations |
131 | /// for `ArrayRef` and `StringRef`. |
132 | /// |
133 | /// Additionally, since types are eventually forwarded to the hasher's |
134 | /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash |
135 | /// computation (for example when computing `add((int)123)`). |
136 | /// Specifiying a non-`native` `Endianness` template parameter allows to compute |
137 | /// stable hash across platforms with different endianness. |
138 | template <typename HasherT, llvm::endianness Endianness> |
139 | class HashBuilder : public HashBuilderBase<HasherT> { |
140 | public: |
141 | explicit HashBuilder(HasherT &Hasher) : HashBuilderBase<HasherT>(Hasher) {} |
142 | template <typename... ArgTypes> |
143 | explicit HashBuilder(ArgTypes &&...Args) |
144 | : HashBuilderBase<HasherT>(Args...) {} |
145 | |
146 | /// Implement hashing for hashable data types, e.g. integral or enum values. |
147 | template <typename T> |
148 | std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, HashBuilder &> |
149 | add(T Value) { |
150 | return adjustForEndiannessAndAdd(Value); |
151 | } |
152 | |
153 | /// Support hashing `ArrayRef`. |
154 | /// |
155 | /// `Value.size()` is taken into account to ensure cases like |
156 | /// ``` |
157 | /// builder.add({1}); |
158 | /// builder.add({2, 3}); |
159 | /// ``` |
160 | /// and |
161 | /// ``` |
162 | /// builder.add({1, 2}); |
163 | /// builder.add({3}); |
164 | /// ``` |
165 | /// do not collide. |
166 | template <typename T> HashBuilder &add(ArrayRef<T> Value) { |
167 | // As of implementation time, simply calling `addRange(Value)` would also go |
168 | // through the `update` fast path. But that would rely on the implementation |
169 | // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call |
170 | // `update` to guarantee the fast path. |
171 | add(Value.size()); |
172 | if (hashbuilder_detail::IsHashableData<T>::value && |
173 | Endianness == llvm::endianness::native) { |
174 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), |
175 | Value.size() * sizeof(T))); |
176 | } else { |
177 | for (auto &V : Value) |
178 | add(V); |
179 | } |
180 | return *this; |
181 | } |
182 | |
183 | /// Support hashing `StringRef`. |
184 | /// |
185 | /// `Value.size()` is taken into account to ensure cases like |
186 | /// ``` |
187 | /// builder.add("a"); |
188 | /// builder.add("bc"); |
189 | /// ``` |
190 | /// and |
191 | /// ``` |
192 | /// builder.add("ab"); |
193 | /// builder.add("c"); |
194 | /// ``` |
195 | /// do not collide. |
196 | HashBuilder &add(StringRef Value) { |
197 | // As of implementation time, simply calling `addRange(Value)` would also go |
198 | // through `update`. But that would rely on the implementation of |
199 | // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to |
200 | // guarantee the fast path. |
201 | add(Value.size()); |
202 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), |
203 | Value.size())); |
204 | return *this; |
205 | } |
206 | |
207 | template <typename T> |
208 | using HasAddHashT = |
209 | decltype(addHash(std::declval<HashBuilder &>(), std::declval<T &>())); |
210 | /// Implement hashing for user-defined `struct`s. |
211 | /// |
212 | /// Any user-define `struct` can participate in hashing via `HashBuilder` by |
213 | /// providing a `addHash` templated function. |
214 | /// |
215 | /// ``` |
216 | /// template <typename HasherT, llvm::endianness Endianness> |
217 | /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, |
218 | /// const UserDefinedStruct &Value); |
219 | /// ``` |
220 | /// |
221 | /// For example: |
222 | /// ``` |
223 | /// struct SimpleStruct { |
224 | /// char c; |
225 | /// int i; |
226 | /// }; |
227 | /// |
228 | /// template <typename HasherT, llvm::endianness Endianness> |
229 | /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, |
230 | /// const SimpleStruct &Value) { |
231 | /// HBuilder.add(Value.c); |
232 | /// HBuilder.add(Value.i); |
233 | /// } |
234 | /// ``` |
235 | /// |
236 | /// To avoid endianness issues, specializations of `addHash` should |
237 | /// generally rely on exising `add`, `addRange`, and `addRangeElements` |
238 | /// functions. If directly using `update`, an implementation must correctly |
239 | /// handle endianness. |
240 | /// |
241 | /// ``` |
242 | /// struct __attribute__ ((packed)) StructWithFastHash { |
243 | /// int I; |
244 | /// char C; |
245 | /// |
246 | /// // If possible, we want to hash both `I` and `C` in a single |
247 | /// // `update` call for performance concerns. |
248 | /// template <typename HasherT, llvm::endianness Endianness> |
249 | /// friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder, |
250 | /// const StructWithFastHash &Value) { |
251 | /// if (Endianness == llvm::endianness::native) { |
252 | /// HBuilder.update(ArrayRef( |
253 | /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); |
254 | /// } else { |
255 | /// // Rely on existing `add` methods to handle endianness. |
256 | /// HBuilder.add(Value.I); |
257 | /// HBuilder.add(Value.C); |
258 | /// } |
259 | /// } |
260 | /// }; |
261 | /// ``` |
262 | /// |
263 | /// To avoid collisions, specialization of `addHash` for variable-size |
264 | /// types must take the size into account. |
265 | /// |
266 | /// For example: |
267 | /// ``` |
268 | /// struct CustomContainer { |
269 | /// private: |
270 | /// size_t Size; |
271 | /// int Elements[100]; |
272 | /// |
273 | /// public: |
274 | /// CustomContainer(size_t Size) : Size(Size) { |
275 | /// for (size_t I = 0; I != Size; ++I) |
276 | /// Elements[I] = I; |
277 | /// } |
278 | /// template <typename HasherT, llvm::endianness Endianness> |
279 | /// friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder, |
280 | /// const CustomContainer &Value) { |
281 | /// if (Endianness == llvm::endianness::native) { |
282 | /// HBuilder.update(ArrayRef( |
283 | /// reinterpret_cast<const uint8_t *>(&Value.Size), |
284 | /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); |
285 | /// } else { |
286 | /// // `addRange` will take care of encoding the size. |
287 | /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + |
288 | /// Value.Size); |
289 | /// } |
290 | /// } |
291 | /// }; |
292 | /// ``` |
293 | template <typename T> |
294 | std::enable_if_t<is_detected<HasAddHashT, T>::value && |
295 | !hashbuilder_detail::IsHashableData<T>::value, |
296 | HashBuilder &> |
297 | add(const T &Value) { |
298 | addHash(*this, Value); |
299 | return *this; |
300 | } |
301 | |
302 | template <typename T1, typename T2> |
303 | HashBuilder &add(const std::pair<T1, T2> &Value) { |
304 | return add(Value.first, Value.second); |
305 | } |
306 | |
307 | template <typename... Ts> HashBuilder &add(const std::tuple<Ts...> &Arg) { |
308 | std::apply([this](const auto &...Args) { this->add(Args...); }, Arg); |
309 | return *this; |
310 | } |
311 | |
312 | /// A convenenience variadic helper. |
313 | /// It simply iterates over its arguments, in order. |
314 | /// ``` |
315 | /// add(Arg1, Arg2); |
316 | /// ``` |
317 | /// is equivalent to |
318 | /// ``` |
319 | /// add(Arg1) |
320 | /// add(Arg2) |
321 | /// ``` |
322 | template <typename... Ts> |
323 | std::enable_if_t<(sizeof...(Ts) > 1), HashBuilder &> add(const Ts &...Args) { |
324 | return (add(Args), ...); |
325 | } |
326 | |
327 | template <typename ForwardIteratorT> |
328 | HashBuilder &addRange(ForwardIteratorT First, ForwardIteratorT Last) { |
329 | add(std::distance(First, Last)); |
330 | return addRangeElements(First, Last); |
331 | } |
332 | |
333 | template <typename RangeT> HashBuilder &addRange(const RangeT &Range) { |
334 | return addRange(adl_begin(Range), adl_end(Range)); |
335 | } |
336 | |
337 | template <typename ForwardIteratorT> |
338 | HashBuilder &addRangeElements(ForwardIteratorT First, ForwardIteratorT Last) { |
339 | return addRangeElementsImpl( |
340 | First, Last, |
341 | typename std::iterator_traits<ForwardIteratorT>::iterator_category()); |
342 | } |
343 | |
344 | template <typename RangeT> |
345 | HashBuilder &addRangeElements(const RangeT &Range) { |
346 | return addRangeElements(adl_begin(Range), adl_end(Range)); |
347 | } |
348 | |
349 | template <typename T> |
350 | using HasByteSwapT = decltype(support::endian::byte_swap( |
351 | std::declval<T &>(), llvm::endianness::little)); |
352 | /// Adjust `Value` for the target endianness and add it to the hash. |
353 | template <typename T> |
354 | std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilder &> |
355 | adjustForEndiannessAndAdd(const T &Value) { |
356 | T SwappedValue = support::endian::byte_swap(Value, Endianness); |
357 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), |
358 | sizeof(SwappedValue))); |
359 | return *this; |
360 | } |
361 | |
362 | private: |
363 | // FIXME: Once available, specialize this function for `contiguous_iterator`s, |
364 | // and use it for `ArrayRef` and `StringRef`. |
365 | template <typename ForwardIteratorT> |
366 | HashBuilder &addRangeElementsImpl(ForwardIteratorT First, |
367 | ForwardIteratorT Last, |
368 | std::forward_iterator_tag) { |
369 | for (auto It = First; It != Last; ++It) |
370 | add(*It); |
371 | return *this; |
372 | } |
373 | |
374 | template <typename T> |
375 | std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && |
376 | Endianness == llvm::endianness::native, |
377 | HashBuilder &> |
378 | addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { |
379 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First), |
380 | (Last - First) * sizeof(T))); |
381 | return *this; |
382 | } |
383 | }; |
384 | |
385 | namespace hashbuilder_detail { |
386 | class HashCodeHasher { |
387 | public: |
388 | HashCodeHasher() : Code(0) {} |
389 | void update(ArrayRef<uint8_t> Data) { |
390 | hash_code DataCode = hash_value(S: Data); |
391 | Code = hash_combine(args: Code, args: DataCode); |
392 | } |
393 | hash_code Code; |
394 | }; |
395 | |
396 | using HashCodeHashBuilder = |
397 | HashBuilder<hashbuilder_detail::HashCodeHasher, llvm::endianness::native>; |
398 | } // namespace hashbuilder_detail |
399 | |
400 | /// Provide a default implementation of `hash_value` when `addHash(const T &)` |
401 | /// is supported. |
402 | template <typename T> |
403 | std::enable_if_t< |
404 | is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, |
405 | hash_code> |
406 | hash_value(const T &Value) { |
407 | hashbuilder_detail::HashCodeHashBuilder HBuilder; |
408 | HBuilder.add(Value); |
409 | return HBuilder.getHasher().Code; |
410 | } |
411 | } // end namespace llvm |
412 | |
413 | #endif // LLVM_SUPPORT_HASHBUILDER_H |
414 | |