| 1 | //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements an interface allowing to conveniently build hashes of |
| 10 | // various data types, without relying on the underlying hasher type to know |
| 11 | // about hashed data types. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef LLVM_SUPPORT_HASHBUILDER_H |
| 16 | #define LLVM_SUPPORT_HASHBUILDER_H |
| 17 | |
| 18 | #include "llvm/ADT/ArrayRef.h" |
| 19 | #include "llvm/ADT/Hashing.h" |
| 20 | #include "llvm/ADT/STLExtras.h" |
| 21 | #include "llvm/ADT/StringRef.h" |
| 22 | #include "llvm/Support/Endian.h" |
| 23 | #include "llvm/Support/type_traits.h" |
| 24 | |
| 25 | #include <iterator> |
| 26 | #include <optional> |
| 27 | #include <utility> |
| 28 | |
| 29 | namespace llvm { |
| 30 | |
| 31 | namespace hashbuilder_detail { |
| 32 | /// Trait to indicate whether a type's bits can be hashed directly (after |
| 33 | /// endianness correction). |
| 34 | template <typename U> |
| 35 | struct IsHashableData |
| 36 | : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; |
| 37 | |
| 38 | } // namespace hashbuilder_detail |
| 39 | |
| 40 | /// Declares the hasher member, and functions forwarding directly to the hasher. |
| 41 | template <typename HasherT> class HashBuilderBase { |
| 42 | public: |
| 43 | template <typename HasherT_ = HasherT> |
| 44 | using HashResultTy = decltype(std::declval<HasherT_ &>().final()); |
| 45 | |
| 46 | HasherT &getHasher() { return Hasher; } |
| 47 | |
| 48 | /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. |
| 49 | /// |
| 50 | /// This may not take the size of `Data` into account. |
| 51 | /// Users of this function should pay attention to respect endianness |
| 52 | /// contraints. |
| 53 | void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } |
| 54 | |
| 55 | /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. |
| 56 | /// |
| 57 | /// This may not take the size of `Data` into account. |
| 58 | /// Users of this function should pay attention to respect endianness |
| 59 | /// contraints. |
| 60 | void update(StringRef Data) { |
| 61 | update( |
| 62 | ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size())); |
| 63 | } |
| 64 | |
| 65 | /// Forward to `HasherT::final()` if available. |
| 66 | template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { |
| 67 | return this->getHasher().final(); |
| 68 | } |
| 69 | |
| 70 | /// Forward to `HasherT::result()` if available. |
| 71 | template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { |
| 72 | return this->getHasher().result(); |
| 73 | } |
| 74 | |
| 75 | protected: |
| 76 | explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} |
| 77 | |
| 78 | template <typename... ArgTypes> |
| 79 | explicit HashBuilderBase(ArgTypes &&...Args) |
| 80 | : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...), |
| 81 | Hasher(*OptionalHasher) {} |
| 82 | |
| 83 | private: |
| 84 | std::optional<HasherT> OptionalHasher; |
| 85 | HasherT &Hasher; |
| 86 | }; |
| 87 | |
| 88 | /// Implementation of the `HashBuilder` interface. |
| 89 | /// |
| 90 | /// `support::endianness::native` is not supported. `HashBuilder` is |
| 91 | /// expected to canonicalize `support::endianness::native` to one of |
| 92 | /// `support::endianness::big` or `support::endianness::little`. |
| 93 | template <typename HasherT, support::endianness Endianness> |
| 94 | class HashBuilderImpl : public HashBuilderBase<HasherT> { |
| 95 | static_assert(Endianness != support::endianness::native, |
| 96 | "HashBuilder should canonicalize endianness" ); |
| 97 | |
| 98 | public: |
| 99 | explicit HashBuilderImpl(HasherT &Hasher) |
| 100 | : HashBuilderBase<HasherT>(Hasher) {} |
| 101 | template <typename... ArgTypes> |
| 102 | explicit HashBuilderImpl(ArgTypes &&...Args) |
| 103 | : HashBuilderBase<HasherT>(Args...) {} |
| 104 | |
| 105 | /// Implement hashing for hashable data types, e.g. integral or enum values. |
| 106 | template <typename T> |
| 107 | std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, |
| 108 | HashBuilderImpl &> |
| 109 | add(T Value) { |
| 110 | return adjustForEndiannessAndAdd(Value); |
| 111 | } |
| 112 | |
| 113 | /// Support hashing `ArrayRef`. |
| 114 | /// |
| 115 | /// `Value.size()` is taken into account to ensure cases like |
| 116 | /// ``` |
| 117 | /// builder.add({1}); |
| 118 | /// builder.add({2, 3}); |
| 119 | /// ``` |
| 120 | /// and |
| 121 | /// ``` |
| 122 | /// builder.add({1, 2}); |
| 123 | /// builder.add({3}); |
| 124 | /// ``` |
| 125 | /// do not collide. |
| 126 | template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { |
| 127 | // As of implementation time, simply calling `addRange(Value)` would also go |
| 128 | // through the `update` fast path. But that would rely on the implementation |
| 129 | // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call |
| 130 | // `update` to guarantee the fast path. |
| 131 | add(Value.size()); |
| 132 | if (hashbuilder_detail::IsHashableData<T>::value && |
| 133 | Endianness == support::endian::system_endianness()) { |
| 134 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), |
| 135 | Value.size() * sizeof(T))); |
| 136 | } else { |
| 137 | for (auto &V : Value) |
| 138 | add(V); |
| 139 | } |
| 140 | return *this; |
| 141 | } |
| 142 | |
| 143 | /// Support hashing `StringRef`. |
| 144 | /// |
| 145 | /// `Value.size()` is taken into account to ensure cases like |
| 146 | /// ``` |
| 147 | /// builder.add("a"); |
| 148 | /// builder.add("bc"); |
| 149 | /// ``` |
| 150 | /// and |
| 151 | /// ``` |
| 152 | /// builder.add("ab"); |
| 153 | /// builder.add("c"); |
| 154 | /// ``` |
| 155 | /// do not collide. |
| 156 | HashBuilderImpl &add(StringRef Value) { |
| 157 | // As of implementation time, simply calling `addRange(Value)` would also go |
| 158 | // through `update`. But that would rely on the implementation of |
| 159 | // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to |
| 160 | // guarantee the fast path. |
| 161 | add(Value.size()); |
| 162 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), |
| 163 | Value.size())); |
| 164 | return *this; |
| 165 | } |
| 166 | |
| 167 | template <typename T> |
| 168 | using HasAddHashT = |
| 169 | decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); |
| 170 | /// Implement hashing for user-defined `struct`s. |
| 171 | /// |
| 172 | /// Any user-define `struct` can participate in hashing via `HashBuilder` by |
| 173 | /// providing a `addHash` templated function. |
| 174 | /// |
| 175 | /// ``` |
| 176 | /// template <typename HasherT, support::endianness Endianness> |
| 177 | /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, |
| 178 | /// const UserDefinedStruct &Value); |
| 179 | /// ``` |
| 180 | /// |
| 181 | /// For example: |
| 182 | /// ``` |
| 183 | /// struct SimpleStruct { |
| 184 | /// char c; |
| 185 | /// int i; |
| 186 | /// }; |
| 187 | /// |
| 188 | /// template <typename HasherT, support::endianness Endianness> |
| 189 | /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, |
| 190 | /// const SimpleStruct &Value) { |
| 191 | /// HBuilder.add(Value.c); |
| 192 | /// HBuilder.add(Value.i); |
| 193 | /// } |
| 194 | /// ``` |
| 195 | /// |
| 196 | /// To avoid endianness issues, specializations of `addHash` should |
| 197 | /// generally rely on exising `add`, `addRange`, and `addRangeElements` |
| 198 | /// functions. If directly using `update`, an implementation must correctly |
| 199 | /// handle endianness. |
| 200 | /// |
| 201 | /// ``` |
| 202 | /// struct __attribute__ ((packed)) StructWithFastHash { |
| 203 | /// int I; |
| 204 | /// char C; |
| 205 | /// |
| 206 | /// // If possible, we want to hash both `I` and `C` in a single |
| 207 | /// // `update` call for performance concerns. |
| 208 | /// template <typename HasherT, support::endianness Endianness> |
| 209 | /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, |
| 210 | /// const StructWithFastHash &Value) { |
| 211 | /// if (Endianness == support::endian::system_endianness()) { |
| 212 | /// HBuilder.update(ArrayRef( |
| 213 | /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); |
| 214 | /// } else { |
| 215 | /// // Rely on existing `add` methods to handle endianness. |
| 216 | /// HBuilder.add(Value.I); |
| 217 | /// HBuilder.add(Value.C); |
| 218 | /// } |
| 219 | /// } |
| 220 | /// }; |
| 221 | /// ``` |
| 222 | /// |
| 223 | /// To avoid collisions, specialization of `addHash` for variable-size |
| 224 | /// types must take the size into account. |
| 225 | /// |
| 226 | /// For example: |
| 227 | /// ``` |
| 228 | /// struct CustomContainer { |
| 229 | /// private: |
| 230 | /// size_t Size; |
| 231 | /// int Elements[100]; |
| 232 | /// |
| 233 | /// public: |
| 234 | /// CustomContainer(size_t Size) : Size(Size) { |
| 235 | /// for (size_t I = 0; I != Size; ++I) |
| 236 | /// Elements[I] = I; |
| 237 | /// } |
| 238 | /// template <typename HasherT, support::endianness Endianness> |
| 239 | /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, |
| 240 | /// const CustomContainer &Value) { |
| 241 | /// if (Endianness == support::endian::system_endianness()) { |
| 242 | /// HBuilder.update(ArrayRef( |
| 243 | /// reinterpret_cast<const uint8_t *>(&Value.Size), |
| 244 | /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); |
| 245 | /// } else { |
| 246 | /// // `addRange` will take care of encoding the size. |
| 247 | /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + |
| 248 | /// Value.Size); |
| 249 | /// } |
| 250 | /// } |
| 251 | /// }; |
| 252 | /// ``` |
| 253 | template <typename T> |
| 254 | std::enable_if_t<is_detected<HasAddHashT, T>::value && |
| 255 | !hashbuilder_detail::IsHashableData<T>::value, |
| 256 | HashBuilderImpl &> |
| 257 | add(const T &Value) { |
| 258 | addHash(*this, Value); |
| 259 | return *this; |
| 260 | } |
| 261 | |
| 262 | template <typename T1, typename T2> |
| 263 | HashBuilderImpl &add(const std::pair<T1, T2> &Value) { |
| 264 | return add(Value.first, Value.second); |
| 265 | } |
| 266 | |
| 267 | template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { |
| 268 | std::apply([this](const auto &...Args) { this->add(Args...); }, Arg); |
| 269 | return *this; |
| 270 | } |
| 271 | |
| 272 | /// A convenenience variadic helper. |
| 273 | /// It simply iterates over its arguments, in order. |
| 274 | /// ``` |
| 275 | /// add(Arg1, Arg2); |
| 276 | /// ``` |
| 277 | /// is equivalent to |
| 278 | /// ``` |
| 279 | /// add(Arg1) |
| 280 | /// add(Arg2) |
| 281 | /// ``` |
| 282 | template <typename... Ts> |
| 283 | std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &> |
| 284 | add(const Ts &...Args) { |
| 285 | return (add(Args), ...); |
| 286 | } |
| 287 | |
| 288 | template <typename ForwardIteratorT> |
| 289 | HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { |
| 290 | add(std::distance(First, Last)); |
| 291 | return addRangeElements(First, Last); |
| 292 | } |
| 293 | |
| 294 | template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { |
| 295 | return addRange(adl_begin(Range), adl_end(Range)); |
| 296 | } |
| 297 | |
| 298 | template <typename ForwardIteratorT> |
| 299 | HashBuilderImpl &addRangeElements(ForwardIteratorT First, |
| 300 | ForwardIteratorT Last) { |
| 301 | return addRangeElementsImpl( |
| 302 | First, Last, |
| 303 | typename std::iterator_traits<ForwardIteratorT>::iterator_category()); |
| 304 | } |
| 305 | |
| 306 | template <typename RangeT> |
| 307 | HashBuilderImpl &addRangeElements(const RangeT &Range) { |
| 308 | return addRangeElements(adl_begin(Range), adl_end(Range)); |
| 309 | } |
| 310 | |
| 311 | template <typename T> |
| 312 | using HasByteSwapT = decltype(support::endian::byte_swap( |
| 313 | std::declval<T &>(), support::endianness::little)); |
| 314 | /// Adjust `Value` for the target endianness and add it to the hash. |
| 315 | template <typename T> |
| 316 | std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> |
| 317 | adjustForEndiannessAndAdd(const T &Value) { |
| 318 | T SwappedValue = support::endian::byte_swap(Value, Endianness); |
| 319 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), |
| 320 | sizeof(SwappedValue))); |
| 321 | return *this; |
| 322 | } |
| 323 | |
| 324 | private: |
| 325 | // FIXME: Once available, specialize this function for `contiguous_iterator`s, |
| 326 | // and use it for `ArrayRef` and `StringRef`. |
| 327 | template <typename ForwardIteratorT> |
| 328 | HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, |
| 329 | ForwardIteratorT Last, |
| 330 | std::forward_iterator_tag) { |
| 331 | for (auto It = First; It != Last; ++It) |
| 332 | add(*It); |
| 333 | return *this; |
| 334 | } |
| 335 | |
| 336 | template <typename T> |
| 337 | std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && |
| 338 | Endianness == support::endian::system_endianness(), |
| 339 | HashBuilderImpl &> |
| 340 | addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { |
| 341 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First), |
| 342 | (Last - First) * sizeof(T))); |
| 343 | return *this; |
| 344 | } |
| 345 | }; |
| 346 | |
| 347 | /// Interface to help hash various types through a hasher type. |
| 348 | /// |
| 349 | /// Via provided specializations of `add`, `addRange`, and `addRangeElements` |
| 350 | /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed |
| 351 | /// without requiring any knowledge of hashed types from the hasher type. |
| 352 | /// |
| 353 | /// The only method expected from the templated hasher type `HasherT` is: |
| 354 | /// * void update(ArrayRef<uint8_t> Data) |
| 355 | /// |
| 356 | /// Additionally, the following methods will be forwarded to the hasher type: |
| 357 | /// * decltype(std::declval<HasherT &>().final()) final() |
| 358 | /// * decltype(std::declval<HasherT &>().result()) result() |
| 359 | /// |
| 360 | /// From a user point of view, the interface provides the following: |
| 361 | /// * `template<typename T> add(const T &Value)` |
| 362 | /// The `add` function implements hashing of various types. |
| 363 | /// * `template <typename ItT> void addRange(ItT First, ItT Last)` |
| 364 | /// The `addRange` function is designed to aid hashing a range of values. |
| 365 | /// It explicitly adds the size of the range in the hash. |
| 366 | /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` |
| 367 | /// The `addRangeElements` function is also designed to aid hashing a range of |
| 368 | /// values. In contrast to `addRange`, it **ignores** the size of the range, |
| 369 | /// behaving as if elements were added one at a time with `add`. |
| 370 | /// |
| 371 | /// User-defined `struct` types can participate in this interface by providing |
| 372 | /// an `addHash` templated function. See the associated template specialization |
| 373 | /// for details. |
| 374 | /// |
| 375 | /// This interface does not impose requirements on the hasher |
| 376 | /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for |
| 377 | /// variable-size types; for example for |
| 378 | /// ``` |
| 379 | /// builder.add({1}); |
| 380 | /// builder.add({2, 3}); |
| 381 | /// ``` |
| 382 | /// and |
| 383 | /// ``` |
| 384 | /// builder.add({1, 2}); |
| 385 | /// builder.add({3}); |
| 386 | /// ``` |
| 387 | /// . Thus, specializations of `add` and `addHash` for variable-size types must |
| 388 | /// not assume that the hasher type considers the size as part of the hash; they |
| 389 | /// must explicitly add the size to the hash. See for example specializations |
| 390 | /// for `ArrayRef` and `StringRef`. |
| 391 | /// |
| 392 | /// Additionally, since types are eventually forwarded to the hasher's |
| 393 | /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash |
| 394 | /// computation (for example when computing `add((int)123)`). |
| 395 | /// Specifiying a non-`native` `Endianness` template parameter allows to compute |
| 396 | /// stable hash across platforms with different endianness. |
| 397 | template <class HasherT, support::endianness Endianness> |
| 398 | using HashBuilder = |
| 399 | HashBuilderImpl<HasherT, (Endianness == support::endianness::native |
| 400 | ? support::endian::system_endianness() |
| 401 | : Endianness)>; |
| 402 | |
| 403 | namespace hashbuilder_detail { |
| 404 | class HashCodeHasher { |
| 405 | public: |
| 406 | HashCodeHasher() : Code(0) {} |
| 407 | void update(ArrayRef<uint8_t> Data) { |
| 408 | hash_code DataCode = hash_value(S: Data); |
| 409 | Code = hash_combine(args: Code, args: DataCode); |
| 410 | } |
| 411 | hash_code Code; |
| 412 | }; |
| 413 | |
| 414 | using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, |
| 415 | support::endianness::native>; |
| 416 | } // namespace hashbuilder_detail |
| 417 | |
| 418 | /// Provide a default implementation of `hash_value` when `addHash(const T &)` |
| 419 | /// is supported. |
| 420 | template <typename T> |
| 421 | std::enable_if_t< |
| 422 | is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, |
| 423 | hash_code> |
| 424 | hash_value(const T &Value) { |
| 425 | hashbuilder_detail::HashCodeHashBuilder HBuilder; |
| 426 | HBuilder.add(Value); |
| 427 | return HBuilder.getHasher().Code; |
| 428 | } |
| 429 | } // end namespace llvm |
| 430 | |
| 431 | #endif // LLVM_SUPPORT_HASHBUILDER_H |
| 432 | |