| 1 | //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// | 
| 2 | // | 
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| 6 | // | 
| 7 | //===----------------------------------------------------------------------===// | 
| 8 | // | 
| 9 | // This file implements an interface allowing to conveniently build hashes of | 
| 10 | // various data types, without relying on the underlying hasher type to know | 
| 11 | // about hashed data types. | 
| 12 | // | 
| 13 | //===----------------------------------------------------------------------===// | 
| 14 |  | 
| 15 | #ifndef LLVM_SUPPORT_HASHBUILDER_H | 
| 16 | #define LLVM_SUPPORT_HASHBUILDER_H | 
| 17 |  | 
| 18 | #include "llvm/ADT/ArrayRef.h" | 
| 19 | #include "llvm/ADT/Hashing.h" | 
| 20 | #include "llvm/ADT/STLExtras.h" | 
| 21 | #include "llvm/ADT/StringRef.h" | 
| 22 | #include "llvm/Support/Endian.h" | 
| 23 | #include "llvm/Support/type_traits.h" | 
| 24 |  | 
| 25 | #include <iterator> | 
| 26 | #include <optional> | 
| 27 | #include <utility> | 
| 28 |  | 
| 29 | namespace llvm { | 
| 30 |  | 
| 31 | namespace hashbuilder_detail { | 
| 32 | /// Trait to indicate whether a type's bits can be hashed directly (after | 
| 33 | /// endianness correction). | 
| 34 | template <typename U> | 
| 35 | struct IsHashableData | 
| 36 |     : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; | 
| 37 |  | 
| 38 | } // namespace hashbuilder_detail | 
| 39 |  | 
| 40 | /// Declares the hasher member, and functions forwarding directly to the hasher. | 
| 41 | template <typename HasherT> class HashBuilderBase { | 
| 42 | public: | 
| 43 |   template <typename HasherT_ = HasherT> | 
| 44 |   using HashResultTy = decltype(std::declval<HasherT_ &>().final()); | 
| 45 |  | 
| 46 |   HasherT &getHasher() { return Hasher; } | 
| 47 |  | 
| 48 |   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. | 
| 49 |   /// | 
| 50 |   /// This may not take the size of `Data` into account. | 
| 51 |   /// Users of this function should pay attention to respect endianness | 
| 52 |   /// contraints. | 
| 53 |   void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } | 
| 54 |  | 
| 55 |   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. | 
| 56 |   /// | 
| 57 |   /// This may not take the size of `Data` into account. | 
| 58 |   /// Users of this function should pay attention to respect endianness | 
| 59 |   /// contraints. | 
| 60 |   void update(StringRef Data) { | 
| 61 |     update( | 
| 62 |         ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size())); | 
| 63 |   } | 
| 64 |  | 
| 65 |   /// Forward to `HasherT::final()` if available. | 
| 66 |   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { | 
| 67 |     return this->getHasher().final(); | 
| 68 |   } | 
| 69 |  | 
| 70 |   /// Forward to `HasherT::result()` if available. | 
| 71 |   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { | 
| 72 |     return this->getHasher().result(); | 
| 73 |   } | 
| 74 |  | 
| 75 | protected: | 
| 76 |   explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} | 
| 77 |  | 
| 78 |   template <typename... ArgTypes> | 
| 79 |   explicit HashBuilderBase(ArgTypes &&...Args) | 
| 80 |       : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...), | 
| 81 |         Hasher(*OptionalHasher) {} | 
| 82 |  | 
| 83 | private: | 
| 84 |   std::optional<HasherT> OptionalHasher; | 
| 85 |   HasherT &Hasher; | 
| 86 | }; | 
| 87 |  | 
| 88 | /// Implementation of the `HashBuilder` interface. | 
| 89 | /// | 
| 90 | /// `support::endianness::native` is not supported. `HashBuilder` is | 
| 91 | /// expected to canonicalize `support::endianness::native` to one of | 
| 92 | /// `support::endianness::big` or `support::endianness::little`. | 
| 93 | template <typename HasherT, support::endianness Endianness> | 
| 94 | class HashBuilderImpl : public HashBuilderBase<HasherT> { | 
| 95 |   static_assert(Endianness != support::endianness::native, | 
| 96 |                 "HashBuilder should canonicalize endianness" ); | 
| 97 |  | 
| 98 | public: | 
| 99 |   explicit HashBuilderImpl(HasherT &Hasher) | 
| 100 |       : HashBuilderBase<HasherT>(Hasher) {} | 
| 101 |   template <typename... ArgTypes> | 
| 102 |   explicit HashBuilderImpl(ArgTypes &&...Args) | 
| 103 |       : HashBuilderBase<HasherT>(Args...) {} | 
| 104 |  | 
| 105 |   /// Implement hashing for hashable data types, e.g. integral or enum values. | 
| 106 |   template <typename T> | 
| 107 |   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, | 
| 108 |                    HashBuilderImpl &> | 
| 109 |   add(T Value) { | 
| 110 |     return adjustForEndiannessAndAdd(Value); | 
| 111 |   } | 
| 112 |  | 
| 113 |   /// Support hashing `ArrayRef`. | 
| 114 |   /// | 
| 115 |   /// `Value.size()` is taken into account to ensure cases like | 
| 116 |   /// ``` | 
| 117 |   /// builder.add({1}); | 
| 118 |   /// builder.add({2, 3}); | 
| 119 |   /// ``` | 
| 120 |   /// and | 
| 121 |   /// ``` | 
| 122 |   /// builder.add({1, 2}); | 
| 123 |   /// builder.add({3}); | 
| 124 |   /// ``` | 
| 125 |   /// do not collide. | 
| 126 |   template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { | 
| 127 |     // As of implementation time, simply calling `addRange(Value)` would also go | 
| 128 |     // through the `update` fast path. But that would rely on the implementation | 
| 129 |     // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call | 
| 130 |     // `update` to guarantee the fast path. | 
| 131 |     add(Value.size()); | 
| 132 |     if (hashbuilder_detail::IsHashableData<T>::value && | 
| 133 |         Endianness == support::endian::system_endianness()) { | 
| 134 |       this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), | 
| 135 |                             Value.size() * sizeof(T))); | 
| 136 |     } else { | 
| 137 |       for (auto &V : Value) | 
| 138 |         add(V); | 
| 139 |     } | 
| 140 |     return *this; | 
| 141 |   } | 
| 142 |  | 
| 143 |   /// Support hashing `StringRef`. | 
| 144 |   /// | 
| 145 |   /// `Value.size()` is taken into account to ensure cases like | 
| 146 |   /// ``` | 
| 147 |   /// builder.add("a"); | 
| 148 |   /// builder.add("bc"); | 
| 149 |   /// ``` | 
| 150 |   /// and | 
| 151 |   /// ``` | 
| 152 |   /// builder.add("ab"); | 
| 153 |   /// builder.add("c"); | 
| 154 |   /// ``` | 
| 155 |   /// do not collide. | 
| 156 |   HashBuilderImpl &add(StringRef Value) { | 
| 157 |     // As of implementation time, simply calling `addRange(Value)` would also go | 
| 158 |     // through `update`. But that would rely on the implementation of | 
| 159 |     // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to | 
| 160 |     // guarantee the fast path. | 
| 161 |     add(Value.size()); | 
| 162 |     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), | 
| 163 |                           Value.size())); | 
| 164 |     return *this; | 
| 165 |   } | 
| 166 |  | 
| 167 |   template <typename T> | 
| 168 |   using HasAddHashT = | 
| 169 |       decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); | 
| 170 |   /// Implement hashing for user-defined `struct`s. | 
| 171 |   /// | 
| 172 |   /// Any user-define `struct` can participate in hashing via `HashBuilder` by | 
| 173 |   /// providing a `addHash` templated function. | 
| 174 |   /// | 
| 175 |   /// ``` | 
| 176 |   /// template <typename HasherT, support::endianness Endianness> | 
| 177 |   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, | 
| 178 |   ///              const UserDefinedStruct &Value); | 
| 179 |   /// ``` | 
| 180 |   /// | 
| 181 |   /// For example: | 
| 182 |   /// ``` | 
| 183 |   /// struct SimpleStruct { | 
| 184 |   ///   char c; | 
| 185 |   ///   int i; | 
| 186 |   /// }; | 
| 187 |   /// | 
| 188 |   /// template <typename HasherT, support::endianness Endianness> | 
| 189 |   /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, | 
| 190 |   ///              const SimpleStruct &Value) { | 
| 191 |   ///   HBuilder.add(Value.c); | 
| 192 |   ///   HBuilder.add(Value.i); | 
| 193 |   /// } | 
| 194 |   /// ``` | 
| 195 |   /// | 
| 196 |   /// To avoid endianness issues, specializations of `addHash` should | 
| 197 |   /// generally rely on exising `add`, `addRange`, and `addRangeElements` | 
| 198 |   /// functions. If directly using `update`, an implementation must correctly | 
| 199 |   /// handle endianness. | 
| 200 |   /// | 
| 201 |   /// ``` | 
| 202 |   /// struct __attribute__ ((packed)) StructWithFastHash { | 
| 203 |   ///   int I; | 
| 204 |   ///   char C; | 
| 205 |   /// | 
| 206 |   ///   // If possible, we want to hash both `I` and `C` in a single | 
| 207 |   ///   // `update` call for performance concerns. | 
| 208 |   ///   template <typename HasherT, support::endianness Endianness> | 
| 209 |   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, | 
| 210 |   ///                       const StructWithFastHash &Value) { | 
| 211 |   ///     if (Endianness == support::endian::system_endianness()) { | 
| 212 |   ///       HBuilder.update(ArrayRef( | 
| 213 |   ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); | 
| 214 |   ///     } else { | 
| 215 |   ///       // Rely on existing `add` methods to handle endianness. | 
| 216 |   ///       HBuilder.add(Value.I); | 
| 217 |   ///       HBuilder.add(Value.C); | 
| 218 |   ///     } | 
| 219 |   ///   } | 
| 220 |   /// }; | 
| 221 |   /// ``` | 
| 222 |   /// | 
| 223 |   /// To avoid collisions, specialization of `addHash` for variable-size | 
| 224 |   /// types must take the size into account. | 
| 225 |   /// | 
| 226 |   /// For example: | 
| 227 |   /// ``` | 
| 228 |   /// struct CustomContainer { | 
| 229 |   /// private: | 
| 230 |   ///   size_t Size; | 
| 231 |   ///   int Elements[100]; | 
| 232 |   /// | 
| 233 |   /// public: | 
| 234 |   ///   CustomContainer(size_t Size) : Size(Size) { | 
| 235 |   ///     for (size_t I = 0; I != Size; ++I) | 
| 236 |   ///       Elements[I] = I; | 
| 237 |   ///   } | 
| 238 |   ///   template <typename HasherT, support::endianness Endianness> | 
| 239 |   ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, | 
| 240 |   ///                       const CustomContainer &Value) { | 
| 241 |   ///     if (Endianness == support::endian::system_endianness()) { | 
| 242 |   ///       HBuilder.update(ArrayRef( | 
| 243 |   ///           reinterpret_cast<const uint8_t *>(&Value.Size), | 
| 244 |   ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); | 
| 245 |   ///     } else { | 
| 246 |   ///       // `addRange` will take care of encoding the size. | 
| 247 |   ///       HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + | 
| 248 |   ///       Value.Size); | 
| 249 |   ///     } | 
| 250 |   ///   } | 
| 251 |   /// }; | 
| 252 |   /// ``` | 
| 253 |   template <typename T> | 
| 254 |   std::enable_if_t<is_detected<HasAddHashT, T>::value && | 
| 255 |                        !hashbuilder_detail::IsHashableData<T>::value, | 
| 256 |                    HashBuilderImpl &> | 
| 257 |   add(const T &Value) { | 
| 258 |     addHash(*this, Value); | 
| 259 |     return *this; | 
| 260 |   } | 
| 261 |  | 
| 262 |   template <typename T1, typename T2> | 
| 263 |   HashBuilderImpl &add(const std::pair<T1, T2> &Value) { | 
| 264 |     return add(Value.first, Value.second); | 
| 265 |   } | 
| 266 |  | 
| 267 |   template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { | 
| 268 |     std::apply([this](const auto &...Args) { this->add(Args...); }, Arg); | 
| 269 |     return *this; | 
| 270 |   } | 
| 271 |  | 
| 272 |   /// A convenenience variadic helper. | 
| 273 |   /// It simply iterates over its arguments, in order. | 
| 274 |   /// ``` | 
| 275 |   /// add(Arg1, Arg2); | 
| 276 |   /// ``` | 
| 277 |   /// is equivalent to | 
| 278 |   /// ``` | 
| 279 |   /// add(Arg1) | 
| 280 |   /// add(Arg2) | 
| 281 |   /// ``` | 
| 282 |   template <typename... Ts> | 
| 283 |   std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &> | 
| 284 |   add(const Ts &...Args) { | 
| 285 |     return (add(Args), ...); | 
| 286 |   } | 
| 287 |  | 
| 288 |   template <typename ForwardIteratorT> | 
| 289 |   HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { | 
| 290 |     add(std::distance(First, Last)); | 
| 291 |     return addRangeElements(First, Last); | 
| 292 |   } | 
| 293 |  | 
| 294 |   template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { | 
| 295 |     return addRange(adl_begin(Range), adl_end(Range)); | 
| 296 |   } | 
| 297 |  | 
| 298 |   template <typename ForwardIteratorT> | 
| 299 |   HashBuilderImpl &addRangeElements(ForwardIteratorT First, | 
| 300 |                                     ForwardIteratorT Last) { | 
| 301 |     return addRangeElementsImpl( | 
| 302 |         First, Last, | 
| 303 |         typename std::iterator_traits<ForwardIteratorT>::iterator_category()); | 
| 304 |   } | 
| 305 |  | 
| 306 |   template <typename RangeT> | 
| 307 |   HashBuilderImpl &addRangeElements(const RangeT &Range) { | 
| 308 |     return addRangeElements(adl_begin(Range), adl_end(Range)); | 
| 309 |   } | 
| 310 |  | 
| 311 |   template <typename T> | 
| 312 |   using HasByteSwapT = decltype(support::endian::byte_swap( | 
| 313 |       std::declval<T &>(), support::endianness::little)); | 
| 314 |   /// Adjust `Value` for the target endianness and add it to the hash. | 
| 315 |   template <typename T> | 
| 316 |   std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> | 
| 317 |   adjustForEndiannessAndAdd(const T &Value) { | 
| 318 |     T SwappedValue = support::endian::byte_swap(Value, Endianness); | 
| 319 |     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), | 
| 320 |                           sizeof(SwappedValue))); | 
| 321 |     return *this; | 
| 322 |   } | 
| 323 |  | 
| 324 | private: | 
| 325 |   // FIXME: Once available, specialize this function for `contiguous_iterator`s, | 
| 326 |   // and use it for `ArrayRef` and `StringRef`. | 
| 327 |   template <typename ForwardIteratorT> | 
| 328 |   HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, | 
| 329 |                                         ForwardIteratorT Last, | 
| 330 |                                         std::forward_iterator_tag) { | 
| 331 |     for (auto It = First; It != Last; ++It) | 
| 332 |       add(*It); | 
| 333 |     return *this; | 
| 334 |   } | 
| 335 |  | 
| 336 |   template <typename T> | 
| 337 |   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && | 
| 338 |                        Endianness == support::endian::system_endianness(), | 
| 339 |                    HashBuilderImpl &> | 
| 340 |   addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { | 
| 341 |     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First), | 
| 342 |                           (Last - First) * sizeof(T))); | 
| 343 |     return *this; | 
| 344 |   } | 
| 345 | }; | 
| 346 |  | 
| 347 | /// Interface to help hash various types through a hasher type. | 
| 348 | /// | 
| 349 | /// Via provided specializations of `add`, `addRange`, and `addRangeElements` | 
| 350 | /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed | 
| 351 | /// without requiring any knowledge of hashed types from the hasher type. | 
| 352 | /// | 
| 353 | /// The only method expected from the templated hasher type `HasherT` is: | 
| 354 | /// * void update(ArrayRef<uint8_t> Data) | 
| 355 | /// | 
| 356 | /// Additionally, the following methods will be forwarded to the hasher type: | 
| 357 | /// * decltype(std::declval<HasherT &>().final()) final() | 
| 358 | /// * decltype(std::declval<HasherT &>().result()) result() | 
| 359 | /// | 
| 360 | /// From a user point of view, the interface provides the following: | 
| 361 | /// * `template<typename T> add(const T &Value)` | 
| 362 | ///   The `add` function implements hashing of various types. | 
| 363 | /// * `template <typename ItT> void addRange(ItT First, ItT Last)` | 
| 364 | ///   The `addRange` function is designed to aid hashing a range of values. | 
| 365 | ///   It explicitly adds the size of the range in the hash. | 
| 366 | /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` | 
| 367 | ///   The `addRangeElements` function is also designed to aid hashing a range of | 
| 368 | ///   values. In contrast to `addRange`, it **ignores** the size of the range, | 
| 369 | ///   behaving as if elements were added one at a time with `add`. | 
| 370 | /// | 
| 371 | /// User-defined `struct` types can participate in this interface by providing | 
| 372 | /// an `addHash` templated function. See the associated template specialization | 
| 373 | /// for details. | 
| 374 | /// | 
| 375 | /// This interface does not impose requirements on the hasher | 
| 376 | /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for | 
| 377 | /// variable-size types; for example for | 
| 378 | /// ``` | 
| 379 | /// builder.add({1}); | 
| 380 | /// builder.add({2, 3}); | 
| 381 | /// ``` | 
| 382 | /// and | 
| 383 | /// ``` | 
| 384 | /// builder.add({1, 2}); | 
| 385 | /// builder.add({3}); | 
| 386 | /// ``` | 
| 387 | /// . Thus, specializations of `add` and `addHash` for variable-size types must | 
| 388 | /// not assume that the hasher type considers the size as part of the hash; they | 
| 389 | /// must explicitly add the size to the hash. See for example specializations | 
| 390 | /// for `ArrayRef` and `StringRef`. | 
| 391 | /// | 
| 392 | /// Additionally, since types are eventually forwarded to the hasher's | 
| 393 | /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash | 
| 394 | /// computation (for example when computing `add((int)123)`). | 
| 395 | /// Specifiying a non-`native` `Endianness` template parameter allows to compute | 
| 396 | /// stable hash across platforms with different endianness. | 
| 397 | template <class HasherT, support::endianness Endianness> | 
| 398 | using HashBuilder = | 
| 399 |     HashBuilderImpl<HasherT, (Endianness == support::endianness::native | 
| 400 |                                   ? support::endian::system_endianness() | 
| 401 |                                   : Endianness)>; | 
| 402 |  | 
| 403 | namespace hashbuilder_detail { | 
| 404 | class HashCodeHasher { | 
| 405 | public: | 
| 406 |   HashCodeHasher() : Code(0) {} | 
| 407 |   void update(ArrayRef<uint8_t> Data) { | 
| 408 |     hash_code DataCode = hash_value(S: Data); | 
| 409 |     Code = hash_combine(args: Code, args: DataCode); | 
| 410 |   } | 
| 411 |   hash_code Code; | 
| 412 | }; | 
| 413 |  | 
| 414 | using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, | 
| 415 |                                         support::endianness::native>; | 
| 416 | } // namespace hashbuilder_detail | 
| 417 |  | 
| 418 | /// Provide a default implementation of `hash_value` when `addHash(const T &)` | 
| 419 | /// is supported. | 
| 420 | template <typename T> | 
| 421 | std::enable_if_t< | 
| 422 |     is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, | 
| 423 |     hash_code> | 
| 424 | hash_value(const T &Value) { | 
| 425 |   hashbuilder_detail::HashCodeHashBuilder HBuilder; | 
| 426 |   HBuilder.add(Value); | 
| 427 |   return HBuilder.getHasher().Code; | 
| 428 | } | 
| 429 | } // end namespace llvm | 
| 430 |  | 
| 431 | #endif // LLVM_SUPPORT_HASHBUILDER_H | 
| 432 |  |