// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| 14 | |
| 15 | #include "absl/hash/internal/low_level_hash.h" |
| 16 | |
| 17 | #include "absl/base/internal/unaligned_access.h" |
| 18 | #include "absl/numeric/bits.h" |
| 19 | #include "absl/numeric/int128.h" |
| 20 | |
| 21 | namespace absl { |
| 22 | ABSL_NAMESPACE_BEGIN |
| 23 | namespace hash_internal { |
| 24 | |
| 25 | static uint64_t Mix(uint64_t v0, uint64_t v1) { |
| 26 | #if !defined(__aarch64__) |
| 27 | // The default bit-mixer uses 64x64->128-bit multiplication. |
| 28 | absl::uint128 p = v0; |
| 29 | p *= v1; |
| 30 | return absl::Uint128Low64(v: p) ^ absl::Uint128High64(v: p); |
| 31 | #else |
| 32 | // The default bit-mixer above would perform poorly on some ARM microarchs, |
| 33 | // where calculating a 128-bit product requires a sequence of two |
| 34 | // instructions with a high combined latency and poor throughput. |
| 35 | // Instead, we mix bits using only 64-bit arithmetic, which is faster. |
| 36 | uint64_t p = v0 ^ absl::rotl(v1, 40); |
| 37 | p *= v1 ^ absl::rotl(v0, 39); |
| 38 | return p ^ (p >> 11); |
| 39 | #endif |
| 40 | } |
| 41 | |
| 42 | uint64_t LowLevelHash(const void* data, size_t len, uint64_t seed, |
| 43 | const uint64_t salt[5]) { |
| 44 | const uint8_t* ptr = static_cast<const uint8_t*>(data); |
| 45 | uint64_t starting_length = static_cast<uint64_t>(len); |
| 46 | uint64_t current_state = seed ^ salt[0]; |
| 47 | |
| 48 | if (len > 64) { |
| 49 | // If we have more than 64 bytes, we're going to handle chunks of 64 |
| 50 | // bytes at a time. We're going to build up two separate hash states |
| 51 | // which we will then hash together. |
| 52 | uint64_t duplicated_state = current_state; |
| 53 | |
| 54 | do { |
| 55 | uint64_t a = absl::base_internal::UnalignedLoad64(p: ptr); |
| 56 | uint64_t b = absl::base_internal::UnalignedLoad64(p: ptr + 8); |
| 57 | uint64_t c = absl::base_internal::UnalignedLoad64(p: ptr + 16); |
| 58 | uint64_t d = absl::base_internal::UnalignedLoad64(p: ptr + 24); |
| 59 | uint64_t e = absl::base_internal::UnalignedLoad64(p: ptr + 32); |
| 60 | uint64_t f = absl::base_internal::UnalignedLoad64(p: ptr + 40); |
| 61 | uint64_t g = absl::base_internal::UnalignedLoad64(p: ptr + 48); |
| 62 | uint64_t h = absl::base_internal::UnalignedLoad64(p: ptr + 56); |
| 63 | |
| 64 | uint64_t cs0 = Mix(v0: a ^ salt[1], v1: b ^ current_state); |
| 65 | uint64_t cs1 = Mix(v0: c ^ salt[2], v1: d ^ current_state); |
| 66 | current_state = (cs0 ^ cs1); |
| 67 | |
| 68 | uint64_t ds0 = Mix(v0: e ^ salt[3], v1: f ^ duplicated_state); |
| 69 | uint64_t ds1 = Mix(v0: g ^ salt[4], v1: h ^ duplicated_state); |
| 70 | duplicated_state = (ds0 ^ ds1); |
| 71 | |
| 72 | ptr += 64; |
| 73 | len -= 64; |
| 74 | } while (len > 64); |
| 75 | |
| 76 | current_state = current_state ^ duplicated_state; |
| 77 | } |
| 78 | |
| 79 | // We now have a data `ptr` with at most 64 bytes and the current state |
| 80 | // of the hashing state machine stored in current_state. |
| 81 | while (len > 16) { |
| 82 | uint64_t a = absl::base_internal::UnalignedLoad64(p: ptr); |
| 83 | uint64_t b = absl::base_internal::UnalignedLoad64(p: ptr + 8); |
| 84 | |
| 85 | current_state = Mix(v0: a ^ salt[1], v1: b ^ current_state); |
| 86 | |
| 87 | ptr += 16; |
| 88 | len -= 16; |
| 89 | } |
| 90 | |
| 91 | // We now have a data `ptr` with at most 16 bytes. |
| 92 | uint64_t a = 0; |
| 93 | uint64_t b = 0; |
| 94 | if (len > 8) { |
| 95 | // When we have at least 9 and at most 16 bytes, set A to the first 64 |
| 96 | // bits of the input and B to the last 64 bits of the input. Yes, they will |
| 97 | // overlap in the middle if we are working with less than the full 16 |
| 98 | // bytes. |
| 99 | a = absl::base_internal::UnalignedLoad64(p: ptr); |
| 100 | b = absl::base_internal::UnalignedLoad64(p: ptr + len - 8); |
| 101 | } else if (len > 3) { |
| 102 | // If we have at least 4 and at most 8 bytes, set A to the first 32 |
| 103 | // bits and B to the last 32 bits. |
| 104 | a = absl::base_internal::UnalignedLoad32(p: ptr); |
| 105 | b = absl::base_internal::UnalignedLoad32(p: ptr + len - 4); |
| 106 | } else if (len > 0) { |
| 107 | // If we have at least 1 and at most 3 bytes, read all of the provided |
| 108 | // bits into A, with some adjustments. |
| 109 | a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]); |
| 110 | b = 0; |
| 111 | } else { |
| 112 | a = 0; |
| 113 | b = 0; |
| 114 | } |
| 115 | |
| 116 | uint64_t w = Mix(v0: a ^ salt[1], v1: b ^ current_state); |
| 117 | uint64_t z = salt[1] ^ starting_length; |
| 118 | return Mix(v0: w, v1: z); |
| 119 | } |
| 120 | |
| 121 | } // namespace hash_internal |
| 122 | ABSL_NAMESPACE_END |
| 123 | } // namespace absl |
| 124 | |