| 1 | //===-- ubsan_type_hash_itanium.cpp ---------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implementation of type hashing/lookup for Itanium C++ ABI. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "sanitizer_common/sanitizer_platform.h" |
| 14 | #include "ubsan_platform.h" |
| 15 | #if CAN_SANITIZE_UB && !defined(_MSC_VER) |
| 16 | #include "ubsan_type_hash.h" |
| 17 | |
| 18 | #include "sanitizer_common/sanitizer_common.h" |
| 19 | #include "sanitizer_common/sanitizer_ptrauth.h" |
| 20 | #include <stdint.h> |
| 21 | |
| 22 | // The following are intended to be binary compatible with the definitions |
| 23 | // given in the Itanium ABI. We make no attempt to be ODR-compatible with |
| 24 | // those definitions, since existing ABI implementations aren't. |
| 25 | |
| 26 | namespace std { |
| 27 | class type_info { |
| 28 | public: |
| 29 | typedef const char *__type_name_t; |
| 30 | virtual ~type_info(); |
| 31 | |
| 32 | const char *__type_name; |
| 33 | |
| 34 | __type_name_t name() const { |
| 35 | #if defined(__APPLE__) && defined(__LP64__) && !defined(__x86_64__) |
| 36 | uintptr_t __non_unique_rtti_bit = |
| 37 | (1ULL << ((__CHAR_BIT__ * sizeof(__type_name_t)) - 1)); |
| 38 | return (__type_name_t)((uintptr_t)__type_name & ~__non_unique_rtti_bit); |
| 39 | #else |
| 40 | return __type_name; |
| 41 | #endif |
| 42 | } |
| 43 | }; |
| 44 | } |
| 45 | |
| 46 | namespace __cxxabiv1 { |
| 47 | |
| 48 | /// Type info for classes with no bases, and base class for type info for |
| 49 | /// classes with bases. |
| 50 | class __class_type_info : public std::type_info { |
| 51 | ~__class_type_info() override; |
| 52 | }; |
| 53 | |
| 54 | /// Type info for classes with simple single public inheritance. |
| 55 | class __si_class_type_info : public __class_type_info { |
| 56 | public: |
| 57 | ~__si_class_type_info() override; |
| 58 | |
| 59 | const __class_type_info *__base_type; |
| 60 | }; |
| 61 | |
| 62 | class __base_class_type_info { |
| 63 | public: |
| 64 | const __class_type_info *__base_type; |
| 65 | long __offset_flags; |
| 66 | |
| 67 | enum __offset_flags_masks { |
| 68 | __virtual_mask = 0x1, |
| 69 | __public_mask = 0x2, |
| 70 | __offset_shift = 8 |
| 71 | }; |
| 72 | }; |
| 73 | |
| 74 | /// Type info for classes with multiple, virtual, or non-public inheritance. |
| 75 | class __vmi_class_type_info : public __class_type_info { |
| 76 | public: |
| 77 | ~__vmi_class_type_info() override; |
| 78 | |
| 79 | unsigned int flags; |
| 80 | unsigned int base_count; |
| 81 | __base_class_type_info base_info[1]; |
| 82 | }; |
| 83 | |
| 84 | } |
| 85 | |
| 86 | namespace abi = __cxxabiv1; |
| 87 | |
| 88 | using namespace __sanitizer; |
| 89 | |
| 90 | // We implement a simple two-level cache for type-checking results. For each |
| 91 | // (vptr,type) pair, a hash is computed. This hash is assumed to be globally |
| 92 | // unique; if it collides, we will get false negatives, but: |
| 93 | // * such a collision would have to occur on the *first* bad access, |
| 94 | // * the probability of such a collision is low (and for a 64-bit target, is |
| 95 | // negligible), and |
| 96 | // * the vptr, and thus the hash, can be affected by ASLR, so multiple runs |
| 97 | // give better coverage. |
| 98 | // |
| 99 | // The first caching layer is a small hash table with no chaining; buckets are |
| 100 | // reused as needed. The second caching layer is a large hash table with open |
| 101 | // chaining. We can freely evict from either layer since this is just a cache. |
| 102 | // |
| 103 | // FIXME: Make these hash table accesses thread-safe. The races here are benign: |
| 104 | // assuming the unsequenced loads and stores don't misbehave too badly, |
| 105 | // the worst case is false negatives or poor cache behavior, not false |
| 106 | // positives or crashes. |
| 107 | |
| 108 | /// Find a bucket to store the given hash value in. |
| 109 | static __ubsan::HashValue *getTypeCacheHashTableBucket(__ubsan::HashValue V) { |
| 110 | static const unsigned HashTableSize = 65537; |
| 111 | static __ubsan::HashValue __ubsan_vptr_hash_set[HashTableSize]; |
| 112 | |
| 113 | unsigned First = (V & 65535) ^ 1; |
| 114 | unsigned Probe = First; |
| 115 | for (int Tries = 5; Tries; --Tries) { |
| 116 | if (!__ubsan_vptr_hash_set[Probe] || __ubsan_vptr_hash_set[Probe] == V) |
| 117 | return &__ubsan_vptr_hash_set[Probe]; |
| 118 | Probe += ((V >> 16) & 65535) + 1; |
| 119 | if (Probe >= HashTableSize) |
| 120 | Probe -= HashTableSize; |
| 121 | } |
| 122 | // FIXME: Pick a random entry from the probe sequence to evict rather than |
| 123 | // just taking the first. |
| 124 | return &__ubsan_vptr_hash_set[First]; |
| 125 | } |
| 126 | |
| 127 | /// \brief Determine whether \p Derived has a \p Base base class subobject at |
| 128 | /// offset \p Offset. |
| 129 | static bool isDerivedFromAtOffset(const abi::__class_type_info *Derived, |
| 130 | const abi::__class_type_info *Base, |
| 131 | sptr Offset) { |
| 132 | if (Derived->name() == Base->name() || |
| 133 | __ubsan::checkTypeInfoEquality(TypeInfo1: Derived, TypeInfo2: Base)) |
| 134 | return Offset == 0; |
| 135 | |
| 136 | if (const abi::__si_class_type_info *SI = |
| 137 | dynamic_cast<const abi::__si_class_type_info*>(Derived)) |
| 138 | return isDerivedFromAtOffset(Derived: SI->__base_type, Base, Offset); |
| 139 | |
| 140 | const abi::__vmi_class_type_info *VTI = |
| 141 | dynamic_cast<const abi::__vmi_class_type_info*>(Derived); |
| 142 | if (!VTI) |
| 143 | // No base class subobjects. |
| 144 | return false; |
| 145 | |
| 146 | // Look for a base class which is derived from \p Base at the right offset. |
| 147 | for (unsigned int base = 0; base != VTI->base_count; ++base) { |
| 148 | // FIXME: Curtail the recursion if this base can't possibly contain the |
| 149 | // given offset. |
| 150 | sptr OffsetHere = VTI->base_info[base].__offset_flags >> |
| 151 | abi::__base_class_type_info::__offset_shift; |
| 152 | if (VTI->base_info[base].__offset_flags & |
| 153 | abi::__base_class_type_info::__virtual_mask) |
| 154 | // For now, just punt on virtual bases and say 'yes'. |
| 155 | // FIXME: OffsetHere is the offset in the vtable of the virtual base |
| 156 | // offset. Read the vbase offset out of the vtable and use it. |
| 157 | return true; |
| 158 | if (isDerivedFromAtOffset(Derived: VTI->base_info[base].__base_type, |
| 159 | Base, Offset: Offset - OffsetHere)) |
| 160 | return true; |
| 161 | } |
| 162 | |
| 163 | return false; |
| 164 | } |
| 165 | |
| 166 | /// \brief Find the derived-most dynamic base class of \p Derived at offset |
| 167 | /// \p Offset. |
| 168 | static const abi::__class_type_info *findBaseAtOffset( |
| 169 | const abi::__class_type_info *Derived, sptr Offset) { |
| 170 | if (!Offset) |
| 171 | return Derived; |
| 172 | |
| 173 | if (const abi::__si_class_type_info *SI = |
| 174 | dynamic_cast<const abi::__si_class_type_info*>(Derived)) |
| 175 | return findBaseAtOffset(Derived: SI->__base_type, Offset); |
| 176 | |
| 177 | const abi::__vmi_class_type_info *VTI = |
| 178 | dynamic_cast<const abi::__vmi_class_type_info*>(Derived); |
| 179 | if (!VTI) |
| 180 | // No base class subobjects. |
| 181 | return nullptr; |
| 182 | |
| 183 | for (unsigned int base = 0; base != VTI->base_count; ++base) { |
| 184 | sptr OffsetHere = VTI->base_info[base].__offset_flags >> |
| 185 | abi::__base_class_type_info::__offset_shift; |
| 186 | if (VTI->base_info[base].__offset_flags & |
| 187 | abi::__base_class_type_info::__virtual_mask) |
| 188 | // FIXME: Can't handle virtual bases yet. |
| 189 | continue; |
| 190 | if (const abi::__class_type_info *Base = |
| 191 | findBaseAtOffset(Derived: VTI->base_info[base].__base_type, |
| 192 | Offset: Offset - OffsetHere)) |
| 193 | return Base; |
| 194 | } |
| 195 | |
| 196 | return nullptr; |
| 197 | } |
| 198 | |
| 199 | namespace { |
| 200 | |
| 201 | struct VtablePrefix { |
| 202 | /// The offset from the vptr to the start of the most-derived object. |
| 203 | /// This will only be greater than zero in some virtual base class vtables |
| 204 | /// used during object con-/destruction, and will usually be exactly zero. |
| 205 | sptr Offset; |
| 206 | /// The type_info object describing the most-derived class type. |
| 207 | std::type_info *TypeInfo; |
| 208 | }; |
| 209 | VtablePrefix *getVtablePrefix(void *Vtable) { |
| 210 | Vtable = ptrauth_strip(Vtable, ptrauth_key_cxx_vtable_pointer); |
| 211 | VtablePrefix *Vptr = reinterpret_cast<VtablePrefix*>(Vtable); |
| 212 | VtablePrefix *Prefix = Vptr - 1; |
| 213 | if (!IsAccessibleMemoryRange(beg: (uptr)Prefix, size: sizeof(VtablePrefix))) |
| 214 | return nullptr; |
| 215 | if (!Prefix->TypeInfo) |
| 216 | // This can't possibly be a valid vtable. |
| 217 | return nullptr; |
| 218 | return Prefix; |
| 219 | } |
| 220 | |
| 221 | } |
| 222 | |
| 223 | bool __ubsan::checkDynamicType(void *Object, void *Type, HashValue Hash) { |
| 224 | // A crash anywhere within this function probably means the vptr is corrupted. |
| 225 | // FIXME: Perform these checks more cautiously. |
| 226 | |
| 227 | // Check whether this is something we've evicted from the cache. |
| 228 | HashValue *Bucket = getTypeCacheHashTableBucket(V: Hash); |
| 229 | if (*Bucket == Hash) { |
| 230 | __ubsan_vptr_type_cache[Hash % VptrTypeCacheSize] = Hash; |
| 231 | return true; |
| 232 | } |
| 233 | |
| 234 | void *VtablePtr = *reinterpret_cast<void **>(Object); |
| 235 | VtablePrefix *Vtable = getVtablePrefix(Vtable: VtablePtr); |
| 236 | if (!Vtable) |
| 237 | return false; |
| 238 | if (Vtable->Offset < -VptrMaxOffsetToTop || Vtable->Offset > VptrMaxOffsetToTop) { |
| 239 | // Too large or too small offset are signs of Vtable corruption. |
| 240 | return false; |
| 241 | } |
| 242 | |
| 243 | // Check that this is actually a type_info object for a class type. |
| 244 | abi::__class_type_info *Derived = |
| 245 | dynamic_cast<abi::__class_type_info*>(Vtable->TypeInfo); |
| 246 | if (!Derived) |
| 247 | return false; |
| 248 | |
| 249 | abi::__class_type_info *Base = (abi::__class_type_info*)Type; |
| 250 | if (!isDerivedFromAtOffset(Derived, Base, Offset: -Vtable->Offset)) |
| 251 | return false; |
| 252 | |
| 253 | // Success. Cache this result. |
| 254 | __ubsan_vptr_type_cache[Hash % VptrTypeCacheSize] = Hash; |
| 255 | *Bucket = Hash; |
| 256 | return true; |
| 257 | } |
| 258 | |
| 259 | __ubsan::DynamicTypeInfo |
| 260 | __ubsan::getDynamicTypeInfoFromVtable(void *VtablePtr) { |
| 261 | VtablePrefix *Vtable = getVtablePrefix(Vtable: VtablePtr); |
| 262 | if (!Vtable) |
| 263 | return DynamicTypeInfo(nullptr, 0, nullptr); |
| 264 | if (Vtable->Offset < -VptrMaxOffsetToTop || Vtable->Offset > VptrMaxOffsetToTop) |
| 265 | return DynamicTypeInfo(nullptr, Vtable->Offset, nullptr); |
| 266 | const abi::__class_type_info *ObjectType = findBaseAtOffset( |
| 267 | Derived: static_cast<const abi::__class_type_info*>(Vtable->TypeInfo), |
| 268 | Offset: -Vtable->Offset); |
| 269 | return DynamicTypeInfo(Vtable->TypeInfo->name(), -Vtable->Offset, |
| 270 | ObjectType ? ObjectType->name() : "<unknown>" ); |
| 271 | } |
| 272 | |
| 273 | bool __ubsan::checkTypeInfoEquality(const void *TypeInfo1, |
| 274 | const void *TypeInfo2) { |
| 275 | auto TI1 = static_cast<const std::type_info *>(TypeInfo1); |
| 276 | auto TI2 = static_cast<const std::type_info *>(TypeInfo2); |
| 277 | return SANITIZER_NON_UNIQUE_TYPEINFO && TI1->name()[0] != '*' && |
| 278 | TI2->name()[0] != '*' && !internal_strcmp(s1: TI1->name(), s2: TI2->name()); |
| 279 | } |
| 280 | |
| 281 | #endif // CAN_SANITIZE_UB && !SANITIZER_WINDOWS |
| 282 | |