// This module implements Identifier, a short-optimized string allowed to
// contain only the ASCII characters hyphen, dot, 0-9, A-Z, a-z.
//
// As of mid-2021, the distribution of pre-release lengths on crates.io is:
//
//   length   count        length   count        length   count
//        0  355929            11      81            24       2
//        1     208            12      48            25       6
//        2     236            13      55            26      10
//        3    1909            14      25            27       4
//        4    1284            15      15            28       1
//        5    1742            16      35            30       1
//        6    3440            17       9            31       5
//        7    5624            18       6            32       1
//        8    1321            19      12            36       2
//        9     179            20       2            37     379
//       10      65            23      11
//
// and the distribution of build metadata lengths is:
//
//   length   count        length   count        length   count
//        0  364445             8    7725            18       1
//        1      72             9      16            19       1
//        2       7            10      85            20       1
//        3      28            11      17            22       4
//        4       9            12      10            26       1
//        5      68            13       9            27       1
//        6      73            14      10            40       5
//        7      53            15       6
//
// Therefore it really behooves us to be able to use the entire 8 bytes of a
// pointer for inline storage. For both pre-release and build metadata there are
// vastly more strings with length exactly 8 bytes than the sum over all lengths
// longer than 8 bytes.
//
// To differentiate the inline representation from the heap allocated long
// representation, we'll allocate heap pointers with 2-byte alignment so that
// they are guaranteed to have an unset least significant bit. Then in the repr
// we store for pointers, we rotate a 1 into the most significant bit of the
// most significant byte, which is never set for an ASCII byte.
//
// Inline repr:
//
//     0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx
//
// Heap allocated repr:
//
//     1ppppppp pppppppp pppppppp pppppppp pppppppp pppppppp pppppppp pppppppp 0
//     ^ most significant bit   least significant bit of orig ptr, rotated out ^
//
// Since the most significant bit doubles as a sign bit for the similarly sized
// signed integer type, the CPU has an efficient instruction for inspecting it,
// meaning we can differentiate between an inline repr and a heap allocated repr
// in one instruction. Effectively an inline repr always looks like a positive
// i64 while a heap allocated repr always looks like a negative i64.
//
// For the inline repr, we store \0 padding on the end of the stored characters,
// and thus the string length is readily determined efficiently by a cttz (count
// trailing zeros) or bsf (bit scan forward) instruction.
//
// For the heap allocated repr, the length is encoded as a base-128 varint at
// the head of the allocation.
//
// Empty strings are stored as an all-1 bit pattern, corresponding to -1i64.
// Consequently the all-0 bit pattern is never a legal representation in any
// repr, leaving it available as a niche for downstream code. For example this
// allows size_of::<Version>() == size_of::<Option<Version>>().
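//
// For illustration (a sketch of the two encodings, derived from the code
// below): the 7-byte identifier "alpha.1" is stored inline as the bytes
// 61 6c 70 68 61 2e 31 00, every one of which has a clear most significant
// bit, while a 200-byte identifier is heap allocated behind the varint length
// header c8 81 (200 = 72 + 1*128, each base-128 digit stored with its most
// significant bit set) followed by the 200 bytes of ASCII content.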

use crate::alloc::alloc::{alloc, dealloc, handle_alloc_error, Layout};
use core::isize;
use core::mem;
use core::num::{NonZeroU64, NonZeroUsize};
use core::ptr::{self, NonNull};
use core::slice;
use core::str;
use core::usize;

const PTR_BYTES: usize = mem::size_of::<NonNull<u8>>();

// If pointers are already 8 bytes or bigger, then 0. If pointers are smaller
// than 8 bytes, then Identifier will contain a byte array to raise its size up
// to 8 bytes total.
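// For example, with 4-byte pointers the expression below evaluates to
// 8*1 - 4*1 = 4 tail bytes, and with 8-byte pointers to 0.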
const TAIL_BYTES: usize = 8 * (PTR_BYTES < 8) as usize - PTR_BYTES * (PTR_BYTES < 8) as usize;

#[repr(C, align(8))]
pub(crate) struct Identifier {
    head: NonNull<u8>,
    tail: [u8; TAIL_BYTES],
}

impl Identifier {
    pub(crate) const fn empty() -> Self {
        // This is a separate constant because unsafe function calls are not
        // allowed in a const fn body, only in a const, until later rustc than
        // what we support.
        const HEAD: NonNull<u8> = unsafe { NonNull::new_unchecked(!0 as *mut u8) };

        // `mov rax, -1`
        Identifier {
            head: HEAD,
            tail: [!0; TAIL_BYTES],
        }
    }

    // SAFETY: string must be ASCII and not contain \0 bytes.
    pub(crate) unsafe fn new_unchecked(string: &str) -> Self {
        let len = string.len();
        debug_assert!(len <= isize::MAX as usize);
        match len as u64 {
            0 => Self::empty(),
            1..=8 => {
                let mut bytes = [0u8; mem::size_of::<Identifier>()];
                // SAFETY: string is big enough to read len bytes, bytes is big
                // enough to write len bytes, and they do not overlap.
                unsafe { ptr::copy_nonoverlapping(string.as_ptr(), bytes.as_mut_ptr(), len) };
                // SAFETY: the head field is nonzero because the input string
                // was at least 1 byte of ASCII and did not contain \0.
                unsafe { mem::transmute::<[u8; mem::size_of::<Identifier>()], Identifier>(bytes) }
            }
            9..=0xff_ffff_ffff_ffff => {
                // SAFETY: len is in a range that does not contain 0.
                let size = bytes_for_varint(unsafe { NonZeroUsize::new_unchecked(len) }) + len;
                let align = 2;
                // On 32-bit and 16-bit architecture, check for size overflowing
                // isize::MAX. Making an allocation request bigger than this to
                // the allocator is considered UB. All allocations (including
                // static ones) are limited to isize::MAX so we're guaranteed
                // len <= isize::MAX, and we know bytes_for_varint(len) <= 5
                // because 128**5 > isize::MAX, which means the only problem
                // that can arise is when isize::MAX - 5 <= len <= isize::MAX.
                // This is pretty much guaranteed to be malicious input so we
                // don't need to care about returning a good error message.
                if mem::size_of::<usize>() < 8 {
                    let max_alloc = usize::MAX / 2 - align;
                    assert!(size <= max_alloc);
                }
                // SAFETY: align is not zero, align is a power of two, and
                // rounding size up to align does not overflow isize::MAX.
                let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
                // SAFETY: layout's size is nonzero.
                let ptr = unsafe { alloc(layout) };
                if ptr.is_null() {
                    handle_alloc_error(layout);
                }
                let mut write = ptr;
                let mut varint_remaining = len;
                while varint_remaining > 0 {
                    // SAFETY: size is bytes_for_varint(len) bytes + len bytes.
                    // This is writing the first bytes_for_varint(len) bytes.
                    unsafe { ptr::write(write, varint_remaining as u8 | 0x80) };
                    varint_remaining >>= 7;
                    // SAFETY: still in bounds of the same allocation.
                    write = unsafe { write.add(1) };
                }
                // SAFETY: size is bytes_for_varint(len) bytes + len bytes. This
                // is writing to the last len bytes.
                unsafe { ptr::copy_nonoverlapping(string.as_ptr(), write, len) };
                Identifier {
                    head: ptr_to_repr(ptr),
                    tail: [0; TAIL_BYTES],
                }
            }
            0x100_0000_0000_0000..=0xffff_ffff_ffff_ffff => {
                unreachable!("please refrain from storing >64 petabytes of text in semver version");
            }
            #[cfg(no_exhaustive_int_match)] // rustc <1.33
            _ => unreachable!(),
        }
    }

    pub(crate) fn is_empty(&self) -> bool {
        // `cmp rdi, -1` -- basically: `repr as i64 == -1`
        let empty = Self::empty();
        let is_empty = self.head == empty.head && self.tail == empty.tail;
        // The empty representation does nothing on Drop. We can't let this one
        // drop normally because `impl Drop for Identifier` calls is_empty; that
        // would be an infinite recursion.
        mem::forget(empty);
        is_empty
    }

    fn is_inline(&self) -> bool {
        // `test rdi, rdi` -- basically: `repr as i64 >= 0`
        self.head.as_ptr() as usize >> (PTR_BYTES * 8 - 1) == 0
    }

    fn is_empty_or_inline(&self) -> bool {
        // `cmp rdi, -2` -- basically: `repr as i64 > -2`
        self.is_empty() || self.is_inline()
    }

    pub(crate) fn as_str(&self) -> &str {
        if self.is_empty() {
            ""
        } else if self.is_inline() {
            // SAFETY: repr is in the inline representation.
            unsafe { inline_as_str(self) }
        } else {
            // SAFETY: repr is in the heap allocated representation.
            unsafe { ptr_as_str(&self.head) }
        }
    }

    pub(crate) fn ptr_eq(&self, rhs: &Self) -> bool {
        self.head == rhs.head && self.tail == rhs.tail
    }
}

impl Clone for Identifier {
    fn clone(&self) -> Self {
        if self.is_empty_or_inline() {
            Identifier {
                head: self.head,
                tail: self.tail,
            }
        } else {
            let ptr = repr_to_ptr(self.head);
            // SAFETY: ptr is one of our own heap allocations.
            let len = unsafe { decode_len(ptr) };
            let size = bytes_for_varint(len) + len.get();
            let align = 2;
            // SAFETY: align is not zero, align is a power of two, and rounding
            // size up to align does not overflow isize::MAX. This is just
            // duplicating a previous allocation where all of these guarantees
            // were already made.
            let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
            // SAFETY: layout's size is nonzero.
            let clone = unsafe { alloc(layout) };
            if clone.is_null() {
                handle_alloc_error(layout);
            }
            // SAFETY: new allocation cannot overlap the previous one (this was
            // not a realloc). The argument ptrs are readable/writeable
            // respectively for size bytes.
            unsafe { ptr::copy_nonoverlapping(ptr, clone, size) }
            Identifier {
                head: ptr_to_repr(clone),
                tail: [0; TAIL_BYTES],
            }
        }
    }
}

impl Drop for Identifier {
    fn drop(&mut self) {
        if self.is_empty_or_inline() {
            return;
        }
        let ptr = repr_to_ptr_mut(self.head);
        // SAFETY: ptr is one of our own heap allocations.
        let len = unsafe { decode_len(ptr) };
        let size = bytes_for_varint(len) + len.get();
        let align = 2;
        // SAFETY: align is not zero, align is a power of two, and rounding
        // size up to align does not overflow isize::MAX. These guarantees were
        // made when originally allocating this memory.
        let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
        // SAFETY: ptr was previously allocated by the same allocator with the
        // same layout.
        unsafe { dealloc(ptr, layout) }
    }
}

impl PartialEq for Identifier {
    fn eq(&self, rhs: &Self) -> bool {
        if self.ptr_eq(rhs) {
            // Fast path (most common)
            true
        } else if self.is_empty_or_inline() || rhs.is_empty_or_inline() {
            false
        } else {
            // SAFETY: both reprs are in the heap allocated representation.
            unsafe { ptr_as_str(&self.head) == ptr_as_str(&rhs.head) }
        }
    }
}

unsafe impl Send for Identifier {}
unsafe impl Sync for Identifier {}

// We use heap pointers that are 2-byte aligned, meaning they have an
// insignificant 0 in the least significant bit. We take advantage of that
// unneeded bit to rotate a 1 into the most significant bit to make the repr
// distinguishable from ASCII bytes.
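// For example (illustrative, on a 64-bit target): the 2-byte aligned pointer
// 0x1234 maps to (0x1234 | 1).rotate_right(1) == 0x8000_0000_0000_091a, and
// shifting the tag bit back out recovers 0x1234.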
fn ptr_to_repr(original: *mut u8) -> NonNull<u8> {
    // `mov eax, 1`
    // `shld rax, rdi, 63`
    let modified = (original as usize | 1).rotate_right(1);

    // `original + (modified - original)`, but being mindful of provenance.
    let diff = modified.wrapping_sub(original as usize);
    let modified = original.wrapping_add(diff);

    // SAFETY: the most significant bit of repr is known to be set, so the value
    // is not zero.
    unsafe { NonNull::new_unchecked(modified) }
}

// Shift out the 1 previously placed into the most significant bit of the least
// significant byte. Shift in a low 0 bit to reconstruct the original 2-byte
// aligned pointer.
fn repr_to_ptr(modified: NonNull<u8>) -> *const u8 {
    // `lea rax, [rdi + rdi]`
    let modified = modified.as_ptr();
    let original = (modified as usize) << 1;

    // `modified + (original - modified)`, but being mindful of provenance.
    let diff = original.wrapping_sub(modified as usize);
    modified.wrapping_add(diff)
}

fn repr_to_ptr_mut(repr: NonNull<u8>) -> *mut u8 {
    repr_to_ptr(repr) as *mut u8
}

// Compute the length of the inline string, assuming the argument is in short
// string representation. Short strings are stored as 1 to 8 nonzero ASCII
// bytes, followed by \0 padding for the remaining bytes.
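// For example, the inline identifier "rc.1" reads as the nonzero integer
// 0x0000_0000_312e_6372 on a little-endian target; its 34 leading zero bits
// give 8 - 34/8 = 4 nonzero bytes, which is the string's length.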
//
// SAFETY: the identifier must indeed be in the inline representation.
unsafe fn inline_len(repr: &Identifier) -> NonZeroUsize {
    // SAFETY: Identifier's layout is align(8) and at least size 8. We're doing
    // an aligned read of the first 8 bytes from it. The bytes are not all zero
    // because inline strings are at least 1 byte long and cannot contain \0.
    let repr = unsafe { ptr::read(repr as *const Identifier as *const NonZeroU64) };

    // Rustc >=1.53 has intrinsics for counting zeros on a non-zeroable integer.
    // On many architectures these are more efficient than counting on ordinary
    // zeroable integers (bsf vs cttz). On rustc <1.53 without those intrinsics,
    // we count zeros in the u64 rather than the NonZeroU64.
    #[cfg(no_nonzero_bitscan)]
    let repr = repr.get();

    #[cfg(target_endian = "little")]
    let zero_bits_on_string_end = repr.leading_zeros();
    #[cfg(target_endian = "big")]
    let zero_bits_on_string_end = repr.trailing_zeros();

    let nonzero_bytes = 8 - zero_bits_on_string_end as usize / 8;

    // SAFETY: repr is nonzero, so it has at most 63 zero bits on either end,
    // thus at least one nonzero byte.
    unsafe { NonZeroUsize::new_unchecked(nonzero_bytes) }
}

// SAFETY: repr must be in the inline representation, i.e. at least 1 and at
// most 8 nonzero ASCII bytes padded on the end with \0 bytes.
unsafe fn inline_as_str(repr: &Identifier) -> &str {
    let ptr = repr as *const Identifier as *const u8;
    let len = unsafe { inline_len(repr) }.get();
    // SAFETY: we are viewing the nonzero ASCII prefix of the inline repr's
    // contents as a slice of bytes. Input/output lifetimes are correctly
    // associated.
    let slice = unsafe { slice::from_raw_parts(ptr, len) };
    // SAFETY: the string contents are known to be only ASCII bytes, which are
    // always valid UTF-8.
    unsafe { str::from_utf8_unchecked(slice) }
}

// Decode varint. Varints consist of between one and eight base-128 digits, each
// of which is stored in a byte with most significant bit set. Adjacent to the
// varint in memory there is guaranteed to be at least 9 ASCII bytes, each of
// which has an unset most significant bit.
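// For example, a length of 300 is stored as the two bytes 0xac 0x82
// (300 = 44 + 2*128) and decoded as (0xac & 0x7f) + ((0x82 & 0x7f) << 7).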
//
// SAFETY: ptr must be one of our own heap allocations, with the varint header
// already written.
unsafe fn decode_len(ptr: *const u8) -> NonZeroUsize {
    // SAFETY: There is at least one byte of varint followed by at least 9 bytes
    // of string content, which is at least 10 bytes total for the allocation,
    // so reading the first two is no problem.
    let [first, second] = unsafe { ptr::read(ptr as *const [u8; 2]) };
    if second < 0x80 {
        // SAFETY: the length of this heap allocated string has been encoded as
        // one base-128 digit, so the length is at least 9 and at most 127. It
        // cannot be zero.
        unsafe { NonZeroUsize::new_unchecked((first & 0x7f) as usize) }
    } else {
        return unsafe { decode_len_cold(ptr) };

        // Identifiers 128 bytes or longer. This is not exercised by any crate
        // version currently published to crates.io.
        #[cold]
        #[inline(never)]
        unsafe fn decode_len_cold(mut ptr: *const u8) -> NonZeroUsize {
            let mut len = 0;
            let mut shift = 0;
            loop {
                // SAFETY: varint continues while there are bytes having the
                // most significant bit set, i.e. until we start hitting the
                // ASCII string content with msb unset.
                let byte = unsafe { *ptr };
                if byte < 0x80 {
                    // SAFETY: the string length is known to be 128 bytes or
                    // longer.
                    return unsafe { NonZeroUsize::new_unchecked(len) };
                }
                // SAFETY: still in bounds of the same allocation.
                ptr = unsafe { ptr.add(1) };
                len += ((byte & 0x7f) as usize) << shift;
                shift += 7;
            }
        }
    }
}

// SAFETY: repr must be in the heap allocated representation, with varint header
// and string contents already written.
unsafe fn ptr_as_str(repr: &NonNull<u8>) -> &str {
    let ptr = repr_to_ptr(*repr);
    let len = unsafe { decode_len(ptr) };
    let header = bytes_for_varint(len);
    let slice = unsafe { slice::from_raw_parts(ptr.add(header), len.get()) };
    // SAFETY: all identifier contents are ASCII bytes, which are always valid
    // UTF-8.
    unsafe { str::from_utf8_unchecked(slice) }
}

// Number of base-128 digits required for the varint representation of a length.
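// For example, lengths 9 through 127 need one digit and lengths 128 through
// 16383 need two.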
fn bytes_for_varint(len: NonZeroUsize) -> usize {
    #[cfg(no_nonzero_bitscan)] // rustc <1.53
    let len = len.get();

    let usize_bits = mem::size_of::<usize>() * 8;
    let len_bits = usize_bits - len.leading_zeros() as usize;
    (len_bits + 6) / 7
}
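
// Illustrative sketch, not part of the module proper: a few unit tests
// exercising the empty, inline, and heap allocated representations through
// the crate-internal API above. They assume the usual `cargo test` harness
// with std available; the test names and inputs are invented for the example.
#[cfg(test)]
mod tests {
    use super::Identifier;
    use core::mem;

    #[test]
    fn empty_identifier_round_trips() {
        let empty = Identifier::empty();
        assert!(empty.is_empty());
        assert_eq!(empty.as_str(), "");
    }

    #[test]
    fn short_identifier_is_stored_inline() {
        // 7 bytes of ASCII fit in the pointer-sized inline representation.
        let id = unsafe { Identifier::new_unchecked("alpha.1") };
        assert_eq!(id.as_str(), "alpha.1");
        // Equal inline identifiers are bitwise identical.
        let same = unsafe { Identifier::new_unchecked("alpha.1") };
        assert!(id.ptr_eq(&same));
    }

    #[test]
    fn long_identifier_round_trips_and_clones() {
        // More than 8 bytes forces the heap allocated representation.
        let id = unsafe { Identifier::new_unchecked("build.metadata.20210617") };
        assert_eq!(id.as_str(), "build.metadata.20210617");
        let clone = id.clone();
        // The clone is a separate allocation but compares equal by contents.
        assert!(!id.ptr_eq(&clone));
        assert!(id == clone);
    }

    #[test]
    fn all_zero_niche_is_available() {
        // The all-0 bit pattern is never a valid Identifier, so Option can
        // use it as a niche at no extra cost in size.
        assert_eq!(
            mem::size_of::<Identifier>(),
            mem::size_of::<Option<Identifier>>()
        );
    }
}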