| 1 | //! Caches run-time feature detection so that it only needs to be computed |
| 2 | //! once. |
| 3 | |
| 4 | #![allow (dead_code)] // not used on all platforms |
| 5 | |
| 6 | use core::sync::atomic::{AtomicUsize, Ordering}; |
| 7 | |
/// Returns `x` with bit number `bit` (counted from the least significant
/// bit) switched on; all other bits are left untouched.
#[inline]
const fn set_bit(x: u128, bit: u32) -> u128 {
    let mask: u128 = 1 << bit;
    x | mask
}
| 13 | |
/// Reports whether bit number `bit` (counted from the least significant bit)
/// of `x` is switched on.
#[inline]
const fn test_bit(x: u128, bit: u32) -> bool {
    let mask: u128 = 1 << bit;
    (x & mask) != 0
}
| 19 | |
/// Returns `x` with bit number `bit` (counted from the least significant
/// bit) cleared; all other bits are left untouched.
#[inline]
const fn unset_bit(x: u128, bit: u32) -> u128 {
    let mask: u128 = 1 << bit;
    x & !mask
}
| 25 | |
| 26 | /// Maximum number of features that can be cached. |
| 27 | const CACHE_CAPACITY: u32 = 93; |
| 28 | |
| 29 | /// This type is used to initialize the cache |
| 30 | // The derived `Default` implementation will initialize the field to zero, |
| 31 | // which is what we want. |
| 32 | #[derive (Copy, Clone, Default, PartialEq, Eq)] |
| 33 | pub(crate) struct Initializer(u128); |
| 34 | |
| 35 | // NOTE: the `debug_assert!` would catch that we do not add more Features than |
| 36 | // the one fitting our cache. |
| 37 | impl Initializer { |
| 38 | /// Tests the `bit` of the cache. |
| 39 | #[inline ] |
| 40 | pub(crate) fn test(self, bit: u32) -> bool { |
| 41 | debug_assert!(bit < CACHE_CAPACITY, "too many features, time to increase the cache size!" ); |
| 42 | test_bit(self.0, bit) |
| 43 | } |
| 44 | |
| 45 | /// Sets the `bit` of the cache. |
| 46 | #[inline ] |
| 47 | pub(crate) fn set(&mut self, bit: u32) { |
| 48 | debug_assert!(bit < CACHE_CAPACITY, "too many features, time to increase the cache size!" ); |
| 49 | let v: u128 = self.0; |
| 50 | self.0 = set_bit(x:v, bit); |
| 51 | } |
| 52 | |
| 53 | /// Unsets the `bit` of the cache. |
| 54 | #[inline ] |
| 55 | pub(crate) fn unset(&mut self, bit: u32) { |
| 56 | debug_assert!(bit < CACHE_CAPACITY, "too many features, time to increase the cache size!" ); |
| 57 | let v: u128 = self.0; |
| 58 | self.0 = unset_bit(x:v, bit); |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | /// This global variable is a cache of the features supported by the CPU. |
| 63 | // Note: the third slot is only used in x86 |
| 64 | // Another Slot can be added if needed without any change to `Initializer` |
| 65 | static CACHE: [Cache; 3] = [Cache::uninitialized(), Cache::uninitialized(), Cache::uninitialized()]; |
| 66 | |
| 67 | /// Feature cache with capacity for `size_of::<usize>() * 8 - 1` features. |
| 68 | /// |
| 69 | /// Note: 0 is used to represent an uninitialized cache, and (at least) the most |
| 70 | /// significant bit is set on any cache which has been initialized. |
| 71 | /// |
| 72 | /// Note: we use `Relaxed` atomic operations, because we are only interested in |
| 73 | /// the effects of operations on a single memory location. That is, we only need |
| 74 | /// "modification order", and not the full-blown "happens before". |
| 75 | struct Cache(AtomicUsize); |
| 76 | |
| 77 | impl Cache { |
| 78 | const CAPACITY: u32 = (core::mem::size_of::<usize>() * 8 - 1) as u32; |
| 79 | const MASK: usize = (1 << Cache::CAPACITY) - 1; |
| 80 | const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY; |
| 81 | |
| 82 | /// Creates an uninitialized cache. |
| 83 | #[allow (clippy::declare_interior_mutable_const)] |
| 84 | const fn uninitialized() -> Self { |
| 85 | Cache(AtomicUsize::new(0)) |
| 86 | } |
| 87 | |
| 88 | /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized. |
| 89 | #[inline ] |
| 90 | pub(crate) fn test(&self, bit: u32) -> Option<bool> { |
| 91 | let cached = self.0.load(Ordering::Relaxed); |
| 92 | if cached == 0 { None } else { Some(test_bit(cached as u128, bit)) } |
| 93 | } |
| 94 | |
| 95 | /// Initializes the cache. |
| 96 | #[inline ] |
| 97 | fn initialize(&self, value: usize) -> usize { |
| 98 | debug_assert_eq!((value & !Cache::MASK), 0); |
| 99 | self.0.store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); |
| 100 | value |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | cfg_select! { |
| 105 | feature = "std_detect_env_override" => { |
| 106 | #[inline] |
| 107 | fn disable_features(disable: &[u8], value: &mut Initializer) { |
| 108 | if let Ok(disable) = core::str::from_utf8(disable) { |
| 109 | for v in disable.split(" " ) { |
| 110 | let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32)); |
| 111 | } |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | #[inline] |
| 116 | fn initialize(mut value: Initializer) -> Initializer { |
| 117 | use core::ffi::CStr; |
| 118 | const RUST_STD_DETECT_UNSTABLE: &CStr = c"RUST_STD_DETECT_UNSTABLE" ; |
| 119 | cfg_select! { |
| 120 | windows => { |
| 121 | use alloc::vec; |
| 122 | #[link(name = "kernel32" )] |
| 123 | unsafe extern "system" { |
| 124 | fn GetEnvironmentVariableA(name: *const u8, buffer: *mut u8, size: u32) -> u32; |
| 125 | } |
| 126 | let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::<u8>(), core::ptr::null_mut(), 0) }; |
| 127 | if len > 0 { |
| 128 | // +1 to include the null terminator. |
| 129 | let mut env = vec![0; len as usize + 1]; |
| 130 | let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::<u8>(), env.as_mut_ptr(), len + 1) }; |
| 131 | if len > 0 { |
| 132 | disable_features(&env[..len as usize], &mut value); |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | _ => { |
| 137 | let env = unsafe { |
| 138 | libc::getenv(RUST_STD_DETECT_UNSTABLE.as_ptr()) |
| 139 | }; |
| 140 | if !env.is_null() { |
| 141 | let len = unsafe { libc::strlen(env) }; |
| 142 | let env = unsafe { core::slice::from_raw_parts(env as *const u8, len) }; |
| 143 | disable_features(env, &mut value); |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | do_initialize(value); |
| 148 | value |
| 149 | } |
| 150 | } |
| 151 | _ => { |
| 152 | #[inline ] |
| 153 | fn initialize(value: Initializer) -> Initializer { |
| 154 | do_initialize(value); |
| 155 | value |
| 156 | } |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | #[inline ] |
| 161 | fn do_initialize(value: Initializer) { |
| 162 | CACHE[0].initialize((value.0) as usize & Cache::MASK); |
| 163 | CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); |
| 164 | CACHE[2].initialize((value.0 >> (2 * Cache::CAPACITY)) as usize & Cache::MASK); |
| 165 | } |
| 166 | |
| 167 | // We only have to detect features once, and it's fairly costly, so hint to LLVM |
| 168 | // that it should assume that cache hits are more common than misses (which is |
| 169 | // the point of caching). It's possibly unfortunate that this function needs to |
| 170 | // reach across modules like this to call `os::detect_features`, but it produces |
| 171 | // the best code out of several attempted variants. |
| 172 | // |
| 173 | // The `Initializer` that the cache was initialized with is returned, so that |
| 174 | // the caller can call `test()` on it without having to load the value from the |
| 175 | // cache again. |
| 176 | #[cold ] |
| 177 | fn detect_and_initialize() -> Initializer { |
| 178 | initialize(super::os::detect_features()) |
| 179 | } |
| 180 | |
| 181 | /// Tests the `bit` of the storage. If the storage has not been initialized, |
| 182 | /// initializes it with the result of `os::detect_features()`. |
| 183 | /// |
| 184 | /// On its first invocation, it detects the CPU features and caches them in the |
| 185 | /// `CACHE` global variable as an `AtomicU64`. |
| 186 | /// |
| 187 | /// It uses the `Feature` variant to index into this variable as a bitset. If |
| 188 | /// the bit is set, the feature is enabled, and otherwise it is disabled. |
| 189 | /// |
| 190 | /// If the feature `std_detect_env_override` is enabled looks for the env |
| 191 | /// variable `RUST_STD_DETECT_UNSTABLE` and uses its content to disable |
| 192 | /// Features that would had been otherwise detected. |
| 193 | #[inline ] |
| 194 | pub(crate) fn test(bit: u32) -> bool { |
| 195 | let (relative_bit: u32, idx: usize) = if bit < Cache::CAPACITY { |
| 196 | (bit, 0) |
| 197 | } else if bit < 2 * Cache::CAPACITY { |
| 198 | (bit - Cache::CAPACITY, 1) |
| 199 | } else { |
| 200 | (bit - 2 * Cache::CAPACITY, 2) |
| 201 | }; |
| 202 | CACHE[idx].test(relative_bit).unwrap_or_else(|| detect_and_initialize().test(bit)) |
| 203 | } |
| 204 | |