1 | //! Caches run-time feature detection so that it only needs to be computed |
2 | //! once. |
3 | |
4 | #![allow (dead_code)] // not used on all platforms |
5 | |
6 | use core::sync::atomic::Ordering; |
7 | |
8 | use core::sync::atomic::AtomicUsize; |
9 | |
/// Returns `x` with bit number `bit` switched on; all other bits are unchanged.
#[inline]
const fn set_bit(x: u64, bit: u32) -> u64 {
    // Single-bit mask selecting position `bit`.
    let mask: u64 = 1 << bit;
    mask | x
}
15 | |
/// Returns `true` when bit number `bit` of `x` is set, `false` otherwise.
#[inline]
const fn test_bit(x: u64, bit: u32) -> bool {
    // Single-bit mask selecting position `bit`; the bit is set exactly when
    // masking `x` leaves the mask intact.
    let mask: u64 = 1 << bit;
    (x & mask) == mask
}
21 | |
/// Returns `x` with bit number `bit` cleared; all other bits are unchanged.
#[inline]
const fn unset_bit(x: u64, bit: u32) -> u64 {
    // Single-bit mask selecting position `bit`; its complement keeps every
    // other bit of `x`.
    let mask: u64 = 1 << bit;
    x & !mask
}
27 | |
28 | /// Maximum number of features that can be cached. |
29 | const CACHE_CAPACITY: u32 = 62; |
30 | |
31 | /// This type is used to initialize the cache |
32 | // The derived `Default` implementation will initialize the field to zero, |
33 | // which is what we want. |
34 | #[derive(Copy, Clone, Default)] |
35 | pub(crate) struct Initializer(u64); |
36 | |
37 | // NOTE: the `debug_assert!` would catch that we do not add more Features than |
38 | // the one fitting our cache. |
39 | impl Initializer { |
40 | /// Tests the `bit` of the cache. |
41 | #[inline ] |
42 | pub(crate) fn test(self, bit: u32) -> bool { |
43 | debug_assert!( |
44 | bit < CACHE_CAPACITY, |
45 | "too many features, time to increase the cache size!" |
46 | ); |
47 | test_bit(self.0, bit) |
48 | } |
49 | |
50 | /// Sets the `bit` of the cache. |
51 | #[inline ] |
52 | pub(crate) fn set(&mut self, bit: u32) { |
53 | debug_assert!( |
54 | bit < CACHE_CAPACITY, |
55 | "too many features, time to increase the cache size!" |
56 | ); |
57 | let v = self.0; |
58 | self.0 = set_bit(v, bit); |
59 | } |
60 | |
61 | /// Unsets the `bit` of the cache. |
62 | #[inline ] |
63 | pub(crate) fn unset(&mut self, bit: u32) { |
64 | debug_assert!( |
65 | bit < CACHE_CAPACITY, |
66 | "too many features, time to increase the cache size!" |
67 | ); |
68 | let v = self.0; |
69 | self.0 = unset_bit(v, bit); |
70 | } |
71 | } |
72 | |
73 | /// This global variable is a cache of the features supported by the CPU. |
74 | // Note: on x64, we only use the first slot |
75 | static CACHE: [Cache; 2] = [Cache::uninitialized(), Cache::uninitialized()]; |
76 | |
77 | /// Feature cache with capacity for `size_of::<usize::MAX>() * 8 - 1` features. |
78 | /// |
79 | /// Note: 0 is used to represent an uninitialized cache, and (at least) the most |
80 | /// significant bit is set on any cache which has been initialized. |
81 | /// |
82 | /// Note: we use `Relaxed` atomic operations, because we are only interested in |
83 | /// the effects of operations on a single memory location. That is, we only need |
84 | /// "modification order", and not the full-blown "happens before". |
85 | struct Cache(AtomicUsize); |
86 | |
87 | impl Cache { |
88 | const CAPACITY: u32 = (core::mem::size_of::<usize>() * 8 - 1) as u32; |
89 | const MASK: usize = (1 << Cache::CAPACITY) - 1; |
90 | const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY; |
91 | |
92 | /// Creates an uninitialized cache. |
93 | #[allow (clippy::declare_interior_mutable_const)] |
94 | const fn uninitialized() -> Self { |
95 | Cache(AtomicUsize::new(0)) |
96 | } |
97 | |
98 | /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized. |
99 | #[inline ] |
100 | pub(crate) fn test(&self, bit: u32) -> Option<bool> { |
101 | let cached = self.0.load(Ordering::Relaxed); |
102 | if cached == 0 { |
103 | None |
104 | } else { |
105 | Some(test_bit(cached as u64, bit)) |
106 | } |
107 | } |
108 | |
109 | /// Initializes the cache. |
110 | #[inline ] |
111 | fn initialize(&self, value: usize) -> usize { |
112 | debug_assert_eq!((value & !Cache::MASK), 0); |
113 | self.0 |
114 | .store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); |
115 | value |
116 | } |
117 | } |
118 | |
119 | cfg_if::cfg_if! { |
120 | if #[cfg(feature = "std_detect_env_override" )] { |
121 | #[inline] |
122 | fn initialize(mut value: Initializer) -> Initializer { |
123 | let env = unsafe { |
124 | libc::getenv(b"RUST_STD_DETECT_UNSTABLE \0" .as_ptr() as *const libc::c_char) |
125 | }; |
126 | if !env.is_null() { |
127 | let len = unsafe { libc::strlen(env) }; |
128 | let env = unsafe { core::slice::from_raw_parts(env as *const u8, len) }; |
129 | if let Ok(disable) = core::str::from_utf8(env) { |
130 | for v in disable.split(" " ) { |
131 | let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32)); |
132 | } |
133 | } |
134 | } |
135 | do_initialize(value); |
136 | value |
137 | } |
138 | } else { |
139 | #[inline] |
140 | fn initialize(value: Initializer) -> Initializer { |
141 | do_initialize(value); |
142 | value |
143 | } |
144 | } |
145 | } |
146 | |
147 | #[inline ] |
148 | fn do_initialize(value: Initializer) { |
149 | CACHE[0].initialize((value.0) as usize & Cache::MASK); |
150 | CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); |
151 | } |
152 | |
153 | // We only have to detect features once, and it's fairly costly, so hint to LLVM |
154 | // that it should assume that cache hits are more common than misses (which is |
155 | // the point of caching). It's possibly unfortunate that this function needs to |
156 | // reach across modules like this to call `os::detect_features`, but it produces |
157 | // the best code out of several attempted variants. |
158 | // |
159 | // The `Initializer` that the cache was initialized with is returned, so that |
160 | // the caller can call `test()` on it without having to load the value from the |
161 | // cache again. |
162 | #[cold ] |
163 | fn detect_and_initialize() -> Initializer { |
164 | initialize(super::os::detect_features()) |
165 | } |
166 | |
167 | /// Tests the `bit` of the storage. If the storage has not been initialized, |
168 | /// initializes it with the result of `os::detect_features()`. |
169 | /// |
170 | /// On its first invocation, it detects the CPU features and caches them in the |
171 | /// `CACHE` global variable as an `AtomicU64`. |
172 | /// |
173 | /// It uses the `Feature` variant to index into this variable as a bitset. If |
174 | /// the bit is set, the feature is enabled, and otherwise it is disabled. |
175 | /// |
176 | /// If the feature `std_detect_env_override` is enabled looks for the env |
177 | /// variable `RUST_STD_DETECT_UNSTABLE` and uses its content to disable |
178 | /// Features that would had been otherwise detected. |
179 | #[inline ] |
180 | pub(crate) fn test(bit: u32) -> bool { |
181 | let (relative_bit: u32, idx: i32) = if bit < Cache::CAPACITY { |
182 | (bit, 0) |
183 | } else { |
184 | (bit - Cache::CAPACITY, 1) |
185 | }; |
186 | CACHE[idx] |
187 | .test(relative_bit) |
188 | .unwrap_or_else(|| detect_and_initialize().test(bit)) |
189 | } |
190 | |