1 | // Copyright 2016-2021 Brian Smith. |
2 | // |
3 | // Permission to use, copy, modify, and/or distribute this software for any |
4 | // purpose with or without fee is hereby granted, provided that the above |
5 | // copyright notice and this permission notice appear in all copies. |
6 | // |
7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
10 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
12 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
13 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
14 | |
15 | use cfg_if::cfg_if; |
16 | |
/// Compile-time assertions about the target this file is built for.
///
/// Nothing in this module exists at run time; every item is a `const` whose
/// evaluation fails the build when the stated assumption does not hold.
mod abi_assumptions {
    use core::mem::size_of;

    // TODO: Support targets that do not have SSE and SSE2 enabled, such as
    // x86_64-unknown-linux-none. See
    // https://github.com/briansmith/ring/issues/1793#issuecomment-1793243725,
    // https://github.com/briansmith/ring/issues/1832,
    // https://github.com/briansmith/ring/issues/1833.
    const _ASSUMES_SSE2: () = {
        let has_sse = cfg!(target_feature = "sse");
        let has_sse2 = cfg!(target_feature = "sse2");
        assert!(has_sse && has_sse2);
    };

    // Expected width, in bytes, of pointers on each supported architecture.
    #[cfg(target_arch = "x86_64")]
    const _ASSUMED_POINTER_SIZE: usize = 8;
    #[cfg(target_arch = "x86")]
    const _ASSUMED_POINTER_SIZE: usize = 4;

    // Both `usize` and thin references must be exactly pointer-sized.
    const _ASSUMED_USIZE_SIZE: () = assert!(_ASSUMED_POINTER_SIZE == size_of::<usize>());
    const _ASSUMED_REF_SIZE: () = assert!(_ASSUMED_POINTER_SIZE == size_of::<&'static u8>());

    const _ASSUMED_ENDIANNESS: () = assert!(cfg!(target_endian = "little"));
}
37 | |
38 | pub(super) mod featureflags { |
39 | use super::super::CAPS_STATIC; |
40 | use crate::{ |
41 | cpu, |
42 | polyfill::{once_cell::race, usize_from_u32}, |
43 | }; |
44 | use core::num::NonZeroUsize; |
45 | |
46 | pub(in super::super) fn get_or_init() -> cpu::Features { |
47 | // SAFETY: `OPENSSL_cpuid_setup` must be called only in |
48 | // `INIT.call_once()` below. |
49 | prefixed_extern! { |
50 | fn OPENSSL_cpuid_setup(out: &mut [u32; 4]); |
51 | } |
52 | |
53 | let _: NonZeroUsize = FEATURES.get_or_init(|| { |
54 | let mut cpuid = [0; 4]; |
55 | // SAFETY: We assume that it is safe to execute CPUID and XGETBV. |
56 | unsafe { |
57 | OPENSSL_cpuid_setup(&mut cpuid); |
58 | } |
59 | let detected = super::cpuid_to_caps_and_set_c_flags(&cpuid); |
60 | let merged = CAPS_STATIC | detected; |
61 | |
62 | let merged = usize_from_u32(merged) | (1 << (super::Shift::Initialized as u32)); |
63 | NonZeroUsize::new(merged).unwrap() // Can't fail because we just set a bit. |
64 | }); |
65 | |
66 | // SAFETY: We initialized the CPU features as required. |
67 | // `INIT.call_once` has `happens-before` semantics. |
68 | unsafe { cpu::Features::new_after_feature_flags_written_and_synced_unchecked() } |
69 | } |
70 | |
71 | pub(in super::super) fn get(_cpu_features: cpu::Features) -> u32 { |
72 | // SAFETY: Since only `get_or_init()` could have created |
73 | // `_cpu_features`, and it only does so after `FEATURES.get_or_init()`, |
74 | // we know we are reading from `FEATURES` after initializing it. |
75 | // |
76 | // Also, 0 means "no features detected" to users, which is designed to |
77 | // be a safe configuration. |
78 | let features = FEATURES.get().map(NonZeroUsize::get).unwrap_or(0); |
79 | |
80 | // The truncation is lossless, as we set the value with a u32. |
81 | #[allow (clippy::cast_possible_truncation)] |
82 | let features = features as u32; |
83 | |
84 | features |
85 | } |
86 | |
87 | static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new(); |
88 | |
89 | #[cfg (target_arch = "x86" )] |
90 | #[rustfmt::skip] |
91 | pub const STATIC_DETECTED: u32 = 0 |
92 | | (if cfg!(target_feature = "sse2" ) { super::Sse2::mask() } else { 0 }) |
93 | ; |
94 | |
95 | // Limited to x86_64-v2 features. |
96 | // TODO: Add missing x86-64-v3 features if we find real-world use of x86-64-v3. |
97 | // TODO: Add all features we use. |
98 | #[cfg (target_arch = "x86_64" )] |
99 | #[rustfmt::skip] |
100 | pub const STATIC_DETECTED: u32 = 0 |
101 | | if cfg!(target_feature = "sse4.1" ) { super::Sse41::mask() } else { 0 } |
102 | | if cfg!(target_feature = "ssse3" ) { super::Ssse3::mask() } else { 0 } |
103 | ; |
104 | |
105 | pub const FORCE_DYNAMIC_DETECTION: u32 = 0; |
106 | } |
107 | |
108 | fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 { |
109 | // "Intel" citations are for "Intel 64 and IA-32 Architectures Software |
110 | // Developer’s Manual", Combined Volumes, December 2024. |
111 | // "AMD" citations are for "AMD64 Technology AMD64 Architecture |
112 | // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024. |
113 | |
114 | // The `prefixed_extern!` uses below assume this |
115 | #[cfg (target_arch = "x86_64" )] |
116 | use core::{mem::align_of, sync::atomic::AtomicU32}; |
117 | #[cfg (target_arch = "x86_64" )] |
118 | const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () = |
119 | assert!(align_of::<AtomicU32>() == align_of::<u32>()); |
120 | |
121 | fn check(leaf: u32, bit: u32) -> bool { |
122 | let shifted = 1 << bit; |
123 | (leaf & shifted) == shifted |
124 | } |
125 | fn set(out: &mut u32, shift: Shift) { |
126 | let shifted = 1 << (shift as u32); |
127 | debug_assert_eq!(*out & shifted, 0); |
128 | *out |= shifted; |
129 | debug_assert_eq!(*out & shifted, shifted); |
130 | } |
131 | |
132 | #[cfg (target_arch = "x86_64" )] |
133 | let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup` |
134 | |
135 | // CPUID leaf 1. |
136 | let leaf1_ecx = cpuid[1]; |
137 | |
138 | // Intel: "Structured Extended Feature Flags Enumeration Leaf" |
139 | #[cfg (target_arch = "x86_64" )] |
140 | let extended_features_ebx = cpuid[2]; |
141 | |
142 | let mut caps = 0; |
143 | |
144 | // AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE |
145 | // instructions. All legacy SSE instructions support 128-bit vector |
146 | // operands." |
147 | |
148 | // Intel: "11.6.2 Checking for Intel SSE and SSE2 Support" |
149 | // We have to assume the prerequisites for SSE/SSE2 are met since we're |
150 | // already almost definitely using SSE registers if these target features |
151 | // are enabled. |
152 | // |
153 | // These also seem to help ensure CMOV support; There doesn't seem to be |
154 | // a `cfg!(target_feature = "cmov")`. It is likely that removing these |
155 | // assertions will remove the requirement for CMOV. With our without |
156 | // CMOV, it is likely that some of our timing side channel prevention does |
157 | // not work. Presumably the people who delete these are verifying that it |
158 | // all works fine. |
159 | const _SSE_REQUIRED: () = assert!(cfg!(target_feature = "sse" )); |
160 | const _SSE2_REQUIRED: () = assert!(cfg!(target_feature = "sse2" )); |
161 | |
162 | #[cfg (all(target_arch = "x86" , not(target_feature = "sse2" )))] |
163 | { |
164 | // If somebody is trying to compile for an x86 target without SSE2 |
165 | // and they deleted the `_SSE2_REQUIRED` const assertion above then |
166 | // they're probably trying to support a Linux/BSD/etc. distro that |
167 | // tries to support ancient x86 systems without SSE/SSE2. Try to |
168 | // reduce the harm caused, by implementing dynamic feature detection |
169 | // for them so that most systems will work like normal. |
170 | // |
171 | // Note that usually an x86-64 target with SSE2 disabled by default, |
172 | // usually `-none-` targets, will not support dynamically-detected use |
173 | // of SIMD registers via CPUID. A whole different mechanism is needed |
174 | // to support them. Same for i*86-*-none targets. |
175 | let leaf1_edx = cpuid[0]; |
176 | let sse1_available = check(leaf1_edx, 25); |
177 | let sse2_available = check(leaf1_edx, 26); |
178 | if sse1_available && sse2_available { |
179 | set(&mut caps, Shift::Sse2); |
180 | } |
181 | } |
182 | |
183 | // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const |
184 | // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they |
185 | // do, hopefully they won't delete these redundant assertions, so that |
186 | // x86_64 isn't affected. |
187 | #[cfg (target_arch = "x86_64" )] |
188 | const _SSE2_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2" )); |
189 | #[cfg (target_arch = "x86_64" )] |
190 | const _SSE_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2" )); |
191 | |
192 | // Intel: "12.7.2 Checking for SSSE3 Support" |
193 | // If/when we support dynamic detection of SSE/SSE2, make this conditional |
194 | // on SSE/SSE2. |
195 | if check(leaf1_ecx, 9) { |
196 | set(&mut caps, Shift::Ssse3); |
197 | } |
198 | |
199 | // Intel: "12.12.2 Checking for Intel SSE4.1 Support" |
200 | // If/when we support dynamic detection of SSE/SSE2, make this conditional |
201 | // on SSE/SSE2. |
202 | // XXX: We don't check for SSE3 and we're not sure if it is compatible for |
203 | // us to do so; does AMD advertise SSE3? TODO: address this. |
204 | // XXX: We don't condition this on SSSE3 being available. TODO: address |
205 | // this. |
206 | #[cfg (target_arch = "x86_64" )] |
207 | if check(leaf1_ecx, 19) { |
208 | set(&mut caps, Shift::Sse41); |
209 | } |
210 | |
211 | // AMD: "The extended SSE instructions include [...]." |
212 | |
213 | // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS" |
214 | // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't |
215 | // support AVX state. |
216 | let avx_available = check(leaf1_ecx, 28); |
217 | if avx_available { |
218 | set(&mut caps, Shift::Avx); |
219 | } |
220 | |
221 | // "14.7.1 Detection of Intel AVX2 Hardware support" |
222 | // XXX: We don't condition AVX2 on AVX. TODO: Address this. |
223 | // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't |
224 | // support AVX state. |
225 | #[cfg (target_arch = "x86_64" )] |
226 | if check(extended_features_ebx, 5) { |
227 | set(&mut caps, Shift::Avx2); |
228 | |
229 | // Declared as `uint32_t` in the C code. |
230 | prefixed_extern! { |
231 | static avx2_available: AtomicU32; |
232 | } |
233 | // SAFETY: The C code only reads `avx2_available`, and its reads are |
234 | // synchronized through the `OnceNonZeroUsize` Acquire/Release |
235 | // semantics as we ensure we have a `cpu::Features` instance before |
236 | // calling into the C code. |
237 | let flag = unsafe { &avx2_available }; |
238 | flag.store(1, core::sync::atomic::Ordering::Relaxed); |
239 | } |
240 | |
241 | // Intel: "12.13.4 Checking for Intel AES-NI Support" |
242 | // If/when we support dynamic detection of SSE/SSE2, revisit this. |
243 | // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI) |
244 | // and AES-NI & !AVX. |
245 | // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for |
246 | // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every |
247 | // use will either be supported by SSE* or AVX* instructions. We then |
248 | // assume that those supporting instructions' prerequisites (e.g. OS |
249 | // support for AVX or SSE state, respectively) are the only prerequisites |
250 | // for these features. |
251 | if check(leaf1_ecx, 1) { |
252 | set(&mut caps, Shift::ClMul); |
253 | } |
254 | if check(leaf1_ecx, 25) { |
255 | set(&mut caps, Shift::Aes); |
256 | } |
257 | // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling |
258 | // static feature detection for this. |
259 | #[cfg (target_arch = "x86_64" )] |
260 | if check(extended_features_ebx, 29) { |
261 | set(&mut caps, Shift::Sha); |
262 | } |
263 | |
264 | #[cfg (target_arch = "x86_64" )] |
265 | { |
266 | if is_intel { |
267 | set(&mut caps, Shift::IntelCpu); |
268 | } |
269 | |
270 | if check(leaf1_ecx, 22) { |
271 | set(&mut caps, Shift::Movbe); |
272 | } |
273 | |
274 | let adx_available = check(extended_features_ebx, 19); |
275 | if adx_available { |
276 | set(&mut caps, Shift::Adx); |
277 | } |
278 | |
279 | // Some 6th Generation (Skylake) CPUs claim to support BMI1 and BMI2 |
280 | // when they don't; see erratum "SKD052". The Intel document at |
281 | // https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/6th-gen-core-u-y-spec-update.pdf |
282 | // contains the footnote "Affects 6th Generation Intel Pentium processor |
283 | // family and Intel Celeron processor family". Further research indicates |
284 | // that Skylake Pentium/Celeron do not implement AVX or ADX. It turns |
285 | // out that we only use BMI1 and BMI2 in combination with ADX and/or |
286 | // AVX. |
287 | // |
288 | // rust `std::arch::is_x86_feature_detected` does a very similar thing |
289 | // but only looks at AVX, not ADX. Note that they reference an older |
290 | // version of the erratum labeled SKL052. |
291 | let believe_bmi_bits = !is_intel || (adx_available || avx_available); |
292 | |
293 | if check(extended_features_ebx, 3) && believe_bmi_bits { |
294 | set(&mut caps, Shift::Bmi1); |
295 | } |
296 | |
297 | let bmi2_available = check(extended_features_ebx, 8) && believe_bmi_bits; |
298 | if bmi2_available { |
299 | set(&mut caps, Shift::Bmi2); |
300 | } |
301 | |
302 | if adx_available && bmi2_available { |
303 | // Declared as `uint32_t` in the C code. |
304 | prefixed_extern! { |
305 | static adx_bmi2_available: AtomicU32; |
306 | } |
307 | // SAFETY: The C code only reads `adx_bmi2_available`, and its |
308 | // reads are synchronized through the `OnceNonZeroUsize` |
309 | // Acquire/Release semantics as we ensure we have a |
310 | // `cpu::Features` instance before calling into the C code. |
311 | let flag = unsafe { &adx_bmi2_available }; |
312 | flag.store(1, core::sync::atomic::Ordering::Relaxed); |
313 | } |
314 | } |
315 | |
316 | caps |
317 | } |
318 | |
// Lists the CPU features this module can report. Each entry pairs the target
// architecture(s) on which the feature is tracked with the feature's name.
// NOTE(review): `impl_get_feature!` is defined elsewhere; presumably it
// generates the per-feature types (e.g. `Sse2`, `Ssse3`) and the `Shift` enum
// used in this file, in which case the order of entries may determine bit
// positions -- confirm before reordering or inserting entries.
impl_get_feature! {
    features: [
        { ("x86" , "x86_64" ) => ClMul },
        { ("x86" , "x86_64" ) => Ssse3 },
        { ("x86_64" ) => Sse41 },
        { ("x86_64" ) => Movbe },
        { ("x86" , "x86_64" ) => Aes },
        { ("x86" , "x86_64" ) => Avx },
        { ("x86_64" ) => Bmi1 },
        { ("x86_64" ) => Avx2 },
        { ("x86_64" ) => Bmi2 },
        { ("x86_64" ) => Adx },
        // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
        // static feature detection for this.
        { ("x86_64" ) => Sha },
        // x86_64 can just assume SSE2 is available.
        { ("x86" ) => Sse2 },
    ],
}
338 | |
339 | cfg_if! { |
340 | if #[cfg(target_arch = "x86_64" )] { |
341 | #[derive (Clone, Copy)] |
342 | pub(crate) struct IntelCpu(super::Features); |
343 | |
344 | impl super::GetFeature<IntelCpu> for super::features::Values { |
345 | fn get_feature(&self) -> Option<IntelCpu> { |
346 | const MASK: u32 = 1 << (Shift::IntelCpu as u32); |
347 | if (self.values() & MASK) == MASK { |
348 | Some(IntelCpu(self.cpu())) |
349 | } else { |
350 | None |
351 | } |
352 | } |
353 | } |
354 | } |
355 | } |
356 | |
#[cfg(test)]
mod tests {
    // This should always pass on any x86 system except very, very, old ones.
    #[cfg(target_arch = "x86")]
    #[test]
    fn x86_has_sse2() {
        use super::*;
        use crate::cpu::{self, GetFeature as _};

        // The annotation selects the `GetFeature<Sse2>` implementation.
        let detected: Option<Sse2> = cpu::features().get_feature();
        assert!(detected.is_some());
    }
}
368 | |