// Copyright 2016-2021 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use cfg_if::cfg_if;

mod abi_assumptions {
    use core::mem::size_of;

    // TODO: Support targets that do not have SSE and SSE2 enabled, such as
    // x86_64-unknown-linux-none. See
    // https://github.com/briansmith/ring/issues/1793#issuecomment-1793243725,
    // https://github.com/briansmith/ring/issues/1832,
    // https://github.com/briansmith/ring/issues/1833.
    const _ASSUMES_SSE2: () =
        assert!(cfg!(target_feature = "sse") && cfg!(target_feature = "sse2"));

    #[cfg(target_arch = "x86_64")]
    const _ASSUMED_POINTER_SIZE: usize = 8;
    #[cfg(target_arch = "x86")]
    const _ASSUMED_POINTER_SIZE: usize = 4;
    const _ASSUMED_USIZE_SIZE: () = assert!(size_of::<usize>() == _ASSUMED_POINTER_SIZE);
    const _ASSUMED_REF_SIZE: () = assert!(size_of::<&'static u8>() == _ASSUMED_POINTER_SIZE);

    const _ASSUMED_ENDIANNESS: () = assert!(cfg!(target_endian = "little"));
}

pub(super) mod featureflags {
    use super::super::CAPS_STATIC;
    use crate::{
        cpu,
        polyfill::{once_cell::race, usize_from_u32},
    };
    use core::num::NonZeroUsize;

    pub(in super::super) fn get_or_init() -> cpu::Features {
        // SAFETY: `OPENSSL_cpuid_setup` must be called only in
        // `FEATURES.get_or_init()` below.
        prefixed_extern! {
            fn OPENSSL_cpuid_setup(out: &mut [u32; 4]);
        }

        let _: NonZeroUsize = FEATURES.get_or_init(|| {
            let mut cpuid = [0; 4];
            // SAFETY: We assume that it is safe to execute CPUID and XGETBV.
            unsafe {
                OPENSSL_cpuid_setup(&mut cpuid);
            }
            let detected = super::cpuid_to_caps_and_set_c_flags(&cpuid);
            let merged = CAPS_STATIC | detected;

            let merged = usize_from_u32(merged) | (1 << (super::Shift::Initialized as u32));
            NonZeroUsize::new(merged).unwrap() // Can't fail because we just set a bit.
        });

        // SAFETY: We initialized the CPU features as required.
        // `FEATURES.get_or_init()` has `happens-before` semantics.
        unsafe { cpu::Features::new_after_feature_flags_written_and_synced_unchecked() }
    }

    pub(in super::super) fn get(_cpu_features: cpu::Features) -> u32 {
        // SAFETY: Since only `get_or_init()` could have created
        // `_cpu_features`, and it only does so after `FEATURES.get_or_init()`,
        // we know we are reading from `FEATURES` after initializing it.
        //
        // Also, 0 means "no features detected" to users, which is designed to
        // be a safe configuration.
        let features = FEATURES.get().map(NonZeroUsize::get).unwrap_or(0);

        // The truncation is lossless, as we set the value with a u32.
        #[allow(clippy::cast_possible_truncation)]
        let features = features as u32;

        features
    }

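    // Note: the value stored here always has the `Initialized` bit set (see
    // `get_or_init()` above), so it is nonzero even when no capabilities are
    // detected; that invariant is what lets `OnceNonZeroUsize` distinguish
    // "initialized with no features" from "not yet initialized".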
    static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();

    #[cfg(target_arch = "x86")]
    #[rustfmt::skip]
    pub const STATIC_DETECTED: u32 = 0
        | (if cfg!(target_feature = "sse2") { super::Sse2::mask() } else { 0 })
        ;

    // Limited to x86-64-v2 features.
    // TODO: Add missing x86-64-v3 features if we find real-world use of x86-64-v3.
    // TODO: Add all features we use.
    #[cfg(target_arch = "x86_64")]
    #[rustfmt::skip]
    pub const STATIC_DETECTED: u32 = 0
        | if cfg!(target_feature = "sse4.1") { super::Sse41::mask() } else { 0 }
        | if cfg!(target_feature = "ssse3") { super::Ssse3::mask() } else { 0 }
        ;
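    // The `cfg!(target_feature = ...)` tests above are evaluated at compile
    // time; for example, building with `RUSTFLAGS="-C target-cpu=x86-64-v2"`
    // or `RUSTFLAGS="-C target-feature=+ssse3,+sse4.1"` would cause these
    // bits to be statically detected.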

    pub const FORCE_DYNAMIC_DETECTION: u32 = 0;
}

fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
    // "Intel" citations are for "Intel 64 and IA-32 Architectures Software
    // Developer’s Manual", Combined Volumes, December 2024.
    // "AMD" citations are for "AMD64 Technology AMD64 Architecture
    // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024.

    // The `prefixed_extern!` uses below assume this.
    #[cfg(target_arch = "x86_64")]
    use core::{mem::align_of, sync::atomic::AtomicU32};
    #[cfg(target_arch = "x86_64")]
    const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () =
        assert!(align_of::<AtomicU32>() == align_of::<u32>());

    fn check(leaf: u32, bit: u32) -> bool {
        let shifted = 1 << bit;
        (leaf & shifted) == shifted
    }
    fn set(out: &mut u32, shift: Shift) {
        let shifted = 1 << (shift as u32);
        debug_assert_eq!(*out & shifted, 0);
        *out |= shifted;
        debug_assert_eq!(*out & shifted, shifted);
    }
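    // For example, `check(leaf1_ecx, 9)` below tests the SSSE3 bit of CPUID
    // leaf 1's ECX, i.e. `(leaf1_ecx & (1 << 9)) != 0`, and
    // `set(&mut caps, Shift::Ssse3)` records it in our own capability
    // bitmask; `Shift` values are this crate's bit indices, not CPUID bit
    // positions.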

    #[cfg(target_arch = "x86_64")]
    let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup`

    // CPUID leaf 1.
    let leaf1_ecx = cpuid[1];

    // Intel: "Structured Extended Feature Flags Enumeration Leaf"
    #[cfg(target_arch = "x86_64")]
    let extended_features_ebx = cpuid[2];

    let mut caps = 0;

    // AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE
    // instructions. All legacy SSE instructions support 128-bit vector
    // operands."

    // Intel: "11.6.2 Checking for Intel SSE and SSE2 Support"
    // We have to assume the prerequisites for SSE/SSE2 are met since we're
    // already almost definitely using SSE registers if these target features
    // are enabled.
    //
    // These also seem to help ensure CMOV support; there doesn't seem to be
    // a `cfg!(target_feature = "cmov")`. It is likely that removing these
    // assertions will remove the requirement for CMOV. With or without
    // CMOV, it is likely that some of our timing side channel prevention does
    // not work. Presumably the people who delete these are verifying that it
    // all works fine.
    const _SSE_REQUIRED: () = assert!(cfg!(target_feature = "sse"));
    const _SSE2_REQUIRED: () = assert!(cfg!(target_feature = "sse2"));

    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
    {
        // If somebody is trying to compile for an x86 target without SSE2
        // and they deleted the `_SSE2_REQUIRED` const assertion above then
        // they're probably trying to support a Linux/BSD/etc. distro that
        // tries to support ancient x86 systems without SSE/SSE2. Try to
        // reduce the harm by implementing dynamic feature detection for them
        // so that most systems will work as normal.
        //
        // Note that an x86-64 target with SSE2 disabled by default, usually
        // a `-none-` target, will not support dynamically-detected use of
        // SIMD registers via CPUID. A whole different mechanism is needed to
        // support such targets. The same goes for i*86-*-none targets.
        let leaf1_edx = cpuid[0];
        let sse1_available = check(leaf1_edx, 25);
        let sse2_available = check(leaf1_edx, 26);
        if sse1_available && sse2_available {
            set(&mut caps, Shift::Sse2);
        }
    }

    // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const
    // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they
    // do, hopefully they won't delete these redundant assertions, so that
    // x86_64 isn't affected.
    #[cfg(target_arch = "x86_64")]
    const _SSE2_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
    #[cfg(target_arch = "x86_64")]
    const _SSE_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse"));

192 // Intel: "12.7.2 Checking for SSSE3 Support"
193 // If/when we support dynamic detection of SSE/SSE2, make this conditional
194 // on SSE/SSE2.
195 if check(leaf1_ecx, 9) {
196 set(&mut caps, Shift::Ssse3);
197 }
198
199 // Intel: "12.12.2 Checking for Intel SSE4.1 Support"
200 // If/when we support dynamic detection of SSE/SSE2, make this conditional
201 // on SSE/SSE2.
202 // XXX: We don't check for SSE3 and we're not sure if it is compatible for
203 // us to do so; does AMD advertise SSE3? TODO: address this.
204 // XXX: We don't condition this on SSSE3 being available. TODO: address
205 // this.
206 #[cfg(target_arch = "x86_64")]
207 if check(leaf1_ecx, 19) {
208 set(&mut caps, Shift::Sse41);
209 }
210
211 // AMD: "The extended SSE instructions include [...]."
212
213 // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS"
214 // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
215 // support AVX state.
216 let avx_available = check(leaf1_ecx, 28);
217 if avx_available {
218 set(&mut caps, Shift::Avx);
219 }
220
221 // "14.7.1 Detection of Intel AVX2 Hardware support"
222 // XXX: We don't condition AVX2 on AVX. TODO: Address this.
223 // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
224 // support AVX state.
225 #[cfg(target_arch = "x86_64")]
226 if check(extended_features_ebx, 5) {
227 set(&mut caps, Shift::Avx2);
228
229 // Declared as `uint32_t` in the C code.
230 prefixed_extern! {
231 static avx2_available: AtomicU32;
232 }
233 // SAFETY: The C code only reads `avx2_available`, and its reads are
234 // synchronized through the `OnceNonZeroUsize` Acquire/Release
235 // semantics as we ensure we have a `cpu::Features` instance before
236 // calling into the C code.
237 let flag = unsafe { &avx2_available };
238 flag.store(1, core::sync::atomic::Ordering::Relaxed);
239 }

    // Intel: "12.13.4 Checking for Intel AES-NI Support"
    // If/when we support dynamic detection of SSE/SSE2, revisit this.
    // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI)
    // and AES-NI & !AVX.
    // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for
    // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every
    // use will either be supported by SSE* or AVX* instructions. We then
    // assume that those supporting instructions' prerequisites (e.g. OS
    // support for AVX or SSE state, respectively) are the only prerequisites
    // for these features.
    if check(leaf1_ecx, 1) {
        set(&mut caps, Shift::ClMul);
    }
    if check(leaf1_ecx, 25) {
        set(&mut caps, Shift::Aes);
    }
    // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
    // static feature detection for this.
    #[cfg(target_arch = "x86_64")]
    if check(extended_features_ebx, 29) {
        set(&mut caps, Shift::Sha);
    }

    #[cfg(target_arch = "x86_64")]
    {
        if is_intel {
            set(&mut caps, Shift::IntelCpu);
        }

        if check(leaf1_ecx, 22) {
            set(&mut caps, Shift::Movbe);
        }

        let adx_available = check(extended_features_ebx, 19);
        if adx_available {
            set(&mut caps, Shift::Adx);
        }

        // Some 6th Generation (Skylake) CPUs claim to support BMI1 and BMI2
        // when they don't; see erratum "SKD052". The Intel document at
        // https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/6th-gen-core-u-y-spec-update.pdf
        // contains the footnote "Affects 6th Generation Intel Pentium processor
        // family and Intel Celeron processor family". Further research indicates
        // that Skylake Pentium/Celeron do not implement AVX or ADX. It turns
        // out that we only use BMI1 and BMI2 in combination with ADX and/or
        // AVX.
        //
        // Rust's `std::arch::is_x86_feature_detected` does a very similar thing
        // but only looks at AVX, not ADX. Note that it references an older
        // version of the erratum labeled SKL052.
        let believe_bmi_bits = !is_intel || (adx_available || avx_available);

        if check(extended_features_ebx, 3) && believe_bmi_bits {
            set(&mut caps, Shift::Bmi1);
        }

        let bmi2_available = check(extended_features_ebx, 8) && believe_bmi_bits;
        if bmi2_available {
            set(&mut caps, Shift::Bmi2);
        }

        if adx_available && bmi2_available {
            // Declared as `uint32_t` in the C code.
            prefixed_extern! {
                static adx_bmi2_available: AtomicU32;
            }
            // SAFETY: The C code only reads `adx_bmi2_available`, and its
            // reads are synchronized through the `OnceNonZeroUsize`
            // Acquire/Release semantics as we ensure we have a
            // `cpu::Features` instance before calling into the C code.
            let flag = unsafe { &adx_bmi2_available };
            flag.store(1, core::sync::atomic::Ordering::Relaxed);
        }
    }

    caps
}

impl_get_feature! {
    features: [
        { ("x86", "x86_64") => ClMul },
        { ("x86", "x86_64") => Ssse3 },
        { ("x86_64") => Sse41 },
        { ("x86_64") => Movbe },
        { ("x86", "x86_64") => Aes },
        { ("x86", "x86_64") => Avx },
        { ("x86_64") => Bmi1 },
        { ("x86_64") => Avx2 },
        { ("x86_64") => Bmi2 },
        { ("x86_64") => Adx },
        // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
        // static feature detection for this.
        { ("x86_64") => Sha },
        // x86_64 can just assume SSE2 is available.
        { ("x86") => Sse2 },
    ],
}

cfg_if! {
    if #[cfg(target_arch = "x86_64")] {
        #[derive(Clone, Copy)]
        pub(crate) struct IntelCpu(super::Features);

        impl super::GetFeature<IntelCpu> for super::features::Values {
            fn get_feature(&self) -> Option<IntelCpu> {
                const MASK: u32 = 1 << (Shift::IntelCpu as u32);
                if (self.values() & MASK) == MASK {
                    Some(IntelCpu(self.cpu()))
                } else {
                    None
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    // This should always pass on any x86 system except very, very old ones.
    #[cfg(target_arch = "x86")]
    #[test]
    fn x86_has_sse2() {
        use super::*;
        use crate::cpu::{self, GetFeature as _};
        assert!(matches!(cpu::features().get_feature(), Some(Sse2 { .. })))
    }
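
    // An additional sanity check, written as a minimal sketch: it assumes
    // `featureflags::get` and `CAPS_STATIC` are visible from this module, as
    // they are from the sibling `featureflags` module above. Since
    // `get_or_init()` ORs `CAPS_STATIC` into the detected bits, every
    // statically-detected capability must appear in the reported set.
    #[test]
    fn static_caps_are_reported() {
        use super::super::CAPS_STATIC;
        use super::featureflags;
        use crate::cpu;
        let caps = featureflags::get(cpu::features());
        assert_eq!(caps & CAPS_STATIC, CAPS_STATIC);
    }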
}