1 | // Copyright (C) 2021 The Qt Company Ltd. |
2 | // Copyright (C) 2022 Intel Corporation. |
3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
4 | |
5 | // we need ICC to define the prototype for _rdseed64_step |
6 | #define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES |
7 | #undef _FORTIFY_SOURCE // otherwise, the always_inline from stdio.h fail to inline |
8 | |
9 | #include "qsimd_p.h" |
10 | #include "qalgorithms.h" |
11 | |
12 | #include <stdio.h> |
13 | #include <string.h> |
14 | |
15 | #if defined(QT_NO_DEBUG) && !defined(NDEBUG) |
16 | # define NDEBUG |
17 | #endif |
18 | #include <assert.h> |
19 | |
20 | #ifdef Q_OS_LINUX |
21 | # include "../testlib/3rdparty/valgrind_p.h" |
22 | #endif |
23 | |
24 | #define QT_FUNCTION_TARGET_BASELINE |
25 | |
26 | #if defined(Q_OS_WIN) |
27 | # if !defined(Q_CC_GNU) |
28 | # include <intrin.h> |
29 | # endif |
30 | # if defined(Q_PROCESSOR_ARM_64) |
31 | # include <qt_windows.h> |
32 | # include <processthreadsapi.h> |
33 | # endif |
34 | #elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32) |
35 | # include "private/qcore_unix_p.h" |
36 | #elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM) |
37 | # include <sys/auxv.h> |
38 | |
39 | // the kernel header definitions for HWCAP_* |
40 | // (the ones we need/may need anyway) |
41 | |
42 | // copied from <asm/hwcap.h> (ARM) |
43 | #define HWCAP_NEON 4096 |
44 | |
45 | // copied from <asm/hwcap.h> (ARM): |
46 | #define HWCAP2_AES (1 << 0) |
47 | #define HWCAP2_CRC32 (1 << 4) |
48 | |
49 | // copied from <asm/hwcap.h> (Aarch64) |
50 | #define HWCAP_AES (1 << 3) |
51 | #define HWCAP_CRC32 (1 << 7) |
52 | |
53 | // copied from <linux/auxvec.h> |
54 | #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ |
55 | #define AT_HWCAP2 26 /* extension of AT_HWCAP */ |
56 | |
57 | #elif defined(Q_CC_GHS) |
58 | # include <INTEGRITY_types.h> |
59 | #elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM) |
60 | # include <sys/sysctl.h> |
61 | #endif |
62 | |
63 | QT_BEGIN_NAMESPACE |
64 | |
65 | template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE |
66 | uint arraysize(T (&)[N]) |
67 | { |
68 | // Same as std::size, but with QT_FUNCTION_TARGET_BASELIE, |
69 | // otherwise some versions of GCC fail to compile. |
70 | return N; |
71 | } |
72 | |
// Feature-name tables: features_indices[i] is the offset inside
// features_string of the NUL-terminated name printed for feature bit i.
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 aes
 */
static const char features_string[] =
        "\0"            // offset 0
        " neon\0"       // offset 1
        " crc32\0"      // offset 7
        " aes\0";       // offset 14
static const int features_indices[] = {
    0, 1, 7, 14
};
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
 */
static const char features_string[] =
        "\0"            // offset 0
        " dsp\0"        // offset 1
        " dspr2\0";     // offset 6
static const int features_indices[] = {
    0, 1, 6
};
#elif defined(Q_PROCESSOR_X86)
#  include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// No known features on this architecture: a single empty name.
static const char features_string[] = "";
static const int features_indices[] = {
    0
};
#endif
// end generated
105 | |
106 | #if defined (Q_OS_NACL) |
// Stub used when no runtime detection is done: reports an empty feature set.
static inline uint detectProcessorFeatures()
{
    constexpr uint noFeatures = 0;
    return noFeatures;
}
111 | #elif defined(Q_PROCESSOR_ARM) |
112 | static inline quint64 detectProcessorFeatures() |
113 | { |
114 | quint64 features = 0; |
115 | |
116 | #if QT_CONFIG(getauxval) |
117 | unsigned long auxvHwCap = getauxval(AT_HWCAP); |
118 | if (auxvHwCap != 0) { |
119 | # if defined(Q_PROCESSOR_ARM_64) |
120 | // For Aarch64: |
121 | features |= CpuFeatureNEON; // NEON is always available |
122 | if (auxvHwCap & HWCAP_CRC32) |
123 | features |= CpuFeatureCRC32; |
124 | if (auxvHwCap & HWCAP_AES) |
125 | features |= CpuFeatureAES; |
126 | # else |
127 | // For ARM32: |
128 | if (auxvHwCap & HWCAP_NEON) |
129 | features |= CpuFeatureNEON; |
130 | auxvHwCap = getauxval(AT_HWCAP2); |
131 | if (auxvHwCap & HWCAP2_CRC32) |
132 | features |= CpuFeatureCRC32; |
133 | if (auxvHwCap & HWCAP2_AES) |
134 | features |= CpuFeatureAES; |
135 | # endif |
136 | return features; |
137 | } |
138 | // fall back to compile-time flags if getauxval failed |
139 | #elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM) |
140 | unsigned feature; |
141 | size_t len = sizeof(feature); |
142 | if (sysctlbyname("hw.optional.neon" , &feature, &len, nullptr, 0) == 0) |
143 | features |= feature ? CpuFeatureNEON : 0; |
144 | if (sysctlbyname("hw.optional.armv8_crc32" , &feature, &len, nullptr, 0) == 0) |
145 | features |= feature ? CpuFeatureCRC32 : 0; |
146 | // There is currently no optional value for crypto/AES. |
147 | #if defined(__ARM_FEATURE_CRYPTO) |
148 | features |= CpuFeatureAES; |
149 | #endif |
150 | return features; |
151 | #elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64) |
152 | features |= CpuFeatureNEON; |
153 | if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0) |
154 | features |= CpuFeatureCRC32; |
155 | if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0) |
156 | features |= CpuFeatureAES; |
157 | return features; |
158 | #endif |
159 | #if defined(__ARM_NEON__) || defined(__ARM_NEON) |
160 | features |= CpuFeatureNEON; |
161 | #endif |
162 | #if defined(__ARM_FEATURE_CRC32) |
163 | features |= CpuFeatureCRC32; |
164 | #endif |
165 | #if defined(__ARM_FEATURE_CRYPTO) |
166 | features |= CpuFeatureAES; |
167 | #endif |
168 | |
169 | return features; |
170 | } |
171 | |
172 | #elif defined(Q_PROCESSOR_X86) |
173 | |
174 | #ifdef Q_PROCESSOR_X86_32 |
175 | # define PICreg "%%ebx" |
176 | #else |
177 | # define PICreg "%%rbx" |
178 | #endif |
179 | #ifdef __SSE2_MATH__ |
180 | # define X86_BASELINE "no-sse3" |
181 | #else |
182 | # define X86_BASELINE "no-sse" |
183 | #endif |
184 | |
185 | #if defined(Q_CC_GNU) |
186 | // lower the target for functions in this file |
187 | # undef QT_FUNCTION_TARGET_BASELINE |
188 | # define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE))) |
189 | # define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \ |
190 | X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND |
191 | #endif |
192 | |
193 | static bool checkRdrndWorks() noexcept; |
194 | |
195 | QT_FUNCTION_TARGET_BASELINE |
196 | static int maxBasicCpuidSupported() |
197 | { |
198 | #if defined(Q_CC_EMSCRIPTEN) |
199 | return 6; // All features supported by Emscripten |
200 | #elif defined(Q_CC_GNU) |
201 | qregisterint tmp1; |
202 | |
203 | # if Q_PROCESSOR_X86 < 5 |
204 | // check if the CPUID instruction is supported |
205 | long cpuid_supported; |
206 | asm ("pushf\n" |
207 | "pop %0\n" |
208 | "mov %0, %1\n" |
209 | "xor $0x00200000, %0\n" |
210 | "push %0\n" |
211 | "popf\n" |
212 | "pushf\n" |
213 | "pop %0\n" |
214 | "xor %1, %0\n" // %eax is now 0 if CPUID is not supported |
215 | : "=a" (cpuid_supported), "=r" (tmp1) |
216 | ); |
217 | if (!cpuid_supported) |
218 | return 0; |
219 | # endif |
220 | |
221 | int result; |
222 | asm ("xchg " PICreg", %1\n" |
223 | "cpuid\n" |
224 | "xchg " PICreg", %1\n" |
225 | : "=&a" (result), "=&r" (tmp1) |
226 | : "0" (0) |
227 | : "ecx" , "edx" ); |
228 | return result; |
229 | #elif defined(Q_OS_WIN) |
230 | // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0 |
231 | int info[4]; |
232 | __cpuid(info, 0); |
233 | return info[0]; |
234 | #elif defined(Q_CC_GHS) |
235 | unsigned int info[4]; |
236 | __CPUID(0, info); |
237 | return info[0]; |
238 | #else |
239 | return 0; |
240 | #endif |
241 | } |
242 | |
243 | QT_FUNCTION_TARGET_BASELINE |
244 | static void cpuidFeatures01(uint &ecx, uint &edx) |
245 | { |
246 | #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) |
247 | qregisterint tmp1; |
248 | asm ("xchg " PICreg", %2\n" |
249 | "cpuid\n" |
250 | "xchg " PICreg", %2\n" |
251 | : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) |
252 | : "a" (1)); |
253 | #elif defined(Q_OS_WIN) |
254 | int info[4]; |
255 | __cpuid(info, 1); |
256 | ecx = info[2]; |
257 | edx = info[3]; |
258 | #elif defined(Q_CC_GHS) |
259 | unsigned int info[4]; |
260 | __CPUID(1, info); |
261 | ecx = info[2]; |
262 | edx = info[3]; |
263 | #else |
264 | Q_UNUSED(ecx); |
265 | Q_UNUSED(edx); |
266 | #endif |
267 | } |
268 | |
#ifdef Q_OS_WIN
// Fallback overload taking __int64 as its last parameter: it zeroes all four
// result registers.
inline void __cpuidex(int info[4], int, __int64)
{
    for (int i = 0; i < 4; ++i)
        info[i] = 0;
}
#endif
272 | |
273 | QT_FUNCTION_TARGET_BASELINE |
274 | static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) |
275 | { |
276 | #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) |
277 | qregisteruint rbx; // in case it's 64-bit |
278 | qregisteruint rcx = 0; |
279 | qregisteruint rdx = 0; |
280 | asm ("xchg " PICreg", %0\n" |
281 | "cpuid\n" |
282 | "xchg " PICreg", %0\n" |
283 | : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) |
284 | : "a" (7)); |
285 | ebx = rbx; |
286 | ecx = rcx; |
287 | edx = rdx; |
288 | #elif defined(Q_OS_WIN) |
289 | int info[4]; |
290 | __cpuidex(info, 7, 0); |
291 | ebx = info[1]; |
292 | ecx = info[2]; |
293 | edx = info[3]; |
294 | #elif defined(Q_CC_GHS) |
295 | unsigned int info[4]; |
296 | __CPUIDEX(7, 0, info); |
297 | ebx = info[1]; |
298 | ecx = info[2]; |
299 | edx = info[3]; |
300 | #else |
301 | Q_UNUSED(ebx); |
302 | Q_UNUSED(ecx); |
303 | Q_UNUSED(edx); |
304 | #endif |
305 | } |
306 | |
307 | QT_FUNCTION_TARGET_BASELINE |
308 | #if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS)) |
309 | // fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int); |
310 | inline quint64 _xgetbv(__int64) { return 0; } |
311 | #endif |
312 | static void xgetbv(uint in, uint &eax, uint &edx) |
313 | { |
314 | #if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS) |
315 | asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction |
316 | : "=a" (eax), "=d" (edx) |
317 | : "c" (in)); |
318 | #elif defined(Q_OS_WIN) |
319 | quint64 result = _xgetbv(in); |
320 | eax = result; |
321 | edx = result >> 32; |
322 | #else |
323 | Q_UNUSED(in); |
324 | Q_UNUSED(eax); |
325 | Q_UNUSED(edx); |
326 | #endif |
327 | } |
328 | |
329 | QT_FUNCTION_TARGET_BASELINE |
330 | static quint64 adjustedXcr0(quint64 xcr0) |
331 | { |
332 | /* |
333 | * Some OSes hide their capability of context-switching the AVX512 state in |
334 | * the XCR0 register. They do that so the first time we execute an |
335 | * instruction that may access the AVX512 state (requiring the EVEX prefix) |
336 | * they allocate the necessary context switch space. |
337 | * |
338 | * This behavior is deprecated with the XFD (Extended Feature Disable) |
339 | * register, but we can't change existing OSes. |
340 | */ |
341 | #ifdef Q_OS_DARWIN |
342 | // from <machine/cpu_capabilities.h> in xnu |
343 | // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h> |
344 | constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000); |
345 | constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000; |
346 | constexpr quintptr cpu_capabilities64 = commpage + 0x10; |
347 | quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); |
348 | if (capab & kHasAVX512F) |
349 | xcr0 |= XSave_Avx512State; |
350 | #endif |
351 | |
352 | return xcr0; |
353 | } |
354 | |
355 | QT_FUNCTION_TARGET_BASELINE |
356 | static quint64 detectProcessorFeatures() |
357 | { |
358 | quint64 features = 0; |
359 | int cpuidLevel = maxBasicCpuidSupported(); |
360 | #if Q_PROCESSOR_X86 < 5 |
361 | if (cpuidLevel < 1) |
362 | return 0; |
363 | #else |
364 | assert(cpuidLevel >= 1); |
365 | #endif |
366 | |
367 | uint results[X86CpuidMaxLeaf] = {}; |
368 | cpuidFeatures01(ecx&: results[Leaf01ECX], edx&: results[Leaf01EDX]); |
369 | if (cpuidLevel >= 7) |
370 | cpuidFeatures07_00(ebx&: results[Leaf07_00EBX], ecx&: results[Leaf07_00ECX], edx&: results[Leaf07_00EDX]); |
371 | |
372 | // populate our feature list |
373 | for (uint i = 0; i < arraysize(x86_locators); ++i) { |
374 | uint word = x86_locators[i] / 32; |
375 | uint bit = 1U << (x86_locators[i] % 32); |
376 | quint64 feature = Q_UINT64_C(1) << i; |
377 | if (results[word] & bit) |
378 | features |= feature; |
379 | } |
380 | |
381 | // now check the AVX state |
382 | quint64 xcr0 = 0; |
383 | if (results[Leaf01ECX] & (1u << 27)) { |
384 | // XGETBV enabled |
385 | uint xgetbvA = 0, xgetbvD = 0; |
386 | xgetbv(in: 0, eax&: xgetbvA, edx&: xgetbvD); |
387 | |
388 | xcr0 = xgetbvA; |
389 | if (sizeof(XSaveBits) > sizeof(xgetbvA)) |
390 | xcr0 |= quint64(xgetbvD) << 32; |
391 | xcr0 = adjustedXcr0(xcr0); |
392 | } |
393 | |
394 | for (auto req : xsave_requirements) { |
395 | if ((xcr0 & req.xsave_state) != req.xsave_state) |
396 | features &= ~req.cpu_features; |
397 | } |
398 | |
399 | if (features & CpuFeatureRDRND && !checkRdrndWorks()) |
400 | features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); |
401 | |
402 | return features; |
403 | } |
404 | |
405 | #elif defined(Q_PROCESSOR_MIPS_32) |
406 | |
407 | #if defined(Q_OS_LINUX) |
408 | // |
409 | // Do not use QByteArray: it could use SIMD instructions itself at |
410 | // some point, thus creating a recursive dependency. Instead, use a |
411 | // QSimpleBuffer, which has the bare minimum needed to use memory |
412 | // dynamically and read lines from /proc/cpuinfo of arbitrary sizes. |
413 | // |
// Minimal growable byte buffer built directly on realloc()/free().
// "size" is the number of bytes in use and "alloc" the allocated capacity.
// Allocation failure is not handled.
struct QSimpleBuffer
{
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // The buffer owns "data"; a member-wise copy would free it twice.
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the used size, growing the allocation when needed. Capacity is
    // rounded up to the chunk multiple strictly above newsize, which leaves
    // room for the terminator written by cString(). It never shrinks.
    void resize(unsigned newsize)
    {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) // the rounding overflowed
                newalloc = newsize;
            if (newalloc != alloc) {
                data = static_cast<char *>(::realloc(data, newalloc));
                alloc = newalloc;
            }
        }
        size = newsize;
    }

    // Appends the first appendsize bytes of other.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Drops the first "amount" bytes, moving the remainder to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size)
            return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // NUL-terminates the contents in place and returns them. The terminator
    // is not counted in "size", so an empty buffer yields "" and stays empty.
    char *cString()
    {
        if (size >= alloc) {
            // no spare byte for the terminator: grow capacity, keep the size
            const unsigned used = size;
            resize(used + 1);
            size = used;
        }
        data[size] = '\0';
        return data;
    }
};
457 | |
458 | // |
459 | // Uses a scratch "buffer" (which must be used for all reads done in the |
460 | // same file descriptor) to read chunks of data from a file, to read |
461 | // one line at a time. Lines include the trailing newline character ('\n'). |
462 | // On EOF, line.size is zero. |
463 | // |
464 | static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) |
465 | { |
466 | for (;;) { |
467 | char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size)); |
468 | if (newline) { |
469 | unsigned piece_size = newline - buffer.data + 1; |
470 | line.append(buffer, piece_size); |
471 | buffer.popleft(piece_size); |
472 | line.resize(line.size - 1); |
473 | return; |
474 | } |
475 | if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { |
476 | int oldsize = buffer.size; |
477 | buffer.resize(buffer.size + QSimpleBuffer::chunk_size); |
478 | buffer.size = oldsize; |
479 | } |
480 | ssize_t read_bytes = |
481 | ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); |
482 | if (read_bytes > 0) |
483 | buffer.size += read_bytes; |
484 | else |
485 | return; |
486 | } |
487 | } |
488 | |
489 | // |
490 | // Checks if any line with a given prefix from /proc/cpuinfo contains |
491 | // a certain string, surrounded by spaces. |
492 | // |
493 | static bool procCpuinfoContains(const char *prefix, const char *string) |
494 | { |
495 | int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo" , O_RDONLY); |
496 | if (cpuinfo_fd == -1) |
497 | return false; |
498 | |
499 | unsigned string_len = ::strlen(string); |
500 | unsigned prefix_len = ::strlen(prefix); |
501 | QSimpleBuffer line, buffer; |
502 | bool present = false; |
503 | do { |
504 | line.resize(0); |
505 | bufReadLine(cpuinfo_fd, line, buffer); |
506 | char *colon = static_cast<char *>(::memchr(line.data, ':', line.size)); |
507 | if (colon && line.size > prefix_len + string_len) { |
508 | if (!::strncmp(prefix, line.data, prefix_len)) { |
509 | // prefix matches, next character must be ':' or space |
510 | if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { |
511 | // Does it contain the string? |
512 | char *found = ::strstr(line.cString(), string); |
513 | if (found && ::isspace(found[-1]) && |
514 | (::isspace(found[string_len]) || found[string_len] == '\0')) { |
515 | present = true; |
516 | break; |
517 | } |
518 | } |
519 | } |
520 | } |
521 | } while (line.size); |
522 | |
523 | ::qt_safe_close(cpuinfo_fd); |
524 | return present; |
525 | } |
526 | #endif |
527 | |
528 | static inline quint64 detectProcessorFeatures() |
529 | { |
530 | // NOTE: MIPS 74K cores are the only ones supporting DSPr2. |
531 | quint64 flags = 0; |
532 | |
533 | #if defined __mips_dsp |
534 | flags |= CpuFeatureDSP; |
535 | # if defined __mips_dsp_rev && __mips_dsp_rev >= 2 |
536 | flags |= CpuFeatureDSPR2; |
537 | # elif defined(Q_OS_LINUX) |
538 | if (procCpuinfoContains("cpu model" , "MIPS 74Kc" ) || procCpuinfoContains("cpu model" , "MIPS 74Kf" )) |
539 | flags |= CpuFeatureDSPR2; |
540 | # endif |
541 | #elif defined(Q_OS_LINUX) |
542 | if (procCpuinfoContains("ASEs implemented" , "dsp" )) { |
543 | flags |= CpuFeatureDSP; |
544 | if (procCpuinfoContains("cpu model" , "MIPS 74Kc" ) || procCpuinfoContains("cpu model" , "MIPS 74Kf" )) |
545 | flags |= CpuFeatureDSPR2; |
546 | } |
547 | #endif |
548 | |
549 | return flags; |
550 | } |
551 | |
552 | #else |
// Generic fallback: no detection is implemented, so no features are reported.
static inline uint detectProcessorFeatures()
{
    constexpr uint noFeatures = 0;
    return noFeatures;
}
557 | #endif |
558 | |
559 | // record what CPU features were enabled by default in this Qt build |
560 | static const quint64 minFeature = qCompilerCpuFeatures; |
561 | |
562 | static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1); |
563 | Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 }; |
564 | |
565 | QT_FUNCTION_TARGET_BASELINE |
566 | uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)() |
567 | { |
568 | auto minFeatureTest = minFeature; |
569 | #if defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64) |
570 | // Yocto hard-codes CRC32+AES on. Since they are unlikely to be used |
571 | // automatically by compilers, we can just add runtime check. |
572 | minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32); |
573 | #endif |
574 | #if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk) |
575 | // Controlflow Enforcement Technology (CET) is an OS-assisted |
576 | // hardware-feature, meaning the CPUID bit may be disabled if the OS |
577 | // doesn't support it, but that's ok. |
578 | minFeatureTest &= ~CpuFeatureSHSTK; |
579 | #endif |
580 | QCpuFeatureType f = detectProcessorFeatures(); |
581 | |
582 | // Intentionally NOT qgetenv (this code runs too early) |
583 | if (char *disable = getenv(name: "QT_NO_CPU_FEATURE" ); disable && *disable) { |
584 | #if _POSIX_C_SOURCE >= 200112L |
585 | char *saveptr = nullptr; |
586 | auto strtok = [&saveptr](char *str, const char *delim) { |
587 | return ::strtok_r(s: str, delim: delim, save_ptr: &saveptr); |
588 | }; |
589 | #endif |
590 | while (char *token = strtok(disable, " " )) { |
591 | disable = nullptr; |
592 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
593 | if (strcmp(s1: token, s2: features_string + features_indices[i]) == 0) |
594 | f &= ~(Q_UINT64_C(1) << i); |
595 | } |
596 | } |
597 | } |
598 | |
599 | #ifdef RUNNING_ON_VALGRIND |
600 | bool runningOnValgrind = RUNNING_ON_VALGRIND; |
601 | #else |
602 | bool runningOnValgrind = false; |
603 | #endif |
604 | if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) { |
605 | quint64 missing = minFeatureTest & ~quint64(f); |
606 | fprintf(stderr, format: "Incompatible processor. This Qt build requires the following features:\n " ); |
607 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
608 | if (missing & (Q_UINT64_C(1) << i)) |
609 | fprintf(stderr, format: "%s" , features_string + features_indices[i]); |
610 | } |
611 | fprintf(stderr, format: "\n" ); |
612 | fflush(stderr); |
613 | qAbort(); |
614 | } |
615 | |
616 | assert((f & SimdInitialized) == 0); |
617 | f |= SimdInitialized; |
618 | std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), i: f, m: std::memory_order_relaxed); |
619 | return f; |
620 | } |
621 | |
622 | QT_FUNCTION_TARGET_BASELINE |
623 | void qDumpCPUFeatures() |
624 | { |
625 | quint64 features = detectProcessorFeatures() & ~SimdInitialized; |
626 | printf(format: "Processor features: " ); |
627 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
628 | if (features & (Q_UINT64_C(1) << i)) |
629 | printf(format: "%s%s" , features_string + features_indices[i], |
630 | minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "" ); |
631 | } |
632 | if ((features = (qCompilerCpuFeatures & ~features))) { |
633 | printf(format: "\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:" ); |
634 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
635 | if (features & (Q_UINT64_C(1) << i)) |
636 | printf(format: "%s" , features_string + features_indices[i]); |
637 | } |
638 | printf(format: "\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!" ); |
639 | } |
640 | puts(s: "" ); |
641 | } |
642 | |
643 | #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) |
644 | |
645 | # ifdef Q_PROCESSOR_X86_64 |
646 | # define _rdrandXX_step _rdrand64_step |
647 | # define _rdseedXX_step _rdseed64_step |
648 | # else |
649 | # define _rdrandXX_step _rdrand32_step |
650 | # define _rdseedXX_step _rdseed32_step |
651 | # endif |
652 | |
653 | // The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for |
654 | // Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long |
655 | // long on Windows, but unsigned long on Linux. |
656 | namespace { |
657 | template <typename F> struct ; |
658 | template <typename T> struct <int (T *)> { using = T; }; |
659 | using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type; |
660 | } |
661 | |
662 | # if QT_COMPILER_SUPPORTS_HERE(RDSEED) |
663 | static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept |
664 | { |
665 | // Unlike for the RDRAND code below, the Intel whitepaper describing the |
666 | // use of the RDSEED instruction indicates we should not retry in a loop. |
667 | // If the independent bit generator used by RDSEED is out of entropy, it |
668 | // may take time to replenish. |
669 | // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide |
670 | while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) { |
671 | if (_rdseedXX_step(p: reinterpret_cast<randuint *>(ptr)) == 0) |
672 | goto out; |
673 | ptr += sizeof(randuint) / sizeof(*ptr); |
674 | } |
675 | |
676 | if (sizeof(*ptr) != sizeof(randuint) && ptr != end) { |
677 | if (_rdseed32_step(p: ptr) == 0) |
678 | goto out; |
679 | ++ptr; |
680 | } |
681 | |
682 | out: |
683 | return ptr; |
684 | } |
685 | # else |
686 | static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *) |
687 | { |
688 | return ptr; |
689 | } |
690 | # endif |
691 | |
692 | static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept |
693 | { |
694 | int retries = 10; |
695 | while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) { |
696 | if (_rdrandXX_step(p: reinterpret_cast<randuint *>(ptr))) |
697 | ptr += sizeof(randuint)/sizeof(*ptr); |
698 | else if (--retries == 0) |
699 | goto out; |
700 | } |
701 | |
702 | while (sizeof(*ptr) != sizeof(randuint) && ptr != end) { |
703 | bool ok = _rdrand32_step(p: ptr); |
704 | if (!ok && --retries) |
705 | continue; |
706 | if (ok) |
707 | ++ptr; |
708 | break; |
709 | } |
710 | |
711 | out: |
712 | return ptr; |
713 | } |
714 | |
715 | QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION |
716 | static bool checkRdrndWorks() noexcept |
717 | { |
718 | /* |
719 | * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a |
720 | * failing random generation instruction, which always returns |
721 | * 0xffffffff, even when generation was "successful". |
722 | * |
723 | * This code checks if hardware random generator generates four consecutive |
724 | * equal numbers. If it does, then we probably have a failing one and |
725 | * should disable it completely. |
726 | * |
727 | * https://bugreports.qt.io/browse/QTBUG-69423 |
728 | */ |
729 | constexpr qsizetype TestBufferSize = 4; |
730 | unsigned testBuffer[TestBufferSize] = {}; |
731 | |
732 | unsigned *end = qt_random_rdrnd(ptr: testBuffer, end: testBuffer + TestBufferSize); |
733 | if (end < testBuffer + 3) { |
734 | // Random generation didn't produce enough data for us to make a |
735 | // determination whether it's working or not. Assume it isn't, but |
736 | // don't print a warning. |
737 | return false; |
738 | } |
739 | |
740 | // Check the results for equality |
741 | if (testBuffer[0] == testBuffer[1] |
742 | && testBuffer[0] == testBuffer[2] |
743 | && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { |
744 | fprintf(stderr, format: "WARNING: CPU random generator seem to be failing, " |
745 | "disabling hardware random number generation\n" |
746 | "WARNING: RDRND generated:" ); |
747 | for (unsigned *ptr = testBuffer; ptr < end; ++ptr) |
748 | fprintf(stderr, format: " 0x%x" , *ptr); |
749 | fprintf(stderr, format: "\n" ); |
750 | return false; |
751 | } |
752 | |
753 | // We're good |
754 | return true; |
755 | } |
756 | |
757 | QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept |
758 | { |
759 | unsigned *ptr = reinterpret_cast<unsigned *>(buffer); |
760 | unsigned *end = ptr + count; |
761 | |
762 | if (qCpuHasFeature(RDSEED)) |
763 | ptr = qt_random_rdseed(ptr, end); |
764 | |
765 | // fill the buffer with RDRND if RDSEED didn't |
766 | ptr = qt_random_rdrnd(ptr, end); |
767 | return ptr - reinterpret_cast<unsigned *>(buffer); |
768 | } |
769 | #elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM) |
// RDRND cannot be used from this build: always report it as not working.
static bool checkRdrndWorks() noexcept
{
    return false;
}
771 | #endif // Q_PROCESSOR_X86 && RDRND |
772 | |
773 | #if QT_SUPPORTS_INIT_PRIORITY |
774 | namespace { |
775 | struct QSimdInitializer |
776 | { |
777 | inline QSimdInitializer() { QT_MANGLE_NAMESPACE(qDetectCpuFeatures)(); } |
778 | }; |
779 | } |
780 | |
781 | // This is intentionally a dynamic initialization of the variable |
782 | Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer; |
783 | #endif |
784 | |
785 | QT_END_NAMESPACE |
786 | |