1 | // Copyright (C) 2021 The Qt Company Ltd. |
2 | // Copyright (C) 2022 Intel Corporation. |
3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
4 | |
5 | // we need ICC to define the prototype for _rdseed64_step |
6 | #define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES |
7 | #undef _FORTIFY_SOURCE // otherwise, the always_inline from stdio.h fail to inline |
8 | |
9 | #include "qsimd_p.h" |
10 | #include "qalgorithms.h" |
11 | |
12 | #include <stdio.h> |
13 | #include <string.h> |
14 | |
15 | #if defined(QT_NO_DEBUG) && !defined(NDEBUG) |
16 | # define NDEBUG |
17 | #endif |
18 | #include <assert.h> |
19 | |
20 | #ifdef Q_OS_LINUX |
21 | # include "../testlib/3rdparty/valgrind/valgrind_p.h" |
22 | #endif |
23 | |
24 | #define QT_FUNCTION_TARGET_BASELINE |
25 | |
26 | #if defined(Q_OS_WIN) |
27 | # if !defined(Q_CC_GNU) |
28 | # include <intrin.h> |
29 | # endif |
30 | # if defined(Q_PROCESSOR_ARM_64) |
31 | # include <qt_windows.h> |
32 | # include <processthreadsapi.h> |
33 | # endif |
34 | #elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32) |
35 | # include "private/qcore_unix_p.h" |
36 | #elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM) |
37 | # include <sys/auxv.h> |
38 | |
39 | // the kernel header definitions for HWCAP_* |
40 | // (the ones we need/may need anyway) |
41 | |
42 | // copied from <asm/hwcap.h> (ARM) |
43 | #define HWCAP_NEON 4096 |
44 | |
45 | // copied from <asm/hwcap.h> (ARM): |
46 | #define HWCAP2_AES (1 << 0) |
47 | #define HWCAP2_CRC32 (1 << 4) |
48 | |
49 | // copied from <asm/hwcap.h> (Aarch64) |
50 | #define HWCAP_AES (1 << 3) |
51 | #define HWCAP_CRC32 (1 << 7) |
52 | |
53 | // copied from <linux/auxvec.h> |
54 | #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ |
55 | #define AT_HWCAP2 26 /* extension of AT_HWCAP */ |
56 | |
57 | #elif defined(Q_CC_GHS) |
58 | # include <INTEGRITY_types.h> |
59 | #elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM) |
60 | # include <sys/sysctl.h> |
61 | #endif |
62 | |
63 | QT_BEGIN_NAMESPACE |
64 | |
65 | template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE |
66 | uint arraysize(T (&)[N]) |
67 | { |
68 | // Same as std::size, but with QT_FUNCTION_TARGET_BASELIE, |
69 | // otherwise some versions of GCC fail to compile. |
70 | return N; |
71 | } |
72 | |
// Feature-name tables. features_string packs every name into one buffer, each
// entry NUL-terminated; features_indices holds the byte offset at which each
// entry starts. The consumers in this file pair entry i with feature bit i.
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 aes
 */
// Every name carries a leading space; the initial "\0" makes offset 0 an
// empty string.
static const char features_string[] =
        "\0"
        " neon\0"
        " crc32\0"
        " aes\0";
// Offsets: 0 = "", 1 = " neon", 7 = " crc32", 14 = " aes"
static const int features_indices[] = { 0, 1, 7, 14 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
 */
// Same layout as above: empty entry first, names prefixed by a space.
static const char features_string[] =
        "\0"
        " dsp\0"
        " dspr2\0";

// Offsets: 0 = "", 1 = " dsp", 6 = " dspr2"
static const int features_indices[] = {
    0, 1, 6
};
#elif defined(Q_PROCESSOR_X86)
#  include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// Unknown architecture: a single empty entry so the tables are never empty.
static const char features_string[] = "";
static const int features_indices[] = { 0 };
#endif
// end generated
105 | |
106 | #if defined(Q_PROCESSOR_ARM) |
// ARM: returns the CpuFeature* bits (NEON, CRC32, AES) found on the running
// processor. Depending on the platform the information comes from
// getauxval(), sysctlbyname() or IsProcessorFeaturePresent(); when none of
// those paths returns, the compiler's predefined feature macros are used.
static inline quint64 detectProcessorFeatures()
{
    quint64 features = 0;

#if QT_CONFIG(getauxval)
    // A zero AT_HWCAP value is treated as "getauxval failed" (see below)
    unsigned long auxvHwCap = getauxval(AT_HWCAP);
    if (auxvHwCap != 0) {
#  if defined(Q_PROCESSOR_ARM_64)
        // For Aarch64:
        features |= CpuFeatureNEON; // NEON is always available
        if (auxvHwCap & HWCAP_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP_AES)
            features |= CpuFeatureAES;
#  else
        // For ARM32:
        if (auxvHwCap & HWCAP_NEON)
            features |= CpuFeatureNEON;
        // On 32-bit ARM the CRC32 and AES bits are read from AT_HWCAP2
        auxvHwCap = getauxval(AT_HWCAP2);
        if (auxvHwCap & HWCAP2_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP2_AES)
            features |= CpuFeatureAES;
#  endif
        return features;
    }
    // fall back to compile-time flags if getauxval failed
#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
    // Each sysctl result is only used when the call itself succeeded
    unsigned feature;
    size_t len = sizeof(feature);
    if (sysctlbyname("hw.optional.neon", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureNEON : 0;
    if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureCRC32 : 0;
    if (sysctlbyname("hw.optional.arm.FEAT_AES", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureAES : 0;
    // AES is also reported when the compiler itself targets the crypto extension
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#endif
    return features;
#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64)
    // NEON is set unconditionally on this path; CRC32 and AES are queried
    features |= CpuFeatureNEON;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureCRC32;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureAES;
    return features;
#endif
    // Compile-time fallback: trust whatever the compiler was told to target
#if defined(__ARM_NEON__)
    features |= CpuFeatureNEON;
#endif
#if defined(__ARM_FEATURE_CRC32)
    features |= CpuFeatureCRC32;
#endif
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#endif

    return features;
}
167 | |
168 | #elif defined(Q_PROCESSOR_X86) |
169 | |
// Name of the register that the CPUID asm blocks in this file exchange with a
// scratch operand immediately before and after executing CPUID.
#ifdef Q_PROCESSOR_X86_32
#  define PICreg "%%ebx"
#else
#  define PICreg "%%rbx"
#endif
// Target string used for QT_FUNCTION_TARGET_BASELINE when compiling with GCC
// or compatible compilers (see below).
#ifdef __SSE2_MATH__
#  define X86_BASELINE "no-sse3"
#else
#  define X86_BASELINE "no-sse"
#endif

#if defined(Q_CC_GNU)
// lower the target for functions in this file
#  undef QT_FUNCTION_TARGET_BASELINE
#  define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE)))
#  define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \
    X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND
#endif

// Forward declaration: called by detectProcessorFeatures(), defined further
// down in this file.
static bool checkRdrndWorks() noexcept;
190 | |
191 | QT_FUNCTION_TARGET_BASELINE |
192 | static int maxBasicCpuidSupported() |
193 | { |
194 | #if defined(Q_CC_EMSCRIPTEN) |
195 | return 6; // All features supported by Emscripten |
196 | #elif defined(Q_CC_GNU) |
197 | qregisterint tmp1; |
198 | |
199 | # if Q_PROCESSOR_X86 < 5 |
200 | // check if the CPUID instruction is supported |
201 | long cpuid_supported; |
202 | asm ("pushf\n" |
203 | "pop %0\n" |
204 | "mov %0, %1\n" |
205 | "xor $0x00200000, %0\n" |
206 | "push %0\n" |
207 | "popf\n" |
208 | "pushf\n" |
209 | "pop %0\n" |
210 | "xor %1, %0\n" // %eax is now 0 if CPUID is not supported |
211 | : "=a" (cpuid_supported), "=r" (tmp1) |
212 | ); |
213 | if (!cpuid_supported) |
214 | return 0; |
215 | # endif |
216 | |
217 | int result; |
218 | asm ("xchg " PICreg", %1\n" |
219 | "cpuid\n" |
220 | "xchg " PICreg", %1\n" |
221 | : "=&a" (result), "=&r" (tmp1) |
222 | : "0" (0) |
223 | : "ecx" , "edx" ); |
224 | return result; |
225 | #elif defined(Q_OS_WIN) |
226 | // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0 |
227 | int info[4]; |
228 | __cpuid(info, 0); |
229 | return info[0]; |
230 | #elif defined(Q_CC_GHS) |
231 | unsigned int info[4]; |
232 | __CPUID(0, info); |
233 | return info[0]; |
234 | #else |
235 | return 0; |
236 | #endif |
237 | } |
238 | |
239 | QT_FUNCTION_TARGET_BASELINE |
240 | static void cpuidFeatures01(uint &ecx, uint &edx) |
241 | { |
242 | #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) |
243 | qregisterint tmp1; |
244 | asm ("xchg " PICreg", %2\n" |
245 | "cpuid\n" |
246 | "xchg " PICreg", %2\n" |
247 | : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) |
248 | : "a" (1)); |
249 | #elif defined(Q_OS_WIN) |
250 | int info[4]; |
251 | __cpuid(info, 1); |
252 | ecx = info[2]; |
253 | edx = info[3]; |
254 | #elif defined(Q_CC_GHS) |
255 | unsigned int info[4]; |
256 | __CPUID(1, info); |
257 | ecx = info[2]; |
258 | edx = info[3]; |
259 | #else |
260 | Q_UNUSED(ecx); |
261 | Q_UNUSED(edx); |
262 | #endif |
263 | } |
264 | |
#ifdef Q_OS_WIN
// Stand-in overload that reports all-zero registers.
// NOTE(review): presumably only chosen by overload resolution when the
// compiler provides no __cpuidex intrinsic of its own - confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    memset(info, 0, 4 * sizeof(int));
}
#endif
268 | |
269 | QT_FUNCTION_TARGET_BASELINE |
270 | static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) |
271 | { |
272 | #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) |
273 | qregisteruint rbx; // in case it's 64-bit |
274 | qregisteruint rcx = 0; |
275 | qregisteruint rdx = 0; |
276 | asm ("xchg " PICreg", %0\n" |
277 | "cpuid\n" |
278 | "xchg " PICreg", %0\n" |
279 | : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) |
280 | : "a" (7)); |
281 | ebx = rbx; |
282 | ecx = rcx; |
283 | edx = rdx; |
284 | #elif defined(Q_OS_WIN) |
285 | int info[4]; |
286 | __cpuidex(info, 7, 0); |
287 | ebx = info[1]; |
288 | ecx = info[2]; |
289 | edx = info[3]; |
290 | #elif defined(Q_CC_GHS) |
291 | unsigned int info[4]; |
292 | __CPUIDEX(7, 0, info); |
293 | ebx = info[1]; |
294 | ecx = info[2]; |
295 | edx = info[3]; |
296 | #else |
297 | Q_UNUSED(ebx); |
298 | Q_UNUSED(ecx); |
299 | Q_UNUSED(edx); |
300 | #endif |
301 | } |
302 | |
303 | QT_FUNCTION_TARGET_BASELINE |
304 | #if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS)) |
305 | // fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int); |
306 | inline quint64 _xgetbv(__int64) { return 0; } |
307 | #endif |
308 | static void xgetbv(uint in, uint &eax, uint &edx) |
309 | { |
310 | #if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS) |
311 | asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction |
312 | : "=a" (eax), "=d" (edx) |
313 | : "c" (in)); |
314 | #elif defined(Q_OS_WIN) |
315 | quint64 result = _xgetbv(in); |
316 | eax = result; |
317 | edx = result >> 32; |
318 | #else |
319 | Q_UNUSED(in); |
320 | Q_UNUSED(eax); |
321 | Q_UNUSED(edx); |
322 | #endif |
323 | } |
324 | |
325 | QT_FUNCTION_TARGET_BASELINE |
326 | static quint64 adjustedXcr0(quint64 xcr0) |
327 | { |
328 | /* |
329 | * Some OSes hide their capability of context-switching the AVX512 state in |
330 | * the XCR0 register. They do that so the first time we execute an |
331 | * instruction that may access the AVX512 state (requiring the EVEX prefix) |
332 | * they allocate the necessary context switch space. |
333 | * |
334 | * This behavior is deprecated with the XFD (Extended Feature Disable) |
335 | * register, but we can't change existing OSes. |
336 | */ |
337 | #ifdef Q_OS_DARWIN |
338 | // from <machine/cpu_capabilities.h> in xnu |
339 | // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h> |
340 | constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000); |
341 | constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000; |
342 | constexpr quintptr cpu_capabilities64 = commpage + 0x10; |
343 | quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); |
344 | if (capab & kHasAVX512F) |
345 | xcr0 |= XSave_Avx512State; |
346 | #endif |
347 | |
348 | return xcr0; |
349 | } |
350 | |
351 | QT_FUNCTION_TARGET_BASELINE |
352 | static quint64 detectProcessorFeatures() |
353 | { |
354 | quint64 features = 0; |
355 | int cpuidLevel = maxBasicCpuidSupported(); |
356 | #if Q_PROCESSOR_X86 < 5 |
357 | if (cpuidLevel < 1) |
358 | return 0; |
359 | #else |
360 | assert(cpuidLevel >= 1); |
361 | #endif |
362 | |
363 | uint results[X86CpuidMaxLeaf] = {}; |
364 | cpuidFeatures01(ecx&: results[Leaf01ECX], edx&: results[Leaf01EDX]); |
365 | if (cpuidLevel >= 7) |
366 | cpuidFeatures07_00(ebx&: results[Leaf07_00EBX], ecx&: results[Leaf07_00ECX], edx&: results[Leaf07_00EDX]); |
367 | |
368 | // populate our feature list |
369 | for (uint i = 0; i < arraysize(x86_locators); ++i) { |
370 | uint word = x86_locators[i] / 32; |
371 | uint bit = 1U << (x86_locators[i] % 32); |
372 | quint64 feature = Q_UINT64_C(1) << i; |
373 | if (results[word] & bit) |
374 | features |= feature; |
375 | } |
376 | |
377 | // now check the AVX state |
378 | quint64 xcr0 = 0; |
379 | if (results[Leaf01ECX] & (1u << 27)) { |
380 | // XGETBV enabled |
381 | uint xgetbvA = 0, xgetbvD = 0; |
382 | xgetbv(in: 0, eax&: xgetbvA, edx&: xgetbvD); |
383 | |
384 | xcr0 = xgetbvA; |
385 | if (sizeof(XSaveBits) > sizeof(xgetbvA)) |
386 | xcr0 |= quint64(xgetbvD) << 32; |
387 | xcr0 = adjustedXcr0(xcr0); |
388 | } |
389 | |
390 | for (auto req : xsave_requirements) { |
391 | if ((xcr0 & req.xsave_state) != req.xsave_state) |
392 | features &= ~req.cpu_features; |
393 | } |
394 | |
395 | if (features & CpuFeatureRDRND && !checkRdrndWorks()) |
396 | features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); |
397 | |
398 | return features; |
399 | } |
400 | |
401 | #elif defined(Q_PROCESSOR_MIPS_32) |
402 | |
403 | #if defined(Q_OS_LINUX) |
404 | // |
405 | // Do not use QByteArray: it could use SIMD instructions itself at |
406 | // some point, thus creating a recursive dependency. Instead, use a |
407 | // QSimpleBuffer, which has the bare minimum needed to use memory |
408 | // dynamically and read lines from /proc/cpuinfo of arbitrary sizes. |
409 | // |
// Minimal growable byte buffer backed by realloc()/free().
//
// Invariant after any resize(): alloc > size, so cString() always has room
// for the terminating NUL. Only a buffer that was never resized has
// alloc == 0 (and data == nullptr).
//
// NOTE(review): a failed realloc() is not handled; the buffer would end up
// with a null `data` pointer.
struct QSimpleBuffer
{
    static const int chunk_size = 256;
    char *data;      // owned storage, released in the destructor
    unsigned alloc;  // bytes allocated at `data`
    unsigned size;   // bytes in use

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // `data` is owned: a member-wise copy would free it twice
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the used size to `newsize`, growing the allocation in multiples of
    // chunk_size when needed. The allocation never shrinks.
    void resize(unsigned newsize)
    {
        // '>=' (not '>'): always keep one spare byte for cString()'s NUL
        if (newsize >= alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) // the multiplication wrapped around
                newalloc = newsize;
            if (newalloc != alloc) {
                data = static_cast<char *>(::realloc(data, newalloc));
                alloc = newalloc;
            }
        }
        size = newsize;
    }

    // Appends the first `appendsize` bytes of `other` to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        if (appendsize) // other.data may be null when nothing is appended
            ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Discards the first `amount` bytes, moving the remainder to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size)
            return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string; `size` is unchanged.
    char *cString()
    {
        if (!alloc)
            resize(0); // allocate storage without pretending a byte is in use
        return (data[size] = '\0', data);
    }
};
453 | |
454 | // |
455 | // Uses a scratch "buffer" (which must be used for all reads done in the |
456 | // same file descriptor) to read chunks of data from a file, to read |
457 | // one line at a time. Lines include the trailing newline character ('\n'). |
458 | // On EOF, line.size is zero. |
459 | // |
460 | static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) |
461 | { |
462 | for (;;) { |
463 | char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size)); |
464 | if (newline) { |
465 | unsigned piece_size = newline - buffer.data + 1; |
466 | line.append(buffer, piece_size); |
467 | buffer.popleft(piece_size); |
468 | line.resize(line.size - 1); |
469 | return; |
470 | } |
471 | if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { |
472 | int oldsize = buffer.size; |
473 | buffer.resize(buffer.size + QSimpleBuffer::chunk_size); |
474 | buffer.size = oldsize; |
475 | } |
476 | ssize_t read_bytes = |
477 | ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); |
478 | if (read_bytes > 0) |
479 | buffer.size += read_bytes; |
480 | else |
481 | return; |
482 | } |
483 | } |
484 | |
485 | // |
486 | // Checks if any line with a given prefix from /proc/cpuinfo contains |
487 | // a certain string, surrounded by spaces. |
488 | // |
489 | static bool procCpuinfoContains(const char *prefix, const char *string) |
490 | { |
491 | int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo" , O_RDONLY); |
492 | if (cpuinfo_fd == -1) |
493 | return false; |
494 | |
495 | unsigned string_len = ::strlen(string); |
496 | unsigned prefix_len = ::strlen(prefix); |
497 | QSimpleBuffer line, buffer; |
498 | bool present = false; |
499 | do { |
500 | line.resize(0); |
501 | bufReadLine(cpuinfo_fd, line, buffer); |
502 | char *colon = static_cast<char *>(::memchr(line.data, ':', line.size)); |
503 | if (colon && line.size > prefix_len + string_len) { |
504 | if (!::strncmp(prefix, line.data, prefix_len)) { |
505 | // prefix matches, next character must be ':' or space |
506 | if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { |
507 | // Does it contain the string? |
508 | char *found = ::strstr(line.cString(), string); |
509 | if (found && ::isspace(found[-1]) && |
510 | (::isspace(found[string_len]) || found[string_len] == '\0')) { |
511 | present = true; |
512 | break; |
513 | } |
514 | } |
515 | } |
516 | } |
517 | } while (line.size); |
518 | |
519 | ::qt_safe_close(cpuinfo_fd); |
520 | return present; |
521 | } |
522 | #endif |
523 | |
524 | static inline quint64 detectProcessorFeatures() |
525 | { |
526 | // NOTE: MIPS 74K cores are the only ones supporting DSPr2. |
527 | quint64 flags = 0; |
528 | |
529 | #if defined __mips_dsp |
530 | flags |= CpuFeatureDSP; |
531 | # if defined __mips_dsp_rev && __mips_dsp_rev >= 2 |
532 | flags |= CpuFeatureDSPR2; |
533 | # elif defined(Q_OS_LINUX) |
534 | if (procCpuinfoContains("cpu model" , "MIPS 74Kc" ) || procCpuinfoContains("cpu model" , "MIPS 74Kf" )) |
535 | flags |= CpuFeatureDSPR2; |
536 | # endif |
537 | #elif defined(Q_OS_LINUX) |
538 | if (procCpuinfoContains("ASEs implemented" , "dsp" )) { |
539 | flags |= CpuFeatureDSP; |
540 | if (procCpuinfoContains("cpu model" , "MIPS 74Kc" ) || procCpuinfoContains("cpu model" , "MIPS 74Kf" )) |
541 | flags |= CpuFeatureDSPR2; |
542 | } |
543 | #endif |
544 | |
545 | return flags; |
546 | } |
547 | |
548 | #else |
// Architectures without a detection routine: always an empty feature set.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
553 | #endif |
554 | |
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// Highest bit of QCpuFeatureType. qDetectCpuFeatures() sets it in the value it
// stores, so a stored mask is never zero once detection has run.
static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1);
// Detected feature mask; starts at 0 and is written by qDetectCpuFeatures().
Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 };
560 | |
561 | QT_FUNCTION_TARGET_BASELINE |
562 | uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)() |
563 | { |
564 | auto minFeatureTest = minFeature; |
565 | #if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk) |
566 | // Controlflow Enforcement Technology (CET) is an OS-assisted |
567 | // hardware-feature, meaning the CPUID bit may be disabled if the OS |
568 | // doesn't support it, but that's ok. |
569 | minFeatureTest &= ~CpuFeatureSHSTK; |
570 | #endif |
571 | QCpuFeatureType f = detectProcessorFeatures(); |
572 | |
573 | // Intentionally NOT qgetenv (this code runs too early) |
574 | if (char *disable = getenv(name: "QT_NO_CPU_FEATURE" ); disable && *disable) { |
575 | #if _POSIX_C_SOURCE >= 200112L |
576 | char *saveptr = nullptr; |
577 | auto strtok = [&saveptr](char *str, const char *delim) { |
578 | return ::strtok_r(s: str, delim: delim, save_ptr: &saveptr); |
579 | }; |
580 | #endif |
581 | while (char *token = strtok(disable, " " )) { |
582 | disable = nullptr; |
583 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
584 | if (strcmp(s1: token, s2: features_string + features_indices[i]) == 0) |
585 | f &= ~(Q_UINT64_C(1) << i); |
586 | } |
587 | } |
588 | } |
589 | |
590 | #ifdef RUNNING_ON_VALGRIND |
591 | bool runningOnValgrind = RUNNING_ON_VALGRIND; |
592 | #else |
593 | bool runningOnValgrind = false; |
594 | #endif |
595 | if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) { |
596 | quint64 missing = minFeatureTest & ~quint64(f); |
597 | fprintf(stderr, format: "Incompatible processor. This Qt build requires the following features:\n " ); |
598 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
599 | if (missing & (Q_UINT64_C(1) << i)) |
600 | fprintf(stderr, format: "%s" , features_string + features_indices[i]); |
601 | } |
602 | fprintf(stderr, format: "\n" ); |
603 | fflush(stderr); |
604 | qAbort(); |
605 | } |
606 | |
607 | assert((f & SimdInitialized) == 0); |
608 | f |= SimdInitialized; |
609 | std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), i: f, m: std::memory_order_relaxed); |
610 | return f; |
611 | } |
612 | |
613 | QT_FUNCTION_TARGET_BASELINE |
614 | void qDumpCPUFeatures() |
615 | { |
616 | quint64 features = detectProcessorFeatures() & ~SimdInitialized; |
617 | printf(format: "Processor features: " ); |
618 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
619 | if (features & (Q_UINT64_C(1) << i)) |
620 | printf(format: "%s%s" , features_string + features_indices[i], |
621 | minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "" ); |
622 | } |
623 | if ((features = (qCompilerCpuFeatures & ~features))) { |
624 | printf(format: "\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:" ); |
625 | for (uint i = 0; i < arraysize(features_indices); ++i) { |
626 | if (features & (Q_UINT64_C(1) << i)) |
627 | printf(format: "%s" , features_string + features_indices[i]); |
628 | } |
629 | printf(format: "\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!" ); |
630 | } |
631 | puts(s: "" ); |
632 | } |
633 | |
634 | #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) |
635 | |
636 | # ifdef Q_PROCESSOR_X86_64 |
637 | # define _rdrandXX_step _rdrand64_step |
638 | # define _rdseedXX_step _rdseed64_step |
639 | # else |
640 | # define _rdrandXX_step _rdrand32_step |
641 | # define _rdseedXX_step _rdseed32_step |
642 | # endif |
643 | |
644 | // The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for |
645 | // Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long |
646 | // long on Windows, but unsigned long on Linux. |
647 | namespace { |
648 | template <typename F> struct ; |
649 | template <typename T> struct <int (T *)> { using = T; }; |
650 | using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type; |
651 | } |
652 | |
653 | # if QT_COMPILER_SUPPORTS_HERE(RDSEED) |
654 | static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept |
655 | { |
656 | // Unlike for the RDRAND code below, the Intel whitepaper describing the |
657 | // use of the RDSEED instruction indicates we should not retry in a loop. |
658 | // If the independent bit generator used by RDSEED is out of entropy, it |
659 | // may take time to replenish. |
660 | // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide |
661 | while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) { |
662 | if (_rdseedXX_step(p: reinterpret_cast<randuint *>(ptr)) == 0) |
663 | goto out; |
664 | ptr += sizeof(randuint) / sizeof(*ptr); |
665 | } |
666 | |
667 | if (sizeof(*ptr) != sizeof(randuint) && ptr != end) { |
668 | if (_rdseed32_step(p: ptr) == 0) |
669 | goto out; |
670 | ++ptr; |
671 | } |
672 | |
673 | out: |
674 | return ptr; |
675 | } |
676 | # else |
677 | static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *) |
678 | { |
679 | return ptr; |
680 | } |
681 | # endif |
682 | |
683 | static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept |
684 | { |
685 | int retries = 10; |
686 | while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) { |
687 | if (_rdrandXX_step(p: reinterpret_cast<randuint *>(ptr))) |
688 | ptr += sizeof(randuint)/sizeof(*ptr); |
689 | else if (--retries == 0) |
690 | goto out; |
691 | } |
692 | |
693 | while (sizeof(*ptr) != sizeof(randuint) && ptr != end) { |
694 | bool ok = _rdrand32_step(p: ptr); |
695 | if (!ok && --retries) |
696 | continue; |
697 | if (ok) |
698 | ++ptr; |
699 | break; |
700 | } |
701 | |
702 | out: |
703 | return ptr; |
704 | } |
705 | |
706 | QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION |
707 | static bool checkRdrndWorks() noexcept |
708 | { |
709 | /* |
710 | * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a |
711 | * failing random generation instruction, which always returns |
712 | * 0xffffffff, even when generation was "successful". |
713 | * |
714 | * This code checks if hardware random generator generates four consecutive |
715 | * equal numbers. If it does, then we probably have a failing one and |
716 | * should disable it completely. |
717 | * |
718 | * https://bugreports.qt.io/browse/QTBUG-69423 |
719 | */ |
720 | constexpr qsizetype TestBufferSize = 4; |
721 | unsigned testBuffer[TestBufferSize] = {}; |
722 | |
723 | // But if the RDRND feature was statically enabled by the compiler, we |
724 | // assume that the RNG works. That's because the calls to qRandomCpu() will |
725 | // be guarded by qCpuHasFeature(RDRND) and that will be a constant true. |
726 | if (_compilerCpuFeatures & CpuFeatureRDRND) |
727 | return true; |
728 | |
729 | unsigned *end = qt_random_rdrnd(ptr: testBuffer, end: testBuffer + TestBufferSize); |
730 | if (end < testBuffer + 3) { |
731 | // Random generation didn't produce enough data for us to make a |
732 | // determination whether it's working or not. Assume it isn't, but |
733 | // don't print a warning. |
734 | return false; |
735 | } |
736 | |
737 | // Check the results for equality |
738 | if (testBuffer[0] == testBuffer[1] |
739 | && testBuffer[0] == testBuffer[2] |
740 | && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { |
741 | fprintf(stderr, format: "WARNING: CPU random generator seem to be failing, " |
742 | "disabling hardware random number generation\n" |
743 | "WARNING: RDRND generated:" ); |
744 | for (unsigned *ptr = testBuffer; ptr < end; ++ptr) |
745 | fprintf(stderr, format: " 0x%x" , *ptr); |
746 | fprintf(stderr, format: "\n" ); |
747 | return false; |
748 | } |
749 | |
750 | // We're good |
751 | return true; |
752 | } |
753 | |
754 | QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept |
755 | { |
756 | unsigned *ptr = reinterpret_cast<unsigned *>(buffer); |
757 | unsigned *end = ptr + count; |
758 | |
759 | if (qCpuHasFeature(RDSEED)) |
760 | ptr = qt_random_rdseed(ptr, end); |
761 | |
762 | // fill the buffer with RDRND if RDSEED didn't |
763 | ptr = qt_random_rdrnd(ptr, end); |
764 | return ptr - reinterpret_cast<unsigned *>(buffer); |
765 | } |
766 | #elif defined(Q_PROCESSOR_X86) && !defined(Q_PROCESSOR_ARM) |
// RDRND cannot be compiled in this configuration: report it as unusable
static bool checkRdrndWorks() noexcept
{
    return false;
}
768 | #endif // Q_PROCESSOR_X86 && RDRND |
769 | |
#if QT_SUPPORTS_INIT_PRIORITY
namespace {
// Runs CPU feature detection from its constructor
struct QSimdInitializer
{
    inline QSimdInitializer()
    {
        QT_MANGLE_NAMESPACE(qDetectCpuFeatures)();
    }
};
} // unnamed namespace

// This is intentionally a dynamic initialization of the variable
Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer;
#endif
781 | |
782 | QT_END_NAMESPACE |
783 | |