1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5// we need ICC to define the prototype for _rdseed64_step
6#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
7#undef _FORTIFY_SOURCE // otherwise, the always_inline from stdio.h fail to inline
8
9#include "qsimd_p.h"
10#include "qalgorithms.h"
11
12#include <stdio.h>
13#include <string.h>
14
15#if defined(QT_NO_DEBUG) && !defined(NDEBUG)
16# define NDEBUG
17#endif
18#include <assert.h>
19
20#ifdef Q_OS_LINUX
21# include "../testlib/3rdparty/valgrind/valgrind_p.h"
22#endif
23
24#define QT_FUNCTION_TARGET_BASELINE
25
26#if defined(Q_OS_WIN)
27# if !defined(Q_CC_GNU)
28# include <intrin.h>
29# endif
30# if defined(Q_PROCESSOR_ARM_64)
31# include <qt_windows.h>
32# include <processthreadsapi.h>
33# endif
34#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32)
35# include "private/qcore_unix_p.h"
36#elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM)
37# include <sys/auxv.h>
38
39// the kernel header definitions for HWCAP_*
40// (the ones we need/may need anyway)
41
42// copied from <asm/hwcap.h> (ARM)
43#define HWCAP_NEON 4096
44
45// copied from <asm/hwcap.h> (ARM):
46#define HWCAP2_AES (1 << 0)
47#define HWCAP2_CRC32 (1 << 4)
48
49// copied from <asm/hwcap.h> (Aarch64)
50#define HWCAP_AES (1 << 3)
51#define HWCAP_CRC32 (1 << 7)
52
53// copied from <linux/auxvec.h>
54#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
55#define AT_HWCAP2 26 /* extension of AT_HWCAP */
56
57#elif defined(Q_CC_GHS)
58# include <INTEGRITY_types.h>
59#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
60# include <sys/sysctl.h>
61#endif
62
63QT_BEGIN_NAMESPACE
64
65template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE
66uint arraysize(T (&)[N])
67{
68 // Same as std::size, but with QT_FUNCTION_TARGET_BASELIE,
69 // otherwise some versions of GCC fail to compile.
70 return N;
71}
72
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 aes
 */
// All feature names, concatenated. Every name carries a leading space and its
// own NUL terminator; features_indices[i] is the offset at which name i
// starts. Offset 0 is an empty string.
static const char features_string[] =
    "\0"
    " neon\0"
    " crc32\0"
    " aes\0";
static const int features_indices[] = { 0, 1, 7, 14 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
*/
// All feature names, concatenated. Every name carries a leading space and its
// own NUL terminator; features_indices[i] is the offset at which name i
// starts. Offset 0 is an empty string.
static const char features_string[] =
    "\0"
    " dsp\0"
    " dspr2\0";

static const int features_indices[] = {
    0, 1, 6
};
#elif defined(Q_PROCESSOR_X86)
#  include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// No named features: a single empty name at offset 0.
static const char features_string[] = "";
static const int features_indices[] = { 0 };
#endif
// end generated
105
106#if defined(Q_PROCESSOR_ARM)
107static inline quint64 detectProcessorFeatures()
108{
109 quint64 features = 0;
110
111#if QT_CONFIG(getauxval)
112 unsigned long auxvHwCap = getauxval(AT_HWCAP);
113 if (auxvHwCap != 0) {
114# if defined(Q_PROCESSOR_ARM_64)
115 // For Aarch64:
116 features |= CpuFeatureNEON; // NEON is always available
117 if (auxvHwCap & HWCAP_CRC32)
118 features |= CpuFeatureCRC32;
119 if (auxvHwCap & HWCAP_AES)
120 features |= CpuFeatureAES;
121# else
122 // For ARM32:
123 if (auxvHwCap & HWCAP_NEON)
124 features |= CpuFeatureNEON;
125 auxvHwCap = getauxval(AT_HWCAP2);
126 if (auxvHwCap & HWCAP2_CRC32)
127 features |= CpuFeatureCRC32;
128 if (auxvHwCap & HWCAP2_AES)
129 features |= CpuFeatureAES;
130# endif
131 return features;
132 }
133 // fall back to compile-time flags if getauxval failed
134#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
135 unsigned feature;
136 size_t len = sizeof(feature);
137 if (sysctlbyname("hw.optional.neon", &feature, &len, nullptr, 0) == 0)
138 features |= feature ? CpuFeatureNEON : 0;
139 if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0)
140 features |= feature ? CpuFeatureCRC32 : 0;
141 if (sysctlbyname("hw.optional.arm.FEAT_AES", &feature, &len, nullptr, 0) == 0)
142 features |= feature ? CpuFeatureAES : 0;
143#if defined(__ARM_FEATURE_CRYPTO)
144 features |= CpuFeatureAES;
145#endif
146 return features;
147#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64)
148 features |= CpuFeatureNEON;
149 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
150 features |= CpuFeatureCRC32;
151 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0)
152 features |= CpuFeatureAES;
153 return features;
154#endif
155#if defined(__ARM_NEON__)
156 features |= CpuFeatureNEON;
157#endif
158#if defined(__ARM_FEATURE_CRC32)
159 features |= CpuFeatureCRC32;
160#endif
161#if defined(__ARM_FEATURE_CRYPTO)
162 features |= CpuFeatureAES;
163#endif
164
165 return features;
166}
167
168#elif defined(Q_PROCESSOR_X86)
169
170#ifdef Q_PROCESSOR_X86_32
171# define PICreg "%%ebx"
172#else
173# define PICreg "%%rbx"
174#endif
175#ifdef __SSE2_MATH__
176# define X86_BASELINE "no-sse3"
177#else
178# define X86_BASELINE "no-sse"
179#endif
180
181#if defined(Q_CC_GNU)
182// lower the target for functions in this file
183# undef QT_FUNCTION_TARGET_BASELINE
184# define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE)))
185# define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \
186 X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND
187#endif
188
189static bool checkRdrndWorks() noexcept;
190
191QT_FUNCTION_TARGET_BASELINE
192static int maxBasicCpuidSupported()
193{
194#if defined(Q_CC_EMSCRIPTEN)
195 return 6; // All features supported by Emscripten
196#elif defined(Q_CC_GNU)
197 qregisterint tmp1;
198
199# if Q_PROCESSOR_X86 < 5
200 // check if the CPUID instruction is supported
201 long cpuid_supported;
202 asm ("pushf\n"
203 "pop %0\n"
204 "mov %0, %1\n"
205 "xor $0x00200000, %0\n"
206 "push %0\n"
207 "popf\n"
208 "pushf\n"
209 "pop %0\n"
210 "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
211 : "=a" (cpuid_supported), "=r" (tmp1)
212 );
213 if (!cpuid_supported)
214 return 0;
215# endif
216
217 int result;
218 asm ("xchg " PICreg", %1\n"
219 "cpuid\n"
220 "xchg " PICreg", %1\n"
221 : "=&a" (result), "=&r" (tmp1)
222 : "0" (0)
223 : "ecx", "edx");
224 return result;
225#elif defined(Q_OS_WIN)
226 // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
227 int info[4];
228 __cpuid(info, 0);
229 return info[0];
230#elif defined(Q_CC_GHS)
231 unsigned int info[4];
232 __CPUID(0, info);
233 return info[0];
234#else
235 return 0;
236#endif
237}
238
239QT_FUNCTION_TARGET_BASELINE
240static void cpuidFeatures01(uint &ecx, uint &edx)
241{
242#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
243 qregisterint tmp1;
244 asm ("xchg " PICreg", %2\n"
245 "cpuid\n"
246 "xchg " PICreg", %2\n"
247 : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
248 : "a" (1));
249#elif defined(Q_OS_WIN)
250 int info[4];
251 __cpuid(info, 1);
252 ecx = info[2];
253 edx = info[3];
254#elif defined(Q_CC_GHS)
255 unsigned int info[4];
256 __CPUID(1, info);
257 ecx = info[2];
258 edx = info[3];
259#else
260 Q_UNUSED(ecx);
261 Q_UNUSED(edx);
262#endif
263}
264
#ifdef Q_OS_WIN
// Overload of __cpuidex() that reports four zeroed registers.
// NOTE(review): presumably only selected by overload resolution when the
// compiler does not provide the real intrinsic - confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    for (int i = 0; i < 4; ++i)
        info[i] = 0;
}
#endif
268
269QT_FUNCTION_TARGET_BASELINE
270static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
271{
272#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
273 qregisteruint rbx; // in case it's 64-bit
274 qregisteruint rcx = 0;
275 qregisteruint rdx = 0;
276 asm ("xchg " PICreg", %0\n"
277 "cpuid\n"
278 "xchg " PICreg", %0\n"
279 : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
280 : "a" (7));
281 ebx = rbx;
282 ecx = rcx;
283 edx = rdx;
284#elif defined(Q_OS_WIN)
285 int info[4];
286 __cpuidex(info, 7, 0);
287 ebx = info[1];
288 ecx = info[2];
289 edx = info[3];
290#elif defined(Q_CC_GHS)
291 unsigned int info[4];
292 __CPUIDEX(7, 0, info);
293 ebx = info[1];
294 ecx = info[2];
295 edx = info[3];
296#else
297 Q_UNUSED(ebx);
298 Q_UNUSED(ecx);
299 Q_UNUSED(edx);
300#endif
301}
302
303QT_FUNCTION_TARGET_BASELINE
304#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
305// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
306inline quint64 _xgetbv(__int64) { return 0; }
307#endif
308static void xgetbv(uint in, uint &eax, uint &edx)
309{
310#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
311 asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
312 : "=a" (eax), "=d" (edx)
313 : "c" (in));
314#elif defined(Q_OS_WIN)
315 quint64 result = _xgetbv(in);
316 eax = result;
317 edx = result >> 32;
318#else
319 Q_UNUSED(in);
320 Q_UNUSED(eax);
321 Q_UNUSED(edx);
322#endif
323}
324
325QT_FUNCTION_TARGET_BASELINE
326static quint64 adjustedXcr0(quint64 xcr0)
327{
328 /*
329 * Some OSes hide their capability of context-switching the AVX512 state in
330 * the XCR0 register. They do that so the first time we execute an
331 * instruction that may access the AVX512 state (requiring the EVEX prefix)
332 * they allocate the necessary context switch space.
333 *
334 * This behavior is deprecated with the XFD (Extended Feature Disable)
335 * register, but we can't change existing OSes.
336 */
337#ifdef Q_OS_DARWIN
338 // from <machine/cpu_capabilities.h> in xnu
339 // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
340 constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000);
341 constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000;
342 constexpr quintptr cpu_capabilities64 = commpage + 0x10;
343 quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
344 if (capab & kHasAVX512F)
345 xcr0 |= XSave_Avx512State;
346#endif
347
348 return xcr0;
349}
350
351QT_FUNCTION_TARGET_BASELINE
352static quint64 detectProcessorFeatures()
353{
354 quint64 features = 0;
355 int cpuidLevel = maxBasicCpuidSupported();
356#if Q_PROCESSOR_X86 < 5
357 if (cpuidLevel < 1)
358 return 0;
359#else
360 assert(cpuidLevel >= 1);
361#endif
362
363 uint results[X86CpuidMaxLeaf] = {};
364 cpuidFeatures01(ecx&: results[Leaf01ECX], edx&: results[Leaf01EDX]);
365 if (cpuidLevel >= 7)
366 cpuidFeatures07_00(ebx&: results[Leaf07_00EBX], ecx&: results[Leaf07_00ECX], edx&: results[Leaf07_00EDX]);
367
368 // populate our feature list
369 for (uint i = 0; i < arraysize(x86_locators); ++i) {
370 uint word = x86_locators[i] / 32;
371 uint bit = 1U << (x86_locators[i] % 32);
372 quint64 feature = Q_UINT64_C(1) << i;
373 if (results[word] & bit)
374 features |= feature;
375 }
376
377 // now check the AVX state
378 quint64 xcr0 = 0;
379 if (results[Leaf01ECX] & (1u << 27)) {
380 // XGETBV enabled
381 uint xgetbvA = 0, xgetbvD = 0;
382 xgetbv(in: 0, eax&: xgetbvA, edx&: xgetbvD);
383
384 xcr0 = xgetbvA;
385 if (sizeof(XSaveBits) > sizeof(xgetbvA))
386 xcr0 |= quint64(xgetbvD) << 32;
387 xcr0 = adjustedXcr0(xcr0);
388 }
389
390 for (auto req : xsave_requirements) {
391 if ((xcr0 & req.xsave_state) != req.xsave_state)
392 features &= ~req.cpu_features;
393 }
394
395 if (features & CpuFeatureRDRND && !checkRdrndWorks())
396 features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);
397
398 return features;
399}
400
401#elif defined(Q_PROCESSOR_MIPS_32)
402
403#if defined(Q_OS_LINUX)
404//
405// Do not use QByteArray: it could use SIMD instructions itself at
406// some point, thus creating a recursive dependency. Instead, use a
407// QSimpleBuffer, which has the bare minimum needed to use memory
408// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
409//
// Minimal growable byte buffer on top of realloc().
//
//  data  - the allocation (nullptr until the first growth)
//  alloc - number of bytes allocated
//  size  - number of bytes in use
//
// Allocation failures are not handled.
struct QSimpleBuffer
{
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // The object owns "data"; a member-wise copy would free it twice.
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the size in use to newsize, growing the allocation if required.
    // The allocation never shrinks.
    void resize(unsigned newsize)
    {
        if (newsize > alloc) {
            // round up to the next multiple of chunk_size (always > newsize)
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) // the rounding overflowed
                newalloc = newsize;
            if (newalloc != alloc) {
                data = static_cast<char *>(::realloc(data, newalloc));
                alloc = newalloc;
            }
        }
        size = newsize;
    }

    // Appends the first appendsize bytes of other.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        if (appendsize == 0)
            return; // nothing to do; also keeps null pointers away from memcpy()
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Removes the first amount bytes, moving the remainder to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size) {
            resize(0);
            return;
        }
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string. The terminator is not
    // counted in size.
    char *cString()
    {
        if (size >= alloc) {
            // No room for the terminator (this includes the never-allocated
            // case): grow the allocation without changing the size in use.
            const unsigned used = size;
            resize(used + 1);
            size = used;
        }
        data[size] = '\0';
        return data;
    }
};
453
454//
455// Uses a scratch "buffer" (which must be used for all reads done in the
456// same file descriptor) to read chunks of data from a file, to read
457// one line at a time. Lines include the trailing newline character ('\n').
458// On EOF, line.size is zero.
459//
460static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
461{
462 for (;;) {
463 char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size));
464 if (newline) {
465 unsigned piece_size = newline - buffer.data + 1;
466 line.append(buffer, piece_size);
467 buffer.popleft(piece_size);
468 line.resize(line.size - 1);
469 return;
470 }
471 if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
472 int oldsize = buffer.size;
473 buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
474 buffer.size = oldsize;
475 }
476 ssize_t read_bytes =
477 ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
478 if (read_bytes > 0)
479 buffer.size += read_bytes;
480 else
481 return;
482 }
483}
484
485//
486// Checks if any line with a given prefix from /proc/cpuinfo contains
487// a certain string, surrounded by spaces.
488//
489static bool procCpuinfoContains(const char *prefix, const char *string)
490{
491 int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
492 if (cpuinfo_fd == -1)
493 return false;
494
495 unsigned string_len = ::strlen(string);
496 unsigned prefix_len = ::strlen(prefix);
497 QSimpleBuffer line, buffer;
498 bool present = false;
499 do {
500 line.resize(0);
501 bufReadLine(cpuinfo_fd, line, buffer);
502 char *colon = static_cast<char *>(::memchr(line.data, ':', line.size));
503 if (colon && line.size > prefix_len + string_len) {
504 if (!::strncmp(prefix, line.data, prefix_len)) {
505 // prefix matches, next character must be ':' or space
506 if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
507 // Does it contain the string?
508 char *found = ::strstr(line.cString(), string);
509 if (found && ::isspace(found[-1]) &&
510 (::isspace(found[string_len]) || found[string_len] == '\0')) {
511 present = true;
512 break;
513 }
514 }
515 }
516 }
517 } while (line.size);
518
519 ::qt_safe_close(cpuinfo_fd);
520 return present;
521}
522#endif
523
524static inline quint64 detectProcessorFeatures()
525{
526 // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
527 quint64 flags = 0;
528
529#if defined __mips_dsp
530 flags |= CpuFeatureDSP;
531# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
532 flags |= CpuFeatureDSPR2;
533# elif defined(Q_OS_LINUX)
534 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
535 flags |= CpuFeatureDSPR2;
536# endif
537#elif defined(Q_OS_LINUX)
538 if (procCpuinfoContains("ASEs implemented", "dsp")) {
539 flags |= CpuFeatureDSP;
540 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
541 flags |= CpuFeatureDSPR2;
542 }
543#endif
544
545 return flags;
546}
547
548#else
549static inline uint detectProcessorFeatures()
550{
551 return 0;
552}
553#endif
554
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// The topmost bit of QCpuFeatureType: qDetectCpuFeatures() sets it in the
// value it stores, in addition to the detected feature bits.
static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1);
// Result of the detection; starts out as zero and is written by
// qDetectCpuFeatures().
Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 };
560
561QT_FUNCTION_TARGET_BASELINE
562uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)()
563{
564 auto minFeatureTest = minFeature;
565#if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk)
566 // Controlflow Enforcement Technology (CET) is an OS-assisted
567 // hardware-feature, meaning the CPUID bit may be disabled if the OS
568 // doesn't support it, but that's ok.
569 minFeatureTest &= ~CpuFeatureSHSTK;
570#endif
571 QCpuFeatureType f = detectProcessorFeatures();
572
573 // Intentionally NOT qgetenv (this code runs too early)
574 if (char *disable = getenv(name: "QT_NO_CPU_FEATURE"); disable && *disable) {
575#if _POSIX_C_SOURCE >= 200112L
576 char *saveptr = nullptr;
577 auto strtok = [&saveptr](char *str, const char *delim) {
578 return ::strtok_r(s: str, delim: delim, save_ptr: &saveptr);
579 };
580#endif
581 while (char *token = strtok(disable, " ")) {
582 disable = nullptr;
583 for (uint i = 0; i < arraysize(features_indices); ++i) {
584 if (strcmp(s1: token, s2: features_string + features_indices[i]) == 0)
585 f &= ~(Q_UINT64_C(1) << i);
586 }
587 }
588 }
589
590#ifdef RUNNING_ON_VALGRIND
591 bool runningOnValgrind = RUNNING_ON_VALGRIND;
592#else
593 bool runningOnValgrind = false;
594#endif
595 if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
596 quint64 missing = minFeatureTest & ~quint64(f);
597 fprintf(stderr, format: "Incompatible processor. This Qt build requires the following features:\n ");
598 for (uint i = 0; i < arraysize(features_indices); ++i) {
599 if (missing & (Q_UINT64_C(1) << i))
600 fprintf(stderr, format: "%s", features_string + features_indices[i]);
601 }
602 fprintf(stderr, format: "\n");
603 fflush(stderr);
604 qAbort();
605 }
606
607 assert((f & SimdInitialized) == 0);
608 f |= SimdInitialized;
609 std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), i: f, m: std::memory_order_relaxed);
610 return f;
611}
612
613QT_FUNCTION_TARGET_BASELINE
614void qDumpCPUFeatures()
615{
616 quint64 features = detectProcessorFeatures() & ~SimdInitialized;
617 printf(format: "Processor features: ");
618 for (uint i = 0; i < arraysize(features_indices); ++i) {
619 if (features & (Q_UINT64_C(1) << i))
620 printf(format: "%s%s", features_string + features_indices[i],
621 minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
622 }
623 if ((features = (qCompilerCpuFeatures & ~features))) {
624 printf(format: "\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
625 for (uint i = 0; i < arraysize(features_indices); ++i) {
626 if (features & (Q_UINT64_C(1) << i))
627 printf(format: "%s", features_string + features_indices[i]);
628 }
629 printf(format: "\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
630 }
631 puts(s: "");
632}
633
634#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
635
636# ifdef Q_PROCESSOR_X86_64
637# define _rdrandXX_step _rdrand64_step
638# define _rdseedXX_step _rdseed64_step
639# else
640# define _rdrandXX_step _rdrand32_step
641# define _rdseedXX_step _rdseed32_step
642# endif
643
644// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
645// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
646// long on Windows, but unsigned long on Linux.
647namespace {
648template <typename F> struct ExtractParameter;
649template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
650using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
651}
652
653# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
654static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
655{
656 // Unlike for the RDRAND code below, the Intel whitepaper describing the
657 // use of the RDSEED instruction indicates we should not retry in a loop.
658 // If the independent bit generator used by RDSEED is out of entropy, it
659 // may take time to replenish.
660 // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
661 while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) {
662 if (_rdseedXX_step(p: reinterpret_cast<randuint *>(ptr)) == 0)
663 goto out;
664 ptr += sizeof(randuint) / sizeof(*ptr);
665 }
666
667 if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
668 if (_rdseed32_step(p: ptr) == 0)
669 goto out;
670 ++ptr;
671 }
672
673out:
674 return ptr;
675}
676# else
677static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
678{
679 return ptr;
680}
681# endif
682
683static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
684{
685 int retries = 10;
686 while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) {
687 if (_rdrandXX_step(p: reinterpret_cast<randuint *>(ptr)))
688 ptr += sizeof(randuint)/sizeof(*ptr);
689 else if (--retries == 0)
690 goto out;
691 }
692
693 while (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
694 bool ok = _rdrand32_step(p: ptr);
695 if (!ok && --retries)
696 continue;
697 if (ok)
698 ++ptr;
699 break;
700 }
701
702out:
703 return ptr;
704}
705
706QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION
707static bool checkRdrndWorks() noexcept
708{
709 /*
710 * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
711 * failing random generation instruction, which always returns
712 * 0xffffffff, even when generation was "successful".
713 *
714 * This code checks if hardware random generator generates four consecutive
715 * equal numbers. If it does, then we probably have a failing one and
716 * should disable it completely.
717 *
718 * https://bugreports.qt.io/browse/QTBUG-69423
719 */
720 constexpr qsizetype TestBufferSize = 4;
721 unsigned testBuffer[TestBufferSize] = {};
722
723 // But if the RDRND feature was statically enabled by the compiler, we
724 // assume that the RNG works. That's because the calls to qRandomCpu() will
725 // be guarded by qCpuHasFeature(RDRND) and that will be a constant true.
726 if (_compilerCpuFeatures & CpuFeatureRDRND)
727 return true;
728
729 unsigned *end = qt_random_rdrnd(ptr: testBuffer, end: testBuffer + TestBufferSize);
730 if (end < testBuffer + 3) {
731 // Random generation didn't produce enough data for us to make a
732 // determination whether it's working or not. Assume it isn't, but
733 // don't print a warning.
734 return false;
735 }
736
737 // Check the results for equality
738 if (testBuffer[0] == testBuffer[1]
739 && testBuffer[0] == testBuffer[2]
740 && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) {
741 fprintf(stderr, format: "WARNING: CPU random generator seem to be failing, "
742 "disabling hardware random number generation\n"
743 "WARNING: RDRND generated:");
744 for (unsigned *ptr = testBuffer; ptr < end; ++ptr)
745 fprintf(stderr, format: " 0x%x", *ptr);
746 fprintf(stderr, format: "\n");
747 return false;
748 }
749
750 // We're good
751 return true;
752}
753
754QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
755{
756 unsigned *ptr = reinterpret_cast<unsigned *>(buffer);
757 unsigned *end = ptr + count;
758
759 if (qCpuHasFeature(RDSEED))
760 ptr = qt_random_rdseed(ptr, end);
761
762 // fill the buffer with RDRND if RDSEED didn't
763 ptr = qt_random_rdrnd(ptr, end);
764 return ptr - reinterpret_cast<unsigned *>(buffer);
765}
766#elif defined(Q_PROCESSOR_X86) && !defined(Q_PROCESSOR_ARM)
// Version for builds without RDRND support: the generator is never usable.
static bool checkRdrndWorks() noexcept
{
    return false;
}
768#endif // Q_PROCESSOR_X86 && RDRND
769
#if QT_SUPPORTS_INIT_PRIORITY
namespace {
// Runs the CPU feature detection from its constructor.
struct QSimdInitializer
{
    QSimdInitializer()
    {
        QT_MANGLE_NAMESPACE(qDetectCpuFeatures)();
    }
};
}

// This is intentionally a dynamic initialization of the variable
Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer;
#endif
781
782QT_END_NAMESPACE
783

Provided by KDAB

Privacy Policy
Start learning QML with our Intro Training
Find out more

source code of qtbase/src/corelib/global/qsimd.cpp