1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Copyright (C) 2019 Intel Corporation. |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the QtCore module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | // we need ICC to define the prototype for _rdseed64_step |
42 | #define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES |
43 | |
44 | #include "qsimd_p.h" |
45 | #include "qalgorithms.h" |
46 | #include <QByteArray> |
47 | #include <stdio.h> |
48 | |
49 | #ifdef Q_OS_LINUX |
50 | # include "../testlib/3rdparty/valgrind_p.h" |
51 | #endif |
52 | |
53 | #if defined(Q_OS_WIN) |
54 | # if !defined(Q_CC_GNU) |
55 | # include <intrin.h> |
56 | # endif |
57 | #elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32)) |
58 | #include "private/qcore_unix_p.h" |
59 | |
60 | // the kernel header definitions for HWCAP_* |
61 | // (the ones we need/may need anyway) |
62 | |
63 | // copied from <asm/hwcap.h> (ARM) |
64 | #define HWCAP_CRUNCH 1024 |
65 | #define HWCAP_THUMBEE 2048 |
66 | #define HWCAP_NEON 4096 |
67 | #define HWCAP_VFPv3 8192 |
68 | #define HWCAP_VFPv3D16 16384 |
69 | |
70 | // copied from <asm/hwcap.h> (ARM): |
71 | #define HWCAP2_CRC32 (1 << 4) |
72 | |
73 | // copied from <asm/hwcap.h> (Aarch64) |
74 | #define HWCAP_CRC32 (1 << 7) |
75 | |
76 | // copied from <linux/auxvec.h> |
77 | #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ |
78 | #define AT_HWCAP2 26 /* extension of AT_HWCAP */ |
79 | |
80 | #elif defined(Q_CC_GHS) |
81 | #include <INTEGRITY_types.h> |
82 | #endif |
83 | |
84 | QT_BEGIN_NAMESPACE |
85 | |
86 | /* |
87 | * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note |
88 | * we remove the terminating -1 that the script adds. |
89 | */ |
90 | |
91 | // begin generated |
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 */
// Feature names stored back to back; every name carries a leading space and
// its own NUL terminator so it can be matched as " name" and printed as-is.
static const char features_string[] =
    " neon\0"
    " crc32\0"
    "\0" ;
// Byte offset of each feature name inside features_string.
static const int features_indices[] = { 0, 6 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
 */
// Feature names stored back to back; every name carries a leading space and
// its own NUL terminator so it can be matched as " name" and printed as-is.
static const char features_string[] =
    " dsp\0"
    " dspr2\0"
    "\0" ;

// Byte offset of each feature name inside features_string.
static const int features_indices[] = {
    0, 5
};
#elif defined(Q_PROCESSOR_X86)
// The x86 tables are provided by the included file.
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// No named features on other architectures: empty tables.
static const char features_string[] = "" ;
static const int features_indices[] = { };
#endif
121 | // end generated |
122 | |
123 | #if defined (Q_OS_NACL) |
// NaCl build: no runtime detection is attempted, so no feature is reported.
static inline uint detectProcessorFeatures()
{
    return 0;
}
128 | #elif defined(Q_PROCESSOR_ARM) |
129 | static inline quint64 detectProcessorFeatures() |
130 | { |
131 | quint64 features = 0; |
132 | |
133 | #if defined(Q_OS_LINUX) |
134 | # if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) |
135 | features |= CpuFeatureNEON; // NEON is always available on ARMv8 64bit. |
136 | # endif |
137 | int auxv = qt_safe_open("/proc/self/auxv" , O_RDONLY); |
138 | if (auxv != -1) { |
139 | unsigned long vector[64]; |
140 | int nread; |
141 | while (features == 0) { |
142 | nread = qt_safe_read(auxv, (char *)vector, sizeof vector); |
143 | if (nread <= 0) { |
144 | // EOF or error |
145 | break; |
146 | } |
147 | |
148 | int max = nread / (sizeof vector[0]); |
149 | for (int i = 0; i < max; i += 2) { |
150 | if (vector[i] == AT_HWCAP) { |
151 | # if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) |
152 | // For Aarch64: |
153 | if (vector[i+1] & HWCAP_CRC32) |
154 | features |= CpuFeatureCRC32; |
155 | # endif |
156 | // Aarch32, or ARMv7 or before: |
157 | if (vector[i+1] & HWCAP_NEON) |
158 | features |= CpuFeatureNEON; |
159 | } |
160 | # if defined(Q_PROCESSOR_ARM_32) |
161 | // For Aarch32: |
162 | if (vector[i] == AT_HWCAP2) { |
163 | if (vector[i+1] & HWCAP2_CRC32) |
164 | features |= CpuFeatureCRC32; |
165 | } |
166 | # endif |
167 | } |
168 | } |
169 | |
170 | qt_safe_close(auxv); |
171 | return features; |
172 | } |
173 | // fall back if /proc/self/auxv wasn't found |
174 | #endif |
175 | |
176 | #if defined(__ARM_NEON__) |
177 | features |= CpuFeatureNEON; |
178 | #endif |
179 | #if defined(__ARM_FEATURE_CRC32) |
180 | features |= CpuFeatureCRC32; |
181 | #endif |
182 | |
183 | return features; |
184 | } |
185 | |
186 | #elif defined(Q_PROCESSOR_X86) |
187 | |
188 | #ifdef Q_PROCESSOR_X86_32 |
189 | # define PICreg "%%ebx" |
190 | #else |
191 | # define PICreg "%%rbx" |
192 | #endif |
193 | |
194 | static bool checkRdrndWorks() noexcept; |
195 | |
// Returns the value CPUID leaf 0 leaves in EAX (used by the caller as the
// highest basic leaf that may be queried), or 0 when CPUID cannot be used on
// this compiler/CPU combination.
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint tmp1;

# if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported
    // The flags register is read, bit 0x00200000 is toggled and written back,
    // then the flags are read again: the final XOR leaves a non-zero value
    // only if the toggled bit actually stuck.
    long cpuid_supported;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (cpuid_supported), "=r" (tmp1)
         );
    if (!cpuid_supported)
        return 0;
# endif

    // CPUID is bracketed by two XCHGs with PICreg so that register holds its
    // original value again afterwards (presumably because it is reserved for
    // position-independent code -- the macro name suggests so).
    int result;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
         : "=&a" (result), "=&r" (tmp1)
         : "0" (0)
         : "ecx" , "edx" );
    return result;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int info[4];
    __cpuid(info, 0);
    return info[0];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(0, info);
    return info[0];
#else
    // No known way to execute CPUID with this toolchain.
    return 0;
#endif
}
242 | |
// Executes CPUID leaf 1 and stores the resulting ECX and EDX register values
// in the output parameters. On toolchains without a known way to run CPUID
// the outputs are left untouched.
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    // PICreg is swapped out around CPUID and swapped back afterwards, so it
    // holds its original value when the asm statement ends.
    qregisterint tmp1;
    asm ("xchg " PICreg", %2\n"
         "cpuid\n"
         "xchg " PICreg", %2\n"
         : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
         : "a" (1));
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuid(info, 1);
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(1, info);
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
267 | |
#ifdef Q_OS_WIN
// Fallback overload that only zero-fills the result.
// NOTE(review): presumably selected by overload resolution only when the
// compiler does not declare the real __cpuidex intrinsic -- confirm.
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif

// Executes CPUID leaf 7 with ECX = 0 and stores the resulting EBX, ECX and
// EDX register values in the output parameters. On toolchains without a
// known way to run CPUID the outputs are left untouched.
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    qregisteruint rbx; // in case it's 64-bit
    qregisteruint rcx = 0;
    qregisteruint rdx = 0;
    // EBX/RBX is not named as an operand: it is swapped into a scratch
    // register around CPUID and restored afterwards.
    asm ("xchg " PICreg", %0\n"
         "cpuid\n"
         "xchg " PICreg", %0\n"
         : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
         : "a" (7));
    ebx = rbx;
    ecx = rcx;
    edx = rdx;
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuidex(info, 7, 0);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUIDEX(7, 0, info);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ebx);
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
304 | |
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
inline quint64 _xgetbv(__int64) { return 0; }
#endif
// Executes XGETBV with ECX = `in` and returns the low half of the result in
// `eax` and the high half in `edx`. On toolchains without a known way to
// issue the instruction the outputs are left untouched.
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
    // Emitted as raw bytes rather than by mnemonic.
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
         : "=a" (eax), "=d" (edx)
         : "c" (in));
#elif defined(Q_OS_WIN)
    // The intrinsic returns one 64-bit value; split it into two halves.
    quint64 result = _xgetbv(in);
    eax = result;
    edx = result >> 32;
#else
    Q_UNUSED(in);
    Q_UNUSED(eax);
    Q_UNUSED(edx);
#endif
}
325 | |
326 | static quint64 detectProcessorFeatures() |
327 | { |
328 | // Flags from the CR0 / XCR0 state register |
329 | enum XCR0Flags { |
330 | X87 = 1 << 0, |
331 | XMM0_15 = 1 << 1, |
332 | YMM0_15Hi128 = 1 << 2, |
333 | BNDRegs = 1 << 3, |
334 | BNDCSR = 1 << 4, |
335 | OpMask = 1 << 5, |
336 | ZMM0_15Hi256 = 1 << 6, |
337 | ZMM16_31 = 1 << 7, |
338 | |
339 | SSEState = XMM0_15, |
340 | AVXState = XMM0_15 | YMM0_15Hi128, |
341 | AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 |
342 | }; |
343 | static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512; |
344 | static const quint64 AllAVX = CpuFeatureAVX | AllAVX2; |
345 | |
346 | quint64 features = 0; |
347 | int cpuidLevel = maxBasicCpuidSupported(); |
348 | #if Q_PROCESSOR_X86 < 5 |
349 | if (cpuidLevel < 1) |
350 | return 0; |
351 | #else |
352 | Q_ASSERT(cpuidLevel >= 1); |
353 | #endif |
354 | |
355 | uint results[X86CpuidMaxLeaf] = {}; |
356 | cpuidFeatures01(ecx&: results[Leaf1ECX], edx&: results[Leaf1EDX]); |
357 | if (cpuidLevel >= 7) |
358 | cpuidFeatures07_00(ebx&: results[Leaf7_0EBX], ecx&: results[Leaf7_0ECX], edx&: results[Leaf7_0EDX]); |
359 | |
360 | // populate our feature list |
361 | for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { |
362 | uint word = x86_locators[i] / 32; |
363 | uint bit = 1U << (x86_locators[i] % 32); |
364 | quint64 feature = Q_UINT64_C(1) << (i + 1); |
365 | if (results[word] & bit) |
366 | features |= feature; |
367 | } |
368 | |
369 | // now check the AVX state |
370 | uint xgetbvA = 0, xgetbvD = 0; |
371 | if (results[Leaf1ECX] & (1u << 27)) { |
372 | // XGETBV enabled |
373 | xgetbv(in: 0, eax&: xgetbvA, edx&: xgetbvD); |
374 | } |
375 | |
376 | if ((xgetbvA & AVXState) != AVXState) { |
377 | // support for YMM registers is disabled, disable all AVX |
378 | features &= ~AllAVX; |
379 | } else if ((xgetbvA & AVX512State) != AVX512State) { |
380 | // support for ZMM registers or mask registers is disabled, disable all AVX512 |
381 | features &= ~AllAVX512; |
382 | } |
383 | |
384 | if (features & CpuFeatureRDRND && !checkRdrndWorks()) |
385 | features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); |
386 | |
387 | return features; |
388 | } |
389 | |
390 | #elif defined(Q_PROCESSOR_MIPS_32) |
391 | |
392 | #if defined(Q_OS_LINUX) |
393 | // |
394 | // Do not use QByteArray: it could use SIMD instructions itself at |
395 | // some point, thus creating a recursive dependency. Instead, use a |
396 | // QSimpleBuffer, which has the bare minimum needed to use memory |
397 | // dynamically and read lines from /proc/cpuinfo of arbitrary sizes. |
398 | // |
// Minimal growable byte buffer backed by realloc().
//
// `size` is the number of valid bytes in `data`; `alloc` is the capacity.
// The capacity only ever grows, in multiples of chunk_size. Running out of
// memory aborts the process, because none of the operations can report
// failure to their callers.
struct QSimpleBuffer {
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer(): data(nullptr), alloc(0), size(0) {}
    ~QSimpleBuffer() { ::free(data); }

    // The buffer owns `data`; a member-wise copy would free it twice.
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the logical size to `newsize`, growing the allocation if needed.
    // Bytes exposed by growing are uninitialised. Shrinking keeps the memory.
    void resize(unsigned newsize) {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) newalloc = newsize; // multiplication wrapped
            // Keep the old pointer until realloc() is known to have worked.
            char *grown = static_cast<char*>(::realloc(data, newalloc));
            if (!grown)
                ::abort();
            data = grown;
            alloc = newalloc;
        }
        size = newsize;
    }
    // Appends the first `appendsize` bytes of `other` to this buffer.
    // `other` must be a different object holding at least that many bytes.
    void append(const QSimpleBuffer &other, unsigned appendsize) {
        if (!appendsize) return; // nothing to copy; other.data may be null
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }
    // Discards the first `amount` bytes, moving the remainder to the front.
    void popleft(unsigned amount) {
        if (amount >= size) return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }
    // Returns the contents as a NUL-terminated string. The terminator is
    // stored just past the valid bytes and is not counted in `size`.
    char* cString() {
        const unsigned length = size;
        resize(length + 1);   // guarantees room for the terminator
        size = length;        // the terminator is not part of the contents
        data[length] = '\0';
        return data;
    }
};
434 | |
435 | // |
436 | // Uses a scratch "buffer" (which must be used for all reads done in the |
437 | // same file descriptor) to read chunks of data from a file, to read |
438 | // one line at a time. Lines include the trailing newline character ('\n'). |
439 | // On EOF, line.size is zero. |
440 | // |
441 | static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) |
442 | { |
443 | for (;;) { |
444 | char *newline = static_cast<char*>(::memchr(buffer.data, '\n', buffer.size)); |
445 | if (newline) { |
446 | unsigned piece_size = newline - buffer.data + 1; |
447 | line.append(buffer, piece_size); |
448 | buffer.popleft(piece_size); |
449 | line.resize(line.size - 1); |
450 | return; |
451 | } |
452 | if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { |
453 | int oldsize = buffer.size; |
454 | buffer.resize(buffer.size + QSimpleBuffer::chunk_size); |
455 | buffer.size = oldsize; |
456 | } |
457 | ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); |
458 | if (read_bytes > 0) buffer.size += read_bytes; |
459 | else return; |
460 | } |
461 | } |
462 | |
463 | // |
464 | // Checks if any line with a given prefix from /proc/cpuinfo contains |
465 | // a certain string, surrounded by spaces. |
466 | // |
467 | static bool procCpuinfoContains(const char *prefix, const char *string) |
468 | { |
469 | int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo" , O_RDONLY); |
470 | if (cpuinfo_fd == -1) |
471 | return false; |
472 | |
473 | unsigned string_len = ::strlen(string); |
474 | unsigned prefix_len = ::strlen(prefix); |
475 | QSimpleBuffer line, buffer; |
476 | bool present = false; |
477 | do { |
478 | line.resize(0); |
479 | bufReadLine(cpuinfo_fd, line, buffer); |
480 | char *colon = static_cast<char*>(::memchr(line.data, ':', line.size)); |
481 | if (colon && line.size > prefix_len + string_len) { |
482 | if (!::strncmp(prefix, line.data, prefix_len)) { |
483 | // prefix matches, next character must be ':' or space |
484 | if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { |
485 | // Does it contain the string? |
486 | char *found = ::strstr(line.cString(), string); |
487 | if (found && ::isspace(found[-1]) && |
488 | (::isspace(found[string_len]) || found[string_len] == '\0')) { |
489 | present = true; |
490 | break; |
491 | } |
492 | } |
493 | } |
494 | } |
495 | } while (line.size); |
496 | |
497 | ::qt_safe_close(cpuinfo_fd); |
498 | return present; |
499 | } |
500 | #endif |
501 | |
502 | static inline quint64 detectProcessorFeatures() |
503 | { |
504 | // NOTE: MIPS 74K cores are the only ones supporting DSPr2. |
505 | quint64 flags = 0; |
506 | |
507 | #if defined __mips_dsp |
508 | flags |= CpuFeatureDSP; |
509 | # if defined __mips_dsp_rev && __mips_dsp_rev >= 2 |
510 | flags |= CpuFeatureDSPR2; |
511 | # elif defined(Q_OS_LINUX) |
512 | if (procCpuinfoContains("cpu model" , "MIPS 74Kc" ) || procCpuinfoContains("cpu model" , "MIPS 74Kf" )) |
513 | flags |= CpuFeatureDSPR2; |
514 | # endif |
515 | #elif defined(Q_OS_LINUX) |
516 | if (procCpuinfoContains("ASEs implemented" , "dsp" )) { |
517 | flags |= CpuFeatureDSP; |
518 | if (procCpuinfoContains("cpu model" , "MIPS 74Kc" ) || procCpuinfoContains("cpu model" , "MIPS 74Kf" )) |
519 | flags |= CpuFeatureDSPR2; |
520 | } |
521 | #endif |
522 | |
523 | return flags; |
524 | } |
525 | |
526 | #else |
// Generic fallback for architectures without a detection routine above:
// no feature is reported.
static inline uint detectProcessorFeatures()
{
    return 0;
}
531 | #endif |
532 | |
// Number of entries in features_indices, i.e. of named features.
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);

// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// Storage for the detected feature mask, filled in by qDetectCpuFeatures().
// With 64-bit atomics a single word holds the whole mask; otherwise the mask
// is split into two 32-bit words ([0] = low half, [1] = high half).
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
#else
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
#endif
543 | |
544 | quint64 qDetectCpuFeatures() |
545 | { |
546 | quint64 f = detectProcessorFeatures(); |
547 | QByteArray disable = qgetenv(varName: "QT_NO_CPU_FEATURE" ); |
548 | if (!disable.isEmpty()) { |
549 | disable.prepend(c: ' '); |
550 | for (int i = 0; i < features_count; ++i) { |
551 | if (disable.contains(c: features_string + features_indices[i])) |
552 | f &= ~(Q_UINT64_C(1) << i); |
553 | } |
554 | } |
555 | |
556 | #ifdef RUNNING_ON_VALGRIND |
557 | bool runningOnValgrind = RUNNING_ON_VALGRIND; |
558 | #else |
559 | bool runningOnValgrind = false; |
560 | #endif |
561 | if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) { |
562 | quint64 missing = minFeature & ~f; |
563 | fprintf(stderr, format: "Incompatible processor. This Qt build requires the following features:\n " ); |
564 | for (int i = 0; i < features_count; ++i) { |
565 | if (missing & (Q_UINT64_C(1) << i)) |
566 | fprintf(stderr, format: "%s" , features_string + features_indices[i]); |
567 | } |
568 | fprintf(stderr, format: "\n" ); |
569 | fflush(stderr); |
570 | qFatal(msg: "Aborted. Incompatible processor: missing feature 0x%llx -%s." , missing, |
571 | features_string + features_indices[qCountTrailingZeroBits(v: missing)]); |
572 | } |
573 | |
574 | qt_cpu_features[0].storeRelaxed(newValue: f | quint32(QSimdInitialized)); |
575 | #ifndef Q_ATOMIC_INT64_IS_SUPPORTED |
576 | qt_cpu_features[1].storeRelaxed(f >> 32); |
577 | #endif |
578 | return f; |
579 | } |
580 | |
581 | void qDumpCPUFeatures() |
582 | { |
583 | quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); |
584 | printf(format: "Processor features: " ); |
585 | for (int i = 0; i < features_count; ++i) { |
586 | if (features & (Q_UINT64_C(1) << i)) |
587 | printf(format: "%s%s" , features_string + features_indices[i], |
588 | minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "" ); |
589 | } |
590 | if ((features = (qCompilerCpuFeatures & ~features))) { |
591 | printf(format: "\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:" ); |
592 | for (int i = 0; i < features_count; ++i) { |
593 | if (features & (Q_UINT64_C(1) << i)) |
594 | printf(format: "%s" , features_string + features_indices[i]); |
595 | } |
596 | printf(format: "\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!" ); |
597 | } |
598 | puts(s: "" ); |
599 | } |
600 | |
601 | #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) |
602 | |
603 | # ifdef Q_PROCESSOR_X86_64 |
604 | # define _rdrandXX_step _rdrand64_step |
605 | # define _rdseedXX_step _rdseed64_step |
606 | # else |
607 | # define _rdrandXX_step _rdrand32_step |
608 | # define _rdseedXX_step _rdseed32_step |
609 | # endif |
610 | |
611 | // The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for |
612 | // Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long |
613 | // long on Windows, but unsigned long on Linux. |
614 | namespace { |
615 | template <typename F> struct ; |
616 | template <typename T> struct <int (T *)> { using = T; }; |
617 | using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type; |
618 | } |
619 | |
620 | # if QT_COMPILER_SUPPORTS_HERE(RDSEED) |
621 | static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept |
622 | { |
623 | // Unlike for the RDRAND code below, the Intel whitepaper describing the |
624 | // use of the RDSEED instruction indicates we should not retry in a loop. |
625 | // If the independent bit generator used by RDSEED is out of entropy, it |
626 | // may take time to replenish. |
627 | // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide |
628 | while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) { |
629 | if (_rdseedXX_step(p: reinterpret_cast<randuint *>(ptr)) == 0) |
630 | goto out; |
631 | ptr += sizeof(randuint)/sizeof(*ptr); |
632 | } |
633 | |
634 | if (sizeof(*ptr) != sizeof(randuint) && ptr != end) { |
635 | if (_rdseed32_step(p: ptr) == 0) |
636 | goto out; |
637 | ++ptr; |
638 | } |
639 | |
640 | out: |
641 | return ptr; |
642 | } |
643 | # else |
// Stub used when the compiler cannot generate RDSEED here: writes nothing
// and returns `ptr` unchanged, i.e. reports that no words were filled.
static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
{
    return ptr;
}
648 | # endif |
649 | |
650 | static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept |
651 | { |
652 | int retries = 10; |
653 | while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) { |
654 | if (_rdrandXX_step(p: reinterpret_cast<randuint *>(ptr))) |
655 | ptr += sizeof(randuint)/sizeof(*ptr); |
656 | else if (--retries == 0) |
657 | goto out; |
658 | } |
659 | |
660 | while (sizeof(*ptr) != sizeof(randuint) && ptr != end) { |
661 | bool ok = _rdrand32_step(p: ptr); |
662 | if (!ok && --retries) |
663 | continue; |
664 | if (ok) |
665 | ++ptr; |
666 | break; |
667 | } |
668 | |
669 | out: |
670 | return ptr; |
671 | } |
672 | |
673 | static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept |
674 | { |
675 | /* |
676 | * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a |
677 | * failing random generation instruction, which always returns |
678 | * 0xffffffff, even when generation was "successful". |
679 | * |
680 | * This code checks if hardware random generator generates four consecutive |
681 | * equal numbers. If it does, then we probably have a failing one and |
682 | * should disable it completely. |
683 | * |
684 | * https://bugreports.qt.io/browse/QTBUG-69423 |
685 | */ |
686 | constexpr qsizetype TestBufferSize = 4; |
687 | unsigned testBuffer[TestBufferSize] = {}; |
688 | |
689 | unsigned *end = qt_random_rdrnd(ptr: testBuffer, end: testBuffer + TestBufferSize); |
690 | if (end < testBuffer + 3) { |
691 | // Random generation didn't produce enough data for us to make a |
692 | // determination whether it's working or not. Assume it isn't, but |
693 | // don't print a warning. |
694 | return false; |
695 | } |
696 | |
697 | // Check the results for equality |
698 | if (testBuffer[0] == testBuffer[1] |
699 | && testBuffer[0] == testBuffer[2] |
700 | && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { |
701 | fprintf(stderr, format: "WARNING: CPU random generator seem to be failing, " |
702 | "disabling hardware random number generation\n" |
703 | "WARNING: RDRND generated:" ); |
704 | for (unsigned *ptr = testBuffer; ptr < end; ++ptr) |
705 | fprintf(stderr, format: " 0x%x" , *ptr); |
706 | fprintf(stderr, format: "\n" ); |
707 | return false; |
708 | } |
709 | |
710 | // We're good |
711 | return true; |
712 | } |
713 | |
714 | QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept |
715 | { |
716 | unsigned *ptr = reinterpret_cast<unsigned *>(buffer); |
717 | unsigned *end = ptr + count; |
718 | |
719 | if (qCpuHasFeature(RDSEED)) |
720 | ptr = qt_random_rdseed(ptr, end); |
721 | |
722 | // fill the buffer with RDRND if RDSEED didn't |
723 | ptr = qt_random_rdrnd(ptr, end); |
724 | return ptr - reinterpret_cast<unsigned *>(buffer); |
725 | } |
726 | #elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM) |
// Stub used when the compiler cannot generate RDRND here: always reports the
// generator as not working, which makes the caller drop RDRND and RDSEED.
static bool checkRdrndWorks() noexcept { return false; }
728 | #endif // Q_PROCESSOR_X86 && RDRND |
729 | |
730 | QT_END_NAMESPACE |
731 | |