| 1 | // Copyright (C) 2021 The Qt Company Ltd. |
| 2 | // Copyright (C) 2022 Intel Corporation. |
| 3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 4 | |
| 5 | #ifndef QSIMD_P_H |
| 6 | #define QSIMD_P_H |
| 7 | |
| 8 | // |
| 9 | // W A R N I N G |
| 10 | // ------------- |
| 11 | // |
| 12 | // This file is not part of the Qt API. It exists purely as an |
| 13 | // implementation detail. This header file may change from version to |
| 14 | // version without notice, or even be removed. |
| 15 | // |
| 16 | // We mean it. |
| 17 | // |
| 18 | |
| 19 | #include <QtCore/private/qglobal_p.h> |
| 20 | #include <QtCore/qsimd.h> |
| 21 | |
// -Wundef must be off: the QT_COMPILER_SUPPORTS_HERE() macros below evaluate
// feature macros that may be undefined (they then expand to 0).
QT_WARNING_PUSH
QT_WARNING_DISABLE_CLANG("-Wundef" )
QT_WARNING_DISABLE_GCC("-Wundef" )
QT_WARNING_DISABLE_INTEL(103)

// Scalar prologue for hand-vectorized loops: steps `i` one element at a time
// until ptr + i is 16-byte (resp. 32-byte) aligned or `length` is exhausted.
// The `>> 2` means these assume 4-byte elements — TODO confirm at use sites.
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)

#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((8 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x7)) & 0x7))); ++i)

// Scalar epilogue: handles at most `max` leftover elements after the
// vectorized main loop, stopping early when `length` is reached.
#define SIMD_EPILOGUE(i, length, max) \
for (int _i = 0; _i < max && i < length; ++i, ++_i)
| 35 | |
| 36 | /* |
| 37 | * Code can use the following constructs to determine compiler support & status: |
| 38 | * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__) |
| 39 | * If this test passes, then the compiler is already generating code for that |
| 40 | * given sub-architecture. The intrinsics for that sub-architecture are |
| 41 | * #included and can be used without restriction or runtime check. |
| 42 | * |
| 43 | * - #if QT_COMPILER_SUPPORTS(XXX) |
| 44 | * If this test passes, then the compiler is able to generate code for that |
| 45 | * given sub-architecture in another translation unit, given the right set of |
| 46 | * flags. Use of the intrinsics is not guaranteed. This is useful with |
| 47 | * runtime detection (see below). |
| 48 | * |
| 49 | * - #if QT_COMPILER_SUPPORTS_HERE(XXX) |
| 50 | * If this test passes, then the compiler is able to generate code for that |
| 51 | * given sub-architecture in this translation unit, even if it is not doing |
| 52 | * that now (it might be). Individual functions may be tagged with |
| 53 | * QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that |
* sub-arch. Only inside such functions is the use of the intrinsics
| 55 | * guaranteed to work. This is useful with runtime detection (see below). |
| 56 | * |
| 57 | * The distinction between QT_COMPILER_SUPPORTS and QT_COMPILER_SUPPORTS_HERE is |
| 58 | * historical: GCC 4.8 needed the distinction. |
| 59 | * |
| 60 | * Runtime detection of a CPU sub-architecture can be done with the |
| 61 | * qCpuHasFeature(XXX) function. There are two strategies for generating |
| 62 | * optimized code like that: |
| 63 | * |
| 64 | * 1) place the optimized code in a different translation unit (C or assembly |
| 65 | * sources) and pass the correct flags to the compiler to enable support. Those |
| 66 | * sources must not include qglobal.h, which means they cannot include this |
| 67 | * file either. The dispatcher function would look like this: |
| 68 | * |
| 69 | * void foo() |
| 70 | * { |
| 71 | * #if QT_COMPILER_SUPPORTS(XXX) |
| 72 | * if (qCpuHasFeature(XXX)) { |
| 73 | * foo_optimized_xxx(); |
| 74 | * return; |
| 75 | * } |
| 76 | * #endif |
| 77 | * foo_plain(); |
| 78 | * } |
| 79 | * |
| 80 | * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and |
| 81 | * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use |
| 82 | * other Qt code. The dispatcher function would look like this: |
| 83 | * |
| 84 | * void foo() |
| 85 | * { |
| 86 | * #if QT_COMPILER_SUPPORTS_HERE(XXX) |
| 87 | * if (qCpuHasFeature(XXX)) { |
| 88 | * foo_optimized_xxx(); |
| 89 | * return; |
| 90 | * } |
| 91 | * #endif |
| 92 | * foo_plain(); |
| 93 | * } |
| 94 | */ |
| 95 | |
| 96 | #if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC) |
| 97 | #include <intrin.h> |
| 98 | #endif |
| 99 | |
// Non-zero if the build system determined the compiler can generate code for
// feature x in a separate TU; "- 0" turns an empty or missing
// QT_COMPILER_SUPPORTS_<x> definition into 0 instead of a syntax error.
#define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0)

#if defined(Q_PROCESSOR_ARM)
// True when the feature is enabled in this TU (__ARM_FEATURE_x or __x__) or
// the compiler could enable it with the right flags. Undefined macros expand
// to 0 here (that is why -Wundef is disabled above).
# define QT_COMPILER_SUPPORTS_HERE(x) ((__ARM_FEATURE_ ## x) || (__ ## x ## __) || QT_COMPILER_SUPPORTS(x))
# if defined(Q_CC_GNU)
/* GCC requires attributes for a function */
# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
# else
# define QT_FUNCTION_TARGET(x)
# endif
#elif defined(Q_PROCESSOR_MIPS)
# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
# define QT_FUNCTION_TARGET(x)
// Normalize GCC's lowercase __mips_dsp/__mips_dspr2 to the uppercase spelling
// that QT_COMPILER_SUPPORTS_HERE(MIPS_DSP*) expands to (32-bit MIPS only).
# if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32)
# define __MIPS_DSP__
# endif
# if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32)
# define __MIPS_DSPR2__
# endif
#elif defined(Q_PROCESSOR_X86)
# if defined(Q_CC_CLANG) && defined(Q_CC_MSVC)
// clang-cl: trust only the compiler's own feature macros
# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
# else
# define QT_COMPILER_SUPPORTS_HERE(x) ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x))
# endif
# if defined(Q_CC_GNU)
/* GCC requires attributes for a function */
# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
# else
# define QT_FUNCTION_TARGET(x)
# endif
#else
// Other architectures: compile-time-enabled features only, no per-function targets.
# define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __)
# define QT_FUNCTION_TARGET(x)
#endif
| 135 | |
// The compiler enabled SSE2 (__SSE2__) but Qt's configure step did not record
// intrinsic support (QT_COMPILER_SUPPORTS_SSE2 undefined) — keep the two views
// consistent by hiding every x86 SIMD feature macro.
#if defined(__SSE2__) && !defined(QT_COMPILER_SUPPORTS_SSE2) && !defined(QT_BOOTSTRAPPED)
// Intrinsic support appears to be missing, so pretend these features don't exist
# undef __SSE__
# undef __SSE2__
# undef __SSE3__
# undef __SSSE3__
# undef __SSE4_1__
# undef __SSE4_2__
# undef __AES__
# undef __POPCNT__
# undef __AVX__
# undef __F16C__
# undef __RDRND__
# undef __AVX2__
# undef __BMI__
# undef __BMI2__
# undef __FMA__
# undef __MOVBE__
# undef __RDSEED__
# undef __AVX512F__
# undef __AVX512ER__
# undef __AVX512CD__
# undef __AVX512PF__
# undef __AVX512DQ__
# undef __AVX512BW__
# undef __AVX512VL__
# undef __AVX512IFMA__
# undef __AVX512VBMI__
# undef __SHA__
# undef __AVX512VBMI2__
# undef __AVX512BITALG__
# undef __AVX512VNNI__
# undef __AVX512VPOPCNTDQ__
# undef __GFNI__
# undef __VAES__
#endif
| 172 | |
| 173 | #ifdef Q_PROCESSOR_X86 |
| 174 | /* -- x86 intrinsic support -- */ |
| 175 | |
# if defined(QT_COMPILER_SUPPORTS_RDSEED) && defined(Q_OS_QNX)
// The compiler for QNX is missing the intrinsic
#  undef QT_COMPILER_SUPPORTS_RDSEED
# endif
# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
// MSVC doesn't define __SSE__/__SSE2__, so do it ourselves: x64 always has
// SSE2, and 32-bit builds report it via _M_IX86_FP >= 2 (/arch:SSE2 or higher).
// Defining only __SSE__ here would leave every defined(__SSE2__) check in this
// file (and in code including it) false on MSVC, disabling the SSE2 paths.
#  define __SSE__ 1
#  define __SSE2__ 1
# endif
| 184 | |
# if defined(Q_OS_WIN) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG)
// 64-bit GCC on Windows does not support AVX, so we hack around it by forcing
// it to emit unaligned loads & stores
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49001
// The assembler .macro directives below shadow each aligned vmov* mnemonic so
// it assembles as its unaligned vmov*u counterpart for the rest of the TU.
asm(
".macro vmovapd args:vararg\n"
" vmovupd \\args\n"
".endm\n"
".macro vmovaps args:vararg\n"
" vmovups \\args\n"
".endm\n"
".macro vmovdqa args:vararg\n"
" vmovdqu \\args\n"
".endm\n"
".macro vmovdqa32 args:vararg\n"
" vmovdqu32 \\args\n"
".endm\n"
".macro vmovdqa64 args:vararg\n"
" vmovdqu64 \\args\n"
".endm\n"
);
# endif

# if defined(Q_CC_GNU) && !defined(Q_OS_WASM)
// GCC 4.4 and Clang 2.8 added a few more intrinsics there
# include <x86intrin.h>
# endif
#ifdef Q_OS_WASM
// presumably Emscripten's SSE emulation is exposed via immintrin.h — confirm
# include <immintrin.h>
# endif
| 215 | |
# include <QtCore/private/qsimd_x86_p.h>

// x86-64 sub-architecture version 3
//
// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
// BMI1, BMI2, FMA, LZCNT, MOVBE. This feature set was chosen as the version 3
// of the x86-64 ISA (x86-64-v3) and is supported by GCC and Clang. On systems
// with the GNU libc, libraries with this feature can be installed on a
// "glibc-hwcaps/x86-64-v3" subdir. macOS's fat binaries support the "x86_64h"
// sub-architecture too.

# if defined(__AVX2__)
// List of features present with -march=x86-64-v3 and not architecturally
// implied by __AVX2__
// Each feature macro is 1 when enabled, so the sum counts how many of the
// seven are on; anything other than 7 means a partial feature set.
# define ARCH_HASWELL_MACROS \
(__AVX2__ + __BMI__ + __BMI2__ + __F16C__ + __FMA__ + __LZCNT__ + __POPCNT__)
# if ARCH_HASWELL_MACROS != 7
# error "Please enable all x86-64-v3 extensions; you probably want to use -march=haswell or -march=x86-64-v3 instead of -mavx2"
# endif
// The static_assert makes the compiler's error output name exactly which
// feature macros are undefined.
static_assert(ARCH_HASWELL_MACROS, "Undeclared identifiers indicate which features are missing." );
# define __haswell__ 1
# undef ARCH_HASWELL_MACROS
# endif

// x86-64 sub-architecture version 4
//
// Similar to the above, x86-64-v4 matches the AVX512 variant of the Intel Core
// 6th generation (codename "Skylake"). AMD Zen4 is their first processor
// with AVX512 support and it includes all of these too. The GNU libc subdir for
// this is "glibc-hwcaps/x86-64-v4".
//
# define ARCH_SKX_MACROS (__AVX512F__ + __AVX512BW__ + __AVX512CD__ + __AVX512DQ__ + __AVX512VL__)
# if ARCH_SKX_MACROS != 0
# if ARCH_SKX_MACROS != 5
# error "Please enable all x86-64-v4 extensions; you probably want to use -march=skylake-avx512 or -march=x86-64-v4 instead of -mavx512f"
# endif
static_assert(ARCH_SKX_MACROS, "Undeclared identifiers indicate which features are missing." );
# define __skylake_avx512__ 1
# endif
# undef ARCH_SKX_MACROS
#endif /* Q_PROCESSOR_X86 */
| 257 | |
| 258 | // NEON intrinsics |
| 259 | // note: as of GCC 4.9, does not support function targets for ARM |
| 260 | #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(_M_ARM64) |
| 261 | #if defined(Q_CC_CLANG) |
| 262 | #define QT_FUNCTION_TARGET_STRING_NEON "neon" |
| 263 | #else |
| 264 | #define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available. |
| 265 | #endif |
| 266 | #ifndef __ARM_NEON__ |
| 267 | // __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection. |
| 268 | #define __ARM_NEON__ |
| 269 | #endif |
| 270 | |
| 271 | #ifndef Q_PROCESSOR_ARM_64 // vaddv is only available on Aarch64 |
| 272 | inline uint16_t vaddvq_u16(uint16x8_t v8) |
| 273 | { |
| 274 | const uint64x2_t v2 = vpaddlq_u32(vpaddlq_u16(v8)); |
| 275 | const uint64x1_t v1 = vadd_u64(vget_low_u64(v2), vget_high_u64(v2)); |
| 276 | return vget_lane_u16(vreinterpret_u16_u64(v1), 0); |
| 277 | } |
| 278 | |
| 279 | inline uint8_t vaddv_u8(uint8x8_t v8) |
| 280 | { |
| 281 | const uint64x1_t v1 = vpaddl_u32(vpaddl_u16(vpaddl_u8(v8))); |
| 282 | return vget_lane_u8(vreinterpret_u8_u64(v1), 0); |
| 283 | } |
| 284 | #endif |
| 285 | |
// Missing NEON intrinsics, needed due different type definitions:
// Builds a uint16x8_t from eight explicit lane values. On MSVC the NEON vector
// types apparently take 64-bit initializers, so the lanes are packed into two
// uint64_t values — NOTE(review): assumes little-endian lane order; confirm.
inline uint16x8_t qvsetq_n_u16(uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4,
uint16_t v5, uint16_t v6, uint16_t v7, uint16_t v8) {
#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
using u64 = uint64_t;
// four u16 lanes per 64-bit half (low lanes in the low bits)
const uint16x8_t vmask = {
v1 | (v2 << 16) | (u64(v3) << 32) | (u64(v4) << 48),
v5 | (v6 << 16) | (u64(v7) << 32) | (u64(v8) << 48)
};
#else
const uint16x8_t vmask = { v1, v2, v3, v4, v5, v6, v7, v8 };
#endif
return vmask;
}
// Builds a uint8x8_t from eight explicit lane values (see qvsetq_n_u16 above
// for why MSVC needs 64-bit packing).
inline uint8x8_t qvset_n_u8(uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8) {
#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
using u64 = uint64_t;
// NOTE(review): v4 promotes to int, so `v4 << 24` can reach the sign bit when
// v4 >= 128 (signed-overflow territory); consider u64(v4) << 24 — confirm.
const uint8x8_t vmask = {
v1 | (v2 << 8) | (v3 << 16) | (v4 << 24) |
(u64(v5) << 32) | (u64(v6) << 40) | (u64(v7) << 48) | (u64(v8) << 56)
};
#else
const uint8x8_t vmask = { v1, v2, v3, v4, v5, v6, v7, v8 };
#endif
return vmask;
}
// Builds a uint8x16_t from sixteen explicit lane values; MSVC path packs eight
// u8 lanes into each uint64_t half (low lanes in the low bits).
inline uint8x16_t qvsetq_n_u8(uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8,
uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12,
uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16) {
#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
using u64 = uint64_t;
// NOTE(review): as in qvset_n_u8, `v4 << 24` / `v12 << 24` operate on
// int-promoted values and can touch the sign bit for inputs >= 128 — confirm.
const uint8x16_t vmask = {
v1 | (v2 << 8) | (v3 << 16) | (v4 << 24) |
(u64(v5) << 32) | (u64(v6) << 40) | (u64(v7) << 48) | (u64(v8) << 56),
v9 | (v10 << 8) | (v11 << 16) | (v12 << 24) |
(u64(v13) << 32) | (u64(v14) << 40) | (u64(v15) << 48) | (u64(v16) << 56)
};
#else
const uint8x16_t vmask = { v1, v2, v3, v4, v5, v6, v7, v8,
v9, v10, v11, v12, v13, v14, v15, v16};
#endif
return vmask;
}
| 331 | inline uint32x4_t qvsetq_n_u32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) |
| 332 | { |
| 333 | #if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG) |
| 334 | return uint32x4_t{ (uint64_t(b) << 32) | a, (uint64_t(d) << 32) | c }; |
| 335 | #else |
| 336 | return uint32x4_t{ a, b, c, d }; |
| 337 | #endif |
| 338 | } |
| 339 | #endif |
| 340 | |
#if defined(_M_ARM64) && __ARM_ARCH >= 800
// MSVC-style ARM64 toolchains don't emit the ACLE feature macros, so declare
// the Armv8 Crypto and CRC32 extensions ourselves.
// NOTE(review): ACLE 2021 encodes __ARM_ARCH as major*100 + minor (800 ==
// v8.0), but older compilers report plain "8" — confirm which convention the
// supported toolchains follow here.
#define __ARM_FEATURE_CRYPTO 1
#define __ARM_FEATURE_CRC32 1
#endif

// Per-compiler strings consumed by QT_FUNCTION_TARGET for the AES (crypto)
// and CRC32 extensions.
#if defined(Q_PROCESSOR_ARM_64)
#if defined(Q_CC_CLANG)
#define QT_FUNCTION_TARGET_STRING_AES "crypto"
#define QT_FUNCTION_TARGET_STRING_CRC32 "crc"
#elif defined(Q_CC_GNU)
#define QT_FUNCTION_TARGET_STRING_AES "+crypto"
#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc"
#elif defined(Q_CC_MSVC)
// MSVC has no function-target attribute, so the strings are intentionally empty.
#define QT_FUNCTION_TARGET_STRING_AES
#define QT_FUNCTION_TARGET_STRING_CRC32
#endif
#elif defined(Q_PROCESSOR_ARM_32)
#if defined(Q_CC_CLANG)
#define QT_FUNCTION_TARGET_STRING_AES "armv8-a,crypto"
#define QT_FUNCTION_TARGET_STRING_CRC32 "armv8-a,crc"
#elif defined(Q_CC_GNU)
// 32-bit GCC needs the full arch= spelling to raise the baseline to ARMv8
#define QT_FUNCTION_TARGET_STRING_AES "arch=armv8-a+crypto"
#define QT_FUNCTION_TARGET_STRING_CRC32 "arch=armv8-a+crc"
#endif
#endif
| 366 | |
#ifndef Q_PROCESSOR_X86
// Non-x86 CPU feature bits (on x86 the equivalent enum comes from the
// generated qsimd_x86_p.h header included above).
enum CPUFeatures {
#if defined(Q_PROCESSOR_ARM)
CpuFeatureNEON = 2,
CpuFeatureARM_NEON = CpuFeatureNEON, // alias matching the ARM_NEON spelling
CpuFeatureCRC32 = 4,
CpuFeatureAES = 8,
CpuFeatureARM_CRYPTO = CpuFeatureAES, // alias matching the ARM_CRYPTO spelling
#elif defined(Q_PROCESSOR_MIPS)
CpuFeatureDSP = 2,
CpuFeatureDSPR2 = 4,
#endif
};

// Feature bits guaranteed at compile time; runtime detection can only add bits
// on top of these (qCpuHasFeature short-circuits on this constant).
static const uint64_t qCompilerCpuFeatures = 0
#if defined __ARM_NEON__
| CpuFeatureNEON
#endif
#if !(defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64))
// Yocto Project recipes enable Crypto extension for all ARMv8 configs,
// even for targets without the Crypto extension. That's wrong, but as
// the compiler never generates the code for them on their own, most
// code never notices the problem. But we would. By not setting the
// bits here, we force a runtime detection.
#if defined __ARM_FEATURE_CRC32
| CpuFeatureCRC32
#endif
#if defined __ARM_FEATURE_CRYPTO
| CpuFeatureAES
#endif
#endif // Q_OS_LINUX && Q_PROCESSOR_ARM64
#if defined __mips_dsp
| CpuFeatureDSP
#endif
#if defined __mips_dspr2
| CpuFeatureDSPR2
#endif
;
#endif
| 406 | |
// The detection state below must be usable from both C and C++ TUs, so the
// atomic type and load function are spelled in the common C11 style.
#ifdef __cplusplus
# include <atomic>
# define Q_ATOMIC(T) std::atomic<T>
QT_BEGIN_NAMESPACE
// Make the C-style names resolve to their std:: equivalents in C++.
using std::atomic_load_explicit;
static constexpr auto memory_order_relaxed = std::memory_order_relaxed;
extern "C" {
#else
# include <stdatomic.h>
# define Q_ATOMIC(T) _Atomic(T)
#endif

#ifdef Q_PROCESSOR_X86
// _compilerCpuFeatures and the cpu_* arch-level constants presumably come from
// the generated qsimd_x86_p.h included earlier — verify against that header.
typedef uint64_t QCpuFeatureType;
static const QCpuFeatureType qCompilerCpuFeatures = _compilerCpuFeatures;
static const QCpuFeatureType CpuFeatureArchHaswell = cpu_haswell;
static const QCpuFeatureType CpuFeatureArchSkylakeAvx512 = cpu_skylake_avx512;
#else
typedef unsigned QCpuFeatureType;
#endif
// Cache of the runtime-detected feature bits (see qCpuFeatures below) and the
// detector that fills it.
extern Q_CORE_EXPORT Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1];
Q_CORE_EXPORT uint64_t QT_MANGLE_NAMESPACE(qDetectCpuFeatures)();
| 429 | |
| 430 | static inline uint64_t qCpuFeatures() |
| 431 | { |
| 432 | #ifdef QT_BOOTSTRAPPED |
| 433 | return qCompilerCpuFeatures; // no detection |
| 434 | #else |
| 435 | quint64 features = atomic_load_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), m: memory_order_relaxed); |
| 436 | if (!QT_SUPPORTS_INIT_PRIORITY) { |
| 437 | if (Q_UNLIKELY(features == 0)) |
| 438 | features = QT_MANGLE_NAMESPACE(qDetectCpuFeatures)(); |
| 439 | } |
| 440 | return features; |
| 441 | #endif |
| 442 | } |
| 443 | |
// Tests a CPU feature: the first operand constant-folds to true when the
// feature is enabled at compile time, avoiding the runtime lookup entirely.
// The mask-compare form means multi-bit "features" (like the ArchHaswell
// level) require all of their bits to be present.
#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
|| ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
| 446 | |
| 447 | #ifdef __cplusplus |
| 448 | } // extern "C" |
| 449 | |
# if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED)
// Out-of-line implementation elsewhere in QtCore; presumably fills the buffer
// with hardware random bytes and returns the count — confirm at the definition.
Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept;

// True when the RDRND hardware RNG is actually present on this CPU.
static inline bool qHasHwrng()
{
return qCpuHasFeature(RDRND);
}
# else
// Fallbacks when RDRND cannot be compiled in: no hardware RNG, no data.
static inline qsizetype qRandomCpu(void *, qsizetype) noexcept
{
return 0;
}
static inline bool qHasHwrng()
{
return false;
}
# endif
| 467 | |
| 468 | QT_END_NAMESPACE |
| 469 | |
| 470 | #endif // __cplusplus |
| 471 | |
| 472 | QT_WARNING_POP |
| 473 | |
| 474 | #endif // QSIMD_P_H |
| 475 | |