1/* Initialize CPU feature data.
2 This file is part of the GNU C Library.
3 Copyright (C) 2008-2024 Free Software Foundation, Inc.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <dl-hwcap.h>
20#include <libc-pointer-arith.h>
21#include <isa-level.h>
22#include <get-isa-level.h>
23#include <cacheinfo.h>
24#include <dl-cacheinfo.h>
25#include <dl-minsigstacksize.h>
26#include <dl-hwcap2.h>
27#include <gcc-macros.h>
28
29extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
30 attribute_hidden;
31
32#if defined SHARED
33extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
34extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
35extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;
36
37# ifdef __x86_64__
38# include <dl-plt-rewrite.h>
39
40static void
41TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
42{
43 /* We must be careful about where we put the call to
44 dl_plt_rewrite_supported() since it may generate
45 spurious SELinux log entries. It should only be
46 attempted if the user requested a PLT rewrite. */
47 if (valp->numval != 0 && dl_plt_rewrite_supported ())
48 {
49 /* Use JMPABS only on APX processors. */
50 const struct cpu_features *cpu_features = __get_cpu_features ();
51 GL (dl_x86_feature_control).plt_rewrite
52 = ((valp->numval > 1 && CPU_FEATURE_PRESENT_P (cpu_features, APX_F))
53 ? plt_rewrite_jmpabs
54 : plt_rewrite_jmp);
55 }
56}
57# else
58extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
59# endif
60#endif
61
62#ifdef __x86_64__
63extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
64extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
65extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
66#endif
67
68#ifdef __LP64__
69static void
70TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
71{
72 if (valp->numval)
73 GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
74 |= bit_arch_Prefer_MAP_32BIT_EXEC;
75}
76#endif
77
78#if CET_ENABLED
79extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
80 attribute_hidden;
81extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
82 attribute_hidden;
83
84# include <dl-cet.h>
85#endif
86
/* State save area size recorded by update_active: the XSAVE area size
   reported by CPUID leaf 0xd plus TLSDESC_CALL_REGISTER_SAVE_AREA,
   rounded up to a multiple of 64.  On x86-64 it is replaced by the
   size computed from the per-component offsets when XSAVEC is used.
   It keeps its zero initial value when update_active does not reach
   the CPUID leaf 0xd path.
   NOTE(review): presumably consumed by the _dl_tlsdesc_dynamic_*
   routines declared above -- confirm against their callers.  */
unsigned long int _dl_x86_features_tlsdesc_state_size;
88
/* Derive the "active" (usable) feature bits in CPU_FEATURES from the
   CPUID bits already stored there.

   Features that need no OS support are copied straight across.  When
   CPUID reports OSXSAVE, XCR0 is read with xgetbv and the AVX,
   AVX-512, AVX10, AMX, APX and XSAVE-family features are enabled only
   if the corresponding state bits are set.  That same path computes
   xsave_state_size, xsave_state_full_size and
   _dl_x86_features_tlsdesc_state_size.  Finally PKU and Key Locker
   are resolved, dl_check_hwcap2 is called and the result of
   get_isa_level is stored in isa_1.  */
static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

  /* RTM is marked active only when CPUID does not also report
     RTM_ALWAYS_ABORT.  */
  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

  /* NB: The trailing "&& 0" keeps this block compiled out regardless
     of CET_ENABLED, so IBT and SHSTK are never marked active here.  */
#if CET_ENABLED && 0
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  /* Bit mask of the vector register widths found usable below.  It
     starts with XMM only and is consulted by the AVX10 checks.  */
  enum
  {
    os_xmm = 1,
    os_ymm = 2,
    os_zmm = 4
  } os_vector_size = os_xmm;
  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
      /* Read extended control register 0 (ECX == 0).  Only the low
         half is examined; XCRHIGH merely receives the EDX output.  */
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
      /* Is YMM and XMM state usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              os_vector_size |= os_ymm;
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned load with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-IFMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_IFMA);
              /* Determine if AVX-NE-CONVERT is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_NE_CONVERT);
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if AVX-VNNI-INT8 is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI_INT8);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              os_vector_size |= os_zmm;
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }

      /* When AVX10 is reported and CPUID leaf 0x24 exists, load that
         leaf's subleaf 0 into the feature array, then mark the AVX10
         vector widths active according to the register widths
         recorded in os_vector_size above.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
          && cpu_features->basic.max_cpuid >= 0x24)
        {
          __cpuid_count (
            0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
            cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
            cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
            cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
          if (os_vector_size & os_xmm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
          if (os_vector_size & os_ymm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
          if (os_vector_size & os_zmm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
          /* Determine if AMX_FP16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
          /* Determine if AMX_COMPLEX is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_COMPLEX);
        }

      /* APX is usable only if the APX state is supported by kernel.  */
      if ((xcrlow & bit_APX_state) != 0)
        CPU_FEATURE_SET_ACTIVE (cpu_features, APX_F);

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          /* Only EBX of leaf 0xd, subleaf 0 is used here; a zero
             value skips all of the size computations below.  */
          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              /* NB: On AMX capable processors, ebx always includes AMX
                 states.  */
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);

              /* Start with the full size everywhere; the XSAVEC path
                 below may replace xsave_state_size and the TLS
                 descriptor size with smaller values.  */
              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;
              _dl_x86_features_tlsdesc_state_size = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[X86_XSTATE_MAX_ID + 1];
                  unsigned int xstate_comp_sizes[X86_XSTATE_MAX_ID + 1];
                  unsigned int i;

                  /* Components 0 and 1 and the start of component 2
                     use fixed values rather than CPUID results.  */
                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

                  /* Walk the remaining components.  A component not in
                     FULL_STATE_SAVE_MASK gets size 0.  For i > 2 the
                     offset is the end of the previous component,
                     rounded up to 64 when bit 1 of the ECX value
                     returned for this component is set.  */
                  for (i = 2; i <= X86_XSTATE_MAX_ID; i++)
                    {
                      if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          /* Clear ECX so that the alignment test below
                             does not see a stale value.  */
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  The total is the end of the last
                     component; XSAVEC is marked usable only when that
                     total is non-zero.  */
                  unsigned int size
                    = (xstate_comp_offsets[X86_XSTATE_MAX_ID]
                       + xstate_comp_sizes[X86_XSTATE_MAX_ID]);
                  if (size)
                    {
                      size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
                                       64);
#ifdef __x86_64__
                      /* The TLS descriptor size keeps the AMX space;
                         only xsave_state_size has it removed.  */
                      _dl_x86_features_tlsdesc_state_size = size;
                      /* Exclude the AMX space from the start of TILECFG
                         space to the end of TILEDATA space.  If CPU
                         doesn't support AMX, TILECFG offset is the same
                         as TILEDATA + 1 offset.  Otherwise, they are
                         multiples of 64.  */
                      size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
                               - xstate_comp_offsets[X86_XSTATE_TILECFG_ID]);
#endif
                      cpu_features->xsave_state_size = size;
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  dl_check_hwcap2 (cpu_features);

  /* Computed last, after every active bit above has been settled.  */
  cpu_features->isa_1 = get_isa_level (cpu_features);
}
404
405static void
406get_extended_indices (struct cpu_features *cpu_features)
407{
408 unsigned int eax, ebx, ecx, edx;
409 __cpuid (0x80000000, eax, ebx, ecx, edx);
410 if (eax >= 0x80000001)
411 __cpuid (0x80000001,
412 cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
413 cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
414 cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
415 cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
416 if (eax >= 0x80000007)
417 __cpuid (0x80000007,
418 cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
419 cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
420 cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
421 cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
422 if (eax >= 0x80000008)
423 __cpuid (0x80000008,
424 cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
425 cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
426 cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
427 cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
428}
429
430static void
431get_common_indices (struct cpu_features *cpu_features,
432 unsigned int *family, unsigned int *model,
433 unsigned int *extended_model, unsigned int *stepping)
434{
435 if (family)
436 {
437 unsigned int eax;
438 __cpuid (1, eax,
439 cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
440 cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
441 cpu_features->features[CPUID_INDEX_1].cpuid.edx);
442 cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
443 *family = (eax >> 8) & 0x0f;
444 *model = (eax >> 4) & 0x0f;
445 *extended_model = (eax >> 12) & 0xf0;
446 *stepping = eax & 0x0f;
447 if (*family == 0x0f)
448 {
449 *family += (eax >> 20) & 0xff;
450 *model += *extended_model;
451 }
452 }
453
454 if (cpu_features->basic.max_cpuid >= 7)
455 {
456 __cpuid_count (7, 0,
457 cpu_features->features[CPUID_INDEX_7].cpuid.eax,
458 cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
459 cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
460 cpu_features->features[CPUID_INDEX_7].cpuid.edx);
461 __cpuid_count (7, 1,
462 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
463 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
464 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
465 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
466 }
467
468 if (cpu_features->basic.max_cpuid >= 0xd)
469 __cpuid_count (0xd, 1,
470 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
471 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
472 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
473 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);
474
475 if (cpu_features->basic.max_cpuid >= 0x14)
476 __cpuid_count (0x14, 0,
477 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
478 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
479 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
480 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);
481
482 if (cpu_features->basic.max_cpuid >= 0x19)
483 __cpuid_count (0x19, 0,
484 cpu_features->features[CPUID_INDEX_19].cpuid.eax,
485 cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
486 cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
487 cpu_features->features[CPUID_INDEX_19].cpuid.edx);
488
489 dl_check_minsigstacksize (cpu_features);
490}
491
/* init_cpu_features ORs several of these bit_arch_* flags into
   cpu_features->preferred[] through a single index (for example
   index_arch_Fast_Unaligned_Load or index_arch_Fast_Rep_String).
   That is only correct if all of the flags live in the same
   preferred[] element, so verify it at compile time.  */
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");
503
504
/* Intel microarch list.  These identifiers are used for the tuning
   decisions in init_cpu_features.  Family 6 models are mapped to them
   by intel_get_fam6_microarch; INTEL_BIGCORE_NOVALAKE and
   INTEL_BIGCORE_DIAMONDRAPIDS are assigned directly in
   init_cpu_features for families 18 and 19.  */
enum intel_microarch
{
  /* Atom processors.  */
  INTEL_ATOM_BONNELL,
  INTEL_ATOM_SILVERMONT,
  INTEL_ATOM_AIRMONT,
  INTEL_ATOM_GOLDMONT,
  INTEL_ATOM_GOLDMONT_PLUS,
  INTEL_ATOM_SIERRAFOREST,
  INTEL_ATOM_CLEARWATERFOREST,
  INTEL_ATOM_GRANDRIDGE,
  INTEL_ATOM_TREMONT,

  /* Bigcore processors.  */
  INTEL_BIGCORE_MEROM,
  INTEL_BIGCORE_PENRYN,
  INTEL_BIGCORE_DUNNINGTON,
  INTEL_BIGCORE_NEHALEM,
  INTEL_BIGCORE_WESTMERE,
  INTEL_BIGCORE_SANDYBRIDGE,
  INTEL_BIGCORE_IVYBRIDGE,
  INTEL_BIGCORE_HASWELL,
  INTEL_BIGCORE_BROADWELL,
  INTEL_BIGCORE_SKYLAKE,
  INTEL_BIGCORE_KABYLAKE,
  INTEL_BIGCORE_COMETLAKE,
  INTEL_BIGCORE_SKYLAKE_AVX512,
  INTEL_BIGCORE_CANNONLAKE,
  INTEL_BIGCORE_ICELAKE,
  INTEL_BIGCORE_TIGERLAKE,
  INTEL_BIGCORE_ROCKETLAKE,
  INTEL_BIGCORE_SAPPHIRERAPIDS,
  INTEL_BIGCORE_RAPTORLAKE,
  INTEL_BIGCORE_EMERALDRAPIDS,
  INTEL_BIGCORE_METEORLAKE,
  INTEL_BIGCORE_LUNARLAKE,
  INTEL_BIGCORE_ARROWLAKE,
  INTEL_BIGCORE_PANTHERLAKE,
  INTEL_BIGCORE_GRANITERAPIDS,
  INTEL_BIGCORE_DIAMONDRAPIDS,
  INTEL_BIGCORE_WILDCATLAKE,
  INTEL_BIGCORE_NOVALAKE,

  /* Mixed (bigcore + atom SOC).  */
  INTEL_MIXED_LAKEFIELD,
  INTEL_MIXED_ALDERLAKE,

  /* Knights processors (KNL is Knights Landing).  */
  INTEL_KNIGHTS_MILL,
  INTEL_KNIGHTS_LANDING,

  /* Unknown: the result for any model that is not recognized.  */
  INTEL_UNKNOWN,
};
560
561static enum intel_microarch
562intel_get_fam6_microarch (unsigned int model,
563 __attribute__ ((unused)) unsigned int stepping)
564{
565 switch (model)
566 {
567 case 0x1C:
568 case 0x26:
569 return INTEL_ATOM_BONNELL;
570 case 0x27:
571 case 0x35:
572 case 0x36:
573 /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
574 (microarchitecturally identical). */
575 return INTEL_ATOM_BONNELL;
576 case 0x37:
577 case 0x4A:
578 case 0x4D:
579 case 0x5D:
580 return INTEL_ATOM_SILVERMONT;
581 case 0x4C:
582 case 0x5A:
583 case 0x75:
584 return INTEL_ATOM_AIRMONT;
585 case 0x5C:
586 case 0x5F:
587 return INTEL_ATOM_GOLDMONT;
588 case 0x7A:
589 return INTEL_ATOM_GOLDMONT_PLUS;
590 case 0xAF:
591 return INTEL_ATOM_SIERRAFOREST;
592 case 0xDD:
593 return INTEL_ATOM_CLEARWATERFOREST;
594 case 0xB6:
595 return INTEL_ATOM_GRANDRIDGE;
596 case 0x86:
597 case 0x96:
598 case 0x9C:
599 return INTEL_ATOM_TREMONT;
600 case 0x0F:
601 case 0x16:
602 return INTEL_BIGCORE_MEROM;
603 case 0x17:
604 return INTEL_BIGCORE_PENRYN;
605 case 0x1D:
606 return INTEL_BIGCORE_DUNNINGTON;
607 case 0x1A:
608 case 0x1E:
609 case 0x1F:
610 case 0x2E:
611 return INTEL_BIGCORE_NEHALEM;
612 case 0x25:
613 case 0x2C:
614 case 0x2F:
615 return INTEL_BIGCORE_WESTMERE;
616 case 0x2A:
617 case 0x2D:
618 return INTEL_BIGCORE_SANDYBRIDGE;
619 case 0x3A:
620 case 0x3E:
621 return INTEL_BIGCORE_IVYBRIDGE;
622 case 0x3C:
623 case 0x3F:
624 case 0x45:
625 case 0x46:
626 return INTEL_BIGCORE_HASWELL;
627 case 0x3D:
628 case 0x47:
629 case 0x4F:
630 case 0x56:
631 return INTEL_BIGCORE_BROADWELL;
632 case 0x4E:
633 case 0x5E:
634 return INTEL_BIGCORE_SKYLAKE;
635 case 0x8E:
636 /*
637 Stepping = {9}
638 -> Amberlake
639 Stepping = {10}
640 -> Coffeelake
641 Stepping = {11, 12}
642 -> Whiskeylake
643 else
644 -> Kabylake
645
646 All of these are derivatives of Kabylake (Skylake client).
647 */
648 return INTEL_BIGCORE_KABYLAKE;
649 case 0x9E:
650 /*
651 Stepping = {10, 11, 12, 13}
652 -> Coffeelake
653 else
654 -> Kabylake
655
656 Coffeelake is a derivatives of Kabylake (Skylake client).
657 */
658 return INTEL_BIGCORE_KABYLAKE;
659 case 0xA5:
660 case 0xA6:
661 return INTEL_BIGCORE_COMETLAKE;
662 case 0x66:
663 return INTEL_BIGCORE_CANNONLAKE;
664 case 0x55:
665 /*
666 Stepping = {6, 7}
667 -> Cascadelake
668 Stepping = {11}
669 -> Cooperlake
670 else
671 -> Skylake-avx512
672
673 These are all microarchitecturally identical, so use
674 Skylake-avx512 for all of them.
675 */
676 return INTEL_BIGCORE_SKYLAKE_AVX512;
677 case 0x6A:
678 case 0x6C:
679 case 0x7D:
680 case 0x7E:
681 case 0x9D:
682 return INTEL_BIGCORE_ICELAKE;
683 case 0x8C:
684 case 0x8D:
685 return INTEL_BIGCORE_TIGERLAKE;
686 case 0xA7:
687 return INTEL_BIGCORE_ROCKETLAKE;
688 case 0x8F:
689 return INTEL_BIGCORE_SAPPHIRERAPIDS;
690 case 0xB7:
691 case 0xBA:
692 case 0xBF:
693 return INTEL_BIGCORE_RAPTORLAKE;
694 case 0xCF:
695 return INTEL_BIGCORE_EMERALDRAPIDS;
696 case 0xAA:
697 case 0xAC:
698 return INTEL_BIGCORE_METEORLAKE;
699 case 0xbd:
700 return INTEL_BIGCORE_LUNARLAKE;
701 case 0xb5:
702 case 0xc5:
703 case 0xc6:
704 return INTEL_BIGCORE_ARROWLAKE;
705 case 0xCC:
706 return INTEL_BIGCORE_PANTHERLAKE;
707 case 0xD5:
708 return INTEL_BIGCORE_WILDCATLAKE;
709 case 0xAD:
710 case 0xAE:
711 return INTEL_BIGCORE_GRANITERAPIDS;
712 case 0x8A:
713 return INTEL_MIXED_LAKEFIELD;
714 case 0x97:
715 case 0x9A:
716 case 0xBE:
717 return INTEL_MIXED_ALDERLAKE;
718 case 0x85:
719 return INTEL_KNIGHTS_MILL;
720 case 0x57:
721 return INTEL_KNIGHTS_LANDING;
722 default:
723 return INTEL_UNKNOWN;
724 }
725}
726
727static inline void
728init_cpu_features (struct cpu_features *cpu_features)
729{
730 unsigned int ebx, ecx, edx;
731 unsigned int family = 0;
732 unsigned int model = 0;
733 unsigned int stepping = 0;
734 enum cpu_features_kind kind;
735
  /* By default, avoid non-temporal memset on non-Intel/AMD hardware:
     as of this writing, we only have benchmarks indicating that it is
     profitable on Intel/AMD.  */
739 cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
740 |= bit_arch_Avoid_Non_Temporal_Memset;
741
742 cpu_features->cachesize_non_temporal_divisor = 4;
743#if !HAS_CPUID
744 if (__get_cpuid_max (0, 0) == 0)
745 {
746 kind = arch_kind_other;
747 goto no_cpuid;
748 }
749#endif
750
751 __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
752
753 /* This spells out "GenuineIntel". */
754 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
755 {
756 unsigned int extended_model;
757
758 kind = arch_kind_intel;
759
760 get_common_indices (cpu_features, family: &family, model: &model, extended_model: &extended_model,
761 stepping: &stepping);
762
763 get_extended_indices (cpu_features);
764
765 update_active (cpu_features);
766
767 /* Benchmarks indicate non-temporal memset can be profitable on Intel
768 hardware. */
769 cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
770 &= ~bit_arch_Avoid_Non_Temporal_Memset;
771
772 enum intel_microarch microarch = INTEL_UNKNOWN;
773 if (family == 0x06)
774 {
775 model += extended_model;
776 microarch = intel_get_fam6_microarch (model, stepping);
777
778 /* Disable TSX on some processors to avoid TSX on kernels that
779 weren't updated with the latest microcode package (which
780 disables broken feature by default). */
781 switch (microarch)
782 {
783 default:
784 break;
785
786 case INTEL_BIGCORE_SKYLAKE_AVX512:
787 /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */
788 if (stepping <= 5)
789 goto disable_tsx;
790 break;
791
792 case INTEL_BIGCORE_KABYLAKE:
793 /* NB: Although the errata documents that for model == 0x8e
794 (kabylake skylake client), only 0xb stepping or lower are
795 impacted, the intention of the errata was to disable TSX on
796 all client processors on all steppings. Include 0xc
797 stepping which is an Intel Core i7-8665U, a client mobile
798 processor. */
799 if (stepping > 0xc)
800 break;
801 /* Fall through. */
802 case INTEL_BIGCORE_SKYLAKE:
803 /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
804 processors listed in:
805
806 https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
807 */
808disable_tsx:
809 CPU_FEATURE_UNSET (cpu_features, HLE);
810 CPU_FEATURE_UNSET (cpu_features, RTM);
811 CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
812 break;
813
814 case INTEL_BIGCORE_HASWELL:
815 /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
816 TSX. Haswell also includes other model numbers that have
817 working TSX. */
818 if (model == 0x3f && stepping >= 4)
819 break;
820
821 CPU_FEATURE_UNSET (cpu_features, RTM);
822 break;
823 }
824 }
825 else if (family == 18)
826 switch (model)
827 {
828 case 0x01:
829 case 0x03:
830 microarch = INTEL_BIGCORE_NOVALAKE;
831 break;
832
833 default:
834 break;
835 }
836 else if (family == 19)
837 switch (model)
838 {
839 case 0x01:
840 microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
841 break;
842
843 default:
844 break;
845 }
846
847 switch (microarch)
848 {
849 /* Atom / KNL tuning. */
850 case INTEL_ATOM_BONNELL:
851 /* BSF is slow on Bonnell. */
852 cpu_features->preferred[index_arch_Slow_BSF]
853 |= bit_arch_Slow_BSF;
854 break;
855
856 /* Unaligned load versions are faster than SSSE3
857 on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
858 case INTEL_ATOM_AIRMONT:
859 case INTEL_ATOM_SILVERMONT:
860 case INTEL_ATOM_GOLDMONT:
861 case INTEL_ATOM_GOLDMONT_PLUS:
862
863 /* Knights Landing. Enable Silvermont optimizations. */
864 case INTEL_KNIGHTS_LANDING:
865
866 cpu_features->preferred[index_arch_Fast_Unaligned_Load]
867 |= (bit_arch_Fast_Unaligned_Load
868 | bit_arch_Fast_Unaligned_Copy
869 | bit_arch_Prefer_PMINUB_for_stringop
870 | bit_arch_Slow_SSE4_2);
871 break;
872
873 case INTEL_ATOM_TREMONT:
874 /* Enable rep string instructions, unaligned load, unaligned
875 copy, pminub and avoid SSE 4.2 on Tremont. */
876 cpu_features->preferred[index_arch_Fast_Rep_String]
877 |= (bit_arch_Fast_Rep_String
878 | bit_arch_Fast_Unaligned_Load
879 | bit_arch_Fast_Unaligned_Copy
880 | bit_arch_Prefer_PMINUB_for_stringop
881 | bit_arch_Slow_SSE4_2);
882 break;
883
884 /*
885 Default tuned Knights microarch.
886 case INTEL_KNIGHTS_MILL:
887 */
888
889 /*
890 Default tuned atom microarch.
891 case INTEL_ATOM_SIERRAFOREST:
892 case INTEL_ATOM_GRANDRIDGE:
893 case INTEL_ATOM_CLEARWATERFOREST:
894 */
895
896 /* Bigcore/Default Tuning. */
897 default:
898 default_tuning:
899 /* Unknown Intel processors. Assuming this is one of Core
900 i3/i5/i7 processors if AVX is available. */
901 if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
902 break;
903
904 enable_modern_features:
905 /* Rep string instructions, unaligned load, unaligned copy,
906 and pminub are fast on Intel Core i3, i5 and i7. */
907 cpu_features->preferred[index_arch_Fast_Rep_String]
908 |= (bit_arch_Fast_Rep_String
909 | bit_arch_Fast_Unaligned_Load
910 | bit_arch_Fast_Unaligned_Copy
911 | bit_arch_Prefer_PMINUB_for_stringop);
912 break;
913
914 case INTEL_BIGCORE_NEHALEM:
915 case INTEL_BIGCORE_WESTMERE:
916 /* Older CPUs prefer non-temporal stores at lower threshold. */
917 cpu_features->cachesize_non_temporal_divisor = 8;
918 goto enable_modern_features;
919
920 /* Older Bigcore microarch (smaller non-temporal store
921 threshold). */
922 case INTEL_BIGCORE_SANDYBRIDGE:
923 case INTEL_BIGCORE_IVYBRIDGE:
924 case INTEL_BIGCORE_HASWELL:
925 case INTEL_BIGCORE_BROADWELL:
926 cpu_features->cachesize_non_temporal_divisor = 8;
927 goto default_tuning;
928
929 /* Newer Bigcore microarch (larger non-temporal store
930 threshold). */
931 case INTEL_BIGCORE_SKYLAKE_AVX512:
932 case INTEL_BIGCORE_CANNONLAKE:
933 /* Benchmarks indicate non-temporal memset is not
934 necessarily profitable on SKX (and in some cases much
935 worse). This is likely unique to SKX due to its unique
936 mesh interconnect (not present on ICX or BWD). Disable
937 non-temporal on all Skylake servers. */
938 cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
939 |= bit_arch_Avoid_Non_Temporal_Memset;
940 /* fallthrough */
941 case INTEL_BIGCORE_COMETLAKE:
942 case INTEL_BIGCORE_SKYLAKE:
943 case INTEL_BIGCORE_KABYLAKE:
944 case INTEL_BIGCORE_ICELAKE:
945 case INTEL_BIGCORE_TIGERLAKE:
946 case INTEL_BIGCORE_ROCKETLAKE:
947 case INTEL_BIGCORE_RAPTORLAKE:
948 case INTEL_BIGCORE_METEORLAKE:
949 case INTEL_BIGCORE_LUNARLAKE:
950 case INTEL_BIGCORE_ARROWLAKE:
951 case INTEL_BIGCORE_PANTHERLAKE:
952 case INTEL_BIGCORE_WILDCATLAKE:
953 case INTEL_BIGCORE_NOVALAKE:
954 case INTEL_BIGCORE_SAPPHIRERAPIDS:
955 case INTEL_BIGCORE_EMERALDRAPIDS:
956 case INTEL_BIGCORE_GRANITERAPIDS:
957 case INTEL_BIGCORE_DIAMONDRAPIDS:
958 /* Default tuned Mixed (bigcore + atom SOC). */
959 case INTEL_MIXED_LAKEFIELD:
960 case INTEL_MIXED_ALDERLAKE:
961 cpu_features->cachesize_non_temporal_divisor = 2;
962 goto default_tuning;
963 }
964
965 /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
966 if AVX512ER is available. Don't use AVX512 to avoid lower CPU
967 frequency if AVX512ER isn't available. */
968 if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
969 cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
970 |= bit_arch_Prefer_No_VZEROUPPER;
971 else
972 {
973 /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
974 when ZMM load and store instructions are used. */
975 if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
976 cpu_features->preferred[index_arch_Prefer_No_AVX512]
977 |= bit_arch_Prefer_No_AVX512;
978
979 /* Avoid RTM abort triggered by VZEROUPPER inside a
980 transactionally executing RTM region. */
981 if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
982 cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
983 |= bit_arch_Prefer_No_VZEROUPPER;
984 }
985
      /* Avoid short distance REP MOVSB on processors with FSRM.  */
987 if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
988 cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
989 |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
990 }
991 /* This spells out "AuthenticAMD" or "HygonGenuine". */
992 else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
993 || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
994 {
995 unsigned int extended_model;
996
997 kind = arch_kind_amd;
998
999 get_common_indices (cpu_features, family: &family, model: &model, extended_model: &extended_model,
1000 stepping: &stepping);
1001
1002 get_extended_indices (cpu_features);
1003
1004 update_active (cpu_features);
1005
1006 ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;
1007
1008 /* Benchmarks indicate non-temporal memset can be profitable on AMD
1009 hardware. */
1010 cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
1011 &= ~bit_arch_Avoid_Non_Temporal_Memset;
1012
1013 if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
1014 {
1015 /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
1016 FMA4 requires AVX, determine if FMA4 is usable here. */
1017 CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
1018 }
1019
1020 if (family == 0x15)
1021 {
1022 /* "Excavator" */
1023 if (model >= 0x60 && model <= 0x7f)
1024 {
1025 cpu_features->preferred[index_arch_Fast_Unaligned_Load]
1026 |= (bit_arch_Fast_Unaligned_Load
1027 | bit_arch_Fast_Copy_Backward);
1028
1029 /* Unaligned AVX loads are slower.*/
1030 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
1031 &= ~bit_arch_AVX_Fast_Unaligned_Load;
1032 }
1033 }
1034 }
1035 /* This spells out "CentaurHauls" or "  Shanghai  ". */
1036 else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
1037 || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
1038 {
1039 unsigned int extended_model, stepping;
1040
1041 kind = arch_kind_zhaoxin;
1042
1043 get_common_indices (cpu_features, family: &family, model: &model, extended_model: &extended_model,
1044 stepping: &stepping);
1045
1046 get_extended_indices (cpu_features);
1047
1048 update_active (cpu_features);
1049
1050 model += extended_model;
1051 if (family == 0x6)
1052 {
1053 if (model == 0xf || model == 0x19)
1054 {
1055 CPU_FEATURE_UNSET (cpu_features, AVX);
1056 CPU_FEATURE_UNSET (cpu_features, AVX2);
1057
1058 cpu_features->preferred[index_arch_Slow_SSE4_2]
1059 |= bit_arch_Slow_SSE4_2;
1060
1061 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
1062 &= ~bit_arch_AVX_Fast_Unaligned_Load;
1063 }
1064 }
1065 else if (family == 0x7)
1066 {
1067 if (model == 0x1b)
1068 {
1069 CPU_FEATURE_UNSET (cpu_features, AVX);
1070 CPU_FEATURE_UNSET (cpu_features, AVX2);
1071
1072 cpu_features->preferred[index_arch_Slow_SSE4_2]
1073 |= bit_arch_Slow_SSE4_2;
1074
1075 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
1076 &= ~bit_arch_AVX_Fast_Unaligned_Load;
1077 }
1078 else if (model == 0x3b)
1079 {
1080 CPU_FEATURE_UNSET (cpu_features, AVX);
1081 CPU_FEATURE_UNSET (cpu_features, AVX2);
1082
1083 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
1084 &= ~bit_arch_AVX_Fast_Unaligned_Load;
1085 }
1086 }
1087 }
1088 else
1089 {
1090 kind = arch_kind_other;
1091 get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
1092 update_active (cpu_features);
1093 }
1094
1095 /* Support i586 if CX8 is available. */
1096 if (CPU_FEATURES_CPU_P (cpu_features, CX8))
1097 cpu_features->preferred[index_arch_I586] |= bit_arch_I586;
1098
1099 /* Support i686 if CMOV is available. */
1100 if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
1101 cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
1102
1103#if !HAS_CPUID
1104no_cpuid:
1105#endif
1106
1107 cpu_features->basic.kind = kind;
1108 cpu_features->basic.family = family;
1109 cpu_features->basic.model = model;
1110 cpu_features->basic.stepping = stepping;
1111
1112 dl_init_cacheinfo (cpu_features);
1113
1114 TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
1115
1116#ifdef __LP64__
1117 TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
1118 TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
1119#endif
1120
1121 /* Do not add the logic to disable XSAVE/XSAVEC if this glibc build
1122 requires AVX and therefore XSAVE or XSAVEC support. */
1123#ifndef GCCMACRO__AVX__
1124 bool disable_xsave_features = false;
1125
1126 if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
1127 {
1128 /* These features are usable only if OSXSAVE is usable. */
1129 CPU_FEATURE_UNSET (cpu_features, XSAVE);
1130 CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
1131 CPU_FEATURE_UNSET (cpu_features, XSAVEC);
1132 CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
1133 CPU_FEATURE_UNSET (cpu_features, XFD);
1134
1135 disable_xsave_features = true;
1136 }
1137
1138 if (disable_xsave_features
1139 || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
1140 && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
1141 {
1142 /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable. */
1143 cpu_features->xsave_state_size = 0;
1144
1145 CPU_FEATURE_UNSET (cpu_features, AVX);
1146 CPU_FEATURE_UNSET (cpu_features, AVX2);
1147 CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
1148 CPU_FEATURE_UNSET (cpu_features, FMA);
1149 CPU_FEATURE_UNSET (cpu_features, VAES);
1150 CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
1151 CPU_FEATURE_UNSET (cpu_features, XOP);
1152 CPU_FEATURE_UNSET (cpu_features, F16C);
1153 CPU_FEATURE_UNSET (cpu_features, AVX512F);
1154 CPU_FEATURE_UNSET (cpu_features, AVX512CD);
1155 CPU_FEATURE_UNSET (cpu_features, AVX512ER);
1156 CPU_FEATURE_UNSET (cpu_features, AVX512PF);
1157 CPU_FEATURE_UNSET (cpu_features, AVX512VL);
1158 CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
1159 CPU_FEATURE_UNSET (cpu_features, AVX512BW);
1160 CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
1161 CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
1162 CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
1163 CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
1164 CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
1165 CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
1166 CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
1167 CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
1168 CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
1169 CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
1170 CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
1171 CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
1172 CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
1173 CPU_FEATURE_UNSET (cpu_features, AMX_INT8);
1174
1175 CPU_FEATURE_UNSET (cpu_features, FMA4);
1176 }
1177#endif
1178
1179#ifdef __x86_64__
1180 GLRO(dl_hwcap) = HWCAP_X86_64;
1181 if (cpu_features->basic.kind == arch_kind_intel)
1182 {
1183 const char *platform = NULL;
1184
1185 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
1186 {
1187 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
1188 {
1189 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
1190 platform = "xeon_phi";
1191 }
1192 else
1193 {
1194 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
1195 && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
1196 && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
1197 GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
1198 }
1199 }
1200
1201 if (platform == NULL
1202 && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
1203 && CPU_FEATURE_USABLE_P (cpu_features, FMA)
1204 && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
1205 && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
1206 && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
1207 && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
1208 && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
1209 platform = "haswell";
1210
1211 if (platform != NULL)
1212 GLRO(dl_platform) = platform;
1213 }
1214#else
1215 GLRO(dl_hwcap) = 0;
1216 if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
1217 GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
1218
1219 if (CPU_FEATURES_ARCH_P (cpu_features, I686))
1220 GLRO(dl_platform) = "i686";
1221 else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
1222 GLRO(dl_platform) = "i586";
1223#endif
1224
1225#if CET_ENABLED
1226 TUNABLE_GET (x86_ibt, tunable_val_t *,
1227 TUNABLE_CALLBACK (set_x86_ibt));
1228 TUNABLE_GET (x86_shstk, tunable_val_t *,
1229 TUNABLE_CALLBACK (set_x86_shstk));
1230#endif
1231
1232 if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
1233 || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
1234 {
1235 if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
1236 {
1237#ifdef __x86_64__
1238 GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
1239#endif
1240#ifdef SHARED
1241 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
1242#endif
1243 }
1244 else
1245 {
1246#ifdef __x86_64__
1247 GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
1248#endif
1249#ifdef SHARED
1250 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
1251#endif
1252 }
1253 }
1254 else
1255 {
1256#ifdef __x86_64__
1257 GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
1258# ifdef SHARED
1259 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
1260# endif
1261#else
1262# ifdef SHARED
1263 if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
1264 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
1265 else
1266 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
1267# endif
1268#endif
1269 }
1270
1271#ifdef SHARED
1272# ifdef __x86_64__
1273 TUNABLE_GET (plt_rewrite, tunable_val_t *,
1274 TUNABLE_CALLBACK (set_plt_rewrite));
1275# endif
1276#else
1277 /* NB: In libc.a, call init_cacheinfo. */
1278 init_cacheinfo ();
1279#endif
1280}
1281

source code of glibc/sysdeps/x86/cpu-features.c