/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2024 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <isa-level.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>
#include <dl-hwcap2.h>
#include <gcc-macros.h>

extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

#if defined SHARED
extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;

# ifdef __x86_64__
#  include <dl-plt-rewrite.h>

static void
TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
{
  /* We must be careful about where we put the call to
     dl_plt_rewrite_supported() since it may generate
     spurious SELinux log entries.  It should only be
     attempted if the user requested a PLT rewrite.  */
  if (valp->numval != 0 && dl_plt_rewrite_supported ())
    {
      /* Use JMPABS only on APX processors.  */
      const struct cpu_features *cpu_features = __get_cpu_features ();
      GL (dl_x86_feature_control).plt_rewrite
        = ((valp->numval > 1 && CPU_FEATURE_PRESENT_P (cpu_features, APX_F))
           ? plt_rewrite_jmpabs
           : plt_rewrite_jmp);
    }
}
# else
extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
# endif
#endif

#ifdef __x86_64__
extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
#endif

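/* Callback for the glibc.cpu.prefer_map_32bit_exec tunable: a non-zero
   value marks Prefer_MAP_32BIT_EXEC in the preferred feature bits.  */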
#ifdef __LP64__
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  if (valp->numval)
    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
#endif

#if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;

# include <dl-cet.h>
#endif

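/* Size of the register state that _dl_tlsdesc_dynamic has to save and
   restore; set in update_active from the XSAVE area size reported by
   CPUID leaf 0xd.  */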
unsigned long int _dl_x86_features_tlsdesc_state_size;

static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED && 0
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  enum
  {
    os_xmm = 1,
    os_ymm = 2,
    os_zmm = 4
  } os_vector_size = os_xmm;
  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
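      /* xcrlow:xcrhigh now hold XCR0, the extended control register that
         reports which register states the OS saves and restores.  */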
      /* Is YMM and XMM state usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              os_vector_size |= os_ymm;
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-IFMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_IFMA);
              /* Determine if AVX-NE-CONVERT is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_NE_CONVERT);
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if AVX-VNNI-INT8 is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI_INT8);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              os_vector_size |= os_zmm;
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }

      if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
          && cpu_features->basic.max_cpuid >= 0x24)
        {
          __cpuid_count (
              0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
              cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
              cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
              cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
          if (os_vector_size & os_xmm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
          if (os_vector_size & os_ymm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
          if (os_vector_size & os_zmm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
          /* Determine if AMX_FP16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
          /* Determine if AMX_COMPLEX is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_COMPLEX);
        }

      /* APX is usable only if the APX state is supported by the kernel.  */
      if ((xcrlow & bit_APX_state) != 0)
        CPU_FEATURE_SET_ACTIVE (cpu_features, APX_F);

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              /* NB: On AMX capable processors, ebx always includes AMX
                 states.  */
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;
              _dl_x86_features_tlsdesc_state_size = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[X86_XSTATE_MAX_ID + 1];
                  unsigned int xstate_comp_sizes[X86_XSTATE_MAX_ID + 1];
                  unsigned int i;

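                  /* Fixed legacy layout: x87 state at offset 0 (160 bytes)
                     and SSE state at offset 160 (256 bytes); compacted
                     extended components start at offset 576, after the
                     512-byte legacy area and the 64-byte XSAVE header.  */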
                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

                  for (i = 2; i <= X86_XSTATE_MAX_ID; i++)
                    {
                      if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
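                          /* CPUID leaf 0xd, sub-leaf i reports in ECX bit 1
                             whether this component must be 64-byte aligned
                             in the compacted format; ecx stays zero for
                             components that are not saved.  */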
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = (xstate_comp_offsets[X86_XSTATE_MAX_ID]
                       + xstate_comp_sizes[X86_XSTATE_MAX_ID]);
                  if (size)
                    {
                      size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
                                       64);
#ifdef __x86_64__
                      _dl_x86_features_tlsdesc_state_size = size;
                      /* Exclude the AMX space from the start of TILECFG
                         space to the end of TILEDATA space.  If CPU
                         doesn't support AMX, TILECFG offset is the same
                         as TILEDATA + 1 offset.  Otherwise, they are
                         multiples of 64.  */
                      size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
                               - xstate_comp_offsets[X86_XSTATE_TILECFG_ID]);
#endif
                      cpu_features->xsave_state_size = size;
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  dl_check_hwcap2 (cpu_features);

  cpu_features->isa_1 = get_isa_level (cpu_features);
}

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
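      /* CPUID leaf 1 EAX: stepping in bits 0-3, model in bits 4-7, family
         in bits 8-11, extended model in bits 16-19 (extracted below already
         shifted into the high nibble) and extended family in bits 20-27.  */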
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}

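/* init_cpu_features below ORs all of these bits into the same preferred[]
   element in a single statement, so they must share one index.  */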
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");


/* Intel microarch list.  */
enum intel_microarch
{
  /* Atom processors.  */
  INTEL_ATOM_BONNELL,
  INTEL_ATOM_SILVERMONT,
  INTEL_ATOM_AIRMONT,
  INTEL_ATOM_GOLDMONT,
  INTEL_ATOM_GOLDMONT_PLUS,
  INTEL_ATOM_SIERRAFOREST,
  INTEL_ATOM_CLEARWATERFOREST,
  INTEL_ATOM_GRANDRIDGE,
  INTEL_ATOM_TREMONT,

  /* Bigcore processors.  */
  INTEL_BIGCORE_MEROM,
  INTEL_BIGCORE_PENRYN,
  INTEL_BIGCORE_DUNNINGTON,
  INTEL_BIGCORE_NEHALEM,
  INTEL_BIGCORE_WESTMERE,
  INTEL_BIGCORE_SANDYBRIDGE,
  INTEL_BIGCORE_IVYBRIDGE,
  INTEL_BIGCORE_HASWELL,
  INTEL_BIGCORE_BROADWELL,
  INTEL_BIGCORE_SKYLAKE,
  INTEL_BIGCORE_KABYLAKE,
  INTEL_BIGCORE_COMETLAKE,
  INTEL_BIGCORE_SKYLAKE_AVX512,
  INTEL_BIGCORE_CANNONLAKE,
  INTEL_BIGCORE_ICELAKE,
  INTEL_BIGCORE_TIGERLAKE,
  INTEL_BIGCORE_ROCKETLAKE,
  INTEL_BIGCORE_SAPPHIRERAPIDS,
  INTEL_BIGCORE_RAPTORLAKE,
  INTEL_BIGCORE_EMERALDRAPIDS,
  INTEL_BIGCORE_METEORLAKE,
  INTEL_BIGCORE_LUNARLAKE,
  INTEL_BIGCORE_ARROWLAKE,
  INTEL_BIGCORE_PANTHERLAKE,
  INTEL_BIGCORE_GRANITERAPIDS,
  INTEL_BIGCORE_DIAMONDRAPIDS,

  /* Mixed (bigcore + atom SOC).  */
  INTEL_MIXED_LAKEFIELD,
  INTEL_MIXED_ALDERLAKE,

  /* KNL.  */
  INTEL_KNIGHTS_MILL,
  INTEL_KNIGHTS_LANDING,

  /* Unknown.  */
  INTEL_UNKNOWN,
};

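/* Map a family 0x06 model number to its microarchitecture.  The stepping
   argument is accepted for completeness but is not needed to distinguish
   the models handled below.  */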
static enum intel_microarch
intel_get_fam6_microarch (unsigned int model,
                          __attribute__ ((unused)) unsigned int stepping)
{
  switch (model)
    {
    case 0x1C:
    case 0x26:
      return INTEL_ATOM_BONNELL;
    case 0x27:
    case 0x35:
    case 0x36:
      /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
         (microarchitecturally identical).  */
      return INTEL_ATOM_BONNELL;
    case 0x37:
    case 0x4A:
    case 0x4D:
    case 0x5D:
      return INTEL_ATOM_SILVERMONT;
    case 0x4C:
    case 0x5A:
    case 0x75:
      return INTEL_ATOM_AIRMONT;
    case 0x5C:
    case 0x5F:
      return INTEL_ATOM_GOLDMONT;
    case 0x7A:
      return INTEL_ATOM_GOLDMONT_PLUS;
    case 0xAF:
      return INTEL_ATOM_SIERRAFOREST;
    case 0xDD:
      return INTEL_ATOM_CLEARWATERFOREST;
    case 0xB6:
      return INTEL_ATOM_GRANDRIDGE;
    case 0x86:
    case 0x96:
    case 0x9C:
      return INTEL_ATOM_TREMONT;
    case 0x0F:
    case 0x16:
      return INTEL_BIGCORE_MEROM;
    case 0x17:
      return INTEL_BIGCORE_PENRYN;
    case 0x1D:
      return INTEL_BIGCORE_DUNNINGTON;
    case 0x1A:
    case 0x1E:
    case 0x1F:
    case 0x2E:
      return INTEL_BIGCORE_NEHALEM;
    case 0x25:
    case 0x2C:
    case 0x2F:
      return INTEL_BIGCORE_WESTMERE;
    case 0x2A:
    case 0x2D:
      return INTEL_BIGCORE_SANDYBRIDGE;
    case 0x3A:
    case 0x3E:
      return INTEL_BIGCORE_IVYBRIDGE;
    case 0x3C:
    case 0x3F:
    case 0x45:
    case 0x46:
      return INTEL_BIGCORE_HASWELL;
    case 0x3D:
    case 0x47:
    case 0x4F:
    case 0x56:
      return INTEL_BIGCORE_BROADWELL;
    case 0x4E:
    case 0x5E:
      return INTEL_BIGCORE_SKYLAKE;
    case 0x8E:
      /*
       Stepping = {9}
          -> Amberlake
       Stepping = {10}
          -> Coffeelake
       Stepping = {11, 12}
          -> Whiskeylake
       else
          -> Kabylake

       All of these are derivatives of Kabylake (Skylake client).
       */
      return INTEL_BIGCORE_KABYLAKE;
    case 0x9E:
      /*
       Stepping = {10, 11, 12, 13}
          -> Coffeelake
       else
          -> Kabylake

       Coffeelake is a derivative of Kabylake (Skylake client).
       */
      return INTEL_BIGCORE_KABYLAKE;
    case 0xA5:
    case 0xA6:
      return INTEL_BIGCORE_COMETLAKE;
    case 0x66:
      return INTEL_BIGCORE_CANNONLAKE;
    case 0x55:
      /*
       Stepping = {6, 7}
          -> Cascadelake
       Stepping = {11}
          -> Cooperlake
       else
          -> Skylake-avx512

       These are all microarchitecturally identical, so use
       Skylake-avx512 for all of them.
       */
      return INTEL_BIGCORE_SKYLAKE_AVX512;
    case 0x6A:
    case 0x6C:
    case 0x7D:
    case 0x7E:
    case 0x9D:
      return INTEL_BIGCORE_ICELAKE;
    case 0x8C:
    case 0x8D:
      return INTEL_BIGCORE_TIGERLAKE;
    case 0xA7:
      return INTEL_BIGCORE_ROCKETLAKE;
    case 0x8F:
      return INTEL_BIGCORE_SAPPHIRERAPIDS;
    case 0xB7:
    case 0xBA:
    case 0xBF:
      return INTEL_BIGCORE_RAPTORLAKE;
    case 0xCF:
      return INTEL_BIGCORE_EMERALDRAPIDS;
    case 0xAA:
    case 0xAC:
      return INTEL_BIGCORE_METEORLAKE;
    case 0xBD:
      return INTEL_BIGCORE_LUNARLAKE;
    case 0xB5:
    case 0xC5:
    case 0xC6:
      return INTEL_BIGCORE_ARROWLAKE;
    case 0xCC:
      return INTEL_BIGCORE_PANTHERLAKE;
    case 0xAD:
    case 0xAE:
      return INTEL_BIGCORE_GRANITERAPIDS;
    case 0x8A:
      return INTEL_MIXED_LAKEFIELD;
    case 0x97:
    case 0x9A:
    case 0xBE:
      return INTEL_MIXED_ALDERLAKE;
    case 0x85:
      return INTEL_KNIGHTS_MILL;
    case 0x57:
      return INTEL_KNIGHTS_LANDING;
    default:
      return INTEL_UNKNOWN;
    }
}

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

  /* Default is to avoid non-temporal memset on hardware that is neither
     Intel nor AMD; as of this writing, we only have benchmarks indicating
     its profitability on Intel/AMD.  */
  cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
    |= bit_arch_Avoid_Non_Temporal_Memset;

  cpu_features->cachesize_non_temporal_divisor = 4;
#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

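  /* Leaf 0: EAX is the highest supported basic leaf; EBX, EDX and ECX
     together spell the 12-byte vendor string checked below.  */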
  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      /* Benchmarks indicate non-temporal memset can be profitable on Intel
         hardware.  */
      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
        &= ~bit_arch_Avoid_Non_Temporal_Memset;

      enum intel_microarch microarch = INTEL_UNKNOWN;
      if (family == 0x06)
        {
          model += extended_model;
          microarch = intel_get_fam6_microarch (model, stepping);

          /* Disable TSX on some processors to avoid TSX on kernels that
             weren't updated with the latest microcode package (which
             disables broken feature by default).  */
          switch (microarch)
            {
            default:
              break;

            case INTEL_BIGCORE_SKYLAKE_AVX512:
              /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX.  */
              if (stepping <= 5)
                goto disable_tsx;
              break;

            case INTEL_BIGCORE_KABYLAKE:
              /* NB: Although the errata documents that for model == 0x8e
                 (kabylake skylake client), only 0xb stepping or lower are
                 impacted, the intention of the errata was to disable TSX on
                 all client processors on all steppings.  Include 0xc
                 stepping which is an Intel Core i7-8665U, a client mobile
                 processor.  */
              if (stepping > 0xc)
                break;
              /* Fall through.  */
            case INTEL_BIGCORE_SKYLAKE:
              /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
                 processors listed in:

https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
               */
disable_tsx:
              CPU_FEATURE_UNSET (cpu_features, HLE);
              CPU_FEATURE_UNSET (cpu_features, RTM);
              CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
              break;

            case INTEL_BIGCORE_HASWELL:
              /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
                 TSX.  Haswell also includes other model numbers that have
                 working TSX.  */
              if (model == 0x3f && stepping >= 4)
                break;

              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }
      else if (family == 19)
        switch (model)
          {
          case 0x01:
            microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
            break;

          default:
            break;
          }

      switch (microarch)
        {
        /* Atom / KNL tuning.  */
        case INTEL_ATOM_BONNELL:
          /* BSF is slow on Bonnell.  */
          cpu_features->preferred[index_arch_Slow_BSF]
            |= bit_arch_Slow_BSF;
          break;

          /* Unaligned load versions are faster than SSSE3
             on Airmont, Silvermont, Goldmont, and Goldmont Plus.  */
        case INTEL_ATOM_AIRMONT:
        case INTEL_ATOM_SILVERMONT:
        case INTEL_ATOM_GOLDMONT:
        case INTEL_ATOM_GOLDMONT_PLUS:

          /* Knights Landing.  Enable Silvermont optimizations.  */
        case INTEL_KNIGHTS_LANDING:

          cpu_features->preferred[index_arch_Fast_Unaligned_Load]
            |= (bit_arch_Fast_Unaligned_Load
                | bit_arch_Fast_Unaligned_Copy
                | bit_arch_Prefer_PMINUB_for_stringop
                | bit_arch_Slow_SSE4_2);
          break;

        case INTEL_ATOM_TREMONT:
          /* Enable rep string instructions, unaligned load, unaligned
             copy, pminub and avoid SSE 4.2 on Tremont.  */
          cpu_features->preferred[index_arch_Fast_Rep_String]
            |= (bit_arch_Fast_Rep_String
                | bit_arch_Fast_Unaligned_Load
                | bit_arch_Fast_Unaligned_Copy
                | bit_arch_Prefer_PMINUB_for_stringop
                | bit_arch_Slow_SSE4_2);
          break;

        /*
          Default tuned Knights microarch.
          case INTEL_KNIGHTS_MILL:
        */

        /*
          Default tuned atom microarch.
          case INTEL_ATOM_SIERRAFOREST:
          case INTEL_ATOM_GRANDRIDGE:
          case INTEL_ATOM_CLEARWATERFOREST:
        */

        /* Bigcore/Default Tuning.  */
        default:
        default_tuning:
          /* Unknown Intel processors.  Assuming this is one of Core
             i3/i5/i7 processors if AVX is available.  */
          if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
            break;

        enable_modern_features:
          /* Rep string instructions, unaligned load, unaligned copy,
             and pminub are fast on Intel Core i3, i5 and i7.  */
          cpu_features->preferred[index_arch_Fast_Rep_String]
            |= (bit_arch_Fast_Rep_String
                | bit_arch_Fast_Unaligned_Load
                | bit_arch_Fast_Unaligned_Copy
                | bit_arch_Prefer_PMINUB_for_stringop);
          break;

        case INTEL_BIGCORE_NEHALEM:
        case INTEL_BIGCORE_WESTMERE:
          /* Older CPUs prefer non-temporal stores at lower threshold.  */
          cpu_features->cachesize_non_temporal_divisor = 8;
          goto enable_modern_features;

          /* Older Bigcore microarch (smaller non-temporal store
             threshold).  */
        case INTEL_BIGCORE_SANDYBRIDGE:
        case INTEL_BIGCORE_IVYBRIDGE:
        case INTEL_BIGCORE_HASWELL:
        case INTEL_BIGCORE_BROADWELL:
          cpu_features->cachesize_non_temporal_divisor = 8;
          goto default_tuning;

          /* Newer Bigcore microarch (larger non-temporal store
             threshold).  */
        case INTEL_BIGCORE_SKYLAKE_AVX512:
        case INTEL_BIGCORE_CANNONLAKE:
          /* Benchmarks indicate non-temporal memset is not
             necessarily profitable on SKX (and in some cases much
             worse).  This is likely unique to SKX due to its unique
             mesh interconnect (not present on ICX or BWD).  Disable
             non-temporal on all Skylake servers.  */
          cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
            |= bit_arch_Avoid_Non_Temporal_Memset;
          /* fallthrough */
        case INTEL_BIGCORE_COMETLAKE:
        case INTEL_BIGCORE_SKYLAKE:
        case INTEL_BIGCORE_KABYLAKE:
        case INTEL_BIGCORE_ICELAKE:
        case INTEL_BIGCORE_TIGERLAKE:
        case INTEL_BIGCORE_ROCKETLAKE:
        case INTEL_BIGCORE_RAPTORLAKE:
        case INTEL_BIGCORE_METEORLAKE:
        case INTEL_BIGCORE_LUNARLAKE:
        case INTEL_BIGCORE_ARROWLAKE:
        case INTEL_BIGCORE_PANTHERLAKE:
        case INTEL_BIGCORE_SAPPHIRERAPIDS:
        case INTEL_BIGCORE_EMERALDRAPIDS:
        case INTEL_BIGCORE_GRANITERAPIDS:
        case INTEL_BIGCORE_DIAMONDRAPIDS:
          /* Default tuned Mixed (bigcore + atom SOC).  */
        case INTEL_MIXED_LAKEFIELD:
        case INTEL_MIXED_ALDERLAKE:
          cpu_features->cachesize_non_temporal_divisor = 2;
          goto default_tuning;
        }

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        {
          /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
             when ZMM load and store instructions are used.  */
          if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
            cpu_features->preferred[index_arch_Prefer_No_AVX512]
              |= bit_arch_Prefer_No_AVX512;

          /* Avoid RTM abort triggered by VZEROUPPER inside a
             transactionally executing RTM region.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
        }

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      /* Benchmarks indicate non-temporal memset can be profitable on AMD
         hardware.  */
      cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
        &= ~bit_arch_Avoid_Non_Temporal_Memset;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

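      /* Family 0x15 is the AMD Bulldozer line; only the Excavator models
         (0x60 to 0x7f) get the unaligned-copy tuning below.  */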
      if (family == 0x15)
        {
          /* "Excavator"  */
          if (model >= 0x60 && model <= 0x7f)
            {
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Copy_Backward);

              /* Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (family == 0x6)
        {
          if (model == 0xf || model == 0x19)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

#ifdef __LP64__
  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
               TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
#endif

  /* Do not add the logic to disable XSAVE/XSAVEC if this glibc build
     requires AVX and therefore XSAVE or XSAVEC support.  */
#ifndef GCCMACRO__AVX__
  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }
#endif

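  /* Set GLRO(dl_hwcap) and GLRO(dl_platform), which the dynamic loader
     uses for capability bits and the platform string.  */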
#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

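      /* Only advertise the "haswell" platform name when all of the
         features it assumes are usable on this CPU.  */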
      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));
#endif

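  /* Select the _dl_runtime_resolve and _dl_tlsdesc_dynamic variants:
     XSAVEC when usable, otherwise XSAVE, and FXSAVE (or FNSAVE on i386
     without FXSR) when no XSAVE state is available.  */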
  if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
      || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
    {
      if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
        {
#ifdef __x86_64__
          GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
#endif
#ifdef SHARED
          GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
#endif
        }
      else
        {
#ifdef __x86_64__
          GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
#endif
#ifdef SHARED
          GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
#endif
        }
    }
  else
    {
#ifdef __x86_64__
      GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
# ifdef SHARED
      GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
# endif
#else
# ifdef SHARED
      if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
        GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
      else
        GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
# endif
#endif
    }

#ifdef SHARED
# ifdef __x86_64__
  TUNABLE_GET (plt_rewrite, tunable_val_t *,
               TUNABLE_CALLBACK (set_plt_rewrite));
# endif
#else
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}
