1 | /* x86 CPU feature tuning. |
2 | This file is part of the GNU C Library. |
3 | Copyright (C) 2017-2024 Free Software Foundation, Inc. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #define TUNABLE_NAMESPACE cpu |
20 | #include <stdbool.h> |
21 | #include <stdint.h> |
22 | #include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */ |
23 | #include <elf/dl-tunables.h> |
24 | #include <string.h> |
25 | #include <cpu-features.h> |
26 | #include <ldsodefs.h> |
27 | #include <dl-tunables-parse.h> |
28 | #include <dl-symbol-redir-ifunc.h> |
29 | |
/* If tunable_str_comma_strcmp_cte reports that token F matches the
   string #NAME, apply CPU_FEATURE_UNSET for NAME to CPU_FEATURES and
   `break' out of the enclosing switch.  LEN must be the length of
   #NAME without its terminating NUL; this is verified at compile time.
   Because the expansion contains `break' and a leading _Static_assert,
   use it only as a statement inside a braced switch/loop body.
   CPU_FEATURE_UNSET is expected to supply its own trailing
   semicolon.  */
#define CHECK_GLIBC_IFUNC_CPU_OFF(f, cpu_features, name, len)           \
  _Static_assert (len == sizeof (#name) - 1, #name " != " #len);        \
  if (tunable_str_comma_strcmp_cte (&f, #name))                         \
    {                                                                   \
      CPU_FEATURE_UNSET (cpu_features, name)                            \
      break;                                                            \
    }
37 | |
/* If tunable_str_comma_strcmp_cte reports that token F matches the
   string #NAME, apply CPU_FEATURE_UNSET (when F.disable is set) or
   CPU_FEATURE_SET_ACTIVE (otherwise) for NAME to CPU_FEATURES, then
   `break' out of the enclosing switch.  LEN must be the length of
   #NAME without its terminating NUL; this is verified at compile time.

   Both branches are braced so that the expansion does not depend on
   the two feature macros each expanding to exactly one statement; they
   are still expected to supply their own trailing semicolon.  Because
   the expansion contains `break' and a leading _Static_assert, use it
   only as a statement inside a braced switch/loop body.  */
#define CHECK_GLIBC_IFUNC_CPU_BOTH(f, cpu_features, name, len)          \
  _Static_assert (sizeof (#name) - 1 == len, #name " != " #len);        \
  if (tunable_str_comma_strcmp_cte (&f, #name))                         \
    {                                                                   \
      if (f.disable)                                                    \
        {                                                               \
          CPU_FEATURE_UNSET (cpu_features, name)                        \
        }                                                               \
      else                                                              \
        {                                                               \
          CPU_FEATURE_SET_ACTIVE (cpu_features, name)                   \
        }                                                               \
      break;                                                            \
    }
48 | |
/* Turn off the preferred feature NAME: if tunable_str_comma_strcmp_cte
   reports that token F matches #NAME, clear bit_arch_NAME in
   CPU_FEATURES->preferred[index_arch_NAME] and `break' out of the
   enclosing switch.  There is deliberately no "enable" path here: a
   preferred feature which isn't available is never turned on.  LEN
   must be the length of #NAME without its terminating NUL (checked at
   compile time).  */
#define CHECK_GLIBC_IFUNC_PREFERRED_OFF(f, cpu_features, name, len)     \
  _Static_assert (len == sizeof (#name) - 1, #name " != " #len);        \
  if (tunable_str_comma_strcmp_cte (&f, #name))                         \
    {                                                                   \
      cpu_features->preferred[index_arch_##name] &= ~bit_arch_##name;   \
      break;                                                            \
    }
59 | |
/* Turn the preferred feature NAME on or off: if
   tunable_str_comma_strcmp_cte reports that token F matches #NAME, set
   bit_arch_NAME in CPU_FEATURES->preferred[index_arch_NAME] when
   F.disable is clear, or clear that bit when F.disable is set, and
   then `break' out of the enclosing switch.  LEN must be the length of
   #NAME without its terminating NUL (checked at compile time).  */
#define CHECK_GLIBC_IFUNC_PREFERRED_BOTH(f, cpu_features, name, len)    \
  _Static_assert (len == sizeof (#name) - 1, #name " != " #len);        \
  if (tunable_str_comma_strcmp_cte (&f, #name))                         \
    {                                                                   \
      if (!f.disable)                                                   \
        cpu_features->preferred[index_arch_##name] |= bit_arch_##name;  \
      else                                                              \
        cpu_features->preferred[index_arch_##name] &= ~bit_arch_##name; \
      break;                                                            \
    }
71 | |
/* Turn the preferred feature NAME on or off, where turning it on is
   conditional on the CPU feature NEED: if tunable_str_comma_strcmp_cte
   reports that token F matches #NAME, then
     - with F.disable set, clear bit_arch_NAME in
       CPU_FEATURES->preferred[index_arch_NAME];
     - otherwise set that bit only when
       CPU_FEATURE_USABLE_P (CPU_FEATURES, NEED) is true, and leave it
       untouched when it is not;
   and `break' out of the enclosing switch in either case.  LEN must be
   the length of #NAME without its terminating NUL (checked at compile
   time).  */
#define CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH(f, cpu_features, name,    \
                                              need, len)                \
  _Static_assert (len == sizeof (#name) - 1, #name " != " #len);        \
  if (tunable_str_comma_strcmp_cte (&f, #name))                         \
    {                                                                   \
      if (!f.disable)                                                   \
        {                                                               \
          if (CPU_FEATURE_USABLE_P (cpu_features, need))                \
            cpu_features->preferred[index_arch_##name]                  \
              |= bit_arch_##name;                                       \
        }                                                               \
      else                                                              \
        cpu_features->preferred[index_arch_##name] &= ~bit_arch_##name; \
      break;                                                            \
    }
85 | |
86 | attribute_hidden |
87 | void |
88 | TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) |
89 | { |
90 | /* The current IFUNC selection is based on microbenchmarks in glibc. |
91 | It should give the best performance for most workloads. But other |
92 | choices may have better performance for a particular workload or on |
93 | the hardware which wasn't available when the selection was made. |
94 | The environment variable: |
95 | |
96 | GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,-zzz,.... |
97 | |
98 | can be used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature |
99 | yyy and zzz, where the feature name is case-sensitive and has to |
100 | match the ones in cpu-features.h. It can be used by glibc developers |
101 | to tune for a new processor or override the IFUNC selection to |
102 | improve performance for a particular workload. |
103 | |
104 | NOTE: the IFUNC selection may change over time. Please check all |
105 | multiarch implementations when experimenting. */ |
106 | |
107 | struct cpu_features *cpu_features = &GLRO(dl_x86_cpu_features); |
108 | |
109 | struct tunable_str_comma_state_t ts; |
110 | tunable_str_comma_init (state: &ts, valp); |
111 | |
112 | struct tunable_str_comma_t n; |
113 | while (tunable_str_comma_next (state: &ts, str: &n)) |
114 | { |
115 | switch (n.len) |
116 | { |
117 | default: |
118 | break; |
119 | case 3: |
120 | if (n.disable) |
121 | { |
122 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX, 3); |
123 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CX8, 3); |
124 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA, 3); |
125 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, HTT, 3); |
126 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, IBT, 3); |
127 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, RTM, 3); |
128 | } |
129 | break; |
130 | case 4: |
131 | if (n.disable) |
132 | { |
133 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX2, 4); |
134 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI1, 4); |
135 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI2, 4); |
136 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CMOV, 4); |
137 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, ERMS, 4); |
138 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA4, 4); |
139 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE2, 4); |
140 | CHECK_GLIBC_IFUNC_PREFERRED_OFF (n, cpu_features, I586, 4); |
141 | CHECK_GLIBC_IFUNC_PREFERRED_OFF (n, cpu_features, I686, 4); |
142 | } |
143 | break; |
144 | case 5: |
145 | { |
146 | CHECK_GLIBC_IFUNC_CPU_BOTH (n, cpu_features, SHSTK, 5); |
147 | } |
148 | if (n.disable) |
149 | { |
150 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, LZCNT, 5); |
151 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, MOVBE, 5); |
152 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSSE3, 5); |
153 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, XSAVE, 5); |
154 | } |
155 | break; |
156 | case 6: |
157 | if (n.disable) |
158 | { |
159 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, POPCNT, 6); |
160 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_1, 6); |
161 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_2, 6); |
162 | if (memcmp (n.str, "XSAVEC" , 6) == 0) |
163 | { |
164 | /* Update xsave_state_size to XSAVE state size. */ |
165 | cpu_features->xsave_state_size |
166 | = cpu_features->xsave_state_full_size; |
167 | _dl_x86_features_tlsdesc_state_size |
168 | = cpu_features->xsave_state_full_size; |
169 | CPU_FEATURE_UNSET (cpu_features, XSAVEC); |
170 | } |
171 | } |
172 | break; |
173 | case 7: |
174 | if (n.disable) |
175 | { |
176 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512F, 7); |
177 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, OSXSAVE, 7); |
178 | } |
179 | break; |
180 | case 8: |
181 | if (n.disable) |
182 | { |
183 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512CD, 8); |
184 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512BW, 8); |
185 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512DQ, 8); |
186 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512ER, 8); |
187 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512PF, 8); |
188 | CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512VL, 8); |
189 | } |
190 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Slow_BSF, 8); |
191 | break; |
192 | case 11: |
193 | { |
194 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Prefer_ERMS, |
195 | 11); |
196 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Prefer_FSRM, |
197 | 11); |
198 | CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH (n, cpu_features, |
199 | Slow_SSE4_2, |
200 | SSE4_2, |
201 | 11); |
202 | } |
203 | break; |
204 | case 15: |
205 | { |
206 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, |
207 | Fast_Rep_String, 15); |
208 | } |
209 | break; |
210 | case 16: |
211 | { |
212 | CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH |
213 | (n, cpu_features, Prefer_No_AVX512, AVX512F, 16); |
214 | } |
215 | break; |
216 | case 18: |
217 | { |
218 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, |
219 | Fast_Copy_Backward, 18); |
220 | } |
221 | break; |
222 | case 19: |
223 | { |
224 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, |
225 | Fast_Unaligned_Load, 19); |
226 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, |
227 | Fast_Unaligned_Copy, 19); |
228 | } |
229 | break; |
230 | case 20: |
231 | { |
232 | CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH |
233 | (n, cpu_features, Prefer_No_VZEROUPPER, AVX, 20); |
234 | } |
235 | break; |
236 | case 23: |
237 | { |
238 | CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH |
239 | (n, cpu_features, AVX_Fast_Unaligned_Load, AVX, 23); |
240 | } |
241 | break; |
242 | case 24: |
243 | { |
244 | CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH |
245 | (n, cpu_features, MathVec_Prefer_No_AVX512, AVX512F, 24); |
246 | } |
247 | break; |
248 | case 25: |
249 | { |
250 | CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, |
251 | Avoid_Non_Temporal_Memset, 25); |
252 | } |
253 | break; |
254 | case 26: |
255 | { |
256 | CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH |
257 | (n, cpu_features, Prefer_PMINUB_for_stringop, SSE2, 26); |
258 | } |
259 | break; |
260 | } |
261 | } |
262 | } |
263 | |
264 | #if CET_ENABLED |
265 | attribute_hidden |
266 | void |
267 | TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *valp) |
268 | { |
269 | if (tunable_strcmp_cte (valp, "on" )) |
270 | GL(dl_x86_feature_control).ibt = cet_always_on; |
271 | else if (tunable_strcmp_cte (valp, "off" )) |
272 | GL(dl_x86_feature_control).ibt = cet_always_off; |
273 | else if (tunable_strcmp_cte (valp, "permissive" )) |
274 | GL(dl_x86_feature_control).ibt = cet_permissive; |
275 | } |
276 | |
277 | attribute_hidden |
278 | void |
279 | TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *valp) |
280 | { |
281 | if (tunable_strcmp_cte (valp, "on" )) |
282 | GL(dl_x86_feature_control).shstk = cet_always_on; |
283 | else if (tunable_strcmp_cte (valp, "off" )) |
284 | GL(dl_x86_feature_control).shstk = cet_always_off; |
285 | else if (tunable_strcmp_cte (valp, "permissive" )) |
286 | GL(dl_x86_feature_control).shstk = cet_permissive; |
287 | } |
288 | #endif |
289 | |