// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to identify caches on Intel CPUs.
 *
 * Changes:
 * Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>
#include <linux/stop_machine.h>

#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>

#include "cpu.h"

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2-way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },		/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },		/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },		/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },		/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },		/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },		/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },		/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },		/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },		/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },		/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2, MB(3) },		/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },		/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },		/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },		/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },		/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },		/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },		/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },		/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2, 512 },		/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },		/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },		/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },		/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },		/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },		/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },		/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },		/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },		/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },		/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },		/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },		/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },		/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },		/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

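/*
 * Associativity values as encoded by the AMD CPUID leaves 0x80000005 and
 * 0x80000006, translated to an actual number of ways (0xf means fully
 * associative).
 */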
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

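/* Cache level and type (1=data, 2=inst, 3=unified) per emulated CPUID4 leaf */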
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
	   union _cpuid4_leaf_ebx *ebx,
	   union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = topology_num_cores_per_package();

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
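/*
 * Compute the per-subcache sizes from the northbridge's 0x1C4 config
 * register and derive the number of L3 indices available for the
 * index-disable interface.
 */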
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

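/*
 * amd_l3_disable_index - disable one L3 cache index in a given slot
 * @nb:   northbridge descriptor of the node owning the L3
 * @cpu:  a CPU on that node, used for the WBINVD
 * @slot: disable slot (0 or 1)
 * @idx:  cache index to disable
 */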
static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3: L3 cache descriptor
 * @cpu: A CPU on the node containing the L3 cache
 * @slot: slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = topology_amd_node_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */

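/*
 * Fill in one _cpuid4_info_regs entry for cache leaf @index, using CPUID
 * leaf 4 (Intel), leaf 0x8000001d (AMD/Hygon with topology extensions) or
 * the amd_cpuid4() emulation, and compute the cache size in bytes.
 */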
static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		cpuid_count(0x8000001d, index, &eax.full,
			    &ebx.full, &ecx.full, &edx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets + 1) *
			  (ebx.split.coherency_line_size + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity + 1);
	return 0;
}

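/*
 * Count the cache leaves by iterating CPUID leaf 4 (or 0x8000001d on
 * AMD/Hygon) until a leaf of type CTYPE_NULL is returned.
 */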
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		c->topo.llc_id = die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			c->topo.llc_id = c->topo.apicid >> bits;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	c->topo.llc_id = c->topo.apicid >> 3;
}

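/*
 * Determine the number of cache leaves on AMD: via CPUID 0x8000001d if
 * topology extensions are available, otherwise infer it from whether an
 * L3 cache is reported in CPUID 0x80000006.
 */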
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	num_cache_leaves = find_num_cache_leaves(c);
}

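/*
 * Gather L1/L2/L3 sizes for an Intel CPU: prefer the deterministic cache
 * parameters in cpuid(4), fall back to the cpuid(2) descriptor table for
 * older parts (and for the P4 trace cache), then derive llc_id/l2c_id from
 * the sharing information.
 */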
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters leaf, to find the cache details.
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2 call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
		c->topo.llc_id = l2_id;
		c->topo.l2c_id = l2_id;
	}

	if (new_l3) {
		l3 = new_l3;
		c->topo.llc_id = l3_id;
	}

	/*
	 * If llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->topo.pkg_id.
	 */
	if (c->topo.llc_id == BAD_APICID)
		c->topo.llc_id = c->topo.pkg_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}

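/*
 * AMD/Hygon: build shared_cpu_map either from the pre-computed
 * cpu_llc_shared_mask (L3) or from the APIC ID range implied by
 * num_threads_sharing (with topology extensions). Returns 1 when the map
 * was set up here, 0 to fall back to the generic path.
 */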
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).topo.apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).topo.apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).topo.apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

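/*
 * Generic path: mark every online CPU whose APIC ID, shifted by the
 * sharing order, matches this CPU's as sharing the cache at @index.
 */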
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

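/* Copy one decoded CPUID4 leaf into the generic struct cacheinfo entry */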
static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->topo.apicid >> index_msb;
}

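/*
 * Fill this CPU's cacheinfo list: decode each CPUID4 leaf, assign a cache
 * id and build the shared_cpu_map for every leaf.
 */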
int populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts; they would
 * run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

void cache_disable(void) __acquires(cache_disable_lock)
{
	unsigned long cr0;

	/*
	 * Note that this is not ideal, since the cache is only
	 * flushed/disabled for this CPU while the MTRRs are changed, but
	 * changing this requires more invasive changes to the way the
	 * kernel boots.
	 */

	raw_spin_lock(&cache_disable_lock);

	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);

	/*
	 * Cache flushing is the most time-consuming step when programming
	 * the MTRRs. Fortunately, as per the Intel Software Development
	 * Manual, we can skip it if the processor supports cache self-
	 * snooping.
	 */
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
		saved_cr4 = __read_cr4();
		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
	}

	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_disable();

	/* Again, only flush caches if we have to. */
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}

void cache_enable(void) __releases(cache_disable_lock)
{
	/* Flush TLBs (no need to flush caches - they are disabled) */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_enable();

	/* Enable caches */
	write_cr0(read_cr0() & ~X86_CR0_CD);

	/* Restore value of CR4 */
	if (cpu_feature_enabled(X86_FEATURE_PGE))
		__write_cr4(saved_cr4);

	raw_spin_unlock(&cache_disable_lock);
}

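/* Program MTRR state and/or the PAT MSR on this CPU with interrupts off */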
static void cache_cpu_init(void)
{
	unsigned long flags;

	local_irq_save(flags);

	if (memory_caching_control & CACHE_MTRR) {
		cache_disable();
		mtrr_generic_set_state();
		cache_enable();
	}

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}

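/*
 * stop_machine() callback: (re)initialize cache settings on CPUs that are
 * either doing the delayed AP init or are not yet marked online.
 */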
static int cache_rendezvous_handler(void *unused)
{
	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
		cache_cpu_init();

	return 0;
}

void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (memory_caching_control)
		cache_cpu_init();
}

void cache_bp_restore(void)
{
	if (memory_caching_control)
		cache_cpu_init();
}

static int cache_ap_online(unsigned int cpu)
{
	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

	if (!memory_caching_control || get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
	 * being changed, but this routine is called at CPU boot time and
	 * holding the lock would break it.
	 *
	 * This routine is called in two cases:
	 *
	 * 1. very early in software resume, when there absolutely are no
	 *    MTRR entry changes;
	 *
	 * 2. CPU hotadd time. We let mtrr_add/del_page hold the cpuhotplug
	 *    lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_cacheinfo_mask);

	return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
	return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
	if (!memory_caching_control || !get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}

static int __init cache_ap_register(void)
{
	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_online, cache_ap_offline);
	return 0;
}
early_initcall(cache_ap_register);