1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * acpi_numa.c - ACPI NUMA support |
4 | * |
5 | * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> |
6 | */ |
7 | |
8 | #define pr_fmt(fmt) "ACPI: " fmt |
9 | |
10 | #include <linux/module.h> |
11 | #include <linux/init.h> |
12 | #include <linux/kernel.h> |
13 | #include <linux/types.h> |
14 | #include <linux/errno.h> |
15 | #include <linux/acpi.h> |
16 | #include <linux/memblock.h> |
17 | #include <linux/numa.h> |
18 | #include <linux/nodemask.h> |
19 | #include <linux/topology.h> |
20 | |
21 | static nodemask_t nodes_found_map = NODE_MASK_NONE; |
22 | |
23 | /* maps to convert between proximity domain and logical node ID */ |
24 | static int pxm_to_node_map[MAX_PXM_DOMAINS] |
25 | = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; |
26 | static int node_to_pxm_map[MAX_NUMNODES] |
27 | = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; |
28 | |
29 | unsigned char acpi_srat_revision __initdata; |
30 | static int acpi_numa __initdata; |
31 | |
32 | static int last_real_pxm; |
33 | |
34 | void __init disable_srat(void) |
35 | { |
36 | acpi_numa = -1; |
37 | } |
38 | |
39 | int pxm_to_node(int pxm) |
40 | { |
41 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) |
42 | return NUMA_NO_NODE; |
43 | return pxm_to_node_map[pxm]; |
44 | } |
45 | EXPORT_SYMBOL(pxm_to_node); |
46 | |
47 | int node_to_pxm(int node) |
48 | { |
49 | if (node < 0) |
50 | return PXM_INVAL; |
51 | return node_to_pxm_map[node]; |
52 | } |
53 | |
54 | static void __acpi_map_pxm_to_node(int pxm, int node) |
55 | { |
56 | if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) |
57 | pxm_to_node_map[pxm] = node; |
58 | if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) |
59 | node_to_pxm_map[node] = pxm; |
60 | } |
61 | |
62 | int acpi_map_pxm_to_node(int pxm) |
63 | { |
64 | int node; |
65 | |
66 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) |
67 | return NUMA_NO_NODE; |
68 | |
69 | node = pxm_to_node_map[pxm]; |
70 | |
71 | if (node == NUMA_NO_NODE) { |
72 | node = first_unset_node(nodes_found_map); |
73 | if (node >= MAX_NUMNODES) |
74 | return NUMA_NO_NODE; |
75 | __acpi_map_pxm_to_node(pxm, node); |
76 | node_set(node, nodes_found_map); |
77 | } |
78 | |
79 | return node; |
80 | } |
81 | EXPORT_SYMBOL(acpi_map_pxm_to_node); |
82 | |
83 | static void __init |
84 | acpi_table_print_srat_entry(struct acpi_subtable_header *) |
85 | { |
86 | switch (header->type) { |
87 | case ACPI_SRAT_TYPE_CPU_AFFINITY: |
88 | { |
89 | struct acpi_srat_cpu_affinity *p = |
90 | (struct acpi_srat_cpu_affinity *)header; |
91 | pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n" , |
92 | p->apic_id, p->local_sapic_eid, |
93 | p->proximity_domain_lo, |
94 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? |
95 | "enabled" : "disabled" ); |
96 | } |
97 | break; |
98 | |
99 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: |
100 | { |
101 | struct acpi_srat_mem_affinity *p = |
102 | (struct acpi_srat_mem_affinity *)header; |
103 | pr_debug("SRAT Memory (0x%llx length 0x%llx) in proximity domain %d %s%s%s\n" , |
104 | (unsigned long long)p->base_address, |
105 | (unsigned long long)p->length, |
106 | p->proximity_domain, |
107 | (p->flags & ACPI_SRAT_MEM_ENABLED) ? |
108 | "enabled" : "disabled" , |
109 | (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? |
110 | " hot-pluggable" : "" , |
111 | (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ? |
112 | " non-volatile" : "" ); |
113 | } |
114 | break; |
115 | |
116 | case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: |
117 | { |
118 | struct acpi_srat_x2apic_cpu_affinity *p = |
119 | (struct acpi_srat_x2apic_cpu_affinity *)header; |
120 | pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n" , |
121 | p->apic_id, |
122 | p->proximity_domain, |
123 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? |
124 | "enabled" : "disabled" ); |
125 | } |
126 | break; |
127 | |
128 | case ACPI_SRAT_TYPE_GICC_AFFINITY: |
129 | { |
130 | struct acpi_srat_gicc_affinity *p = |
131 | (struct acpi_srat_gicc_affinity *)header; |
132 | pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n" , |
133 | p->acpi_processor_uid, |
134 | p->proximity_domain, |
135 | (p->flags & ACPI_SRAT_GICC_ENABLED) ? |
136 | "enabled" : "disabled" ); |
137 | } |
138 | break; |
139 | |
140 | case ACPI_SRAT_TYPE_GENERIC_AFFINITY: |
141 | { |
142 | struct acpi_srat_generic_affinity *p = |
143 | (struct acpi_srat_generic_affinity *)header; |
144 | |
145 | if (p->device_handle_type == 0) { |
146 | /* |
147 | * For pci devices this may be the only place they |
148 | * are assigned a proximity domain |
149 | */ |
150 | pr_debug("SRAT Generic Initiator(Seg:%u BDF:%u) in proximity domain %d %s\n" , |
151 | *(u16 *)(&p->device_handle[0]), |
152 | *(u16 *)(&p->device_handle[2]), |
153 | p->proximity_domain, |
154 | (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ? |
155 | "enabled" : "disabled" ); |
156 | } else { |
157 | /* |
158 | * In this case we can rely on the device having a |
159 | * proximity domain reference |
160 | */ |
161 | pr_debug("SRAT Generic Initiator(HID=%.8s UID=%.4s) in proximity domain %d %s\n" , |
162 | (char *)(&p->device_handle[0]), |
163 | (char *)(&p->device_handle[8]), |
164 | p->proximity_domain, |
165 | (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ? |
166 | "enabled" : "disabled" ); |
167 | } |
168 | } |
169 | break; |
170 | default: |
171 | pr_warn("Found unsupported SRAT entry (type = 0x%x)\n" , |
172 | header->type); |
173 | break; |
174 | } |
175 | } |
176 | |
177 | /* |
178 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes |
179 | * up the NUMA heuristics which wants the local node to have a smaller |
180 | * distance than the others. |
181 | * Do some quick checks here and only use the SLIT if it passes. |
182 | */ |
183 | static int __init slit_valid(struct acpi_table_slit *slit) |
184 | { |
185 | int i, j; |
186 | int d = slit->locality_count; |
187 | for (i = 0; i < d; i++) { |
188 | for (j = 0; j < d; j++) { |
189 | u8 val = slit->entry[d*i + j]; |
190 | if (i == j) { |
191 | if (val != LOCAL_DISTANCE) |
192 | return 0; |
193 | } else if (val <= LOCAL_DISTANCE) |
194 | return 0; |
195 | } |
196 | } |
197 | return 1; |
198 | } |
199 | |
200 | void __init bad_srat(void) |
201 | { |
202 | pr_err("SRAT: SRAT not used.\n" ); |
203 | disable_srat(); |
204 | } |
205 | |
206 | int __init srat_disabled(void) |
207 | { |
208 | return acpi_numa < 0; |
209 | } |
210 | |
211 | #if defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) |
212 | /* |
213 | * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for |
214 | * I/O localities since SRAT does not list them. I/O localities are |
215 | * not supported at this point. |
216 | */ |
217 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
218 | { |
219 | int i, j; |
220 | |
221 | for (i = 0; i < slit->locality_count; i++) { |
222 | const int from_node = pxm_to_node(i); |
223 | |
224 | if (from_node == NUMA_NO_NODE) |
225 | continue; |
226 | |
227 | for (j = 0; j < slit->locality_count; j++) { |
228 | const int to_node = pxm_to_node(j); |
229 | |
230 | if (to_node == NUMA_NO_NODE) |
231 | continue; |
232 | |
233 | numa_set_distance(from: from_node, to: to_node, |
234 | distance: slit->entry[slit->locality_count * i + j]); |
235 | } |
236 | } |
237 | } |
238 | |
239 | /* |
240 | * Default callback for parsing of the Proximity Domain <-> Memory |
241 | * Area mappings |
242 | */ |
243 | int __init |
244 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) |
245 | { |
246 | u64 start, end; |
247 | u32 hotpluggable; |
248 | int node, pxm; |
249 | |
250 | if (srat_disabled()) |
251 | goto out_err; |
252 | if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) { |
253 | pr_err("SRAT: Unexpected header length: %d\n" , |
254 | ma->header.length); |
255 | goto out_err_bad_srat; |
256 | } |
257 | if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) |
258 | goto out_err; |
259 | hotpluggable = IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && |
260 | (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE); |
261 | |
262 | start = ma->base_address; |
263 | end = start + ma->length; |
264 | pxm = ma->proximity_domain; |
265 | if (acpi_srat_revision <= 1) |
266 | pxm &= 0xff; |
267 | |
268 | node = acpi_map_pxm_to_node(pxm); |
269 | if (node == NUMA_NO_NODE) { |
270 | pr_err("SRAT: Too many proximity domains.\n" ); |
271 | goto out_err_bad_srat; |
272 | } |
273 | |
274 | if (numa_add_memblk(nodeid: node, start, end) < 0) { |
275 | pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n" , |
276 | node, (unsigned long long) start, |
277 | (unsigned long long) end - 1); |
278 | goto out_err_bad_srat; |
279 | } |
280 | |
281 | node_set(node, numa_nodes_parsed); |
282 | |
283 | pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n" , |
284 | node, pxm, |
285 | (unsigned long long) start, (unsigned long long) end - 1, |
286 | hotpluggable ? " hotplug" : "" , |
287 | ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "" ); |
288 | |
289 | /* Mark hotplug range in memblock. */ |
290 | if (hotpluggable && memblock_mark_hotplug(base: start, size: ma->length)) |
291 | pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n" , |
292 | (unsigned long long)start, (unsigned long long)end - 1); |
293 | |
294 | max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); |
295 | |
296 | return 0; |
297 | out_err_bad_srat: |
298 | bad_srat(); |
299 | out_err: |
300 | return -EINVAL; |
301 | } |
302 | |
303 | static int __init acpi_parse_cfmws(union acpi_subtable_headers *, |
304 | void *arg, const unsigned long table_end) |
305 | { |
306 | struct acpi_cedt_cfmws *cfmws; |
307 | int *fake_pxm = arg; |
308 | u64 start, end; |
309 | int node; |
310 | |
311 | cfmws = (struct acpi_cedt_cfmws *)header; |
312 | start = cfmws->base_hpa; |
313 | end = cfmws->base_hpa + cfmws->window_size; |
314 | |
315 | /* |
316 | * The SRAT may have already described NUMA details for all, |
317 | * or a portion of, this CFMWS HPA range. Extend the memblks |
318 | * found for any portion of the window to cover the entire |
319 | * window. |
320 | */ |
321 | if (!numa_fill_memblks(start, end)) |
322 | return 0; |
323 | |
324 | /* No SRAT description. Create a new node. */ |
325 | node = acpi_map_pxm_to_node(*fake_pxm); |
326 | |
327 | if (node == NUMA_NO_NODE) { |
328 | pr_err("ACPI NUMA: Too many proximity domains while processing CFMWS.\n" ); |
329 | return -EINVAL; |
330 | } |
331 | |
332 | if (numa_add_memblk(nodeid: node, start, end) < 0) { |
333 | /* CXL driver must handle the NUMA_NO_NODE case */ |
334 | pr_warn("ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n" , |
335 | node, start, end); |
336 | } |
337 | node_set(node, numa_nodes_parsed); |
338 | |
339 | /* Set the next available fake_pxm value */ |
340 | (*fake_pxm)++; |
341 | return 0; |
342 | } |
343 | #else |
344 | static int __init acpi_parse_cfmws(union acpi_subtable_headers *header, |
345 | void *arg, const unsigned long table_end) |
346 | { |
347 | return 0; |
348 | } |
#endif /* defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) */
350 | |
351 | static int __init acpi_parse_slit(struct acpi_table_header *table) |
352 | { |
353 | struct acpi_table_slit *slit = (struct acpi_table_slit *)table; |
354 | |
355 | if (!slit_valid(slit)) { |
356 | pr_info("SLIT table looks invalid. Not used.\n" ); |
357 | return -EINVAL; |
358 | } |
359 | acpi_numa_slit_init(slit); |
360 | |
361 | return 0; |
362 | } |
363 | |
364 | void __init __weak |
365 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) |
366 | { |
367 | pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n" , pa->apic_id); |
368 | } |
369 | |
370 | static int __init |
371 | acpi_parse_x2apic_affinity(union acpi_subtable_headers *, |
372 | const unsigned long end) |
373 | { |
374 | struct acpi_srat_x2apic_cpu_affinity *processor_affinity; |
375 | |
376 | processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; |
377 | |
378 | acpi_table_print_srat_entry(header: &header->common); |
379 | |
380 | /* let architecture-dependent part to do it */ |
381 | acpi_numa_x2apic_affinity_init(pa: processor_affinity); |
382 | |
383 | return 0; |
384 | } |
385 | |
386 | static int __init |
387 | acpi_parse_processor_affinity(union acpi_subtable_headers *, |
388 | const unsigned long end) |
389 | { |
390 | struct acpi_srat_cpu_affinity *processor_affinity; |
391 | |
392 | processor_affinity = (struct acpi_srat_cpu_affinity *)header; |
393 | |
394 | acpi_table_print_srat_entry(header: &header->common); |
395 | |
396 | /* let architecture-dependent part to do it */ |
397 | acpi_numa_processor_affinity_init(pa: processor_affinity); |
398 | |
399 | return 0; |
400 | } |
401 | |
402 | static int __init |
403 | acpi_parse_gicc_affinity(union acpi_subtable_headers *, |
404 | const unsigned long end) |
405 | { |
406 | struct acpi_srat_gicc_affinity *processor_affinity; |
407 | |
408 | processor_affinity = (struct acpi_srat_gicc_affinity *)header; |
409 | |
410 | acpi_table_print_srat_entry(header: &header->common); |
411 | |
412 | /* let architecture-dependent part to do it */ |
413 | acpi_numa_gicc_affinity_init(pa: processor_affinity); |
414 | |
415 | return 0; |
416 | } |
417 | |
418 | #if defined(CONFIG_X86) || defined(CONFIG_ARM64) |
419 | static int __init |
420 | acpi_parse_gi_affinity(union acpi_subtable_headers *, |
421 | const unsigned long end) |
422 | { |
423 | struct acpi_srat_generic_affinity *gi_affinity; |
424 | int node; |
425 | |
426 | gi_affinity = (struct acpi_srat_generic_affinity *)header; |
427 | if (!gi_affinity) |
428 | return -EINVAL; |
429 | acpi_table_print_srat_entry(header: &header->common); |
430 | |
431 | if (!(gi_affinity->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)) |
432 | return -EINVAL; |
433 | |
434 | node = acpi_map_pxm_to_node(gi_affinity->proximity_domain); |
435 | if (node == NUMA_NO_NODE) { |
436 | pr_err("SRAT: Too many proximity domains.\n" ); |
437 | return -EINVAL; |
438 | } |
439 | node_set(node, numa_nodes_parsed); |
440 | node_set_state(node, state: N_GENERIC_INITIATOR); |
441 | |
442 | return 0; |
443 | } |
444 | #else |
445 | static int __init |
446 | acpi_parse_gi_affinity(union acpi_subtable_headers *header, |
447 | const unsigned long end) |
448 | { |
449 | return 0; |
450 | } |
451 | #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ |
452 | |
453 | static int __initdata parsed_numa_memblks; |
454 | |
455 | static int __init |
456 | acpi_parse_memory_affinity(union acpi_subtable_headers * , |
457 | const unsigned long end) |
458 | { |
459 | struct acpi_srat_mem_affinity *memory_affinity; |
460 | |
461 | memory_affinity = (struct acpi_srat_mem_affinity *)header; |
462 | |
463 | acpi_table_print_srat_entry(header: &header->common); |
464 | |
465 | /* let architecture-dependent part to do it */ |
466 | if (!acpi_numa_memory_affinity_init(ma: memory_affinity)) |
467 | parsed_numa_memblks++; |
468 | return 0; |
469 | } |
470 | |
471 | static int __init acpi_parse_srat(struct acpi_table_header *table) |
472 | { |
473 | struct acpi_table_srat *srat = (struct acpi_table_srat *)table; |
474 | |
475 | acpi_srat_revision = srat->header.revision; |
476 | |
477 | /* Real work done in acpi_table_parse_srat below. */ |
478 | |
479 | return 0; |
480 | } |
481 | |
482 | static int __init |
483 | acpi_table_parse_srat(enum acpi_srat_type id, |
484 | acpi_tbl_entry_handler handler, unsigned int max_entries) |
485 | { |
486 | return acpi_table_parse_entries(ACPI_SIG_SRAT, |
487 | table_size: sizeof(struct acpi_table_srat), entry_id: id, |
488 | handler, max_entries); |
489 | } |
490 | |
491 | int __init acpi_numa_init(void) |
492 | { |
493 | int i, fake_pxm, cnt = 0; |
494 | |
495 | if (acpi_disabled) |
496 | return -EINVAL; |
497 | |
498 | /* |
499 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= |
500 | * SRAT cpu entries could have different order with that in MADT. |
501 | * So go over all cpu entries in SRAT to get apicid to node mapping. |
502 | */ |
503 | |
504 | /* SRAT: System Resource Affinity Table */ |
505 | if (!acpi_table_parse(ACPI_SIG_SRAT, handler: acpi_parse_srat)) { |
506 | struct acpi_subtable_proc srat_proc[4]; |
507 | |
508 | memset(srat_proc, 0, sizeof(srat_proc)); |
509 | srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; |
510 | srat_proc[0].handler = acpi_parse_processor_affinity; |
511 | srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY; |
512 | srat_proc[1].handler = acpi_parse_x2apic_affinity; |
513 | srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY; |
514 | srat_proc[2].handler = acpi_parse_gicc_affinity; |
515 | srat_proc[3].id = ACPI_SRAT_TYPE_GENERIC_AFFINITY; |
516 | srat_proc[3].handler = acpi_parse_gi_affinity; |
517 | |
518 | acpi_table_parse_entries_array(ACPI_SIG_SRAT, |
519 | table_size: sizeof(struct acpi_table_srat), |
520 | proc: srat_proc, ARRAY_SIZE(srat_proc), max_entries: 0); |
521 | |
522 | cnt = acpi_table_parse_srat(id: ACPI_SRAT_TYPE_MEMORY_AFFINITY, |
523 | handler: acpi_parse_memory_affinity, max_entries: 0); |
524 | } |
525 | |
526 | /* SLIT: System Locality Information Table */ |
527 | acpi_table_parse(ACPI_SIG_SLIT, handler: acpi_parse_slit); |
528 | |
529 | /* |
530 | * CXL Fixed Memory Window Structures (CFMWS) must be parsed |
531 | * after the SRAT. Create NUMA Nodes for CXL memory ranges that |
532 | * are defined in the CFMWS and not already defined in the SRAT. |
533 | * Initialize a fake_pxm as the first available PXM to emulate. |
534 | */ |
535 | |
536 | /* fake_pxm is the next unused PXM value after SRAT parsing */ |
537 | for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) { |
538 | if (node_to_pxm_map[i] > fake_pxm) |
539 | fake_pxm = node_to_pxm_map[i]; |
540 | } |
541 | last_real_pxm = fake_pxm; |
542 | fake_pxm++; |
543 | acpi_table_parse_cedt(id: ACPI_CEDT_TYPE_CFMWS, handler_arg: acpi_parse_cfmws, |
544 | arg: &fake_pxm); |
545 | |
546 | if (cnt < 0) |
547 | return cnt; |
548 | else if (!parsed_numa_memblks) |
549 | return -ENOENT; |
550 | return 0; |
551 | } |
552 | |
553 | bool acpi_node_backed_by_real_pxm(int nid) |
554 | { |
555 | int pxm = node_to_pxm(node: nid); |
556 | |
557 | return pxm <= last_real_pxm; |
558 | } |
559 | EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm); |
560 | |
561 | static int acpi_get_pxm(acpi_handle h) |
562 | { |
563 | unsigned long long pxm; |
564 | acpi_status status; |
565 | acpi_handle handle; |
566 | acpi_handle phandle = h; |
567 | |
568 | do { |
569 | handle = phandle; |
570 | status = acpi_evaluate_integer(handle, pathname: "_PXM" , NULL, data: &pxm); |
571 | if (ACPI_SUCCESS(status)) |
572 | return pxm; |
573 | status = acpi_get_parent(object: handle, out_handle: &phandle); |
574 | } while (ACPI_SUCCESS(status)); |
575 | return -1; |
576 | } |
577 | |
578 | int acpi_get_node(acpi_handle handle) |
579 | { |
580 | int pxm; |
581 | |
582 | pxm = acpi_get_pxm(h: handle); |
583 | |
584 | return pxm_to_node(pxm); |
585 | } |
586 | EXPORT_SYMBOL(acpi_get_node); |
587 | |