// SPDX-License-Identifier: GPL-2.0-only
/*
 * SMP initialisation and IPI support
 * Based on arch/arm/kernel/smp.c
 *
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/arm_sdei.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/sched/mm.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/interrupt.h>
#include <linux/cache.h>
#include <linux/profile.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/seq_file.h>
#include <linux/irq.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/percpu.h>
#include <linux/clockchips.h>
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
#include <linux/kernel_stat.h>
#include <linux/kexec.h>
#include <linux/kgdb.h>
#include <linux/kvm_host.h>
#include <linux/nmi.h>

#include <asm/alternative.h>
#include <asm/atomic.h>
#include <asm/cacheflush.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/kvm_mmu.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/processor.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
#include <asm/virt.h>

#include <trace/events/ipi.h>

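/*
 * Logical CPU number of each CPU. Its per-cpu address also serves as the
 * dev_id cookie passed to request_percpu_irq()/request_percpu_nmi() for
 * the IPI handlers registered in set_smp_ipi_range() below.
 */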
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);

/*
 * as from 2.5, kernels no longer have an init_tasks structure
 * so we need some other way of telling a new secondary core
 * where to place its SVC stack
 */
struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
enum ipi_msg_type {
	IPI_RESCHEDULE,
	IPI_CALL_FUNC,
	IPI_CPU_STOP,
	IPI_CPU_CRASH_STOP,
	IPI_TIMER,
	IPI_IRQ_WORK,
	NR_IPI,
	/*
	 * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
	 * with trace_ipi_*
	 */
	IPI_CPU_BACKTRACE = NR_IPI,
	IPI_KGDB_ROUNDUP,
	MAX_IPI
};

static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;

static void ipi_setup(int cpu);

#ifdef CONFIG_HOTPLUG_CPU
static void ipi_teardown(int cpu);
static int op_cpu_kill(unsigned int cpu);
#else
static inline int op_cpu_kill(unsigned int cpu)
{
	return -ENOSYS;
}
#endif

/*
 * Boot a secondary CPU, and assign it the specified idle task.
 * This also gives us the initial stack to use for this CPU.
 */
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	if (ops->cpu_boot)
		return ops->cpu_boot(cpu);

	return -EOPNOTSUPP;
}

static DECLARE_COMPLETION(cpu_running);

int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int ret;
	long status;

	/*
	 * We need to tell the secondary core where to find its stack and the
	 * page tables.
	 */
	secondary_data.task = idle;
	update_cpu_boot_status(CPU_MMU_OFF);

	/* Now bring the CPU into our world */
	ret = boot_secondary(cpu, idle);
	if (ret) {
		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
		return ret;
	}

	/*
	 * CPU was successfully started, wait for it to come online or
	 * time out.
	 */
	wait_for_completion_timeout(&cpu_running,
				    msecs_to_jiffies(5000));
	if (cpu_online(cpu))
		return 0;

	pr_crit("CPU%u: failed to come online\n", cpu);
	secondary_data.task = NULL;
	status = READ_ONCE(secondary_data.status);
	if (status == CPU_MMU_OFF)
		status = READ_ONCE(__early_cpu_boot_status);

	switch (status & CPU_BOOT_STATUS_MASK) {
	default:
		pr_err("CPU%u: failed in unknown state : 0x%lx\n",
		       cpu, status);
		cpus_stuck_in_kernel++;
		break;
	case CPU_KILL_ME:
		if (!op_cpu_kill(cpu)) {
			pr_crit("CPU%u: died during early boot\n", cpu);
			break;
		}
		pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
		fallthrough;
	case CPU_STUCK_IN_KERNEL:
		pr_crit("CPU%u: is stuck in kernel\n", cpu);
		if (status & CPU_STUCK_REASON_52_BIT_VA)
			pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
		if (status & CPU_STUCK_REASON_NO_GRAN) {
			pr_crit("CPU%u: does not support %luK granule\n",
				cpu, PAGE_SIZE / SZ_1K);
		}
		cpus_stuck_in_kernel++;
		break;
	case CPU_PANIC_KERNEL:
		panic("CPU%u detected unsupported configuration\n", cpu);
	}

	return -EIO;
}

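/*
 * Switch this CPU over to using the GIC's PMR register for interrupt
 * masking (pseudo-NMI support): enable the system register interface,
 * sanity-check that interrupts are still masked in DAIF, and prime PMR
 * so that normal interrupts can later be unmasked by priority alone.
 */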
static void init_gic_priority_masking(void)
{
	u32 cpuflags;

	if (WARN_ON(!gic_enable_sre()))
		return;

	cpuflags = read_sysreg(daif);

	WARN_ON(!(cpuflags & PSR_I_BIT));
	WARN_ON(!(cpuflags & PSR_F_BIT));

	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}

/*
 * This is the secondary CPU boot entry. We're using this CPU's
 * idle thread stack, but a set of temporary page tables.
 */
asmlinkage notrace void secondary_start_kernel(void)
{
	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
	struct mm_struct *mm = &init_mm;
	const struct cpu_operations *ops;
	unsigned int cpu = smp_processor_id();

	/*
	 * All kernel threads share the same mm context; grab a
	 * reference and switch to it.
	 */
	mmgrab(mm);
	current->active_mm = mm;

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_uninstall_idmap();

	if (system_uses_irq_prio_masking())
		init_gic_priority_masking();

	rcutree_report_cpu_starting(cpu);
	trace_hardirqs_off();

	/*
	 * If the system has established the capabilities, make sure
	 * this CPU ticks all of those. If it doesn't, the CPU will
	 * fail to come online.
	 */
	check_local_cpu_capabilities();

	ops = get_cpu_ops(cpu);
	if (ops->cpu_postboot)
		ops->cpu_postboot();

	/*
	 * Log the CPU info before it is marked online and might get read.
	 */
	cpuinfo_store_cpu();
	store_cpu_topology(cpu);

	/*
	 * Enable GIC and timers.
	 */
	notify_cpu_starting(cpu);

	ipi_setup(cpu);

	numa_add_cpu(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue. Wait for
	 * the CPU migration code to notice that the CPU is online
	 * before we continue.
	 */
	pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
		cpu, (unsigned long)mpidr,
		read_cpuid_id());
	update_cpu_boot_status(CPU_BOOT_SUCCESS);
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	local_daif_restore(DAIF_PROCCTX);

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_disable(unsigned int cpu)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	/*
	 * If we don't have a cpu_die method, abort before we reach the point
	 * of no return. CPU0 may not have a cpu_ops, so test for it.
	 */
	if (!ops || !ops->cpu_die)
		return -EOPNOTSUPP;

	/*
	 * We may need to abort a hot unplug for some other mechanism-specific
	 * reason.
	 */
	if (ops->cpu_disable)
		return ops->cpu_disable(cpu);

	return 0;
}

/*
 * __cpu_disable runs on the processor to be shut down.
 */
int __cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();
	int ret;

	ret = op_cpu_disable(cpu);
	if (ret)
		return ret;

	remove_cpu_topology(cpu);
	numa_remove_cpu(cpu);

	/*
	 * Take this CPU offline. Once we clear this, we can't return,
	 * and we must not schedule until we're ready to give up the cpu.
	 */
	set_cpu_online(cpu, false);
	ipi_teardown(cpu);

	/*
	 * OK - migrate IRQs away from this CPU
	 */
	irq_migrate_all_off_this_cpu();

	return 0;
}

static int op_cpu_kill(unsigned int cpu)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	/*
	 * If we have no means of synchronising with the dying CPU, then assume
	 * that it is really dead. We can only wait for an arbitrary length of
	 * time and hope that it's dead, so let's skip the wait and just hope.
	 */
	if (!ops->cpu_kill)
		return 0;

	return ops->cpu_kill(cpu);
}

/*
 * Called on the thread which is asking for a CPU to be shut down, after
 * the shutdown has completed.
 */
void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
	int err;

	pr_debug("CPU%u: shutdown\n", cpu);

	/*
	 * Now that the dying CPU is beyond the point of no return w.r.t.
	 * in-kernel synchronisation, try to get the firmware to help us to
	 * verify that it has really left the kernel before we consider
	 * clobbering anything it might still be using.
	 */
	err = op_cpu_kill(cpu);
	if (err)
		pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err);
}

/*
 * Called from the idle thread for the CPU which has been shut down.
 */
void __noreturn cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	idle_task_exit();

	local_daif_mask();

	/* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
	cpuhp_ap_report_dead();

	/*
	 * Actually shut down the CPU. This must never fail. The specific
	 * hotplug mechanism must perform all required cache maintenance to
	 * ensure that no dirty lines are lost in the process of shutting down
	 * the CPU.
	 */
	ops->cpu_die(cpu);

	BUG();
}
#endif

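/*
 * Best-effort attempt to take @cpu down via its cpu_die method, for paths
 * (crash stop, early boot failure) that cannot use the full hotplug
 * machinery. Quietly does nothing if hotplug is not configured or no
 * cpu_die method is available.
 */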
static void __cpu_try_die(int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	if (ops && ops->cpu_die)
		ops->cpu_die(cpu);
#endif
}

/*
 * Kill the calling secondary CPU, early in bringup before it is turned
 * online.
 */
void __noreturn cpu_die_early(void)
{
	int cpu = smp_processor_id();

	pr_crit("CPU%d: will not boot\n", cpu);

	/* Mark this CPU absent */
	set_cpu_present(cpu, 0);
	rcutree_report_cpu_dead();

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		update_cpu_boot_status(CPU_KILL_ME);
		__cpu_try_die(cpu);
	}

	update_cpu_boot_status(CPU_STUCK_IN_KERNEL);

	cpu_park_loop();
}

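/*
 * Report which exception level the CPUs booted at, warning if they were
 * inconsistent. When the kernel itself does not run at EL2, this is also
 * the point at which KVM's hyp VA layout is computed and its relocations
 * applied, now that all CPUs are up.
 */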
static void __init hyp_mode_check(void)
{
	if (is_hyp_mode_available())
		pr_info("CPU: All CPU(s) started at EL2\n");
	else if (is_hyp_mode_mismatched())
		WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC,
			   "CPU: CPUs started in inconsistent modes");
	else
		pr_info("CPU: All CPU(s) started at EL1\n");

	if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
		kvm_compute_layout();
		kvm_apply_hyp_relocations();
	}
}

void __init smp_cpus_done(unsigned int max_cpus)
{
	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
	hyp_mode_check();
	setup_system_features();
	setup_user_features();
	mark_linear_text_alias_ro();
}

void __init smp_prepare_boot_cpu(void)
{
	/*
	 * The runtime per-cpu areas have been allocated by
	 * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
	 * freed shortly, so we must move over to the runtime per-cpu area.
	 */
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

	cpuinfo_store_boot_cpu();
	setup_boot_cpu_features();

	/* Conditionally switch to GIC PMR for interrupt masking */
	if (system_uses_irq_prio_masking())
		init_gic_priority_masking();

	kasan_init_hw_tags();
}

/*
 * Duplicate MPIDRs are a recipe for disaster. Scan all initialized
 * entries and check for duplicates. If any is found just ignore the
 * cpu. cpu_logical_map was initialized to INVALID_HWID to avoid
 * matching valid MPIDR values.
 */
static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid)
{
	unsigned int i;

	for (i = 1; (i < cpu) && (i < NR_CPUS); i++)
		if (cpu_logical_map(i) == hwid)
			return true;
	return false;
}

/*
 * Initialize cpu operations for a logical cpu and
 * set it in the possible mask on success
 */
static int __init smp_cpu_setup(int cpu)
{
	const struct cpu_operations *ops;

	if (init_cpu_ops(cpu))
		return -ENODEV;

	ops = get_cpu_ops(cpu);
	if (ops->cpu_init(cpu))
		return -ENODEV;

	set_cpu_possible(cpu, true);

	return 0;
}

static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;

#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];

struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
{
	return &cpu_madt_gicc[cpu];
}
EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);

/*
 * acpi_map_gic_cpu_interface - parse processor MADT entry
 *
 * Carry out sanity checks on MADT processor entry and initialize
 * cpu_logical_map on success
 */
static void __init
acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
	u64 hwid = processor->arm_mpidr;

	if (!acpi_gicc_is_usable(processor)) {
		pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
		return;
	}

	if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) {
		pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid);
		return;
	}

	if (is_mpidr_duplicate(cpu_count, hwid)) {
		pr_err("duplicate CPU MPIDR 0x%llx in MADT\n", hwid);
		return;
	}

	/* Check if GICC structure of boot CPU is available in the MADT */
	if (cpu_logical_map(0) == hwid) {
		if (bootcpu_valid) {
			pr_err("duplicate boot CPU MPIDR: 0x%llx in MADT\n",
			       hwid);
			return;
		}
		bootcpu_valid = true;
		cpu_madt_gicc[0] = *processor;
		return;
	}

	if (cpu_count >= NR_CPUS)
		return;

	/* map the logical cpu id to cpu MPIDR */
	set_cpu_logical_map(cpu_count, hwid);

	cpu_madt_gicc[cpu_count] = *processor;

	/*
	 * Set-up the ACPI parking protocol cpu entries
	 * while initializing the cpu_logical_map to
	 * avoid parsing MADT entries multiple times for
	 * nothing (ie a valid cpu_logical_map entry should
	 * contain a valid parking protocol data set to
	 * initialize the cpu if the parking protocol is
	 * the only available enable method).
	 */
	acpi_set_mailbox_entry(cpu_count, processor);

	cpu_count++;
}

static int __init
acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header,
			     const unsigned long end)
{
	struct acpi_madt_generic_interrupt *processor;

	processor = (struct acpi_madt_generic_interrupt *)header;
	if (BAD_MADT_GICC_ENTRY(processor, end))
		return -EINVAL;

	acpi_table_print_madt_entry(&header->common);

	acpi_map_gic_cpu_interface(processor);

	return 0;
}

static void __init acpi_parse_and_init_cpus(void)
{
	int i;

	/*
	 * do a walk of MADT to determine how many CPUs
	 * we have including disabled CPUs, and get information
	 * we need for SMP init.
	 */
	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
			      acpi_parse_gic_cpu_interface, 0);

	/*
	 * In ACPI, SMP and CPU NUMA information is provided in separate
	 * static tables, namely the MADT and the SRAT.
	 *
	 * Thus, it is simpler to first create the cpu logical map through
	 * an MADT walk and then map the logical cpus to their node ids
	 * as separate steps.
	 */
	acpi_map_cpus_to_nodes();

	for (i = 0; i < nr_cpu_ids; i++)
		early_map_cpu_to_node(i, acpi_numa_get_nid(i));
}
#else
#define acpi_parse_and_init_cpus(...)	do { } while (0)
#endif

/*
 * Enumerate the possible CPU set from the device tree and build the
 * cpu logical map array containing MPIDR values related to logical
 * cpus. Assumes that cpu_logical_map(0) has already been initialized.
 */
static void __init of_parse_and_init_cpus(void)
{
	struct device_node *dn;

	for_each_of_cpu_node(dn) {
		u64 hwid = of_get_cpu_hwid(dn, 0);

		if (hwid & ~MPIDR_HWID_BITMASK)
			goto next;

		if (is_mpidr_duplicate(cpu_count, hwid)) {
			pr_err("%pOF: duplicate cpu reg properties in the DT\n",
			       dn);
			goto next;
		}

		/*
		 * The numbering scheme requires that the boot CPU
		 * must be assigned logical id 0. Record it so that
		 * the logical map built from DT is validated and can
		 * be used.
		 */
		if (hwid == cpu_logical_map(0)) {
			if (bootcpu_valid) {
				pr_err("%pOF: duplicate boot cpu reg property in DT\n",
				       dn);
				goto next;
			}

			bootcpu_valid = true;
			early_map_cpu_to_node(0, of_node_to_nid(dn));

			/*
			 * cpu_logical_map has already been
			 * initialized and the boot cpu doesn't need
			 * the enable-method so continue without
			 * incrementing cpu.
			 */
			continue;
		}

		if (cpu_count >= NR_CPUS)
			goto next;

		pr_debug("cpu logical map 0x%llx\n", hwid);
		set_cpu_logical_map(cpu_count, hwid);

		early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
next:
		cpu_count++;
	}
}

/*
 * Enumerate the possible CPU set from the device tree or ACPI and build the
 * cpu logical map array containing MPIDR values related to logical
 * cpus. Assumes that cpu_logical_map(0) has already been initialized.
 */
void __init smp_init_cpus(void)
{
	int i;

	if (acpi_disabled)
		of_parse_and_init_cpus();
	else
		acpi_parse_and_init_cpus();

	if (cpu_count > nr_cpu_ids)
		pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n",
			cpu_count, nr_cpu_ids);

	if (!bootcpu_valid) {
		pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
		return;
	}

	/*
	 * We need to set the cpu_logical_map entries before enabling
	 * the cpus so that cpu processor description entries (DT cpu nodes
	 * and ACPI MADT entries) can be retrieved by matching the cpu hwid
	 * with entries in cpu_logical_map while initializing the cpus.
	 * If the cpu set-up fails, invalidate the cpu_logical_map entry.
	 */
	for (i = 1; i < nr_cpu_ids; i++) {
		if (cpu_logical_map(i) != INVALID_HWID) {
			if (smp_cpu_setup(i))
				set_cpu_logical_map(i, INVALID_HWID);
		}
	}
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	const struct cpu_operations *ops;
	int err;
	unsigned int cpu;
	unsigned int this_cpu;

	init_cpu_topology();

	this_cpu = smp_processor_id();
	store_cpu_topology(this_cpu);
	numa_store_cpu_info(this_cpu);
	numa_add_cpu(this_cpu);

	/*
	 * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
	 * secondary CPUs present.
	 */
	if (max_cpus == 0)
		return;

	/*
	 * Initialise the present map (which describes the set of CPUs
	 * actually populated at the present time) and release the
	 * secondaries from the bootloader.
	 */
	for_each_possible_cpu(cpu) {
		per_cpu(cpu_number, cpu) = cpu;

		if (cpu == smp_processor_id())
			continue;

		ops = get_cpu_ops(cpu);
		if (!ops)
			continue;

		err = ops->cpu_prepare(cpu);
		if (err)
			continue;

		set_cpu_present(cpu, true);
		numa_store_cpu_info(cpu);
	}
}

static const char *ipi_types[NR_IPI] __tracepoint_string = {
	[IPI_RESCHEDULE]	= "Rescheduling interrupts",
	[IPI_CALL_FUNC]		= "Function call interrupts",
	[IPI_CPU_STOP]		= "CPU stop interrupts",
	[IPI_CPU_CRASH_STOP]	= "CPU stop (for crash dump) interrupts",
	[IPI_TIMER]		= "Timer broadcast interrupts",
	[IPI_IRQ_WORK]		= "IRQ work interrupts",
};

static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);

unsigned long irq_err_count;

int arch_show_interrupts(struct seq_file *p, int prec)
{
	unsigned int cpu, i;

	for (i = 0; i < NR_IPI; i++) {
		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
			   prec >= 4 ? " " : "");
		for_each_online_cpu(cpu)
			seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
		seq_printf(p, " %s\n", ipi_types[i]);
	}

	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
	return 0;
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	smp_cross_call(mask, IPI_CALL_FUNC);
}

void arch_send_call_function_single_ipi(int cpu)
{
	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}

#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
	smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
}
#endif

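/*
 * Take the calling CPU out of service: mark it offline, mask all DAIF
 * exceptions and SDEI events, then park it in a low-power loop it will
 * never leave.
 */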
static void __noreturn local_cpu_stop(void)
{
	set_cpu_online(smp_processor_id(), false);

	local_daif_mask();
	sdei_mask_local_cpu();
	cpu_park_loop();
}

/*
 * We need to implement panic_smp_self_stop() for parallel panic() calls, so
 * that cpu_online_mask gets correctly updated and smp_send_stop() can skip
 * CPUs that have already stopped themselves.
 */
void __noreturn panic_smp_self_stop(void)
{
	local_cpu_stop();
}

#ifdef CONFIG_KEXEC_CORE
static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
#endif

static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
	crash_save_cpu(regs, cpu);

	atomic_dec(&waiting_for_crash_ipi);

	local_irq_disable();
	sdei_mask_local_cpu();

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
		__cpu_try_die(cpu);

	/* just in case */
	cpu_park_loop();
#else
	BUG();
#endif
}

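/*
 * Raise the backtrace IPI directly through its irq_desc: IPIs at or above
 * NR_IPI have no ipi_types[] entry, so they cannot go through
 * smp_cross_call() and its tracepoint.
 */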
static void arm64_backtrace_ipi(cpumask_t *mask)
{
	__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
	/*
	 * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
	 * nothing about it truly needs to be implemented using an NMI, it's
	 * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
	 * returned false our backtrace attempt will just use a regular IPI.
	 */
	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(void)
{
	int this_cpu = raw_smp_processor_id();
	int cpu;

	for_each_online_cpu(cpu) {
		/* No need to roundup ourselves */
		if (cpu == this_cpu)
			continue;

		__ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
	}
}
#endif

/*
 * Main handler for inter-processor interrupts
 */
static void do_handle_IPI(int ipinr)
{
	unsigned int cpu = smp_processor_id();

	if ((unsigned)ipinr < NR_IPI)
		trace_ipi_entry(ipi_types[ipinr]);

	switch (ipinr) {
	case IPI_RESCHEDULE:
		scheduler_ipi();
		break;

	case IPI_CALL_FUNC:
		generic_smp_call_function_interrupt();
		break;

	case IPI_CPU_STOP:
		local_cpu_stop();
		break;

	case IPI_CPU_CRASH_STOP:
		if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
			ipi_cpu_crash_stop(cpu, get_irq_regs());

			unreachable();
		}
		break;

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	case IPI_TIMER:
		tick_receive_broadcast();
		break;
#endif

#ifdef CONFIG_IRQ_WORK
	case IPI_IRQ_WORK:
		irq_work_run();
		break;
#endif

	case IPI_CPU_BACKTRACE:
		/*
		 * NOTE: in some cases this _won't_ be NMI context. See the
		 * comment in arch_trigger_cpumask_backtrace().
		 */
		nmi_cpu_backtrace(get_irq_regs());
		break;

	case IPI_KGDB_ROUNDUP:
		kgdb_nmicallback(cpu, get_irq_regs());
		break;

	default:
		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
		break;
	}

	if ((unsigned)ipinr < NR_IPI)
		trace_ipi_exit(ipi_types[ipinr]);
}

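/*
 * All IPI vectors share this handler; the message type is recovered from
 * the offset of the interrupt within the IPI range.
 */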
static irqreturn_t ipi_handler(int irq, void *data)
{
	do_handle_IPI(irq - ipi_irq_base);
	return IRQ_HANDLED;
}

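/*
 * Emit the ipi_raise tracepoint, then kick the IPI on every CPU in
 * @target via the irqchip.
 */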
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
	trace_ipi_raise(target, ipi_types[ipinr]);
	__ipi_send_mask(ipi_desc[ipinr], target);
}

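/*
 * IPIs that must get through even when the target CPU has interrupts
 * masked (stop, crash stop, backtrace, KGDB roundup) are requested as
 * pseudo-NMIs, which is only possible when the GIC supports interrupt
 * priority masking.
 */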
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
{
	if (!system_uses_irq_prio_masking())
		return false;

	switch (ipi) {
	case IPI_CPU_STOP:
	case IPI_CPU_CRASH_STOP:
	case IPI_CPU_BACKTRACE:
	case IPI_KGDB_ROUNDUP:
		return true;
	default:
		return false;
	}
}

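/*
 * Enable every IPI on @cpu, preparing it as a per-CPU NMI first where
 * ipi_should_be_nmi() says so. Called on each CPU as it comes online.
 */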
static void ipi_setup(int cpu)
{
	int i;

	if (WARN_ON_ONCE(!ipi_irq_base))
		return;

	for (i = 0; i < nr_ipi; i++) {
		if (ipi_should_be_nmi(i)) {
			prepare_percpu_nmi(ipi_irq_base + i);
			enable_percpu_nmi(ipi_irq_base + i, 0);
		} else {
			enable_percpu_irq(ipi_irq_base + i, 0);
		}
	}
}

#ifdef CONFIG_HOTPLUG_CPU
static void ipi_teardown(int cpu)
{
	int i;

	if (WARN_ON_ONCE(!ipi_irq_base))
		return;

	for (i = 0; i < nr_ipi; i++) {
		if (ipi_should_be_nmi(i)) {
			disable_percpu_nmi(ipi_irq_base + i);
			teardown_percpu_nmi(ipi_irq_base + i);
		} else {
			disable_percpu_irq(ipi_irq_base + i);
		}
	}
}
#endif

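/*
 * Called by the interrupt controller driver once it has allocated @n IPI
 * vectors starting at @ipi_base. Each vector is requested as a per-CPU NMI
 * or IRQ as appropriate, hidden from the regular /proc/interrupts listing
 * via IRQ_HIDDEN, and finally enabled on the boot CPU.
 */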
void __init set_smp_ipi_range(int ipi_base, int n)
{
	int i;

	WARN_ON(n < MAX_IPI);
	nr_ipi = min(n, MAX_IPI);

	for (i = 0; i < nr_ipi; i++) {
		int err;

		if (ipi_should_be_nmi(i)) {
			err = request_percpu_nmi(ipi_base + i, ipi_handler,
						 "IPI", &cpu_number);
			WARN(err, "Could not request IPI %d as NMI, err=%d\n",
			     i, err);
		} else {
			err = request_percpu_irq(ipi_base + i, ipi_handler,
						 "IPI", &cpu_number);
			WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
			     i, err);
		}

		ipi_desc[i] = irq_to_desc(ipi_base + i);
		irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
	}

	ipi_irq_base = ipi_base;

	/* Setup the boot CPU immediately */
	ipi_setup(smp_processor_id());
}

void arch_smp_send_reschedule(int cpu)
{
	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi(unsigned int cpu)
{
	/*
	 * We use a scheduler IPI to wake the CPU as this avoids the need for a
	 * dedicated IPI and we can safely handle spurious scheduler IPIs.
	 */
	smp_send_reschedule(cpu);
}
#endif

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	smp_cross_call(mask, IPI_TIMER);
}
#endif

/*
 * The number of CPUs online, not counting this CPU (which may not be
 * fully online and so not counted in num_online_cpus()).
 */
static inline unsigned int num_other_online_cpus(void)
{
	unsigned int this_cpu_online = cpu_online(smp_processor_id());

	return num_online_cpus() - this_cpu_online;
}

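/*
 * Stop all other CPUs, e.g. for reboot or panic: send them IPI_CPU_STOP
 * and give them up to one second to take themselves offline before
 * masking SDEI events locally.
 */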
void smp_send_stop(void)
{
	unsigned long timeout;

	if (num_other_online_cpus()) {
		cpumask_t mask;

		cpumask_copy(&mask, cpu_online_mask);
		cpumask_clear_cpu(smp_processor_id(), &mask);

		if (system_state <= SYSTEM_RUNNING)
			pr_crit("SMP: stopping secondary CPUs\n");
		smp_cross_call(&mask, IPI_CPU_STOP);
	}

	/* Wait up to one second for other CPUs to stop */
	timeout = USEC_PER_SEC;
	while (num_other_online_cpus() && timeout--)
		udelay(1);

	if (num_other_online_cpus())
		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
			cpumask_pr_args(cpu_online_mask));

	sdei_mask_local_cpu();
}

#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
	static int cpus_stopped;
	cpumask_t mask;
	unsigned long timeout;

	/*
	 * This function can be called twice in panic path, but obviously
	 * we execute this only once.
	 */
	if (cpus_stopped)
		return;

	cpus_stopped = 1;

	/*
	 * If this cpu is the only one alive at this point in time, online or
	 * not, there are no stop messages to be sent around, so just back out.
	 */
	if (num_other_online_cpus() == 0)
		goto skip_ipi;

	cpumask_copy(&mask, cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), &mask);

	atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());

	pr_crit("SMP: stopping secondary CPUs\n");
	smp_cross_call(&mask, IPI_CPU_CRASH_STOP);

	/* Wait up to one second for other CPUs to stop */
	timeout = USEC_PER_SEC;
	while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
		udelay(1);

	if (atomic_read(&waiting_for_crash_ipi) > 0)
		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
			cpumask_pr_args(&mask));

skip_ipi:
	sdei_mask_local_cpu();
	sdei_handler_abort();
}

bool smp_crash_stop_failed(void)
{
	return (atomic_read(&waiting_for_crash_ipi) > 0);
}
#endif

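/*
 * Whether CPU hot-unplug is available at all. The check runs on whichever
 * CPU we happen to be on (hence "any_cpu"), on the assumption that all
 * CPUs are alike in whether their enable method provides cpu_die.
 */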
static bool have_cpu_die(void)
{
#ifdef CONFIG_HOTPLUG_CPU
	int any_cpu = raw_smp_processor_id();
	const struct cpu_operations *ops = get_cpu_ops(any_cpu);

	if (ops && ops->cpu_die)
		return true;
#endif
	return false;
}

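/*
 * True if some CPUs may still be executing in kernel text with no way to
 * pull them out: secondaries that failed to boot, spin-table CPUs that
 * cannot be offlined, or protected KVM. Callers use this to refuse
 * operations (such as kexec) that would clobber memory a parked CPU may
 * still be using.
 */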
bool cpus_are_stuck_in_kernel(void)
{
	bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());

	return !!cpus_stuck_in_kernel || smp_spin_tables ||
	       is_protected_kvm_enabled();
}