// SPDX-License-Identifier: GPL-2.0-only
/*
 * SMP initialisation and IPI support
 * Based on arch/arm/kernel/smp.c
 *
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/arm_sdei.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/sched/mm.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/interrupt.h>
#include <linux/cache.h>
#include <linux/profile.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/seq_file.h>
#include <linux/irq.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/percpu.h>
#include <linux/clockchips.h>
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
#include <linux/kernel_stat.h>
#include <linux/kexec.h>
#include <linux/kgdb.h>
#include <linux/kvm_host.h>
#include <linux/nmi.h>

#include <asm/alternative.h>
#include <asm/atomic.h>
#include <asm/cacheflush.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/kvm_mmu.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/processor.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
#include <asm/virt.h>

#include <trace/events/ipi.h>

DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);

/*
 * as from 2.5, kernels no longer have an init_tasks structure
 * so we need some other way of telling a new secondary core
 * where to place its SVC stack
 */
struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;

enum ipi_msg_type {
	IPI_RESCHEDULE,
	IPI_CALL_FUNC,
	IPI_CPU_STOP,
	IPI_CPU_CRASH_STOP,
	IPI_TIMER,
	IPI_IRQ_WORK,
	NR_IPI,
	/*
	 * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
	 * with trace_ipi_*
	 */
	IPI_CPU_BACKTRACE = NR_IPI,
	IPI_KGDB_ROUNDUP,
	MAX_IPI
};

static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;

static void ipi_setup(int cpu);

#ifdef CONFIG_HOTPLUG_CPU
static void ipi_teardown(int cpu);
static int op_cpu_kill(unsigned int cpu);
#else
static inline int op_cpu_kill(unsigned int cpu)
{
	return -ENOSYS;
}
#endif


/*
 * Boot a secondary CPU, and assign it the specified idle task.
 * This also gives us the initial stack to use for this CPU.
 */
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	if (ops->cpu_boot)
		return ops->cpu_boot(cpu);

	return -EOPNOTSUPP;
}

static DECLARE_COMPLETION(cpu_running);

int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int ret;
	long status;

	/*
	 * We need to tell the secondary core where to find its stack and the
	 * page tables.
	 */
	secondary_data.task = idle;
	update_cpu_boot_status(CPU_MMU_OFF);

	/* Now bring the CPU into our world */
	ret = boot_secondary(cpu, idle);
	if (ret) {
		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
		return ret;
	}

	/*
	 * CPU was successfully started, wait for it to come online or
	 * time out.
	 */
	wait_for_completion_timeout(&cpu_running,
				    msecs_to_jiffies(5000));
	if (cpu_online(cpu))
		return 0;

	pr_crit("CPU%u: failed to come online\n", cpu);
	secondary_data.task = NULL;
	status = READ_ONCE(secondary_data.status);
	if (status == CPU_MMU_OFF)
		status = READ_ONCE(__early_cpu_boot_status);

	switch (status & CPU_BOOT_STATUS_MASK) {
	default:
		pr_err("CPU%u: failed in unknown state : 0x%lx\n",
		       cpu, status);
		cpus_stuck_in_kernel++;
		break;
	case CPU_KILL_ME:
		if (!op_cpu_kill(cpu)) {
			pr_crit("CPU%u: died during early boot\n", cpu);
			break;
		}
		pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
		fallthrough;
	case CPU_STUCK_IN_KERNEL:
		pr_crit("CPU%u: is stuck in kernel\n", cpu);
		if (status & CPU_STUCK_REASON_52_BIT_VA)
			pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
		if (status & CPU_STUCK_REASON_NO_GRAN) {
			pr_crit("CPU%u: does not support %luK granule\n",
				cpu, PAGE_SIZE / SZ_1K);
		}
		cpus_stuck_in_kernel++;
		break;
	case CPU_PANIC_KERNEL:
		panic("CPU%u detected unsupported configuration\n", cpu);
	}

	return -EIO;
}

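/*
 * Switch this CPU over to using the GIC PMR for interrupt masking: enable
 * the GIC system register interface, check that IRQ/FIQ are still masked
 * via DAIF, and program a priority mask that lets normal interrupts through.
 */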
static void init_gic_priority_masking(void)
{
	u32 cpuflags;

	if (WARN_ON(!gic_enable_sre()))
		return;

	cpuflags = read_sysreg(daif);

	WARN_ON(!(cpuflags & PSR_I_BIT));
	WARN_ON(!(cpuflags & PSR_F_BIT));

	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}

/*
 * This is the secondary CPU boot entry. We're using this CPU's
 * idle thread stack, but a set of temporary page tables.
 */
asmlinkage notrace void secondary_start_kernel(void)
{
	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
	struct mm_struct *mm = &init_mm;
	const struct cpu_operations *ops;
	unsigned int cpu = smp_processor_id();

	/*
	 * All kernel threads share the same mm context; grab a
	 * reference and switch to it.
	 */
	mmgrab(mm);
	current->active_mm = mm;

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_uninstall_idmap();

	if (system_uses_irq_prio_masking())
		init_gic_priority_masking();

	rcutree_report_cpu_starting(cpu);
	trace_hardirqs_off();

	/*
	 * If the system has established the capabilities, make sure
	 * this CPU ticks all of those. If it doesn't, the CPU will
	 * fail to come online.
	 */
	check_local_cpu_capabilities();

	ops = get_cpu_ops(cpu);
	if (ops->cpu_postboot)
		ops->cpu_postboot();

	/*
	 * Log the CPU info before it is marked online and might get read.
	 */
	cpuinfo_store_cpu();
	store_cpu_topology(cpu);

	/*
	 * Enable GIC and timers.
	 */
	notify_cpu_starting(cpu);

	ipi_setup(cpu);

	numa_add_cpu(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue. Wait for
	 * the CPU migration code to notice that the CPU is online
	 * before we continue.
	 */
	pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
		cpu, (unsigned long)mpidr,
		read_cpuid_id());
	update_cpu_boot_status(CPU_BOOT_SUCCESS);
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	local_daif_restore(DAIF_PROCCTX);

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_disable(unsigned int cpu)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	/*
	 * If we don't have a cpu_die method, abort before we reach the point
	 * of no return. CPU0 may not have a cpu_ops, so test for it.
	 */
	if (!ops || !ops->cpu_die)
		return -EOPNOTSUPP;

	/*
	 * We may need to abort a hot unplug for some other mechanism-specific
	 * reason.
	 */
	if (ops->cpu_disable)
		return ops->cpu_disable(cpu);

	return 0;
}

/*
 * __cpu_disable runs on the processor to be shut down.
 */
int __cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();
	int ret;

	ret = op_cpu_disable(cpu);
	if (ret)
		return ret;

	remove_cpu_topology(cpu);
	numa_remove_cpu(cpu);

	/*
	 * Take this CPU offline. Once we clear this, we can't return,
	 * and we must not schedule until we're ready to give up the cpu.
	 */
	set_cpu_online(cpu, false);
	ipi_teardown(cpu);

	/*
	 * OK - migrate IRQs away from this CPU
	 */
	irq_migrate_all_off_this_cpu();

	return 0;
}

static int op_cpu_kill(unsigned int cpu)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	/*
	 * If we have no means of synchronising with the dying CPU, then assume
	 * that it is really dead. We can only wait for an arbitrary length of
	 * time and hope that it's dead, so let's skip the wait and just hope.
	 */
	if (!ops->cpu_kill)
		return 0;

	return ops->cpu_kill(cpu);
}

/*
 * Called on the thread which is asking for a CPU to be shutdown after the
 * shutdown completed.
 */
void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
	int err;

	pr_debug("CPU%u: shutdown\n", cpu);

	/*
	 * Now that the dying CPU is beyond the point of no return w.r.t.
	 * in-kernel synchronisation, try to get the firmware to help us to
	 * verify that it has really left the kernel before we consider
	 * clobbering anything it might still be using.
	 */
	err = op_cpu_kill(cpu);
	if (err)
		pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err);
}

/*
 * Called from the idle thread for the CPU which has been shut down.
 */
void __noreturn cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	idle_task_exit();

	local_daif_mask();

	/* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
	cpuhp_ap_report_dead();

	/*
	 * Actually shutdown the CPU. This must never fail. The specific hotplug
	 * mechanism must perform all required cache maintenance to ensure that
	 * no dirty lines are lost in the process of shutting down the CPU.
	 */
	ops->cpu_die(cpu);

	BUG();
}
#endif

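/*
 * Best-effort attempt to shut down @cpu via its enable method's cpu_die
 * hook. Quietly does nothing if CPU hotplug is not configured or the
 * enable method provides no cpu_die handler.
 */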
static void __cpu_try_die(int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	if (ops && ops->cpu_die)
		ops->cpu_die(cpu);
#endif
}

/*
 * Kill the calling secondary CPU, early in bringup before it is turned
 * online.
 */
void __noreturn cpu_die_early(void)
{
	int cpu = smp_processor_id();

	pr_crit("CPU%d: will not boot\n", cpu);

	/* Mark this CPU absent */
	set_cpu_present(cpu, 0);
	rcutree_report_cpu_dead();

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		update_cpu_boot_status(CPU_KILL_ME);
		__cpu_try_die(cpu);
	}

	update_cpu_boot_status(CPU_STUCK_IN_KERNEL);

	cpu_park_loop();
}

static void __init hyp_mode_check(void)
{
	if (is_hyp_mode_available())
		pr_info("CPU: All CPU(s) started at EL2\n");
	else if (is_hyp_mode_mismatched())
		WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC,
			   "CPU: CPUs started in inconsistent modes");
	else
		pr_info("CPU: All CPU(s) started at EL1\n");
	if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
		kvm_compute_layout();
		kvm_apply_hyp_relocations();
	}
}

void __init smp_cpus_done(unsigned int max_cpus)
{
	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
	hyp_mode_check();
	setup_system_features();
	setup_user_features();
	mark_linear_text_alias_ro();
}

void __init smp_prepare_boot_cpu(void)
{
	/*
	 * The runtime per-cpu areas have been allocated by
	 * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
	 * freed shortly, so we must move over to the runtime per-cpu area.
	 */
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

	cpuinfo_store_boot_cpu();
	setup_boot_cpu_features();

	/* Conditionally switch to GIC PMR for interrupt masking */
	if (system_uses_irq_prio_masking())
		init_gic_priority_masking();

	kasan_init_hw_tags();
}

/*
 * Duplicate MPIDRs are a recipe for disaster. Scan all initialized
 * entries and check for duplicates. If any is found just ignore the
 * cpu. cpu_logical_map was initialized to INVALID_HWID to avoid
 * matching valid MPIDR values.
 */
static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid)
{
	unsigned int i;

	for (i = 1; (i < cpu) && (i < NR_CPUS); i++)
		if (cpu_logical_map(i) == hwid)
			return true;
	return false;
}

/*
 * Initialize cpu operations for a logical cpu and
 * set it in the possible mask on success
 */
static int __init smp_cpu_setup(int cpu)
{
	const struct cpu_operations *ops;

	if (init_cpu_ops(cpu))
		return -ENODEV;

	ops = get_cpu_ops(cpu);
	if (ops->cpu_init(cpu))
		return -ENODEV;

	set_cpu_possible(cpu, true);

	return 0;
}

static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;

#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];

struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
{
	return &cpu_madt_gicc[cpu];
}
EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);

/*
 * acpi_map_gic_cpu_interface - parse processor MADT entry
 *
 * Carry out sanity checks on MADT processor entry and initialize
 * cpu_logical_map on success
 */
static void __init
acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
	u64 hwid = processor->arm_mpidr;

	if (!acpi_gicc_is_usable(processor)) {
		pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
		return;
	}

	if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) {
		pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid);
		return;
	}

	if (is_mpidr_duplicate(cpu_count, hwid)) {
		pr_err("duplicate CPU MPIDR 0x%llx in MADT\n", hwid);
		return;
	}

	/* Check if GICC structure of boot CPU is available in the MADT */
	if (cpu_logical_map(0) == hwid) {
		if (bootcpu_valid) {
			pr_err("duplicate boot CPU MPIDR: 0x%llx in MADT\n",
			       hwid);
			return;
		}
		bootcpu_valid = true;
		cpu_madt_gicc[0] = *processor;
		return;
	}

	if (cpu_count >= NR_CPUS)
		return;

	/* map the logical cpu id to cpu MPIDR */
	set_cpu_logical_map(cpu_count, hwid);

	cpu_madt_gicc[cpu_count] = *processor;

	/*
	 * Set-up the ACPI parking protocol cpu entries
	 * while initializing the cpu_logical_map to
	 * avoid parsing MADT entries multiple times for
	 * nothing (ie a valid cpu_logical_map entry should
	 * contain a valid parking protocol data set to
	 * initialize the cpu if the parking protocol is
	 * the only available enable method).
	 */
	acpi_set_mailbox_entry(cpu_count, processor);

	cpu_count++;
}

static int __init
acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header,
			     const unsigned long end)
{
	struct acpi_madt_generic_interrupt *processor;

	processor = (struct acpi_madt_generic_interrupt *)header;
	if (BAD_MADT_GICC_ENTRY(processor, end))
		return -EINVAL;

	acpi_table_print_madt_entry(&header->common);

	acpi_map_gic_cpu_interface(processor);

	return 0;
}

static void __init acpi_parse_and_init_cpus(void)
{
	int i;

	/*
	 * do a walk of MADT to determine how many CPUs
	 * we have including disabled CPUs, and get information
	 * we need for SMP init.
	 */
	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
			      acpi_parse_gic_cpu_interface, 0);

	/*
	 * In ACPI, SMP and CPU NUMA information is provided in separate
	 * static tables, namely the MADT and the SRAT.
	 *
	 * Thus, it is simpler to first create the cpu logical map through
	 * an MADT walk and then map the logical cpus to their node ids
	 * as separate steps.
	 */
	acpi_map_cpus_to_nodes();

	for (i = 0; i < nr_cpu_ids; i++)
		early_map_cpu_to_node(i, acpi_numa_get_nid(i));
}
#else
#define acpi_parse_and_init_cpus(...)	do { } while (0)
#endif

/*
 * Enumerate the possible CPU set from the device tree and build the
 * cpu logical map array containing MPIDR values related to logical
 * cpus. Assumes that cpu_logical_map(0) has already been initialized.
 */
static void __init of_parse_and_init_cpus(void)
{
	struct device_node *dn;

	for_each_of_cpu_node(dn) {
		u64 hwid = of_get_cpu_hwid(dn, 0);

		if (hwid & ~MPIDR_HWID_BITMASK)
			goto next;

		if (is_mpidr_duplicate(cpu_count, hwid)) {
			pr_err("%pOF: duplicate cpu reg properties in the DT\n",
			       dn);
			goto next;
		}

		/*
		 * The numbering scheme requires that the boot CPU
		 * must be assigned logical id 0. Record it so that
		 * the logical map built from DT is validated and can
		 * be used.
		 */
		if (hwid == cpu_logical_map(0)) {
			if (bootcpu_valid) {
				pr_err("%pOF: duplicate boot cpu reg property in DT\n",
				       dn);
				goto next;
			}

			bootcpu_valid = true;
			early_map_cpu_to_node(0, of_node_to_nid(dn));

			/*
			 * cpu_logical_map has already been
			 * initialized and the boot cpu doesn't need
			 * the enable-method so continue without
			 * incrementing cpu.
			 */
			continue;
		}

		if (cpu_count >= NR_CPUS)
			goto next;

		pr_debug("cpu logical map 0x%llx\n", hwid);
		set_cpu_logical_map(cpu_count, hwid);

		early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
next:
		cpu_count++;
	}
}

/*
 * Enumerate the possible CPU set from the device tree or ACPI and build the
 * cpu logical map array containing MPIDR values related to logical
 * cpus. Assumes that cpu_logical_map(0) has already been initialized.
 */
void __init smp_init_cpus(void)
{
	int i;

	if (acpi_disabled)
		of_parse_and_init_cpus();
	else
		acpi_parse_and_init_cpus();

	if (cpu_count > nr_cpu_ids)
		pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n",
			cpu_count, nr_cpu_ids);

	if (!bootcpu_valid) {
		pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
		return;
	}

	/*
	 * We need to set the cpu_logical_map entries before enabling
	 * the cpus so that cpu processor description entries (DT cpu nodes
	 * and ACPI MADT entries) can be retrieved by matching the cpu hwid
	 * with entries in cpu_logical_map while initializing the cpus.
	 * If the cpu set-up fails, invalidate the cpu_logical_map entry.
	 */
	for (i = 1; i < nr_cpu_ids; i++) {
		if (cpu_logical_map(i) != INVALID_HWID) {
			if (smp_cpu_setup(i))
				set_cpu_logical_map(i, INVALID_HWID);
		}
	}
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	const struct cpu_operations *ops;
	int err;
	unsigned int cpu;
	unsigned int this_cpu;

	init_cpu_topology();

	this_cpu = smp_processor_id();
	store_cpu_topology(this_cpu);
	numa_store_cpu_info(this_cpu);
	numa_add_cpu(this_cpu);

	/*
	 * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
	 * secondary CPUs present.
	 */
	if (max_cpus == 0)
		return;

	/*
	 * Initialise the present map (which describes the set of CPUs
	 * actually populated at the present time) and release the
	 * secondaries from the bootloader.
	 */
	for_each_possible_cpu(cpu) {

		per_cpu(cpu_number, cpu) = cpu;

		if (cpu == smp_processor_id())
			continue;

		ops = get_cpu_ops(cpu);
		if (!ops)
			continue;

		err = ops->cpu_prepare(cpu);
		if (err)
			continue;

		set_cpu_present(cpu, true);
		numa_store_cpu_info(cpu);
	}
}

static const char *ipi_types[NR_IPI] __tracepoint_string = {
	[IPI_RESCHEDULE]	= "Rescheduling interrupts",
	[IPI_CALL_FUNC]		= "Function call interrupts",
	[IPI_CPU_STOP]		= "CPU stop interrupts",
	[IPI_CPU_CRASH_STOP]	= "CPU stop (for crash dump) interrupts",
	[IPI_TIMER]		= "Timer broadcast interrupts",
	[IPI_IRQ_WORK]		= "IRQ work interrupts",
};

static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);

unsigned long irq_err_count;

int arch_show_interrupts(struct seq_file *p, int prec)
{
	unsigned int cpu, i;

	for (i = 0; i < NR_IPI; i++) {
		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
			   prec >= 4 ? " " : "");
		for_each_online_cpu(cpu)
			seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
		seq_printf(p, " %s\n", ipi_types[i]);
	}

	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
	return 0;
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	smp_cross_call(mask, IPI_CALL_FUNC);
}

void arch_send_call_function_single_ipi(int cpu)
{
	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}

#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
	smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
}
#endif

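/*
 * Park the calling CPU: mark it offline, mask DAIF exceptions and SDEI
 * events, then spin forever.
 */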
static void __noreturn local_cpu_stop(void)
{
	set_cpu_online(smp_processor_id(), false);

	local_daif_mask();
	sdei_mask_local_cpu();
	cpu_park_loop();
}

/*
 * We need to implement panic_smp_self_stop() for parallel panic() calls, so
 * that cpu_online_mask gets correctly updated and smp_send_stop() can skip
 * CPUs that have already stopped themselves.
 */
void __noreturn panic_smp_self_stop(void)
{
	local_cpu_stop();
}

#ifdef CONFIG_KEXEC_CORE
static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
#endif

static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
	crash_save_cpu(regs, cpu);

	atomic_dec(&waiting_for_crash_ipi);

	local_irq_disable();
	sdei_mask_local_cpu();

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
		__cpu_try_die(cpu);

	/* just in case */
	cpu_park_loop();
#else
	BUG();
#endif
}

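/*
 * Raise the backtrace IPI on every CPU in @mask. Used as the "raise"
 * callback for nmi_trigger_cpumask_backtrace().
 */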
static void arm64_backtrace_ipi(cpumask_t *mask)
{
	__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
	/*
	 * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
	 * nothing about it truly needs to be implemented using an NMI, it's
	 * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
	 * returned false our backtrace attempt will just use a regular IPI.
	 */
	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(void)
{
	int this_cpu = raw_smp_processor_id();
	int cpu;

	for_each_online_cpu(cpu) {
		/* No need to roundup ourselves */
		if (cpu == this_cpu)
			continue;

		__ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
	}
}
#endif

/*
 * Main handler for inter-processor interrupts
 */
static void do_handle_IPI(int ipinr)
{
	unsigned int cpu = smp_processor_id();

	if ((unsigned)ipinr < NR_IPI)
		trace_ipi_entry(ipi_types[ipinr]);

	switch (ipinr) {
	case IPI_RESCHEDULE:
		scheduler_ipi();
		break;

	case IPI_CALL_FUNC:
		generic_smp_call_function_interrupt();
		break;

	case IPI_CPU_STOP:
		local_cpu_stop();
		break;

	case IPI_CPU_CRASH_STOP:
		if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
			ipi_cpu_crash_stop(cpu, get_irq_regs());

			unreachable();
		}
		break;

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	case IPI_TIMER:
		tick_receive_broadcast();
		break;
#endif

#ifdef CONFIG_IRQ_WORK
	case IPI_IRQ_WORK:
		irq_work_run();
		break;
#endif

	case IPI_CPU_BACKTRACE:
		/*
		 * NOTE: in some cases this _won't_ be NMI context. See the
		 * comment in arch_trigger_cpumask_backtrace().
		 */
		nmi_cpu_backtrace(get_irq_regs());
		break;

	case IPI_KGDB_ROUNDUP:
		kgdb_nmicallback(cpu, get_irq_regs());
		break;

	default:
		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
		break;
	}

	if ((unsigned)ipinr < NR_IPI)
		trace_ipi_exit(ipi_types[ipinr]);
}

static irqreturn_t ipi_handler(int irq, void *data)
{
	do_handle_IPI(irq - ipi_irq_base);
	return IRQ_HANDLED;
}

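/* Trace and send the given IPI to every CPU in @target. */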
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
	trace_ipi_raise(target, ipi_types[ipinr]);
	__ipi_send_mask(ipi_desc[ipinr], target);
}

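/*
 * IPIs that must get through even when the target CPU has interrupts
 * masked (stop, crash stop, backtrace, KGDB roundup) are delivered as
 * pseudo-NMIs when GIC priority masking is in use; everything else is a
 * normal IRQ.
 */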
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
{
	if (!system_uses_irq_prio_masking())
		return false;

	switch (ipi) {
	case IPI_CPU_STOP:
	case IPI_CPU_CRASH_STOP:
	case IPI_CPU_BACKTRACE:
	case IPI_KGDB_ROUNDUP:
		return true;
	default:
		return false;
	}
}

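/*
 * Enable this CPU's IPIs, as per-CPU NMIs or IRQs depending on
 * ipi_should_be_nmi(). Called for the boot CPU from set_smp_ipi_range()
 * and for each secondary as it comes online.
 */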
static void ipi_setup(int cpu)
{
	int i;

	if (WARN_ON_ONCE(!ipi_irq_base))
		return;

	for (i = 0; i < nr_ipi; i++) {
		if (ipi_should_be_nmi(i)) {
			prepare_percpu_nmi(ipi_irq_base + i);
			enable_percpu_nmi(ipi_irq_base + i, 0);
		} else {
			enable_percpu_irq(ipi_irq_base + i, 0);
		}
	}
}

#ifdef CONFIG_HOTPLUG_CPU
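/* Counterpart of ipi_setup(): disable this CPU's IPIs before it goes offline. */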
static void ipi_teardown(int cpu)
{
	int i;

	if (WARN_ON_ONCE(!ipi_irq_base))
		return;

	for (i = 0; i < nr_ipi; i++) {
		if (ipi_should_be_nmi(i)) {
			disable_percpu_nmi(ipi_irq_base + i);
			teardown_percpu_nmi(ipi_irq_base + i);
		} else {
			disable_percpu_irq(ipi_irq_base + i);
		}
	}
}
#endif

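/*
 * Called by the interrupt controller driver to hand over a block of @n
 * per-CPU interrupts, starting at @ipi_base, for use as IPIs. Each one is
 * requested as an NMI or IRQ as appropriate and enabled on the boot CPU.
 */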
void __init set_smp_ipi_range(int ipi_base, int n)
{
	int i;

	WARN_ON(n < MAX_IPI);
	nr_ipi = min(n, MAX_IPI);

	for (i = 0; i < nr_ipi; i++) {
		int err;

		if (ipi_should_be_nmi(i)) {
			err = request_percpu_nmi(ipi_base + i, ipi_handler,
						 "IPI", &cpu_number);
			WARN(err, "Could not request IPI %d as NMI, err=%d\n",
			     i, err);
		} else {
			err = request_percpu_irq(ipi_base + i, ipi_handler,
						 "IPI", &cpu_number);
			WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
			     i, err);
		}

		ipi_desc[i] = irq_to_desc(ipi_base + i);
		irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
	}

	ipi_irq_base = ipi_base;

	/* Setup the boot CPU immediately */
	ipi_setup(smp_processor_id());
}

void arch_smp_send_reschedule(int cpu)
{
	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi(unsigned int cpu)
{
	/*
	 * We use a scheduler IPI to wake the CPU as this avoids the need for a
	 * dedicated IPI and we can safely handle spurious scheduler IPIs.
	 */
	smp_send_reschedule(cpu);
}
#endif

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	smp_cross_call(mask, IPI_TIMER);
}
#endif

/*
 * The number of CPUs online, not counting this CPU (which may not be
 * fully online and so not counted in num_online_cpus()).
 */
static inline unsigned int num_other_online_cpus(void)
{
	unsigned int this_cpu_online = cpu_online(smp_processor_id());

	return num_online_cpus() - this_cpu_online;
}

void smp_send_stop(void)
{
	unsigned long timeout;

	if (num_other_online_cpus()) {
		cpumask_t mask;

		cpumask_copy(&mask, cpu_online_mask);
		cpumask_clear_cpu(smp_processor_id(), &mask);

		if (system_state <= SYSTEM_RUNNING)
			pr_crit("SMP: stopping secondary CPUs\n");
		smp_cross_call(&mask, IPI_CPU_STOP);
	}

	/* Wait up to one second for other CPUs to stop */
	timeout = USEC_PER_SEC;
	while (num_other_online_cpus() && timeout--)
		udelay(1);

	if (num_other_online_cpus())
		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
			cpumask_pr_args(cpu_online_mask));

	sdei_mask_local_cpu();
}

#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
	static int cpus_stopped;
	cpumask_t mask;
	unsigned long timeout;

	/*
	 * This function can be called twice in panic path, but obviously
	 * we execute this only once.
	 */
	if (cpus_stopped)
		return;

	cpus_stopped = 1;

	/*
	 * If this cpu is the only one alive at this point in time, online or
	 * not, there are no stop messages to be sent around, so just back out.
	 */
	if (num_other_online_cpus() == 0)
		goto skip_ipi;

	cpumask_copy(&mask, cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), &mask);

	atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());

	pr_crit("SMP: stopping secondary CPUs\n");
	smp_cross_call(&mask, IPI_CPU_CRASH_STOP);

	/* Wait up to one second for other CPUs to stop */
	timeout = USEC_PER_SEC;
	while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
		udelay(1);

	if (atomic_read(&waiting_for_crash_ipi) > 0)
		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
			cpumask_pr_args(&mask));

skip_ipi:
	sdei_mask_local_cpu();
	sdei_handler_abort();
}

bool smp_crash_stop_failed(void)
{
	return (atomic_read(&waiting_for_crash_ipi) > 0);
}
#endif

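/*
 * True if the enable method provides a cpu_die hook, i.e. offlined CPUs can
 * actually leave the kernel rather than spin within it.
 */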
static bool have_cpu_die(void)
{
#ifdef CONFIG_HOTPLUG_CPU
	int any_cpu = raw_smp_processor_id();
	const struct cpu_operations *ops = get_cpu_ops(any_cpu);

	if (ops && ops->cpu_die)
		return true;
#endif
	return false;
}

bool cpus_are_stuck_in_kernel(void)
{
	bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());

	return !!cpus_stuck_in_kernel || smp_spin_tables ||
	       is_protected_kvm_enabled();
}

source code of linux/arch/arm64/kernel/smp.c