smp_64.c source code [linux/arch/sparc/kernel/smp_64.c]

// SPDX-License-Identifier: GPL-2.0
/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
 */
6
7	#include <linux/export.h>
8	#include <linux/kernel.h>
9	#include <linux/sched/mm.h>
10	#include <linux/sched/hotplug.h>
11	#include <linux/mm.h>
12	#include <linux/pagemap.h>
13	#include <linux/threads.h>
14	#include <linux/smp.h>
15	#include <linux/interrupt.h>
16	#include <linux/kernel_stat.h>
17	#include <linux/delay.h>
18	#include <linux/init.h>
19	#include <linux/spinlock.h>
20	#include <linux/fs.h>
21	#include <linux/seq_file.h>
22	#include <linux/cache.h>
23	#include <linux/jiffies.h>
24	#include <linux/profile.h>
25	#include <linux/memblock.h>
26	#include <linux/vmalloc.h>
27	#include <linux/ftrace.h>
28	#include <linux/cpu.h>
29	#include <linux/slab.h>
30	#include <linux/kgdb.h>
31
32	#include <asm/head.h>
33	#include <asm/ptrace.h>
34	#include <linux/atomic.h>
35	#include <asm/tlbflush.h>
36	#include <asm/mmu_context.h>
37	#include <asm/cpudata.h>
38	#include <asm/hvtramp.h>
39	#include <asm/io.h>
40	#include <asm/timer.h>
41	#include <asm/setup.h>
42
43	#include <asm/irq.h>
44	#include <asm/irq_regs.h>
45	#include <asm/page.h>
46	#include <asm/oplib.h>
47	#include <linux/uaccess.h>
48	#include <asm/starfire.h>
49	#include <asm/tlb.h>
50	#include <asm/pgalloc.h>
51	#include <asm/sections.h>
52	#include <asm/prom.h>
53	#include <asm/mdesc.h>
54	#include <asm/ldc.h>
55	#include <asm/hypervisor.h>
56	#include <asm/pcr.h>
57
58	#include "cpumap.h"
59	#include "kernel.h"
60
61	DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
62	cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
63	{ [`0` ... NR_CPUS-`1`] = CPU_MASK_NONE };
64
65	cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
66	[`0` ... NR_CPUS-`1`] = CPU_MASK_NONE };
67
68	cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
69	[`0` ... NR_CPUS - `1`] = CPU_MASK_NONE };
70
71	EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
72	EXPORT_SYMBOL(cpu_core_map);
73	EXPORT_SYMBOL(cpu_core_sib_map);
74	EXPORT_SYMBOL(cpu_core_sib_cache_map);
75
76	static cpumask_t smp_commenced_mask;
77
78	static DEFINE_PER_CPU(bool, poke);
79	static bool cpu_poke;
80
81	void smp_info(struct seq_file *m)
82	{
83	int i;
84
85	seq_printf(m, fmt: "State:\n");
86	for_each_online_cpu(i)
87	seq_printf(m, fmt: "CPU%d:\t\tonline\n", i);
88	}
89
90	void smp_bogo(struct seq_file *m)
91	{
92	int i;
93
94	for_each_online_cpu(i)
95	seq_printf(m,
96	fmt: "Cpu%dClkTck\t: %016lx\n",
97	i, cpu_data(i).clock_tick);
98	}
99
100	extern void setup_sparc64_timer(void);
101
102	static volatile unsigned long callin_flag = `0`;
103
104	void smp_callin(void)
105	{
106	int cpuid = hard_smp_processor_id();
107
108	__local_per_cpu_offset = __per_cpu_offset(cpuid);
109
110	if (tlb_type == hypervisor)
111	sun4v_ktsb_register();
112
113	__flush_tlb_all();
114
115	setup_sparc64_timer();
116
117	if (cheetah_pcache_forced_on)
118	cheetah_enable_pcache();
119
120	callin_flag = `1`;
121	__asm__ __volatile__("membar #Sync\n\t"
122	"flush %%g6" : : : "memory");
123
124	/ Clear this or we will die instantly when we*
125	* schedule back to this idler...
126	*/
127	current_thread_info()->new_child = `0`;
128
129	/ Attach to the address space of init_task. /
130	mmgrab(mm: &init_mm);
131	current->active_mm = &init_mm;
132
133	/ inform the notifiers about the new cpu /
134	notify_cpu_starting(cpu: cpuid);
135
136	while (!cpumask_test_cpu(cpu: cpuid, cpumask: &smp_commenced_mask))
137	rmb();
138
139	set_cpu_online(cpu: cpuid, online: true);
140
141	local_irq_enable();
142
143	cpu_startup_entry(state: CPUHP_AP_ONLINE_IDLE);
144	}
145
/*
 * cpu_panic - log the current cpu number and panic.
 *
 * The message text indicates this is reached when a cpu returns from
 * its idle loop; the caller is not visible in this file section.
 */
void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}
151
/* This tick register synchronization scheme is taken entirely from
 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
 *
 * The only change I've made is to rework it so that the master
 * initiates the synchronization instead of the slave. -DaveM
 */
158
159	#define MASTER 0
160	#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))
161
162	#define NUM_ROUNDS 64 /* magic value */
163	#define NUM_ITERS 5 /* likewise */
164
165	static DEFINE_RAW_SPINLOCK(itc_sync_lock);
166	static unsigned long go[SLAVE + `1`];
167
168	#define DEBUG_TICK_SYNC 0
169
170	static inline long get_delta (long rt, long* *master)
171	{
172	unsigned long best_t0 = `0`, best_t1 = ~`0UL`, best_tm = `0`;
173	unsigned long tcenter, t0, t1, tm;
174	unsigned long i;
175
176	for (i = `0`; i < NUM_ITERS; i++) {
177	t0 = tick_ops->get_tick();
178	go[MASTER] = `1`;
179	membar_safe("#StoreLoad");
180	while (!(tm = go[SLAVE]))
181	rmb();
182	go[SLAVE] = `0`;
183	wmb();
184	t1 = tick_ops->get_tick();
185
186	if (t1 - t0 < best_t1 - best_t0)
187	best_t0 = t0, best_t1 = t1, best_tm = tm;
188	}
189
190	*rt = best_t1 - best_t0;
191	*master = best_tm - best_t0;
192
193	/ average best_t0 and best_t1 without overflow: /
194	tcenter = (best_t0/`2` + best_t1/`2`);
195	if (best_t0 % `2` + best_t1 % `2` == `2`)
196	tcenter++;
197	return tcenter - best_tm;
198	}
199
200	void smp_synchronize_tick_client(void)
201	{
202	long i, delta, adj, adjust_latency = `0`, done = `0`;
203	unsigned long flags, rt, master_time_stamp;
204	#if DEBUG_TICK_SYNC
205	struct {
206	long rt; / roundtrip time /
207	long master; / master's timestamp /
208	long diff; / difference between midpoint and master's timestamp /
209	long lat; / estimate of itc adjustment latency /
210	} t[NUM_ROUNDS];
211	#endif
212
213	go[MASTER] = `1`;
214
215	while (go[MASTER])
216	rmb();
217
218	local_irq_save(flags);
219	{
220	for (i = `0`; i < NUM_ROUNDS; i++) {
221	delta = get_delta(rt: &rt, master: &master_time_stamp);
222	if (delta == `0`)
223	done = `1`; / let's lock on to this... /
224
225	if (!done) {
226	if (i > `0`) {
227	adjust_latency += -delta;
228	adj = -delta + adjust_latency/`4`;
229	} else
230	adj = -delta;
231
232	tick_ops->add_tick(adj);
233	}
234	#if DEBUG_TICK_SYNC
235	t[i].rt = rt;
236	t[i].master = master_time_stamp;
237	t[i].diff = delta;
238	t[i].lat = adjust_latency/`4`;
239	#endif
240	}
241	}
242	local_irq_restore(flags);
243
244	#if DEBUG_TICK_SYNC
245	for (i = `0`; i < NUM_ROUNDS; i++)
246	printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
247	t[i].rt, t[i].master, t[i].diff, t[i].lat);
248	#endif
249
250	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
251	"(last diff %ld cycles, maxerr %lu cycles)\n",
252	smp_processor_id(), delta, rt);
253	}
254
255	static void smp_start_sync_tick_client(int cpu);
256
257	static void smp_synchronize_one_tick(int cpu)
258	{
259	unsigned long flags, i;
260
261	go[MASTER] = `0`;
262
263	smp_start_sync_tick_client(cpu);
264
265	/ wait for client to be ready /
266	while (!go[MASTER])
267	rmb();
268
269	/ now let the client proceed into his loop /
270	go[MASTER] = `0`;
271	membar_safe("#StoreLoad");
272
273	raw_spin_lock_irqsave(&itc_sync_lock, flags);
274	{
275	for (i = `0`; i < NUM_ROUNDS*NUM_ITERS; i++) {
276	while (!go[MASTER])
277	rmb();
278	go[MASTER] = `0`;
279	wmb();
280	go[SLAVE] = tick_ops->get_tick();
281	membar_safe("#StoreLoad");
282	}
283	}
284	raw_spin_unlock_irqrestore(&itc_sync_lock, flags);
285	}
286
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
/*
 * ldom_startcpu_cpuid - start a cpu through the sun4v hypervisor.
 * @cpu:        cpu id to start.
 * @thread_reg: value stored in the descriptor's thread_reg field.
 * @descrp:     out: receives the allocated descriptor.  The caller owns
 *              it (smp_boot_one_cpu() kfree()s it).  Left untouched if
 *              the allocation fails.
 *
 * Builds a hvtramp descriptor holding one mapping per kernel image
 * mapping (consecutive 0x400000-byte steps starting at KERNBASE) and
 * hands it to sun4v_cpu_start().  Errors are only logged.
 */
static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
				void **descrp)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	/*
	 * NOTE(review): as written the "- 1" applies to the byte count, not
	 * to num_kernel_image_mappings.  Whether that is intended depends on
	 * how struct hvtramp_descr declares maps[], which is not visible
	 * here -- confirm against asm/hvtramp.h before changing.
	 */
	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings - 1),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}
	*descrp = hdesc;

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte   = tte_data;
		tte_vaddr += 0x400000;
		tte_data  += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif
341
342	extern unsigned long sparc64_cpu_startup;
343
344	/ The OBP cpu startup callback truncates the 3rd arg cookie to*
345	* 32-bits (I think) so to be safe we have it read the pointer
346	* contained here so we work on >4GB machines. -DaveM
347	*/
348	static struct thread_info *cpu_new_thread = NULL;
349
350	static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
351	{
352	unsigned long entry =
353	(unsigned long)(&sparc64_cpu_startup);
354	unsigned long cookie =
355	(unsigned long)(&cpu_new_thread);
356	void *descr = NULL;
357	int timeout, ret;
358
359	callin_flag = `0`;
360	cpu_new_thread = task_thread_info(idle);
361
362	if (tlb_type == hypervisor) {
363	#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
364	if (ldom_domaining_enabled)
365	ldom_startcpu_cpuid(cpu,
366	(unsigned long) cpu_new_thread,
367	&descr);
368	else
369	#endif
370	prom_startcpu_cpuid(cpu, entry, cookie);
371	} else {
372	struct device_node *dp = of_find_node_by_cpuid(cpu);
373
374	prom_startcpu(dp->phandle, entry, cookie);
375	}
376
377	for (timeout = `0`; timeout < `50000`; timeout++) {
378	if (callin_flag)
379	break;
380	udelay(`100`);
381	}
382
383	if (callin_flag) {
384	ret = `0`;
385	} else {
386	printk("Processor %d is stuck.\n", cpu);
387	ret = -ENODEV;
388	}
389	cpu_new_thread = NULL;
390
391	kfree(objp: descr);
392
393	return ret;
394	}
395
396	static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
397	{
398	u64 result, target;
399	int stuck, tmp;
400
401	if (this_is_starfire) {
402	/ map to real upaid /
403	cpu = (((cpu & `0x3c`) << `1`) \|
404	((cpu & `0x40`) >> `4`) \|
405	(cpu & `0x3`));
406	}
407
408	target = (cpu << `14`) \| `0x70`;
409	again:
410	/ Ok, this is the real Spitfire Errata #54.*
411	* One must read back from a UDB internal register
412	* after writes to the UDB interrupt dispatch, but
413	* before the membar Sync for that write.
414	* So we use the high UDB control register (ASI 0x7f,
415	* ADDR 0x20) for the dummy read. -DaveM
416	*/
417	tmp = `0x40`;
418	__asm__ __volatile__(
419	"wrpr %1, %2, %%pstate\n\t"
420	"stxa %4, [%0] %3\n\t"
421	"stxa %5, [%0+%8] %3\n\t"
422	"add %0, %8, %0\n\t"
423	"stxa %6, [%0+%8] %3\n\t"
424	"membar #Sync\n\t"
425	"stxa %%g0, [%7] %3\n\t"
426	"membar #Sync\n\t"
427	"mov 0x20, %%g1\n\t"
428	"ldxa [%%g1] 0x7f, %%g0\n\t"
429	"membar #Sync"
430	: "=r" (tmp)
431	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
432	"r" (data0), "r" (data1), "r" (data2), "r" (target),
433	"r" (`0x10`), "0" (tmp)
434	: "g1");
435
436	/ NOTE: PSTATE_IE is still clear. /
437	stuck = `100000`;
438	do {
439	__asm__ __volatile__("ldxa [%%g0] %1, %0"
440	: "=r" (result)
441	: "i" (ASI_INTR_DISPATCH_STAT));
442	if (result == `0`) {
443	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
444	: : "r" (pstate));
445	return;
446	}
447	stuck -= `1`;
448	if (stuck == `0`)
449	break;
450	} while (result & `0x1`);
451	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
452	: : "r" (pstate));
453	if (stuck == `0`) {
454	printk("CPU[%d]: mondo stuckage result[%016llx]\n",
455	smp_processor_id(), result);
456	} else {
457	udelay(`2`);
458	goto again;
459	}
460	}
461
462	static void spitfire_xcall_deliver(struct trap_per_cpu tb, int* cnt)
463	{
464	u64 *mondo, data0, data1, data2;
465	u16 *cpu_list;
466	u64 pstate;
467	int i;
468
469	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
470	cpu_list = __va(tb->cpu_list_pa);
471	mondo = __va(tb->cpu_mondo_block_pa);
472	data0 = mondo[`0`];
473	data1 = mondo[`1`];
474	data2 = mondo[`2`];
475	for (i = `0`; i < cnt; i++)
476	spitfire_xcall_helper(data0, data1, data2, pstate, cpu: cpu_list[i]);
477	}
478
479	/ Cheetah now allows to send the whole 64-bytes of data in the interrupt*
480	* packet, but we have no use for that. However we do take advantage of
481	* the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
482	*/
483	static void cheetah_xcall_deliver(struct trap_per_cpu tb, int* cnt)
484	{
485	int nack_busy_id, is_jbus, need_more;
486	u64 *mondo, pstate, ver, busy_mask;
487	u16 *cpu_list;
488
489	cpu_list = __va(tb->cpu_list_pa);
490	mondo = __va(tb->cpu_mondo_block_pa);
491
492	/ Unfortunately, someone at Sun had the brilliant idea to make the*
493	* busy/nack fields hard-coded by ITID number for this Ultra-III
494	* derivative processor.
495	*/
496	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
497	is_jbus = ((ver >> `32`) == __JALAPENO_ID \|\|
498	(ver >> `32`) == __SERRANO_ID);
499
500	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
501
502	retry:
503	need_more = `0`;
504	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
505	: : "r" (pstate), "i" (PSTATE_IE));
506
507	/ Setup the dispatch data registers. /
508	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
509	"stxa %1, [%4] %6\n\t"
510	"stxa %2, [%5] %6\n\t"
511	"membar #Sync\n\t"
512	: / no outputs /
513	: "r" (mondo[`0`]), "r" (mondo[`1`]), "r" (mondo[`2`]),
514	"r" (`0x40`), "r" (`0x50`), "r" (`0x60`),
515	"i" (ASI_INTR_W));
516
517	nack_busy_id = `0`;
518	busy_mask = `0`;
519	{
520	int i;
521
522	for (i = `0`; i < cnt; i++) {
523	u64 target, nr;
524
525	nr = cpu_list[i];
526	if (nr == `0xffff`)
527	continue;
528
529	target = (nr << `14`) \| `0x70`;
530	if (is_jbus) {
531	busy_mask \|= (`0x1UL` << (nr * `2`));
532	} else {
533	target \|= (nack_busy_id << `24`);
534	busy_mask \|= (`0x1UL` <<
535	(nack_busy_id * `2`));
536	}
537	__asm__ __volatile__(
538	"stxa %%g0, [%0] %1\n\t"
539	"membar #Sync\n\t"
540	: / no outputs /
541	: "r" (target), "i" (ASI_INTR_W));
542	nack_busy_id++;
543	if (nack_busy_id == `32`) {
544	need_more = `1`;
545	break;
546	}
547	}
548	}
549
550	/ Now, poll for completion. /
551	{
552	u64 dispatch_stat, nack_mask;
553	long stuck;
554
555	stuck = `100000` * nack_busy_id;
556	nack_mask = busy_mask << `1`;
557	do {
558	__asm__ __volatile__("ldxa [%%g0] %1, %0"
559	: "=r" (dispatch_stat)
560	: "i" (ASI_INTR_DISPATCH_STAT));
561	if (!(dispatch_stat & (busy_mask \| nack_mask))) {
562	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
563	: : "r" (pstate));
564	if (unlikely(need_more)) {
565	int i, this_cnt = `0`;
566	for (i = `0`; i < cnt; i++) {
567	if (cpu_list[i] == `0xffff`)
568	continue;
569	cpu_list[i] = `0xffff`;
570	this_cnt++;
571	if (this_cnt == `32`)
572	break;
573	}
574	goto retry;
575	}
576	return;
577	}
578	if (!--stuck)
579	break;
580	} while (dispatch_stat & busy_mask);
581
582	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
583	: : "r" (pstate));
584
585	if (dispatch_stat & busy_mask) {
586	/ Busy bits will not clear, continue instead*
587	* of freezing up on this cpu.
588	*/
589	printk("CPU[%d]: mondo stuckage result[%016llx]\n",
590	smp_processor_id(), dispatch_stat);
591	} else {
592	int i, this_busy_nack = `0`;
593
594	/ Delay some random time with interrupts enabled*
595	* to prevent deadlock.
596	*/
597	udelay(`2` * nack_busy_id);
598
599	/ Clear out the mask bits for cpus which did not*
600	* NACK us.
601	*/
602	for (i = `0`; i < cnt; i++) {
603	u64 check_mask, nr;
604
605	nr = cpu_list[i];
606	if (nr == `0xffff`)
607	continue;
608
609	if (is_jbus)
610	check_mask = (`0x2UL` << (`2`*nr));
611	else
612	check_mask = (`0x2UL` <<
613	this_busy_nack);
614	if ((dispatch_stat & check_mask) == `0`)
615	cpu_list[i] = `0xffff`;
616	this_busy_nack += `2`;
617	if (this_busy_nack == `64`)
618	break;
619	}
620
621	goto retry;
622	}
623	}
624	}
625
626	#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid])
627	#define MONDO_USEC_WAIT_MIN 2
628	#define MONDO_USEC_WAIT_MAX 100
629	#define MONDO_RETRY_LIMIT 500000
630
631	/ Multi-cpu list version.*
632	*
633	* Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
634	* Sometimes not all cpus receive the mondo, requiring us to re-send
635	* the mondo until all cpus have received, or cpus are truly stuck
636	* unable to receive mondo, and we timeout.
637	* Occasionally a target cpu strand is borrowed briefly by hypervisor to
638	* perform guest service, such as PCIe error handling. Consider the
639	* service time, 1 second overall wait is reasonable for 1 cpu.
640	* Here two in-between mondo check wait time are defined: 2 usec for
641	* single cpu quick turn around and up to 100usec for large cpu count.
642	* Deliver mondo to large number of cpus could take longer, we adjusts
643	* the retry count as long as target cpus are making forward progress.
644	*/
645	static void hypervisor_xcall_deliver(struct trap_per_cpu tb, int* cnt)
646	{
647	int this_cpu, tot_cpus, prev_sent, i, rem;
648	int usec_wait, retries, tot_retries;
649	u16 first_cpu = `0xffff`;
650	unsigned long xc_rcvd = `0`;
651	unsigned long status;
652	int ecpuerror_id = `0`;
653	int enocpu_id = `0`;
654	u16 *cpu_list;
655	u16 cpu;
656
657	this_cpu = smp_processor_id();
658	cpu_list = __va(tb->cpu_list_pa);
659	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
660	if (usec_wait > MONDO_USEC_WAIT_MAX)
661	usec_wait = MONDO_USEC_WAIT_MAX;
662	retries = tot_retries = `0`;
663	tot_cpus = cnt;
664	prev_sent = `0`;
665
666	do {
667	int n_sent, mondo_delivered, target_cpu_busy;
668
669	status = sun4v_cpu_mondo_send(cnt,
670	tb->cpu_list_pa,
671	tb->cpu_mondo_block_pa);
672
673	/ HV_EOK means all cpus received the xcall, we're done. /
674	if (likely(status == HV_EOK))
675	goto xcall_done;
676
677	/ If not these non-fatal errors, panic /
678	if (unlikely((status != HV_EWOULDBLOCK) &&
679	(status != HV_ECPUERROR) &&
680	(status != HV_ENOCPU)))
681	goto fatal_errors;
682
683	/ First, see if we made any forward progress.*
684	*
685	* Go through the cpu_list, count the target cpus that have
686	* received our mondo (n_sent), and those that did not (rem).
687	* Re-pack cpu_list with the cpus remain to be retried in the
688	* front - this simplifies tracking the truly stalled cpus.
689	*
690	* The hypervisor indicates successful sends by setting
691	* cpu list entries to the value 0xffff.
692	*
693	* EWOULDBLOCK means some target cpus did not receive the
694	* mondo and retry usually helps.
695	*
696	* ECPUERROR means at least one target cpu is in error state,
697	* it's usually safe to skip the faulty cpu and retry.
698	*
699	* ENOCPU means one of the target cpu doesn't belong to the
700	* domain, perhaps offlined which is unexpected, but not
701	* fatal and it's okay to skip the offlined cpu.
702	*/
703	rem = `0`;
704	n_sent = `0`;
705	for (i = `0`; i < cnt; i++) {
706	cpu = cpu_list[i];
707	if (likely(cpu == `0xffff`)) {
708	n_sent++;
709	} else if ((status == HV_ECPUERROR) &&
710	(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
711	ecpuerror_id = cpu + `1`;
712	} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
713	enocpu_id = cpu + `1`;
714	} else {
715	cpu_list[rem++] = cpu;
716	}
717	}
718
719	/ No cpu remained, we're done. /
720	if (rem == `0`)
721	break;
722
723	/ Otherwise, update the cpu count for retry. /
724	cnt = rem;
725
726	/ Record the overall number of mondos received by the*
727	* first of the remaining cpus.
728	*/
729	if (first_cpu != cpu_list[`0`]) {
730	first_cpu = cpu_list[`0`];
731	xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
732	}
733
734	/ Was any mondo delivered successfully? /
735	mondo_delivered = (n_sent > prev_sent);
736	prev_sent = n_sent;
737
738	/ or, was any target cpu busy processing other mondos? /
739	target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
740	xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
741
742	/ Retry count is for no progress. If we're making progress,*
743	* reset the retry count.
744	*/
745	if (likely(mondo_delivered \|\| target_cpu_busy)) {
746	tot_retries += retries;
747	retries = `0`;
748	} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
749	goto fatal_mondo_timeout;
750	}
751
752	/ Delay a little bit to let other cpus catch up on*
753	* their cpu mondo queue work.
754	*/
755	if (!mondo_delivered)
756	udelay(usec_wait);
757
758	retries++;
759	} while (`1`);
760
761	xcall_done:
762	if (unlikely(ecpuerror_id > `0`)) {
763	pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
764	this_cpu, ecpuerror_id - `1`);
765	} else if (unlikely(enocpu_id > `0`)) {
766	pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
767	this_cpu, enocpu_id - `1`);
768	}
769	return;
770
771	fatal_errors:
772	/ fatal errors include bad alignment, etc /
773	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
774	this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
775	panic(fmt: "Unexpected SUN4V mondo error %lu\n", status);
776
777	fatal_mondo_timeout:
778	/ some cpus being non-responsive to the cpu mondo /
779	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
780	this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
781	panic(fmt: "SUN4V mondo timeout panic\n");
782	}
783
/*
 * Per-platform mondo delivery routine.  smp_setup_processor_id() points
 * it at the spitfire, cheetah or hypervisor implementation.
 */
static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
785
786	static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
787	{
788	struct trap_per_cpu *tb;
789	int this_cpu, i, cnt;
790	unsigned long flags;
791	u16 *cpu_list;
792	u64 *mondo;
793
794	/ We have to do this whole thing with interrupts fully disabled.*
795	* Otherwise if we send an xcall from interrupt context it will
796	* corrupt both our mondo block and cpu list state.
797	*
798	* One consequence of this is that we cannot use timeout mechanisms
799	* that depend upon interrupts being delivered locally. So, for
800	* example, we cannot sample jiffies and expect it to advance.
801	*
802	* Fortunately, udelay() uses %stick/%tick so we can use that.
803	*/
804	local_irq_save(flags);
805
806	this_cpu = smp_processor_id();
807	tb = &trap_block[this_cpu];
808
809	mondo = __va(tb->cpu_mondo_block_pa);
810	mondo[`0`] = data0;
811	mondo[`1`] = data1;
812	mondo[`2`] = data2;
813	wmb();
814
815	cpu_list = __va(tb->cpu_list_pa);
816
817	/ Setup the initial cpu list. /
818	cnt = `0`;
819	for_each_cpu(i, mask) {
820	if (i == this_cpu \|\| !cpu_online(cpu: i))
821	continue;
822	cpu_list[cnt++] = i;
823	}
824
825	if (cnt)
826	xcall_deliver_impl(tb, cnt);
827
828	local_irq_restore(flags);
829	}
830
831	/ Send cross call to all processors mentioned in MASK_P*
832	* except self. Really, there are only two cases currently,
833	* "cpu_online_mask" and "mm_cpumask(mm)".
834	*/
835	static void smp_cross_call_masked(unsigned long func, u32 ctx, u64 data1, u64 data2, const* cpumask_t *mask)
836	{
837	u64 data0 = (((u64)ctx)<<`32` \| (((u64)func) & `0xffffffff`));
838
839	xcall_deliver(data0, data1, data2, mask);
840	}
841
842	/ Send cross call to all processors except self. /
843	static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
844	{
845	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
846	}
847
848	extern unsigned long xcall_sync_tick;
849
850	static void smp_start_sync_tick_client(int cpu)
851	{
852	xcall_deliver(data0: (u64) &xcall_sync_tick, data1: `0`, data2: `0`,
853	cpumask_of(cpu));
854	}
855
856	extern unsigned long xcall_call_function;
857
858	void arch_send_call_function_ipi_mask(const struct cpumask *mask)
859	{
860	xcall_deliver(data0: (u64) &xcall_call_function, data1: `0`, data2: `0`, mask);
861	}
862
863	extern unsigned long xcall_call_function_single;
864
865	void arch_send_call_function_single_ipi(int cpu)
866	{
867	xcall_deliver(data0: (u64) &xcall_call_function_single, data1: `0`, data2: `0`,
868	cpumask_of(cpu));
869	}
870
871	void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
872	{
873	clear_softint(`1` << irq);
874	irq_enter();
875	generic_smp_call_function_interrupt();
876	irq_exit();
877	}
878
879	void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
880	{
881	clear_softint(`1` << irq);
882	irq_enter();
883	generic_smp_call_function_single_interrupt();
884	irq_exit();
885	}
886
887	static void tsb_sync(void *info)
888	{
889	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
890	struct mm_struct *mm = info;
891
892	/ It is not valid to test "current->active_mm == mm" here.*
893	*
894	* The value of "current" is not changed atomically with
895	* switch_mm(). But that's OK, we just need to check the
896	* current cpu's trap block PGD physical address.
897	*/
898	if (tp->pgd_paddr == __pa(mm->pgd))
899	tsb_context_switch(mm);
900	}
901
902	void smp_tsb_sync(struct mm_struct *mm)
903	{
904	smp_call_function_many(mask: mm_cpumask(mm), func: tsb_sync, info: mm, wait: `1`);
905	}
906
/* Cross-call targets defined outside this file; only their addresses are used. */
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_fetch_glob_pmu;
extern unsigned long xcall_fetch_glob_pmu_n4;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
#ifdef CONFIG_KGDB
extern unsigned long xcall_kgdb_capture;
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;
923
924	static inline void __local_flush_dcache_folio(struct folio *folio)
925	{
926	unsigned int i, nr = folio_nr_pages(folio);
927
928	#ifdef DCACHE_ALIASING_POSSIBLE
929	for (i = `0`; i < nr; i++)
930	__flush_dcache_page(folio_address(folio) + i * PAGE_SIZE,
931	((tlb_type == spitfire) &&
932	folio_flush_mapping(folio) != NULL));
933	#else
934	if (folio_flush_mapping(folio) != NULL &&
935	tlb_type == spitfire) {
936	unsigned long pfn = folio_pfn(folio)
937	for (i = `0`; i < nr; i++)
938	__flush_icache_page((pfn + i) * PAGE_SIZE);
939	}
940	#endif
941	}
942
943	void smp_flush_dcache_folio_impl(struct folio folio, int* cpu)
944	{
945	int this_cpu;
946
947	if (tlb_type == hypervisor)
948	return;
949
950	#ifdef CONFIG_DEBUG_DCFLUSH
951	atomic_inc(&dcpage_flushes);
952	#endif
953
954	this_cpu = get_cpu();
955
956	if (cpu == this_cpu) {
957	__local_flush_dcache_folio(folio);
958	} else if (cpu_online(cpu)) {
959	void *pg_addr = folio_address(folio);
960	u64 data0 = `0`;
961
962	if (tlb_type == spitfire) {
963	data0 = ((u64)&xcall_flush_dcache_page_spitfire);
964	if (folio_flush_mapping(folio) != NULL)
965	data0 \|= ((u64)`1` << `32`);
966	} else if (tlb_type == cheetah \|\| tlb_type == cheetah_plus) {
967	#ifdef DCACHE_ALIASING_POSSIBLE
968	data0 = ((u64)&xcall_flush_dcache_page_cheetah);
969	#endif
970	}
971	if (data0) {
972	unsigned int i, nr = folio_nr_pages(folio);
973
974	for (i = `0`; i < nr; i++) {
975	xcall_deliver(data0, __pa(pg_addr),
976	data2: (u64) pg_addr, cpumask_of(cpu));
977	#ifdef CONFIG_DEBUG_DCFLUSH
978	atomic_inc(&dcpage_flushes_xcall);
979	#endif
980	pg_addr += PAGE_SIZE;
981	}
982	}
983	}
984
985	put_cpu();
986	}
987
988	void flush_dcache_folio_all(struct mm_struct mm, struct* folio *folio)
989	{
990	void *pg_addr;
991	u64 data0;
992
993	if (tlb_type == hypervisor)
994	return;
995
996	preempt_disable();
997
998	#ifdef CONFIG_DEBUG_DCFLUSH
999	atomic_inc(&dcpage_flushes);
1000	#endif
1001	data0 = `0`;
1002	pg_addr = folio_address(folio);
1003	if (tlb_type == spitfire) {
1004	data0 = ((u64)&xcall_flush_dcache_page_spitfire);
1005	if (folio_flush_mapping(folio) != NULL)
1006	data0 \|= ((u64)`1` << `32`);
1007	} else if (tlb_type == cheetah \|\| tlb_type == cheetah_plus) {
1008	#ifdef DCACHE_ALIASING_POSSIBLE
1009	data0 = ((u64)&xcall_flush_dcache_page_cheetah);
1010	#endif
1011	}
1012	if (data0) {
1013	unsigned int i, nr = folio_nr_pages(folio);
1014
1015	for (i = `0`; i < nr; i++) {
1016	xcall_deliver(data0, __pa(pg_addr),
1017	data2: (u64) pg_addr, cpu_online_mask);
1018	#ifdef CONFIG_DEBUG_DCFLUSH
1019	atomic_inc(&dcpage_flushes_xcall);
1020	#endif
1021	pg_addr += PAGE_SIZE;
1022	}
1023	}
1024	__local_flush_dcache_folio(folio);
1025
1026	preempt_enable();
1027	}
1028
#ifdef CONFIG_KGDB
/* Send the xcall_kgdb_capture cross call to all other online cpus. */
void kgdb_roundup_cpus(void)
{
	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
#endif
1035
1036	void smp_fetch_global_regs(void)
1037	{
1038	smp_cross_call(func: &xcall_fetch_glob_regs, ctx: `0`, data1: `0`, data2: `0`);
1039	}
1040
1041	void smp_fetch_global_pmu(void)
1042	{
1043	if (tlb_type == hypervisor &&
1044	sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
1045	smp_cross_call(func: &xcall_fetch_glob_pmu_n4, ctx: `0`, data1: `0`, data2: `0`);
1046	else
1047	smp_cross_call(func: &xcall_fetch_glob_pmu, ctx: `0`, data1: `0`, data2: `0`);
1048	}
1049
1050	/ We know that the window frames of the user have been flushed*
1051	* to the stack before we get here because all callers of us
1052	* are flush_tlb_() routines, and these run after flush_cache_()
1053	* which performs the flushw.
1054	*
1055	* mm->cpu_vm_mask is a bit mask of which cpus an address
1056	* space has (potentially) executed on, this is the heuristic
1057	* we use to limit cross calls.
1058	*/
1059
1060	/ This currently is only used by the hugetlb arch pre-fault*
1061	* hook on UltraSPARC-III+ and later when changing the pagesize
1062	* bits of the context register for an address space.
1063	*/
1064	void smp_flush_tlb_mm(struct mm_struct *mm)
1065	{
1066	u32 ctx = CTX_HWBITS(mm->context);
1067
1068	get_cpu();
1069
1070	smp_cross_call_masked(func: &xcall_flush_tlb_mm,
1071	ctx, data1: `0`, data2: `0`,
1072	mask: mm_cpumask(mm));
1073
1074	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
1075
1076	put_cpu();
1077	}
1078
/* Arguments handed to tlb_pending_func() through smp_call_function_many(). */
struct tlb_pending_info {
	unsigned long ctx;	/* context value passed to __flush_tlb_pending() */
	unsigned long nr;	/* number of entries in vaddrs */
	unsigned long *vaddrs;	/* addresses to flush */
};
1084
1085	static void tlb_pending_func(void *info)
1086	{
1087	struct tlb_pending_info *t = info;
1088
1089	__flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
1090	}
1091
1092	void smp_flush_tlb_pending(struct mm_struct mm, unsigned* long nr, unsigned long *vaddrs)
1093	{
1094	u32 ctx = CTX_HWBITS(mm->context);
1095	struct tlb_pending_info info;
1096
1097	get_cpu();
1098
1099	info.ctx = ctx;
1100	info.nr = nr;
1101	info.vaddrs = vaddrs;
1102
1103	smp_call_function_many(mask: mm_cpumask(mm), func: tlb_pending_func,
1104	info: &info, wait: `1`);
1105
1106	__flush_tlb_pending(ctx, nr, vaddrs);
1107
1108	put_cpu();
1109	}
1110
1111	void smp_flush_tlb_page(struct mm_struct mm, unsigned* long vaddr)
1112	{
1113	unsigned long context = CTX_HWBITS(mm->context);
1114
1115	get_cpu();
1116
1117	smp_cross_call_masked(func: &xcall_flush_tlb_page,
1118	ctx: context, data1: vaddr, data2: `0`,
1119	mask: mm_cpumask(mm));
1120
1121	__flush_tlb_page(context, vaddr);
1122
1123	put_cpu();
1124	}
1125
1126	void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
1127	{
1128	start &= PAGE_MASK;
1129	end = PAGE_ALIGN(end);
1130	if (start != end) {
1131	smp_cross_call(func: &xcall_flush_tlb_kernel_range,
1132	ctx: `0`, data1: start, data2: end);
1133
1134	__flush_tlb_kernel_range(start, end);
1135	}
1136	}
1137
1138	/ CPU capture. /
1139	/ #define CAPTURE_DEBUG /
1140	extern unsigned long xcall_capture;
1141
1142	static atomic_t smp_capture_depth = ATOMIC_INIT(`0`);
1143	static atomic_t smp_capture_registry = ATOMIC_INIT(`0`);
1144	static unsigned long penguins_are_doing_time;
1145
1146	void smp_capture(void)
1147	{
1148	int result = atomic_add_return(i: `1`, v: &smp_capture_depth);
1149
1150	if (result == `1`) {
1151	int ncpus = num_online_cpus();
1152
1153	#ifdef CAPTURE_DEBUG
1154	printk("CPU[%d]: Sending penguins to jail...",
1155	smp_processor_id());
1156	#endif
1157	penguins_are_doing_time = `1`;
1158	atomic_inc(v: &smp_capture_registry);
1159	smp_cross_call(func: &xcall_capture, ctx: `0`, data1: `0`, data2: `0`);
1160	while (atomic_read(v: &smp_capture_registry) != ncpus)
1161	rmb();
1162	#ifdef CAPTURE_DEBUG
1163	printk("done\n");
1164	#endif
1165	}
1166	}
1167
1168	void smp_release(void)
1169	{
1170	if (atomic_dec_and_test(v: &smp_capture_depth)) {
1171	#ifdef CAPTURE_DEBUG
1172	printk("CPU[%d]: Giving pardon to "
1173	"imprisoned penguins\n",
1174	smp_processor_id());
1175	#endif
1176	penguins_are_doing_time = `0`;
1177	membar_safe("#StoreLoad");
1178	atomic_dec(v: &smp_capture_registry);
1179	}
1180	}
1181
1182	/ Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE*
1183	* set, so they can service tlb flush xcalls...
1184	*/
1185	extern void prom_world(int);
1186
1187	void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
1188	{
1189	clear_softint(`1` << irq);
1190
1191	preempt_disable();
1192
1193	__asm__ __volatile__("flushw");
1194	prom_world(`1`);
1195	atomic_inc(&smp_capture_registry);
1196	membar_safe("#StoreLoad");
1197	while (penguins_are_doing_time)
1198	rmb();
1199	atomic_dec(&smp_capture_registry);
1200	prom_world(`0`);
1201
1202	preempt_enable();
1203	}
1204
1205	void __init smp_prepare_cpus(unsigned int max_cpus)
1206	{
1207	}
1208
1209	void __init smp_setup_processor_id(void)
1210	{
1211	if (tlb_type == spitfire)
1212	xcall_deliver_impl = spitfire_xcall_deliver;
1213	else if (tlb_type == cheetah \|\| tlb_type == cheetah_plus)
1214	xcall_deliver_impl = cheetah_xcall_deliver;
1215	else
1216	xcall_deliver_impl = hypervisor_xcall_deliver;
1217	}
1218
1219	void __init smp_fill_in_cpu_possible_map(void)
1220	{
1221	int possible_cpus = num_possible_cpus();
1222	int i;
1223
1224	if (possible_cpus > nr_cpu_ids)
1225	possible_cpus = nr_cpu_ids;
1226
1227	for (i = `0`; i < possible_cpus; i++)
1228	set_cpu_possible(cpu: i, possible: true);
1229	for (; i < NR_CPUS; i++)
1230	set_cpu_possible(cpu: i, possible: false);
1231	}
1232
1233	void smp_fill_in_sib_core_maps(void)
1234	{
1235	unsigned int i;
1236
1237	for_each_present_cpu(i) {
1238	unsigned int j;
1239
1240	cpumask_clear(dstp: &cpu_core_map[i]);
1241	if (cpu_data(i).core_id == `0`) {
1242	cpumask_set_cpu(cpu: i, dstp: &cpu_core_map[i]);
1243	continue;
1244	}
1245
1246	for_each_present_cpu(j) {
1247	if (cpu_data(i).core_id ==
1248	cpu_data(j).core_id)
1249	cpumask_set_cpu(cpu: j, dstp: &cpu_core_map[i]);
1250	}
1251	}
1252
1253	for_each_present_cpu(i) {
1254	unsigned int j;
1255
1256	for_each_present_cpu(j) {
1257	if (cpu_data(i).max_cache_id ==
1258	cpu_data(j).max_cache_id)
1259	cpumask_set_cpu(cpu: j, dstp: &cpu_core_sib_cache_map[i]);
1260
1261	if (cpu_data(i).sock_id == cpu_data(j).sock_id)
1262	cpumask_set_cpu(cpu: j, dstp: &cpu_core_sib_map[i]);
1263	}
1264	}
1265
1266	for_each_present_cpu(i) {
1267	unsigned int j;
1268
1269	cpumask_clear(dstp: &per_cpu(cpu_sibling_map, i));
1270	if (cpu_data(i).proc_id == -`1`) {
1271	cpumask_set_cpu(cpu: i, dstp: &per_cpu(cpu_sibling_map, i));
1272	continue;
1273	}
1274
1275	for_each_present_cpu(j) {
1276	if (cpu_data(i).proc_id ==
1277	cpu_data(j).proc_id)
1278	cpumask_set_cpu(cpu: j, dstp: &per_cpu(cpu_sibling_map, i));
1279	}
1280	}
1281	}
1282
1283	int __cpu_up(unsigned int cpu, struct task_struct *tidle)
1284	{
1285	int ret = smp_boot_one_cpu(cpu, idle: tidle);
1286
1287	if (!ret) {
1288	cpumask_set_cpu(cpu, dstp: &smp_commenced_mask);
1289	while (!cpu_online(cpu))
1290	mb();
1291	if (!cpu_online(cpu)) {
1292	ret = -ENODEV;
1293	} else {
1294	/ On SUN4V, writes to %tick and %stick are*
1295	* not allowed.
1296	*/
1297	if (tlb_type != hypervisor)
1298	smp_synchronize_one_tick(cpu);
1299	}
1300	}
1301	return ret;
1302	}
1303
1304	#ifdef CONFIG_HOTPLUG_CPU
1305	void cpu_play_dead(void)
1306	{
1307	int cpu = smp_processor_id();
1308	unsigned long pstate;
1309
1310	idle_task_exit();
1311
1312	if (tlb_type == hypervisor) {
1313	struct trap_per_cpu *tb = &trap_block[cpu];
1314
1315	sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
1316	tb->cpu_mondo_pa, `0`);
1317	sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
1318	tb->dev_mondo_pa, `0`);
1319	sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
1320	tb->resum_mondo_pa, `0`);
1321	sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
1322	tb->nonresum_mondo_pa, `0`);
1323	}
1324
1325	cpumask_clear_cpu(cpu, dstp: &smp_commenced_mask);
1326	membar_safe("#Sync");
1327
1328	local_irq_disable();
1329
1330	__asm__ __volatile__(
1331	"rdpr %%pstate, %0\n\t"
1332	"wrpr %0, %1, %%pstate"
1333	: "=r" (pstate)
1334	: "i" (PSTATE_IE));
1335
1336	while (`1`)
1337	barrier();
1338	}
1339
1340	int __cpu_disable(void)
1341	{
1342	int cpu = smp_processor_id();
1343	cpuinfo_sparc *c;
1344	int i;
1345
1346	for_each_cpu(i, &cpu_core_map[cpu])
1347	cpumask_clear_cpu(cpu, dstp: &cpu_core_map[i]);
1348	cpumask_clear(dstp: &cpu_core_map[cpu]);
1349
1350	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
1351	cpumask_clear_cpu(cpu, dstp: &per_cpu(cpu_sibling_map, i));
1352	cpumask_clear(dstp: &per_cpu(cpu_sibling_map, cpu));
1353
1354	c = &cpu_data(cpu);
1355
1356	c->core_id = `0`;
1357	c->proc_id = -`1`;
1358
1359	smp_wmb();
1360
1361	/ Make sure no interrupts point to this cpu. /
1362	fixup_irqs();
1363
1364	local_irq_enable();
1365	mdelay(`1`);
1366	local_irq_disable();
1367
1368	set_cpu_online(cpu, online: false);
1369
1370	cpu_map_rebuild();
1371
1372	return `0`;
1373	}
1374
1375	void __cpu_die(unsigned int cpu)
1376	{
1377	int i;
1378
1379	for (i = `0`; i < `100`; i++) {
1380	smp_rmb();
1381	if (!cpumask_test_cpu(cpu, cpumask: &smp_commenced_mask))
1382	break;
1383	msleep(msecs: `100`);
1384	}
1385	if (cpumask_test_cpu(cpu, cpumask: &smp_commenced_mask)) {
1386	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1387	} else {
1388	#if defined(CONFIG_SUN_LDOMS)
1389	unsigned long hv_err;
1390	int limit = `100`;
1391
1392	do {
1393	hv_err = sun4v_cpu_stop(cpu);
1394	if (hv_err == HV_EOK) {
1395	set_cpu_present(cpu, false);
1396	break;
1397	}
1398	} while (--limit > `0`);
1399	if (limit <= `0`) {
1400	printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
1401	hv_err);
1402	}
1403	#endif
1404	}
1405	}
1406	#endif
1407
1408	void __init smp_cpus_done(unsigned int max_cpus)
1409	{
1410	}
1411
1412	static void send_cpu_ipi(int cpu)
1413	{
1414	xcall_deliver(data0: (u64) &xcall_receive_signal,
1415	data1: `0`, data2: `0`, cpumask_of(cpu));
1416	}
1417
1418	void scheduler_poke(void)
1419	{
1420	if (!cpu_poke)
1421	return;
1422
1423	if (!__this_cpu_read(poke))
1424	return;
1425
1426	__this_cpu_write(poke, false);
1427	set_softint(`1` << PIL_SMP_RECEIVE_SIGNAL);
1428	}
1429
1430	static unsigned long send_cpu_poke(int cpu)
1431	{
1432	unsigned long hv_err;
1433
1434	per_cpu(poke, cpu) = true;
1435	hv_err = sun4v_cpu_poke(cpu);
1436	if (hv_err != HV_EOK) {
1437	per_cpu(poke, cpu) = false;
1438	pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n",
1439	__func__, hv_err);
1440	}
1441
1442	return hv_err;
1443	}
1444
1445	void arch_smp_send_reschedule(int cpu)
1446	{
1447	if (cpu == smp_processor_id()) {
1448	WARN_ON_ONCE(preemptible());
1449	set_softint(`1` << PIL_SMP_RECEIVE_SIGNAL);
1450	return;
1451	}
1452
1453	/ Use cpu poke to resume idle cpu if supported. /
1454	if (cpu_poke && idle_cpu(cpu)) {
1455	unsigned long ret;
1456
1457	ret = send_cpu_poke(cpu);
1458	if (ret == HV_EOK)
1459	return;
1460	}
1461
1462	/ Use IPI in following cases:*
1463	* - cpu poke not supported
1464	* - cpu not idle
1465	* - send_cpu_poke() returns with error
1466	*/
1467	send_cpu_ipi(cpu);
1468	}
1469
1470	void smp_init_cpu_poke(void)
1471	{
1472	unsigned long major;
1473	unsigned long minor;
1474	int ret;
1475
1476	if (tlb_type != hypervisor)
1477	return;
1478
1479	ret = sun4v_hvapi_get(HV_GRP_CORE, &major, &minor);
1480	if (ret) {
1481	pr_debug("HV_GRP_CORE is not registered\n");
1482	return;
1483	}
1484
1485	if (major == `1` && minor >= `6`) {
1486	/ CPU POKE is registered. /
1487	cpu_poke = true;
1488	return;
1489	}
1490
1491	pr_debug("CPU_POKE not supported\n");
1492	}
1493
1494	void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
1495	{
1496	clear_softint(`1` << irq);
1497	scheduler_ipi();
1498	}
1499
1500	static void stop_this_cpu(void *dummy)
1501	{
1502	set_cpu_online(smp_processor_id(), online: false);
1503	prom_stopself();
1504	}
1505
1506	void smp_send_stop(void)
1507	{
1508	int cpu;
1509
1510	if (tlb_type == hypervisor) {
1511	int this_cpu = smp_processor_id();
1512	#ifdef CONFIG_SERIAL_SUNHV
1513	sunhv_migrate_hvcons_irq(this_cpu);
1514	#endif
1515	for_each_online_cpu(cpu) {
1516	if (cpu == this_cpu)
1517	continue;
1518
1519	set_cpu_online(cpu, online: false);
1520	#ifdef CONFIG_SUN_LDOMS
1521	if (ldom_domaining_enabled) {
1522	unsigned long hv_err;
1523	hv_err = sun4v_cpu_stop(cpu);
1524	if (hv_err)
1525	printk(KERN_ERR "sun4v_cpu_stop() "
1526	"failed err=%lu\n", hv_err);
1527	} else
1528	#endif
1529	prom_stopcpu_cpuid(cpu);
1530	}
1531	} else
1532	smp_call_function(func: stop_this_cpu, NULL, wait: `0`);
1533	}
1534
1535	static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
1536	{
1537	if (cpu_to_node(cpu: from) == cpu_to_node(cpu: to))
1538	return LOCAL_DISTANCE;
1539	else
1540	return REMOTE_DISTANCE;
1541	}
1542
1543	static int __init pcpu_cpu_to_node(int cpu)
1544	{
1545	return cpu_to_node(cpu);
1546	}
1547
1548	void __init setup_per_cpu_areas(void)
1549	{
1550	unsigned long delta;
1551	unsigned int cpu;
1552	int rc = -EINVAL;
1553
1554	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
1555	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
1556	PERCPU_DYNAMIC_RESERVE, atom_size: `4` << `20`,
1557	cpu_distance_fn: pcpu_cpu_distance,
1558	cpu_to_nd_fn: pcpu_cpu_to_node);
1559	if (rc)
1560	pr_warn("PERCPU: %s allocator failed (%d), "
1561	"falling back to page size\n",
1562	pcpu_fc_names[pcpu_chosen_fc], rc);
1563	}
1564	if (rc < `0`)
1565	rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
1566	cpu_to_nd_fn: pcpu_cpu_to_node);
1567	if (rc < `0`)
1568	panic(fmt: "cannot initialize percpu area (err=%d)", rc);
1569
1570	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
1571	for_each_possible_cpu(cpu)
1572	__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
1573
1574	/ Setup %g5 for the boot cpu. /
1575	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
1576
1577	of_fill_in_cpu_data();
1578	if (tlb_type == hypervisor)
1579	mdesc_fill_in_cpu_data(cpu_all_mask);
1580	}
1581

source code of linux/arch/sparc/kernel/smp_64.c