smp.c source code [linux/kernel/smp.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Generic helpers for smp ipi calls
4	*
5	* (C) Jens Axboe <jens.axboe@oracle.com> 2008
6	*/
7
8	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10	#include <linux/irq_work.h>
11	#include <linux/rcupdate.h>
12	#include <linux/rculist.h>
13	#include <linux/kernel.h>
14	#include <linux/export.h>
15	#include <linux/percpu.h>
16	#include <linux/init.h>
17	#include <linux/interrupt.h>
18	#include <linux/gfp.h>
19	#include <linux/smp.h>
20	#include <linux/cpu.h>
21	#include <linux/sched.h>
22	#include <linux/sched/idle.h>
23	#include <linux/hypervisor.h>
24	#include <linux/sched/clock.h>
25	#include <linux/nmi.h>
26	#include <linux/sched/debug.h>
27	#include <linux/jump_label.h>
28	#include <linux/string_choices.h>
29
30	#include <trace/events/ipi.h>
31	#define CREATE_TRACE_POINTS
32	#include <trace/events/csd.h>
33	#undef CREATE_TRACE_POINTS
34
35	#include "smpboot.h"
36	#include "sched/smp.h"
37
38	#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
39
40	struct call_function_data {
41	call_single_data_t __percpu *csd;
42	cpumask_var_t cpumask;
43	cpumask_var_t cpumask_ipi;
44	};
45
46	static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
47
48	static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
49
50	static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(`1`);
51
52	static void __flush_smp_call_function_queue(bool warn_cpu_offline);
53
54	int smpcfd_prepare_cpu(unsigned int cpu)
55	{
56	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
57
58	if (!zalloc_cpumask_var_node(mask: &cfd->cpumask, GFP_KERNEL,
59	cpu_to_node(cpu)))
60	return -ENOMEM;
61	if (!zalloc_cpumask_var_node(mask: &cfd->cpumask_ipi, GFP_KERNEL,
62	cpu_to_node(cpu))) {
63	free_cpumask_var(mask: cfd->cpumask);
64	return -ENOMEM;
65	}
66	cfd->csd = alloc_percpu(call_single_data_t);
67	if (!cfd->csd) {
68	free_cpumask_var(mask: cfd->cpumask);
69	free_cpumask_var(mask: cfd->cpumask_ipi);
70	return -ENOMEM;
71	}
72
73	return `0`;
74	}
75
76	int smpcfd_dead_cpu(unsigned int cpu)
77	{
78	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
79
80	free_cpumask_var(mask: cfd->cpumask);
81	free_cpumask_var(mask: cfd->cpumask_ipi);
82	free_percpu(pdata: cfd->csd);
83	return `0`;
84	}
85
86	int smpcfd_dying_cpu(unsigned int cpu)
87	{
88	/*
89	* The IPIs for the smp-call-function callbacks queued by other
90	* CPUs might arrive late, either due to hardware latencies or
91	* because this CPU disabled interrupts (inside stop-machine)
92	* before the IPIs were sent. So flush out any pending callbacks
93	* explicitly (without waiting for the IPIs to arrive), to
94	* ensure that the outgoing CPU doesn't go offline with work
95	* still pending.
96	*/
97	__flush_smp_call_function_queue(warn_cpu_offline: false);
98	irq_work_run();
99	return `0`;
100	}
101
102	void __init call_function_init(void)
103	{
104	int i;
105
106	for_each_possible_cpu(i)
107	init_llist_head(list: &per_cpu(call_single_queue, i));
108
109	smpcfd_prepare_cpu(smp_processor_id());
110	}
111
112	static __always_inline void
113	send_call_function_single_ipi(int cpu)
114	{
115	if (call_function_single_prep_ipi(cpu)) {
116	trace_ipi_send_cpu(cpu, _RET_IP_,
117	callback: generic_smp_call_function_single_interrupt);
118	arch_send_call_function_single_ipi(cpu);
119	}
120	}
121
122	static __always_inline void
123	send_call_function_ipi_mask(struct cpumask *mask)
124	{
125	trace_ipi_send_cpumask(cpumask: mask, _RET_IP_,
126	callback: generic_smp_call_function_single_interrupt);
127	arch_send_call_function_ipi_mask(mask);
128	}
129
130	static __always_inline void
131	csd_do_func(smp_call_func_t func, void info, call_single_data_t csd)
132	{
133	trace_csd_function_entry(func, csd);
134	func(info);
135	trace_csd_function_exit(func, csd);
136	}
137
138	#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
139
140	static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
141
142	/*
143	* Parse the csdlock_debug= kernel boot parameter.
144	*
145	* If you need to restore the old "ext" value that once provided
146	* additional debugging information, reapply the following commits:
147	*
148	* de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
149	* a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
150	*/
151	static int __init csdlock_debug(char *str)
152	{
153	int ret;
154	unsigned int val = `0`;
155
156	ret = get_option(str: &str, pint: &val);
157	if (ret) {
158	if (val)
159	static_branch_enable(&csdlock_debug_enabled);
160	else
161	static_branch_disable(&csdlock_debug_enabled);
162	}
163
164	return `1`;
165	}
166	__setup("csdlock_debug=", csdlock_debug);
167
168	static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
169	static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
170	static DEFINE_PER_CPU(void *, cur_csd_info);
171
172	static ulong csd_lock_timeout = `5000`; / CSD lock timeout in milliseconds. /
173	module_param(csd_lock_timeout, ulong, `0644`);
174	static int panic_on_ipistall; / CSD panic timeout in milliseconds, 300000 for five minutes. /
175	module_param(panic_on_ipistall, int, `0644`);
176
177	static atomic_t csd_bug_count = ATOMIC_INIT(`0`);
178
179	/ Record current CSD work for current CPU, NULL to erase. /
180	static void __csd_lock_record(call_single_data_t *csd)
181	{
182	if (!csd) {
183	smp_mb(); / NULL cur_csd after unlock. /
184	__this_cpu_write(cur_csd, NULL);
185	return;
186	}
187	__this_cpu_write(cur_csd_func, csd->func);
188	__this_cpu_write(cur_csd_info, csd->info);
189	smp_wmb(); / func and info before csd. /
190	__this_cpu_write(cur_csd, csd);
191	smp_mb(); / Update cur_csd before function call. /
192	/ Or before unlock, as the case may be. /
193	}
194
195	static __always_inline void csd_lock_record(call_single_data_t *csd)
196	{
197	if (static_branch_unlikely(&csdlock_debug_enabled))
198	__csd_lock_record(csd);
199	}
200
201	static int csd_lock_wait_getcpu(call_single_data_t *csd)
202	{
203	unsigned int csd_type;
204
205	csd_type = CSD_TYPE(csd);
206	if (csd_type == CSD_TYPE_ASYNC \|\| csd_type == CSD_TYPE_SYNC)
207	return csd->node.dst; / Other CSD_TYPE_ values might not have ->dst. /
208	return -`1`;
209	}
210
211	static atomic_t n_csd_lock_stuck;
212
213	/**
214	* csd_lock_is_stuck - Has a CSD-lock acquisition been stuck too long?
215	*
216	* Returns @true if a CSD-lock acquisition is stuck and has been stuck
217	* long enough for a "non-responsive CSD lock" message to be printed.
218	*/
219	bool csd_lock_is_stuck(void)
220	{
221	return !!atomic_read(v: &n_csd_lock_stuck);
222	}
223
224	/*
225	* Complain if too much time spent waiting. Note that only
226	* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
227	* so waiting on other types gets much less information.
228	*/
229	static bool csd_lock_wait_toolong(call_single_data_t csd, u64 ts0, u64 ts1, int bug_id, unsigned* long *nmessages)
230	{
231	int cpu = -`1`;
232	int cpux;
233	bool firsttime;
234	u64 ts2, ts_delta;
235	call_single_data_t *cpu_cur_csd;
236	unsigned int flags = READ_ONCE(csd->node.u_flags);
237	unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
238
239	if (!(flags & CSD_FLAG_LOCK)) {
240	if (!unlikely(*bug_id))
241	return true;
242	cpu = csd_lock_wait_getcpu(csd);
243	pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
244	*bug_id, raw_smp_processor_id(), cpu);
245	atomic_dec(v: &n_csd_lock_stuck);
246	return true;
247	}
248
249	ts2 = ktime_get_mono_fast_ns();
250	/ How long since we last checked for a stuck CSD lock./
251	ts_delta = ts2 - *ts1;
252	if (likely(ts_delta <= csd_lock_timeout_ns * (nmessages + `1`)
253	(!*nmessages ? `1` : (ilog2(num_online_cpus()) / `2` + `1`)) \|\|
254	csd_lock_timeout_ns == `0`))
255	return false;
256
257	if (ts0 > ts2) {
258	/ Our own sched_clock went backward; don't blame another CPU. /
259	ts_delta = ts0 - ts2;
260	pr_alert("sched_clock on CPU %d went backward by %llu ns\n", raw_smp_processor_id(), ts_delta);
261	*ts1 = ts2;
262	return false;
263	}
264
265	firsttime = !*bug_id;
266	if (firsttime)
267	*bug_id = atomic_inc_return(v: &csd_bug_count);
268	cpu = csd_lock_wait_getcpu(csd);
269	if (WARN_ONCE(cpu < `0` \|\| cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
270	cpux = `0`;
271	else
272	cpux = cpu;
273	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); / Before func and info. /
274	/ How long since this CSD lock was stuck. /
275	ts_delta = ts2 - ts0;
276	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %lld ns for CPU#%02d %pS(%ps).\n",
277	firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), (s64)ts_delta,
278	cpu, csd->func, csd->info);
279	(*nmessages)++;
280	if (firsttime)
281	atomic_inc(v: &n_csd_lock_stuck);
282	/*
283	* If the CSD lock is still stuck after 5 minutes, it is unlikely
284	* to become unstuck. Use a signed comparison to avoid triggering
285	* on underflows when the TSC is out of sync between sockets.
286	*/
287	BUG_ON(panic_on_ipistall > `0` && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC));
288	if (cpu_cur_csd && csd != cpu_cur_csd) {
289	pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
290	*bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
291	READ_ONCE(per_cpu(cur_csd_info, cpux)));
292	} else {
293	pr_alert("\tcsd: CSD lock (#%d) %s.\n",
294	*bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
295	}
296	if (cpu >= `0`) {
297	if (atomic_cmpxchg_acquire(v: &per_cpu(trigger_backtrace, cpu), old: `1`, new: `0`))
298	dump_cpu_task(cpu);
299	if (!cpu_cur_csd) {
300	pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
301	arch_send_call_function_single_ipi(cpu);
302	}
303	}
304	if (firsttime)
305	dump_stack();
306	*ts1 = ts2;
307
308	return false;
309	}
310
311	/*
312	* csd_lock/csd_unlock used to serialize access to per-cpu csd resources
313	*
314	* For non-synchronous ipi calls the csd can still be in use by the
315	* previous function call. For multi-cpu calls its even more interesting
316	* as we'll have to ensure no other cpu is observing our csd.
317	*/
318	static void __csd_lock_wait(call_single_data_t *csd)
319	{
320	unsigned long nmessages = `0`;
321	int bug_id = `0`;
322	u64 ts0, ts1;
323
324	ts1 = ts0 = ktime_get_mono_fast_ns();
325	for (;;) {
326	if (csd_lock_wait_toolong(csd, ts0, ts1: &ts1, bug_id: &bug_id, nmessages: &nmessages))
327	break;
328	cpu_relax();
329	}
330	smp_acquire__after_ctrl_dep();
331	}
332
333	static __always_inline void csd_lock_wait(call_single_data_t *csd)
334	{
335	if (static_branch_unlikely(&csdlock_debug_enabled)) {
336	__csd_lock_wait(csd);
337	return;
338	}
339
340	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
341	}
342	#else
343	static void csd_lock_record(call_single_data_t *csd)
344	{
345	}
346
347	static __always_inline void csd_lock_wait(call_single_data_t *csd)
348	{
349	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
350	}
351	#endif
352
353	static __always_inline void csd_lock(call_single_data_t *csd)
354	{
355	csd_lock_wait(csd);
356	csd->node.u_flags \|= CSD_FLAG_LOCK;
357
358	/*
359	* prevent CPU from reordering the above assignment
360	* to ->flags with any subsequent assignments to other
361	* fields of the specified call_single_data_t structure:
362	*/
363	smp_wmb();
364	}
365
366	static __always_inline void csd_unlock(call_single_data_t *csd)
367	{
368	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
369
370	/*
371	* ensure we're all done before releasing data:
372	*/
373	smp_store_release(&csd->node.u_flags, `0`);
374	}
375
376	static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
377
378	void __smp_call_single_queue(int cpu, struct llist_node *node)
379	{
380	/*
381	* We have to check the type of the CSD before queueing it, because
382	* once queued it can have its flags cleared by
383	* flush_smp_call_function_queue()
384	* even if we haven't sent the smp_call IPI yet (e.g. the stopper
385	* executes migration_cpu_stop() on the remote CPU).
386	*/
387	if (trace_csd_queue_cpu_enabled()) {
388	call_single_data_t *csd;
389	smp_call_func_t func;
390
391	csd = container_of(node, call_single_data_t, node.llist);
392	func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
393	sched_ttwu_pending : csd->func;
394
395	trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
396	}
397
398	/*
399	* The list addition should be visible to the target CPU when it pops
400	* the head of the list to pull the entry off it in the IPI handler
401	* because of normal cache coherency rules implied by the underlying
402	* llist ops.
403	*
404	* If IPIs can go out of order to the cache coherency protocol
405	* in an architecture, sufficient synchronisation should be added
406	* to arch code to make it appear to obey cache coherency WRT
407	* locking and barrier primitives. Generic code isn't really
408	* equipped to do the right thing...
409	*/
410	if (llist_add(new: node, head: &per_cpu(call_single_queue, cpu)))
411	send_call_function_single_ipi(cpu);
412	}
413
414	/*
415	* Insert a previously allocated call_single_data_t element
416	* for execution on the given CPU. data must already have
417	* ->func, ->info, and ->flags set.
418	*/
419	static int generic_exec_single(int cpu, call_single_data_t *csd)
420	{
421	if (cpu == smp_processor_id()) {
422	smp_call_func_t func = csd->func;
423	void *info = csd->info;
424	unsigned long flags;
425
426	/*
427	* We can unlock early even for the synchronous on-stack case,
428	* since we're doing this from the same CPU..
429	*/
430	csd_lock_record(csd);
431	csd_unlock(csd);
432	local_irq_save(flags);
433	csd_do_func(func, info, NULL);
434	csd_lock_record(NULL);
435	local_irq_restore(flags);
436	return `0`;
437	}
438
439	if ((unsigned)cpu >= nr_cpu_ids \|\| !cpu_online(cpu)) {
440	csd_unlock(csd);
441	return -ENXIO;
442	}
443
444	__smp_call_single_queue(cpu, node: &csd->node.llist);
445
446	return `0`;
447	}
448
449	/**
450	* generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
451	*
452	* Invoked by arch to handle an IPI for call function single.
453	* Must be called with interrupts disabled.
454	*/
455	void generic_smp_call_function_single_interrupt(void)
456	{
457	__flush_smp_call_function_queue(warn_cpu_offline: true);
458	}
459
460	/**
461	* __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
462	*
463	* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
464	* offline CPU. Skip this check if set to 'false'.
465	*
466	* Flush any pending smp-call-function callbacks queued on this CPU. This is
467	* invoked by the generic IPI handler, as well as by a CPU about to go offline,
468	* to ensure that all pending IPI callbacks are run before it goes completely
469	* offline.
470	*
471	* Loop through the call_single_queue and run all the queued callbacks.
472	* Must be called with interrupts disabled.
473	*/
474	static void __flush_smp_call_function_queue(bool warn_cpu_offline)
475	{
476	call_single_data_t csd, csd_next;
477	struct llist_node entry, prev;
478	struct llist_head *head;
479	static bool warned;
480	atomic_t *tbt;
481
482	lockdep_assert_irqs_disabled();
483
484	/ Allow waiters to send backtrace NMI from here onwards /
485	tbt = this_cpu_ptr(&trigger_backtrace);
486	atomic_set_release(v: tbt, i: `1`);
487
488	head = this_cpu_ptr(&call_single_queue);
489	entry = llist_del_all(head);
490	entry = llist_reverse_order(head: entry);
491
492	/ There shouldn't be any pending callbacks on an offline CPU. /
493	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
494	!warned && entry != NULL)) {
495	warned = true;
496	WARN(`1`, "IPI on offline CPU %d\n", smp_processor_id());
497
498	/*
499	* We don't have to use the _safe() variant here
500	* because we are not invoking the IPI handlers yet.
501	*/
502	llist_for_each_entry(csd, entry, node.llist) {
503	switch (CSD_TYPE(csd)) {
504	case CSD_TYPE_ASYNC:
505	case CSD_TYPE_SYNC:
506	case CSD_TYPE_IRQ_WORK:
507	pr_warn("IPI callback %pS sent to offline CPU\n",
508	csd->func);
509	break;
510
511	case CSD_TYPE_TTWU:
512	pr_warn("IPI task-wakeup sent to offline CPU\n");
513	break;
514
515	default:
516	pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
517	CSD_TYPE(csd));
518	break;
519	}
520	}
521	}
522
523	/*
524	* First; run all SYNC callbacks, people are waiting for us.
525	*/
526	prev = NULL;
527	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
528	/ Do we wait until after callback? /
529	if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
530	smp_call_func_t func = csd->func;
531	void *info = csd->info;
532
533	if (prev) {
534	prev->next = &csd_next->node.llist;
535	} else {
536	entry = &csd_next->node.llist;
537	}
538
539	csd_lock_record(csd);
540	csd_do_func(func, info, csd);
541	csd_unlock(csd);
542	csd_lock_record(NULL);
543	} else {
544	prev = &csd->node.llist;
545	}
546	}
547
548	if (!entry)
549	return;
550
551	/*
552	* Second; run all !SYNC callbacks.
553	*/
554	prev = NULL;
555	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
556	int type = CSD_TYPE(csd);
557
558	if (type != CSD_TYPE_TTWU) {
559	if (prev) {
560	prev->next = &csd_next->node.llist;
561	} else {
562	entry = &csd_next->node.llist;
563	}
564
565	if (type == CSD_TYPE_ASYNC) {
566	smp_call_func_t func = csd->func;
567	void *info = csd->info;
568
569	csd_lock_record(csd);
570	csd_unlock(csd);
571	csd_do_func(func, info, csd);
572	csd_lock_record(NULL);
573	} else if (type == CSD_TYPE_IRQ_WORK) {
574	irq_work_single(arg: csd);
575	}
576
577	} else {
578	prev = &csd->node.llist;
579	}
580	}
581
582	/*
583	* Third; only CSD_TYPE_TTWU is left, issue those.
584	*/
585	if (entry) {
586	csd = llist_entry(entry, typeof(*csd), node.llist);
587	csd_do_func(func: sched_ttwu_pending, info: entry, csd);
588	}
589	}
590
591
592	/**
593	* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
594	* from task context (idle, migration thread)
595	*
596	* When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
597	* set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
598	* setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
599	* handle queued SMP function calls before scheduling.
600	*
601	* The migration thread has to ensure that an eventually pending wakeup has
602	* been handled before it migrates a task.
603	*/
604	void flush_smp_call_function_queue(void)
605	{
606	unsigned int was_pending;
607	unsigned long flags;
608
609	if (llist_empty(this_cpu_ptr(&call_single_queue)))
610	return;
611
612	local_irq_save(flags);
613	/ Get the already pending soft interrupts for RT enabled kernels /
614	was_pending = local_softirq_pending();
615	__flush_smp_call_function_queue(warn_cpu_offline: true);
616	if (local_softirq_pending())
617	do_softirq_post_smp_call_flush(unused: was_pending);
618
619	local_irq_restore(flags);
620	}
621
622	/*
623	* smp_call_function_single - Run a function on a specific CPU
624	* @func: The function to run. This must be fast and non-blocking.
625	* @info: An arbitrary pointer to pass to the function.
626	* @wait: If true, wait until function has completed on other CPUs.
627	*
628	* Returns 0 on success, else a negative status code.
629	*/
630	int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
631	int wait)
632	{
633	call_single_data_t *csd;
634	call_single_data_t csd_stack = {
635	.node = { .u_flags = CSD_FLAG_LOCK \| CSD_TYPE_SYNC, },
636	};
637	int this_cpu;
638	int err;
639
640	/*
641	* prevent preemption and reschedule on another processor,
642	* as well as CPU removal
643	*/
644	this_cpu = get_cpu();
645
646	/*
647	* Can deadlock when called with interrupts disabled.
648	* We allow cpu's that are not yet online though, as no one else can
649	* send smp call function interrupt to this cpu and as such deadlocks
650	* can't happen.
651	*/
652	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
653	&& !oops_in_progress);
654
655	/*
656	* When @wait we can deadlock when we interrupt between llist_add() and
657	* arch_send_call_function_ipi*(); when !@wait we can deadlock due to
658	* csd_lock() on because the interrupt context uses the same csd
659	* storage.
660	*/
661	WARN_ON_ONCE(!in_task());
662
663	csd = &csd_stack;
664	if (!wait) {
665	csd = this_cpu_ptr(&csd_data);
666	csd_lock(csd);
667	}
668
669	csd->func = func;
670	csd->info = info;
671	#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
672	csd->node.src = smp_processor_id();
673	csd->node.dst = cpu;
674	#endif
675
676	err = generic_exec_single(cpu, csd);
677
678	if (wait)
679	csd_lock_wait(csd);
680
681	put_cpu();
682
683	return err;
684	}
685	EXPORT_SYMBOL(smp_call_function_single);
686
687	/**
688	* smp_call_function_single_async() - Run an asynchronous function on a
689	* specific CPU.
690	* @cpu: The CPU to run on.
691	* @csd: Pre-allocated and setup data structure
692	*
693	* Like smp_call_function_single(), but the call is asynchonous and
694	* can thus be done from contexts with disabled interrupts.
695	*
696	* The caller passes his own pre-allocated data structure
697	* (ie: embedded in an object) and is responsible for synchronizing it
698	* such that the IPIs performed on the @csd are strictly serialized.
699	*
700	* If the function is called with one csd which has not yet been
701	* processed by previous call to smp_call_function_single_async(), the
702	* function will return immediately with -EBUSY showing that the csd
703	* object is still in progress.
704	*
705	* NOTE: Be careful, there is unfortunately no current debugging facility to
706	* validate the correctness of this serialization.
707	*
708	* Return: %0 on success or negative errno value on error
709	*/
710	int smp_call_function_single_async(int cpu, call_single_data_t *csd)
711	{
712	int err = `0`;
713
714	preempt_disable();
715
716	if (csd->node.u_flags & CSD_FLAG_LOCK) {
717	err = -EBUSY;
718	goto out;
719	}
720
721	csd->node.u_flags = CSD_FLAG_LOCK;
722	smp_wmb();
723
724	err = generic_exec_single(cpu, csd);
725
726	out:
727	preempt_enable();
728
729	return err;
730	}
731	EXPORT_SYMBOL_GPL(smp_call_function_single_async);
732
733	/*
734	* smp_call_function_any - Run a function on any of the given cpus
735	* @mask: The mask of cpus it can run on.
736	* @func: The function to run. This must be fast and non-blocking.
737	* @info: An arbitrary pointer to pass to the function.
738	* @wait: If true, wait until function has completed.
739	*
740	* Returns 0 on success, else a negative status code (if no cpus were online).
741	*
742	* Selection preference:
743	* 1) current cpu if in @mask
744	* 2) any cpu of current node if in @mask
745	* 3) any other online cpu in @mask
746	*/
747	int smp_call_function_any(const struct cpumask *mask,
748	smp_call_func_t func, void info, int* wait)
749	{
750	unsigned int cpu;
751	const struct cpumask *nodemask;
752	int ret;
753
754	/ Try for same CPU (cheapest) /
755	cpu = get_cpu();
756	if (cpumask_test_cpu(cpu, cpumask: mask))
757	goto call;
758
759	/ Try for same node. /
760	nodemask = cpumask_of_node(cpu_to_node(cpu));
761	for (cpu = cpumask_first_and(srcp1: nodemask, srcp2: mask); cpu < nr_cpu_ids;
762	cpu = cpumask_next_and(n: cpu, src1p: nodemask, src2p: mask)) {
763	if (cpu_online(cpu))
764	goto call;
765	}
766
767	/ Any online will do: smp_call_function_single handles nr_cpu_ids. /
768	cpu = cpumask_any_and(mask, cpu_online_mask);
769	call:
770	ret = smp_call_function_single(cpu, func, info, wait);
771	put_cpu();
772	return ret;
773	}
774	EXPORT_SYMBOL_GPL(smp_call_function_any);
775
776	/*
777	* Flags to be used as scf_flags argument of smp_call_function_many_cond().
778	*
779	* %SCF_WAIT: Wait until function execution is completed
780	* %SCF_RUN_LOCAL: Run also locally if local cpu is set in cpumask
781	*/
782	#define SCF_WAIT (1U << 0)
783	#define SCF_RUN_LOCAL (1U << 1)
784
785	static void smp_call_function_many_cond(const struct cpumask *mask,
786	smp_call_func_t func, void *info,
787	unsigned int scf_flags,
788	smp_cond_func_t cond_func)
789	{
790	int cpu, last_cpu, this_cpu = smp_processor_id();
791	struct call_function_data *cfd;
792	bool wait = scf_flags & SCF_WAIT;
793	int nr_cpus = `0`;
794	bool run_remote = false;
795	bool run_local = false;
796
797	lockdep_assert_preemption_disabled();
798
799	/*
800	* Can deadlock when called with interrupts disabled.
801	* We allow cpu's that are not yet online though, as no one else can
802	* send smp call function interrupt to this cpu and as such deadlocks
803	* can't happen.
804	*/
805	if (cpu_online(cpu: this_cpu) && !oops_in_progress &&
806	!early_boot_irqs_disabled)
807	lockdep_assert_irqs_enabled();
808
809	/*
810	* When @wait we can deadlock when we interrupt between llist_add() and
811	* arch_send_call_function_ipi*(); when !@wait we can deadlock due to
812	* csd_lock() on because the interrupt context uses the same csd
813	* storage.
814	*/
815	WARN_ON_ONCE(!in_task());
816
817	/ Check if we need local execution. /
818	if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(cpu: this_cpu, cpumask: mask) &&
819	(!cond_func \|\| cond_func(this_cpu, info)))
820	run_local = true;
821
822	/ Check if we need remote execution, i.e., any CPU excluding this one. /
823	cpu = cpumask_first_and(srcp1: mask, cpu_online_mask);
824	if (cpu == this_cpu)
825	cpu = cpumask_next_and(n: cpu, src1p: mask, cpu_online_mask);
826	if (cpu < nr_cpu_ids)
827	run_remote = true;
828
829	if (run_remote) {
830	cfd = this_cpu_ptr(&cfd_data);
831	cpumask_and(dstp: cfd->cpumask, src1p: mask, cpu_online_mask);
832	__cpumask_clear_cpu(cpu: this_cpu, dstp: cfd->cpumask);
833
834	cpumask_clear(dstp: cfd->cpumask_ipi);
835	for_each_cpu(cpu, cfd->cpumask) {
836	call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
837
838	if (cond_func && !cond_func(cpu, info)) {
839	__cpumask_clear_cpu(cpu, dstp: cfd->cpumask);
840	continue;
841	}
842
843	csd_lock(csd);
844	if (wait)
845	csd->node.u_flags \|= CSD_TYPE_SYNC;
846	csd->func = func;
847	csd->info = info;
848	#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
849	csd->node.src = smp_processor_id();
850	csd->node.dst = cpu;
851	#endif
852	trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
853
854	if (llist_add(new: &csd->node.llist, head: &per_cpu(call_single_queue, cpu))) {
855	__cpumask_set_cpu(cpu, dstp: cfd->cpumask_ipi);
856	nr_cpus++;
857	last_cpu = cpu;
858	}
859	}
860
861	/*
862	* Choose the most efficient way to send an IPI. Note that the
863	* number of CPUs might be zero due to concurrent changes to the
864	* provided mask.
865	*/
866	if (nr_cpus == `1`)
867	send_call_function_single_ipi(cpu: last_cpu);
868	else if (likely(nr_cpus > `1`))
869	send_call_function_ipi_mask(mask: cfd->cpumask_ipi);
870	}
871
872	if (run_local) {
873	unsigned long flags;
874
875	local_irq_save(flags);
876	csd_do_func(func, info, NULL);
877	local_irq_restore(flags);
878	}
879
880	if (run_remote && wait) {
881	for_each_cpu(cpu, cfd->cpumask) {
882	call_single_data_t *csd;
883
884	csd = per_cpu_ptr(cfd->csd, cpu);
885	csd_lock_wait(csd);
886	}
887	}
888	}
889
890	/**
891	* smp_call_function_many(): Run a function on a set of CPUs.
892	* @mask: The set of cpus to run on (only runs on online subset).
893	* @func: The function to run. This must be fast and non-blocking.
894	* @info: An arbitrary pointer to pass to the function.
895	* @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait
896	* (atomically) until function has completed on other CPUs. If
897	* %SCF_RUN_LOCAL is set, the function will also be run locally
898	* if the local CPU is set in the @cpumask.
899	*
900	* If @wait is true, then returns once @func has returned.
901	*
902	* You must not call this function with disabled interrupts or from a
903	* hardware interrupt handler or from a bottom half handler. Preemption
904	* must be disabled when calling this function.
905	*/
906	void smp_call_function_many(const struct cpumask *mask,
907	smp_call_func_t func, void *info, bool wait)
908	{
909	smp_call_function_many_cond(mask, func, info, scf_flags: wait * SCF_WAIT, NULL);
910	}
911	EXPORT_SYMBOL(smp_call_function_many);
912
913	/**
914	* smp_call_function(): Run a function on all other CPUs.
915	* @func: The function to run. This must be fast and non-blocking.
916	* @info: An arbitrary pointer to pass to the function.
917	* @wait: If true, wait (atomically) until function has completed
918	* on other CPUs.
919	*
920	* Returns 0.
921	*
922	* If @wait is true, then returns once @func has returned; otherwise
923	* it returns just before the target cpu calls @func.
924	*
925	* You must not call this function with disabled interrupts or from a
926	* hardware interrupt handler or from a bottom half handler.
927	*/
928	void smp_call_function(smp_call_func_t func, void info, int* wait)
929	{
930	preempt_disable();
931	smp_call_function_many(cpu_online_mask, func, info, wait);
932	preempt_enable();
933	}
934	EXPORT_SYMBOL(smp_call_function);
935
936	/ Setup configured maximum number of CPUs to activate /
937	unsigned int setup_max_cpus = NR_CPUS;
938	EXPORT_SYMBOL(setup_max_cpus);
939
940
941	/*
942	* Setup routine for controlling SMP activation
943	*
944	* Command-line option of "nosmp" or "maxcpus=0" will disable SMP
945	* activation entirely (the MPS table probe still happens, though).
946	*
947	* Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
948	* greater than 0, limits the maximum number of CPUs activated in
949	* SMP mode to <NUM>.
950	*/
951
952	void __weak __init arch_disable_smp_support(void) { }
953
954	static int __init nosmp(char *str)
955	{
956	setup_max_cpus = `0`;
957	arch_disable_smp_support();
958
959	return `0`;
960	}
961
962	early_param("nosmp", nosmp);
963
964	/ this is hard limit /
965	static int __init nrcpus(char *str)
966	{
967	int nr_cpus;
968
969	if (get_option(str: &str, pint: &nr_cpus) && nr_cpus > `0` && nr_cpus < nr_cpu_ids)
970	set_nr_cpu_ids(nr_cpus);
971
972	return `0`;
973	}
974
975	early_param("nr_cpus", nrcpus);
976
977	static int __init maxcpus(char *str)
978	{
979	get_option(str: &str, pint: &setup_max_cpus);
980	if (setup_max_cpus == `0`)
981	arch_disable_smp_support();
982
983	return `0`;
984	}
985
986	early_param("maxcpus", maxcpus);
987
988	#if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS)
989	/ Setup number of possible processor ids /
990	unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
991	EXPORT_SYMBOL(nr_cpu_ids);
992	#endif
993
994	/ An arch may set nr_cpu_ids earlier if needed, so this would be redundant /
995	void __init setup_nr_cpu_ids(void)
996	{
997	set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + `1`);
998	}
999
1000	/ Called by boot processor to activate the rest. /
1001	void __init smp_init(void)
1002	{
1003	int num_nodes, num_cpus;
1004
1005	idle_threads_init();
1006	cpuhp_threads_init();
1007
1008	pr_info("Bringing up secondary CPUs ...\n");
1009
1010	bringup_nonboot_cpus(max_cpus: setup_max_cpus);
1011
1012	num_nodes = num_online_nodes();
1013	num_cpus = num_online_cpus();
1014	pr_info("Brought up %d node%s, %d CPU%s\n",
1015	num_nodes, str_plural(num_nodes), num_cpus, str_plural(num_cpus));
1016
1017	/ Any cleanup work /
1018	smp_cpus_done(max_cpus: setup_max_cpus);
1019	}
1020
1021	/*
1022	* on_each_cpu_cond(): Call a function on each processor for which
1023	* the supplied function cond_func returns true, optionally waiting
1024	* for all the required CPUs to finish. This may include the local
1025	* processor.
1026	* @cond_func: A callback function that is passed a cpu id and
1027	* the info parameter. The function is called
1028	* with preemption disabled. The function should
1029	* return a blooean value indicating whether to IPI
1030	* the specified CPU.
1031	* @func: The function to run on all applicable CPUs.
1032	* This must be fast and non-blocking.
1033	* @info: An arbitrary pointer to pass to both functions.
1034	* @wait: If true, wait (atomically) until function has
1035	* completed on other CPUs.
1036	*
1037	* Preemption is disabled to protect against CPUs going offline but not online.
1038	* CPUs going online during the call will not be seen or sent an IPI.
1039	*
1040	* You must not call this function with disabled interrupts or
1041	* from a hardware interrupt handler or from a bottom half handler.
1042	*/
1043	void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
1044	void info, bool wait, const* struct cpumask *mask)
1045	{
1046	unsigned int scf_flags = SCF_RUN_LOCAL;
1047
1048	if (wait)
1049	scf_flags \|= SCF_WAIT;
1050
1051	preempt_disable();
1052	smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
1053	preempt_enable();
1054	}
1055	EXPORT_SYMBOL(on_each_cpu_cond_mask);
1056
/*
 * Dummy callback with an intentionally empty body. The argument is
 * accepted only to match the callback signature and is never touched.
 */
static void do_nothing(void *unused)
{
	(void)unused;
}
1060
1061	/**
1062	* kick_all_cpus_sync - Force all cpus out of idle
1063	*
1064	* Used to synchronize the update of pm_idle function pointer. It's
1065	* called after the pointer is updated and returns after the dummy
1066	* callback function has been executed on all cpus. The execution of
1067	* the function can only happen on the remote cpus after they have
1068	* left the idle function which had been called via pm_idle function
1069	* pointer. So it's guaranteed that nothing uses the previous pointer
1070	* anymore.
1071	*/
1072	void kick_all_cpus_sync(void)
1073	{
1074	/ Make sure the change is visible before we kick the cpus /
1075	smp_mb();
1076	smp_call_function(do_nothing, NULL, `1`);
1077	}
1078	EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
1079
1080	/**
1081	* wake_up_all_idle_cpus - break all cpus out of idle
1082	* wake_up_all_idle_cpus try to break all cpus which is in idle state even
1083	* including idle polling cpus, for non-idle cpus, we will do nothing
1084	* for them.
1085	*/
1086	void wake_up_all_idle_cpus(void)
1087	{
1088	int cpu;
1089
1090	for_each_possible_cpu(cpu) {
1091	preempt_disable();
1092	if (cpu != smp_processor_id() && cpu_online(cpu))
1093	wake_up_if_idle(cpu);
1094	preempt_enable();
1095	}
1096	}
1097	EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
1098
1099	/**
1100	* struct smp_call_on_cpu_struct - Call a function on a specific CPU
1101	* @work: &work_struct
1102	* @done: &completion to signal
1103	* @func: function to call
1104	* @data: function's data argument
1105	* @ret: return value from @func
1106	* @cpu: target CPU (%-1 for any CPU)
1107	*
1108	* Used to call a function on a specific cpu and wait for it to return.
1109	* Optionally make sure the call is done on a specified physical cpu via vcpu
1110	* pinning in order to support virtualized environments.
1111	*/
1112	struct smp_call_on_cpu_struct {
1113	struct work_struct work;
1114	struct completion done;
1115	int (func)(void* *);
1116	void *data;
1117	int ret;
1118	int cpu;
1119	};
1120
1121	static void smp_call_on_cpu_callback(struct work_struct *work)
1122	{
1123	struct smp_call_on_cpu_struct *sscs;
1124
1125	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
1126	if (sscs->cpu >= `0`)
1127	hypervisor_pin_vcpu(cpu: sscs->cpu);
1128	sscs->ret = sscs->func(sscs->data);
1129	if (sscs->cpu >= `0`)
1130	hypervisor_pin_vcpu(cpu: -`1`);
1131
1132	complete(&sscs->done);
1133	}
1134
1135	int smp_call_on_cpu(unsigned int cpu, int (func)(void* ), void* *par, bool phys)
1136	{
1137	struct smp_call_on_cpu_struct sscs = {
1138	.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
1139	.func = func,
1140	.data = par,
1141	.cpu = phys ? cpu : -`1`,
1142	};
1143
1144	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
1145
1146	if (cpu >= nr_cpu_ids \|\| !cpu_online(cpu))
1147	return -ENXIO;
1148
1149	queue_work_on(cpu, wq: system_wq, work: &sscs.work);
1150	wait_for_completion(&sscs.done);
1151	destroy_work_on_stack(work: &sscs.work);
1152
1153	return sscs.ret;
1154	}
1155	EXPORT_SYMBOL_GPL(smp_call_on_cpu);
1156

Provided by KDAB

Definitions

call_function_data
cfd_data
call_single_queue
trigger_backtrace
smpcfd_prepare_cpu
smpcfd_dead_cpu
smpcfd_dying_cpu
call_function_init
send_call_function_single_ipi
send_call_function_ipi_mask
csd_do_func
csdlock_debug_enabled
csdlock_debug
cur_csd
cur_csd_func
cur_csd_info
csd_lock_timeout
panic_on_ipistall
csd_bug_count
__csd_lock_record
csd_lock_record
csd_lock_wait_getcpu
n_csd_lock_stuck
csd_lock_is_stuck
csd_lock_wait_toolong
__csd_lock_wait
csd_lock_wait
csd_lock
csd_unlock
csd_data
__smp_call_single_queue
generic_exec_single
generic_smp_call_function_single_interrupt
__flush_smp_call_function_queue
flush_smp_call_function_queue
smp_call_function_single
smp_call_function_single_async
smp_call_function_any
smp_call_function_many_cond
smp_call_function_many
smp_call_function
setup_max_cpus
arch_disable_smp_support
nosmp
nrcpus
maxcpus
nr_cpu_ids
setup_nr_cpu_ids
smp_init
on_each_cpu_cond_mask
do_nothing
kick_all_cpus_sync
wake_up_all_idle_cpus
smp_call_on_cpu_struct
smp_call_on_cpu_callback

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/kernel/smp.c