cpu.c source code [linux/kernel/cpu.c]

/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
6	#include <linux/sched/mm.h>
7	#include <linux/proc_fs.h>
8	#include <linux/smp.h>
9	#include <linux/init.h>
10	#include <linux/notifier.h>
11	#include <linux/sched/signal.h>
12	#include <linux/sched/hotplug.h>
13	#include <linux/sched/isolation.h>
14	#include <linux/sched/task.h>
15	#include <linux/sched/smt.h>
16	#include <linux/unistd.h>
17	#include <linux/cpu.h>
18	#include <linux/oom.h>
19	#include <linux/rcupdate.h>
20	#include <linux/delay.h>
21	#include <linux/export.h>
22	#include <linux/bug.h>
23	#include <linux/kthread.h>
24	#include <linux/stop_machine.h>
25	#include <linux/mutex.h>
26	#include <linux/gfp.h>
27	#include <linux/suspend.h>
28	#include <linux/lockdep.h>
29	#include <linux/tick.h>
30	#include <linux/irq.h>
31	#include <linux/nmi.h>
32	#include <linux/smpboot.h>
33	#include <linux/relay.h>
34	#include <linux/slab.h>
35	#include <linux/scs.h>
36	#include <linux/percpu-rwsem.h>
37	#include <linux/cpuset.h>
38	#include <linux/random.h>
39	#include <linux/cc_platform.h>
40
41	#include <trace/events/power.h>
42	#define CREATE_TRACE_POINTS
43	#include <trace/events/cpuhp.h>
44
45	#include "smpboot.h"
46
/**
 * struct cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @fail:	State for which cpuhp_invoke_callback() returns -EAGAIN
 *		instead of invoking the callback; reset to CPUHP_INVALID
 *		once it has triggered
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @node:	Remote CPU node; for multi-instance, do a
 *		single entry callback for install/remove
 * @last:	For multi-instance rollback, remember how far we got
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @ap_sync_state:	State for AP synchronization
 * @done_up:	Signal completion to the issuer of the task for cpu-up
 * @done_down:	Signal completion to the issuer of the task for cpu-down
 *
 * Everything from @thread onwards only exists when CONFIG_SMP is set.
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
	enum cpuhp_state	fail;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	struct hlist_node	*node;
	struct hlist_node	*last;
	enum cpuhp_state	cb_state;
	int			result;
	atomic_t		ap_sync_state;
	struct completion	done_up;
	struct completion	done_down;
#endif
};
85
/*
 * Per-CPU instance of the hotplug state. @fail starts out as CPUHP_INVALID
 * so that cpuhp_invoke_callback() does not inject a failure by default.
 */
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail = CPUHP_INVALID,
};
89
90	#ifdef CONFIG_SMP
/* Mask of CPUs which were booted at least once; tested by cpu_bootable(). */
cpumask_t cpus_booted_once_mask;
92	#endif
93
94	#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
/*
 * Lockdep maps for the two hotplug directions. They are acquired and
 * released via cpuhp_lock_acquire()/cpuhp_lock_release().
 */
static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
99
100
101	static inline void cpuhp_lock_acquire(bool bringup)
102	{
103	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
104	}
105
106	static inline void cpuhp_lock_release(bool bringup)
107	{
108	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
109	}
110	#else
111
/* No-op variants used unless both CONFIG_LOCKDEP and CONFIG_SMP are set. */
static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }
114
115	#endif
116
117	/**
118	* struct cpuhp_step - Hotplug state machine step
119	* @name: Name of the step
120	* @startup: Startup function of the step
121	* @teardown: Teardown function of the step
122	* @cant_stop: Bringup/teardown can't be stopped at this step
123	* @multi_instance: State has multiple instances which get added afterwards
124	*/
125	struct cpuhp_step {
126	const char *name;
127	union {
128	int (single)(unsigned* int cpu);
129	int (multi)(unsigned* int cpu,
130	struct hlist_node *node);
131	} startup;
132	union {
133	int (single)(unsigned* int cpu);
134	int (multi)(unsigned* int cpu,
135	struct hlist_node *node);
136	} teardown;
137	/ private: /
138	struct hlist_head list;
139	/ public: /
140	bool cant_stop;
141	bool multi_instance;
142	};
143
/* NOTE(review): no user of this mutex is visible in this part of the file. */
static DEFINE_MUTEX(cpuhp_state_mutex);
/*
 * Table of hotplug steps, indexed by enum cpuhp_state through
 * cpuhp_get_step(). Forward declaration only; the definition is elsewhere.
 */
static struct cpuhp_step cpuhp_hp_states[];
146
147	static struct cpuhp_step cpuhp_get_step(enum* cpuhp_state state)
148	{
149	return cpuhp_hp_states + state;
150	}
151
152	static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
153	{
154	return bringup ? !step->startup.single : !step->teardown.single;
155	}
156
157	/**
158	* cpuhp_invoke_callback - Invoke the callbacks for a given state
159	* @cpu: The cpu for which the callback should be invoked
160	* @state: The state to do callbacks for
161	* @bringup: True if the bringup callback should be invoked
162	* @node: For multi-instance, do a single entry callback for install/remove
163	* @lastp: For multi-instance rollback, remember how far we got
164	*
165	* Called from cpu hotplug and from the state register machinery.
166	*
167	* Return: %0 on success or a negative errno code
168	*/
169	static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
170	bool bringup, struct hlist_node *node,
171	struct hlist_node **lastp)
172	{
173	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
174	struct cpuhp_step *step = cpuhp_get_step(state);
175	int (cbm)(unsigned* int cpu, struct hlist_node *node);
176	int (cb)(unsigned* int cpu);
177	int ret, cnt;
178
179	if (st->fail == state) {
180	st->fail = CPUHP_INVALID;
181	return -EAGAIN;
182	}
183
184	if (cpuhp_step_empty(bringup, step)) {
185	WARN_ON_ONCE(`1`);
186	return `0`;
187	}
188
189	if (!step->multi_instance) {
190	WARN_ON_ONCE(lastp && *lastp);
191	cb = bringup ? step->startup.single : step->teardown.single;
192
193	trace_cpuhp_enter(cpu, target: st->target, idx: state, fun: cb);
194	ret = cb(cpu);
195	trace_cpuhp_exit(cpu, state: st->state, idx: state, ret);
196	return ret;
197	}
198	cbm = bringup ? step->startup.multi : step->teardown.multi;
199
200	/ Single invocation for instance add/remove /
201	if (node) {
202	WARN_ON_ONCE(lastp && *lastp);
203	trace_cpuhp_multi_enter(cpu, target: st->target, idx: state, fun: cbm, node);
204	ret = cbm(cpu, node);
205	trace_cpuhp_exit(cpu, state: st->state, idx: state, ret);
206	return ret;
207	}
208
209	/ State transition. Invoke on all instances /
210	cnt = `0`;
211	hlist_for_each(node, &step->list) {
212	if (lastp && node == *lastp)
213	break;
214
215	trace_cpuhp_multi_enter(cpu, target: st->target, idx: state, fun: cbm, node);
216	ret = cbm(cpu, node);
217	trace_cpuhp_exit(cpu, state: st->state, idx: state, ret);
218	if (ret) {
219	if (!lastp)
220	goto err;
221
222	*lastp = node;
223	return ret;
224	}
225	cnt++;
226	}
227	if (lastp)
228	*lastp = NULL;
229	return `0`;
230	err:
231	/ Rollback the instances if one failed /
232	cbm = !bringup ? step->startup.multi : step->teardown.multi;
233	if (!cbm)
234	return ret;
235
236	hlist_for_each(node, &step->list) {
237	if (!cnt--)
238	break;
239
240	trace_cpuhp_multi_enter(cpu, target: st->target, idx: state, fun: cbm, node);
241	ret = cbm(cpu, node);
242	trace_cpuhp_exit(cpu, state: st->state, idx: state, ret);
243	/*
244	* Rollback must not fail,
245	*/
246	WARN_ON_ONCE(ret);
247	}
248	return ret;
249	}
250
251	#ifdef CONFIG_SMP
252	static bool cpuhp_is_ap_state(enum cpuhp_state state)
253	{
254	/*
255	* The extra check for CPUHP_TEARDOWN_CPU is only for documentation
256	* purposes as that state is handled explicitly in cpu_down.
257	*/
258	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
259	}
260
261	static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
262	{
263	struct completion *done = bringup ? &st->done_up : &st->done_down;
264	wait_for_completion(done);
265	}
266
267	static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
268	{
269	struct completion *done = bringup ? &st->done_up : &st->done_down;
270	complete(done);
271	}
272
273	/*
274	* The former STARTING/DYING states, ran with IRQs disabled and must not fail.
275	*/
276	static bool cpuhp_is_atomic_state(enum cpuhp_state state)
277	{
278	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
279	}
280
/*
 * Synchronization state management
 *
 * Values stored in cpuhp_cpu_state::ap_sync_state. SYNC_STATE_DEAD is the
 * first enumerator and therefore the value of a zero-initialized state.
 */
enum cpuhp_sync_state {
	SYNC_STATE_DEAD,		/* Set by cpuhp_ap_report_dead() */
	SYNC_STATE_KICKED,		/* Set by cpuhp_can_boot_ap() */
	SYNC_STATE_SHOULD_DIE,		/* Set by cpuhp_bp_sync_dead() */
	SYNC_STATE_ALIVE,		/* Set by cpuhp_ap_sync_alive() */
	SYNC_STATE_SHOULD_ONLINE,	/* Set by cpuhp_bp_sync_alive() */
	SYNC_STATE_ONLINE,
};
290
291	#ifdef CONFIG_HOTPLUG_CORE_SYNC
292	/**
293	* cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
294	* @state: The synchronization state to set
295	*
296	* No synchronization point. Just update of the synchronization state, but implies
297	* a full barrier so that the AP changes are visible before the control CPU proceeds.
298	*/
299	static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
300	{
301	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
302
303	(void)atomic_xchg(v: st, new: state);
304	}
305
/*
 * Weak default for the poll step of cpuhp_wait_for_sync_state(); an
 * architecture can provide its own implementation.
 */
void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
307
308	static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
309	enum cpuhp_sync_state next_state)
310	{
311	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
312	ktime_t now, end, start = ktime_get();
313	int sync;
314
315	end = start + `10ULL` * NSEC_PER_SEC;
316
317	sync = atomic_read(v: st);
318	while (`1`) {
319	if (sync == state) {
320	if (!atomic_try_cmpxchg(v: st, old: &sync, new: next_state))
321	continue;
322	return true;
323	}
324
325	now = ktime_get();
326	if (now > end) {
327	/ Timeout. Leave the state unchanged /
328	return false;
329	} else if (now - start < NSEC_PER_MSEC) {
330	/ Poll for one millisecond /
331	arch_cpuhp_sync_state_poll();
332	} else {
333	usleep_range(USEC_PER_MSEC, max: `2` * USEC_PER_MSEC);
334	}
335	sync = atomic_read(v: st);
336	}
337	return true;
338	}
339	#else /* CONFIG_HOTPLUG_CORE_SYNC */
/* No-op when CONFIG_HOTPLUG_CORE_SYNC is not set. */
static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
341	#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
342
343	#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
344	/**
345	* cpuhp_ap_report_dead - Update synchronization state to DEAD
346	*
347	* No synchronization point. Just update of the synchronization state.
348	*/
349	void cpuhp_ap_report_dead(void)
350	{
351	cpuhp_ap_update_sync_state(state: SYNC_STATE_DEAD);
352	}
353
/*
 * Weak no-op default. Invoked by cpuhp_bp_sync_dead() once @cpu reached
 * SYNC_STATE_DEAD.
 */
void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
355
356	/*
357	* Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
358	* because the AP cannot issue complete() at this stage.
359	*/
360	static void cpuhp_bp_sync_dead(unsigned int cpu)
361	{
362	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
363	int sync = atomic_read(v: st);
364
365	do {
366	/ CPU can have reported dead already. Don't overwrite that! /
367	if (sync == SYNC_STATE_DEAD)
368	break;
369	} while (!atomic_try_cmpxchg(v: st, old: &sync, new: SYNC_STATE_SHOULD_DIE));
370
371	if (cpuhp_wait_for_sync_state(cpu, state: SYNC_STATE_DEAD, next_state: SYNC_STATE_DEAD)) {
372	/ CPU reached dead state. Invoke the cleanup function /
373	arch_cpuhp_cleanup_dead_cpu(cpu);
374	return;
375	}
376
377	/ No further action possible. Emit message and give up. /
378	pr_err("CPU%u failed to report dead state\n", cpu);
379	}
380	#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
/* No-op when CONFIG_HOTPLUG_CORE_SYNC_DEAD is not set. */
static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
382	#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
383
384	#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
385	/**
386	* cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
387	*
388	* Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
389	* for the BP to release it.
390	*/
391	void cpuhp_ap_sync_alive(void)
392	{
393	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
394
395	cpuhp_ap_update_sync_state(state: SYNC_STATE_ALIVE);
396
397	/ Wait for the control CPU to release it. /
398	while (atomic_read(v: st) != SYNC_STATE_SHOULD_ONLINE)
399	cpu_relax();
400	}
401
402	static bool cpuhp_can_boot_ap(unsigned int cpu)
403	{
404	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
405	int sync = atomic_read(v: st);
406
407	again:
408	switch (sync) {
409	case SYNC_STATE_DEAD:
410	/ CPU is properly dead /
411	break;
412	case SYNC_STATE_KICKED:
413	/ CPU did not come up in previous attempt /
414	break;
415	case SYNC_STATE_ALIVE:
416	/ CPU is stuck cpuhp_ap_sync_alive(). /
417	break;
418	default:
419	/ CPU failed to report online or dead and is in limbo state. /
420	return false;
421	}
422
423	/ Prepare for booting /
424	if (!atomic_try_cmpxchg(v: st, old: &sync, new: SYNC_STATE_KICKED))
425	goto again;
426
427	return true;
428	}
429
/*
 * Weak no-op default. Invoked by cpuhp_bp_sync_alive() after waiting for
 * @cpu to report alive, whether or not the wait succeeded.
 */
void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
431
432	/*
433	* Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
434	* because the AP cannot issue complete() so early in the bringup.
435	*/
436	static int cpuhp_bp_sync_alive(unsigned int cpu)
437	{
438	int ret = `0`;
439
440	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
441	return `0`;
442
443	if (!cpuhp_wait_for_sync_state(cpu, state: SYNC_STATE_ALIVE, next_state: SYNC_STATE_SHOULD_ONLINE)) {
444	pr_err("CPU%u failed to report alive state\n", cpu);
445	ret = -EIO;
446	}
447
448	/ Let the architecture cleanup the kick alive mechanics. /
449	arch_cpuhp_cleanup_kick_cpu(cpu);
450	return ret;
451	}
452	#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
453	static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return `0`; }
454	static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
455	#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
456
457	/ Serializes the updates to cpu_online_mask, cpu_present_mask /
458	static DEFINE_MUTEX(cpu_add_remove_lock);
459	bool cpuhp_tasks_frozen;
460	EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
461
/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 *
 * cpu_maps_update_begin() takes cpu_add_remove_lock, cpu_maps_update_done()
 * drops it again.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}
470
471	void cpu_maps_update_done(void)
472	{
473	mutex_unlock(lock: &cpu_add_remove_lock);
474	}
475
/*
 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock.
 *
 * This is a nesting counter: cpu_hotplug_disable() increments it and
 * cpu_hotplug_enable() decrements it.
 */
static int cpu_hotplug_disabled;
481
482	#ifdef CONFIG_HOTPLUG_CPU
483
/* The hotplug lock behind cpus_read_lock()/cpus_write_lock() and friends */
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);

/* Set by cpu_hotplug_disable_offlining() */
static bool cpu_hotplug_offline_disabled __ro_after_init;
487
488	void cpus_read_lock(void)
489	{
490	percpu_down_read(sem: &cpu_hotplug_lock);
491	}
492	EXPORT_SYMBOL_GPL(cpus_read_lock);
493
494	int cpus_read_trylock(void)
495	{
496	return percpu_down_read_trylock(sem: &cpu_hotplug_lock);
497	}
498	EXPORT_SYMBOL_GPL(cpus_read_trylock);
499
500	void cpus_read_unlock(void)
501	{
502	percpu_up_read(sem: &cpu_hotplug_lock);
503	}
504	EXPORT_SYMBOL_GPL(cpus_read_unlock);
505
/* Take the CPU hotplug lock for writing. Pair with cpus_write_unlock(). */
void cpus_write_lock(void)
{
	percpu_down_write(&cpu_hotplug_lock);
}
510
/* Release the write side of the CPU hotplug lock. */
void cpus_write_unlock(void)
{
	percpu_up_write(&cpu_hotplug_lock);
}
515
516	void lockdep_assert_cpus_held(void)
517	{
518	/*
519	* We can't have hotplug operations before userspace starts running,
520	* and some init codepaths will knowingly not take the hotplug lock.
521	* This is all valid, so mute lockdep until it makes sense to report
522	* unheld locks.
523	*/
524	if (system_state < SYSTEM_RUNNING)
525	return;
526
527	percpu_rwsem_assert_held(&cpu_hotplug_lock);
528	}
529	EXPORT_SYMBOL_GPL(lockdep_assert_cpus_held);
530
531	#ifdef CONFIG_LOCKDEP
/* Return: the result of percpu_rwsem_is_held() on cpu_hotplug_lock. */
int lockdep_is_cpus_held(void)
{
	return percpu_rwsem_is_held(&cpu_hotplug_lock);
}
536	#endif
537
538	static void lockdep_acquire_cpus_lock(void)
539	{
540	rwsem_acquire(&cpu_hotplug_lock.dep_map, `0`, `0`, _THIS_IP_);
541	}
542
/* Counterpart of lockdep_acquire_cpus_lock(): lockdep annotation only. */
static void lockdep_release_cpus_lock(void)
{
	rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
547
548	/ Declare CPU offlining not supported /
549	void cpu_hotplug_disable_offlining(void)
550	{
551	cpu_maps_update_begin();
552	cpu_hotplug_offline_disabled = true;
553	cpu_maps_update_done();
554	}
555
/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 *
 * Calls nest: each call increments 'cpu_hotplug_disabled' and has to be
 * balanced by a call to cpu_hotplug_enable().
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
570
571	static void __cpu_hotplug_enable(void)
572	{
573	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
574	return;
575	cpu_hotplug_disabled--;
576	}
577
/*
 * Undo one cpu_hotplug_disable() call. The counter is decremented under
 * cpu_add_remove_lock.
 */
void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
585
586	#else
587
/* No-op: without CONFIG_HOTPLUG_CPU there is no cpu_hotplug_lock to annotate */
static void lockdep_acquire_cpus_lock(void)
{
}

/* No-op: without CONFIG_HOTPLUG_CPU there is no cpu_hotplug_lock to annotate */
static void lockdep_release_cpus_lock(void)
{
}
595
596	#endif /* CONFIG_HOTPLUG_CPU */
597
/*
 * Architectures that need SMT-specific errata handling during SMT hotplug
 * should override this. The weak default does nothing.
 */
void __weak arch_smt_update(void) { }
603
604	#ifdef CONFIG_HOTPLUG_SMT
605
/* Current SMT control state; changed by cpu_smt_disable() and cpu_smt_set_num_threads() */
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
/* The max_threads value passed to cpu_smt_set_num_threads() */
static unsigned int cpu_smt_max_threads __ro_after_init;
/* Starts at UINT_MAX; lowered by cpu_smt_disable() and cpu_smt_set_num_threads() */
unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX;
609
610	void __init cpu_smt_disable(bool force)
611	{
612	if (!cpu_smt_possible())
613	return;
614
615	if (force) {
616	pr_info("SMT: Force disabled\n");
617	cpu_smt_control = CPU_SMT_FORCE_DISABLED;
618	} else {
619	pr_info("SMT: disabled\n");
620	cpu_smt_control = CPU_SMT_DISABLED;
621	}
622	cpu_smt_num_threads = `1`;
623	}
624
625	/*
626	* The decision whether SMT is supported can only be done after the full
627	* CPU identification. Called from architecture code.
628	*/
629	void __init cpu_smt_set_num_threads(unsigned int num_threads,
630	unsigned int max_threads)
631	{
632	WARN_ON(!num_threads \|\| (num_threads > max_threads));
633
634	if (max_threads == `1`)
635	cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
636
637	cpu_smt_max_threads = max_threads;
638
639	/*
640	* If SMT has been disabled via the kernel command line or SMT is
641	* not supported, set cpu_smt_num_threads to 1 for consistency.
642	* If enabled, take the architecture requested number of threads
643	* to bring up into account.
644	*/
645	if (cpu_smt_control != CPU_SMT_ENABLED)
646	cpu_smt_num_threads = `1`;
647	else if (num_threads < cpu_smt_num_threads)
648	cpu_smt_num_threads = num_threads;
649	}
650
651	static int __init smt_cmdline_disable(char *str)
652	{
653	cpu_smt_disable(force: str && !strcmp(str, "force"));
654	return `0`;
655	}
656	early_param("nosmt", smt_cmdline_disable);
657
/*
 * For architectures supporting partial SMT states, check if the thread is
 * allowed. Otherwise this has already been checked through
 * cpu_smt_max_threads when setting the SMT level.
 */
static inline bool cpu_smt_thread_allowed(unsigned int cpu)
{
#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
	return topology_smt_thread_allowed(cpu);
#else
	return true;
#endif
}
671
672	static inline bool cpu_bootable(unsigned int cpu)
673	{
674	if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
675	return true;
676
677	/ All CPUs are bootable if controls are not configured /
678	if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED)
679	return true;
680
681	/ All CPUs are bootable if CPU is not SMT capable /
682	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
683	return true;
684
685	if (topology_is_primary_thread(cpu))
686	return true;
687
688	/*
689	* On x86 it's required to boot all logical CPUs at least once so
690	* that the init code can get a chance to set CR4.MCE on each
691	* CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
692	* core will shutdown the machine.
693	*/
694	return !cpumask_test_cpu(cpu, cpumask: &cpus_booted_once_mask);
695	}
696
697	/ Returns true if SMT is supported and not forcefully (irreversibly) disabled /
698	bool cpu_smt_possible(void)
699	{
700	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
701	cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
702	}
703	EXPORT_SYMBOL_GPL(cpu_smt_possible);
704
705	#else
/* Without CONFIG_HOTPLUG_SMT every CPU is bootable. */
static inline bool cpu_bootable(unsigned int cpu) { return true; }
707	#endif
708
709	static inline enum cpuhp_state
710	cpuhp_set_state(int cpu, struct cpuhp_cpu_state st, enum* cpuhp_state target)
711	{
712	enum cpuhp_state prev_state = st->state;
713	bool bringup = st->state < target;
714
715	st->rollback = false;
716	st->last = NULL;
717
718	st->target = target;
719	st->single = false;
720	st->bringup = bringup;
721	if (cpu_dying(cpu) != !bringup)
722	set_cpu_dying(cpu, !bringup);
723
724	return prev_state;
725	}
726
727	static inline void
728	cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
729	enum cpuhp_state prev_state)
730	{
731	bool bringup = !st->bringup;
732
733	st->target = prev_state;
734
735	/*
736	* Already rolling back. No need invert the bringup value or to change
737	* the current state.
738	*/
739	if (st->rollback)
740	return;
741
742	st->rollback = true;
743
744	/*
745	* If we have st->last we need to undo partial multi_instance of this
746	* state first. Otherwise start undo at the previous state.
747	*/
748	if (!st->last) {
749	if (st->bringup)
750	st->state--;
751	else
752	st->state++;
753	}
754
755	st->bringup = bringup;
756	if (cpu_dying(cpu) != !bringup)
757	set_cpu_dying(cpu, !bringup);
758	}
759
760	/ Regular hotplug invocation of the AP hotplug thread /
761	static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
762	{
763	if (!st->single && st->state == st->target)
764	return;
765
766	st->result = `0`;
767	/*
768	* Make sure the above stores are visible before should_run becomes
769	* true. Paired with the mb() above in cpuhp_thread_fun()
770	*/
771	smp_mb();
772	st->should_run = true;
773	wake_up_process(tsk: st->thread);
774	wait_for_ap_thread(st, bringup: st->bringup);
775	}
776
777	static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
778	enum cpuhp_state target)
779	{
780	enum cpuhp_state prev_state;
781	int ret;
782
783	prev_state = cpuhp_set_state(cpu, st, target);
784	__cpuhp_kick_ap(st);
785	if ((ret = st->result)) {
786	cpuhp_reset_state(cpu, st, prev_state);
787	__cpuhp_kick_ap(st);
788	}
789
790	return ret;
791	}
792
793	static int bringup_wait_for_ap_online(unsigned int cpu)
794	{
795	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
796
797	/ Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE /
798	wait_for_ap_thread(st, bringup: true);
799	if (WARN_ON_ONCE((!cpu_online(cpu))))
800	return -ECANCELED;
801
802	/ Unpark the hotplug thread of the target cpu /
803	kthread_unpark(k: st->thread);
804
805	/*
806	* SMT soft disabling on X86 requires to bring the CPU out of the
807	* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
808	* CPU marked itself as booted_once in notify_cpu_starting() so the
809	* cpu_bootable() check will now return false if this is not the
810	* primary sibling.
811	*/
812	if (!cpu_bootable(cpu))
813	return -ECANCELED;
814	return `0`;
815	}
816
817	#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
818	static int cpuhp_kick_ap_alive(unsigned int cpu)
819	{
820	if (!cpuhp_can_boot_ap(cpu))
821	return -EAGAIN;
822
823	return arch_cpuhp_kick_ap_alive(cpu, tidle: idle_thread_get(cpu));
824	}
825
826	static int cpuhp_bringup_ap(unsigned int cpu)
827	{
828	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
829	int ret;
830
831	/*
832	* Some architectures have to walk the irq descriptors to
833	* setup the vector space for the cpu which comes online.
834	* Prevent irq alloc/free across the bringup.
835	*/
836	irq_lock_sparse();
837
838	ret = cpuhp_bp_sync_alive(cpu);
839	if (ret)
840	goto out_unlock;
841
842	ret = bringup_wait_for_ap_online(cpu);
843	if (ret)
844	goto out_unlock;
845
846	irq_unlock_sparse();
847
848	if (st->target <= CPUHP_AP_ONLINE_IDLE)
849	return `0`;
850
851	return cpuhp_kick_ap(cpu, st, target: st->target);
852
853	out_unlock:
854	irq_unlock_sparse();
855	return ret;
856	}
857	#else
858	static int bringup_cpu(unsigned int cpu)
859	{
860	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
861	struct task_struct *idle = idle_thread_get(cpu);
862	int ret;
863
864	if (!cpuhp_can_boot_ap(cpu))
865	return -EAGAIN;
866
867	/*
868	* Some architectures have to walk the irq descriptors to
869	* setup the vector space for the cpu which comes online.
870	*
871	* Prevent irq alloc/free across the bringup by acquiring the
872	* sparse irq lock. Hold it until the upcoming CPU completes the
873	* startup in cpuhp_online_idle() which allows to avoid
874	* intermediate synchronization points in the architecture code.
875	*/
876	irq_lock_sparse();
877
878	ret = __cpu_up(cpu, idle);
879	if (ret)
880	goto out_unlock;
881
882	ret = cpuhp_bp_sync_alive(cpu);
883	if (ret)
884	goto out_unlock;
885
886	ret = bringup_wait_for_ap_online(cpu);
887	if (ret)
888	goto out_unlock;
889
890	irq_unlock_sparse();
891
892	if (st->target <= CPUHP_AP_ONLINE_IDLE)
893	return `0`;
894
895	return cpuhp_kick_ap(cpu, st, st->target);
896
897	out_unlock:
898	irq_unlock_sparse();
899	return ret;
900	}
901	#endif
902
903	static int finish_cpu(unsigned int cpu)
904	{
905	struct task_struct *idle = idle_thread_get(cpu);
906	struct mm_struct *mm = idle->active_mm;
907
908	/*
909	* sched_force_init_mm() ensured the use of &init_mm,
910	* drop that refcount now that the CPU has stopped.
911	*/
912	WARN_ON(mm != &init_mm);
913	idle->active_mm = NULL;
914	mmdrop_lazy_tlb(mm);
915
916	return `0`;
917	}
918
919	/*
920	* Hotplug state machine related functions
921	*/
922
923	/*
924	* Get the next state to run. Empty ones will be skipped. Returns true if a
925	* state must be run.
926	*
927	* st->state will be modified ahead of time, to match state_to_run, as if it
928	* has already ran.
929	*/
930	static bool cpuhp_next_state(bool bringup,
931	enum cpuhp_state *state_to_run,
932	struct cpuhp_cpu_state *st,
933	enum cpuhp_state target)
934	{
935	do {
936	if (bringup) {
937	if (st->state >= target)
938	return false;
939
940	*state_to_run = ++st->state;
941	} else {
942	if (st->state <= target)
943	return false;
944
945	*state_to_run = st->state--;
946	}
947
948	if (!cpuhp_step_empty(bringup, step: cpuhp_get_step(state: *state_to_run)))
949	break;
950	} while (true);
951
952	return true;
953	}
954
955	static int __cpuhp_invoke_callback_range(bool bringup,
956	unsigned int cpu,
957	struct cpuhp_cpu_state *st,
958	enum cpuhp_state target,
959	bool nofail)
960	{
961	enum cpuhp_state state;
962	int ret = `0`;
963
964	while (cpuhp_next_state(bringup, state_to_run: &state, st, target)) {
965	int err;
966
967	err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
968	if (!err)
969	continue;
970
971	if (nofail) {
972	pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
973	cpu, bringup ? "UP" : "DOWN",
974	cpuhp_get_step(st->state)->name,
975	st->state, err);
976	ret = -`1`;
977	} else {
978	ret = err;
979	break;
980	}
981	}
982
983	return ret;
984	}
985
986	static inline int cpuhp_invoke_callback_range(bool bringup,
987	unsigned int cpu,
988	struct cpuhp_cpu_state *st,
989	enum cpuhp_state target)
990	{
991	return __cpuhp_invoke_callback_range(bringup, cpu, st, target, nofail: false);
992	}
993
994	static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
995	unsigned int cpu,
996	struct cpuhp_cpu_state *st,
997	enum cpuhp_state target)
998	{
999	__cpuhp_invoke_callback_range(bringup, cpu, st, target, nofail: true);
1000	}
1001
1002	static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
1003	{
1004	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
1005	return true;
1006	/*
1007	* When CPU hotplug is disabled, then taking the CPU down is not
1008	* possible because takedown_cpu() and the architecture and
1009	* subsystem specific mechanisms are not available. So the CPU
1010	* which would be completely unplugged again needs to stay around
1011	* in the current state.
1012	*/
1013	return st->state <= CPUHP_BRINGUP_CPU;
1014	}
1015
1016	static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1017	enum cpuhp_state target)
1018	{
1019	enum cpuhp_state prev_state = st->state;
1020	int ret = `0`;
1021
1022	ret = cpuhp_invoke_callback_range(bringup: true, cpu, st, target);
1023	if (ret) {
1024	pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
1025	ret, cpu, cpuhp_get_step(st->state)->name,
1026	st->state);
1027
1028	cpuhp_reset_state(cpu, st, prev_state);
1029	if (can_rollback_cpu(st))
1030	WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
1031	prev_state));
1032	}
1033	return ret;
1034	}
1035
1036	/*
1037	* The cpu hotplug threads manage the bringup and teardown of the cpus
1038	*/
1039	static int cpuhp_should_run(unsigned int cpu)
1040	{
1041	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1042
1043	return st->should_run;
1044	}
1045
1046	/*
1047	* Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
1048	* callbacks when a state gets [un]installed at runtime.
1049	*
1050	* Each invocation of this function by the smpboot thread does a single AP
1051	* state callback.
1052	*
1053	* It has 3 modes of operation:
1054	* - single: runs st->cb_state
1055	* - up: runs ++st->state, while st->state < st->target
1056	* - down: runs st->state--, while st->state > st->target
1057	*
1058	* When complete or on error, should_run is cleared and the completion is fired.
1059	*/
1060	static void cpuhp_thread_fun(unsigned int cpu)
1061	{
1062	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1063	bool bringup = st->bringup;
1064	enum cpuhp_state state;
1065
1066	if (WARN_ON_ONCE(!st->should_run))
1067	return;
1068
1069	/*
1070	* ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
1071	* that if we see ->should_run we also see the rest of the state.
1072	*/
1073	smp_mb();
1074
1075	/*
1076	* The BP holds the hotplug lock, but we're now running on the AP,
1077	* ensure that anybody asserting the lock is held, will actually find
1078	* it so.
1079	*/
1080	lockdep_acquire_cpus_lock();
1081	cpuhp_lock_acquire(bringup);
1082
1083	if (st->single) {
1084	state = st->cb_state;
1085	st->should_run = false;
1086	} else {
1087	st->should_run = cpuhp_next_state(bringup, state_to_run: &state, st, target: st->target);
1088	if (!st->should_run)
1089	goto end;
1090	}
1091
1092	WARN_ON_ONCE(!cpuhp_is_ap_state(state));
1093
1094	if (cpuhp_is_atomic_state(state)) {
1095	local_irq_disable();
1096	st->result = cpuhp_invoke_callback(cpu, state, bringup, node: st->node, lastp: &st->last);
1097	local_irq_enable();
1098
1099	/*
1100	* STARTING/DYING must not fail!
1101	*/
1102	WARN_ON_ONCE(st->result);
1103	} else {
1104	st->result = cpuhp_invoke_callback(cpu, state, bringup, node: st->node, lastp: &st->last);
1105	}
1106
1107	if (st->result) {
1108	/*
1109	* If we fail on a rollback, we're up a creek without no
1110	* paddle, no way forward, no way back. We loose, thanks for
1111	* playing.
1112	*/
1113	WARN_ON_ONCE(st->rollback);
1114	st->should_run = false;
1115	}
1116
1117	end:
1118	cpuhp_lock_release(bringup);
1119	lockdep_release_cpus_lock();
1120
1121	if (!st->should_run)
1122	complete_ap_thread(st, bringup);
1123	}
1124
1125	/ Invoke a single callback on a remote cpu /
1126	static int
1127	cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
1128	struct hlist_node *node)
1129	{
1130	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1131	int ret;
1132
1133	if (!cpu_online(cpu))
1134	return `0`;
1135
1136	cpuhp_lock_acquire(bringup: false);
1137	cpuhp_lock_release(bringup: false);
1138
1139	cpuhp_lock_acquire(bringup: true);
1140	cpuhp_lock_release(bringup: true);
1141
1142	/*
1143	* If we are up and running, use the hotplug thread. For early calls
1144	* we invoke the thread function directly.
1145	*/
1146	if (!st->thread)
1147	return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1148
1149	st->rollback = false;
1150	st->last = NULL;
1151
1152	st->node = node;
1153	st->bringup = bringup;
1154	st->cb_state = state;
1155	st->single = true;
1156
1157	__cpuhp_kick_ap(st);
1158
1159	/*
1160	* If we failed and did a partial, do a rollback.
1161	*/
1162	if ((ret = st->result) && st->last) {
1163	st->rollback = true;
1164	st->bringup = !bringup;
1165
1166	__cpuhp_kick_ap(st);
1167	}
1168
1169	/*
1170	* Clean up the leftovers so the next hotplug operation wont use stale
1171	* data.
1172	*/
1173	st->node = st->last = NULL;
1174	return ret;
1175	}
1176
1177	static int cpuhp_kick_ap_work(unsigned int cpu)
1178	{
1179	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1180	enum cpuhp_state prev_state = st->state;
1181	int ret;
1182
1183	cpuhp_lock_acquire(bringup: false);
1184	cpuhp_lock_release(bringup: false);
1185
1186	cpuhp_lock_acquire(bringup: true);
1187	cpuhp_lock_release(bringup: true);
1188
1189	trace_cpuhp_enter(cpu, target: st->target, idx: prev_state, fun: cpuhp_kick_ap_work);
1190	ret = cpuhp_kick_ap(cpu, st, target: st->target);
1191	trace_cpuhp_exit(cpu, state: st->state, idx: prev_state, ret);
1192
1193	return ret;
1194	}
1195
1196	static struct smp_hotplug_thread cpuhp_threads = {
1197	.store = &cpuhp_state.thread,
1198	.thread_should_run = cpuhp_should_run,
1199	.thread_fn = cpuhp_thread_fun,
1200	.thread_comm = "cpuhp/%u",
1201	.selfparking = true,
1202	};
1203
1204	static __init void cpuhp_init_state(void)
1205	{
1206	struct cpuhp_cpu_state *st;
1207	int cpu;
1208
1209	for_each_possible_cpu(cpu) {
1210	st = per_cpu_ptr(&cpuhp_state, cpu);
1211	init_completion(x: &st->done_up);
1212	init_completion(x: &st->done_down);
1213	}
1214	}
1215
1216	void __init cpuhp_threads_init(void)
1217	{
1218	cpuhp_init_state();
1219	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
1220	kthread_unpark(this_cpu_read(cpuhp_state.thread));
1221	}
1222
1223	#ifdef CONFIG_HOTPLUG_CPU
/* Default used when the architecture does not supply its own definition. */
#ifndef arch_clear_mm_cpumask_cpu
#define arch_clear_mm_cpumask_cpu(cpu, mm)	\
	cpumask_clear_cpu(cpu, mm_cpumask(mm))
#endif
1227
1228	/**
1229	* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
1230	* @cpu: a CPU id
1231	*
1232	* This function walks all processes, finds a valid mm struct for each one and
1233	* then clears a corresponding bit in mm's cpumask. While this all sounds
1234	* trivial, there are various non-obvious corner cases, which this function
1235	* tries to solve in a safe manner.
1236	*
1237	* Also note that the function uses a somewhat relaxed locking scheme, so it may
1238	* be called only for an already offlined CPU.
1239	*/
1240	void clear_tasks_mm_cpumask(int cpu)
1241	{
1242	struct task_struct *p;
1243
1244	/*
1245	* This function is called after the cpu is taken down and marked
1246	* offline, so its not like new tasks will ever get this cpu set in
1247	* their mm mask. -- Peter Zijlstra
1248	* Thus, we may use rcu_read_lock() here, instead of grabbing
1249	* full-fledged tasklist_lock.
1250	*/
1251	WARN_ON(cpu_online(cpu));
1252	rcu_read_lock();
1253	for_each_process(p) {
1254	struct task_struct *t;
1255
1256	/*
1257	* Main thread might exit, but other threads may still have
1258	* a valid mm. Find one.
1259	*/
1260	t = find_lock_task_mm(p);
1261	if (!t)
1262	continue;
1263	arch_clear_mm_cpumask_cpu(cpu, t->mm);
1264	task_unlock(p: t);
1265	}
1266	rcu_read_unlock();
1267	}
1268
1269	/ Take this CPU down. /
1270	static int take_cpu_down(void *_param)
1271	{
1272	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1273	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
1274	int err, cpu = smp_processor_id();
1275
1276	/ Ensure this CPU doesn't handle any more interrupts. /
1277	err = __cpu_disable();
1278	if (err < `0`)
1279	return err;
1280
1281	/*
1282	* Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
1283	* down, that the current state is CPUHP_TEARDOWN_CPU - 1.
1284	*/
1285	WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - `1`));
1286
1287	/*
1288	* Invoke the former CPU_DYING callbacks. DYING must not fail!
1289	*/
1290	cpuhp_invoke_callback_range_nofail(bringup: false, cpu, st, target);
1291
1292	/ Park the stopper thread /
1293	stop_machine_park(cpu);
1294	return `0`;
1295	}
1296
1297	static int takedown_cpu(unsigned int cpu)
1298	{
1299	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1300	int err;
1301
1302	/ Park the smpboot threads /
1303	kthread_park(k: st->thread);
1304
1305	/*
1306	* Prevent irq alloc/free while the dying cpu reorganizes the
1307	* interrupt affinities.
1308	*/
1309	irq_lock_sparse();
1310
1311	/*
1312	* So now all preempt/rcu users must observe !cpu_active().
1313	*/
1314	err = stop_machine_cpuslocked(fn: take_cpu_down, NULL, cpumask_of(cpu));
1315	if (err) {
1316	/ CPU refused to die /
1317	irq_unlock_sparse();
1318	/ Unpark the hotplug thread so we can rollback there /
1319	kthread_unpark(k: st->thread);
1320	return err;
1321	}
1322	BUG_ON(cpu_online(cpu));
1323
1324	/*
1325	* The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1326	* all runnable tasks from the CPU, there's only the idle task left now
1327	* that the migration thread is done doing the stop_machine thing.
1328	*
1329	* Wait for the stop thread to go away.
1330	*/
1331	wait_for_ap_thread(st, bringup: false);
1332	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1333
1334	/ Interrupts are moved away from the dying cpu, reenable alloc/free /
1335	irq_unlock_sparse();
1336
1337	hotplug_cpu__broadcast_tick_pull(dead_cpu: cpu);
1338	/ This actually kills the CPU. /
1339	__cpu_die(cpu);
1340
1341	cpuhp_bp_sync_dead(cpu);
1342
1343	lockdep_cleanup_dead_cpu(cpu, idle: idle_thread_get(cpu));
1344
1345	/*
1346	* Callbacks must be re-integrated right away to the RCU state machine.
1347	* Otherwise an RCU callback could block a further teardown function
1348	* waiting for its completion.
1349	*/
1350	rcutree_migrate_callbacks(cpu);
1351
1352	return `0`;
1353	}
1354
1355	static void cpuhp_complete_idle_dead(void *arg)
1356	{
1357	struct cpuhp_cpu_state *st = arg;
1358
1359	complete_ap_thread(st, bringup: false);
1360	}
1361
1362	void cpuhp_report_idle_dead(void)
1363	{
1364	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1365
1366	BUG_ON(st->state != CPUHP_AP_OFFLINE);
1367	tick_assert_timekeeping_handover();
1368	rcutree_report_cpu_dead();
1369	st->state = CPUHP_AP_IDLE_DEAD;
1370	/*
1371	* We cannot call complete after rcutree_report_cpu_dead() so we delegate it
1372	* to an online cpu.
1373	*/
1374	smp_call_function_single(cpuid: cpumask_first(cpu_online_mask),
1375	func: cpuhp_complete_idle_dead, info: st, wait: `0`);
1376	}
1377
1378	static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1379	enum cpuhp_state target)
1380	{
1381	enum cpuhp_state prev_state = st->state;
1382	int ret = `0`;
1383
1384	ret = cpuhp_invoke_callback_range(bringup: false, cpu, st, target);
1385	if (ret) {
1386	pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
1387	ret, cpu, cpuhp_get_step(st->state)->name,
1388	st->state);
1389
1390	cpuhp_reset_state(cpu, st, prev_state);
1391
1392	if (st->state < prev_state)
1393	WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
1394	prev_state));
1395	}
1396
1397	return ret;
1398	}
1399
1400	/ Requires cpu_add_remove_lock to be held /
1401	static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1402	enum cpuhp_state target)
1403	{
1404	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1405	int prev_state, ret = `0`;
1406
1407	if (num_online_cpus() == `1`)
1408	return -EBUSY;
1409
1410	if (!cpu_present(cpu))
1411	return -EINVAL;
1412
1413	cpus_write_lock();
1414
1415	cpuhp_tasks_frozen = tasks_frozen;
1416
1417	prev_state = cpuhp_set_state(cpu, st, target);
1418	/*
1419	* If the current CPU state is in the range of the AP hotplug thread,
1420	* then we need to kick the thread.
1421	*/
1422	if (st->state > CPUHP_TEARDOWN_CPU) {
1423	st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1424	ret = cpuhp_kick_ap_work(cpu);
1425	/*
1426	* The AP side has done the error rollback already. Just
1427	* return the error code..
1428	*/
1429	if (ret)
1430	goto out;
1431
1432	/*
1433	* We might have stopped still in the range of the AP hotplug
1434	* thread. Nothing to do anymore.
1435	*/
1436	if (st->state > CPUHP_TEARDOWN_CPU)
1437	goto out;
1438
1439	st->target = target;
1440	}
1441	/*
1442	* The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1443	* to do the further cleanups.
1444	*/
1445	ret = cpuhp_down_callbacks(cpu, st, target);
1446	if (ret && st->state < prev_state) {
1447	if (st->state == CPUHP_TEARDOWN_CPU) {
1448	cpuhp_reset_state(cpu, st, prev_state);
1449	__cpuhp_kick_ap(st);
1450	} else {
1451	WARN(`1`, "DEAD callback error for CPU%d", cpu);
1452	}
1453	}
1454
1455	out:
1456	cpus_write_unlock();
1457	arch_smt_update();
1458	return ret;
1459	}
1460
1461	struct cpu_down_work {
1462	unsigned int cpu;
1463	enum cpuhp_state target;
1464	};
1465
1466	static long __cpu_down_maps_locked(void *arg)
1467	{
1468	struct cpu_down_work *work = arg;
1469
1470	return _cpu_down(cpu: work->cpu, tasks_frozen: `0`, target: work->target);
1471	}
1472
1473	static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1474	{
1475	struct cpu_down_work work = { .cpu = cpu, .target = target, };
1476
1477	/*
1478	* If the platform does not support hotplug, report it explicitly to
1479	* differentiate it from a transient offlining failure.
1480	*/
1481	if (cpu_hotplug_offline_disabled)
1482	return -EOPNOTSUPP;
1483	if (cpu_hotplug_disabled)
1484	return -EBUSY;
1485
1486	/*
1487	* Ensure that the control task does not run on the to be offlined
1488	* CPU to prevent a deadlock against cfs_b->period_timer.
1489	* Also keep at least one housekeeping cpu onlined to avoid generating
1490	* an empty sched_domain span.
1491	*/
1492	for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) {
1493	if (cpu != work.cpu)
1494	return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
1495	}
1496	return -EBUSY;
1497	}
1498
1499	static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1500	{
1501	int err;
1502
1503	cpu_maps_update_begin();
1504	err = cpu_down_maps_locked(cpu, target);
1505	cpu_maps_update_done();
1506	return err;
1507	}
1508
1509	/**
1510	* cpu_device_down - Bring down a cpu device
1511	* @dev: Pointer to the cpu device to offline
1512	*
1513	* This function is meant to be used by device core cpu subsystem only.
1514	*
1515	* Other subsystems should use remove_cpu() instead.
1516	*
1517	* Return: %0 on success or a negative errno code
1518	*/
1519	int cpu_device_down(struct device *dev)
1520	{
1521	return cpu_down(cpu: dev->id, target: CPUHP_OFFLINE);
1522	}
1523
/**
 * remove_cpu - Offline a CPU through its device
 * @cpu: CPU number
 *
 * Calls device_offline() on the CPU's device with the device hotplug lock
 * held.
 *
 * Return: the value returned by device_offline()
 */
int remove_cpu(unsigned int cpu)
{
	int ret;

	lock_device_hotplug();
	ret = device_offline(get_cpu_device(cpu));
	unlock_device_hotplug();

	return ret;
}
EXPORT_SYMBOL_GPL(remove_cpu);
1535
1536	void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1537	{
1538	unsigned int cpu;
1539	int error;
1540
1541	cpu_maps_update_begin();
1542
1543	/*
1544	* Make certain the cpu I'm about to reboot on is online.
1545	*
1546	* This is inline to what migrate_to_reboot_cpu() already do.
1547	*/
1548	if (!cpu_online(cpu: primary_cpu))
1549	primary_cpu = cpumask_first(cpu_online_mask);
1550
1551	for_each_online_cpu(cpu) {
1552	if (cpu == primary_cpu)
1553	continue;
1554
1555	error = cpu_down_maps_locked(cpu, target: CPUHP_OFFLINE);
1556	if (error) {
1557	pr_err("Failed to offline CPU%d - error=%d",
1558	cpu, error);
1559	break;
1560	}
1561	}
1562
1563	/*
1564	* Ensure all but the reboot CPU are offline.
1565	*/
1566	BUG_ON(num_online_cpus() > `1`);
1567
1568	/*
1569	* Make sure the CPUs won't be enabled by someone else after this
1570	* point. Kexec will reboot to a new kernel shortly resetting
1571	* everything along the way.
1572	*/
1573	cpu_hotplug_disabled++;
1574
1575	cpu_maps_update_done();
1576	}
1577
1578	#else
1579	#define takedown_cpu NULL
#endif /* CONFIG_HOTPLUG_CPU */
1581
1582	/**
1583	* notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1584	* @cpu: cpu that just started
1585	*
1586	* It must be called by the arch code on the new cpu, before the new cpu
1587	* enables interrupts and before the "boot" cpu returns from __cpu_up().
1588	*/
1589	void notify_cpu_starting(unsigned int cpu)
1590	{
1591	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1592	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1593
1594	rcutree_report_cpu_starting(cpu); / Enables RCU usage on this CPU. /
1595	cpumask_set_cpu(cpu, dstp: &cpus_booted_once_mask);
1596
1597	/*
1598	* STARTING must not fail!
1599	*/
1600	cpuhp_invoke_callback_range_nofail(bringup: true, cpu, st, target);
1601	}
1602
1603	/*
1604	* Called from the idle task. Wake up the controlling task which brings the
1605	* hotplug thread of the upcoming CPU up and then delegates the rest of the
1606	* online bringup to the hotplug thread.
1607	*/
1608	void cpuhp_online_idle(enum cpuhp_state state)
1609	{
1610	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1611
1612	/ Happens for the boot cpu /
1613	if (state != CPUHP_AP_ONLINE_IDLE)
1614	return;
1615
1616	cpuhp_ap_update_sync_state(state: SYNC_STATE_ONLINE);
1617
1618	/*
1619	* Unpark the stopper thread before we start the idle loop (and start
1620	* scheduling); this ensures the stopper task is always available.
1621	*/
1622	stop_machine_unpark(smp_processor_id());
1623
1624	st->state = CPUHP_AP_ONLINE_IDLE;
1625	complete_ap_thread(st, bringup: true);
1626	}
1627
1628	/ Requires cpu_add_remove_lock to be held /
1629	static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1630	{
1631	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1632	struct task_struct *idle;
1633	int ret = `0`;
1634
1635	cpus_write_lock();
1636
1637	if (!cpu_present(cpu)) {
1638	ret = -EINVAL;
1639	goto out;
1640	}
1641
1642	/*
1643	* The caller of cpu_up() might have raced with another
1644	* caller. Nothing to do.
1645	*/
1646	if (st->state >= target)
1647	goto out;
1648
1649	if (st->state == CPUHP_OFFLINE) {
1650	/ Let it fail before we try to bring the cpu up /
1651	idle = idle_thread_get(cpu);
1652	if (IS_ERR(ptr: idle)) {
1653	ret = PTR_ERR(ptr: idle);
1654	goto out;
1655	}
1656
1657	/*
1658	* Reset stale stack state from the last time this CPU was online.
1659	*/
1660	scs_task_reset(tsk: idle);
1661	kasan_unpoison_task_stack(task: idle);
1662	}
1663
1664	cpuhp_tasks_frozen = tasks_frozen;
1665
1666	cpuhp_set_state(cpu, st, target);
1667	/*
1668	* If the current CPU state is in the range of the AP hotplug thread,
1669	* then we need to kick the thread once more.
1670	*/
1671	if (st->state > CPUHP_BRINGUP_CPU) {
1672	ret = cpuhp_kick_ap_work(cpu);
1673	/*
1674	* The AP side has done the error rollback already. Just
1675	* return the error code..
1676	*/
1677	if (ret)
1678	goto out;
1679	}
1680
1681	/*
1682	* Try to reach the target state. We max out on the BP at
1683	* CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1684	* responsible for bringing it up to the target state.
1685	*/
1686	target = min((int)target, CPUHP_BRINGUP_CPU);
1687	ret = cpuhp_up_callbacks(cpu, st, target);
1688	out:
1689	cpus_write_unlock();
1690	arch_smt_update();
1691	return ret;
1692	}
1693
1694	static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1695	{
1696	int err = `0`;
1697
1698	if (!cpu_possible(cpu)) {
1699	pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1700	cpu);
1701	return -EINVAL;
1702	}
1703
1704	err = try_online_node(cpu_to_node(cpu));
1705	if (err)
1706	return err;
1707
1708	cpu_maps_update_begin();
1709
1710	if (cpu_hotplug_disabled) {
1711	err = -EBUSY;
1712	goto out;
1713	}
1714	if (!cpu_bootable(cpu)) {
1715	err = -EPERM;
1716	goto out;
1717	}
1718
1719	err = _cpu_up(cpu, tasks_frozen: `0`, target);
1720	out:
1721	cpu_maps_update_done();
1722	return err;
1723	}
1724
1725	/**
1726	* cpu_device_up - Bring up a cpu device
1727	* @dev: Pointer to the cpu device to online
1728	*
1729	* This function is meant to be used by device core cpu subsystem only.
1730	*
1731	* Other subsystems should use add_cpu() instead.
1732	*
1733	* Return: %0 on success or a negative errno code
1734	*/
1735	int cpu_device_up(struct device *dev)
1736	{
1737	return cpu_up(cpu: dev->id, target: CPUHP_ONLINE);
1738	}
1739
/**
 * add_cpu - Online a CPU through its device
 * @cpu: CPU number
 *
 * Calls device_online() on the CPU's device with the device hotplug lock
 * held.
 *
 * Return: the value returned by device_online()
 */
int add_cpu(unsigned int cpu)
{
	int ret;

	lock_device_hotplug();
	ret = device_online(get_cpu_device(cpu));
	unlock_device_hotplug();

	return ret;
}
EXPORT_SYMBOL_GPL(add_cpu);
1751
1752	/**
1753	* bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1754	* @sleep_cpu: The cpu we hibernated on and should be brought up.
1755	*
1756	* On some architectures like arm64, we can hibernate on any CPU, but on
1757	* wake up the CPU we hibernated on might be offline as a side effect of
1758	* using maxcpus= for example.
1759	*
1760	* Return: %0 on success or a negative errno code
1761	*/
1762	int bringup_hibernate_cpu(unsigned int sleep_cpu)
1763	{
1764	int ret;
1765
1766	if (!cpu_online(cpu: sleep_cpu)) {
1767	pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1768	ret = cpu_up(cpu: sleep_cpu, target: CPUHP_ONLINE);
1769	if (ret) {
1770	pr_err("Failed to bring hibernate-CPU up!\n");
1771	return ret;
1772	}
1773	}
1774	return `0`;
1775	}
1776
1777	static void __init cpuhp_bringup_mask(const struct cpumask mask, unsigned* int ncpus,
1778	enum cpuhp_state target)
1779	{
1780	unsigned int cpu;
1781
1782	for_each_cpu(cpu, mask) {
1783	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1784
1785	if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
1786	/*
1787	* If this failed then cpu_up() might have only
1788	* rolled back to CPUHP_BP_KICK_AP for the final
1789	* online. Clean it up. NOOP if already rolled back.
1790	*/
1791	WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
1792	}
1793
1794	if (!--ncpus)
1795	break;
1796	}
1797	}
1798
1799	#ifdef CONFIG_HOTPLUG_PARALLEL
1800	static bool __cpuhp_parallel_bringup __ro_after_init = true;
1801
1802	static int __init parallel_bringup_parse_param(char *arg)
1803	{
1804	return kstrtobool(s: arg, res: &__cpuhp_parallel_bringup);
1805	}
1806	early_param("cpuhp.parallel", parallel_bringup_parse_param);
1807
1808	#ifdef CONFIG_HOTPLUG_SMT
1809	static inline bool cpuhp_smt_aware(void)
1810	{
1811	return cpu_smt_max_threads > `1`;
1812	}
1813
1814	static inline const struct cpumask cpuhp_get_primary_thread_mask(void*)
1815	{
1816	return cpu_primary_thread_mask;
1817	}
1818	#else
1819	static inline bool cpuhp_smt_aware(void)
1820	{
1821	return false;
1822	}
1823	static inline const struct cpumask cpuhp_get_primary_thread_mask(void*)
1824	{
1825	return cpu_none_mask;
1826	}
1827	#endif
1828
1829	bool __weak arch_cpuhp_init_parallel_bringup(void)
1830	{
1831	return true;
1832	}
1833
1834	/*
1835	* On architectures which have enabled parallel bringup this invokes all BP
1836	* prepare states for each of the to be onlined APs first. The last state
1837	* sends the startup IPI to the APs. The APs proceed through the low level
1838	* bringup code in parallel and then wait for the control CPU to release
1839	* them one by one for the final onlining procedure.
1840	*
1841	* This avoids waiting for each AP to respond to the startup IPI in
1842	* CPUHP_BRINGUP_CPU.
1843	*/
1844	static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
1845	{
1846	const struct cpumask *mask = cpu_present_mask;
1847
1848	if (__cpuhp_parallel_bringup)
1849	__cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
1850	if (!__cpuhp_parallel_bringup)
1851	return false;
1852
1853	if (cpuhp_smt_aware()) {
1854	const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
1855	static struct cpumask tmp_mask __initdata;
1856
1857	/*
1858	* X86 requires to prevent that SMT siblings stopped while
1859	* the primary thread does a microcode update for various
1860	* reasons. Bring the primary threads up first.
1861	*/
1862	cpumask_and(dstp: &tmp_mask, src1p: mask, src2p: pmask);
1863	cpuhp_bringup_mask(mask: &tmp_mask, ncpus, target: CPUHP_BP_KICK_AP);
1864	cpuhp_bringup_mask(mask: &tmp_mask, ncpus, target: CPUHP_ONLINE);
1865	/ Account for the online CPUs /
1866	ncpus -= num_online_cpus();
1867	if (!ncpus)
1868	return true;
1869	/ Create the mask for secondary CPUs /
1870	cpumask_andnot(dstp: &tmp_mask, src1p: mask, src2p: pmask);
1871	mask = &tmp_mask;
1872	}
1873
1874	/ Bring the not-yet started CPUs up /
1875	cpuhp_bringup_mask(mask, ncpus, target: CPUHP_BP_KICK_AP);
1876	cpuhp_bringup_mask(mask, ncpus, target: CPUHP_ONLINE);
1877	return true;
1878	}
1879	#else
1880	static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
1881	#endif /* CONFIG_HOTPLUG_PARALLEL */
1882
1883	void __init bringup_nonboot_cpus(unsigned int max_cpus)
1884	{
1885	if (!max_cpus)
1886	return;
1887
1888	/ Try parallel bringup optimization if enabled /
1889	if (cpuhp_bringup_cpus_parallel(ncpus: max_cpus))
1890	return;
1891
1892	/ Full per CPU serialized bringup /
1893	cpuhp_bringup_mask(cpu_present_mask, ncpus: max_cpus, target: CPUHP_ONLINE);
1894	}
1895
1896	#ifdef CONFIG_PM_SLEEP_SMP
1897	static cpumask_var_t frozen_cpus;
1898
1899	int freeze_secondary_cpus(int primary)
1900	{
1901	int cpu, error = `0`;
1902
1903	cpu_maps_update_begin();
1904	if (primary == -`1`) {
1905	primary = cpumask_first(cpu_online_mask);
1906	if (!housekeeping_cpu(cpu: primary, type: HK_TYPE_TIMER))
1907	primary = housekeeping_any_cpu(type: HK_TYPE_TIMER);
1908	} else {
1909	if (!cpu_online(cpu: primary))
1910	primary = cpumask_first(cpu_online_mask);
1911	}
1912
1913	/*
1914	* We take down all of the non-boot CPUs in one shot to avoid races
1915	* with the userspace trying to use the CPU hotplug at the same time
1916	*/
1917	cpumask_clear(dstp: frozen_cpus);
1918
1919	pr_info("Disabling non-boot CPUs ...\n");
1920	for (cpu = nr_cpu_ids - `1`; cpu >= `0`; cpu--) {
1921	if (!cpu_online(cpu) \|\| cpu == primary)
1922	continue;
1923
1924	if (pm_wakeup_pending()) {
1925	pr_info("Wakeup pending. Abort CPU freeze\n");
1926	error = -EBUSY;
1927	break;
1928	}
1929
1930	trace_suspend_resume(TPS("CPU_OFF"), val: cpu, start: true);
1931	error = _cpu_down(cpu, tasks_frozen: `1`, target: CPUHP_OFFLINE);
1932	trace_suspend_resume(TPS("CPU_OFF"), val: cpu, start: false);
1933	if (!error)
1934	cpumask_set_cpu(cpu, dstp: frozen_cpus);
1935	else {
1936	pr_err("Error taking CPU%d down: %d\n", cpu, error);
1937	break;
1938	}
1939	}
1940
1941	if (!error)
1942	BUG_ON(num_online_cpus() > `1`);
1943	else
1944	pr_err("Non-boot CPUs are not disabled\n");
1945
1946	/*
1947	* Make sure the CPUs won't be enabled by someone else. We need to do
1948	* this even in case of failure as all freeze_secondary_cpus() users are
1949	* supposed to do thaw_secondary_cpus() on the failure path.
1950	*/
1951	cpu_hotplug_disabled++;
1952
1953	cpu_maps_update_done();
1954	return error;
1955	}
1956
1957	void __weak arch_thaw_secondary_cpus_begin(void)
1958	{
1959	}
1960
1961	void __weak arch_thaw_secondary_cpus_end(void)
1962	{
1963	}
1964
1965	void thaw_secondary_cpus(void)
1966	{
1967	int cpu, error;
1968
1969	/ Allow everyone to use the CPU hotplug again /
1970	cpu_maps_update_begin();
1971	__cpu_hotplug_enable();
1972	if (cpumask_empty(srcp: frozen_cpus))
1973	goto out;
1974
1975	pr_info("Enabling non-boot CPUs ...\n");
1976
1977	arch_thaw_secondary_cpus_begin();
1978
1979	for_each_cpu(cpu, frozen_cpus) {
1980	trace_suspend_resume(TPS("CPU_ON"), val: cpu, start: true);
1981	error = _cpu_up(cpu, tasks_frozen: `1`, target: CPUHP_ONLINE);
1982	trace_suspend_resume(TPS("CPU_ON"), val: cpu, start: false);
1983	if (!error) {
1984	pr_info("CPU%d is up\n", cpu);
1985	continue;
1986	}
1987	pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1988	}
1989
1990	arch_thaw_secondary_cpus_end();
1991
1992	cpumask_clear(dstp: frozen_cpus);
1993	out:
1994	cpu_maps_update_done();
1995	}
1996
1997	static int __init alloc_frozen_cpus(void)
1998	{
1999	if (!alloc_cpumask_var(mask: &frozen_cpus, GFP_KERNEL\|__GFP_ZERO))
2000	return -ENOMEM;
2001	return `0`;
2002	}
2003	core_initcall(alloc_frozen_cpus);
2004
2005	/*
2006	* When callbacks for CPU hotplug notifications are being executed, we must
2007	* ensure that the state of the system with respect to the tasks being frozen
2008	* or not, as reported by the notification, remains unchanged *throughout the
2009	* duration* of the execution of the callbacks.
2010	* Hence we need to prevent the freezer from racing with regular CPU hotplug.
2011	*
2012	* This synchronization is implemented by mutually excluding regular CPU
2013	* hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
2014	* Hibernate notifications.
2015	*/
2016	static int
2017	cpu_hotplug_pm_callback(struct notifier_block *nb,
2018	unsigned long action, void *ptr)
2019	{
2020	switch (action) {
2021
2022	case PM_SUSPEND_PREPARE:
2023	case PM_HIBERNATION_PREPARE:
2024	cpu_hotplug_disable();
2025	break;
2026
2027	case PM_POST_SUSPEND:
2028	case PM_POST_HIBERNATION:
2029	cpu_hotplug_enable();
2030	break;
2031
2032	default:
2033	return NOTIFY_DONE;
2034	}
2035
2036	return NOTIFY_OK;
2037	}
2038
2039
2040	static int __init cpu_hotplug_pm_sync_init(void)
2041	{
2042	/*
2043	* cpu_hotplug_pm_callback has higher priority than x86
2044	* bsp_pm_callback which depends on cpu_hotplug_pm_callback
2045	* to disable cpu hotplug to avoid cpu hotplug race.
2046	*/
2047	pm_notifier(cpu_hotplug_pm_callback, `0`);
2048	return `0`;
2049	}
2050	core_initcall(cpu_hotplug_pm_sync_init);
2051
2052	#endif /* CONFIG_PM_SLEEP_SMP */
2053
/* NOTE(review): presumably the id of the boot CPU; it is assigned outside this part of the file - confirm. */
int __boot_cpu_id;
2055
2056	#endif /* CONFIG_SMP */
2057
2058	/ Boot processor state steps /
2059	static struct cpuhp_step cpuhp_hp_states[] = {
2060	[CPUHP_OFFLINE] = {
2061	.name = "offline",
2062	.startup.single = NULL,
2063	.teardown.single = NULL,
2064	},
2065	#ifdef CONFIG_SMP
2066	[CPUHP_CREATE_THREADS]= {
2067	.name = "threads:prepare",
2068	.startup.single = smpboot_create_threads,
2069	.teardown.single = NULL,
2070	.cant_stop = true,
2071	},
2072	[CPUHP_RANDOM_PREPARE] = {
2073	.name = "random:prepare",
2074	.startup.single = random_prepare_cpu,
2075	.teardown.single = NULL,
2076	},
2077	[CPUHP_WORKQUEUE_PREP] = {
2078	.name = "workqueue:prepare",
2079	.startup.single = workqueue_prepare_cpu,
2080	.teardown.single = NULL,
2081	},
2082	[CPUHP_HRTIMERS_PREPARE] = {
2083	.name = "hrtimers:prepare",
2084	.startup.single = hrtimers_prepare_cpu,
2085	.teardown.single = NULL,
2086	},
2087	[CPUHP_SMPCFD_PREPARE] = {
2088	.name = "smpcfd:prepare",
2089	.startup.single = smpcfd_prepare_cpu,
2090	.teardown.single = smpcfd_dead_cpu,
2091	},
2092	[CPUHP_RELAY_PREPARE] = {
2093	.name = "relay:prepare",
2094	.startup.single = relay_prepare_cpu,
2095	.teardown.single = NULL,
2096	},
2097	[CPUHP_RCUTREE_PREP] = {
2098	.name = "RCU/tree:prepare",
2099	.startup.single = rcutree_prepare_cpu,
2100	.teardown.single = rcutree_dead_cpu,
2101	},
2102	/*
2103	* On the tear-down path, timers_dead_cpu() must be invoked
2104	* before blk_mq_queue_reinit_notify() from notify_dead(),
2105	* otherwise a RCU stall occurs.
2106	*/
2107	[CPUHP_TIMERS_PREPARE] = {
2108	.name = "timers:prepare",
2109	.startup.single = timers_prepare_cpu,
2110	.teardown.single = timers_dead_cpu,
2111	},
2112
2113	#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
2114	/*
2115	* Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
2116	* the next step will release it.
2117	*/
2118	[CPUHP_BP_KICK_AP] = {
2119	.name = "cpu:kick_ap",
2120	.startup.single = cpuhp_kick_ap_alive,
2121	},
2122
2123	/*
2124	* Waits for the AP to reach cpuhp_ap_sync_alive() and then
2125	* releases it for the complete bringup.
2126	*/
2127	[CPUHP_BRINGUP_CPU] = {
2128	.name = "cpu:bringup",
2129	.startup.single = cpuhp_bringup_ap,
2130	.teardown.single = finish_cpu,
2131	.cant_stop = true,
2132	},
2133	#else
2134	/*
2135	* All-in-one CPU bringup state which includes the kick alive.
2136	*/
2137	[CPUHP_BRINGUP_CPU] = {
2138	.name = "cpu:bringup",
2139	.startup.single = bringup_cpu,
2140	.teardown.single = finish_cpu,
2141	.cant_stop = true,
2142	},
2143	#endif
2144	/ Final state before CPU kills itself /
2145	[CPUHP_AP_IDLE_DEAD] = {
2146	.name = "idle:dead",
2147	},
2148	/*
2149	* Last state before CPU enters the idle loop to die. Transient state
2150	* for synchronization.
2151	*/
2152	[CPUHP_AP_OFFLINE] = {
2153	.name = "ap:offline",
2154	.cant_stop = true,
2155	},
2156	/ First state is scheduler control. Interrupts are disabled /
2157	[CPUHP_AP_SCHED_STARTING] = {
2158	.name = "sched:starting",
2159	.startup.single = sched_cpu_starting,
2160	.teardown.single = sched_cpu_dying,
2161	},
2162	[CPUHP_AP_RCUTREE_DYING] = {
2163	.name = "RCU/tree:dying",
2164	.startup.single = NULL,
2165	.teardown.single = rcutree_dying_cpu,
2166	},
2167	[CPUHP_AP_SMPCFD_DYING] = {
2168	.name = "smpcfd:dying",
2169	.startup.single = NULL,
2170	.teardown.single = smpcfd_dying_cpu,
2171	},
2172	[CPUHP_AP_HRTIMERS_DYING] = {
2173	.name = "hrtimers:dying",
2174	.startup.single = hrtimers_cpu_starting,
2175	.teardown.single = hrtimers_cpu_dying,
2176	},
2177	[CPUHP_AP_TICK_DYING] = {
2178	.name = "tick:dying",
2179	.startup.single = NULL,
2180	.teardown.single = tick_cpu_dying,
2181	},
2182	/ Entry state on starting. Interrupts enabled from here on. Transient*
2183	* state for synchronsization */
2184	[CPUHP_AP_ONLINE] = {
2185	.name = "ap:online",
2186	},
2187	/*
2188	* Handled on control processor until the plugged processor manages
2189	* this itself.
2190	*/
2191	[CPUHP_TEARDOWN_CPU] = {
2192	.name = "cpu:teardown",
2193	.startup.single = NULL,
2194	.teardown.single = takedown_cpu,
2195	.cant_stop = true,
2196	},
2197
2198	[CPUHP_AP_SCHED_WAIT_EMPTY] = {
2199	.name = "sched:waitempty",
2200	.startup.single = NULL,
2201	.teardown.single = sched_cpu_wait_empty,
2202	},
2203
2204	/ Handle smpboot threads park/unpark /
2205	[CPUHP_AP_SMPBOOT_THREADS] = {
2206	.name = "smpboot/threads:online",
2207	.startup.single = smpboot_unpark_threads,
2208	.teardown.single = smpboot_park_threads,
2209	},
2210	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
2211	.name = "irq/affinity:online",
2212	.startup.single = irq_affinity_online_cpu,
2213	.teardown.single = NULL,
2214	},
2215	[CPUHP_AP_PERF_ONLINE] = {
2216	.name = "perf:online",
2217	.startup.single = perf_event_init_cpu,
2218	.teardown.single = perf_event_exit_cpu,
2219	},
2220	[CPUHP_AP_WATCHDOG_ONLINE] = {
2221	.name = "lockup_detector:online",
2222	.startup.single = lockup_detector_online_cpu,
2223	.teardown.single = lockup_detector_offline_cpu,
2224	},
2225	[CPUHP_AP_WORKQUEUE_ONLINE] = {
2226	.name = "workqueue:online",
2227	.startup.single = workqueue_online_cpu,
2228	.teardown.single = workqueue_offline_cpu,
2229	},
2230	[CPUHP_AP_RANDOM_ONLINE] = {
2231	.name = "random:online",
2232	.startup.single = random_online_cpu,
2233	.teardown.single = NULL,
2234	},
2235	[CPUHP_AP_RCUTREE_ONLINE] = {
2236	.name = "RCU/tree:online",
2237	.startup.single = rcutree_online_cpu,
2238	.teardown.single = rcutree_offline_cpu,
2239	},
2240	#endif
2241	/*
2242	* The dynamically registered state space is here
2243	*/
2244
2245	#ifdef CONFIG_SMP
2246	/ Last state is scheduler control setting the cpu active /
2247	[CPUHP_AP_ACTIVE] = {
2248	.name = "sched:active",
2249	.startup.single = sched_cpu_activate,
2250	.teardown.single = sched_cpu_deactivate,
2251	},
2252	#endif
2253
	/* CPU is fully up and running. */
2255	[CPUHP_ONLINE] = {
2256	.name = "online",
2257	.startup.single = NULL,
2258	.teardown.single = NULL,
2259	},
2260	};
2261
2262	/ Sanity check for callbacks /
2263	static int cpuhp_cb_check(enum cpuhp_state state)
2264	{
2265	if (state <= CPUHP_OFFLINE \|\| state >= CPUHP_ONLINE)
2266	return -EINVAL;
2267	return `0`;
2268	}
2269
2270	/*
2271	* Returns a free for dynamic slot assignment of the Online state. The states
2272	* are protected by the cpuhp_slot_states mutex and an empty slot is identified
2273	* by having no name assigned.
2274	*/
2275	static int cpuhp_reserve_state(enum cpuhp_state state)
2276	{
2277	enum cpuhp_state i, end;
2278	struct cpuhp_step *step;
2279
2280	switch (state) {
2281	case CPUHP_AP_ONLINE_DYN:
2282	step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
2283	end = CPUHP_AP_ONLINE_DYN_END;
2284	break;
2285	case CPUHP_BP_PREPARE_DYN:
2286	step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
2287	end = CPUHP_BP_PREPARE_DYN_END;
2288	break;
2289	default:
2290	return -EINVAL;
2291	}
2292
2293	for (i = state; i <= end; i++, step++) {
2294	if (!step->name)
2295	return i;
2296	}
2297	WARN(`1`, "No more dynamic states available for CPU hotplug\n");
2298	return -ENOSPC;
2299	}
2300
2301	static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
2302	int (startup)(unsigned* int cpu),
2303	int (teardown)(unsigned* int cpu),
2304	bool multi_instance)
2305	{
2306	/ (Un)Install the callbacks for further cpu hotplug operations /
2307	struct cpuhp_step *sp;
2308	int ret = `0`;
2309
2310	/*
2311	* If name is NULL, then the state gets removed.
2312	*
2313	* CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
2314	* the first allocation from these dynamic ranges, so the removal
2315	* would trigger a new allocation and clear the wrong (already
2316	* empty) state, leaving the callbacks of the to be cleared state
2317	* dangling, which causes wreckage on the next hotplug operation.
2318	*/
2319	if (name && (state == CPUHP_AP_ONLINE_DYN \|\|
2320	state == CPUHP_BP_PREPARE_DYN)) {
2321	ret = cpuhp_reserve_state(state);
2322	if (ret < `0`)
2323	return ret;
2324	state = ret;
2325	}
2326	sp = cpuhp_get_step(state);
2327	if (name && sp->name)
2328	return -EBUSY;
2329
2330	sp->startup.single = startup;
2331	sp->teardown.single = teardown;
2332	sp->name = name;
2333	sp->multi_instance = multi_instance;
2334	INIT_HLIST_HEAD(&sp->list);
2335	return ret;
2336	}
2337
2338	static void cpuhp_get_teardown_cb(enum* cpuhp_state state)
2339	{
2340	return cpuhp_get_step(state)->teardown.single;
2341	}
2342
2343	/*
2344	* Call the startup/teardown function for a step either on the AP or
2345	* on the current CPU.
2346	*/
2347	static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
2348	struct hlist_node *node)
2349	{
2350	struct cpuhp_step *sp = cpuhp_get_step(state);
2351	int ret;
2352
2353	/*
2354	* If there's nothing to do, we done.
2355	* Relies on the union for multi_instance.
2356	*/
2357	if (cpuhp_step_empty(bringup, step: sp))
2358	return `0`;
2359	/*
2360	* The non AP bound callbacks can fail on bringup. On teardown
2361	* e.g. module removal we crash for now.
2362	*/
2363	#ifdef CONFIG_SMP
2364	if (cpuhp_is_ap_state(state))
2365	ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
2366	else
2367	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2368	#else
2369	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2370	#endif
2371	BUG_ON(ret && !bringup);
2372	return ret;
2373	}
2374
2375	/*
2376	* Called from __cpuhp_setup_state on a recoverable failure.
2377	*
2378	* Note: The teardown callbacks for rollback are not allowed to fail!
2379	*/
2380	static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
2381	struct hlist_node *node)
2382	{
2383	int cpu;
2384
2385	/ Roll back the already executed steps on the other cpus /
2386	for_each_present_cpu(cpu) {
2387	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2388	int cpustate = st->state;
2389
2390	if (cpu >= failedcpu)
2391	break;
2392
2393	/ Did we invoke the startup call on that cpu ? /
2394	if (cpustate >= state)
2395	cpuhp_issue_call(cpu, state, bringup: false, node);
2396	}
2397	}
2398
2399	int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
2400	struct hlist_node *node,
2401	bool invoke)
2402	{
2403	struct cpuhp_step *sp;
2404	int cpu;
2405	int ret;
2406
2407	lockdep_assert_cpus_held();
2408
2409	sp = cpuhp_get_step(state);
2410	if (sp->multi_instance == false)
2411	return -EINVAL;
2412
2413	mutex_lock(&cpuhp_state_mutex);
2414
2415	if (!invoke \|\| !sp->startup.multi)
2416	goto add_node;
2417
2418	/*
2419	* Try to call the startup callback for each present cpu
2420	* depending on the hotplug state of the cpu.
2421	*/
2422	for_each_present_cpu(cpu) {
2423	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2424	int cpustate = st->state;
2425
2426	if (cpustate < state)
2427	continue;
2428
2429	ret = cpuhp_issue_call(cpu, state, bringup: true, node);
2430	if (ret) {
2431	if (sp->teardown.multi)
2432	cpuhp_rollback_install(failedcpu: cpu, state, node);
2433	goto unlock;
2434	}
2435	}
2436	add_node:
2437	ret = `0`;
2438	hlist_add_head(n: node, h: &sp->list);
2439	unlock:
2440	mutex_unlock(lock: &cpuhp_state_mutex);
2441	return ret;
2442	}
2443
2444	int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
2445	bool invoke)
2446	{
2447	int ret;
2448
2449	cpus_read_lock();
2450	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2451	cpus_read_unlock();
2452	return ret;
2453	}
2454	EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
2455
2456	/**
2457	* __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
2458	* @state: The state to setup
2459	* @name: Name of the step
2460	* @invoke: If true, the startup function is invoked for cpus where
2461	* cpu state >= @state
2462	* @startup: startup callback function
2463	* @teardown: teardown callback function
2464	* @multi_instance: State is set up for multiple instances which get
2465	* added afterwards.
2466	*
2467	* The caller needs to hold cpus read locked while calling this function.
2468	* Return:
2469	* On success:
2470	* Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN;
2471	* 0 for all other states
2472	* On failure: proper (negative) error code
2473	*/
2474	int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
2475	const char *name, bool invoke,
2476	int (startup)(unsigned* int cpu),
2477	int (teardown)(unsigned* int cpu),
2478	bool multi_instance)
2479	{
2480	int cpu, ret = `0`;
2481	bool dynstate;
2482
2483	lockdep_assert_cpus_held();
2484
2485	if (cpuhp_cb_check(state) \|\| !name)
2486	return -EINVAL;
2487
2488	mutex_lock(&cpuhp_state_mutex);
2489
2490	ret = cpuhp_store_callbacks(state, name, startup, teardown,
2491	multi_instance);
2492
2493	dynstate = state == CPUHP_AP_ONLINE_DYN \|\| state == CPUHP_BP_PREPARE_DYN;
2494	if (ret > `0` && dynstate) {
2495	state = ret;
2496	ret = `0`;
2497	}
2498
2499	if (ret \|\| !invoke \|\| !startup)
2500	goto out;
2501
2502	/*
2503	* Try to call the startup callback for each present cpu
2504	* depending on the hotplug state of the cpu.
2505	*/
2506	for_each_present_cpu(cpu) {
2507	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2508	int cpustate = st->state;
2509
2510	if (cpustate < state)
2511	continue;
2512
2513	ret = cpuhp_issue_call(cpu, state, bringup: true, NULL);
2514	if (ret) {
2515	if (teardown)
2516	cpuhp_rollback_install(failedcpu: cpu, state, NULL);
2517	cpuhp_store_callbacks(state, NULL, NULL, NULL, multi_instance: false);
2518	goto out;
2519	}
2520	}
2521	out:
2522	mutex_unlock(lock: &cpuhp_state_mutex);
2523	/*
2524	* If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN,
2525	* return the dynamically allocated state in case of success.
2526	*/
2527	if (!ret && dynstate)
2528	return state;
2529	return ret;
2530	}
2531	EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2532
2533	int __cpuhp_setup_state(enum cpuhp_state state,
2534	const char *name, bool invoke,
2535	int (startup)(unsigned* int cpu),
2536	int (teardown)(unsigned* int cpu),
2537	bool multi_instance)
2538	{
2539	int ret;
2540
2541	cpus_read_lock();
2542	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
2543	teardown, multi_instance);
2544	cpus_read_unlock();
2545	return ret;
2546	}
2547	EXPORT_SYMBOL(__cpuhp_setup_state);
2548
2549	int __cpuhp_state_remove_instance(enum cpuhp_state state,
2550	struct hlist_node *node, bool invoke)
2551	{
2552	struct cpuhp_step *sp = cpuhp_get_step(state);
2553	int cpu;
2554
2555	BUG_ON(cpuhp_cb_check(state));
2556
2557	if (!sp->multi_instance)
2558	return -EINVAL;
2559
2560	cpus_read_lock();
2561	mutex_lock(&cpuhp_state_mutex);
2562
2563	if (!invoke \|\| !cpuhp_get_teardown_cb(state))
2564	goto remove;
2565	/*
2566	* Call the teardown callback for each present cpu depending
2567	* on the hotplug state of the cpu. This function is not
2568	* allowed to fail currently!
2569	*/
2570	for_each_present_cpu(cpu) {
2571	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2572	int cpustate = st->state;
2573
2574	if (cpustate >= state)
2575	cpuhp_issue_call(cpu, state, bringup: false, node);
2576	}
2577
2578	remove:
2579	hlist_del(n: node);
2580	mutex_unlock(lock: &cpuhp_state_mutex);
2581	cpus_read_unlock();
2582
2583	return `0`;
2584	}
2585	EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2586
2587	/**
2588	* __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
2589	* @state: The state to remove
2590	* @invoke: If true, the teardown function is invoked for cpus where
2591	* cpu state >= @state
2592	*
2593	* The caller needs to hold cpus read locked while calling this function.
2594	* The teardown callback is currently not allowed to fail. Think
2595	* about module removal!
2596	*/
2597	void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2598	{
2599	struct cpuhp_step *sp = cpuhp_get_step(state);
2600	int cpu;
2601
2602	BUG_ON(cpuhp_cb_check(state));
2603
2604	lockdep_assert_cpus_held();
2605
2606	mutex_lock(&cpuhp_state_mutex);
2607	if (sp->multi_instance) {
2608	WARN(!hlist_empty(&sp->list),
2609	"Error: Removing state %d which has instances left.\n",
2610	state);
2611	goto remove;
2612	}
2613
2614	if (!invoke \|\| !cpuhp_get_teardown_cb(state))
2615	goto remove;
2616
2617	/*
2618	* Call the teardown callback for each present cpu depending
2619	* on the hotplug state of the cpu. This function is not
2620	* allowed to fail currently!
2621	*/
2622	for_each_present_cpu(cpu) {
2623	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2624	int cpustate = st->state;
2625
2626	if (cpustate >= state)
2627	cpuhp_issue_call(cpu, state, bringup: false, NULL);
2628	}
2629	remove:
2630	cpuhp_store_callbacks(state, NULL, NULL, NULL, multi_instance: false);
2631	mutex_unlock(lock: &cpuhp_state_mutex);
2632	}
2633	EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2634
2635	void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2636	{
2637	cpus_read_lock();
2638	__cpuhp_remove_state_cpuslocked(state, invoke);
2639	cpus_read_unlock();
2640	}
2641	EXPORT_SYMBOL(__cpuhp_remove_state);
2642
2643	#ifdef CONFIG_HOTPLUG_SMT
2644	static void cpuhp_offline_cpu_device(unsigned int cpu)
2645	{
2646	struct device *dev = get_cpu_device(cpu);
2647
2648	dev->offline = true;
2649	/ Tell user space about the state change /
2650	kobject_uevent(kobj: &dev->kobj, action: KOBJ_OFFLINE);
2651	}
2652
2653	static void cpuhp_online_cpu_device(unsigned int cpu)
2654	{
2655	struct device *dev = get_cpu_device(cpu);
2656
2657	dev->offline = false;
2658	/ Tell user space about the state change /
2659	kobject_uevent(kobj: &dev->kobj, action: KOBJ_ONLINE);
2660	}
2661
2662	int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2663	{
2664	int cpu, ret = `0`;
2665
2666	cpu_maps_update_begin();
2667	for_each_online_cpu(cpu) {
2668	if (topology_is_primary_thread(cpu))
2669	continue;
2670	/*
2671	* Disable can be called with CPU_SMT_ENABLED when changing
2672	* from a higher to lower number of SMT threads per core.
2673	*/
2674	if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
2675	continue;
2676	ret = cpu_down_maps_locked(cpu, target: CPUHP_OFFLINE);
2677	if (ret)
2678	break;
2679	/*
2680	* As this needs to hold the cpu maps lock it's impossible
2681	* to call device_offline() because that ends up calling
2682	* cpu_down() which takes cpu maps lock. cpu maps lock
2683	* needs to be held as this might race against in kernel
2684	* abusers of the hotplug machinery (thermal management).
2685	*
2686	* So nothing would update device:offline state. That would
2687	* leave the sysfs entry stale and prevent onlining after
2688	* smt control has been changed to 'off' again. This is
2689	* called under the sysfs hotplug lock, so it is properly
2690	* serialized against the regular offline usage.
2691	*/
2692	cpuhp_offline_cpu_device(cpu);
2693	}
2694	if (!ret)
2695	cpu_smt_control = ctrlval;
2696	cpu_maps_update_done();
2697	return ret;
2698	}
2699
/*
 * Check if the core a CPU belongs to is online.
 *
 * Fallback used only when no topology_is_core_online macro is defined:
 * every core is reported as online.
 */
#if !defined(topology_is_core_online)
static inline bool topology_is_core_online(unsigned int cpu)
{
	return true;
}
#endif
2707
2708	int cpuhp_smt_enable(void)
2709	{
2710	int cpu, ret = `0`;
2711
2712	cpu_maps_update_begin();
2713	cpu_smt_control = CPU_SMT_ENABLED;
2714	for_each_present_cpu(cpu) {
2715	/ Skip online CPUs and CPUs on offline nodes /
2716	if (cpu_online(cpu) \|\| !node_online(cpu_to_node(cpu)))
2717	continue;
2718	if (!cpu_smt_thread_allowed(cpu) \|\| !topology_is_core_online(cpu))
2719	continue;
2720	ret = _cpu_up(cpu, tasks_frozen: `0`, target: CPUHP_ONLINE);
2721	if (ret)
2722	break;
2723	/ See comment in cpuhp_smt_disable() /
2724	cpuhp_online_cpu_device(cpu);
2725	}
2726	cpu_maps_update_done();
2727	return ret;
2728	}
2729	#endif
2730
2731	#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2732	static ssize_t state_show(struct device *dev,
2733	struct device_attribute attr, char* *buf)
2734	{
2735	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2736
2737	return sprintf(buf, fmt: "%d\n", st->state);
2738	}
2739	static DEVICE_ATTR_RO(state);
2740
2741	static ssize_t target_store(struct device dev, struct* device_attribute *attr,
2742	const char *buf, size_t count)
2743	{
2744	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2745	struct cpuhp_step *sp;
2746	int target, ret;
2747
2748	ret = kstrtoint(s: buf, base: `10`, res: &target);
2749	if (ret)
2750	return ret;
2751
2752	#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2753	if (target < CPUHP_OFFLINE \|\| target > CPUHP_ONLINE)
2754	return -EINVAL;
2755	#else
2756	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2757	return -EINVAL;
2758	#endif
2759
2760	ret = lock_device_hotplug_sysfs();
2761	if (ret)
2762	return ret;
2763
2764	mutex_lock(&cpuhp_state_mutex);
2765	sp = cpuhp_get_step(state: target);
2766	ret = !sp->name \|\| sp->cant_stop ? -EINVAL : `0`;
2767	mutex_unlock(lock: &cpuhp_state_mutex);
2768	if (ret)
2769	goto out;
2770
2771	if (st->state < target)
2772	ret = cpu_up(cpu: dev->id, target);
2773	else if (st->state > target)
2774	ret = cpu_down(cpu: dev->id, target);
2775	else if (WARN_ON(st->target != target))
2776	st->target = target;
2777	out:
2778	unlock_device_hotplug();
2779	return ret ? ret : count;
2780	}
2781
2782	static ssize_t target_show(struct device *dev,
2783	struct device_attribute attr, char* *buf)
2784	{
2785	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2786
2787	return sprintf(buf, fmt: "%d\n", st->target);
2788	}
2789	static DEVICE_ATTR_RW(target);
2790
2791	static ssize_t fail_store(struct device dev, struct* device_attribute *attr,
2792	const char *buf, size_t count)
2793	{
2794	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2795	struct cpuhp_step *sp;
2796	int fail, ret;
2797
2798	ret = kstrtoint(s: buf, base: `10`, res: &fail);
2799	if (ret)
2800	return ret;
2801
2802	if (fail == CPUHP_INVALID) {
2803	st->fail = fail;
2804	return count;
2805	}
2806
2807	if (fail < CPUHP_OFFLINE \|\| fail > CPUHP_ONLINE)
2808	return -EINVAL;
2809
2810	/*
2811	* Cannot fail STARTING/DYING callbacks.
2812	*/
2813	if (cpuhp_is_atomic_state(state: fail))
2814	return -EINVAL;
2815
2816	/*
2817	* DEAD callbacks cannot fail...
2818	* ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
2819	* triggering STARTING callbacks, a failure in this state would
2820	* hinder rollback.
2821	*/
2822	if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
2823	return -EINVAL;
2824
2825	/*
2826	* Cannot fail anything that doesn't have callbacks.
2827	*/
2828	mutex_lock(&cpuhp_state_mutex);
2829	sp = cpuhp_get_step(state: fail);
2830	if (!sp->startup.single && !sp->teardown.single)
2831	ret = -EINVAL;
2832	mutex_unlock(lock: &cpuhp_state_mutex);
2833	if (ret)
2834	return ret;
2835
2836	st->fail = fail;
2837
2838	return count;
2839	}
2840
2841	static ssize_t fail_show(struct device *dev,
2842	struct device_attribute attr, char* *buf)
2843	{
2844	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2845
2846	return sprintf(buf, fmt: "%d\n", st->fail);
2847	}
2848
2849	static DEVICE_ATTR_RW(fail);
2850
2851	static struct attribute *cpuhp_cpu_attrs[] = {
2852	&dev_attr_state.attr,
2853	&dev_attr_target.attr,
2854	&dev_attr_fail.attr,
2855	NULL
2856	};
2857
2858	static const struct attribute_group cpuhp_cpu_attr_group = {
2859	.attrs = cpuhp_cpu_attrs,
2860	.name = "hotplug",
2861	};
2862
2863	static ssize_t states_show(struct device *dev,
2864	struct device_attribute attr, char* *buf)
2865	{
2866	ssize_t cur, res = `0`;
2867	int i;
2868
2869	mutex_lock(&cpuhp_state_mutex);
2870	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2871	struct cpuhp_step *sp = cpuhp_get_step(state: i);
2872
2873	if (sp->name) {
2874	cur = sprintf(buf, fmt: "%3d: %s\n", i, sp->name);
2875	buf += cur;
2876	res += cur;
2877	}
2878	}
2879	mutex_unlock(lock: &cpuhp_state_mutex);
2880	return res;
2881	}
2882	static DEVICE_ATTR_RO(states);
2883
2884	static struct attribute *cpuhp_cpu_root_attrs[] = {
2885	&dev_attr_states.attr,
2886	NULL
2887	};
2888
2889	static const struct attribute_group cpuhp_cpu_root_attr_group = {
2890	.attrs = cpuhp_cpu_root_attrs,
2891	.name = "hotplug",
2892	};
2893
2894	#ifdef CONFIG_HOTPLUG_SMT
2895
2896	static bool cpu_smt_num_threads_valid(unsigned int threads)
2897	{
2898	if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC))
2899	return threads >= `1` && threads <= cpu_smt_max_threads;
2900	return threads == `1` \|\| threads == cpu_smt_max_threads;
2901	}
2902
2903	static ssize_t
2904	__store_smt_control(struct device dev, struct* device_attribute *attr,
2905	const char *buf, size_t count)
2906	{
2907	int ctrlval, ret, num_threads, orig_threads;
2908	bool force_off;
2909
2910	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2911	return -EPERM;
2912
2913	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2914	return -ENODEV;
2915
2916	if (sysfs_streq(s1: buf, s2: "on")) {
2917	ctrlval = CPU_SMT_ENABLED;
2918	num_threads = cpu_smt_max_threads;
2919	} else if (sysfs_streq(s1: buf, s2: "off")) {
2920	ctrlval = CPU_SMT_DISABLED;
2921	num_threads = `1`;
2922	} else if (sysfs_streq(s1: buf, s2: "forceoff")) {
2923	ctrlval = CPU_SMT_FORCE_DISABLED;
2924	num_threads = `1`;
2925	} else if (kstrtoint(s: buf, base: `10`, res: &num_threads) == `0`) {
2926	if (num_threads == `1`)
2927	ctrlval = CPU_SMT_DISABLED;
2928	else if (cpu_smt_num_threads_valid(threads: num_threads))
2929	ctrlval = CPU_SMT_ENABLED;
2930	else
2931	return -EINVAL;
2932	} else {
2933	return -EINVAL;
2934	}
2935
2936	ret = lock_device_hotplug_sysfs();
2937	if (ret)
2938	return ret;
2939
2940	orig_threads = cpu_smt_num_threads;
2941	cpu_smt_num_threads = num_threads;
2942
2943	force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED;
2944
2945	if (num_threads > orig_threads)
2946	ret = cpuhp_smt_enable();
2947	else if (num_threads < orig_threads \|\| force_off)
2948	ret = cpuhp_smt_disable(ctrlval);
2949
2950	unlock_device_hotplug();
2951	return ret ? ret : count;
2952	}
2953
2954	#else /* !CONFIG_HOTPLUG_SMT */
/* Stub for kernels built without CONFIG_HOTPLUG_SMT: always -ENODEV */
static ssize_t
__store_smt_control(struct device *dev, struct device_attribute *attr,
		    const char *buf, size_t count)
{
	return -ENODEV;
}
2961	#endif /* CONFIG_HOTPLUG_SMT */
2962
2963	static const char *smt_states[] = {
2964	[CPU_SMT_ENABLED] = "on",
2965	[CPU_SMT_DISABLED] = "off",
2966	[CPU_SMT_FORCE_DISABLED] = "forceoff",
2967	[CPU_SMT_NOT_SUPPORTED] = "notsupported",
2968	[CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2969	};
2970
2971	static ssize_t control_show(struct device *dev,
2972	struct device_attribute attr, char* *buf)
2973	{
2974	const char *state = smt_states[cpu_smt_control];
2975
2976	#ifdef CONFIG_HOTPLUG_SMT
2977	/*
2978	* If SMT is enabled but not all threads are enabled then show the
2979	* number of threads. If all threads are enabled show "on". Otherwise
2980	* show the state name.
2981	*/
2982	if (cpu_smt_control == CPU_SMT_ENABLED &&
2983	cpu_smt_num_threads != cpu_smt_max_threads)
2984	return sysfs_emit(buf, fmt: "%d\n", cpu_smt_num_threads);
2985	#endif
2986
2987	return sysfs_emit(buf, fmt: "%s\n", state);
2988	}
2989
/* sysfs store handler for the SMT control attribute; see __store_smt_control() */
static ssize_t control_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t count)
{
	return __store_smt_control(dev, attr, buf, count);
}
static DEVICE_ATTR_RW(control);
2996
/* sysfs show handler: print the value of sched_smt_active() as a decimal */
static ssize_t active_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", sched_smt_active());
}
static DEVICE_ATTR_RO(active);
3003
3004	static struct attribute *cpuhp_smt_attrs[] = {
3005	&dev_attr_control.attr,
3006	&dev_attr_active.attr,
3007	NULL
3008	};
3009
3010	static const struct attribute_group cpuhp_smt_attr_group = {
3011	.attrs = cpuhp_smt_attrs,
3012	.name = "smt",
3013	};
3014
3015	static int __init cpu_smt_sysfs_init(void)
3016	{
3017	struct device *dev_root;
3018	int ret = -ENODEV;
3019
3020	dev_root = bus_get_dev_root(bus: &cpu_subsys);
3021	if (dev_root) {
3022	ret = sysfs_create_group(kobj: &dev_root->kobj, grp: &cpuhp_smt_attr_group);
3023	put_device(dev: dev_root);
3024	}
3025	return ret;
3026	}
3027
3028	static int __init cpuhp_sysfs_init(void)
3029	{
3030	struct device *dev_root;
3031	int cpu, ret;
3032
3033	ret = cpu_smt_sysfs_init();
3034	if (ret)
3035	return ret;
3036
3037	dev_root = bus_get_dev_root(bus: &cpu_subsys);
3038	if (dev_root) {
3039	ret = sysfs_create_group(kobj: &dev_root->kobj, grp: &cpuhp_cpu_root_attr_group);
3040	put_device(dev: dev_root);
3041	if (ret)
3042	return ret;
3043	}
3044
3045	for_each_possible_cpu(cpu) {
3046	struct device *dev = get_cpu_device(cpu);
3047
3048	if (!dev)
3049	continue;
3050	ret = sysfs_create_group(kobj: &dev->kobj, grp: &cpuhp_cpu_attr_group);
3051	if (ret)
3052	return ret;
3053	}
3054	return `0`;
3055	}
3056	device_initcall(cpuhp_sysfs_init);
3057	#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
3058
3059	/*
3060	* cpu_bit_bitmap[] is a special, "compressed" data structure that
3061	* represents all NR_CPUS bits binary values of 1<<nr.
3062	*
3063	* It is used by cpumask_of() to get a constant address to a CPU
3064	* mask value that has a single bit set only.
3065	*/
3066
3067	/ cpu_bit_bitmap[0] is empty - so we can back into it /
3068	#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
3069	#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
3070	#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
3071	#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
3072
3073	const unsigned long cpu_bit_bitmap[BITS_PER_LONG+`1`][BITS_TO_LONGS(NR_CPUS)] = {
3074
3075	MASK_DECLARE_8(`0`), MASK_DECLARE_8(`8`),
3076	MASK_DECLARE_8(`16`), MASK_DECLARE_8(`24`),
3077	#if BITS_PER_LONG > 32
3078	MASK_DECLARE_8(`32`), MASK_DECLARE_8(`40`),
3079	MASK_DECLARE_8(`48`), MASK_DECLARE_8(`56`),
3080	#endif
3081	};
3082	EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
3083
3084	const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
3085	EXPORT_SYMBOL(cpu_all_bits);
3086
3087	#ifdef CONFIG_INIT_ALL_POSSIBLE
3088	struct cpumask __cpu_possible_mask __ro_after_init
3089	= {CPU_BITS_ALL};
3090	#else
3091	struct cpumask __cpu_possible_mask __ro_after_init;
3092	#endif
3093	EXPORT_SYMBOL(__cpu_possible_mask);
3094
3095	struct cpumask __cpu_online_mask __read_mostly;
3096	EXPORT_SYMBOL(__cpu_online_mask);
3097
3098	struct cpumask __cpu_enabled_mask __read_mostly;
3099	EXPORT_SYMBOL(__cpu_enabled_mask);
3100
3101	struct cpumask __cpu_present_mask __read_mostly;
3102	EXPORT_SYMBOL(__cpu_present_mask);
3103
3104	struct cpumask __cpu_active_mask __read_mostly;
3105	EXPORT_SYMBOL(__cpu_active_mask);
3106
3107	struct cpumask __cpu_dying_mask __read_mostly;
3108	EXPORT_SYMBOL(__cpu_dying_mask);
3109
3110	atomic_t __num_online_cpus __read_mostly;
3111	EXPORT_SYMBOL(__num_online_cpus);
3112
3113	void init_cpu_present(const struct cpumask *src)
3114	{
3115	cpumask_copy(dstp: &__cpu_present_mask, srcp: src);
3116	}
3117
3118	void init_cpu_possible(const struct cpumask *src)
3119	{
3120	cpumask_copy(dstp: &__cpu_possible_mask, srcp: src);
3121	}
3122
3123	void set_cpu_online(unsigned int cpu, bool online)
3124	{
3125	/*
3126	* atomic_inc/dec() is required to handle the horrid abuse of this
3127	* function by the reboot and kexec code which invoke it from
3128	* IPI/NMI broadcasts when shutting down CPUs. Invocation from
3129	* regular CPU hotplug is properly serialized.
3130	*
3131	* Note, that the fact that __num_online_cpus is of type atomic_t
3132	* does not protect readers which are not serialized against
3133	* concurrent hotplug operations.
3134	*/
3135	if (online) {
3136	if (!cpumask_test_and_set_cpu(cpu, cpumask: &__cpu_online_mask))
3137	atomic_inc(v: &__num_online_cpus);
3138	} else {
3139	if (cpumask_test_and_clear_cpu(cpu, cpumask: &__cpu_online_mask))
3140	atomic_dec(v: &__num_online_cpus);
3141	}
3142	}
3143
3144	/*
3145	* Activate the first processor.
3146	*/
3147	void __init boot_cpu_init(void)
3148	{
3149	int cpu = smp_processor_id();
3150
3151	/ Mark the boot cpu "present", "online" etc for SMP and UP case /
3152	set_cpu_online(cpu, online: true);
3153	set_cpu_active(cpu, true);
3154	set_cpu_present(cpu, true);
3155	set_cpu_possible(cpu, true);
3156
3157	#ifdef CONFIG_SMP
3158	__boot_cpu_id = cpu;
3159	#endif
3160	}
3161
3162	/*
3163	* Must be called _AFTER_ setting up the per_cpu areas
3164	*/
3165	void __init boot_cpu_hotplug_init(void)
3166	{
3167	#ifdef CONFIG_SMP
3168	cpumask_set_cpu(smp_processor_id(), dstp: &cpus_booted_once_mask);
3169	atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), i: SYNC_STATE_ONLINE);
3170	#endif
3171	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
3172	this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
3173	}
3174
3175	#ifdef CONFIG_CPU_MITIGATIONS
3176	/*
3177	* These are used for a global "mitigations=" cmdline option for toggling
3178	* optional CPU mitigations.
3179	*/
3180	enum cpu_mitigations {
3181	CPU_MITIGATIONS_OFF,
3182	CPU_MITIGATIONS_AUTO,
3183	CPU_MITIGATIONS_AUTO_NOSMT,
3184	};
3185
3186	static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
3187
3188	static int __init mitigations_parse_cmdline(char *arg)
3189	{
3190	if (!strcmp(arg, "off"))
3191	cpu_mitigations = CPU_MITIGATIONS_OFF;
3192	else if (!strcmp(arg, "auto"))
3193	cpu_mitigations = CPU_MITIGATIONS_AUTO;
3194	else if (!strcmp(arg, "auto,nosmt"))
3195	cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
3196	else
3197	pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
3198	arg);
3199
3200	return `0`;
3201	}
3202
3203	/ mitigations=off /
3204	bool cpu_mitigations_off(void)
3205	{
3206	return cpu_mitigations == CPU_MITIGATIONS_OFF;
3207	}
3208	EXPORT_SYMBOL_GPL(cpu_mitigations_off);
3209
3210	/ mitigations=auto,nosmt /
3211	bool cpu_mitigations_auto_nosmt(void)
3212	{
3213	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
3214	}
3215	EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
3216	#else
3217	static int __init mitigations_parse_cmdline(char *arg)
3218	{
3219	pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n");
3220	return `0`;
3221	}
3222	#endif
3223	early_param("mitigations", mitigations_parse_cmdline);
3224

Provided by KDAB

Definitions

cpuhp_cpu_state
cpuhp_state
cpus_booted_once_mask
cpuhp_state_up_map
cpuhp_state_down_map
cpuhp_lock_acquire
cpuhp_lock_release
cpuhp_step
cpuhp_state_mutex
cpuhp_hp_states
cpuhp_get_step
cpuhp_step_empty
cpuhp_invoke_callback
cpuhp_is_ap_state
wait_for_ap_thread
complete_ap_thread
cpuhp_is_atomic_state
cpuhp_sync_state
cpuhp_ap_update_sync_state
arch_cpuhp_sync_state_poll
cpuhp_wait_for_sync_state
cpuhp_ap_report_dead
arch_cpuhp_cleanup_dead_cpu
cpuhp_bp_sync_dead
cpuhp_ap_sync_alive
cpuhp_can_boot_ap
arch_cpuhp_cleanup_kick_cpu
cpuhp_bp_sync_alive
cpu_add_remove_lock
cpuhp_tasks_frozen
cpu_maps_update_begin
cpu_maps_update_done
cpu_hotplug_disabled
cpu_hotplug_lock
cpu_hotplug_offline_disabled
cpus_read_lock
cpus_read_trylock
cpus_read_unlock
cpus_write_lock
cpus_write_unlock
lockdep_assert_cpus_held
lockdep_is_cpus_held
lockdep_acquire_cpus_lock
lockdep_release_cpus_lock
cpu_hotplug_disable_offlining
cpu_hotplug_disable
__cpu_hotplug_enable
cpu_hotplug_enable
arch_smt_update
cpu_smt_control
cpu_smt_max_threads
cpu_smt_num_threads
cpu_smt_disable
cpu_smt_set_num_threads
smt_cmdline_disable
cpu_smt_thread_allowed
cpu_bootable
cpu_smt_possible
cpuhp_set_state
cpuhp_reset_state
__cpuhp_kick_ap
cpuhp_kick_ap
bringup_wait_for_ap_online
cpuhp_kick_ap_alive
cpuhp_bringup_ap
finish_cpu
cpuhp_next_state
__cpuhp_invoke_callback_range
cpuhp_invoke_callback_range
cpuhp_invoke_callback_range_nofail
can_rollback_cpu
cpuhp_up_callbacks
cpuhp_should_run
cpuhp_thread_fun
cpuhp_invoke_ap_callback
cpuhp_kick_ap_work
cpuhp_threads
cpuhp_init_state
cpuhp_threads_init
clear_tasks_mm_cpumask
take_cpu_down
takedown_cpu
cpuhp_complete_idle_dead
cpuhp_report_idle_dead
cpuhp_down_callbacks
_cpu_down
cpu_down_work
__cpu_down_maps_locked
cpu_down_maps_locked
cpu_down
cpu_device_down
remove_cpu
smp_shutdown_nonboot_cpus
notify_cpu_starting
cpuhp_online_idle
_cpu_up
cpu_up
cpu_device_up
add_cpu
bringup_hibernate_cpu
cpuhp_bringup_mask
__cpuhp_parallel_bringup
parallel_bringup_parse_param
cpuhp_smt_aware
cpuhp_get_primary_thread_mask
arch_cpuhp_init_parallel_bringup
cpuhp_bringup_cpus_parallel
bringup_nonboot_cpus
frozen_cpus
freeze_secondary_cpus
arch_thaw_secondary_cpus_begin
arch_thaw_secondary_cpus_end
thaw_secondary_cpus
alloc_frozen_cpus
cpu_hotplug_pm_callback
cpu_hotplug_pm_sync_init
__boot_cpu_id
cpuhp_hp_states
cpuhp_cb_check
cpuhp_reserve_state
cpuhp_store_callbacks
cpuhp_get_teardown_cb
cpuhp_issue_call
cpuhp_rollback_install
__cpuhp_state_add_instance_cpuslocked
__cpuhp_state_add_instance
__cpuhp_setup_state_cpuslocked
__cpuhp_setup_state
__cpuhp_state_remove_instance
__cpuhp_remove_state_cpuslocked
__cpuhp_remove_state
cpuhp_offline_cpu_device
cpuhp_online_cpu_device
cpuhp_smt_disable
topology_is_core_online
cpuhp_smt_enable
state_show
target_store
target_show
fail_store
fail_show
cpuhp_cpu_attrs
cpuhp_cpu_attr_group
states_show
cpuhp_cpu_root_attrs
cpuhp_cpu_root_attr_group
cpu_smt_num_threads_valid
__store_smt_control
smt_states
control_show
control_store
active_show
cpuhp_smt_attrs
cpuhp_smt_attr_group
cpu_smt_sysfs_init
cpuhp_sysfs_init
cpu_bit_bitmap
cpu_all_bits
__cpu_possible_mask
__cpu_online_mask
__cpu_enabled_mask
__cpu_present_mask
__cpu_active_mask
__cpu_dying_mask
__num_online_cpus
init_cpu_present
init_cpu_possible
set_cpu_online
boot_cpu_init
boot_cpu_hotplug_init
cpu_mitigations
cpu_mitigations
mitigations_parse_cmdline
cpu_mitigations_off

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/kernel/cpu.c