workqueue.c source code [linux/kernel/workqueue.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* kernel/workqueue.c - generic async execution with shared worker pool
4	*
5	* Copyright (C) 2002 Ingo Molnar
6	*
7	* Derived from the taskqueue/keventd code by:
8	* David Woodhouse <dwmw2@infradead.org>
9	* Andrew Morton
10	* Kai Petzke <wpp@marie.physik.tu-berlin.de>
11	* Theodore Ts'o <tytso@mit.edu>
12	*
13	* Made to use alloc_percpu by Christoph Lameter.
14	*
15	* Copyright (C) 2010 SUSE Linux Products GmbH
16	* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
17	*
18	* This is the generic async execution mechanism. Work items as are
19	* executed in process context. The worker pool is shared and
20	* automatically managed. There are two worker pools for each CPU (one for
21	* normal work items and the other for high priority ones) and some extra
22	* pools for workqueues which are not bound to any specific CPU - the
23	* number of these backing pools is dynamic.
24	*
25	* Please read Documentation/core-api/workqueue.rst for details.
26	*/
27
28	#include <linux/export.h>
29	#include <linux/kernel.h>
30	#include <linux/sched.h>
31	#include <linux/init.h>
32	#include <linux/interrupt.h>
33	#include <linux/signal.h>
34	#include <linux/completion.h>
35	#include <linux/workqueue.h>
36	#include <linux/slab.h>
37	#include <linux/cpu.h>
38	#include <linux/notifier.h>
39	#include <linux/kthread.h>
40	#include <linux/hardirq.h>
41	#include <linux/mempolicy.h>
42	#include <linux/freezer.h>
43	#include <linux/debug_locks.h>
44	#include <linux/lockdep.h>
45	#include <linux/idr.h>
46	#include <linux/jhash.h>
47	#include <linux/hashtable.h>
48	#include <linux/rculist.h>
49	#include <linux/nodemask.h>
50	#include <linux/moduleparam.h>
51	#include <linux/uaccess.h>
52	#include <linux/sched/isolation.h>
53	#include <linux/sched/debug.h>
54	#include <linux/nmi.h>
55	#include <linux/kvm_para.h>
56	#include <linux/delay.h>
57	#include <linux/irq_work.h>
58
59	#include "workqueue_internal.h"
60
61	enum worker_pool_flags {
62	/*
63	* worker_pool flags
64	*
65	* A bound pool is either associated or disassociated with its CPU.
66	* While associated (!DISASSOCIATED), all workers are bound to the
67	* CPU and none has %WORKER_UNBOUND set and concurrency management
68	* is in effect.
69	*
70	* While DISASSOCIATED, the cpu may be offline and all workers have
71	* %WORKER_UNBOUND set and concurrency management disabled, and may
72	* be executing on any CPU. The pool behaves as an unbound one.
73	*
74	* Note that DISASSOCIATED should be flipped only while holding
75	* wq_pool_attach_mutex to avoid changing binding state while
76	* worker_attach_to_pool() is in progress.
77	*
78	* As there can only be one concurrent BH execution context per CPU, a
79	* BH pool is per-CPU and always DISASSOCIATED.
80	*/
81	POOL_BH = `1` << `0`, / is a BH pool /
82	POOL_MANAGER_ACTIVE = `1` << `1`, / being managed /
83	POOL_DISASSOCIATED = `1` << `2`, / cpu can't serve workers /
84	POOL_BH_DRAINING = `1` << `3`, / draining after CPU offline /
85	};
86
87	enum worker_flags {
88	/ worker flags /
89	WORKER_DIE = `1` << `1`, / die die die /
90	WORKER_IDLE = `1` << `2`, / is idle /
91	WORKER_PREP = `1` << `3`, / preparing to run works /
92	WORKER_CPU_INTENSIVE = `1` << `6`, / cpu intensive /
93	WORKER_UNBOUND = `1` << `7`, / worker is unbound /
94	WORKER_REBOUND = `1` << `8`, / worker was rebound /
95
96	WORKER_NOT_RUNNING = WORKER_PREP \| WORKER_CPU_INTENSIVE \|
97	WORKER_UNBOUND \| WORKER_REBOUND,
98	};
99
100	enum work_cancel_flags {
101	WORK_CANCEL_DELAYED = `1` << `0`, / canceling a delayed_work /
102	WORK_CANCEL_DISABLE = `1` << `1`, / canceling to disable /
103	};
104
105	enum wq_internal_consts {
106	NR_STD_WORKER_POOLS = `2`, / # standard pools per cpu /
107
108	UNBOUND_POOL_HASH_ORDER = `6`, / hashed by pool->attrs /
109	BUSY_WORKER_HASH_ORDER = `6`, / 64 pointers /
110
111	MAX_IDLE_WORKERS_RATIO = `4`, / 1/4 of busy can be idle /
112	IDLE_WORKER_TIMEOUT = `300` * HZ, / keep idle ones for 5 mins /
113
114	MAYDAY_INITIAL_TIMEOUT = HZ / `100` >= `2` ? HZ / `100` : `2`,
115	/ call for help after 10ms*
116	(min two ticks) /*
117	MAYDAY_INTERVAL = HZ / `10`, / and then every 100ms /
118	CREATE_COOLDOWN = HZ, / time to breath after fail /
119
120	/*
121	* Rescue workers are used only on emergencies and shared by
122	* all cpus. Give MIN_NICE.
123	*/
124	RESCUER_NICE_LEVEL = MIN_NICE,
125	HIGHPRI_NICE_LEVEL = MIN_NICE,
126
127	WQ_NAME_LEN = `32`,
128	WORKER_ID_LEN = `10` + WQ_NAME_LEN, / "kworker/R-" + WQ_NAME_LEN /
129	};
130
131	/*
132	* We don't want to trap softirq for too long. See MAX_SOFTIRQ_TIME and
133	* MAX_SOFTIRQ_RESTART in kernel/softirq.c. These are macros because
134	* msecs_to_jiffies() can't be an initializer.
135	*/
136	#define BH_WORKER_JIFFIES msecs_to_jiffies(2)
137	#define BH_WORKER_RESTARTS 10
138
139	/*
140	* Structure fields follow one of the following exclusion rules.
141	*
142	* I: Modifiable by initialization/destruction paths and read-only for
143	* everyone else.
144	*
145	* P: Preemption protected. Disabling preemption is enough and should
146	* only be modified and accessed from the local cpu.
147	*
148	* L: pool->lock protected. Access with pool->lock held.
149	*
150	* LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
151	* reads.
152	*
153	* K: Only modified by worker while holding pool->lock. Can be safely read by
154	* self, while holding pool->lock or from IRQ context if %current is the
155	* kworker.
156	*
157	* S: Only modified by worker self.
158	*
159	* A: wq_pool_attach_mutex protected.
160	*
161	* PL: wq_pool_mutex protected.
162	*
163	* PR: wq_pool_mutex protected for writes. RCU protected for reads.
164	*
165	* PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
166	*
167	* PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
168	* RCU for reads.
169	*
170	* WQ: wq->mutex protected.
171	*
172	* WR: wq->mutex protected for writes. RCU protected for reads.
173	*
174	* WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
175	* with READ_ONCE() without locking.
176	*
177	* MD: wq_mayday_lock protected.
178	*
179	* WD: Used internally by the watchdog.
180	*/
181
182	/ struct worker is defined in workqueue_internal.h /
183
184	struct worker_pool {
185	raw_spinlock_t lock; / the pool lock /
186	int cpu; / I: the associated cpu /
187	int node; / I: the associated node ID /
188	int id; / I: pool ID /
189	unsigned int flags; / L: flags /
190
191	unsigned long watchdog_ts; / L: watchdog timestamp /
192	bool cpu_stall; / WD: stalled cpu bound pool /
193
194	/*
195	* The counter is incremented in a process context on the associated CPU
196	* w/ preemption disabled, and decremented or reset in the same context
197	* but w/ pool->lock held. The readers grab pool->lock and are
198	* guaranteed to see if the counter reached zero.
199	*/
200	int nr_running;
201
202	struct list_head worklist; / L: list of pending works /
203
204	int nr_workers; / L: total number of workers /
205	int nr_idle; / L: currently idle workers /
206
207	struct list_head idle_list; / L: list of idle workers /
208	struct timer_list idle_timer; / L: worker idle timeout /
209	struct work_struct idle_cull_work; / L: worker idle cleanup /
210
211	struct timer_list mayday_timer; / L: SOS timer for workers /
212
213	/ a workers is either on busy_hash or idle_list, or the manager /
214	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
215	/ L: hash of busy workers /
216
217	struct worker manager; /* L: purely informational /
218	struct list_head workers; / A: attached workers /
219
220	struct ida worker_ida; / worker IDs for task name /
221
222	struct workqueue_attrs attrs; /* I: worker attributes /
223	struct hlist_node hash_node; / PL: unbound_pool_hash node /
224	int refcnt; / PL: refcnt for unbound pools /
225
226	/*
227	* Destruction of pool is RCU protected to allow dereferences
228	* from get_work_pool().
229	*/
230	struct rcu_head rcu;
231	};
232
233	/*
234	* Per-pool_workqueue statistics. These can be monitored using
235	* tools/workqueue/wq_monitor.py.
236	*/
237	enum pool_workqueue_stats {
238	PWQ_STAT_STARTED, / work items started execution /
239	PWQ_STAT_COMPLETED, / work items completed execution /
240	PWQ_STAT_CPU_TIME, / total CPU time consumed /
241	PWQ_STAT_CPU_INTENSIVE, / wq_cpu_intensive_thresh_us violations /
242	PWQ_STAT_CM_WAKEUP, / concurrency-management worker wakeups /
243	PWQ_STAT_REPATRIATED, / unbound workers brought back into scope /
244	PWQ_STAT_MAYDAY, / maydays to rescuer /
245	PWQ_STAT_RESCUED, / linked work items executed by rescuer /
246
247	PWQ_NR_STATS,
248	};
249
250	/*
251	* The per-pool workqueue. While queued, bits below WORK_PWQ_SHIFT
252	* of work_struct->data are used for flags and the remaining high bits
253	* point to the pwq; thus, pwqs need to be aligned at two's power of the
254	* number of flag bits.
255	*/
256	struct pool_workqueue {
257	struct worker_pool pool; /* I: the associated pool /
258	struct workqueue_struct wq; /* I: the owning workqueue /
259	int work_color; / L: current color /
260	int flush_color; / L: flushing color /
261	int refcnt; / L: reference count /
262	int nr_in_flight[WORK_NR_COLORS];
263	/ L: nr of in_flight works /
264	bool plugged; / L: execution suspended /
265
266	/*
267	* nr_active management and WORK_STRUCT_INACTIVE:
268	*
269	* When pwq->nr_active >= max_active, new work item is queued to
270	* pwq->inactive_works instead of pool->worklist and marked with
271	* WORK_STRUCT_INACTIVE.
272	*
273	* All work items marked with WORK_STRUCT_INACTIVE do not participate in
274	* nr_active and all work items in pwq->inactive_works are marked with
275	* WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE work items are
276	* in pwq->inactive_works. Some of them are ready to run in
277	* pool->worklist or worker->scheduled. Those work itmes are only struct
278	* wq_barrier which is used for flush_work() and should not participate
279	* in nr_active. For non-barrier work item, it is marked with
280	* WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
281	*/
282	int nr_active; / L: nr of active works /
283	struct list_head inactive_works; / L: inactive works /
284	struct list_head pending_node; / LN: node on wq_node_nr_active->pending_pwqs /
285	struct list_head pwqs_node; / WR: node on wq->pwqs /
286	struct list_head mayday_node; / MD: node on wq->maydays /
287
288	u64 stats[PWQ_NR_STATS];
289
290	/*
291	* Release of unbound pwq is punted to a kthread_worker. See put_pwq()
292	* and pwq_release_workfn() for details. pool_workqueue itself is also
293	* RCU protected so that the first pwq can be determined without
294	* grabbing wq->mutex.
295	*/
296	struct kthread_work release_work;
297	struct rcu_head rcu;
298	} __aligned(`1` << WORK_STRUCT_PWQ_SHIFT);
299
300	/*
301	* Structure used to wait for workqueue flush.
302	*/
303	struct wq_flusher {
304	struct list_head list; / WQ: list of flushers /
305	int flush_color; / WQ: flush color waiting for /
306	struct completion done; / flush completion /
307	};
308
309	struct wq_device;
310
311	/*
312	* Unlike in a per-cpu workqueue where max_active limits its concurrency level
313	* on each CPU, in an unbound workqueue, max_active applies to the whole system.
314	* As sharing a single nr_active across multiple sockets can be very expensive,
315	* the counting and enforcement is per NUMA node.
316	*
317	* The following struct is used to enforce per-node max_active. When a pwq wants
318	* to start executing a work item, it should increment ->nr using
319	* tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
320	* ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
321	* and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
322	* round-robin order.
323	*/
324	struct wq_node_nr_active {
325	int max; / per-node max_active /
326	atomic_t nr; / per-node nr_active /
327	raw_spinlock_t lock; / nests inside pool locks /
328	struct list_head pending_pwqs; / LN: pwqs with inactive works /
329	};
330
331	/*
332	* The externally visible workqueue. It relays the issued work items to
333	* the appropriate worker_pool through its pool_workqueues.
334	*/
335	struct workqueue_struct {
336	struct list_head pwqs; / WR: all pwqs of this wq /
337	struct list_head list; / PR: list of all workqueues /
338
339	struct mutex mutex; / protects this wq /
340	int work_color; / WQ: current work color /
341	int flush_color; / WQ: current flush color /
342	atomic_t nr_pwqs_to_flush; / flush in progress /
343	struct wq_flusher first_flusher; /* WQ: first flusher /
344	struct list_head flusher_queue; / WQ: flush waiters /
345	struct list_head flusher_overflow; / WQ: flush overflow list /
346
347	struct list_head maydays; / MD: pwqs requesting rescue /
348	struct worker rescuer; /* MD: rescue worker /
349
350	int nr_drainers; / WQ: drain in progress /
351
352	/ See alloc_workqueue() function comment for info on min/max_active /
353	int max_active; / WO: max active works /
354	int min_active; / WO: min active works /
355	int saved_max_active; / WQ: saved max_active /
356	int saved_min_active; / WQ: saved min_active /
357
358	struct workqueue_attrs unbound_attrs; /* PW: only for unbound wqs /
359	struct pool_workqueue __rcu dfl_pwq; /* PW: only for unbound wqs /
360
361	#ifdef CONFIG_SYSFS
362	struct wq_device wq_dev; /* I: for sysfs interface /
363	#endif
364	#ifdef CONFIG_LOCKDEP
365	char *lock_name;
366	struct lock_class_key key;
367	struct lockdep_map __lockdep_map;
368	struct lockdep_map *lockdep_map;
369	#endif
370	char name[WQ_NAME_LEN]; / I: workqueue name /
371
372	/*
373	* Destruction of workqueue_struct is RCU protected to allow walking
374	* the workqueues list without grabbing wq_pool_mutex.
375	* This is used to dump all workqueues from sysrq.
376	*/
377	struct rcu_head rcu;
378
379	/ hot fields used during command issue, aligned to cacheline /
380	unsigned int flags ____cacheline_aligned; / WQ: WQ_* flags /
381	struct pool_workqueue __rcu * __percpu cpu_pwq; /* I: per-cpu pwqs /
382	struct wq_node_nr_active node_nr_active[]; /* I: per-node nr_active /
383	};
384
385	/*
386	* Each pod type describes how CPUs should be grouped for unbound workqueues.
387	* See the comment above workqueue_attrs->affn_scope.
388	*/
389	struct wq_pod_type {
390	int nr_pods; / number of pods /
391	cpumask_var_t pod_cpus; /* pod -> cpus /
392	int pod_node; /* pod -> node /
393	int cpu_pod; /* cpu -> pod /
394	};
395
396	struct work_offq_data {
397	u32 pool_id;
398	u32 disable;
399	u32 flags;
400	};
401
402	static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
403	[WQ_AFFN_DFL] = "default",
404	[WQ_AFFN_CPU] = "cpu",
405	[WQ_AFFN_SMT] = "smt",
406	[WQ_AFFN_CACHE] = "cache",
407	[WQ_AFFN_NUMA] = "numa",
408	[WQ_AFFN_SYSTEM] = "system",
409	};
410
411	/*
412	* Per-cpu work items which run for longer than the following threshold are
413	* automatically considered CPU intensive and excluded from concurrency
414	* management to prevent them from noticeably delaying other per-cpu work items.
415	* ULONG_MAX indicates that the user hasn't overridden it with a boot parameter.
416	* The actual value is initialized in wq_cpu_intensive_thresh_init().
417	*/
418	static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
419	module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, `0644`);
420	#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT
421	static unsigned int wq_cpu_intensive_warning_thresh = `4`;
422	module_param_named(cpu_intensive_warning_thresh, wq_cpu_intensive_warning_thresh, uint, `0644`);
423	#endif
424
425	/ see the comment above the definition of WQ_POWER_EFFICIENT /
426	static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
427	module_param_named(power_efficient, wq_power_efficient, bool, `0444`);
428
429	static bool wq_online; / can kworkers be created yet? /
430	static bool wq_topo_initialized __read_mostly = false;
431
432	static struct kmem_cache *pwq_cache;
433
434	static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES];
435	static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_CACHE;
436
437	/ buf for wq_update_unbound_pod_attrs(), protected by CPU hotplug exclusion /
438	static struct workqueue_attrs *unbound_wq_update_pwq_attrs_buf;
439
440	static DEFINE_MUTEX(wq_pool_mutex); / protects pools and workqueues list /
441	static DEFINE_MUTEX(wq_pool_attach_mutex); / protects worker attach/detach /
442	static DEFINE_RAW_SPINLOCK(wq_mayday_lock); / protects wq->maydays list /
443	/ wait for manager to go away /
444	static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);
445
446	static LIST_HEAD(workqueues); / PR: list of all workqueues /
447	static bool workqueue_freezing; / PL: have wqs started freezing? /
448
449	/ PL: mirror the cpu_online_mask excluding the CPU in the midst of hotplugging /
450	static cpumask_var_t wq_online_cpumask;
451
452	/ PL&A: allowable cpus for unbound wqs and work items /
453	static cpumask_var_t wq_unbound_cpumask;
454
455	/ PL: user requested unbound cpumask via sysfs /
456	static cpumask_var_t wq_requested_unbound_cpumask;
457
458	/ PL: isolated cpumask to be excluded from unbound cpumask /
459	static cpumask_var_t wq_isolated_cpumask;
460
461	/ for further constrain wq_unbound_cpumask by cmdline parameter/
462	static struct cpumask wq_cmdline_cpumask __initdata;
463
464	/ CPU where unbound work was last round robin scheduled from this CPU /
465	static DEFINE_PER_CPU(int, wq_rr_cpu_last);
466
467	/*
468	* Local execution of unbound work items is no longer guaranteed. The
469	* following always forces round-robin CPU selection on unbound work items
470	* to uncover usages which depend on it.
471	*/
472	#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
473	static bool wq_debug_force_rr_cpu = true;
474	#else
475	static bool wq_debug_force_rr_cpu = false;
476	#endif
477	module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, `0644`);
478
479	/ to raise softirq for the BH worker pools on other CPUs /
480	static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS], bh_pool_irq_works);
481
482	/ the BH worker pools /
483	static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], bh_worker_pools);
484
485	/ the per-cpu worker pools /
486	static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
487
488	static DEFINE_IDR(worker_pool_idr); / PR: idr of all pools /
489
490	/ PL: hash of all unbound pools keyed by pool->attrs /
491	static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
492
493	/ I: attributes used when instantiating standard unbound pools on demand /
494	static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
495
496	/ I: attributes used when instantiating ordered pools on demand /
497	static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
498
499	/*
500	* I: kthread_worker to release pwq's. pwq release needs to be bounced to a
501	* process context while holding a pool lock. Bounce to a dedicated kthread
502	* worker to avoid A-A deadlocks.
503	*/
504	static struct kthread_worker *pwq_release_worker __ro_after_init;
505
506	struct workqueue_struct *system_wq __ro_after_init;
507	EXPORT_SYMBOL(system_wq);
508	struct workqueue_struct *system_highpri_wq __ro_after_init;
509	EXPORT_SYMBOL_GPL(system_highpri_wq);
510	struct workqueue_struct *system_long_wq __ro_after_init;
511	EXPORT_SYMBOL_GPL(system_long_wq);
512	struct workqueue_struct *system_unbound_wq __ro_after_init;
513	EXPORT_SYMBOL_GPL(system_unbound_wq);
514	struct workqueue_struct *system_freezable_wq __ro_after_init;
515	EXPORT_SYMBOL_GPL(system_freezable_wq);
516	struct workqueue_struct *system_power_efficient_wq __ro_after_init;
517	EXPORT_SYMBOL_GPL(system_power_efficient_wq);
518	struct workqueue_struct *system_freezable_power_efficient_wq __ro_after_init;
519	EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
520	struct workqueue_struct *system_bh_wq;
521	EXPORT_SYMBOL_GPL(system_bh_wq);
522	struct workqueue_struct *system_bh_highpri_wq;
523	EXPORT_SYMBOL_GPL(system_bh_highpri_wq);
524
525	static int worker_thread(void *__worker);
526	static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
527	static void show_pwq(struct pool_workqueue *pwq);
528	static void show_one_worker_pool(struct worker_pool *pool);
529
530	#define CREATE_TRACE_POINTS
531	#include <trace/events/workqueue.h>
532
533	#define assert_rcu_or_pool_mutex() \
534	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \
535	!lockdep_is_held(&wq_pool_mutex), \
536	"RCU or wq_pool_mutex should be held")
537
538	#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
539	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \
540	!lockdep_is_held(&wq->mutex) && \
541	!lockdep_is_held(&wq_pool_mutex), \
542	"RCU, wq->mutex or wq_pool_mutex should be held")
543
544	#define for_each_bh_worker_pool(pool, cpu) \
545	for ((pool) = &per_cpu(bh_worker_pools, cpu)[0]; \
546	(pool) < &per_cpu(bh_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
547	(pool)++)
548
549	#define for_each_cpu_worker_pool(pool, cpu) \
550	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
551	(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
552	(pool)++)
553
554	/**
555	* for_each_pool - iterate through all worker_pools in the system
556	* @pool: iteration cursor
557	* @pi: integer used for iteration
558	*
559	* This must be called either with wq_pool_mutex held or RCU read
560	* locked. If the pool needs to be used beyond the locking in effect, the
561	* caller is responsible for guaranteeing that the pool stays online.
562	*
563	* The if/else clause exists only for the lockdep assertion and can be
564	* ignored.
565	*/
566	#define for_each_pool(pool, pi) \
567	idr_for_each_entry(&worker_pool_idr, pool, pi) \
568	if (({ assert_rcu_or_pool_mutex(); false; })) { } \
569	else
570
571	/**
572	* for_each_pool_worker - iterate through all workers of a worker_pool
573	* @worker: iteration cursor
574	* @pool: worker_pool to iterate workers of
575	*
576	* This must be called with wq_pool_attach_mutex.
577	*
578	* The if/else clause exists only for the lockdep assertion and can be
579	* ignored.
580	*/
581	#define for_each_pool_worker(worker, pool) \
582	list_for_each_entry((worker), &(pool)->workers, node) \
583	if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
584	else
585
586	/**
587	* for_each_pwq - iterate through all pool_workqueues of the specified workqueue
588	* @pwq: iteration cursor
589	* @wq: the target workqueue
590	*
591	* This must be called either with wq->mutex held or RCU read locked.
592	* If the pwq needs to be used beyond the locking in effect, the caller is
593	* responsible for guaranteeing that the pwq stays online.
594	*
595	* The if/else clause exists only for the lockdep assertion and can be
596	* ignored.
597	*/
598	#define for_each_pwq(pwq, wq) \
599	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
600	lockdep_is_held(&(wq->mutex)))
601
602	#ifdef CONFIG_DEBUG_OBJECTS_WORK
603
604	static const struct debug_obj_descr work_debug_descr;
605
606	static void work_debug_hint(void* *addr)
607	{
608	return ((struct work_struct *) addr)->func;
609	}
610
611	static bool work_is_static_object(void *addr)
612	{
613	struct work_struct *work = addr;
614
615	return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
616	}
617
618	/*
619	* fixup_init is called when:
620	* - an active object is initialized
621	*/
622	static bool work_fixup_init(void addr, enum* debug_obj_state state)
623	{
624	struct work_struct *work = addr;
625
626	switch (state) {
627	case ODEBUG_STATE_ACTIVE:
628	cancel_work_sync(work);
629	debug_object_init(addr: work, descr: &work_debug_descr);
630	return true;
631	default:
632	return false;
633	}
634	}
635
636	/*
637	* fixup_free is called when:
638	* - an active object is freed
639	*/
640	static bool work_fixup_free(void addr, enum* debug_obj_state state)
641	{
642	struct work_struct *work = addr;
643
644	switch (state) {
645	case ODEBUG_STATE_ACTIVE:
646	cancel_work_sync(work);
647	debug_object_free(addr: work, descr: &work_debug_descr);
648	return true;
649	default:
650	return false;
651	}
652	}
653
654	static const struct debug_obj_descr work_debug_descr = {
655	.name = "work_struct",
656	.debug_hint = work_debug_hint,
657	.is_static_object = work_is_static_object,
658	.fixup_init = work_fixup_init,
659	.fixup_free = work_fixup_free,
660	};
661
662	static inline void debug_work_activate(struct work_struct *work)
663	{
664	debug_object_activate(addr: work, descr: &work_debug_descr);
665	}
666
667	static inline void debug_work_deactivate(struct work_struct *work)
668	{
669	debug_object_deactivate(addr: work, descr: &work_debug_descr);
670	}
671
672	void __init_work(struct work_struct work, int* onstack)
673	{
674	if (onstack)
675	debug_object_init_on_stack(addr: work, descr: &work_debug_descr);
676	else
677	debug_object_init(addr: work, descr: &work_debug_descr);
678	}
679	EXPORT_SYMBOL_GPL(__init_work);
680
681	void destroy_work_on_stack(struct work_struct *work)
682	{
683	debug_object_free(addr: work, descr: &work_debug_descr);
684	}
685	EXPORT_SYMBOL_GPL(destroy_work_on_stack);
686
687	void destroy_delayed_work_on_stack(struct delayed_work *work)
688	{
689	timer_destroy_on_stack(timer: &work->timer);
690	debug_object_free(addr: &work->work, descr: &work_debug_descr);
691	}
692	EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
693
694	#else
695	static inline void debug_work_activate(struct work_struct *work) { }
696	static inline void debug_work_deactivate(struct work_struct *work) { }
697	#endif
698
699	/**
700	* worker_pool_assign_id - allocate ID and assign it to @pool
701	* @pool: the pool pointer of interest
702	*
703	* Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
704	* successfully, -errno on failure.
705	*/
706	static int worker_pool_assign_id(struct worker_pool *pool)
707	{
708	int ret;
709
710	lockdep_assert_held(&wq_pool_mutex);
711
712	ret = idr_alloc(&worker_pool_idr, ptr: pool, start: `0`, WORK_OFFQ_POOL_NONE,
713	GFP_KERNEL);
714	if (ret >= `0`) {
715	pool->id = ret;
716	return `0`;
717	}
718	return ret;
719	}
720
721	static struct pool_workqueue __rcu **
722	unbound_pwq_slot(struct workqueue_struct wq, int* cpu)
723	{
724	if (cpu >= `0`)
725	return per_cpu_ptr(wq->cpu_pwq, cpu);
726	else
727	return &wq->dfl_pwq;
728	}
729
730	/ @cpu < 0 for dfl_pwq /
731	static struct pool_workqueue unbound_pwq(struct* workqueue_struct wq, int* cpu)
732	{
733	return rcu_dereference_check(*unbound_pwq_slot(wq, cpu),
734	lockdep_is_held(&wq_pool_mutex) \|\|
735	lockdep_is_held(&wq->mutex));
736	}
737
738	/**
739	* unbound_effective_cpumask - effective cpumask of an unbound workqueue
740	* @wq: workqueue of interest
741	*
742	* @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
743	* is masked with wq_unbound_cpumask to determine the effective cpumask. The
744	* default pwq is always mapped to the pool with the current effective cpumask.
745	*/
746	static struct cpumask unbound_effective_cpumask(struct* workqueue_struct *wq)
747	{
748	return unbound_pwq(wq, cpu: -`1`)->pool->attrs->__pod_cpumask;
749	}
750
751	static unsigned int work_color_to_flags(int color)
752	{
753	return color << WORK_STRUCT_COLOR_SHIFT;
754	}
755
756	static int get_work_color(unsigned long work_data)
757	{
758	return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
759	((`1` << WORK_STRUCT_COLOR_BITS) - `1`);
760	}
761
762	static int work_next_color(int color)
763	{
764	return (color + `1`) % WORK_NR_COLORS;
765	}
766
767	static unsigned long pool_offq_flags(struct worker_pool *pool)
768	{
769	return (pool->flags & POOL_BH) ? WORK_OFFQ_BH : `0`;
770	}
771
772	/*
773	* While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
774	* contain the pointer to the queued pwq. Once execution starts, the flag
775	* is cleared and the high bits contain OFFQ flags and pool ID.
776	*
777	* set_work_pwq(), set_work_pool_and_clear_pending() and mark_work_canceling()
778	* can be used to set the pwq, pool or clear work->data. These functions should
779	* only be called while the work is owned - ie. while the PENDING bit is set.
780	*
781	* get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
782	* corresponding to a work. Pool is available once the work has been
783	* queued anywhere after initialization until it is sync canceled. pwq is
784	* available only while the work item is queued.
785	*/
786	static inline void set_work_data(struct work_struct work, unsigned* long data)
787	{
788	WARN_ON_ONCE(!work_pending(work));
789	atomic_long_set(v: &work->data, i: data \| work_static(work));
790	}
791
792	static void set_work_pwq(struct work_struct work, struct* pool_workqueue *pwq,
793	unsigned long flags)
794	{
795	set_work_data(work, data: (unsigned long)pwq \| WORK_STRUCT_PENDING \|
796	WORK_STRUCT_PWQ \| flags);
797	}
798
799	static void set_work_pool_and_keep_pending(struct work_struct *work,
800	int pool_id, unsigned long flags)
801	{
802	set_work_data(work, data: ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) \|
803	WORK_STRUCT_PENDING \| flags);
804	}
805
806	static void set_work_pool_and_clear_pending(struct work_struct *work,
807	int pool_id, unsigned long flags)
808	{
809	/*
810	* The following wmb is paired with the implied mb in
811	* test_and_set_bit(PENDING) and ensures all updates to @work made
812	* here are visible to and precede any updates by the next PENDING
813	* owner.
814	*/
815	smp_wmb();
816	set_work_data(work, data: ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) \|
817	flags);
818	/*
819	* The following mb guarantees that previous clear of a PENDING bit
820	* will not be reordered with any speculative LOADS or STORES from
821	* work->current_func, which is executed afterwards. This possible
822	* reordering can lead to a missed execution on attempt to queue
823	* the same @work. E.g. consider this case:
824	*
825	* CPU#0 CPU#1
826	* ---------------------------- --------------------------------
827	*
828	* 1 STORE event_indicated
829	* 2 queue_work_on() {
830	* 3 test_and_set_bit(PENDING)
831	* 4 } set_..._and_clear_pending() {
832	* 5 set_work_data() # clear bit
833	* 6 smp_mb()
834	* 7 work->current_func() {
835	* 8 LOAD event_indicated
836	* }
837	*
838	* Without an explicit full barrier speculative LOAD on line 8 can
839	* be executed before CPU#0 does STORE on line 1. If that happens,
840	* CPU#0 observes the PENDING bit is still set and new execution of
841	* a @work is not queued in a hope, that CPU#1 will eventually
842	* finish the queued @work. Meanwhile CPU#1 does not see
843	* event_indicated is set, because speculative LOAD was executed
844	* before actual STORE.
845	*/
846	smp_mb();
847	}
848
849	static inline struct pool_workqueue work_struct_pwq(unsigned* long data)
850	{
851	return (struct pool_workqueue *)(data & WORK_STRUCT_PWQ_MASK);
852	}
853
854	static struct pool_workqueue get_work_pwq(struct* work_struct *work)
855	{
856	unsigned long data = atomic_long_read(v: &work->data);
857
858	if (data & WORK_STRUCT_PWQ)
859	return work_struct_pwq(data);
860	else
861	return NULL;
862	}
863
864	/**
865	* get_work_pool - return the worker_pool a given work was associated with
866	* @work: the work item of interest
867	*
868	* Pools are created and destroyed under wq_pool_mutex, and allows read
869	* access under RCU read lock. As such, this function should be
870	* called under wq_pool_mutex or inside of a rcu_read_lock() region.
871	*
872	* All fields of the returned pool are accessible as long as the above
873	* mentioned locking is in effect. If the returned pool needs to be used
874	* beyond the critical section, the caller is responsible for ensuring the
875	* returned pool is and stays online.
876	*
877	* Return: The worker_pool @work was last associated with. %NULL if none.
878	*/
879	static struct worker_pool get_work_pool(struct* work_struct *work)
880	{
881	unsigned long data = atomic_long_read(v: &work->data);
882	int pool_id;
883
884	assert_rcu_or_pool_mutex();
885
886	if (data & WORK_STRUCT_PWQ)
887	return work_struct_pwq(data)->pool;
888
889	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
890	if (pool_id == WORK_OFFQ_POOL_NONE)
891	return NULL;
892
893	return idr_find(&worker_pool_idr, id: pool_id);
894	}
895
896	static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits)
897	{
898	return (v >> shift) & ((`1U` << bits) - `1`);
899	}
900
901	static void work_offqd_unpack(struct work_offq_data offqd, unsigned* long data)
902	{
903	WARN_ON_ONCE(data & WORK_STRUCT_PWQ);
904
905	offqd->pool_id = shift_and_mask(v: data, shift: WORK_OFFQ_POOL_SHIFT,
906	bits: WORK_OFFQ_POOL_BITS);
907	offqd->disable = shift_and_mask(v: data, shift: WORK_OFFQ_DISABLE_SHIFT,
908	bits: WORK_OFFQ_DISABLE_BITS);
909	offqd->flags = data & WORK_OFFQ_FLAG_MASK;
910	}
911
912	static unsigned long work_offqd_pack_flags(struct work_offq_data *offqd)
913	{
914	return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) \|
915	((unsigned long)offqd->flags);
916	}
917
918	/*
919	* Policy functions. These define the policies on how the global worker
920	* pools are managed. Unless noted otherwise, these functions assume that
921	* they're being called with pool->lock held.
922	*/
923
924	/*
925	* Need to wake up a worker? Called from anything but currently
926	* running workers.
927	*
928	* Note that, because unbound workers never contribute to nr_running, this
929	* function will always return %true for unbound pools as long as the
930	* worklist isn't empty.
931	*/
932	static bool need_more_worker(struct worker_pool *pool)
933	{
934	return !list_empty(head: &pool->worklist) && !pool->nr_running;
935	}
936
937	/ Can I start working? Called from busy but !running workers. /
938	static bool may_start_working(struct worker_pool *pool)
939	{
940	return pool->nr_idle;
941	}
942
943	/ Do I need to keep working? Called from currently running workers. /
944	static bool keep_working(struct worker_pool *pool)
945	{
946	return !list_empty(head: &pool->worklist) && (pool->nr_running <= `1`);
947	}
948
949	/ Do we need a new worker? Called from manager. /
950	static bool need_to_create_worker(struct worker_pool *pool)
951	{
952	return need_more_worker(pool) && !may_start_working(pool);
953	}
954
955	/ Do we have too many workers and should some go away? /
956	static bool too_many_workers(struct worker_pool *pool)
957	{
958	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
959	int nr_idle = pool->nr_idle + managing; / manager is considered idle /
960	int nr_busy = pool->nr_workers - nr_idle;
961
962	return nr_idle > `2` && (nr_idle - `2`) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
963	}
964
965	/**
966	* worker_set_flags - set worker flags and adjust nr_running accordingly
967	* @worker: self
968	* @flags: flags to set
969	*
970	* Set @flags in @worker->flags and adjust nr_running accordingly.
971	*/
972	static inline void worker_set_flags(struct worker worker, unsigned* int flags)
973	{
974	struct worker_pool *pool = worker->pool;
975
976	lockdep_assert_held(&pool->lock);
977
978	/ If transitioning into NOT_RUNNING, adjust nr_running. /
979	if ((flags & WORKER_NOT_RUNNING) &&
980	!(worker->flags & WORKER_NOT_RUNNING)) {
981	pool->nr_running--;
982	}
983
984	worker->flags \|= flags;
985	}
986
987	/**
988	* worker_clr_flags - clear worker flags and adjust nr_running accordingly
989	* @worker: self
990	* @flags: flags to clear
991	*
992	* Clear @flags in @worker->flags and adjust nr_running accordingly.
993	*/
994	static inline void worker_clr_flags(struct worker worker, unsigned* int flags)
995	{
996	struct worker_pool *pool = worker->pool;
997	unsigned int oflags = worker->flags;
998
999	lockdep_assert_held(&pool->lock);
1000
1001	worker->flags &= ~flags;
1002
1003	/*
1004	* If transitioning out of NOT_RUNNING, increment nr_running. Note
1005	* that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask
1006	* of multiple flags, not a single flag.
1007	*/
1008	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
1009	if (!(worker->flags & WORKER_NOT_RUNNING))
1010	pool->nr_running++;
1011	}
1012
1013	/ Return the first idle worker. Called with pool->lock held. /
1014	static struct worker first_idle_worker(struct* worker_pool *pool)
1015	{
1016	if (unlikely(list_empty(&pool->idle_list)))
1017	return NULL;
1018
1019	return list_first_entry(&pool->idle_list, struct worker, entry);
1020	}
1021
1022	/**
1023	* worker_enter_idle - enter idle state
1024	* @worker: worker which is entering idle state
1025	*
1026	* @worker is entering idle state. Update stats and idle timer if
1027	* necessary.
1028	*
1029	* LOCKING:
1030	* raw_spin_lock_irq(pool->lock).
1031	*/
1032	static void worker_enter_idle(struct worker *worker)
1033	{
1034	struct worker_pool *pool = worker->pool;
1035
1036	if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) \|\|
1037	WARN_ON_ONCE(!list_empty(&worker->entry) &&
1038	(worker->hentry.next \|\| worker->hentry.pprev)))
1039	return;
1040
1041	/ can't use worker_set_flags(), also called from create_worker() /
1042	worker->flags \|= WORKER_IDLE;
1043	pool->nr_idle++;
1044	worker->last_active = jiffies;
1045
1046	/ idle_list is LIFO /
1047	list_add(new: &worker->entry, head: &pool->idle_list);
1048
1049	if (too_many_workers(pool) && !timer_pending(timer: &pool->idle_timer))
1050	mod_timer(timer: &pool->idle_timer, expires: jiffies + IDLE_WORKER_TIMEOUT);
1051
1052	/ Sanity check nr_running. /
1053	WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running);
1054	}
1055
1056	/**
1057	* worker_leave_idle - leave idle state
1058	* @worker: worker which is leaving idle state
1059	*
1060	* @worker is leaving idle state. Update stats.
1061	*
1062	* LOCKING:
1063	* raw_spin_lock_irq(pool->lock).
1064	*/
1065	static void worker_leave_idle(struct worker *worker)
1066	{
1067	struct worker_pool *pool = worker->pool;
1068
1069	if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1070	return;
1071	worker_clr_flags(worker, flags: WORKER_IDLE);
1072	pool->nr_idle--;
1073	list_del_init(entry: &worker->entry);
1074	}
1075
1076	/**
1077	* find_worker_executing_work - find worker which is executing a work
1078	* @pool: pool of interest
1079	* @work: work to find worker for
1080	*
1081	* Find a worker which is executing @work on @pool by searching
1082	* @pool->busy_hash which is keyed by the address of @work. For a worker
1083	* to match, its current execution should match the address of @work and
1084	* its work function. This is to avoid unwanted dependency between
1085	* unrelated work executions through a work item being recycled while still
1086	* being executed.
1087	*
1088	* This is a bit tricky. A work item may be freed once its execution
1089	* starts and nothing prevents the freed area from being recycled for
1090	* another work item. If the same work item address ends up being reused
1091	* before the original execution finishes, workqueue will identify the
1092	* recycled work item as currently executing and make it wait until the
1093	* current execution finishes, introducing an unwanted dependency.
1094	*
1095	* This function checks the work item address and work function to avoid
1096	* false positives. Note that this isn't complete as one may construct a
1097	* work function which can introduce dependency onto itself through a
1098	* recycled work item. Well, if somebody wants to shoot oneself in the
1099	* foot that badly, there's only so much we can do, and if such deadlock
1100	* actually occurs, it should be easy to locate the culprit work function.
1101	*
1102	* CONTEXT:
1103	* raw_spin_lock_irq(pool->lock).
1104	*
1105	* Return:
1106	* Pointer to worker which is executing @work if found, %NULL
1107	* otherwise.
1108	*/
1109	static struct worker find_worker_executing_work(struct* worker_pool *pool,
1110	struct work_struct *work)
1111	{
1112	struct worker *worker;
1113
1114	hash_for_each_possible(pool->busy_hash, worker, hentry,
1115	(unsigned long)work)
1116	if (worker->current_work == work &&
1117	worker->current_func == work->func)
1118	return worker;
1119
1120	return NULL;
1121	}
1122
1123	/**
1124	* move_linked_works - move linked works to a list
1125	* @work: start of series of works to be scheduled
1126	* @head: target list to append @work to
1127	* @nextp: out parameter for nested worklist walking
1128	*
1129	* Schedule linked works starting from @work to @head. Work series to be
1130	* scheduled starts at @work and includes any consecutive work with
1131	* WORK_STRUCT_LINKED set in its predecessor. See assign_work() for details on
1132	* @nextp.
1133	*
1134	* CONTEXT:
1135	* raw_spin_lock_irq(pool->lock).
1136	*/
1137	static void move_linked_works(struct work_struct work, struct* list_head *head,
1138	struct work_struct **nextp)
1139	{
1140	struct work_struct *n;
1141
1142	/*
1143	* Linked worklist will always end before the end of the list,
1144	* use NULL for list head.
1145	*/
1146	list_for_each_entry_safe_from(work, n, NULL, entry) {
1147	list_move_tail(list: &work->entry, head);
1148	if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1149	break;
1150	}
1151
1152	/*
1153	* If we're already inside safe list traversal and have moved
1154	* multiple works to the scheduled queue, the next position
1155	* needs to be updated.
1156	*/
1157	if (nextp)
1158	*nextp = n;
1159	}
1160
1161	/**
1162	* assign_work - assign a work item and its linked work items to a worker
1163	* @work: work to assign
1164	* @worker: worker to assign to
1165	* @nextp: out parameter for nested worklist walking
1166	*
1167	* Assign @work and its linked work items to @worker. If @work is already being
1168	* executed by another worker in the same pool, it'll be punted there.
1169	*
1170	* If @nextp is not NULL, it's updated to point to the next work of the last
1171	* scheduled work. This allows assign_work() to be nested inside
1172	* list_for_each_entry_safe().
1173	*
1174	* Returns %true if @work was successfully assigned to @worker. %false if @work
1175	* was punted to another worker already executing it.
1176	*/
1177	static bool assign_work(struct work_struct work, struct* worker *worker,
1178	struct work_struct **nextp)
1179	{
1180	struct worker_pool *pool = worker->pool;
1181	struct worker *collision;
1182
1183	lockdep_assert_held(&pool->lock);
1184
1185	/*
1186	* A single work shouldn't be executed concurrently by multiple workers.
1187	* __queue_work() ensures that @work doesn't jump to a different pool
1188	* while still running in the previous pool. Here, we should ensure that
1189	* @work is not executed concurrently by multiple workers from the same
1190	* pool. Check whether anyone is already processing the work. If so,
1191	* defer the work to the currently executing one.
1192	*/
1193	collision = find_worker_executing_work(pool, work);
1194	if (unlikely(collision)) {
1195	move_linked_works(work, head: &collision->scheduled, nextp);
1196	return false;
1197	}
1198
1199	move_linked_works(work, head: &worker->scheduled, nextp);
1200	return true;
1201	}
1202
1203	static struct irq_work bh_pool_irq_work(struct* worker_pool *pool)
1204	{
1205	int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? `1` : `0`;
1206
1207	return &per_cpu(bh_pool_irq_works, pool->cpu)[high];
1208	}
1209
1210	static void kick_bh_pool(struct worker_pool *pool)
1211	{
1212	#ifdef CONFIG_SMP
1213	/ see drain_dead_softirq_workfn() for BH_DRAINING /
1214	if (unlikely(pool->cpu != smp_processor_id() &&
1215	!(pool->flags & POOL_BH_DRAINING))) {
1216	irq_work_queue_on(work: bh_pool_irq_work(pool), cpu: pool->cpu);
1217	return;
1218	}
1219	#endif
1220	if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
1221	raise_softirq_irqoff(nr: HI_SOFTIRQ);
1222	else
1223	raise_softirq_irqoff(nr: TASKLET_SOFTIRQ);
1224	}
1225
1226	/**
1227	* kick_pool - wake up an idle worker if necessary
1228	* @pool: pool to kick
1229	*
1230	* @pool may have pending work items. Wake up worker if necessary. Returns
1231	* whether a worker was woken up.
1232	*/
1233	static bool kick_pool(struct worker_pool *pool)
1234	{
1235	struct worker *worker = first_idle_worker(pool);
1236	struct task_struct *p;
1237
1238	lockdep_assert_held(&pool->lock);
1239
1240	if (!need_more_worker(pool) \|\| !worker)
1241	return false;
1242
1243	if (pool->flags & POOL_BH) {
1244	kick_bh_pool(pool);
1245	return true;
1246	}
1247
1248	p = worker->task;
1249
1250	#ifdef CONFIG_SMP
1251	/*
1252	* Idle @worker is about to execute @work and waking up provides an
1253	* opportunity to migrate @worker at a lower cost by setting the task's
1254	* wake_cpu field. Let's see if we want to move @worker to improve
1255	* execution locality.
1256	*
1257	* We're waking the worker that went idle the latest and there's some
1258	* chance that @worker is marked idle but hasn't gone off CPU yet. If
1259	* so, setting the wake_cpu won't do anything. As this is a best-effort
1260	* optimization and the race window is narrow, let's leave as-is for
1261	* now. If this becomes pronounced, we can skip over workers which are
1262	* still on cpu when picking an idle worker.
1263	*
1264	* If @pool has non-strict affinity, @worker might have ended up outside
1265	* its affinity scope. Repatriate.
1266	*/
1267	if (!pool->attrs->affn_strict &&
1268	!cpumask_test_cpu(cpu: p->wake_cpu, cpumask: pool->attrs->__pod_cpumask)) {
1269	struct work_struct *work = list_first_entry(&pool->worklist,
1270	struct work_struct, entry);
1271	int wake_cpu = cpumask_any_and_distribute(src1p: pool->attrs->__pod_cpumask,
1272	cpu_online_mask);
1273	if (wake_cpu < nr_cpu_ids) {
1274	p->wake_cpu = wake_cpu;
1275	get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++;
1276	}
1277	}
1278	#endif
1279	wake_up_process(tsk: p);
1280	return true;
1281	}
1282
1283	#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT
1284
1285	/*
1286	* Concurrency-managed per-cpu work items that hog CPU for longer than
1287	* wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism,
1288	* which prevents them from stalling other concurrency-managed work items. If a
1289	* work function keeps triggering this mechanism, it's likely that the work item
1290	* should be using an unbound workqueue instead.
1291	*
1292	* wq_cpu_intensive_report() tracks work functions which trigger such conditions
1293	* and report them so that they can be examined and converted to use unbound
1294	* workqueues as appropriate. To avoid flooding the console, each violating work
1295	* function is tracked and reported with exponential backoff.
1296	*/
1297	#define WCI_MAX_ENTS 128
1298
1299	struct wci_ent {
1300	work_func_t func;
1301	atomic64_t cnt;
1302	struct hlist_node hash_node;
1303	};
1304
1305	static struct wci_ent wci_ents[WCI_MAX_ENTS];
1306	static int wci_nr_ents;
1307	static DEFINE_RAW_SPINLOCK(wci_lock);
1308	static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS));
1309
1310	static struct wci_ent *wci_find_ent(work_func_t func)
1311	{
1312	struct wci_ent *ent;
1313
1314	hash_for_each_possible_rcu(wci_hash, ent, hash_node,
1315	(unsigned long)func) {
1316	if (ent->func == func)
1317	return ent;
1318	}
1319	return NULL;
1320	}
1321
1322	static void wq_cpu_intensive_report(work_func_t func)
1323	{
1324	struct wci_ent *ent;
1325
1326	restart:
1327	ent = wci_find_ent(func);
1328	if (ent) {
1329	u64 cnt;
1330
1331	/*
1332	* Start reporting from the warning_thresh and back off
1333	* exponentially.
1334	*/
1335	cnt = atomic64_inc_return_relaxed(v: &ent->cnt);
1336	if (wq_cpu_intensive_warning_thresh &&
1337	cnt >= wq_cpu_intensive_warning_thresh &&
1338	is_power_of_2(n: cnt + `1` - wq_cpu_intensive_warning_thresh))
1339	printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
1340	ent->func, wq_cpu_intensive_thresh_us,
1341	atomic64_read(&ent->cnt));
1342	return;
1343	}
1344
1345	/*
1346	* @func is a new violation. Allocate a new entry for it. If wcn_ents[]
1347	* is exhausted, something went really wrong and we probably made enough
1348	* noise already.
1349	*/
1350	if (wci_nr_ents >= WCI_MAX_ENTS)
1351	return;
1352
1353	raw_spin_lock(&wci_lock);
1354
1355	if (wci_nr_ents >= WCI_MAX_ENTS) {
1356	raw_spin_unlock(&wci_lock);
1357	return;
1358	}
1359
1360	if (wci_find_ent(func)) {
1361	raw_spin_unlock(&wci_lock);
1362	goto restart;
1363	}
1364
1365	ent = &wci_ents[wci_nr_ents++];
1366	ent->func = func;
1367	atomic64_set(v: &ent->cnt, i: `0`);
1368	hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);
1369
1370	raw_spin_unlock(&wci_lock);
1371
1372	goto restart;
1373	}
1374
1375	#else /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
1376	static void wq_cpu_intensive_report(work_func_t func) {}
1377	#endif /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
1378
1379	/**
1380	* wq_worker_running - a worker is running again
1381	* @task: task waking up
1382	*
1383	* This function is called when a worker returns from schedule()
1384	*/
1385	void wq_worker_running(struct task_struct *task)
1386	{
1387	struct worker *worker = kthread_data(k: task);
1388
1389	if (!READ_ONCE(worker->sleeping))
1390	return;
1391
1392	/*
1393	* If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
1394	* and the nr_running increment below, we may ruin the nr_running reset
1395	* and leave with an unexpected pool->nr_running == 1 on the newly unbound
1396	* pool. Protect against such race.
1397	*/
1398	preempt_disable();
1399	if (!(worker->flags & WORKER_NOT_RUNNING))
1400	worker->pool->nr_running++;
1401	preempt_enable();
1402
1403	/*
1404	* CPU intensive auto-detection cares about how long a work item hogged
1405	* CPU without sleeping. Reset the starting timestamp on wakeup.
1406	*/
1407	worker->current_at = worker->task->se.sum_exec_runtime;
1408
1409	WRITE_ONCE(worker->sleeping, `0`);
1410	}
1411
1412	/**
1413	* wq_worker_sleeping - a worker is going to sleep
1414	* @task: task going to sleep
1415	*
1416	* This function is called from schedule() when a busy worker is
1417	* going to sleep.
1418	*/
1419	void wq_worker_sleeping(struct task_struct *task)
1420	{
1421	struct worker *worker = kthread_data(k: task);
1422	struct worker_pool *pool;
1423
1424	/*
1425	* Rescuers, which may not have all the fields set up like normal
1426	* workers, also reach here, let's not access anything before
1427	* checking NOT_RUNNING.
1428	*/
1429	if (worker->flags & WORKER_NOT_RUNNING)
1430	return;
1431
1432	pool = worker->pool;
1433
1434	/ Return if preempted before wq_worker_running() was reached /
1435	if (READ_ONCE(worker->sleeping))
1436	return;
1437
1438	WRITE_ONCE(worker->sleeping, `1`);
1439	raw_spin_lock_irq(&pool->lock);
1440
1441	/*
1442	* Recheck in case unbind_workers() preempted us. We don't
1443	* want to decrement nr_running after the worker is unbound
1444	* and nr_running has been reset.
1445	*/
1446	if (worker->flags & WORKER_NOT_RUNNING) {
1447	raw_spin_unlock_irq(&pool->lock);
1448	return;
1449	}
1450
1451	pool->nr_running--;
1452	if (kick_pool(pool))
1453	worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++;
1454
1455	raw_spin_unlock_irq(&pool->lock);
1456	}
1457
1458	/**
1459	* wq_worker_tick - a scheduler tick occurred while a kworker is running
1460	* @task: task currently running
1461	*
1462	* Called from sched_tick(). We're in the IRQ context and the current
1463	* worker's fields which follow the 'K' locking rule can be accessed safely.
1464	*/
1465	void wq_worker_tick(struct task_struct *task)
1466	{
1467	struct worker *worker = kthread_data(k: task);
1468	struct pool_workqueue *pwq = worker->current_pwq;
1469	struct worker_pool *pool = worker->pool;
1470
1471	if (!pwq)
1472	return;
1473
1474	pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC;
1475
1476	if (!wq_cpu_intensive_thresh_us)
1477	return;
1478
1479	/*
1480	* If the current worker is concurrency managed and hogged the CPU for
1481	* longer than wq_cpu_intensive_thresh_us, it's automatically marked
1482	* CPU_INTENSIVE to avoid stalling other concurrency-managed work items.
1483	*
1484	* Set @worker->sleeping means that @worker is in the process of
1485	* switching out voluntarily and won't be contributing to
1486	* @pool->nr_running until it wakes up. As wq_worker_sleeping() also
1487	* decrements ->nr_running, setting CPU_INTENSIVE here can lead to
1488	* double decrements. The task is releasing the CPU anyway. Let's skip.
1489	* We probably want to make this prettier in the future.
1490	*/
1491	if ((worker->flags & WORKER_NOT_RUNNING) \|\| READ_ONCE(worker->sleeping) \|\|
1492	worker->task->se.sum_exec_runtime - worker->current_at <
1493	wq_cpu_intensive_thresh_us * NSEC_PER_USEC)
1494	return;
1495
1496	raw_spin_lock(&pool->lock);
1497
1498	worker_set_flags(worker, flags: WORKER_CPU_INTENSIVE);
1499	wq_cpu_intensive_report(func: worker->current_func);
1500	pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;
1501
1502	if (kick_pool(pool))
1503	pwq->stats[PWQ_STAT_CM_WAKEUP]++;
1504
1505	raw_spin_unlock(&pool->lock);
1506	}
1507
1508	/**
1509	* wq_worker_last_func - retrieve worker's last work function
1510	* @task: Task to retrieve last work function of.
1511	*
1512	* Determine the last function a worker executed. This is called from
1513	* the scheduler to get a worker's last known identity.
1514	*
1515	* CONTEXT:
1516	* raw_spin_lock_irq(rq->lock)
1517	*
1518	* This function is called during schedule() when a kworker is going
1519	* to sleep. It's used by psi to identify aggregation workers during
1520	* dequeuing, to allow periodic aggregation to shut-off when that
1521	* worker is the last task in the system or cgroup to go to sleep.
1522	*
1523	* As this function doesn't involve any workqueue-related locking, it
1524	* only returns stable values when called from inside the scheduler's
1525	* queuing and dequeuing paths, when @task, which must be a kworker,
1526	* is guaranteed to not be processing any works.
1527	*
1528	* Return:
1529	* The last work function %current executed as a worker, NULL if it
1530	* hasn't executed any work yet.
1531	*/
1532	work_func_t wq_worker_last_func(struct task_struct *task)
1533	{
1534	struct worker *worker = kthread_data(k: task);
1535
1536	return worker->last_func;
1537	}
1538
1539	/**
1540	* wq_node_nr_active - Determine wq_node_nr_active to use
1541	* @wq: workqueue of interest
1542	* @node: NUMA node, can be %NUMA_NO_NODE
1543	*
1544	* Determine wq_node_nr_active to use for @wq on @node. Returns:
1545	*
1546	* - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
1547	*
1548	* - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
1549	*
1550	* - Otherwise, node_nr_active[@node].
1551	*/
1552	static struct wq_node_nr_active wq_node_nr_active(struct* workqueue_struct *wq,
1553	int node)
1554	{
1555	if (!(wq->flags & WQ_UNBOUND))
1556	return NULL;
1557
1558	if (node == NUMA_NO_NODE)
1559	node = nr_node_ids;
1560
1561	return wq->node_nr_active[node];
1562	}
1563
1564	/**
1565	* wq_update_node_max_active - Update per-node max_actives to use
1566	* @wq: workqueue to update
1567	* @off_cpu: CPU that's going down, -1 if a CPU is not going down
1568	*
1569	* Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is
1570	* distributed among nodes according to the proportions of numbers of online
1571	* cpus. The result is always between @wq->min_active and max_active.
1572	*/
1573	static void wq_update_node_max_active(struct workqueue_struct wq, int* off_cpu)
1574	{
1575	struct cpumask *effective = unbound_effective_cpumask(wq);
1576	int min_active = READ_ONCE(wq->min_active);
1577	int max_active = READ_ONCE(wq->max_active);
1578	int total_cpus, node;
1579
1580	lockdep_assert_held(&wq->mutex);
1581
1582	if (!wq_topo_initialized)
1583	return;
1584
1585	if (off_cpu >= `0` && !cpumask_test_cpu(cpu: off_cpu, cpumask: effective))
1586	off_cpu = -`1`;
1587
1588	total_cpus = cpumask_weight_and(srcp1: effective, cpu_online_mask);
1589	if (off_cpu >= `0`)
1590	total_cpus--;
1591
1592	/ If all CPUs of the wq get offline, use the default values /
1593	if (unlikely(!total_cpus)) {
1594	for_each_node(node)
1595	wq_node_nr_active(wq, node)->max = min_active;
1596
1597	wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active;
1598	return;
1599	}
1600
1601	for_each_node(node) {
1602	int node_cpus;
1603
1604	node_cpus = cpumask_weight_and(srcp1: effective, srcp2: cpumask_of_node(node));
1605	if (off_cpu >= `0` && cpu_to_node(cpu: off_cpu) == node)
1606	node_cpus--;
1607
1608	wq_node_nr_active(wq, node)->max =
1609	clamp(DIV_ROUND_UP(max_active * node_cpus, total_cpus),
1610	min_active, max_active);
1611	}
1612
1613	wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active;
1614	}
1615
1616	/**
1617	* get_pwq - get an extra reference on the specified pool_workqueue
1618	* @pwq: pool_workqueue to get
1619	*
1620	* Obtain an extra reference on @pwq. The caller should guarantee that
1621	* @pwq has positive refcnt and be holding the matching pool->lock.
1622	*/
1623	static void get_pwq(struct pool_workqueue *pwq)
1624	{
1625	lockdep_assert_held(&pwq->pool->lock);
1626	WARN_ON_ONCE(pwq->refcnt <= `0`);
1627	pwq->refcnt++;
1628	}
1629
1630	/**
1631	* put_pwq - put a pool_workqueue reference
1632	* @pwq: pool_workqueue to put
1633	*
1634	* Drop a reference of @pwq. If its refcnt reaches zero, schedule its
1635	* destruction. The caller should be holding the matching pool->lock.
1636	*/
1637	static void put_pwq(struct pool_workqueue *pwq)
1638	{
1639	lockdep_assert_held(&pwq->pool->lock);
1640	if (likely(--pwq->refcnt))
1641	return;
1642	/*
1643	* @pwq can't be released under pool->lock, bounce to a dedicated
1644	* kthread_worker to avoid A-A deadlocks.
1645	*/
1646	kthread_queue_work(worker: pwq_release_worker, work: &pwq->release_work);
1647	}
1648
1649	/**
1650	* put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
1651	* @pwq: pool_workqueue to put (can be %NULL)
1652	*
1653	* put_pwq() with locking. This function also allows %NULL @pwq.
1654	*/
1655	static void put_pwq_unlocked(struct pool_workqueue *pwq)
1656	{
1657	if (pwq) {
1658	/*
1659	* As both pwqs and pools are RCU protected, the
1660	* following lock operations are safe.
1661	*/
1662	raw_spin_lock_irq(&pwq->pool->lock);
1663	put_pwq(pwq);
1664	raw_spin_unlock_irq(&pwq->pool->lock);
1665	}
1666	}
1667
1668	static bool pwq_is_empty(struct pool_workqueue *pwq)
1669	{
1670	return !pwq->nr_active && list_empty(head: &pwq->inactive_works);
1671	}
1672
1673	static void __pwq_activate_work(struct pool_workqueue *pwq,
1674	struct work_struct *work)
1675	{
1676	unsigned long *wdb = work_data_bits(work);
1677
1678	WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
1679	trace_workqueue_activate_work(work);
1680	if (list_empty(head: &pwq->pool->worklist))
1681	pwq->pool->watchdog_ts = jiffies;
1682	move_linked_works(work, head: &pwq->pool->worklist, NULL);
1683	__clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
1684	}
1685
1686	static bool tryinc_node_nr_active(struct wq_node_nr_active *nna)
1687	{
1688	int max = READ_ONCE(nna->max);
1689
1690	while (true) {
1691	int old, tmp;
1692
1693	old = atomic_read(v: &nna->nr);
1694	if (old >= max)
1695	return false;
1696	tmp = atomic_cmpxchg_relaxed(v: &nna->nr, old, new: old + `1`);
1697	if (tmp == old)
1698	return true;
1699	}
1700	}
1701
1702	/**
1703	* pwq_tryinc_nr_active - Try to increment nr_active for a pwq
1704	* @pwq: pool_workqueue of interest
1705	* @fill: max_active may have increased, try to increase concurrency level
1706	*
1707	* Try to increment nr_active for @pwq. Returns %true if an nr_active count is
1708	* successfully obtained. %false otherwise.
1709	*/
1710	static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
1711	{
1712	struct workqueue_struct *wq = pwq->wq;
1713	struct worker_pool *pool = pwq->pool;
1714	struct wq_node_nr_active *nna = wq_node_nr_active(wq, node: pool->node);
1715	bool obtained = false;
1716
1717	lockdep_assert_held(&pool->lock);
1718
1719	if (!nna) {
1720	/ BH or per-cpu workqueue, pwq->nr_active is sufficient /
1721	obtained = pwq->nr_active < READ_ONCE(wq->max_active);
1722	goto out;
1723	}
1724
1725	if (unlikely(pwq->plugged))
1726	return false;
1727
1728	/*
1729	* Unbound workqueue uses per-node shared nr_active $nna. If @pwq is
1730	* already waiting on $nna, pwq_dec_nr_active() will maintain the
1731	* concurrency level. Don't jump the line.
1732	*
1733	* We need to ignore the pending test after max_active has increased as
1734	* pwq_dec_nr_active() can only maintain the concurrency level but not
1735	* increase it. This is indicated by @fill.
1736	*/
1737	if (!list_empty(head: &pwq->pending_node) && likely(!fill))
1738	goto out;
1739
1740	obtained = tryinc_node_nr_active(nna);
1741	if (obtained)
1742	goto out;
1743
1744	/*
1745	* Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs
1746	* and try again. The smp_mb() is paired with the implied memory barrier
1747	* of atomic_dec_return() in pwq_dec_nr_active() to ensure that either
1748	* we see the decremented $nna->nr or they see non-empty
1749	* $nna->pending_pwqs.
1750	*/
1751	raw_spin_lock(&nna->lock);
1752
1753	if (list_empty(head: &pwq->pending_node))
1754	list_add_tail(new: &pwq->pending_node, head: &nna->pending_pwqs);
1755	else if (likely(!fill))
1756	goto out_unlock;
1757
1758	smp_mb();
1759
1760	obtained = tryinc_node_nr_active(nna);
1761
1762	/*
1763	* If @fill, @pwq might have already been pending. Being spuriously
1764	* pending in cold paths doesn't affect anything. Let's leave it be.
1765	*/
1766	if (obtained && likely(!fill))
1767	list_del_init(entry: &pwq->pending_node);
1768
1769	out_unlock:
1770	raw_spin_unlock(&nna->lock);
1771	out:
1772	if (obtained)
1773	pwq->nr_active++;
1774	return obtained;
1775	}
1776
1777	/**
1778	* pwq_activate_first_inactive - Activate the first inactive work item on a pwq
1779	* @pwq: pool_workqueue of interest
1780	* @fill: max_active may have increased, try to increase concurrency level
1781	*
1782	* Activate the first inactive work item of @pwq if available and allowed by
1783	* max_active limit.
1784	*
1785	* Returns %true if an inactive work item has been activated. %false if no
1786	* inactive work item is found or max_active limit is reached.
1787	*/
1788	static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill)
1789	{
1790	struct work_struct *work =
1791	list_first_entry_or_null(&pwq->inactive_works,
1792	struct work_struct, entry);
1793
1794	if (work && pwq_tryinc_nr_active(pwq, fill)) {
1795	__pwq_activate_work(pwq, work);
1796	return true;
1797	} else {
1798	return false;
1799	}
1800	}
1801
1802	/**
1803	* unplug_oldest_pwq - unplug the oldest pool_workqueue
1804	* @wq: workqueue_struct where its oldest pwq is to be unplugged
1805	*
1806	* This function should only be called for ordered workqueues where only the
1807	* oldest pwq is unplugged, the others are plugged to suspend execution to
1808	* ensure proper work item ordering::
1809	*
1810	* dfl_pwq --------------+ [P] - plugged
1811	* \|
1812	* v
1813	* pwqs -> A -> B [P] -> C [P] (newest)
1814	* \| \| \|
1815	* 1 3 5
1816	* \| \| \|
1817	* 2 4 6
1818	*
1819	* When the oldest pwq is drained and removed, this function should be called
1820	* to unplug the next oldest one to start its work item execution. Note that
1821	* pwq's are linked into wq->pwqs with the oldest first, so the first one in
1822	* the list is the oldest.
1823	*/
1824	static void unplug_oldest_pwq(struct workqueue_struct *wq)
1825	{
1826	struct pool_workqueue *pwq;
1827
1828	lockdep_assert_held(&wq->mutex);
1829
1830	/ Caller should make sure that pwqs isn't empty before calling /
1831	pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue,
1832	pwqs_node);
1833	raw_spin_lock_irq(&pwq->pool->lock);
1834	if (pwq->plugged) {
1835	pwq->plugged = false;
1836	if (pwq_activate_first_inactive(pwq, fill: true))
1837	kick_pool(pool: pwq->pool);
1838	}
1839	raw_spin_unlock_irq(&pwq->pool->lock);
1840	}
1841
1842	/**
1843	* node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
1844	* @nna: wq_node_nr_active to activate a pending pwq for
1845	* @caller_pool: worker_pool the caller is locking
1846	*
1847	* Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked.
1848	* @caller_pool may be unlocked and relocked to lock other worker_pools.
1849	*/
1850	static void node_activate_pending_pwq(struct wq_node_nr_active *nna,
1851	struct worker_pool *caller_pool)
1852	{
1853	struct worker_pool *locked_pool = caller_pool;
1854	struct pool_workqueue *pwq;
1855	struct work_struct *work;
1856
1857	lockdep_assert_held(&caller_pool->lock);
1858
1859	raw_spin_lock(&nna->lock);
1860	retry:
1861	pwq = list_first_entry_or_null(&nna->pending_pwqs,
1862	struct pool_workqueue, pending_node);
1863	if (!pwq)
1864	goto out_unlock;
1865
1866	/*
1867	* If @pwq is for a different pool than @locked_pool, we need to lock
1868	* @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock
1869	* / lock dance. For that, we also need to release @nna->lock as it's
1870	* nested inside pool locks.
1871	*/
1872	if (pwq->pool != locked_pool) {
1873	raw_spin_unlock(&locked_pool->lock);
1874	locked_pool = pwq->pool;
1875	if (!raw_spin_trylock(&locked_pool->lock)) {
1876	raw_spin_unlock(&nna->lock);
1877	raw_spin_lock(&locked_pool->lock);
1878	raw_spin_lock(&nna->lock);
1879	goto retry;
1880	}
1881	}
1882
1883	/*
1884	* $pwq may not have any inactive work items due to e.g. cancellations.
1885	* Drop it from pending_pwqs and see if there's another one.
1886	*/
1887	work = list_first_entry_or_null(&pwq->inactive_works,
1888	struct work_struct, entry);
1889	if (!work) {
1890	list_del_init(entry: &pwq->pending_node);
1891	goto retry;
1892	}
1893
1894	/*
1895	* Acquire an nr_active count and activate the inactive work item. If
1896	* $pwq still has inactive work items, rotate it to the end of the
1897	* pending_pwqs so that we round-robin through them. This means that
1898	* inactive work items are not activated in queueing order which is fine
1899	* given that there has never been any ordering across different pwqs.
1900	*/
1901	if (likely(tryinc_node_nr_active(nna))) {
1902	pwq->nr_active++;
1903	__pwq_activate_work(pwq, work);
1904
1905	if (list_empty(head: &pwq->inactive_works))
1906	list_del_init(entry: &pwq->pending_node);
1907	else
1908	list_move_tail(list: &pwq->pending_node, head: &nna->pending_pwqs);
1909
1910	/ if activating a foreign pool, make sure it's running /
1911	if (pwq->pool != caller_pool)
1912	kick_pool(pool: pwq->pool);
1913	}
1914
1915	out_unlock:
1916	raw_spin_unlock(&nna->lock);
1917	if (locked_pool != caller_pool) {
1918	raw_spin_unlock(&locked_pool->lock);
1919	raw_spin_lock(&caller_pool->lock);
1920	}
1921	}
1922
1923	/**
1924	* pwq_dec_nr_active - Retire an active count
1925	* @pwq: pool_workqueue of interest
1926	*
1927	* Decrement @pwq's nr_active and try to activate the first inactive work item.
1928	* For unbound workqueues, this function may temporarily drop @pwq->pool->lock.
1929	*/
1930	static void pwq_dec_nr_active(struct pool_workqueue *pwq)
1931	{
1932	struct worker_pool *pool = pwq->pool;
1933	struct wq_node_nr_active *nna = wq_node_nr_active(wq: pwq->wq, node: pool->node);
1934
1935	lockdep_assert_held(&pool->lock);
1936
1937	/*
1938	* @pwq->nr_active should be decremented for both percpu and unbound
1939	* workqueues.
1940	*/
1941	pwq->nr_active--;
1942
1943	/*
1944	* For a percpu workqueue, it's simple. Just need to kick the first
1945	* inactive work item on @pwq itself.
1946	*/
1947	if (!nna) {
1948	pwq_activate_first_inactive(pwq, fill: false);
1949	return;
1950	}
1951
1952	/*
1953	* If @pwq is for an unbound workqueue, it's more complicated because
1954	* multiple pwqs and pools may be sharing the nr_active count. When a
1955	* pwq needs to wait for an nr_active count, it puts itself on
1956	* $nna->pending_pwqs. The following atomic_dec_return()'s implied
1957	* memory barrier is paired with smp_mb() in pwq_tryinc_nr_active() to
1958	* guarantee that either we see non-empty pending_pwqs or they see
1959	* decremented $nna->nr.
1960	*
1961	* $nna->max may change as CPUs come online/offline and @pwq->wq's
1962	* max_active gets updated. However, it is guaranteed to be equal to or
1963	* larger than @pwq->wq->min_active which is above zero unless freezing.
1964	* This maintains the forward progress guarantee.
1965	*/
1966	if (atomic_dec_return(v: &nna->nr) >= READ_ONCE(nna->max))
1967	return;
1968
1969	if (!list_empty(head: &nna->pending_pwqs))
1970	node_activate_pending_pwq(nna, caller_pool: pool);
1971	}
1972
1973	/**
1974	* pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1975	* @pwq: pwq of interest
1976	* @work_data: work_data of work which left the queue
1977	*
1978	* A work either has completed or is removed from pending queue,
1979	* decrement nr_in_flight of its pwq and handle workqueue flushing.
1980	*
1981	* NOTE:
1982	* For unbound workqueues, this function may temporarily drop @pwq->pool->lock
1983	* and thus should be called after all other state updates for the in-flight
1984	* work item is complete.
1985	*
1986	* CONTEXT:
1987	* raw_spin_lock_irq(pool->lock).
1988	*/
1989	static void pwq_dec_nr_in_flight(struct pool_workqueue pwq, unsigned* long work_data)
1990	{
1991	int color = get_work_color(work_data);
1992
1993	if (!(work_data & WORK_STRUCT_INACTIVE))
1994	pwq_dec_nr_active(pwq);
1995
1996	pwq->nr_in_flight[color]--;
1997
1998	/ is flush in progress and are we at the flushing tip? /
1999	if (likely(pwq->flush_color != color))
2000	goto out_put;
2001
2002	/ are there still in-flight works? /
2003	if (pwq->nr_in_flight[color])
2004	goto out_put;
2005
2006	/ this pwq is done, clear flush_color /
2007	pwq->flush_color = -`1`;
2008
2009	/*
2010	* If this was the last pwq, wake up the first flusher. It
2011	* will handle the rest.
2012	*/
2013	if (atomic_dec_and_test(v: &pwq->wq->nr_pwqs_to_flush))
2014	complete(&pwq->wq->first_flusher->done);
2015	out_put:
2016	put_pwq(pwq);
2017	}
2018
2019	/**
2020	* try_to_grab_pending - steal work item from worklist and disable irq
2021	* @work: work item to steal
2022	* @cflags: %WORK_CANCEL_ flags
2023	* @irq_flags: place to store irq state
2024	*
2025	* Try to grab PENDING bit of @work. This function can handle @work in any
2026	* stable state - idle, on timer or on worklist.
2027	*
2028	* Return:
2029	*
2030	* ======== ================================================================
2031	* 1 if @work was pending and we successfully stole PENDING
2032	* 0 if @work was idle and we claimed PENDING
2033	* -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
2034	* ======== ================================================================
2035	*
2036	* Note:
2037	* On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
2038	* interrupted while holding PENDING and @work off queue, irq must be
2039	* disabled on entry. This, combined with delayed_work->timer being
2040	* irqsafe, ensures that we return -EAGAIN for finite short period of time.
2041	*
2042	* On successful return, >= 0, irq is disabled and the caller is
2043	* responsible for releasing it using local_irq_restore(*@irq_flags).
2044	*
2045	* This function is safe to call from any context including IRQ handler.
2046	*/
2047	static int try_to_grab_pending(struct work_struct *work, u32 cflags,
2048	unsigned long *irq_flags)
2049	{
2050	struct worker_pool *pool;
2051	struct pool_workqueue *pwq;
2052
2053	local_irq_save(*irq_flags);
2054
2055	/ try to steal the timer if it exists /
2056	if (cflags & WORK_CANCEL_DELAYED) {
2057	struct delayed_work *dwork = to_delayed_work(work);
2058
2059	/*
2060	* dwork->timer is irqsafe. If timer_delete() fails, it's
2061	* guaranteed that the timer is not queued anywhere and not
2062	* running on the local CPU.
2063	*/
2064	if (likely(timer_delete(&dwork->timer)))
2065	return `1`;
2066	}
2067
2068	/ try to claim PENDING the normal way /
2069	if (!test_and_set_bit(nr: WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
2070	return `0`;
2071
2072	rcu_read_lock();
2073	/*
2074	* The queueing is in progress, or it is already queued. Try to
2075	* steal it from ->worklist without clearing WORK_STRUCT_PENDING.
2076	*/
2077	pool = get_work_pool(work);
2078	if (!pool)
2079	goto fail;
2080
2081	raw_spin_lock(&pool->lock);
2082	/*
2083	* work->data is guaranteed to point to pwq only while the work
2084	* item is queued on pwq->wq, and both updating work->data to point
2085	* to pwq on queueing and to pool on dequeueing are done under
2086	* pwq->pool->lock. This in turn guarantees that, if work->data
2087	* points to pwq which is associated with a locked pool, the work
2088	* item is currently queued on that pool.
2089	*/
2090	pwq = get_work_pwq(work);
2091	if (pwq && pwq->pool == pool) {
2092	unsigned long work_data = *work_data_bits(work);
2093
2094	debug_work_deactivate(work);
2095
2096	/*
2097	* A cancelable inactive work item must be in the
2098	* pwq->inactive_works since a queued barrier can't be
2099	* canceled (see the comments in insert_wq_barrier()).
2100	*
2101	* An inactive work item cannot be deleted directly because
2102	* it might have linked barrier work items which, if left
2103	* on the inactive_works list, will confuse pwq->nr_active
2104	* management later on and cause stall. Move the linked
2105	* barrier work items to the worklist when deleting the grabbed
2106	* item. Also keep WORK_STRUCT_INACTIVE in work_data, so that
2107	* it doesn't participate in nr_active management in later
2108	* pwq_dec_nr_in_flight().
2109	*/
2110	if (work_data & WORK_STRUCT_INACTIVE)
2111	move_linked_works(work, head: &pwq->pool->worklist, NULL);
2112
2113	list_del_init(entry: &work->entry);
2114
2115	/*
2116	* work->data points to pwq iff queued. Let's point to pool. As
2117	* this destroys work->data needed by the next step, stash it.
2118	*/
2119	set_work_pool_and_keep_pending(work, pool_id: pool->id,
2120	flags: pool_offq_flags(pool));
2121
2122	/ must be the last step, see the function comment /
2123	pwq_dec_nr_in_flight(pwq, work_data);
2124
2125	raw_spin_unlock(&pool->lock);
2126	rcu_read_unlock();
2127	return `1`;
2128	}
2129	raw_spin_unlock(&pool->lock);
2130	fail:
2131	rcu_read_unlock();
2132	local_irq_restore(*irq_flags);
2133	return -EAGAIN;
2134	}
2135
2136	/**
2137	* work_grab_pending - steal work item from worklist and disable irq
2138	* @work: work item to steal
2139	* @cflags: %WORK_CANCEL_ flags
2140	* @irq_flags: place to store IRQ state
2141	*
2142	* Grab PENDING bit of @work. @work can be in any stable state - idle, on timer
2143	* or on worklist.
2144	*
2145	* Can be called from any context. IRQ is disabled on return with IRQ state
2146	* stored in *@irq_flags. The caller is responsible for re-enabling it using
2147	* local_irq_restore().
2148	*
2149	* Returns %true if @work was pending. %false if idle.
2150	*/
2151	static bool work_grab_pending(struct work_struct *work, u32 cflags,
2152	unsigned long *irq_flags)
2153	{
2154	int ret;
2155
2156	while (true) {
2157	ret = try_to_grab_pending(work, cflags, irq_flags);
2158	if (ret >= `0`)
2159	return ret;
2160	cpu_relax();
2161	}
2162	}
2163
2164	/**
2165	* insert_work - insert a work into a pool
2166	* @pwq: pwq @work belongs to
2167	* @work: work to insert
2168	* @head: insertion point
2169	* @extra_flags: extra WORK_STRUCT_* flags to set
2170	*
2171	* Insert @work which belongs to @pwq after @head. @extra_flags is or'd to
2172	* work_struct flags.
2173	*
2174	* CONTEXT:
2175	* raw_spin_lock_irq(pool->lock).
2176	*/
2177	static void insert_work(struct pool_workqueue pwq, struct* work_struct *work,
2178	struct list_head head, unsigned* int extra_flags)
2179	{
2180	debug_work_activate(work);
2181
2182	/ record the work call stack in order to print it in KASAN reports /
2183	kasan_record_aux_stack(ptr: work);
2184
2185	/ we own @work, set data and link /
2186	set_work_pwq(work, pwq, flags: extra_flags);
2187	list_add_tail(new: &work->entry, head);
2188	get_pwq(pwq);
2189	}
2190
2191	/*
2192	* Test whether @work is being queued from another work executing on the
2193	* same workqueue.
2194	*/
2195	static bool is_chained_work(struct workqueue_struct *wq)
2196	{
2197	struct worker *worker;
2198
2199	worker = current_wq_worker();
2200	/*
2201	* Return %true iff I'm a worker executing a work item on @wq. If
2202	* I'm @worker, it's safe to dereference it without locking.
2203	*/
2204	return worker && worker->current_pwq->wq == wq;
2205	}
2206
2207	/*
2208	* When queueing an unbound work item to a wq, prefer local CPU if allowed
2209	* by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
2210	* avoid perturbing sensitive tasks.
2211	*/
2212	static int wq_select_unbound_cpu(int cpu)
2213	{
2214	int new_cpu;
2215
2216	if (likely(!wq_debug_force_rr_cpu)) {
2217	if (cpumask_test_cpu(cpu, cpumask: wq_unbound_cpumask))
2218	return cpu;
2219	} else {
2220	pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
2221	}
2222
2223	new_cpu = __this_cpu_read(wq_rr_cpu_last);
2224	new_cpu = cpumask_next_and(n: new_cpu, src1p: wq_unbound_cpumask, cpu_online_mask);
2225	if (unlikely(new_cpu >= nr_cpu_ids)) {
2226	new_cpu = cpumask_first_and(srcp1: wq_unbound_cpumask, cpu_online_mask);
2227	if (unlikely(new_cpu >= nr_cpu_ids))
2228	return cpu;
2229	}
2230	__this_cpu_write(wq_rr_cpu_last, new_cpu);
2231
2232	return new_cpu;
2233	}
2234
2235	static void __queue_work(int cpu, struct workqueue_struct *wq,
2236	struct work_struct *work)
2237	{
2238	struct pool_workqueue *pwq;
2239	struct worker_pool last_pool, pool;
2240	unsigned int work_flags;
2241	unsigned int req_cpu = cpu;
2242
2243	/*
2244	* While a work item is PENDING && off queue, a task trying to
2245	* steal the PENDING will busy-loop waiting for it to either get
2246	* queued or lose PENDING. Grabbing PENDING and queueing should
2247	* happen with IRQ disabled.
2248	*/
2249	lockdep_assert_irqs_disabled();
2250
2251	/*
2252	* For a draining wq, only works from the same workqueue are
2253	* allowed. The __WQ_DESTROYING helps to spot the issue that
2254	* queues a new work item to a wq after destroy_workqueue(wq).
2255	*/
2256	if (unlikely(wq->flags & (__WQ_DESTROYING \| __WQ_DRAINING) &&
2257	WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n",
2258	work->func, wq->name))) {
2259	return;
2260	}
2261	rcu_read_lock();
2262	retry:
2263	/ pwq which will be used unless @work is executing elsewhere /
2264	if (req_cpu == WORK_CPU_UNBOUND) {
2265	if (wq->flags & WQ_UNBOUND)
2266	cpu = wq_select_unbound_cpu(raw_smp_processor_id());
2267	else
2268	cpu = raw_smp_processor_id();
2269	}
2270
2271	pwq = rcu_dereference(*per_cpu_ptr(wq->cpu_pwq, cpu));
2272	pool = pwq->pool;
2273
2274	/*
2275	* If @work was previously on a different pool, it might still be
2276	* running there, in which case the work needs to be queued on that
2277	* pool to guarantee non-reentrancy.
2278	*
2279	* For ordered workqueue, work items must be queued on the newest pwq
2280	* for accurate order management. Guaranteed order also guarantees
2281	* non-reentrancy. See the comments above unplug_oldest_pwq().
2282	*/
2283	last_pool = get_work_pool(work);
2284	if (last_pool && last_pool != pool && !(wq->flags & __WQ_ORDERED)) {
2285	struct worker *worker;
2286
2287	raw_spin_lock(&last_pool->lock);
2288
2289	worker = find_worker_executing_work(pool: last_pool, work);
2290
2291	if (worker && worker->current_pwq->wq == wq) {
2292	pwq = worker->current_pwq;
2293	pool = pwq->pool;
2294	WARN_ON_ONCE(pool != last_pool);
2295	} else {
2296	/ meh... not running there, queue here /
2297	raw_spin_unlock(&last_pool->lock);
2298	raw_spin_lock(&pool->lock);
2299	}
2300	} else {
2301	raw_spin_lock(&pool->lock);
2302	}
2303
2304	/*
2305	* pwq is determined and locked. For unbound pools, we could have raced
2306	* with pwq release and it could already be dead. If its refcnt is zero,
2307	* repeat pwq selection. Note that unbound pwqs never die without
2308	* another pwq replacing it in cpu_pwq or while work items are executing
2309	* on it, so the retrying is guaranteed to make forward-progress.
2310	*/
2311	if (unlikely(!pwq->refcnt)) {
2312	if (wq->flags & WQ_UNBOUND) {
2313	raw_spin_unlock(&pool->lock);
2314	cpu_relax();
2315	goto retry;
2316	}
2317	/ oops /
2318	WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
2319	wq->name, cpu);
2320	}
2321
2322	/ pwq determined, queue /
2323	trace_workqueue_queue_work(req_cpu, pwq, work);
2324
2325	if (WARN_ON(!list_empty(&work->entry)))
2326	goto out;
2327
2328	pwq->nr_in_flight[pwq->work_color]++;
2329	work_flags = work_color_to_flags(color: pwq->work_color);
2330
2331	/*
2332	* Limit the number of concurrently active work items to max_active.
2333	* @work must also queue behind existing inactive work items to maintain
2334	* ordering when max_active changes. See wq_adjust_max_active().
2335	*/
2336	if (list_empty(head: &pwq->inactive_works) && pwq_tryinc_nr_active(pwq, fill: false)) {
2337	if (list_empty(head: &pool->worklist))
2338	pool->watchdog_ts = jiffies;
2339
2340	trace_workqueue_activate_work(work);
2341	insert_work(pwq, work, head: &pool->worklist, extra_flags: work_flags);
2342	kick_pool(pool);
2343	} else {
2344	work_flags \|= WORK_STRUCT_INACTIVE;
2345	insert_work(pwq, work, head: &pwq->inactive_works, extra_flags: work_flags);
2346	}
2347
2348	out:
2349	raw_spin_unlock(&pool->lock);
2350	rcu_read_unlock();
2351	}
2352
2353	static bool clear_pending_if_disabled(struct work_struct *work)
2354	{
2355	unsigned long data = *work_data_bits(work);
2356	struct work_offq_data offqd;
2357
2358	if (likely((data & WORK_STRUCT_PWQ) \|\|
2359	!(data & WORK_OFFQ_DISABLE_MASK)))
2360	return false;
2361
2362	work_offqd_unpack(offqd: &offqd, data);
2363	set_work_pool_and_clear_pending(work, pool_id: offqd.pool_id,
2364	flags: work_offqd_pack_flags(offqd: &offqd));
2365	return true;
2366	}
2367
2368	/**
2369	* queue_work_on - queue work on specific cpu
2370	* @cpu: CPU number to execute work on
2371	* @wq: workqueue to use
2372	* @work: work to queue
2373	*
2374	* We queue the work to a specific CPU, the caller must ensure it
2375	* can't go away. Callers that fail to ensure that the specified
2376	* CPU cannot go away will execute on a randomly chosen CPU.
2377	* But note well that callers specifying a CPU that never has been
2378	* online will get a splat.
2379	*
2380	* Return: %false if @work was already on a queue, %true otherwise.
2381	*/
2382	bool queue_work_on(int cpu, struct workqueue_struct *wq,
2383	struct work_struct *work)
2384	{
2385	bool ret = false;
2386	unsigned long irq_flags;
2387
2388	local_irq_save(irq_flags);
2389
2390	if (!test_and_set_bit(nr: WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
2391	!clear_pending_if_disabled(work)) {
2392	__queue_work(cpu, wq, work);
2393	ret = true;
2394	}
2395
2396	local_irq_restore(irq_flags);
2397	return ret;
2398	}
2399	EXPORT_SYMBOL(queue_work_on);
2400
2401	/**
2402	* select_numa_node_cpu - Select a CPU based on NUMA node
2403	* @node: NUMA node ID that we want to select a CPU from
2404	*
2405	* This function will attempt to find a "random" cpu available on a given
2406	* node. If there are no CPUs available on the given node it will return
2407	* WORK_CPU_UNBOUND indicating that we should just schedule to any
2408	* available CPU if we need to schedule this work.
2409	*/
2410	static int select_numa_node_cpu(int node)
2411	{
2412	int cpu;
2413
2414	/ Delay binding to CPU if node is not valid or online /
2415	if (node < `0` \|\| node >= MAX_NUMNODES \|\| !node_online(node))
2416	return WORK_CPU_UNBOUND;
2417
2418	/ Use local node/cpu if we are already there /
2419	cpu = raw_smp_processor_id();
2420	if (node == cpu_to_node(cpu))
2421	return cpu;
2422
2423	/ Use "random" otherwise know as "first" online CPU of node /
2424	cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
2425
2426	/ If CPU is valid return that, otherwise just defer /
2427	return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
2428	}
2429
2430	/**
2431	* queue_work_node - queue work on a "random" cpu for a given NUMA node
2432	* @node: NUMA node that we are targeting the work for
2433	* @wq: workqueue to use
2434	* @work: work to queue
2435	*
2436	* We queue the work to a "random" CPU within a given NUMA node. The basic
2437	* idea here is to provide a way to somehow associate work with a given
2438	* NUMA node.
2439	*
2440	* This function will only make a best effort attempt at getting this onto
2441	* the right NUMA node. If no node is requested or the requested node is
2442	* offline then we just fall back to standard queue_work behavior.
2443	*
2444	* Currently the "random" CPU ends up being the first available CPU in the
2445	* intersection of cpu_online_mask and the cpumask of the node, unless we
2446	* are running on the node. In that case we just use the current CPU.
2447	*
2448	* Return: %false if @work was already on a queue, %true otherwise.
2449	*/
2450	bool queue_work_node(int node, struct workqueue_struct *wq,
2451	struct work_struct *work)
2452	{
2453	unsigned long irq_flags;
2454	bool ret = false;
2455
2456	/*
2457	* This current implementation is specific to unbound workqueues.
2458	* Specifically we only return the first available CPU for a given
2459	* node instead of cycling through individual CPUs within the node.
2460	*
2461	* If this is used with a per-cpu workqueue then the logic in
2462	* workqueue_select_cpu_near would need to be updated to allow for
2463	* some round robin type logic.
2464	*/
2465	WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
2466
2467	local_irq_save(irq_flags);
2468
2469	if (!test_and_set_bit(nr: WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
2470	!clear_pending_if_disabled(work)) {
2471	int cpu = select_numa_node_cpu(node);
2472
2473	__queue_work(cpu, wq, work);
2474	ret = true;
2475	}
2476
2477	local_irq_restore(irq_flags);
2478	return ret;
2479	}
2480	EXPORT_SYMBOL_GPL(queue_work_node);
2481
2482	void delayed_work_timer_fn(struct timer_list *t)
2483	{
2484	struct delayed_work *dwork = timer_container_of(dwork, t, timer);
2485
2486	/ should have been called from irqsafe timer with irq already off /
2487	__queue_work(cpu: dwork->cpu, wq: dwork->wq, work: &dwork->work);
2488	}
2489	EXPORT_SYMBOL(delayed_work_timer_fn);
2490
2491	static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
2492	struct delayed_work dwork, unsigned* long delay)
2493	{
2494	struct timer_list *timer = &dwork->timer;
2495	struct work_struct *work = &dwork->work;
2496
2497	WARN_ON_ONCE(!wq);
2498	WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
2499	WARN_ON_ONCE(timer_pending(timer));
2500	WARN_ON_ONCE(!list_empty(&work->entry));
2501
2502	/*
2503	* If @delay is 0, queue @dwork->work immediately. This is for
2504	* both optimization and correctness. The earliest @timer can
2505	* expire is on the closest next tick and delayed_work users depend
2506	* on that there's no such delay when @delay is 0.
2507	*/
2508	if (!delay) {
2509	__queue_work(cpu, wq, work: &dwork->work);
2510	return;
2511	}
2512
2513	WARN_ON_ONCE(cpu != WORK_CPU_UNBOUND && !cpu_online(cpu));
2514	dwork->wq = wq;
2515	dwork->cpu = cpu;
2516	timer->expires = jiffies + delay;
2517
2518	if (housekeeping_enabled(type: HK_TYPE_TIMER)) {
2519	/ If the current cpu is a housekeeping cpu, use it. /
2520	cpu = smp_processor_id();
2521	if (!housekeeping_test_cpu(cpu, type: HK_TYPE_TIMER))
2522	cpu = housekeeping_any_cpu(type: HK_TYPE_TIMER);
2523	add_timer_on(timer, cpu);
2524	} else {
2525	if (likely(cpu == WORK_CPU_UNBOUND))
2526	add_timer_global(timer);
2527	else
2528	add_timer_on(timer, cpu);
2529	}
2530	}
2531
2532	/**
2533	* queue_delayed_work_on - queue work on specific CPU after delay
2534	* @cpu: CPU number to execute work on
2535	* @wq: workqueue to use
2536	* @dwork: work to queue
2537	* @delay: number of jiffies to wait before queueing
2538	*
2539	* We queue the delayed_work to a specific CPU, for non-zero delays the
2540	* caller must ensure it is online and can't go away. Callers that fail
2541	* to ensure this, may get @dwork->timer queued to an offlined CPU and
2542	* this will prevent queueing of @dwork->work unless the offlined CPU
2543	* becomes online again.
2544	*
2545	* Return: %false if @work was already on a queue, %true otherwise. If
2546	* @delay is zero and @dwork is idle, it will be scheduled for immediate
2547	* execution.
2548	*/
2549	bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
2550	struct delayed_work dwork, unsigned* long delay)
2551	{
2552	struct work_struct *work = &dwork->work;
2553	bool ret = false;
2554	unsigned long irq_flags;
2555
2556	/ read the comment in __queue_work() /
2557	local_irq_save(irq_flags);
2558
2559	if (!test_and_set_bit(nr: WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
2560	!clear_pending_if_disabled(work)) {
2561	__queue_delayed_work(cpu, wq, dwork, delay);
2562	ret = true;
2563	}
2564
2565	local_irq_restore(irq_flags);
2566	return ret;
2567	}
2568	EXPORT_SYMBOL(queue_delayed_work_on);
2569
2570	/**
2571	* mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
2572	* @cpu: CPU number to execute work on
2573	* @wq: workqueue to use
2574	* @dwork: work to queue
2575	* @delay: number of jiffies to wait before queueing
2576	*
2577	* If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
2578	* modify @dwork's timer so that it expires after @delay. If @delay is
2579	* zero, @work is guaranteed to be scheduled immediately regardless of its
2580	* current state.
2581	*
2582	* Return: %false if @dwork was idle and queued, %true if @dwork was
2583	* pending and its timer was modified.
2584	*
2585	* This function is safe to call from any context including IRQ handler.
2586	* See try_to_grab_pending() for details.
2587	*/
2588	bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
2589	struct delayed_work dwork, unsigned* long delay)
2590	{
2591	unsigned long irq_flags;
2592	bool ret;
2593
2594	ret = work_grab_pending(work: &dwork->work, cflags: WORK_CANCEL_DELAYED, irq_flags: &irq_flags);
2595
2596	if (!clear_pending_if_disabled(work: &dwork->work))
2597	__queue_delayed_work(cpu, wq, dwork, delay);
2598
2599	local_irq_restore(irq_flags);
2600	return ret;
2601	}
2602	EXPORT_SYMBOL_GPL(mod_delayed_work_on);
2603
2604	static void rcu_work_rcufn(struct rcu_head *rcu)
2605	{
2606	struct rcu_work rwork = container_of(rcu, struct* rcu_work, rcu);
2607
2608	/ read the comment in __queue_work() /
2609	local_irq_disable();
2610	__queue_work(cpu: WORK_CPU_UNBOUND, wq: rwork->wq, work: &rwork->work);
2611	local_irq_enable();
2612	}
2613
2614	/**
2615	* queue_rcu_work - queue work after a RCU grace period
2616	* @wq: workqueue to use
2617	* @rwork: work to queue
2618	*
2619	* Return: %false if @rwork was already pending, %true otherwise. Note
2620	* that a full RCU grace period is guaranteed only after a %true return.
2621	* While @rwork is guaranteed to be executed after a %false return, the
2622	* execution may happen before a full RCU grace period has passed.
2623	*/
2624	bool queue_rcu_work(struct workqueue_struct wq, struct* rcu_work *rwork)
2625	{
2626	struct work_struct *work = &rwork->work;
2627
2628	/*
2629	* rcu_work can't be canceled or disabled. Warn if the user reached
2630	* inside @rwork and disabled the inner work.
2631	*/
2632	if (!test_and_set_bit(nr: WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
2633	!WARN_ON_ONCE(clear_pending_if_disabled(work))) {
2634	rwork->wq = wq;
2635	call_rcu_hurry(head: &rwork->rcu, func: rcu_work_rcufn);
2636	return true;
2637	}
2638
2639	return false;
2640	}
2641	EXPORT_SYMBOL(queue_rcu_work);
2642
2643	static struct worker alloc_worker(int* node)
2644	{
2645	struct worker *worker;
2646
2647	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
2648	if (worker) {
2649	INIT_LIST_HEAD(list: &worker->entry);
2650	INIT_LIST_HEAD(list: &worker->scheduled);
2651	INIT_LIST_HEAD(list: &worker->node);
2652	/ on creation a worker is in !idle && prep state /
2653	worker->flags = WORKER_PREP;
2654	}
2655	return worker;
2656	}
2657
2658	static cpumask_t pool_allowed_cpus(struct* worker_pool *pool)
2659	{
2660	if (pool->cpu < `0` && pool->attrs->affn_strict)
2661	return pool->attrs->__pod_cpumask;
2662	else
2663	return pool->attrs->cpumask;
2664	}
2665
2666	/**
2667	* worker_attach_to_pool() - attach a worker to a pool
2668	* @worker: worker to be attached
2669	* @pool: the target pool
2670	*
2671	* Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
2672	* cpu-binding of @worker are kept coordinated with the pool across
2673	* cpu-[un]hotplugs.
2674	*/
2675	static void worker_attach_to_pool(struct worker *worker,
2676	struct worker_pool *pool)
2677	{
2678	mutex_lock(&wq_pool_attach_mutex);
2679
2680	/*
2681	* The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains stable
2682	* across this function. See the comments above the flag definition for
2683	* details. BH workers are, while per-CPU, always DISASSOCIATED.
2684	*/
2685	if (pool->flags & POOL_DISASSOCIATED) {
2686	worker->flags \|= WORKER_UNBOUND;
2687	} else {
2688	WARN_ON_ONCE(pool->flags & POOL_BH);
2689	kthread_set_per_cpu(k: worker->task, cpu: pool->cpu);
2690	}
2691
2692	if (worker->rescue_wq)
2693	set_cpus_allowed_ptr(p: worker->task, new_mask: pool_allowed_cpus(pool));
2694
2695	list_add_tail(new: &worker->node, head: &pool->workers);
2696	worker->pool = pool;
2697
2698	mutex_unlock(lock: &wq_pool_attach_mutex);
2699	}
2700
2701	static void unbind_worker(struct worker *worker)
2702	{
2703	lockdep_assert_held(&wq_pool_attach_mutex);
2704
2705	kthread_set_per_cpu(k: worker->task, cpu: -`1`);
2706	if (cpumask_intersects(src1p: wq_unbound_cpumask, cpu_active_mask))
2707	WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < `0`);
2708	else
2709	WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < `0`);
2710	}
2711
2712
2713	static void detach_worker(struct worker *worker)
2714	{
2715	lockdep_assert_held(&wq_pool_attach_mutex);
2716
2717	unbind_worker(worker);
2718	list_del(entry: &worker->node);
2719	}
2720
2721	/**
2722	* worker_detach_from_pool() - detach a worker from its pool
2723	* @worker: worker which is attached to its pool
2724	*
2725	* Undo the attaching which had been done in worker_attach_to_pool(). The
2726	* caller worker shouldn't access to the pool after detached except it has
2727	* other reference to the pool.
2728	*/
2729	static void worker_detach_from_pool(struct worker *worker)
2730	{
2731	struct worker_pool *pool = worker->pool;
2732
2733	/ there is one permanent BH worker per CPU which should never detach /
2734	WARN_ON_ONCE(pool->flags & POOL_BH);
2735
2736	mutex_lock(&wq_pool_attach_mutex);
2737	detach_worker(worker);
2738	worker->pool = NULL;
2739	mutex_unlock(lock: &wq_pool_attach_mutex);
2740
2741	/ clear leftover flags without pool->lock after it is detached /
2742	worker->flags &= ~(WORKER_UNBOUND \| WORKER_REBOUND);
2743	}
2744
2745	static int format_worker_id(char buf, size_t size, struct* worker *worker,
2746	struct worker_pool *pool)
2747	{
2748	if (worker->rescue_wq)
2749	return scnprintf(buf, size, fmt: "kworker/R-%s",
2750	worker->rescue_wq->name);
2751
2752	if (pool) {
2753	if (pool->cpu >= `0`)
2754	return scnprintf(buf, size, fmt: "kworker/%d:%d%s",
2755	pool->cpu, worker->id,
2756	pool->attrs->nice < `0` ? "H" : "");
2757	else
2758	return scnprintf(buf, size, fmt: "kworker/u%d:%d",
2759	pool->id, worker->id);
2760	} else {
2761	return scnprintf(buf, size, fmt: "kworker/dying");
2762	}
2763	}
2764
2765	/**
2766	* create_worker - create a new workqueue worker
2767	* @pool: pool the new worker will belong to
2768	*
2769	* Create and start a new worker which is attached to @pool.
2770	*
2771	* CONTEXT:
2772	* Might sleep. Does GFP_KERNEL allocations.
2773	*
2774	* Return:
2775	* Pointer to the newly created worker.
2776	*/
2777	static struct worker create_worker(struct* worker_pool *pool)
2778	{
2779	struct worker *worker;
2780	int id;
2781
2782	/ ID is needed to determine kthread name /
2783	id = ida_alloc(ida: &pool->worker_ida, GFP_KERNEL);
2784	if (id < `0`) {
2785	pr_err_once("workqueue: Failed to allocate a worker ID: %pe\n",
2786	ERR_PTR(id));
2787	return NULL;
2788	}
2789
2790	worker = alloc_worker(node: pool->node);
2791	if (!worker) {
2792	pr_err_once("workqueue: Failed to allocate a worker\n");
2793	goto fail;
2794	}
2795
2796	worker->id = id;
2797
2798	if (!(pool->flags & POOL_BH)) {
2799	char id_buf[WORKER_ID_LEN];
2800
2801	format_worker_id(buf: id_buf, size: sizeof(id_buf), worker, pool);
2802	worker->task = kthread_create_on_node(threadfn: worker_thread, data: worker,
2803	node: pool->node, namefmt: "%s", id_buf);
2804	if (IS_ERR(ptr: worker->task)) {
2805	if (PTR_ERR(ptr: worker->task) == -EINTR) {
2806	pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n",
2807	id_buf);
2808	} else {
2809	pr_err_once("workqueue: Failed to create a worker thread: %pe",
2810	worker->task);
2811	}
2812	goto fail;
2813	}
2814
2815	set_user_nice(p: worker->task, nice: pool->attrs->nice);
2816	kthread_bind_mask(k: worker->task, mask: pool_allowed_cpus(pool));
2817	}
2818
2819	/ successful, attach the worker to the pool /
2820	worker_attach_to_pool(worker, pool);
2821
2822	/ start the newly created worker /
2823	raw_spin_lock_irq(&pool->lock);
2824
2825	worker->pool->nr_workers++;
2826	worker_enter_idle(worker);
2827
2828	/*
2829	* @worker is waiting on a completion in kthread() and will trigger hung
2830	* check if not woken up soon. As kick_pool() is noop if @pool is empty,
2831	* wake it up explicitly.
2832	*/
2833	if (worker->task)
2834	wake_up_process(tsk: worker->task);
2835
2836	raw_spin_unlock_irq(&pool->lock);
2837
2838	return worker;
2839
2840	fail:
2841	ida_free(&pool->worker_ida, id);
2842	kfree(objp: worker);
2843	return NULL;
2844	}
2845
2846	static void detach_dying_workers(struct list_head *cull_list)
2847	{
2848	struct worker *worker;
2849
2850	list_for_each_entry(worker, cull_list, entry)
2851	detach_worker(worker);
2852	}
2853
2854	static void reap_dying_workers(struct list_head *cull_list)
2855	{
2856	struct worker worker, tmp;
2857
2858	list_for_each_entry_safe(worker, tmp, cull_list, entry) {
2859	list_del_init(entry: &worker->entry);
2860	kthread_stop_put(k: worker->task);
2861	kfree(objp: worker);
2862	}
2863	}
2864
2865	/**
2866	* set_worker_dying - Tag a worker for destruction
2867	* @worker: worker to be destroyed
2868	* @list: transfer worker away from its pool->idle_list and into list
2869	*
2870	* Tag @worker for destruction and adjust @pool stats accordingly. The worker
2871	* should be idle.
2872	*
2873	* CONTEXT:
2874	* raw_spin_lock_irq(pool->lock).
2875	*/
2876	static void set_worker_dying(struct worker worker, struct* list_head *list)
2877	{
2878	struct worker_pool *pool = worker->pool;
2879
2880	lockdep_assert_held(&pool->lock);
2881	lockdep_assert_held(&wq_pool_attach_mutex);
2882
2883	/ sanity check frenzy /
2884	if (WARN_ON(worker->current_work) \|\|
2885	WARN_ON(!list_empty(&worker->scheduled)) \|\|
2886	WARN_ON(!(worker->flags & WORKER_IDLE)))
2887	return;
2888
2889	pool->nr_workers--;
2890	pool->nr_idle--;
2891
2892	worker->flags \|= WORKER_DIE;
2893
2894	list_move(list: &worker->entry, head: list);
2895
2896	/ get an extra task struct reference for later kthread_stop_put() /
2897	get_task_struct(t: worker->task);
2898	}
2899
2900	/**
2901	* idle_worker_timeout - check if some idle workers can now be deleted.
2902	* @t: The pool's idle_timer that just expired
2903	*
2904	* The timer is armed in worker_enter_idle(). Note that it isn't disarmed in
2905	* worker_leave_idle(), as a worker flicking between idle and active while its
2906	* pool is at the too_many_workers() tipping point would cause too much timer
2907	* housekeeping overhead. Since IDLE_WORKER_TIMEOUT is long enough, we just let
2908	* it expire and re-evaluate things from there.
2909	*/
2910	static void idle_worker_timeout(struct timer_list *t)
2911	{
2912	struct worker_pool *pool = timer_container_of(pool, t, idle_timer);
2913	bool do_cull = false;
2914
2915	if (work_pending(&pool->idle_cull_work))
2916	return;
2917
2918	raw_spin_lock_irq(&pool->lock);
2919
2920	if (too_many_workers(pool)) {
2921	struct worker *worker;
2922	unsigned long expires;
2923
2924	/ idle_list is kept in LIFO order, check the last one /
2925	worker = list_last_entry(&pool->idle_list, struct worker, entry);
2926	expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2927	do_cull = !time_before(jiffies, expires);
2928
2929	if (!do_cull)
2930	mod_timer(timer: &pool->idle_timer, expires);
2931	}
2932	raw_spin_unlock_irq(&pool->lock);
2933
2934	if (do_cull)
2935	queue_work(wq: system_unbound_wq, work: &pool->idle_cull_work);
2936	}
2937
2938	/**
2939	* idle_cull_fn - cull workers that have been idle for too long.
2940	* @work: the pool's work for handling these idle workers
2941	*
2942	* This goes through a pool's idle workers and gets rid of those that have been
2943	* idle for at least IDLE_WORKER_TIMEOUT seconds.
2944	*
2945	* We don't want to disturb isolated CPUs because of a pcpu kworker being
2946	* culled, so this also resets worker affinity. This requires a sleepable
2947	* context, hence the split between timer callback and work item.
2948	*/
2949	static void idle_cull_fn(struct work_struct *work)
2950	{
2951	struct worker_pool pool = container_of(work, struct* worker_pool, idle_cull_work);
2952	LIST_HEAD(cull_list);
2953
2954	/*
2955	* Grabbing wq_pool_attach_mutex here ensures an already-running worker
2956	* cannot proceed beyong set_pf_worker() in its self-destruct path.
2957	* This is required as a previously-preempted worker could run after
2958	* set_worker_dying() has happened but before detach_dying_workers() did.
2959	*/
2960	mutex_lock(&wq_pool_attach_mutex);
2961	raw_spin_lock_irq(&pool->lock);
2962
2963	while (too_many_workers(pool)) {
2964	struct worker *worker;
2965	unsigned long expires;
2966
2967	worker = list_last_entry(&pool->idle_list, struct worker, entry);
2968	expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2969
2970	if (time_before(jiffies, expires)) {
2971	mod_timer(timer: &pool->idle_timer, expires);
2972	break;
2973	}
2974
2975	set_worker_dying(worker, list: &cull_list);
2976	}
2977
2978	raw_spin_unlock_irq(&pool->lock);
2979	detach_dying_workers(cull_list: &cull_list);
2980	mutex_unlock(lock: &wq_pool_attach_mutex);
2981
2982	reap_dying_workers(cull_list: &cull_list);
2983	}
2984
2985	static void send_mayday(struct work_struct *work)
2986	{
2987	struct pool_workqueue *pwq = get_work_pwq(work);
2988	struct workqueue_struct *wq = pwq->wq;
2989
2990	lockdep_assert_held(&wq_mayday_lock);
2991
2992	if (!wq->rescuer)
2993	return;
2994
2995	/ mayday mayday mayday /
2996	if (list_empty(head: &pwq->mayday_node)) {
2997	/*
2998	* If @pwq is for an unbound wq, its base ref may be put at
2999	* any time due to an attribute change. Pin @pwq until the
3000	* rescuer is done with it.
3001	*/
3002	get_pwq(pwq);
3003	list_add_tail(new: &pwq->mayday_node, head: &wq->maydays);
3004	wake_up_process(tsk: wq->rescuer->task);
3005	pwq->stats[PWQ_STAT_MAYDAY]++;
3006	}
3007	}
3008
3009	static void pool_mayday_timeout(struct timer_list *t)
3010	{
3011	struct worker_pool *pool = timer_container_of(pool, t, mayday_timer);
3012	struct work_struct *work;
3013
3014	raw_spin_lock_irq(&pool->lock);
3015	raw_spin_lock(&wq_mayday_lock); / for wq->maydays /
3016
3017	if (need_to_create_worker(pool)) {
3018	/*
3019	* We've been trying to create a new worker but
3020	* haven't been successful. We might be hitting an
3021	* allocation deadlock. Send distress signals to
3022	* rescuers.
3023	*/
3024	list_for_each_entry(work, &pool->worklist, entry)
3025	send_mayday(work);
3026	}
3027
3028	raw_spin_unlock(&wq_mayday_lock);
3029	raw_spin_unlock_irq(&pool->lock);
3030
3031	mod_timer(timer: &pool->mayday_timer, expires: jiffies + MAYDAY_INTERVAL);
3032	}
3033
3034	/**
3035	* maybe_create_worker - create a new worker if necessary
3036	* @pool: pool to create a new worker for
3037	*
3038	* Create a new worker for @pool if necessary. @pool is guaranteed to
3039	* have at least one idle worker on return from this function. If
3040	* creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
3041	* sent to all rescuers with works scheduled on @pool to resolve
3042	* possible allocation deadlock.
3043	*
3044	* On return, need_to_create_worker() is guaranteed to be %false and
3045	* may_start_working() %true.
3046	*
3047	* LOCKING:
3048	* raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3049	* multiple times. Does GFP_KERNEL allocations. Called only from
3050	* manager.
3051	*/
3052	static void maybe_create_worker(struct worker_pool *pool)
3053	__releases(&pool->lock)
3054	__acquires(&pool->lock)
3055	{
3056	restart:
3057	raw_spin_unlock_irq(&pool->lock);
3058
3059	/ if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help /
3060	mod_timer(timer: &pool->mayday_timer, expires: jiffies + MAYDAY_INITIAL_TIMEOUT);
3061
3062	while (true) {
3063	if (create_worker(pool) \|\| !need_to_create_worker(pool))
3064	break;
3065
3066	schedule_timeout_interruptible(timeout: CREATE_COOLDOWN);
3067
3068	if (!need_to_create_worker(pool))
3069	break;
3070	}
3071
3072	timer_delete_sync(timer: &pool->mayday_timer);
3073	raw_spin_lock_irq(&pool->lock);
3074	/*
3075	* This is necessary even after a new worker was just successfully
3076	* created as @pool->lock was dropped and the new worker might have
3077	* already become busy.
3078	*/
3079	if (need_to_create_worker(pool))
3080	goto restart;
3081	}
3082
3083	/**
3084	* manage_workers - manage worker pool
3085	* @worker: self
3086	*
3087	* Assume the manager role and manage the worker pool @worker belongs
3088	* to. At any given time, there can be only zero or one manager per
3089	* pool. The exclusion is handled automatically by this function.
3090	*
3091	* The caller can safely start processing works on false return. On
3092	* true return, it's guaranteed that need_to_create_worker() is false
3093	* and may_start_working() is true.
3094	*
3095	* CONTEXT:
3096	* raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3097	* multiple times. Does GFP_KERNEL allocations.
3098	*
3099	* Return:
3100	* %false if the pool doesn't need management and the caller can safely
3101	* start processing works, %true if management function was performed and
3102	* the conditions that the caller verified before calling the function may
3103	* no longer be true.
3104	*/
3105	static bool manage_workers(struct worker *worker)
3106	{
3107	struct worker_pool *pool = worker->pool;
3108
3109	if (pool->flags & POOL_MANAGER_ACTIVE)
3110	return false;
3111
3112	pool->flags \|= POOL_MANAGER_ACTIVE;
3113	pool->manager = worker;
3114
3115	maybe_create_worker(pool);
3116
3117	pool->manager = NULL;
3118	pool->flags &= ~POOL_MANAGER_ACTIVE;
3119	rcuwait_wake_up(w: &manager_wait);
3120	return true;
3121	}
3122
3123	/**
3124	* process_one_work - process single work
3125	* @worker: self
3126	* @work: work to process
3127	*
3128	* Process @work. This function contains all the logics necessary to
3129	* process a single work including synchronization against and
3130	* interaction with other workers on the same cpu, queueing and
3131	* flushing. As long as context requirement is met, any worker can
3132	* call this function to process a work.
3133	*
3134	* CONTEXT:
3135	* raw_spin_lock_irq(pool->lock) which is released and regrabbed.
3136	*/
3137	static void process_one_work(struct worker worker, struct* work_struct *work)
3138	__releases(&pool->lock)
3139	__acquires(&pool->lock)
3140	{
3141	struct pool_workqueue *pwq = get_work_pwq(work);
3142	struct worker_pool *pool = worker->pool;
3143	unsigned long work_data;
3144	int lockdep_start_depth, rcu_start_depth;
3145	bool bh_draining = pool->flags & POOL_BH_DRAINING;
3146	#ifdef CONFIG_LOCKDEP
3147	/*
3148	* It is permissible to free the struct work_struct from
3149	* inside the function that is called from it, this we need to
3150	* take into account for lockdep too. To avoid bogus "held
3151	* lock freed" warnings as well as problems when looking into
3152	* work->lockdep_map, make a copy and use that here.
3153	*/
3154	struct lockdep_map lockdep_map;
3155
3156	lockdep_copy_map(to: &lockdep_map, from: &work->lockdep_map);
3157	#endif
3158	/ ensure we're on the correct CPU /
3159	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
3160	raw_smp_processor_id() != pool->cpu);
3161
3162	/ claim and dequeue /
3163	debug_work_deactivate(work);
3164	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
3165	worker->current_work = work;
3166	worker->current_func = work->func;
3167	worker->current_pwq = pwq;
3168	if (worker->task)
3169	worker->current_at = worker->task->se.sum_exec_runtime;
3170	work_data = *work_data_bits(work);
3171	worker->current_color = get_work_color(work_data);
3172
3173	/*
3174	* Record wq name for cmdline and debug reporting, may get
3175	* overridden through set_worker_desc().
3176	*/
3177	strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
3178
3179	list_del_init(entry: &work->entry);
3180
3181	/*
3182	* CPU intensive works don't participate in concurrency management.
3183	* They're the scheduler's responsibility. This takes @worker out
3184	* of concurrency management and the next code block will chain
3185	* execution of the pending work items.
3186	*/
3187	if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE))
3188	worker_set_flags(worker, flags: WORKER_CPU_INTENSIVE);
3189
3190	/*
3191	* Kick @pool if necessary. It's always noop for per-cpu worker pools
3192	* since nr_running would always be >= 1 at this point. This is used to
3193	* chain execution of the pending work items for WORKER_NOT_RUNNING
3194	* workers such as the UNBOUND and CPU_INTENSIVE ones.
3195	*/
3196	kick_pool(pool);
3197
3198	/*
3199	* Record the last pool and clear PENDING which should be the last
3200	* update to @work. Also, do this inside @pool->lock so that
3201	* PENDING and queued state changes happen together while IRQ is
3202	* disabled.
3203	*/
3204	set_work_pool_and_clear_pending(work, pool_id: pool->id, flags: pool_offq_flags(pool));
3205
3206	pwq->stats[PWQ_STAT_STARTED]++;
3207	raw_spin_unlock_irq(&pool->lock);
3208
3209	rcu_start_depth = rcu_preempt_depth();
3210	lockdep_start_depth = lockdep_depth(current);
3211	/ see drain_dead_softirq_workfn() /
3212	if (!bh_draining)
3213	lock_map_acquire(pwq->wq->lockdep_map);
3214	lock_map_acquire(&lockdep_map);
3215	/*
3216	* Strictly speaking we should mark the invariant state without holding
3217	* any locks, that is, before these two lock_map_acquire()'s.
3218	*
3219	* However, that would result in:
3220	*
3221	* A(W1)
3222	* WFC(C)
3223	* A(W1)
3224	* C(C)
3225	*
3226	* Which would create W1->C->W1 dependencies, even though there is no
3227	* actual deadlock possible. There are two solutions, using a
3228	* read-recursive acquire on the work(queue) 'locks', but this will then
3229	* hit the lockdep limitation on recursive locks, or simply discard
3230	* these locks.
3231	*
3232	* AFAICT there is no possible deadlock scenario between the
3233	* flush_work() and complete() primitives (except for single-threaded
3234	* workqueues), so hiding them isn't a problem.
3235	*/
3236	lockdep_invariant_state(force: true);
3237	trace_workqueue_execute_start(work);
3238	worker->current_func(work);
3239	/*
3240	* While we must be careful to not use "work" after this, the trace
3241	* point will only record its address.
3242	*/
3243	trace_workqueue_execute_end(work, function: worker->current_func);
3244
3245	lock_map_release(&lockdep_map);
3246	if (!bh_draining)
3247	lock_map_release(pwq->wq->lockdep_map);
3248
3249	if (unlikely((worker->task && in_atomic()) \|\|
3250	lockdep_depth(current) != lockdep_start_depth \|\|
3251	rcu_preempt_depth() != rcu_start_depth)) {
3252	pr_err("BUG: workqueue leaked atomic, lock or RCU: %s[%d]\n"
3253	" preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n",
3254	current->comm, task_pid_nr(current), preempt_count(),
3255	lockdep_start_depth, lockdep_depth(current),
3256	rcu_start_depth, rcu_preempt_depth(),
3257	worker->current_func);
3258	debug_show_held_locks(current);
3259	dump_stack();
3260	}
3261
3262	/*
3263	* The following prevents a kworker from hogging CPU on !PREEMPTION
3264	* kernels, where a requeueing work item waiting for something to
3265	* happen could deadlock with stop_machine as such work item could
3266	* indefinitely requeue itself while all other CPUs are trapped in
3267	* stop_machine. At the same time, report a quiescent RCU state so
3268	* the same condition doesn't freeze RCU.
3269	*/
3270	if (worker->task)
3271	cond_resched();
3272
3273	raw_spin_lock_irq(&pool->lock);
3274
3275	pwq->stats[PWQ_STAT_COMPLETED]++;
3276
3277	/*
3278	* In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked
3279	* CPU intensive by wq_worker_tick() if @work hogged CPU longer than
3280	* wq_cpu_intensive_thresh_us. Clear it.
3281	*/
3282	worker_clr_flags(worker, flags: WORKER_CPU_INTENSIVE);
3283
3284	/ tag the worker for identification in schedule() /
3285	worker->last_func = worker->current_func;
3286
3287	/ we're done with it, release /
3288	hash_del(node: &worker->hentry);
3289	worker->current_work = NULL;
3290	worker->current_func = NULL;
3291	worker->current_pwq = NULL;
3292	worker->current_color = INT_MAX;
3293
3294	/ must be the last step, see the function comment /
3295	pwq_dec_nr_in_flight(pwq, work_data);
3296	}
3297
3298	/**
3299	* process_scheduled_works - process scheduled works
3300	* @worker: self
3301	*
3302	* Process all scheduled works. Please note that the scheduled list
3303	* may change while processing a work, so this function repeatedly
3304	* fetches a work from the top and executes it.
3305	*
3306	* CONTEXT:
3307	* raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3308	* multiple times.
3309	*/
3310	static void process_scheduled_works(struct worker *worker)
3311	{
3312	struct work_struct *work;
3313	bool first = true;
3314
3315	while ((work = list_first_entry_or_null(&worker->scheduled,
3316	struct work_struct, entry))) {
3317	if (first) {
3318	worker->pool->watchdog_ts = jiffies;
3319	first = false;
3320	}
3321	process_one_work(worker, work);
3322	}
3323	}
3324
3325	static void set_pf_worker(bool val)
3326	{
3327	mutex_lock(&wq_pool_attach_mutex);
3328	if (val)
3329	current->flags \|= PF_WQ_WORKER;
3330	else
3331	current->flags &= ~PF_WQ_WORKER;
3332	mutex_unlock(lock: &wq_pool_attach_mutex);
3333	}
3334
3335	/**
3336	* worker_thread - the worker thread function
3337	* @__worker: self
3338	*
3339	* The worker thread function. All workers belong to a worker_pool -
3340	* either a per-cpu one or dynamic unbound one. These workers process all
3341	* work items regardless of their specific target workqueue. The only
3342	* exception is work items which belong to workqueues with a rescuer which
3343	* will be explained in rescuer_thread().
3344	*
3345	* Return: 0
3346	*/
3347	static int worker_thread(void *__worker)
3348	{
3349	struct worker *worker = __worker;
3350	struct worker_pool *pool = worker->pool;
3351
3352	/ tell the scheduler that this is a workqueue worker /
3353	set_pf_worker(true);
3354	woke_up:
3355	raw_spin_lock_irq(&pool->lock);
3356
3357	/ am I supposed to die? /
3358	if (unlikely(worker->flags & WORKER_DIE)) {
3359	raw_spin_unlock_irq(&pool->lock);
3360	set_pf_worker(false);
3361	/*
3362	* The worker is dead and PF_WQ_WORKER is cleared, worker->pool
3363	* shouldn't be accessed, reset it to NULL in case otherwise.
3364	*/
3365	worker->pool = NULL;
3366	ida_free(&pool->worker_ida, id: worker->id);
3367	return `0`;
3368	}
3369
3370	worker_leave_idle(worker);
3371	recheck:
3372	/ no more worker necessary? /
3373	if (!need_more_worker(pool))
3374	goto sleep;
3375
3376	/ do we need to manage? /
3377	if (unlikely(!may_start_working(pool)) && manage_workers(worker))
3378	goto recheck;
3379
3380	/*
3381	* ->scheduled list can only be filled while a worker is
3382	* preparing to process a work or actually processing it.
3383	* Make sure nobody diddled with it while I was sleeping.
3384	*/
3385	WARN_ON_ONCE(!list_empty(&worker->scheduled));
3386
3387	/*
3388	* Finish PREP stage. We're guaranteed to have at least one idle
3389	* worker or that someone else has already assumed the manager
3390	* role. This is where @worker starts participating in concurrency
3391	* management if applicable and concurrency management is restored
3392	* after being rebound. See rebind_workers() for details.
3393	*/
3394	worker_clr_flags(worker, flags: WORKER_PREP \| WORKER_REBOUND);
3395
3396	do {
3397	struct work_struct *work =
3398	list_first_entry(&pool->worklist,
3399	struct work_struct, entry);
3400
3401	if (assign_work(work, worker, NULL))
3402	process_scheduled_works(worker);
3403	} while (keep_working(pool));
3404
3405	worker_set_flags(worker, flags: WORKER_PREP);
3406	sleep:
3407	/*
3408	* pool->lock is held and there's no work to process and no need to
3409	* manage, sleep. Workers are woken up only while holding
3410	* pool->lock or from local cpu, so setting the current state
3411	* before releasing pool->lock is enough to prevent losing any
3412	* event.
3413	*/
3414	worker_enter_idle(worker);
3415	__set_current_state(TASK_IDLE);
3416	raw_spin_unlock_irq(&pool->lock);
3417	schedule();
3418	goto woke_up;
3419	}
3420
3421	/**
3422	* rescuer_thread - the rescuer thread function
3423	* @__rescuer: self
3424	*
3425	* Workqueue rescuer thread function. There's one rescuer for each
3426	* workqueue which has WQ_MEM_RECLAIM set.
3427	*
3428	* Regular work processing on a pool may block trying to create a new
3429	* worker which uses GFP_KERNEL allocation which has slight chance of
3430	* developing into deadlock if some works currently on the same queue
3431	* need to be processed to satisfy the GFP_KERNEL allocation. This is
3432	* the problem rescuer solves.
3433	*
3434	* When such condition is possible, the pool summons rescuers of all
3435	* workqueues which have works queued on the pool and let them process
3436	* those works so that forward progress can be guaranteed.
3437	*
3438	* This should happen rarely.
3439	*
3440	* Return: 0
3441	*/
3442	static int rescuer_thread(void *__rescuer)
3443	{
3444	struct worker *rescuer = __rescuer;
3445	struct workqueue_struct *wq = rescuer->rescue_wq;
3446	bool should_stop;
3447
3448	set_user_nice(current, nice: RESCUER_NICE_LEVEL);
3449
3450	/*
3451	* Mark rescuer as worker too. As WORKER_PREP is never cleared, it
3452	* doesn't participate in concurrency management.
3453	*/
3454	set_pf_worker(true);
3455	repeat:
3456	set_current_state(TASK_IDLE);
3457
3458	/*
3459	* By the time the rescuer is requested to stop, the workqueue
3460	* shouldn't have any work pending, but @wq->maydays may still have
3461	* pwq(s) queued. This can happen by non-rescuer workers consuming
3462	* all the work items before the rescuer got to them. Go through
3463	* @wq->maydays processing before acting on should_stop so that the
3464	* list is always empty on exit.
3465	*/
3466	should_stop = kthread_should_stop();
3467
3468	/ see whether any pwq is asking for help /
3469	raw_spin_lock_irq(&wq_mayday_lock);
3470
3471	while (!list_empty(head: &wq->maydays)) {
3472	struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
3473	struct pool_workqueue, mayday_node);
3474	struct worker_pool *pool = pwq->pool;
3475	struct work_struct work, n;
3476
3477	__set_current_state(TASK_RUNNING);
3478	list_del_init(entry: &pwq->mayday_node);
3479
3480	raw_spin_unlock_irq(&wq_mayday_lock);
3481
3482	worker_attach_to_pool(worker: rescuer, pool);
3483
3484	raw_spin_lock_irq(&pool->lock);
3485
3486	/*
3487	* Slurp in all works issued via this workqueue and
3488	* process'em.
3489	*/
3490	WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
3491	list_for_each_entry_safe(work, n, &pool->worklist, entry) {
3492	if (get_work_pwq(work) == pwq &&
3493	assign_work(work, worker: rescuer, nextp: &n))
3494	pwq->stats[PWQ_STAT_RESCUED]++;
3495	}
3496
3497	if (!list_empty(head: &rescuer->scheduled)) {
3498	process_scheduled_works(worker: rescuer);
3499
3500	/*
3501	* The above execution of rescued work items could
3502	* have created more to rescue through
3503	* pwq_activate_first_inactive() or chained
3504	* queueing. Let's put @pwq back on mayday list so
3505	* that such back-to-back work items, which may be
3506	* being used to relieve memory pressure, don't
3507	* incur MAYDAY_INTERVAL delay inbetween.
3508	*/
3509	if (pwq->nr_active && need_to_create_worker(pool)) {
3510	raw_spin_lock(&wq_mayday_lock);
3511	/*
3512	* Queue iff we aren't racing destruction
3513	* and somebody else hasn't queued it already.
3514	*/
3515	if (wq->rescuer && list_empty(head: &pwq->mayday_node)) {
3516	get_pwq(pwq);
3517	list_add_tail(new: &pwq->mayday_node, head: &wq->maydays);
3518	}
3519	raw_spin_unlock(&wq_mayday_lock);
3520	}
3521	}
3522
3523	/*
3524	* Leave this pool. Notify regular workers; otherwise, we end up
3525	* with 0 concurrency and stalling the execution.
3526	*/
3527	kick_pool(pool);
3528
3529	raw_spin_unlock_irq(&pool->lock);
3530
3531	worker_detach_from_pool(worker: rescuer);
3532
3533	/*
3534	* Put the reference grabbed by send_mayday(). @pool might
3535	* go away any time after it.
3536	*/
3537	put_pwq_unlocked(pwq);
3538
3539	raw_spin_lock_irq(&wq_mayday_lock);
3540	}
3541
3542	raw_spin_unlock_irq(&wq_mayday_lock);
3543
3544	if (should_stop) {
3545	__set_current_state(TASK_RUNNING);
3546	set_pf_worker(false);
3547	return `0`;
3548	}
3549
3550	/ rescuers should never participate in concurrency management /
3551	WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
3552	schedule();
3553	goto repeat;
3554	}
3555
3556	static void bh_worker(struct worker *worker)
3557	{
3558	struct worker_pool *pool = worker->pool;
3559	int nr_restarts = BH_WORKER_RESTARTS;
3560	unsigned long end = jiffies + BH_WORKER_JIFFIES;
3561
3562	raw_spin_lock_irq(&pool->lock);
3563	worker_leave_idle(worker);
3564
3565	/*
3566	* This function follows the structure of worker_thread(). See there for
3567	* explanations on each step.
3568	*/
3569	if (!need_more_worker(pool))
3570	goto done;
3571
3572	WARN_ON_ONCE(!list_empty(&worker->scheduled));
3573	worker_clr_flags(worker, flags: WORKER_PREP \| WORKER_REBOUND);
3574
3575	do {
3576	struct work_struct *work =
3577	list_first_entry(&pool->worklist,
3578	struct work_struct, entry);
3579
3580	if (assign_work(work, worker, NULL))
3581	process_scheduled_works(worker);
3582	} while (keep_working(pool) &&
3583	--nr_restarts && time_before(jiffies, end));
3584
3585	worker_set_flags(worker, flags: WORKER_PREP);
3586	done:
3587	worker_enter_idle(worker);
3588	kick_pool(pool);
3589	raw_spin_unlock_irq(&pool->lock);
3590	}
3591
3592	/*
3593	* TODO: Convert all tasklet users to workqueue and use softirq directly.
3594	*
3595	* This is currently called from tasklet[_hi]action() and thus is also called
3596	* whenever there are tasklets to run. Let's do an early exit if there's nothing
3597	* queued. Once conversion from tasklet is complete, the need_more_worker() test
3598	* can be dropped.
3599	*
3600	* After full conversion, we'll add worker->softirq_action, directly use the
3601	* softirq action and obtain the worker pointer from the softirq_action pointer.
3602	*/
3603	void workqueue_softirq_action(bool highpri)
3604	{
3605	struct worker_pool *pool =
3606	&per_cpu(bh_worker_pools, smp_processor_id())[highpri];
3607	if (need_more_worker(pool))
3608	bh_worker(list_first_entry(&pool->workers, struct worker, node));
3609	}
3610
3611	struct wq_drain_dead_softirq_work {
3612	struct work_struct work;
3613	struct worker_pool *pool;
3614	struct completion done;
3615	};
3616
3617	static void drain_dead_softirq_workfn(struct work_struct *work)
3618	{
3619	struct wq_drain_dead_softirq_work *dead_work =
3620	container_of(work, struct wq_drain_dead_softirq_work, work);
3621	struct worker_pool *pool = dead_work->pool;
3622	bool repeat;
3623
3624	/*
3625	* @pool's CPU is dead and we want to execute its still pending work
3626	* items from this BH work item which is running on a different CPU. As
3627	* its CPU is dead, @pool can't be kicked and, as work execution path
3628	* will be nested, a lockdep annotation needs to be suppressed. Mark
3629	* @pool with %POOL_BH_DRAINING for the special treatments.
3630	*/
3631	raw_spin_lock_irq(&pool->lock);
3632	pool->flags \|= POOL_BH_DRAINING;
3633	raw_spin_unlock_irq(&pool->lock);
3634
3635	bh_worker(list_first_entry(&pool->workers, struct worker, node));
3636
3637	raw_spin_lock_irq(&pool->lock);
3638	pool->flags &= ~POOL_BH_DRAINING;
3639	repeat = need_more_worker(pool);
3640	raw_spin_unlock_irq(&pool->lock);
3641
3642	/*
3643	* bh_worker() might hit consecutive execution limit and bail. If there
3644	* still are pending work items, reschedule self and return so that we
3645	* don't hog this CPU's BH.
3646	*/
3647	if (repeat) {
3648	if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
3649	queue_work(wq: system_bh_highpri_wq, work);
3650	else
3651	queue_work(wq: system_bh_wq, work);
3652	} else {
3653	complete(&dead_work->done);
3654	}
3655	}
3656
3657	/*
3658	* @cpu is dead. Drain the remaining BH work items on the current CPU. It's
3659	* possible to allocate dead_work per CPU and avoid flushing. However, then we
3660	* have to worry about draining overlapping with CPU coming back online or
3661	* nesting (one CPU's dead_work queued on another CPU which is also dead and so
3662	* on). Let's keep it simple and drain them synchronously. These are BH work
3663	* items which shouldn't be requeued on the same pool. Shouldn't take long.
3664	*/
3665	void workqueue_softirq_dead(unsigned int cpu)
3666	{
3667	int i;
3668
3669	for (i = `0`; i < NR_STD_WORKER_POOLS; i++) {
3670	struct worker_pool *pool = &per_cpu(bh_worker_pools, cpu)[i];
3671	struct wq_drain_dead_softirq_work dead_work;
3672
3673	if (!need_more_worker(pool))
3674	continue;
3675
3676	INIT_WORK_ONSTACK(&dead_work.work, drain_dead_softirq_workfn);
3677	dead_work.pool = pool;
3678	init_completion(x: &dead_work.done);
3679
3680	if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
3681	queue_work(wq: system_bh_highpri_wq, work: &dead_work.work);
3682	else
3683	queue_work(wq: system_bh_wq, work: &dead_work.work);
3684
3685	wait_for_completion(&dead_work.done);
3686	destroy_work_on_stack(&dead_work.work);
3687	}
3688	}
3689
3690	/**
3691	* check_flush_dependency - check for flush dependency sanity
3692	* @target_wq: workqueue being flushed
3693	* @target_work: work item being flushed (NULL for workqueue flushes)
3694	* @from_cancel: are we called from the work cancel path
3695	*
3696	* %current is trying to flush the whole @target_wq or @target_work on it.
3697	* If this is not the cancel path (which implies work being flushed is either
3698	* already running, or will not be at all), check if @target_wq doesn't have
3699	* %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running
3700	* on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
3701	* progress guarantee leading to a deadlock.
3702	*/
3703	static void check_flush_dependency(struct workqueue_struct *target_wq,
3704	struct work_struct *target_work,
3705	bool from_cancel)
3706	{
3707	work_func_t target_func;
3708	struct worker *worker;
3709
3710	if (from_cancel \|\| target_wq->flags & WQ_MEM_RECLAIM)
3711	return;
3712
3713	worker = current_wq_worker();
3714	target_func = target_work ? target_work->func : NULL;
3715
3716	WARN_ONCE(current->flags & PF_MEMALLOC,
3717	"workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
3718	current->pid, current->comm, target_wq->name, target_func);
3719	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
3720	(WQ_MEM_RECLAIM \| __WQ_LEGACY)) == WQ_MEM_RECLAIM),
3721	"workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
3722	worker->current_pwq->wq->name, worker->current_func,
3723	target_wq->name, target_func);
3724	}
3725
3726	struct wq_barrier {
3727	struct work_struct work;
3728	struct completion done;
3729	struct task_struct task; /* purely informational /
3730	};
3731
3732	static void wq_barrier_func(struct work_struct *work)
3733	{
3734	struct wq_barrier barr = container_of(work, struct* wq_barrier, work);
3735	complete(&barr->done);
3736	}
3737
3738	/**
3739	* insert_wq_barrier - insert a barrier work
3740	* @pwq: pwq to insert barrier into
3741	* @barr: wq_barrier to insert
3742	* @target: target work to attach @barr to
3743	* @worker: worker currently executing @target, NULL if @target is not executing
3744	*
3745	* @barr is linked to @target such that @barr is completed only after
3746	* @target finishes execution. Please note that the ordering
3747	* guarantee is observed only with respect to @target and on the local
3748	* cpu.
3749	*
3750	* Currently, a queued barrier can't be canceled. This is because
3751	* try_to_grab_pending() can't determine whether the work to be
3752	* grabbed is at the head of the queue and thus can't clear LINKED
3753	* flag of the previous work while there must be a valid next work
3754	* after a work with LINKED flag set.
3755	*
3756	* Note that when @worker is non-NULL, @target may be modified
3757	* underneath us, so we can't reliably determine pwq from @target.
3758	*
3759	* CONTEXT:
3760	* raw_spin_lock_irq(pool->lock).
3761	*/
3762	static void insert_wq_barrier(struct pool_workqueue *pwq,
3763	struct wq_barrier *barr,
3764	struct work_struct target, struct* worker *worker)
3765	{
3766	static __maybe_unused struct lock_class_key bh_key, thr_key;
3767	unsigned int work_flags = `0`;
3768	unsigned int work_color;
3769	struct list_head *head;
3770
3771	/*
3772	* debugobject calls are safe here even with pool->lock locked
3773	* as we know for sure that this will not trigger any of the
3774	* checks and call back into the fixup functions where we
3775	* might deadlock.
3776	*
3777	* BH and threaded workqueues need separate lockdep keys to avoid
3778	* spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W}
3779	* usage".
3780	*/
3781	INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func,
3782	(pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key);
3783	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
3784
3785	init_completion_map(&barr->done, &target->lockdep_map);
3786
3787	barr->task = current;
3788
3789	/ The barrier work item does not participate in nr_active. /
3790	work_flags \|= WORK_STRUCT_INACTIVE;
3791
3792	/*
3793	* If @target is currently being executed, schedule the
3794	* barrier to the worker; otherwise, put it after @target.
3795	*/
3796	if (worker) {
3797	head = worker->scheduled.next;
3798	work_color = worker->current_color;
3799	} else {
3800	unsigned long *bits = work_data_bits(target);
3801
3802	head = target->entry.next;
3803	/ there can already be other linked works, inherit and set /
3804	work_flags \|= *bits & WORK_STRUCT_LINKED;
3805	work_color = get_work_color(work_data: *bits);
3806	__set_bit(WORK_STRUCT_LINKED_BIT, bits);
3807	}
3808
3809	pwq->nr_in_flight[work_color]++;
3810	work_flags \|= work_color_to_flags(color: work_color);
3811
3812	insert_work(pwq, work: &barr->work, head, extra_flags: work_flags);
3813	}
3814
3815	/**
3816	* flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
3817	* @wq: workqueue being flushed
3818	* @flush_color: new flush color, < 0 for no-op
3819	* @work_color: new work color, < 0 for no-op
3820	*
3821	* Prepare pwqs for workqueue flushing.
3822	*
3823	* If @flush_color is non-negative, flush_color on all pwqs should be
3824	* -1. If no pwq has in-flight commands at the specified color, all
3825	* pwq->flush_color's stay at -1 and %false is returned. If any pwq
3826	* has in flight commands, its pwq->flush_color is set to
3827	* @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
3828	* wakeup logic is armed and %true is returned.
3829	*
3830	* The caller should have initialized @wq->first_flusher prior to
3831	* calling this function with non-negative @flush_color. If
3832	* @flush_color is negative, no flush color update is done and %false
3833	* is returned.
3834	*
3835	* If @work_color is non-negative, all pwqs should have the same
3836	* work_color which is previous to @work_color and all will be
3837	* advanced to @work_color.
3838	*
3839	* CONTEXT:
3840	* mutex_lock(wq->mutex).
3841	*
3842	* Return:
3843	* %true if @flush_color >= 0 and there's something to flush. %false
3844	* otherwise.
3845	*/
3846	static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
3847	int flush_color, int work_color)
3848	{
3849	bool wait = false;
3850	struct pool_workqueue *pwq;
3851	struct worker_pool *current_pool = NULL;
3852
3853	if (flush_color >= `0`) {
3854	WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
3855	atomic_set(v: &wq->nr_pwqs_to_flush, i: `1`);
3856	}
3857
3858	/*
3859	* For unbound workqueue, pwqs will map to only a few pools.
3860	* Most of the time, pwqs within the same pool will be linked
3861	* sequentially to wq->pwqs by cpu index. So in the majority
3862	* of pwq iters, the pool is the same, only doing lock/unlock
3863	* if the pool has changed. This can largely reduce expensive
3864	* lock operations.
3865	*/
3866	for_each_pwq(pwq, wq) {
3867	if (current_pool != pwq->pool) {
3868	if (likely(current_pool))
3869	raw_spin_unlock_irq(&current_pool->lock);
3870	current_pool = pwq->pool;
3871	raw_spin_lock_irq(&current_pool->lock);
3872	}
3873
3874	if (flush_color >= `0`) {
3875	WARN_ON_ONCE(pwq->flush_color != -`1`);
3876
3877	if (pwq->nr_in_flight[flush_color]) {
3878	pwq->flush_color = flush_color;
3879	atomic_inc(v: &wq->nr_pwqs_to_flush);
3880	wait = true;
3881	}
3882	}
3883
3884	if (work_color >= `0`) {
3885	WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
3886	pwq->work_color = work_color;
3887	}
3888
3889	}
3890
3891	if (current_pool)
3892	raw_spin_unlock_irq(&current_pool->lock);
3893
3894	if (flush_color >= `0` && atomic_dec_and_test(v: &wq->nr_pwqs_to_flush))
3895	complete(&wq->first_flusher->done);
3896
3897	return wait;
3898	}
3899
3900	static void touch_wq_lockdep_map(struct workqueue_struct *wq)
3901	{
3902	#ifdef CONFIG_LOCKDEP
3903	if (unlikely(!wq->lockdep_map))
3904	return;
3905
3906	if (wq->flags & WQ_BH)
3907	local_bh_disable();
3908
3909	lock_map_acquire(wq->lockdep_map);
3910	lock_map_release(wq->lockdep_map);
3911
3912	if (wq->flags & WQ_BH)
3913	local_bh_enable();
3914	#endif
3915	}
3916
3917	static void touch_work_lockdep_map(struct work_struct *work,
3918	struct workqueue_struct *wq)
3919	{
3920	#ifdef CONFIG_LOCKDEP
3921	if (wq->flags & WQ_BH)
3922	local_bh_disable();
3923
3924	lock_map_acquire(&work->lockdep_map);
3925	lock_map_release(&work->lockdep_map);
3926
3927	if (wq->flags & WQ_BH)
3928	local_bh_enable();
3929	#endif
3930	}
3931
3932	/**
3933	* __flush_workqueue - ensure that any scheduled work has run to completion.
3934	* @wq: workqueue to flush
3935	*
3936	* This function sleeps until all work items which were queued on entry
3937	* have finished execution, but it is not livelocked by new incoming ones.
3938	*/
3939	void __flush_workqueue(struct workqueue_struct *wq)
3940	{
3941	struct wq_flusher this_flusher = {
3942	.list = LIST_HEAD_INIT(this_flusher.list),
3943	.flush_color = -`1`,
3944	.done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)),
3945	};
3946	int next_color;
3947
3948	if (WARN_ON(!wq_online))
3949	return;
3950
3951	touch_wq_lockdep_map(wq);
3952
3953	mutex_lock(&wq->mutex);
3954
3955	/*
3956	* Start-to-wait phase
3957	*/
3958	next_color = work_next_color(color: wq->work_color);
3959
3960	if (next_color != wq->flush_color) {
3961	/*
3962	* Color space is not full. The current work_color
3963	* becomes our flush_color and work_color is advanced
3964	* by one.
3965	*/
3966	WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
3967	this_flusher.flush_color = wq->work_color;
3968	wq->work_color = next_color;
3969
3970	if (!wq->first_flusher) {
3971	/ no flush in progress, become the first flusher /
3972	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
3973
3974	wq->first_flusher = &this_flusher;
3975
3976	if (!flush_workqueue_prep_pwqs(wq, flush_color: wq->flush_color,
3977	work_color: wq->work_color)) {
3978	/ nothing to flush, done /
3979	wq->flush_color = next_color;
3980	wq->first_flusher = NULL;
3981	goto out_unlock;
3982	}
3983	} else {
3984	/ wait in queue /
3985	WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
3986	list_add_tail(new: &this_flusher.list, head: &wq->flusher_queue);
3987	flush_workqueue_prep_pwqs(wq, flush_color: -`1`, work_color: wq->work_color);
3988	}
3989	} else {
3990	/*
3991	* Oops, color space is full, wait on overflow queue.
3992	* The next flush completion will assign us
3993	* flush_color and transfer to flusher_queue.
3994	*/
3995	list_add_tail(new: &this_flusher.list, head: &wq->flusher_overflow);
3996	}
3997
3998	check_flush_dependency(target_wq: wq, NULL, from_cancel: false);
3999
4000	mutex_unlock(lock: &wq->mutex);
4001
4002	wait_for_completion(&this_flusher.done);
4003
4004	/*
4005	* Wake-up-and-cascade phase
4006	*
4007	* First flushers are responsible for cascading flushes and
4008	* handling overflow. Non-first flushers can simply return.
4009	*/
4010	if (READ_ONCE(wq->first_flusher) != &this_flusher)
4011	return;
4012
4013	mutex_lock(&wq->mutex);
4014
4015	/ we might have raced, check again with mutex held /
4016	if (wq->first_flusher != &this_flusher)
4017	goto out_unlock;
4018
4019	WRITE_ONCE(wq->first_flusher, NULL);
4020
4021	WARN_ON_ONCE(!list_empty(&this_flusher.list));
4022	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
4023
4024	while (true) {
4025	struct wq_flusher next, tmp;
4026
4027	/ complete all the flushers sharing the current flush color /
4028	list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
4029	if (next->flush_color != wq->flush_color)
4030	break;
4031	list_del_init(entry: &next->list);
4032	complete(&next->done);
4033	}
4034
4035	WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
4036	wq->flush_color != work_next_color(wq->work_color));
4037
4038	/ this flush_color is finished, advance by one /
4039	wq->flush_color = work_next_color(color: wq->flush_color);
4040
4041	/ one color has been freed, handle overflow queue /
4042	if (!list_empty(head: &wq->flusher_overflow)) {
4043	/*
4044	* Assign the same color to all overflowed
4045	* flushers, advance work_color and append to
4046	* flusher_queue. This is the start-to-wait
4047	* phase for these overflowed flushers.
4048	*/
4049	list_for_each_entry(tmp, &wq->flusher_overflow, list)
4050	tmp->flush_color = wq->work_color;
4051
4052	wq->work_color = work_next_color(color: wq->work_color);
4053
4054	list_splice_tail_init(list: &wq->flusher_overflow,
4055	head: &wq->flusher_queue);
4056	flush_workqueue_prep_pwqs(wq, flush_color: -`1`, work_color: wq->work_color);
4057	}
4058
4059	if (list_empty(head: &wq->flusher_queue)) {
4060	WARN_ON_ONCE(wq->flush_color != wq->work_color);
4061	break;
4062	}
4063
4064	/*
4065	* Need to flush more colors. Make the next flusher
4066	* the new first flusher and arm pwqs.
4067	*/
4068	WARN_ON_ONCE(wq->flush_color == wq->work_color);
4069	WARN_ON_ONCE(wq->flush_color != next->flush_color);
4070
4071	list_del_init(entry: &next->list);
4072	wq->first_flusher = next;
4073
4074	if (flush_workqueue_prep_pwqs(wq, flush_color: wq->flush_color, work_color: -`1`))
4075	break;
4076
4077	/*
4078	* Meh... this color is already done, clear first
4079	* flusher and repeat cascading.
4080	*/
4081	wq->first_flusher = NULL;
4082	}
4083
4084	out_unlock:
4085	mutex_unlock(lock: &wq->mutex);
4086	}
4087	EXPORT_SYMBOL(__flush_workqueue);
4088
4089	/**
4090	* drain_workqueue - drain a workqueue
4091	* @wq: workqueue to drain
4092	*
4093	* Wait until the workqueue becomes empty. While draining is in progress,
4094	* only chain queueing is allowed. IOW, only currently pending or running
4095	* work items on @wq can queue further work items on it. @wq is flushed
4096	* repeatedly until it becomes empty. The number of flushing is determined
4097	* by the depth of chaining and should be relatively short. Whine if it
4098	* takes too long.
4099	*/
4100	void drain_workqueue(struct workqueue_struct *wq)
4101	{
4102	unsigned int flush_cnt = `0`;
4103	struct pool_workqueue *pwq;
4104
4105	/*
4106	* __queue_work() needs to test whether there are drainers, is much
4107	* hotter than drain_workqueue() and already looks at @wq->flags.
4108	* Use __WQ_DRAINING so that queue doesn't have to check nr_drainers.
4109	*/
4110	mutex_lock(&wq->mutex);
4111	if (!wq->nr_drainers++)
4112	wq->flags \|= __WQ_DRAINING;
4113	mutex_unlock(lock: &wq->mutex);
4114	reflush:
4115	__flush_workqueue(wq);
4116
4117	mutex_lock(&wq->mutex);
4118
4119	for_each_pwq(pwq, wq) {
4120	bool drained;
4121
4122	raw_spin_lock_irq(&pwq->pool->lock);
4123	drained = pwq_is_empty(pwq);
4124	raw_spin_unlock_irq(&pwq->pool->lock);
4125
4126	if (drained)
4127	continue;
4128
4129	if (++flush_cnt == `10` \|\|
4130	(flush_cnt % `100` == `0` && flush_cnt <= `1000`))
4131	pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
4132	wq->name, __func__, flush_cnt);
4133
4134	mutex_unlock(lock: &wq->mutex);
4135	goto reflush;
4136	}
4137
4138	if (!--wq->nr_drainers)
4139	wq->flags &= ~__WQ_DRAINING;
4140	mutex_unlock(lock: &wq->mutex);
4141	}
4142	EXPORT_SYMBOL_GPL(drain_workqueue);
4143
4144	static bool start_flush_work(struct work_struct work, struct* wq_barrier *barr,
4145	bool from_cancel)
4146	{
4147	struct worker *worker = NULL;
4148	struct worker_pool *pool;
4149	struct pool_workqueue *pwq;
4150	struct workqueue_struct *wq;
4151
4152	rcu_read_lock();
4153	pool = get_work_pool(work);
4154	if (!pool) {
4155	rcu_read_unlock();
4156	return false;
4157	}
4158
4159	raw_spin_lock_irq(&pool->lock);
4160	/ see the comment in try_to_grab_pending() with the same code /
4161	pwq = get_work_pwq(work);
4162	if (pwq) {
4163	if (unlikely(pwq->pool != pool))
4164	goto already_gone;
4165	} else {
4166	worker = find_worker_executing_work(pool, work);
4167	if (!worker)
4168	goto already_gone;
4169	pwq = worker->current_pwq;
4170	}
4171
4172	wq = pwq->wq;
4173	check_flush_dependency(target_wq: wq, target_work: work, from_cancel);
4174
4175	insert_wq_barrier(pwq, barr, target: work, worker);
4176	raw_spin_unlock_irq(&pool->lock);
4177
4178	touch_work_lockdep_map(work, wq);
4179
4180	/*
4181	* Force a lock recursion deadlock when using flush_work() inside a
4182	* single-threaded or rescuer equipped workqueue.
4183	*
4184	* For single threaded workqueues the deadlock happens when the work
4185	* is after the work issuing the flush_work(). For rescuer equipped
4186	* workqueues the deadlock happens when the rescuer stalls, blocking
4187	* forward progress.
4188	*/
4189	if (!from_cancel && (wq->saved_max_active == `1` \|\| wq->rescuer))
4190	touch_wq_lockdep_map(wq);
4191
4192	rcu_read_unlock();
4193	return true;
4194	already_gone:
4195	raw_spin_unlock_irq(&pool->lock);
4196	rcu_read_unlock();
4197	return false;
4198	}
4199
4200	static bool __flush_work(struct work_struct *work, bool from_cancel)
4201	{
4202	struct wq_barrier barr;
4203
4204	if (WARN_ON(!wq_online))
4205	return false;
4206
4207	if (WARN_ON(!work->func))
4208	return false;
4209
4210	if (!start_flush_work(work, barr: &barr, from_cancel))
4211	return false;
4212
4213	/*
4214	* start_flush_work() returned %true. If @from_cancel is set, we know
4215	* that @work must have been executing during start_flush_work() and
4216	* can't currently be queued. Its data must contain OFFQ bits. If @work
4217	* was queued on a BH workqueue, we also know that it was running in the
4218	* BH context and thus can be busy-waited.
4219	*/
4220	if (from_cancel) {
4221	unsigned long data = *work_data_bits(work);
4222
4223	if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) &&
4224	(data & WORK_OFFQ_BH)) {
4225	/*
4226	* On RT, prevent a live lock when %current preempted
4227	* soft interrupt processing or prevents ksoftirqd from
4228	* running by keeping flipping BH. If the BH work item
4229	* runs on a different CPU then this has no effect other
4230	* than doing the BH disable/enable dance for nothing.
4231	* This is copied from
4232	* kernel/softirq.c::tasklet_unlock_spin_wait().
4233	*/
4234	while (!try_wait_for_completion(x: &barr.done)) {
4235	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
4236	local_bh_disable();
4237	local_bh_enable();
4238	} else {
4239	cpu_relax();
4240	}
4241	}
4242	goto out_destroy;
4243	}
4244	}
4245
4246	wait_for_completion(&barr.done);
4247
4248	out_destroy:
4249	destroy_work_on_stack(&barr.work);
4250	return true;
4251	}
4252
4253	/**
4254	* flush_work - wait for a work to finish executing the last queueing instance
4255	* @work: the work to flush
4256	*
4257	* Wait until @work has finished execution. @work is guaranteed to be idle
4258	* on return if it hasn't been requeued since flush started.
4259	*
4260	* Return:
4261	* %true if flush_work() waited for the work to finish execution,
4262	* %false if it was already idle.
4263	*/
4264	bool flush_work(struct work_struct *work)
4265	{
4266	might_sleep();
4267	return __flush_work(work, from_cancel: false);
4268	}
4269	EXPORT_SYMBOL_GPL(flush_work);
4270
4271	/**
4272	* flush_delayed_work - wait for a dwork to finish executing the last queueing
4273	* @dwork: the delayed work to flush
4274	*
4275	* Delayed timer is cancelled and the pending work is queued for
4276	* immediate execution. Like flush_work(), this function only
4277	* considers the last queueing instance of @dwork.
4278	*
4279	* Return:
4280	* %true if flush_work() waited for the work to finish execution,
4281	* %false if it was already idle.
4282	*/
4283	bool flush_delayed_work(struct delayed_work *dwork)
4284	{
4285	local_irq_disable();
4286	if (timer_delete_sync(timer: &dwork->timer))
4287	__queue_work(cpu: dwork->cpu, wq: dwork->wq, work: &dwork->work);
4288	local_irq_enable();
4289	return flush_work(&dwork->work);
4290	}
4291	EXPORT_SYMBOL(flush_delayed_work);
4292
4293	/**
4294	* flush_rcu_work - wait for a rwork to finish executing the last queueing
4295	* @rwork: the rcu work to flush
4296	*
4297	* Return:
4298	* %true if flush_rcu_work() waited for the work to finish execution,
4299	* %false if it was already idle.
4300	*/
4301	bool flush_rcu_work(struct rcu_work *rwork)
4302	{
4303	if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
4304	rcu_barrier();
4305	flush_work(&rwork->work);
4306	return true;
4307	} else {
4308	return flush_work(&rwork->work);
4309	}
4310	}
4311	EXPORT_SYMBOL(flush_rcu_work);
4312
4313	static void work_offqd_disable(struct work_offq_data *offqd)
4314	{
4315	const unsigned long max = (`1lu` << WORK_OFFQ_DISABLE_BITS) - `1`;
4316
4317	if (likely(offqd->disable < max))
4318	offqd->disable++;
4319	else
4320	WARN_ONCE(true, "workqueue: work disable count overflowed\n");
4321	}
4322
4323	static void work_offqd_enable(struct work_offq_data *offqd)
4324	{
4325	if (likely(offqd->disable > `0`))
4326	offqd->disable--;
4327	else
4328	WARN_ONCE(true, "workqueue: work disable count underflowed\n");
4329	}
4330
4331	static bool __cancel_work(struct work_struct *work, u32 cflags)
4332	{
4333	struct work_offq_data offqd;
4334	unsigned long irq_flags;
4335	int ret;
4336
4337	ret = work_grab_pending(work, cflags, irq_flags: &irq_flags);
4338
4339	work_offqd_unpack(offqd: &offqd, data: *work_data_bits(work));
4340
4341	if (cflags & WORK_CANCEL_DISABLE)
4342	work_offqd_disable(offqd: &offqd);
4343
4344	set_work_pool_and_clear_pending(work, pool_id: offqd.pool_id,
4345	flags: work_offqd_pack_flags(offqd: &offqd));
4346	local_irq_restore(irq_flags);
4347	return ret;
4348	}
4349
4350	static bool __cancel_work_sync(struct work_struct *work, u32 cflags)
4351	{
4352	bool ret;
4353
4354	ret = __cancel_work(work, cflags: cflags \| WORK_CANCEL_DISABLE);
4355
4356	if (*work_data_bits(work) & WORK_OFFQ_BH)
4357	WARN_ON_ONCE(in_hardirq());
4358	else
4359	might_sleep();
4360
4361	/*
4362	* Skip __flush_work() during early boot when we know that @work isn't
4363	* executing. This allows canceling during early boot.
4364	*/
4365	if (wq_online)
4366	__flush_work(work, from_cancel: true);
4367
4368	if (!(cflags & WORK_CANCEL_DISABLE))
4369	enable_work(work);
4370
4371	return ret;
4372	}
4373
4374	/*
4375	* See cancel_delayed_work()
4376	*/
4377	bool cancel_work(struct work_struct *work)
4378	{
4379	return __cancel_work(work, cflags: `0`);
4380	}
4381	EXPORT_SYMBOL(cancel_work);
4382
4383	/**
4384	* cancel_work_sync - cancel a work and wait for it to finish
4385	* @work: the work to cancel
4386	*
4387	* Cancel @work and wait for its execution to finish. This function can be used
4388	* even if the work re-queues itself or migrates to another workqueue. On return
4389	* from this function, @work is guaranteed to be not pending or executing on any
4390	* CPU as long as there aren't racing enqueues.
4391	*
4392	* cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
4393	* Use cancel_delayed_work_sync() instead.
4394	*
4395	* Must be called from a sleepable context if @work was last queued on a non-BH
4396	* workqueue. Can also be called from non-hardirq atomic contexts including BH
4397	* if @work was last queued on a BH workqueue.
4398	*
4399	* Returns %true if @work was pending, %false otherwise.
4400	*/
4401	bool cancel_work_sync(struct work_struct *work)
4402	{
4403	return __cancel_work_sync(work, cflags: `0`);
4404	}
4405	EXPORT_SYMBOL_GPL(cancel_work_sync);
4406
4407	/**
4408	* cancel_delayed_work - cancel a delayed work
4409	* @dwork: delayed_work to cancel
4410	*
4411	* Kill off a pending delayed_work.
4412	*
4413	* Return: %true if @dwork was pending and canceled; %false if it wasn't
4414	* pending.
4415	*
4416	* Note:
4417	* The work callback function may still be running on return, unless
4418	* it returns %true and the work doesn't re-arm itself. Explicitly flush or
4419	* use cancel_delayed_work_sync() to wait on it.
4420	*
4421	* This function is safe to call from any context including IRQ handler.
4422	*/
4423	bool cancel_delayed_work(struct delayed_work *dwork)
4424	{
4425	return __cancel_work(work: &dwork->work, cflags: WORK_CANCEL_DELAYED);
4426	}
4427	EXPORT_SYMBOL(cancel_delayed_work);
4428
4429	/**
4430	* cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
4431	* @dwork: the delayed work cancel
4432	*
4433	* This is cancel_work_sync() for delayed works.
4434	*
4435	* Return:
4436	* %true if @dwork was pending, %false otherwise.
4437	*/
4438	bool cancel_delayed_work_sync(struct delayed_work *dwork)
4439	{
4440	return __cancel_work_sync(work: &dwork->work, cflags: WORK_CANCEL_DELAYED);
4441	}
4442	EXPORT_SYMBOL(cancel_delayed_work_sync);
4443
4444	/**
4445	* disable_work - Disable and cancel a work item
4446	* @work: work item to disable
4447	*
4448	* Disable @work by incrementing its disable count and cancel it if currently
4449	* pending. As long as the disable count is non-zero, any attempt to queue @work
4450	* will fail and return %false. The maximum supported disable depth is 2 to the
4451	* power of %WORK_OFFQ_DISABLE_BITS, currently 65536.
4452	*
4453	* Can be called from any context. Returns %true if @work was pending, %false
4454	* otherwise.
4455	*/
4456	bool disable_work(struct work_struct *work)
4457	{
4458	return __cancel_work(work, cflags: WORK_CANCEL_DISABLE);
4459	}
4460	EXPORT_SYMBOL_GPL(disable_work);
4461
4462	/**
4463	* disable_work_sync - Disable, cancel and drain a work item
4464	* @work: work item to disable
4465	*
4466	* Similar to disable_work() but also wait for @work to finish if currently
4467	* executing.
4468	*
4469	* Must be called from a sleepable context if @work was last queued on a non-BH
4470	* workqueue. Can also be called from non-hardirq atomic contexts including BH
4471	* if @work was last queued on a BH workqueue.
4472	*
4473	* Returns %true if @work was pending, %false otherwise.
4474	*/
4475	bool disable_work_sync(struct work_struct *work)
4476	{
4477	return __cancel_work_sync(work, cflags: WORK_CANCEL_DISABLE);
4478	}
4479	EXPORT_SYMBOL_GPL(disable_work_sync);
4480
4481	/**
4482	* enable_work - Enable a work item
4483	* @work: work item to enable
4484	*
4485	* Undo disable_work[_sync]() by decrementing @work's disable count. @work can
4486	* only be queued if its disable count is 0.
4487	*
4488	* Can be called from any context. Returns %true if the disable count reached 0.
4489	* Otherwise, %false.
4490	*/
4491	bool enable_work(struct work_struct *work)
4492	{
4493	struct work_offq_data offqd;
4494	unsigned long irq_flags;
4495
4496	work_grab_pending(work, cflags: `0`, irq_flags: &irq_flags);
4497
4498	work_offqd_unpack(offqd: &offqd, data: *work_data_bits(work));
4499	work_offqd_enable(offqd: &offqd);
4500	set_work_pool_and_clear_pending(work, pool_id: offqd.pool_id,
4501	flags: work_offqd_pack_flags(offqd: &offqd));
4502	local_irq_restore(irq_flags);
4503
4504	return !offqd.disable;
4505	}
4506	EXPORT_SYMBOL_GPL(enable_work);
4507
4508	/**
4509	* disable_delayed_work - Disable and cancel a delayed work item
4510	* @dwork: delayed work item to disable
4511	*
4512	* disable_work() for delayed work items.
4513	*/
4514	bool disable_delayed_work(struct delayed_work *dwork)
4515	{
4516	return __cancel_work(work: &dwork->work,
4517	cflags: WORK_CANCEL_DELAYED \| WORK_CANCEL_DISABLE);
4518	}
4519	EXPORT_SYMBOL_GPL(disable_delayed_work);
4520
4521	/**
4522	* disable_delayed_work_sync - Disable, cancel and drain a delayed work item
4523	* @dwork: delayed work item to disable
4524	*
4525	* disable_work_sync() for delayed work items.
4526	*/
4527	bool disable_delayed_work_sync(struct delayed_work *dwork)
4528	{
4529	return __cancel_work_sync(work: &dwork->work,
4530	cflags: WORK_CANCEL_DELAYED \| WORK_CANCEL_DISABLE);
4531	}
4532	EXPORT_SYMBOL_GPL(disable_delayed_work_sync);
4533
4534	/**
4535	* enable_delayed_work - Enable a delayed work item
4536	* @dwork: delayed work item to enable
4537	*
4538	* enable_work() for delayed work items.
4539	*/
4540	bool enable_delayed_work(struct delayed_work *dwork)
4541	{
4542	return enable_work(&dwork->work);
4543	}
4544	EXPORT_SYMBOL_GPL(enable_delayed_work);
4545
4546	/**
4547	* schedule_on_each_cpu - execute a function synchronously on each online CPU
4548	* @func: the function to call
4549	*
4550	* schedule_on_each_cpu() executes @func on each online CPU using the
4551	* system workqueue and blocks until all CPUs have completed.
4552	* schedule_on_each_cpu() is very slow.
4553	*
4554	* Return:
4555	* 0 on success, -errno on failure.
4556	*/
4557	int schedule_on_each_cpu(work_func_t func)
4558	{
4559	int cpu;
4560	struct work_struct __percpu *works;
4561
4562	works = alloc_percpu(struct work_struct);
4563	if (!works)
4564	return -ENOMEM;
4565
4566	cpus_read_lock();
4567
4568	for_each_online_cpu(cpu) {
4569	struct work_struct *work = per_cpu_ptr(works, cpu);
4570
4571	INIT_WORK(work, func);
4572	schedule_work_on(cpu, work);
4573	}
4574
4575	for_each_online_cpu(cpu)
4576	flush_work(per_cpu_ptr(works, cpu));
4577
4578	cpus_read_unlock();
4579	free_percpu(pdata: works);
4580	return `0`;
4581	}
4582
4583	/**
4584	* execute_in_process_context - reliably execute the routine with user context
4585	* @fn: the function to execute
4586	* @ew: guaranteed storage for the execute work structure (must
4587	* be available when the work executes)
4588	*
4589	* Executes the function immediately if process context is available,
4590	* otherwise schedules the function for delayed execution.
4591	*
4592	* Return: 0 - function was executed
4593	* 1 - function was scheduled for execution
4594	*/
4595	int execute_in_process_context(work_func_t fn, struct execute_work *ew)
4596	{
4597	if (!in_interrupt()) {
4598	fn(&ew->work);
4599	return `0`;
4600	}
4601
4602	INIT_WORK(&ew->work, fn);
4603	schedule_work(work: &ew->work);
4604
4605	return `1`;
4606	}
4607	EXPORT_SYMBOL_GPL(execute_in_process_context);
4608
4609	/**
4610	* free_workqueue_attrs - free a workqueue_attrs
4611	* @attrs: workqueue_attrs to free
4612	*
4613	* Undo alloc_workqueue_attrs().
4614	*/
4615	void free_workqueue_attrs(struct workqueue_attrs *attrs)
4616	{
4617	if (attrs) {
4618	free_cpumask_var(mask: attrs->cpumask);
4619	free_cpumask_var(mask: attrs->__pod_cpumask);
4620	kfree(objp: attrs);
4621	}
4622	}
4623
4624	/**
4625	* alloc_workqueue_attrs - allocate a workqueue_attrs
4626	*
4627	* Allocate a new workqueue_attrs, initialize with default settings and
4628	* return it.
4629	*
4630	* Return: The allocated new workqueue_attr on success. %NULL on failure.
4631	*/
4632	struct workqueue_attrs alloc_workqueue_attrs(void*)
4633	{
4634	struct workqueue_attrs *attrs;
4635
4636	attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
4637	if (!attrs)
4638	goto fail;
4639	if (!alloc_cpumask_var(mask: &attrs->cpumask, GFP_KERNEL))
4640	goto fail;
4641	if (!alloc_cpumask_var(mask: &attrs->__pod_cpumask, GFP_KERNEL))
4642	goto fail;
4643
4644	cpumask_copy(dstp: attrs->cpumask, cpu_possible_mask);
4645	attrs->affn_scope = WQ_AFFN_DFL;
4646	return attrs;
4647	fail:
4648	free_workqueue_attrs(attrs);
4649	return NULL;
4650	}
4651
4652	static void copy_workqueue_attrs(struct workqueue_attrs *to,
4653	const struct workqueue_attrs *from)
4654	{
4655	to->nice = from->nice;
4656	cpumask_copy(dstp: to->cpumask, srcp: from->cpumask);
4657	cpumask_copy(dstp: to->__pod_cpumask, srcp: from->__pod_cpumask);
4658	to->affn_strict = from->affn_strict;
4659
4660	/*
4661	* Unlike hash and equality test, copying shouldn't ignore wq-only
4662	* fields as copying is used for both pool and wq attrs. Instead,
4663	* get_unbound_pool() explicitly clears the fields.
4664	*/
4665	to->affn_scope = from->affn_scope;
4666	to->ordered = from->ordered;
4667	}
4668
4669	/*
4670	* Some attrs fields are workqueue-only. Clear them for worker_pool's. See the
4671	* comments in 'struct workqueue_attrs' definition.
4672	*/
4673	static void wqattrs_clear_for_pool(struct workqueue_attrs *attrs)
4674	{
4675	attrs->affn_scope = WQ_AFFN_NR_TYPES;
4676	attrs->ordered = false;
4677	if (attrs->affn_strict)
4678	cpumask_copy(dstp: attrs->cpumask, cpu_possible_mask);
4679	}
4680
4681	/ hash value of the content of @attr /
4682	static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
4683	{
4684	u32 hash = `0`;
4685
4686	hash = jhash_1word(a: attrs->nice, initval: hash);
4687	hash = jhash_1word(a: attrs->affn_strict, initval: hash);
4688	hash = jhash(cpumask_bits(attrs->__pod_cpumask),
4689	BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), initval: hash);
4690	if (!attrs->affn_strict)
4691	hash = jhash(cpumask_bits(attrs->cpumask),
4692	BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), initval: hash);
4693	return hash;
4694	}
4695
4696	/ content equality test /
4697	static bool wqattrs_equal(const struct workqueue_attrs *a,
4698	const struct workqueue_attrs *b)
4699	{
4700	if (a->nice != b->nice)
4701	return false;
4702	if (a->affn_strict != b->affn_strict)
4703	return false;
4704	if (!cpumask_equal(src1p: a->__pod_cpumask, src2p: b->__pod_cpumask))
4705	return false;
4706	if (!a->affn_strict && !cpumask_equal(src1p: a->cpumask, src2p: b->cpumask))
4707	return false;
4708	return true;
4709	}
4710
4711	/ Update @attrs with actually available CPUs /
4712	static void wqattrs_actualize_cpumask(struct workqueue_attrs *attrs,
4713	const cpumask_t *unbound_cpumask)
4714	{
4715	/*
4716	* Calculate the effective CPU mask of @attrs given @unbound_cpumask. If
4717	* @attrs->cpumask doesn't overlap with @unbound_cpumask, we fallback to
4718	* @unbound_cpumask.
4719	*/
4720	cpumask_and(dstp: attrs->cpumask, src1p: attrs->cpumask, src2p: unbound_cpumask);
4721	if (unlikely(cpumask_empty(attrs->cpumask)))
4722	cpumask_copy(dstp: attrs->cpumask, srcp: unbound_cpumask);
4723	}
4724
4725	/ find wq_pod_type to use for @attrs /
4726	static const struct wq_pod_type *
4727	wqattrs_pod_type(const struct workqueue_attrs *attrs)
4728	{
4729	enum wq_affn_scope scope;
4730	struct wq_pod_type *pt;
4731
4732	/ to synchronize access to wq_affn_dfl /
4733	lockdep_assert_held(&wq_pool_mutex);
4734
4735	if (attrs->affn_scope == WQ_AFFN_DFL)
4736	scope = wq_affn_dfl;
4737	else
4738	scope = attrs->affn_scope;
4739
4740	pt = &wq_pod_types[scope];
4741
4742	if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) &&
4743	likely(pt->nr_pods))
4744	return pt;
4745
4746	/*
4747	* Before workqueue_init_topology(), only SYSTEM is available which is
4748	* initialized in workqueue_init_early().
4749	*/
4750	pt = &wq_pod_types[WQ_AFFN_SYSTEM];
4751	BUG_ON(!pt->nr_pods);
4752	return pt;
4753	}
4754
4755	/**
4756	* init_worker_pool - initialize a newly zalloc'd worker_pool
4757	* @pool: worker_pool to initialize
4758	*
4759	* Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
4760	*
4761	* Return: 0 on success, -errno on failure. Even on failure, all fields
4762	* inside @pool proper are initialized and put_unbound_pool() can be called
4763	* on @pool safely to release it.
4764	*/
4765	static int init_worker_pool(struct worker_pool *pool)
4766	{
4767	raw_spin_lock_init(&pool->lock);
4768	pool->id = -`1`;
4769	pool->cpu = -`1`;
4770	pool->node = NUMA_NO_NODE;
4771	pool->flags \|= POOL_DISASSOCIATED;
4772	pool->watchdog_ts = jiffies;
4773	INIT_LIST_HEAD(list: &pool->worklist);
4774	INIT_LIST_HEAD(list: &pool->idle_list);
4775	hash_init(pool->busy_hash);
4776
4777	timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
4778	INIT_WORK(&pool->idle_cull_work, idle_cull_fn);
4779
4780	timer_setup(&pool->mayday_timer, pool_mayday_timeout, `0`);
4781
4782	INIT_LIST_HEAD(list: &pool->workers);
4783
4784	ida_init(ida: &pool->worker_ida);
4785	INIT_HLIST_NODE(h: &pool->hash_node);
4786	pool->refcnt = `1`;
4787
4788	/ shouldn't fail above this point /
4789	pool->attrs = alloc_workqueue_attrs();
4790	if (!pool->attrs)
4791	return -ENOMEM;
4792
4793	wqattrs_clear_for_pool(attrs: pool->attrs);
4794
4795	return `0`;
4796	}
4797
4798	#ifdef CONFIG_LOCKDEP
4799	static void wq_init_lockdep(struct workqueue_struct *wq)
4800	{
4801	char *lock_name;
4802
4803	lockdep_register_key(key: &wq->key);
4804	lock_name = kasprintf(GFP_KERNEL, fmt: "%s%s", "(wq_completion)", wq->name);
4805	if (!lock_name)
4806	lock_name = wq->name;
4807
4808	wq->lock_name = lock_name;
4809	wq->lockdep_map = &wq->__lockdep_map;
4810	lockdep_init_map(lock: wq->lockdep_map, name: lock_name, key: &wq->key, subclass: `0`);
4811	}
4812
4813	static void wq_unregister_lockdep(struct workqueue_struct *wq)
4814	{
4815	if (wq->lockdep_map != &wq->__lockdep_map)
4816	return;
4817
4818	lockdep_unregister_key(key: &wq->key);
4819	}
4820
4821	static void wq_free_lockdep(struct workqueue_struct *wq)
4822	{
4823	if (wq->lockdep_map != &wq->__lockdep_map)
4824	return;
4825
4826	if (wq->lock_name != wq->name)
4827	kfree(objp: wq->lock_name);
4828	}
4829	#else
4830	static void wq_init_lockdep(struct workqueue_struct *wq)
4831	{
4832	}
4833
4834	static void wq_unregister_lockdep(struct workqueue_struct *wq)
4835	{
4836	}
4837
4838	static void wq_free_lockdep(struct workqueue_struct *wq)
4839	{
4840	}
4841	#endif
4842
4843	static void free_node_nr_active(struct wq_node_nr_active **nna_ar)
4844	{
4845	int node;
4846
4847	for_each_node(node) {
4848	kfree(objp: nna_ar[node]);
4849	nna_ar[node] = NULL;
4850	}
4851
4852	kfree(objp: nna_ar[nr_node_ids]);
4853	nna_ar[nr_node_ids] = NULL;
4854	}
4855
4856	static void init_node_nr_active(struct wq_node_nr_active *nna)
4857	{
4858	nna->max = WQ_DFL_MIN_ACTIVE;
4859	atomic_set(v: &nna->nr, i: `0`);
4860	raw_spin_lock_init(&nna->lock);
4861	INIT_LIST_HEAD(list: &nna->pending_pwqs);
4862	}
4863
4864	/*
4865	* Each node's nr_active counter will be accessed mostly from its own node and
4866	* should be allocated in the node.
4867	*/
4868	static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar)
4869	{
4870	struct wq_node_nr_active *nna;
4871	int node;
4872
4873	for_each_node(node) {
4874	nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node);
4875	if (!nna)
4876	goto err_free;
4877	init_node_nr_active(nna);
4878	nna_ar[node] = nna;
4879	}
4880
4881	/ [nr_node_ids] is used as the fallback /
4882	nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE);
4883	if (!nna)
4884	goto err_free;
4885	init_node_nr_active(nna);
4886	nna_ar[nr_node_ids] = nna;
4887
4888	return `0`;
4889
4890	err_free:
4891	free_node_nr_active(nna_ar);
4892	return -ENOMEM;
4893	}
4894
4895	static void rcu_free_wq(struct rcu_head *rcu)
4896	{
4897	struct workqueue_struct *wq =
4898	container_of(rcu, struct workqueue_struct, rcu);
4899
4900	if (wq->flags & WQ_UNBOUND)
4901	free_node_nr_active(nna_ar: wq->node_nr_active);
4902
4903	wq_free_lockdep(wq);
4904	free_percpu(pdata: wq->cpu_pwq);
4905	free_workqueue_attrs(attrs: wq->unbound_attrs);
4906	kfree(objp: wq);
4907	}
4908
4909	static void rcu_free_pool(struct rcu_head *rcu)
4910	{
4911	struct worker_pool pool = container_of(rcu, struct* worker_pool, rcu);
4912
4913	ida_destroy(ida: &pool->worker_ida);
4914	free_workqueue_attrs(attrs: pool->attrs);
4915	kfree(objp: pool);
4916	}
4917
4918	/**
4919	* put_unbound_pool - put a worker_pool
4920	* @pool: worker_pool to put
4921	*
4922	* Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
4923	* safe manner. get_unbound_pool() calls this function on its failure path
4924	* and this function should be able to release pools which went through,
4925	* successfully or not, init_worker_pool().
4926	*
4927	* Should be called with wq_pool_mutex held.
4928	*/
4929	static void put_unbound_pool(struct worker_pool *pool)
4930	{
4931	struct worker *worker;
4932	LIST_HEAD(cull_list);
4933
4934	lockdep_assert_held(&wq_pool_mutex);
4935
4936	if (--pool->refcnt)
4937	return;
4938
4939	/ sanity checks /
4940	if (WARN_ON(!(pool->cpu < `0`)) \|\|
4941	WARN_ON(!list_empty(&pool->worklist)))
4942	return;
4943
4944	/ release id and unhash /
4945	if (pool->id >= `0`)
4946	idr_remove(&worker_pool_idr, id: pool->id);
4947	hash_del(node: &pool->hash_node);
4948
4949	/*
4950	* Become the manager and destroy all workers. This prevents
4951	* @pool's workers from blocking on attach_mutex. We're the last
4952	* manager and @pool gets freed with the flag set.
4953	*
4954	* Having a concurrent manager is quite unlikely to happen as we can
4955	* only get here with
4956	* pwq->refcnt == pool->refcnt == 0
4957	* which implies no work queued to the pool, which implies no worker can
4958	* become the manager. However a worker could have taken the role of
4959	* manager before the refcnts dropped to 0, since maybe_create_worker()
4960	* drops pool->lock
4961	*/
4962	while (true) {
4963	rcuwait_wait_event(&manager_wait,
4964	!(pool->flags & POOL_MANAGER_ACTIVE),
4965	TASK_UNINTERRUPTIBLE);
4966
4967	mutex_lock(&wq_pool_attach_mutex);
4968	raw_spin_lock_irq(&pool->lock);
4969	if (!(pool->flags & POOL_MANAGER_ACTIVE)) {
4970	pool->flags \|= POOL_MANAGER_ACTIVE;
4971	break;
4972	}
4973	raw_spin_unlock_irq(&pool->lock);
4974	mutex_unlock(lock: &wq_pool_attach_mutex);
4975	}
4976
4977	while ((worker = first_idle_worker(pool)))
4978	set_worker_dying(worker, list: &cull_list);
4979	WARN_ON(pool->nr_workers \|\| pool->nr_idle);
4980	raw_spin_unlock_irq(&pool->lock);
4981
4982	detach_dying_workers(cull_list: &cull_list);
4983
4984	mutex_unlock(lock: &wq_pool_attach_mutex);
4985
4986	reap_dying_workers(cull_list: &cull_list);
4987
4988	/ shut down the timers /
4989	timer_delete_sync(timer: &pool->idle_timer);
4990	cancel_work_sync(&pool->idle_cull_work);
4991	timer_delete_sync(timer: &pool->mayday_timer);
4992
4993	/ RCU protected to allow dereferences from get_work_pool() /
4994	call_rcu(head: &pool->rcu, func: rcu_free_pool);
4995	}
4996
4997	/**
4998	* get_unbound_pool - get a worker_pool with the specified attributes
4999	* @attrs: the attributes of the worker_pool to get
5000	*
5001	* Obtain a worker_pool which has the same attributes as @attrs, bump the
5002	* reference count and return it. If there already is a matching
5003	* worker_pool, it will be used; otherwise, this function attempts to
5004	* create a new one.
5005	*
5006	* Should be called with wq_pool_mutex held.
5007	*
5008	* Return: On success, a worker_pool with the same attributes as @attrs.
5009	* On failure, %NULL.
5010	*/
5011	static struct worker_pool get_unbound_pool(const* struct workqueue_attrs *attrs)
5012	{
5013	struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_NUMA];
5014	u32 hash = wqattrs_hash(attrs);
5015	struct worker_pool *pool;
5016	int pod, node = NUMA_NO_NODE;
5017
5018	lockdep_assert_held(&wq_pool_mutex);
5019
5020	/ do we already have a matching pool? /
5021	hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
5022	if (wqattrs_equal(a: pool->attrs, b: attrs)) {
5023	pool->refcnt++;
5024	return pool;
5025	}
5026	}
5027
5028	/ If __pod_cpumask is contained inside a NUMA pod, that's our node /
5029	for (pod = `0`; pod < pt->nr_pods; pod++) {
5030	if (cpumask_subset(src1p: attrs->__pod_cpumask, src2p: pt->pod_cpus[pod])) {
5031	node = pt->pod_node[pod];
5032	break;
5033	}
5034	}
5035
5036	/ nope, create a new one /
5037	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, node);
5038	if (!pool \|\| init_worker_pool(pool) < `0`)
5039	goto fail;
5040
5041	pool->node = node;
5042	copy_workqueue_attrs(to: pool->attrs, from: attrs);
5043	wqattrs_clear_for_pool(attrs: pool->attrs);
5044
5045	if (worker_pool_assign_id(pool) < `0`)
5046	goto fail;
5047
5048	/ create and start the initial worker /
5049	if (wq_online && !create_worker(pool))
5050	goto fail;
5051
5052	/ install /
5053	hash_add(unbound_pool_hash, &pool->hash_node, hash);
5054
5055	return pool;
5056	fail:
5057	if (pool)
5058	put_unbound_pool(pool);
5059	return NULL;
5060	}
5061
5062	/*
5063	* Scheduled on pwq_release_worker by put_pwq() when an unbound pwq hits zero
5064	* refcnt and needs to be destroyed.
5065	*/
5066	static void pwq_release_workfn(struct kthread_work *work)
5067	{
5068	struct pool_workqueue pwq = container_of(work, struct* pool_workqueue,
5069	release_work);
5070	struct workqueue_struct *wq = pwq->wq;
5071	struct worker_pool *pool = pwq->pool;
5072	bool is_last = false;
5073
5074	/*
5075	* When @pwq is not linked, it doesn't hold any reference to the
5076	* @wq, and @wq is invalid to access.
5077	*/
5078	if (!list_empty(head: &pwq->pwqs_node)) {
5079	mutex_lock(&wq->mutex);
5080	list_del_rcu(entry: &pwq->pwqs_node);
5081	is_last = list_empty(head: &wq->pwqs);
5082
5083	/*
5084	* For ordered workqueue with a plugged dfl_pwq, restart it now.
5085	*/
5086	if (!is_last && (wq->flags & __WQ_ORDERED))
5087	unplug_oldest_pwq(wq);
5088
5089	mutex_unlock(lock: &wq->mutex);
5090	}
5091
5092	if (wq->flags & WQ_UNBOUND) {
5093	mutex_lock(&wq_pool_mutex);
5094	put_unbound_pool(pool);
5095	mutex_unlock(lock: &wq_pool_mutex);
5096	}
5097
5098	if (!list_empty(head: &pwq->pending_node)) {
5099	struct wq_node_nr_active *nna =
5100	wq_node_nr_active(wq: pwq->wq, node: pwq->pool->node);
5101
5102	raw_spin_lock_irq(&nna->lock);
5103	list_del_init(entry: &pwq->pending_node);
5104	raw_spin_unlock_irq(&nna->lock);
5105	}
5106
5107	kfree_rcu(pwq, rcu);
5108
5109	/*
5110	* If we're the last pwq going away, @wq is already dead and no one
5111	* is gonna access it anymore. Schedule RCU free.
5112	*/
5113	if (is_last) {
5114	wq_unregister_lockdep(wq);
5115	call_rcu(head: &wq->rcu, func: rcu_free_wq);
5116	}
5117	}
5118
5119	/ initialize newly allocated @pwq which is associated with @wq and @pool /
5120	static void init_pwq(struct pool_workqueue pwq, struct* workqueue_struct *wq,
5121	struct worker_pool *pool)
5122	{
5123	BUG_ON((unsigned long)pwq & ~WORK_STRUCT_PWQ_MASK);
5124
5125	memset(pwq, `0`, sizeof(*pwq));
5126
5127	pwq->pool = pool;
5128	pwq->wq = wq;
5129	pwq->flush_color = -`1`;
5130	pwq->refcnt = `1`;
5131	INIT_LIST_HEAD(list: &pwq->inactive_works);
5132	INIT_LIST_HEAD(list: &pwq->pending_node);
5133	INIT_LIST_HEAD(list: &pwq->pwqs_node);
5134	INIT_LIST_HEAD(list: &pwq->mayday_node);
5135	kthread_init_work(&pwq->release_work, pwq_release_workfn);
5136	}
5137
5138	/ sync @pwq with the current state of its associated wq and link it /
5139	static void link_pwq(struct pool_workqueue *pwq)
5140	{
5141	struct workqueue_struct *wq = pwq->wq;
5142
5143	lockdep_assert_held(&wq->mutex);
5144
5145	/ may be called multiple times, ignore if already linked /
5146	if (!list_empty(head: &pwq->pwqs_node))
5147	return;
5148
5149	/ set the matching work_color /
5150	pwq->work_color = wq->work_color;
5151
5152	/ link in @pwq /
5153	list_add_tail_rcu(new: &pwq->pwqs_node, head: &wq->pwqs);
5154	}
5155
5156	/ obtain a pool matching @attr and create a pwq associating the pool and @wq /
5157	static struct pool_workqueue alloc_unbound_pwq(struct* workqueue_struct *wq,
5158	const struct workqueue_attrs *attrs)
5159	{
5160	struct worker_pool *pool;
5161	struct pool_workqueue *pwq;
5162
5163	lockdep_assert_held(&wq_pool_mutex);
5164
5165	pool = get_unbound_pool(attrs);
5166	if (!pool)
5167	return NULL;
5168
5169	pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
5170	if (!pwq) {
5171	put_unbound_pool(pool);
5172	return NULL;
5173	}
5174
5175	init_pwq(pwq, wq, pool);
5176	return pwq;
5177	}
5178
5179	static void apply_wqattrs_lock(void)
5180	{
5181	mutex_lock(&wq_pool_mutex);
5182	}
5183
5184	static void apply_wqattrs_unlock(void)
5185	{
5186	mutex_unlock(lock: &wq_pool_mutex);
5187	}
5188
5189	/**
5190	* wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod
5191	* @attrs: the wq_attrs of the default pwq of the target workqueue
5192	* @cpu: the target CPU
5193	*
5194	* Calculate the cpumask a workqueue with @attrs should use on @pod.
5195	* The result is stored in @attrs->__pod_cpumask.
5196	*
5197	* If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
5198	* and @pod has online CPUs requested by @attrs, the returned cpumask is the
5199	* intersection of the possible CPUs of @pod and @attrs->cpumask.
5200	*
5201	* The caller is responsible for ensuring that the cpumask of @pod stays stable.
5202	*/
5203	static void wq_calc_pod_cpumask(struct workqueue_attrs attrs, int* cpu)
5204	{
5205	const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
5206	int pod = pt->cpu_pod[cpu];
5207
5208	/ calculate possible CPUs in @pod that @attrs wants /
5209	cpumask_and(dstp: attrs->__pod_cpumask, src1p: pt->pod_cpus[pod], src2p: attrs->cpumask);
5210	/ does @pod have any online CPUs @attrs wants? /
5211	if (!cpumask_intersects(src1p: attrs->__pod_cpumask, src2p: wq_online_cpumask)) {
5212	cpumask_copy(dstp: attrs->__pod_cpumask, srcp: attrs->cpumask);
5213	return;
5214	}
5215	}
5216
5217	/ install @pwq into @wq and return the old pwq, @cpu < 0 for dfl_pwq /
5218	static struct pool_workqueue install_unbound_pwq(struct* workqueue_struct *wq,
5219	int cpu, struct pool_workqueue *pwq)
5220	{
5221	struct pool_workqueue __rcu **slot = unbound_pwq_slot(wq, cpu);
5222	struct pool_workqueue *old_pwq;
5223
5224	lockdep_assert_held(&wq_pool_mutex);
5225	lockdep_assert_held(&wq->mutex);
5226
5227	/ link_pwq() can handle duplicate calls /
5228	link_pwq(pwq);
5229
5230	old_pwq = rcu_access_pointer(*slot);
5231	rcu_assign_pointer(*slot, pwq);
5232	return old_pwq;
5233	}
5234
5235	/ context to store the prepared attrs & pwqs before applying /
5236	struct apply_wqattrs_ctx {
5237	struct workqueue_struct wq; /* target workqueue /
5238	struct workqueue_attrs attrs; /* attrs to apply /
5239	struct list_head list; / queued for batching commit /
5240	struct pool_workqueue *dfl_pwq;
5241	struct pool_workqueue *pwq_tbl[];
5242	};
5243
5244	/ free the resources after success or abort /
5245	static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
5246	{
5247	if (ctx) {
5248	int cpu;
5249
5250	for_each_possible_cpu(cpu)
5251	put_pwq_unlocked(pwq: ctx->pwq_tbl[cpu]);
5252	put_pwq_unlocked(pwq: ctx->dfl_pwq);
5253
5254	free_workqueue_attrs(attrs: ctx->attrs);
5255
5256	kfree(objp: ctx);
5257	}
5258	}
5259
5260	/ allocate the attrs and pwqs for later installation /
5261	static struct apply_wqattrs_ctx *
5262	apply_wqattrs_prepare(struct workqueue_struct *wq,
5263	const struct workqueue_attrs *attrs,
5264	const cpumask_var_t unbound_cpumask)
5265	{
5266	struct apply_wqattrs_ctx *ctx;
5267	struct workqueue_attrs *new_attrs;
5268	int cpu;
5269
5270	lockdep_assert_held(&wq_pool_mutex);
5271
5272	if (WARN_ON(attrs->affn_scope < `0` \|\|
5273	attrs->affn_scope >= WQ_AFFN_NR_TYPES))
5274	return ERR_PTR(error: -EINVAL);
5275
5276	ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_cpu_ids), GFP_KERNEL);
5277
5278	new_attrs = alloc_workqueue_attrs();
5279	if (!ctx \|\| !new_attrs)
5280	goto out_free;
5281
5282	/*
5283	* If something goes wrong during CPU up/down, we'll fall back to
5284	* the default pwq covering whole @attrs->cpumask. Always create
5285	* it even if we don't use it immediately.
5286	*/
5287	copy_workqueue_attrs(to: new_attrs, from: attrs);
5288	wqattrs_actualize_cpumask(attrs: new_attrs, unbound_cpumask);
5289	cpumask_copy(dstp: new_attrs->__pod_cpumask, srcp: new_attrs->cpumask);
5290	ctx->dfl_pwq = alloc_unbound_pwq(wq, attrs: new_attrs);
5291	if (!ctx->dfl_pwq)
5292	goto out_free;
5293
5294	for_each_possible_cpu(cpu) {
5295	if (new_attrs->ordered) {
5296	ctx->dfl_pwq->refcnt++;
5297	ctx->pwq_tbl[cpu] = ctx->dfl_pwq;
5298	} else {
5299	wq_calc_pod_cpumask(attrs: new_attrs, cpu);
5300	ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, attrs: new_attrs);
5301	if (!ctx->pwq_tbl[cpu])
5302	goto out_free;
5303	}
5304	}
5305
5306	/ save the user configured attrs and sanitize it. /
5307	copy_workqueue_attrs(to: new_attrs, from: attrs);
5308	cpumask_and(dstp: new_attrs->cpumask, src1p: new_attrs->cpumask, cpu_possible_mask);
5309	cpumask_copy(dstp: new_attrs->__pod_cpumask, srcp: new_attrs->cpumask);
5310	ctx->attrs = new_attrs;
5311
5312	/*
5313	* For initialized ordered workqueues, there should only be one pwq
5314	* (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution
5315	* of newly queued work items until execution of older work items in
5316	* the old pwq's have completed.
5317	*/
5318	if ((wq->flags & __WQ_ORDERED) && !list_empty(head: &wq->pwqs))
5319	ctx->dfl_pwq->plugged = true;
5320
5321	ctx->wq = wq;
5322	return ctx;
5323
5324	out_free:
5325	free_workqueue_attrs(attrs: new_attrs);
5326	apply_wqattrs_cleanup(ctx);
5327	return ERR_PTR(error: -ENOMEM);
5328	}
5329
5330	/ set attrs and install prepared pwqs, @ctx points to old pwqs on return /
5331	static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
5332	{
5333	int cpu;
5334
5335	/ all pwqs have been created successfully, let's install'em /
5336	mutex_lock(&ctx->wq->mutex);
5337
5338	copy_workqueue_attrs(to: ctx->wq->unbound_attrs, from: ctx->attrs);
5339
5340	/ save the previous pwqs and install the new ones /
5341	for_each_possible_cpu(cpu)
5342	ctx->pwq_tbl[cpu] = install_unbound_pwq(wq: ctx->wq, cpu,
5343	pwq: ctx->pwq_tbl[cpu]);
5344	ctx->dfl_pwq = install_unbound_pwq(wq: ctx->wq, cpu: -`1`, pwq: ctx->dfl_pwq);
5345
5346	/ update node_nr_active->max /
5347	wq_update_node_max_active(wq: ctx->wq, off_cpu: -`1`);
5348
5349	/ rescuer needs to respect wq cpumask changes /
5350	if (ctx->wq->rescuer)
5351	set_cpus_allowed_ptr(p: ctx->wq->rescuer->task,
5352	new_mask: unbound_effective_cpumask(wq: ctx->wq));
5353
5354	mutex_unlock(lock: &ctx->wq->mutex);
5355	}
5356
5357	static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
5358	const struct workqueue_attrs *attrs)
5359	{
5360	struct apply_wqattrs_ctx *ctx;
5361
5362	/ only unbound workqueues can change attributes /
5363	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
5364	return -EINVAL;
5365
5366	ctx = apply_wqattrs_prepare(wq, attrs, unbound_cpumask: wq_unbound_cpumask);
5367	if (IS_ERR(ptr: ctx))
5368	return PTR_ERR(ptr: ctx);
5369
5370	/ the ctx has been prepared successfully, let's commit it /
5371	apply_wqattrs_commit(ctx);
5372	apply_wqattrs_cleanup(ctx);
5373
5374	return `0`;
5375	}
5376
5377	/**
5378	* apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
5379	* @wq: the target workqueue
5380	* @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
5381	*
5382	* Apply @attrs to an unbound workqueue @wq. Unless disabled, this function maps
5383	* a separate pwq to each CPU pod with possibles CPUs in @attrs->cpumask so that
5384	* work items are affine to the pod it was issued on. Older pwqs are released as
5385	* in-flight work items finish. Note that a work item which repeatedly requeues
5386	* itself back-to-back will stay on its current pwq.
5387	*
5388	* Performs GFP_KERNEL allocations.
5389	*
5390	* Return: 0 on success and -errno on failure.
5391	*/
5392	int apply_workqueue_attrs(struct workqueue_struct *wq,
5393	const struct workqueue_attrs *attrs)
5394	{
5395	int ret;
5396
5397	mutex_lock(&wq_pool_mutex);
5398	ret = apply_workqueue_attrs_locked(wq, attrs);
5399	mutex_unlock(lock: &wq_pool_mutex);
5400
5401	return ret;
5402	}
5403
5404	/**
5405	* unbound_wq_update_pwq - update a pwq slot for CPU hot[un]plug
5406	* @wq: the target workqueue
5407	* @cpu: the CPU to update the pwq slot for
5408	*
5409	* This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
5410	* %CPU_DOWN_FAILED. @cpu is in the same pod of the CPU being hot[un]plugged.
5411	*
5412	*
5413	* If pod affinity can't be adjusted due to memory allocation failure, it falls
5414	* back to @wq->dfl_pwq which may not be optimal but is always correct.
5415	*
5416	* Note that when the last allowed CPU of a pod goes offline for a workqueue
5417	* with a cpumask spanning multiple pods, the workers which were already
5418	* executing the work items for the workqueue will lose their CPU affinity and
5419	* may execute on any CPU. This is similar to how per-cpu workqueues behave on
5420	* CPU_DOWN. If a workqueue user wants strict affinity, it's the user's
5421	* responsibility to flush the work item from CPU_DOWN_PREPARE.
5422	*/
5423	static void unbound_wq_update_pwq(struct workqueue_struct wq, int* cpu)
5424	{
5425	struct pool_workqueue old_pwq = NULL, pwq;
5426	struct workqueue_attrs *target_attrs;
5427
5428	lockdep_assert_held(&wq_pool_mutex);
5429
5430	if (!(wq->flags & WQ_UNBOUND) \|\| wq->unbound_attrs->ordered)
5431	return;
5432
5433	/*
5434	* We don't wanna alloc/free wq_attrs for each wq for each CPU.
5435	* Let's use a preallocated one. The following buf is protected by
5436	* CPU hotplug exclusion.
5437	*/
5438	target_attrs = unbound_wq_update_pwq_attrs_buf;
5439
5440	copy_workqueue_attrs(to: target_attrs, from: wq->unbound_attrs);
5441	wqattrs_actualize_cpumask(attrs: target_attrs, unbound_cpumask: wq_unbound_cpumask);
5442
5443	/ nothing to do if the target cpumask matches the current pwq /
5444	wq_calc_pod_cpumask(attrs: target_attrs, cpu);
5445	if (wqattrs_equal(a: target_attrs, b: unbound_pwq(wq, cpu)->pool->attrs))
5446	return;
5447
5448	/ create a new pwq /
5449	pwq = alloc_unbound_pwq(wq, attrs: target_attrs);
5450	if (!pwq) {
5451	pr_warn("workqueue: allocation failed while updating CPU pod affinity of \"%s\"\n",
5452	wq->name);
5453	goto use_dfl_pwq;
5454	}
5455
5456	/ Install the new pwq. /
5457	mutex_lock(&wq->mutex);
5458	old_pwq = install_unbound_pwq(wq, cpu, pwq);
5459	goto out_unlock;
5460
5461	use_dfl_pwq:
5462	mutex_lock(&wq->mutex);
5463	pwq = unbound_pwq(wq, cpu: -`1`);
5464	raw_spin_lock_irq(&pwq->pool->lock);
5465	get_pwq(pwq);
5466	raw_spin_unlock_irq(&pwq->pool->lock);
5467	old_pwq = install_unbound_pwq(wq, cpu, pwq);
5468	out_unlock:
5469	mutex_unlock(lock: &wq->mutex);
5470	put_pwq_unlocked(pwq: old_pwq);
5471	}
5472
5473	static int alloc_and_link_pwqs(struct workqueue_struct *wq)
5474	{
5475	bool highpri = wq->flags & WQ_HIGHPRI;
5476	int cpu, ret;
5477
5478	lockdep_assert_held(&wq_pool_mutex);
5479
5480	wq->cpu_pwq = alloc_percpu(struct pool_workqueue *);
5481	if (!wq->cpu_pwq)
5482	goto enomem;
5483
5484	if (!(wq->flags & WQ_UNBOUND)) {
5485	struct worker_pool __percpu *pools;
5486
5487	if (wq->flags & WQ_BH)
5488	pools = bh_worker_pools;
5489	else
5490	pools = cpu_worker_pools;
5491
5492	for_each_possible_cpu(cpu) {
5493	struct pool_workqueue **pwq_p;
5494	struct worker_pool *pool;
5495
5496	pool = &(per_cpu_ptr(pools, cpu)[highpri]);
5497	pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu);
5498
5499	*pwq_p = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL,
5500	pool->node);
5501	if (!*pwq_p)
5502	goto enomem;
5503
5504	init_pwq(pwq: *pwq_p, wq, pool);
5505
5506	mutex_lock(&wq->mutex);
5507	link_pwq(pwq: *pwq_p);
5508	mutex_unlock(lock: &wq->mutex);
5509	}
5510	return `0`;
5511	}
5512
5513	if (wq->flags & __WQ_ORDERED) {
5514	struct pool_workqueue *dfl_pwq;
5515
5516	ret = apply_workqueue_attrs_locked(wq, attrs: ordered_wq_attrs[highpri]);
5517	/ there should only be single pwq for ordering guarantee /
5518	dfl_pwq = rcu_access_pointer(wq->dfl_pwq);
5519	WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node \|\|
5520	wq->pwqs.prev != &dfl_pwq->pwqs_node),
5521	"ordering guarantee broken for workqueue %s\n", wq->name);
5522	} else {
5523	ret = apply_workqueue_attrs_locked(wq, attrs: unbound_std_wq_attrs[highpri]);
5524	}
5525
5526	return ret;
5527
5528	enomem:
5529	if (wq->cpu_pwq) {
5530	for_each_possible_cpu(cpu) {
5531	struct pool_workqueue pwq = per_cpu_ptr(wq->cpu_pwq, cpu);
5532
5533	if (pwq)
5534	kmem_cache_free(s: pwq_cache, objp: pwq);
5535	}
5536	free_percpu(pdata: wq->cpu_pwq);
5537	wq->cpu_pwq = NULL;
5538	}
5539	return -ENOMEM;
5540	}
5541
5542	static int wq_clamp_max_active(int max_active, unsigned int flags,
5543	const char *name)
5544	{
5545	if (max_active < `1` \|\| max_active > WQ_MAX_ACTIVE)
5546	pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
5547	max_active, name, `1`, WQ_MAX_ACTIVE);
5548
5549	return clamp_val(max_active, `1`, WQ_MAX_ACTIVE);
5550	}
5551
5552	/*
5553	* Workqueues which may be used during memory reclaim should have a rescuer
5554	* to guarantee forward progress.
5555	*/
5556	static int init_rescuer(struct workqueue_struct *wq)
5557	{
5558	struct worker *rescuer;
5559	char id_buf[WORKER_ID_LEN];
5560	int ret;
5561
5562	lockdep_assert_held(&wq_pool_mutex);
5563
5564	if (!(wq->flags & WQ_MEM_RECLAIM))
5565	return `0`;
5566
5567	rescuer = alloc_worker(NUMA_NO_NODE);
5568	if (!rescuer) {
5569	pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n",
5570	wq->name);
5571	return -ENOMEM;
5572	}
5573
5574	rescuer->rescue_wq = wq;
5575	format_worker_id(buf: id_buf, size: sizeof(id_buf), worker: rescuer, NULL);
5576
5577	rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf);
5578	if (IS_ERR(ptr: rescuer->task)) {
5579	ret = PTR_ERR(ptr: rescuer->task);
5580	pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
5581	wq->name, ERR_PTR(ret));
5582	kfree(objp: rescuer);
5583	return ret;
5584	}
5585
5586	wq->rescuer = rescuer;
5587	if (wq->flags & WQ_UNBOUND)
5588	kthread_bind_mask(k: rescuer->task, mask: unbound_effective_cpumask(wq));
5589	else
5590	kthread_bind_mask(k: rescuer->task, cpu_possible_mask);
5591	wake_up_process(tsk: rescuer->task);
5592
5593	return `0`;
5594	}
5595
5596	/**
5597	* wq_adjust_max_active - update a wq's max_active to the current setting
5598	* @wq: target workqueue
5599	*
5600	* If @wq isn't freezing, set @wq->max_active to the saved_max_active and
5601	* activate inactive work items accordingly. If @wq is freezing, clear
5602	* @wq->max_active to zero.
5603	*/
5604	static void wq_adjust_max_active(struct workqueue_struct *wq)
5605	{
5606	bool activated;
5607	int new_max, new_min;
5608
5609	lockdep_assert_held(&wq->mutex);
5610
5611	if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) {
5612	new_max = `0`;
5613	new_min = `0`;
5614	} else {
5615	new_max = wq->saved_max_active;
5616	new_min = wq->saved_min_active;
5617	}
5618
5619	if (wq->max_active == new_max && wq->min_active == new_min)
5620	return;
5621
5622	/*
5623	* Update @wq->max/min_active and then kick inactive work items if more
5624	* active work items are allowed. This doesn't break work item ordering
5625	* because new work items are always queued behind existing inactive
5626	* work items if there are any.
5627	*/
5628	WRITE_ONCE(wq->max_active, new_max);
5629	WRITE_ONCE(wq->min_active, new_min);
5630
5631	if (wq->flags & WQ_UNBOUND)
5632	wq_update_node_max_active(wq, off_cpu: -`1`);
5633
5634	if (new_max == `0`)
5635	return;
5636
5637	/*
5638	* Round-robin through pwq's activating the first inactive work item
5639	* until max_active is filled.
5640	*/
5641	do {
5642	struct pool_workqueue *pwq;
5643
5644	activated = false;
5645	for_each_pwq(pwq, wq) {
5646	unsigned long irq_flags;
5647
5648	/ can be called during early boot w/ irq disabled /
5649	raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags);
5650	if (pwq_activate_first_inactive(pwq, fill: true)) {
5651	activated = true;
5652	kick_pool(pool: pwq->pool);
5653	}
5654	raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags);
5655	}
5656	} while (activated);
5657	}
5658
5659	__printf(`1`, `0`)
5660	static struct workqueue_struct __alloc_workqueue(const* char *fmt,
5661	unsigned int flags,
5662	int max_active, va_list args)
5663	{
5664	struct workqueue_struct *wq;
5665	size_t wq_size;
5666	int name_len;
5667
5668	if (flags & WQ_BH) {
5669	if (WARN_ON_ONCE(flags & ~__WQ_BH_ALLOWS))
5670	return NULL;
5671	if (WARN_ON_ONCE(max_active))
5672	return NULL;
5673	}
5674
5675	/ see the comment above the definition of WQ_POWER_EFFICIENT /
5676	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
5677	flags \|= WQ_UNBOUND;
5678
5679	/ allocate wq and format name /
5680	if (flags & WQ_UNBOUND)
5681	wq_size = struct_size(wq, node_nr_active, nr_node_ids + `1`);
5682	else
5683	wq_size = sizeof(*wq);
5684
5685	wq = kzalloc(wq_size, GFP_KERNEL);
5686	if (!wq)
5687	return NULL;
5688
5689	if (flags & WQ_UNBOUND) {
5690	wq->unbound_attrs = alloc_workqueue_attrs();
5691	if (!wq->unbound_attrs)
5692	goto err_free_wq;
5693	}
5694
5695	name_len = vsnprintf(buf: wq->name, size: sizeof(wq->name), fmt, args);
5696
5697	if (name_len >= WQ_NAME_LEN)
5698	pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
5699	wq->name);
5700
5701	if (flags & WQ_BH) {
5702	/*
5703	* BH workqueues always share a single execution context per CPU
5704	* and don't impose any max_active limit.
5705	*/
5706	max_active = INT_MAX;
5707	} else {
5708	max_active = max_active ?: WQ_DFL_ACTIVE;
5709	max_active = wq_clamp_max_active(max_active, flags, name: wq->name);
5710	}
5711
5712	/ init wq /
5713	wq->flags = flags;
5714	wq->max_active = max_active;
5715	wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE);
5716	wq->saved_max_active = wq->max_active;
5717	wq->saved_min_active = wq->min_active;
5718	mutex_init(&wq->mutex);
5719	atomic_set(v: &wq->nr_pwqs_to_flush, i: `0`);
5720	INIT_LIST_HEAD(list: &wq->pwqs);
5721	INIT_LIST_HEAD(list: &wq->flusher_queue);
5722	INIT_LIST_HEAD(list: &wq->flusher_overflow);
5723	INIT_LIST_HEAD(list: &wq->maydays);
5724
5725	INIT_LIST_HEAD(list: &wq->list);
5726
5727	if (flags & WQ_UNBOUND) {
5728	if (alloc_node_nr_active(nna_ar: wq->node_nr_active) < `0`)
5729	goto err_free_wq;
5730	}
5731
5732	/*
5733	* wq_pool_mutex protects the workqueues list, allocations of PWQs,
5734	* and the global freeze state.
5735	*/
5736	apply_wqattrs_lock();
5737
5738	if (alloc_and_link_pwqs(wq) < `0`)
5739	goto err_unlock_free_node_nr_active;
5740
5741	mutex_lock(&wq->mutex);
5742	wq_adjust_max_active(wq);
5743	mutex_unlock(lock: &wq->mutex);
5744
5745	list_add_tail_rcu(new: &wq->list, head: &workqueues);
5746
5747	if (wq_online && init_rescuer(wq) < `0`)
5748	goto err_unlock_destroy;
5749
5750	apply_wqattrs_unlock();
5751
5752	if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
5753	goto err_destroy;
5754
5755	return wq;
5756
5757	err_unlock_free_node_nr_active:
5758	apply_wqattrs_unlock();
5759	/*
5760	* Failed alloc_and_link_pwqs() may leave pending pwq->release_work,
5761	* flushing the pwq_release_worker ensures that the pwq_release_workfn()
5762	* completes before calling kfree(wq).
5763	*/
5764	if (wq->flags & WQ_UNBOUND) {
5765	kthread_flush_worker(worker: pwq_release_worker);
5766	free_node_nr_active(nna_ar: wq->node_nr_active);
5767	}
5768	err_free_wq:
5769	free_workqueue_attrs(attrs: wq->unbound_attrs);
5770	kfree(objp: wq);
5771	return NULL;
5772	err_unlock_destroy:
5773	apply_wqattrs_unlock();
5774	err_destroy:
5775	destroy_workqueue(wq);
5776	return NULL;
5777	}
5778
5779	__printf(`1`, `4`)
5780	struct workqueue_struct alloc_workqueue(const* char *fmt,
5781	unsigned int flags,
5782	int max_active, ...)
5783	{
5784	struct workqueue_struct *wq;
5785	va_list args;
5786
5787	va_start(args, max_active);
5788	wq = __alloc_workqueue(fmt, flags, max_active, args);
5789	va_end(args);
5790	if (!wq)
5791	return NULL;
5792
5793	wq_init_lockdep(wq);
5794
5795	return wq;
5796	}
5797	EXPORT_SYMBOL_GPL(alloc_workqueue);
5798
5799	#ifdef CONFIG_LOCKDEP
5800	__printf(`1`, `5`)
5801	struct workqueue_struct *
5802	alloc_workqueue_lockdep_map(const char fmt, unsigned* int flags,
5803	int max_active, struct lockdep_map *lockdep_map, ...)
5804	{
5805	struct workqueue_struct *wq;
5806	va_list args;
5807
5808	va_start(args, lockdep_map);
5809	wq = __alloc_workqueue(fmt, flags, max_active, args);
5810	va_end(args);
5811	if (!wq)
5812	return NULL;
5813
5814	wq->lockdep_map = lockdep_map;
5815
5816	return wq;
5817	}
5818	EXPORT_SYMBOL_GPL(alloc_workqueue_lockdep_map);
5819	#endif
5820
5821	static bool pwq_busy(struct pool_workqueue *pwq)
5822	{
5823	int i;
5824
5825	for (i = `0`; i < WORK_NR_COLORS; i++)
5826	if (pwq->nr_in_flight[i])
5827	return true;
5828
5829	if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > `1`))
5830	return true;
5831	if (!pwq_is_empty(pwq))
5832	return true;
5833
5834	return false;
5835	}
5836
5837	/**
5838	* destroy_workqueue - safely terminate a workqueue
5839	* @wq: target workqueue
5840	*
5841	* Safely destroy a workqueue. All work currently pending will be done first.
5842	*
5843	* This function does NOT guarantee that non-pending work that has been
5844	* submitted with queue_delayed_work() and similar functions will be done
5845	* before destroying the workqueue. The fundamental problem is that, currently,
5846	* the workqueue has no way of accessing non-pending delayed_work. delayed_work
5847	* is only linked on the timer-side. All delayed_work must, therefore, be
5848	* canceled before calling this function.
5849	*
5850	* TODO: It would be better if the problem described above wouldn't exist and
5851	* destroy_workqueue() would cleanly cancel all pending and non-pending
5852	* delayed_work.
5853	*/
5854	void destroy_workqueue(struct workqueue_struct *wq)
5855	{
5856	struct pool_workqueue *pwq;
5857	int cpu;
5858
5859	/*
5860	* Remove it from sysfs first so that sanity check failure doesn't
5861	* lead to sysfs name conflicts.
5862	*/
5863	workqueue_sysfs_unregister(wq);
5864
5865	/ mark the workqueue destruction is in progress /
5866	mutex_lock(&wq->mutex);
5867	wq->flags \|= __WQ_DESTROYING;
5868	mutex_unlock(lock: &wq->mutex);
5869
5870	/ drain it before proceeding with destruction /
5871	drain_workqueue(wq);
5872
5873	/ kill rescuer, if sanity checks fail, leave it w/o rescuer /
5874	if (wq->rescuer) {
5875	struct worker *rescuer = wq->rescuer;
5876
5877	/ this prevents new queueing /
5878	raw_spin_lock_irq(&wq_mayday_lock);
5879	wq->rescuer = NULL;
5880	raw_spin_unlock_irq(&wq_mayday_lock);
5881
5882	/ rescuer will empty maydays list before exiting /
5883	kthread_stop(k: rescuer->task);
5884	kfree(objp: rescuer);
5885	}
5886
5887	/*
5888	* Sanity checks - grab all the locks so that we wait for all
5889	* in-flight operations which may do put_pwq().
5890	*/
5891	mutex_lock(&wq_pool_mutex);
5892	mutex_lock(&wq->mutex);
5893	for_each_pwq(pwq, wq) {
5894	raw_spin_lock_irq(&pwq->pool->lock);
5895	if (WARN_ON(pwq_busy(pwq))) {
5896	pr_warn("%s: %s has the following busy pwq\n",
5897	__func__, wq->name);
5898	show_pwq(pwq);
5899	raw_spin_unlock_irq(&pwq->pool->lock);
5900	mutex_unlock(lock: &wq->mutex);
5901	mutex_unlock(lock: &wq_pool_mutex);
5902	show_one_workqueue(wq);
5903	return;
5904	}
5905	raw_spin_unlock_irq(&pwq->pool->lock);
5906	}
5907	mutex_unlock(lock: &wq->mutex);
5908
5909	/*
5910	* wq list is used to freeze wq, remove from list after
5911	* flushing is complete in case freeze races us.
5912	*/
5913	list_del_rcu(entry: &wq->list);
5914	mutex_unlock(lock: &wq_pool_mutex);
5915
5916	/*
5917	* We're the sole accessor of @wq. Directly access cpu_pwq and dfl_pwq
5918	* to put the base refs. @wq will be auto-destroyed from the last
5919	* pwq_put. RCU read lock prevents @wq from going away from under us.
5920	*/
5921	rcu_read_lock();
5922
5923	for_each_possible_cpu(cpu) {
5924	put_pwq_unlocked(pwq: unbound_pwq(wq, cpu));
5925	RCU_INIT_POINTER(*unbound_pwq_slot(wq, cpu), NULL);
5926	}
5927
5928	put_pwq_unlocked(pwq: unbound_pwq(wq, cpu: -`1`));
5929	RCU_INIT_POINTER(*unbound_pwq_slot(wq, -`1`), NULL);
5930
5931	rcu_read_unlock();
5932	}
5933	EXPORT_SYMBOL_GPL(destroy_workqueue);
5934
5935	/**
5936	* workqueue_set_max_active - adjust max_active of a workqueue
5937	* @wq: target workqueue
5938	* @max_active: new max_active value.
5939	*
5940	* Set max_active of @wq to @max_active. See the alloc_workqueue() function
5941	* comment.
5942	*
5943	* CONTEXT:
5944	* Don't call from IRQ context.
5945	*/
5946	void workqueue_set_max_active(struct workqueue_struct wq, int* max_active)
5947	{
5948	/ max_active doesn't mean anything for BH workqueues /
5949	if (WARN_ON(wq->flags & WQ_BH))
5950	return;
5951	/ disallow meddling with max_active for ordered workqueues /
5952	if (WARN_ON(wq->flags & __WQ_ORDERED))
5953	return;
5954
5955	max_active = wq_clamp_max_active(max_active, flags: wq->flags, name: wq->name);
5956
5957	mutex_lock(&wq->mutex);
5958
5959	wq->saved_max_active = max_active;
5960	if (wq->flags & WQ_UNBOUND)
5961	wq->saved_min_active = min(wq->saved_min_active, max_active);
5962
5963	wq_adjust_max_active(wq);
5964
5965	mutex_unlock(lock: &wq->mutex);
5966	}
5967	EXPORT_SYMBOL_GPL(workqueue_set_max_active);
5968
5969	/**
5970	* workqueue_set_min_active - adjust min_active of an unbound workqueue
5971	* @wq: target unbound workqueue
5972	* @min_active: new min_active value
5973	*
5974	* Set min_active of an unbound workqueue. Unlike other types of workqueues, an
5975	* unbound workqueue is not guaranteed to be able to process max_active
5976	* interdependent work items. Instead, an unbound workqueue is guaranteed to be
5977	* able to process min_active number of interdependent work items which is
5978	* %WQ_DFL_MIN_ACTIVE by default.
5979	*
5980	* Use this function to adjust the min_active value between 0 and the current
5981	* max_active.
5982	*/
5983	void workqueue_set_min_active(struct workqueue_struct wq, int* min_active)
5984	{
5985	/ min_active is only meaningful for non-ordered unbound workqueues /
5986	if (WARN_ON((wq->flags & (WQ_BH \| WQ_UNBOUND \| __WQ_ORDERED)) !=
5987	WQ_UNBOUND))
5988	return;
5989
5990	mutex_lock(&wq->mutex);
5991	wq->saved_min_active = clamp(min_active, `0`, wq->saved_max_active);
5992	wq_adjust_max_active(wq);
5993	mutex_unlock(lock: &wq->mutex);
5994	}
5995
5996	/**
5997	* current_work - retrieve %current task's work struct
5998	*
5999	* Determine if %current task is a workqueue worker and what it's working on.
6000	* Useful to find out the context that the %current task is running in.
6001	*
6002	* Return: work struct if %current task is a workqueue worker, %NULL otherwise.
6003	*/
6004	struct work_struct current_work(void*)
6005	{
6006	struct worker *worker = current_wq_worker();
6007
6008	return worker ? worker->current_work : NULL;
6009	}
6010	EXPORT_SYMBOL(current_work);
6011
6012	/**
6013	* current_is_workqueue_rescuer - is %current workqueue rescuer?
6014	*
6015	* Determine whether %current is a workqueue rescuer. Can be used from
6016	* work functions to determine whether it's being run off the rescuer task.
6017	*
6018	* Return: %true if %current is a workqueue rescuer. %false otherwise.
6019	*/
6020	bool current_is_workqueue_rescuer(void)
6021	{
6022	struct worker *worker = current_wq_worker();
6023
6024	return worker && worker->rescue_wq;
6025	}
6026
6027	/**
6028	* workqueue_congested - test whether a workqueue is congested
6029	* @cpu: CPU in question
6030	* @wq: target workqueue
6031	*
6032	* Test whether @wq's cpu workqueue for @cpu is congested. There is
6033	* no synchronization around this function and the test result is
6034	* unreliable and only useful as advisory hints or for debugging.
6035	*
6036	* If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
6037	*
6038	* With the exception of ordered workqueues, all workqueues have per-cpu
6039	* pool_workqueues, each with its own congested state. A workqueue being
6040	* congested on one CPU doesn't mean that the workqueue is contested on any
6041	* other CPUs.
6042	*
6043	* Return:
6044	* %true if congested, %false otherwise.
6045	*/
6046	bool workqueue_congested(int cpu, struct workqueue_struct *wq)
6047	{
6048	struct pool_workqueue *pwq;
6049	bool ret;
6050
6051	rcu_read_lock();
6052	preempt_disable();
6053
6054	if (cpu == WORK_CPU_UNBOUND)
6055	cpu = smp_processor_id();
6056
6057	pwq = *per_cpu_ptr(wq->cpu_pwq, cpu);
6058	ret = !list_empty(head: &pwq->inactive_works);
6059
6060	preempt_enable();
6061	rcu_read_unlock();
6062
6063	return ret;
6064	}
6065	EXPORT_SYMBOL_GPL(workqueue_congested);
6066
6067	/**
6068	* work_busy - test whether a work is currently pending or running
6069	* @work: the work to be tested
6070	*
6071	* Test whether @work is currently pending or running. There is no
6072	* synchronization around this function and the test result is
6073	* unreliable and only useful as advisory hints or for debugging.
6074	*
6075	* Return:
6076	* OR'd bitmask of WORK_BUSY_* bits.
6077	*/
6078	unsigned int work_busy(struct work_struct *work)
6079	{
6080	struct worker_pool *pool;
6081	unsigned long irq_flags;
6082	unsigned int ret = `0`;
6083
6084	if (work_pending(work))
6085	ret \|= WORK_BUSY_PENDING;
6086
6087	rcu_read_lock();
6088	pool = get_work_pool(work);
6089	if (pool) {
6090	raw_spin_lock_irqsave(&pool->lock, irq_flags);
6091	if (find_worker_executing_work(pool, work))
6092	ret \|= WORK_BUSY_RUNNING;
6093	raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
6094	}
6095	rcu_read_unlock();
6096
6097	return ret;
6098	}
6099	EXPORT_SYMBOL_GPL(work_busy);
6100
6101	/**
6102	* set_worker_desc - set description for the current work item
6103	* @fmt: printf-style format string
6104	* @...: arguments for the format string
6105	*
6106	* This function can be called by a running work function to describe what
6107	* the work item is about. If the worker task gets dumped, this
6108	* information will be printed out together to help debugging. The
6109	* description can be at most WORKER_DESC_LEN including the trailing '\0'.
6110	*/
6111	void set_worker_desc(const char *fmt, ...)
6112	{
6113	struct worker *worker = current_wq_worker();
6114	va_list args;
6115
6116	if (worker) {
6117	va_start(args, fmt);
6118	vsnprintf(buf: worker->desc, size: sizeof(worker->desc), fmt, args);
6119	va_end(args);
6120	}
6121	}
6122	EXPORT_SYMBOL_GPL(set_worker_desc);
6123
6124	/**
6125	* print_worker_info - print out worker information and description
6126	* @log_lvl: the log level to use when printing
6127	* @task: target task
6128	*
6129	* If @task is a worker and currently executing a work item, print out the
6130	* name of the workqueue being serviced and worker description set with
6131	* set_worker_desc() by the currently executing work item.
6132	*
6133	* This function can be safely called on any task as long as the
6134	* task_struct itself is accessible. While safe, this function isn't
6135	* synchronized and may print out mixups or garbages of limited length.
6136	*/
6137	void print_worker_info(const char log_lvl, struct* task_struct *task)
6138	{
6139	work_func_t *fn = NULL;
6140	char name[WQ_NAME_LEN] = { };
6141	char desc[WORKER_DESC_LEN] = { };
6142	struct pool_workqueue *pwq = NULL;
6143	struct workqueue_struct *wq = NULL;
6144	struct worker *worker;
6145
6146	if (!(task->flags & PF_WQ_WORKER))
6147	return;
6148
6149	/*
6150	* This function is called without any synchronization and @task
6151	* could be in any state. Be careful with dereferences.
6152	*/
6153	worker = kthread_probe_data(k: task);
6154
6155	/*
6156	* Carefully copy the associated workqueue's workfn, name and desc.
6157	* Keep the original last '\0' in case the original is garbage.
6158	*/
6159	copy_from_kernel_nofault(dst: &fn, src: &worker->current_func, size: sizeof(fn));
6160	copy_from_kernel_nofault(dst: &pwq, src: &worker->current_pwq, size: sizeof(pwq));
6161	copy_from_kernel_nofault(dst: &wq, src: &pwq->wq, size: sizeof(wq));
6162	copy_from_kernel_nofault(dst: name, src: wq->name, size: sizeof(name) - `1`);
6163	copy_from_kernel_nofault(dst: desc, src: worker->desc, size: sizeof(desc) - `1`);
6164
6165	if (fn \|\| name[`0`] \|\| desc[`0`]) {
6166	printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
6167	if (strcmp(name, desc))
6168	pr_cont(" (%s)", desc);
6169	pr_cont("\n");
6170	}
6171	}
6172
6173	static void pr_cont_pool_info(struct worker_pool *pool)
6174	{
6175	pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
6176	if (pool->node != NUMA_NO_NODE)
6177	pr_cont(" node=%d", pool->node);
6178	pr_cont(" flags=0x%x", pool->flags);
6179	if (pool->flags & POOL_BH)
6180	pr_cont(" bh%s",
6181	pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
6182	else
6183	pr_cont(" nice=%d", pool->attrs->nice);
6184	}
6185
6186	static void pr_cont_worker_id(struct worker *worker)
6187	{
6188	struct worker_pool *pool = worker->pool;
6189
6190	if (pool->flags & WQ_BH)
6191	pr_cont("bh%s",
6192	pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
6193	else
6194	pr_cont("%d%s", task_pid_nr(worker->task),
6195	worker->rescue_wq ? "(RESCUER)" : "");
6196	}
6197
6198	struct pr_cont_work_struct {
6199	bool comma;
6200	work_func_t func;
6201	long ctr;
6202	};
6203
6204	static void pr_cont_work_flush(bool comma, work_func_t func, struct pr_cont_work_struct *pcwsp)
6205	{
6206	if (!pcwsp->ctr)
6207	goto out_record;
6208	if (func == pcwsp->func) {
6209	pcwsp->ctr++;
6210	return;
6211	}
6212	if (pcwsp->ctr == `1`)
6213	pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func);
6214	else
6215	pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func);
6216	pcwsp->ctr = `0`;
6217	out_record:
6218	if ((long)func == -`1L`)
6219	return;
6220	pcwsp->comma = comma;
6221	pcwsp->func = func;
6222	pcwsp->ctr = `1`;
6223	}
6224
6225	static void pr_cont_work(bool comma, struct work_struct work, struct* pr_cont_work_struct *pcwsp)
6226	{
6227	if (work->func == wq_barrier_func) {
6228	struct wq_barrier *barr;
6229
6230	barr = container_of(work, struct wq_barrier, work);
6231
6232	pr_cont_work_flush(comma, func: (work_func_t)-`1`, pcwsp);
6233	pr_cont("%s BAR(%d)", comma ? "," : "",
6234	task_pid_nr(barr->task));
6235	} else {
6236	if (!comma)
6237	pr_cont_work_flush(comma, func: (work_func_t)-`1`, pcwsp);
6238	pr_cont_work_flush(comma, func: work->func, pcwsp);
6239	}
6240	}
6241
6242	static void show_pwq(struct pool_workqueue *pwq)
6243	{
6244	struct pr_cont_work_struct pcws = { .ctr = `0`, };
6245	struct worker_pool *pool = pwq->pool;
6246	struct work_struct *work;
6247	struct worker *worker;
6248	bool has_in_flight = false, has_pending = false;
6249	int bkt;
6250
6251	pr_info(" pwq %d:", pool->id);
6252	pr_cont_pool_info(pool);
6253
6254	pr_cont(" active=%d refcnt=%d%s\n",
6255	pwq->nr_active, pwq->refcnt,
6256	!list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
6257
6258	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
6259	if (worker->current_pwq == pwq) {
6260	has_in_flight = true;
6261	break;
6262	}
6263	}
6264	if (has_in_flight) {
6265	bool comma = false;
6266
6267	pr_info(" in-flight:");
6268	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
6269	if (worker->current_pwq != pwq)
6270	continue;
6271
6272	pr_cont(" %s", comma ? "," : "");
6273	pr_cont_worker_id(worker);
6274	pr_cont(":%ps", worker->current_func);
6275	list_for_each_entry(work, &worker->scheduled, entry)
6276	pr_cont_work(comma: false, work, pcwsp: &pcws);
6277	pr_cont_work_flush(comma, func: (work_func_t)-`1L`, pcwsp: &pcws);
6278	comma = true;
6279	}
6280	pr_cont("\n");
6281	}
6282
6283	list_for_each_entry(work, &pool->worklist, entry) {
6284	if (get_work_pwq(work) == pwq) {
6285	has_pending = true;
6286	break;
6287	}
6288	}
6289	if (has_pending) {
6290	bool comma = false;
6291
6292	pr_info(" pending:");
6293	list_for_each_entry(work, &pool->worklist, entry) {
6294	if (get_work_pwq(work) != pwq)
6295	continue;
6296
6297	pr_cont_work(comma, work, pcwsp: &pcws);
6298	comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
6299	}
6300	pr_cont_work_flush(comma, func: (work_func_t)-`1L`, pcwsp: &pcws);
6301	pr_cont("\n");
6302	}
6303
6304	if (!list_empty(head: &pwq->inactive_works)) {
6305	bool comma = false;
6306
6307	pr_info(" inactive:");
6308	list_for_each_entry(work, &pwq->inactive_works, entry) {
6309	pr_cont_work(comma, work, pcwsp: &pcws);
6310	comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
6311	}
6312	pr_cont_work_flush(comma, func: (work_func_t)-`1L`, pcwsp: &pcws);
6313	pr_cont("\n");
6314	}
6315	}
6316
6317	/**
6318	* show_one_workqueue - dump state of specified workqueue
6319	* @wq: workqueue whose state will be printed
6320	*/
6321	void show_one_workqueue(struct workqueue_struct *wq)
6322	{
6323	struct pool_workqueue *pwq;
6324	bool idle = true;
6325	unsigned long irq_flags;
6326
6327	for_each_pwq(pwq, wq) {
6328	if (!pwq_is_empty(pwq)) {
6329	idle = false;
6330	break;
6331	}
6332	}
6333	if (idle) / Nothing to print for idle workqueue /
6334	return;
6335
6336	pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
6337
6338	for_each_pwq(pwq, wq) {
6339	raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags);
6340	if (!pwq_is_empty(pwq)) {
6341	/*
6342	* Defer printing to avoid deadlocks in console
6343	* drivers that queue work while holding locks
6344	* also taken in their write paths.
6345	*/
6346	printk_deferred_enter();
6347	show_pwq(pwq);
6348	printk_deferred_exit();
6349	}
6350	raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags);
6351	/*
6352	* We could be printing a lot from atomic context, e.g.
6353	* sysrq-t -> show_all_workqueues(). Avoid triggering
6354	* hard lockup.
6355	*/
6356	touch_nmi_watchdog();
6357	}
6358
6359	}
6360
6361	/**
6362	* show_one_worker_pool - dump state of specified worker pool
6363	* @pool: worker pool whose state will be printed
6364	*/
6365	static void show_one_worker_pool(struct worker_pool *pool)
6366	{
6367	struct worker *worker;
6368	bool first = true;
6369	unsigned long irq_flags;
6370	unsigned long hung = `0`;
6371
6372	raw_spin_lock_irqsave(&pool->lock, irq_flags);
6373	if (pool->nr_workers == pool->nr_idle)
6374	goto next_pool;
6375
6376	/ How long the first pending work is waiting for a worker. /
6377	if (!list_empty(head: &pool->worklist))
6378	hung = jiffies_to_msecs(j: jiffies - pool->watchdog_ts) / `1000`;
6379
6380	/*
6381	* Defer printing to avoid deadlocks in console drivers that
6382	* queue work while holding locks also taken in their write
6383	* paths.
6384	*/
6385	printk_deferred_enter();
6386	pr_info("pool %d:", pool->id);
6387	pr_cont_pool_info(pool);
6388	pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers);
6389	if (pool->manager)
6390	pr_cont(" manager: %d",
6391	task_pid_nr(pool->manager->task));
6392	list_for_each_entry(worker, &pool->idle_list, entry) {
6393	pr_cont(" %s", first ? "idle: " : "");
6394	pr_cont_worker_id(worker);
6395	first = false;
6396	}
6397	pr_cont("\n");
6398	printk_deferred_exit();
6399	next_pool:
6400	raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
6401	/*
6402	* We could be printing a lot from atomic context, e.g.
6403	* sysrq-t -> show_all_workqueues(). Avoid triggering
6404	* hard lockup.
6405	*/
6406	touch_nmi_watchdog();
6407
6408	}
6409
6410	/**
6411	* show_all_workqueues - dump workqueue state
6412	*
6413	* Called from a sysrq handler and prints out all busy workqueues and pools.
6414	*/
6415	void show_all_workqueues(void)
6416	{
6417	struct workqueue_struct *wq;
6418	struct worker_pool *pool;
6419	int pi;
6420
6421	rcu_read_lock();
6422
6423	pr_info("Showing busy workqueues and worker pools:\n");
6424
6425	list_for_each_entry_rcu(wq, &workqueues, list)
6426	show_one_workqueue(wq);
6427
6428	for_each_pool(pool, pi)
6429	show_one_worker_pool(pool);
6430
6431	rcu_read_unlock();
6432	}
6433
6434	/**
6435	* show_freezable_workqueues - dump freezable workqueue state
6436	*
6437	* Called from try_to_freeze_tasks() and prints out all freezable workqueues
6438	* still busy.
6439	*/
6440	void show_freezable_workqueues(void)
6441	{
6442	struct workqueue_struct *wq;
6443
6444	rcu_read_lock();
6445
6446	pr_info("Showing freezable workqueues that are still busy:\n");
6447
6448	list_for_each_entry_rcu(wq, &workqueues, list) {
6449	if (!(wq->flags & WQ_FREEZABLE))
6450	continue;
6451	show_one_workqueue(wq);
6452	}
6453
6454	rcu_read_unlock();
6455	}
6456
6457	/ used to show worker information through /proc/PID/{comm,stat,status} /
6458	void wq_worker_comm(char buf, size_t size, struct* task_struct *task)
6459	{
6460	/ stabilize PF_WQ_WORKER and worker pool association /
6461	mutex_lock(&wq_pool_attach_mutex);
6462
6463	if (task->flags & PF_WQ_WORKER) {
6464	struct worker *worker = kthread_data(k: task);
6465	struct worker_pool *pool = worker->pool;
6466	int off;
6467
6468	off = format_worker_id(buf, size, worker, pool);
6469
6470	if (pool) {
6471	raw_spin_lock_irq(&pool->lock);
6472	/*
6473	* ->desc tracks information (wq name or
6474	* set_worker_desc()) for the latest execution. If
6475	* current, prepend '+', otherwise '-'.
6476	*/
6477	if (worker->desc[`0`] != `'\0'`) {
6478	if (worker->current_work)
6479	scnprintf(buf: buf + off, size: size - off, fmt: "+%s",
6480	worker->desc);
6481	else
6482	scnprintf(buf: buf + off, size: size - off, fmt: "-%s",
6483	worker->desc);
6484	}
6485	raw_spin_unlock_irq(&pool->lock);
6486	}
6487	} else {
6488	strscpy(buf, task->comm, size);
6489	}
6490
6491	mutex_unlock(lock: &wq_pool_attach_mutex);
6492	}
6493
6494	#ifdef CONFIG_SMP
6495
6496	/*
6497	* CPU hotplug.
6498	*
6499	* There are two challenges in supporting CPU hotplug. Firstly, there
6500	* are a lot of assumptions on strong associations among work, pwq and
6501	* pool which make migrating pending and scheduled works very
6502	* difficult to implement without impacting hot paths. Secondly,
6503	* worker pools serve mix of short, long and very long running works making
6504	* blocked draining impractical.
6505	*
6506	* This is solved by allowing the pools to be disassociated from the CPU
6507	* running as an unbound one and allowing it to be reattached later if the
6508	* cpu comes back online.
6509	*/
6510
6511	static void unbind_workers(int cpu)
6512	{
6513	struct worker_pool *pool;
6514	struct worker *worker;
6515
6516	for_each_cpu_worker_pool(pool, cpu) {
6517	mutex_lock(&wq_pool_attach_mutex);
6518	raw_spin_lock_irq(&pool->lock);
6519
6520	/*
6521	* We've blocked all attach/detach operations. Make all workers
6522	* unbound and set DISASSOCIATED. Before this, all workers
6523	* must be on the cpu. After this, they may become diasporas.
6524	* And the preemption disabled section in their sched callbacks
6525	* are guaranteed to see WORKER_UNBOUND since the code here
6526	* is on the same cpu.
6527	*/
6528	for_each_pool_worker(worker, pool)
6529	worker->flags \|= WORKER_UNBOUND;
6530
6531	pool->flags \|= POOL_DISASSOCIATED;
6532
6533	/*
6534	* The handling of nr_running in sched callbacks are disabled
6535	* now. Zap nr_running. After this, nr_running stays zero and
6536	* need_more_worker() and keep_working() are always true as
6537	* long as the worklist is not empty. This pool now behaves as
6538	* an unbound (in terms of concurrency management) pool which
6539	* are served by workers tied to the pool.
6540	*/
6541	pool->nr_running = `0`;
6542
6543	/*
6544	* With concurrency management just turned off, a busy
6545	* worker blocking could lead to lengthy stalls. Kick off
6546	* unbound chain execution of currently pending work items.
6547	*/
6548	kick_pool(pool);
6549
6550	raw_spin_unlock_irq(&pool->lock);
6551
6552	for_each_pool_worker(worker, pool)
6553	unbind_worker(worker);
6554
6555	mutex_unlock(lock: &wq_pool_attach_mutex);
6556	}
6557	}
6558
6559	/**
6560	* rebind_workers - rebind all workers of a pool to the associated CPU
6561	* @pool: pool of interest
6562	*
6563	* @pool->cpu is coming online. Rebind all workers to the CPU.
6564	*/
6565	static void rebind_workers(struct worker_pool *pool)
6566	{
6567	struct worker *worker;
6568
6569	lockdep_assert_held(&wq_pool_attach_mutex);
6570
6571	/*
6572	* Restore CPU affinity of all workers. As all idle workers should
6573	* be on the run-queue of the associated CPU before any local
6574	* wake-ups for concurrency management happen, restore CPU affinity
6575	* of all workers first and then clear UNBOUND. As we're called
6576	* from CPU_ONLINE, the following shouldn't fail.
6577	*/
6578	for_each_pool_worker(worker, pool) {
6579	kthread_set_per_cpu(k: worker->task, cpu: pool->cpu);
6580	WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
6581	pool_allowed_cpus(pool)) < `0`);
6582	}
6583
6584	raw_spin_lock_irq(&pool->lock);
6585
6586	pool->flags &= ~POOL_DISASSOCIATED;
6587
6588	for_each_pool_worker(worker, pool) {
6589	unsigned int worker_flags = worker->flags;
6590
6591	/*
6592	* We want to clear UNBOUND but can't directly call
6593	* worker_clr_flags() or adjust nr_running. Atomically
6594	* replace UNBOUND with another NOT_RUNNING flag REBOUND.
6595	* @worker will clear REBOUND using worker_clr_flags() when
6596	* it initiates the next execution cycle thus restoring
6597	* concurrency management. Note that when or whether
6598	* @worker clears REBOUND doesn't affect correctness.
6599	*
6600	* WRITE_ONCE() is necessary because @worker->flags may be
6601	* tested without holding any lock in
6602	* wq_worker_running(). Without it, NOT_RUNNING test may
6603	* fail incorrectly leading to premature concurrency
6604	* management operations.
6605	*/
6606	WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
6607	worker_flags \|= WORKER_REBOUND;
6608	worker_flags &= ~WORKER_UNBOUND;
6609	WRITE_ONCE(worker->flags, worker_flags);
6610	}
6611
6612	raw_spin_unlock_irq(&pool->lock);
6613	}
6614
6615	/**
6616	* restore_unbound_workers_cpumask - restore cpumask of unbound workers
6617	* @pool: unbound pool of interest
6618	* @cpu: the CPU which is coming up
6619	*
6620	* An unbound pool may end up with a cpumask which doesn't have any online
6621	* CPUs. When a worker of such pool get scheduled, the scheduler resets
6622	* its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any
6623	* online CPU before, cpus_allowed of all its workers should be restored.
6624	*/
6625	static void restore_unbound_workers_cpumask(struct worker_pool pool, int* cpu)
6626	{
6627	static cpumask_t cpumask;
6628	struct worker *worker;
6629
6630	lockdep_assert_held(&wq_pool_attach_mutex);
6631
6632	/ is @cpu allowed for @pool? /
6633	if (!cpumask_test_cpu(cpu, cpumask: pool->attrs->cpumask))
6634	return;
6635
6636	cpumask_and(dstp: &cpumask, src1p: pool->attrs->cpumask, cpu_online_mask);
6637
6638	/ as we're called from CPU_ONLINE, the following shouldn't fail /
6639	for_each_pool_worker(worker, pool)
6640	WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < `0`);
6641	}
6642
6643	int workqueue_prepare_cpu(unsigned int cpu)
6644	{
6645	struct worker_pool *pool;
6646
6647	for_each_cpu_worker_pool(pool, cpu) {
6648	if (pool->nr_workers)
6649	continue;
6650	if (!create_worker(pool))
6651	return -ENOMEM;
6652	}
6653	return `0`;
6654	}
6655
6656	int workqueue_online_cpu(unsigned int cpu)
6657	{
6658	struct worker_pool *pool;
6659	struct workqueue_struct *wq;
6660	int pi;
6661
6662	mutex_lock(&wq_pool_mutex);
6663
6664	cpumask_set_cpu(cpu, dstp: wq_online_cpumask);
6665
6666	for_each_pool(pool, pi) {
6667	/ BH pools aren't affected by hotplug /
6668	if (pool->flags & POOL_BH)
6669	continue;
6670
6671	mutex_lock(&wq_pool_attach_mutex);
6672	if (pool->cpu == cpu)
6673	rebind_workers(pool);
6674	else if (pool->cpu < `0`)
6675	restore_unbound_workers_cpumask(pool, cpu);
6676	mutex_unlock(lock: &wq_pool_attach_mutex);
6677	}
6678
6679	/ update pod affinity of unbound workqueues /
6680	list_for_each_entry(wq, &workqueues, list) {
6681	struct workqueue_attrs *attrs = wq->unbound_attrs;
6682
6683	if (attrs) {
6684	const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
6685	int tcpu;
6686
6687	for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
6688	unbound_wq_update_pwq(wq, cpu: tcpu);
6689
6690	mutex_lock(&wq->mutex);
6691	wq_update_node_max_active(wq, off_cpu: -`1`);
6692	mutex_unlock(lock: &wq->mutex);
6693	}
6694	}
6695
6696	mutex_unlock(lock: &wq_pool_mutex);
6697	return `0`;
6698	}
6699
6700	int workqueue_offline_cpu(unsigned int cpu)
6701	{
6702	struct workqueue_struct *wq;
6703
6704	/ unbinding per-cpu workers should happen on the local CPU /
6705	if (WARN_ON(cpu != smp_processor_id()))
6706	return -`1`;
6707
6708	unbind_workers(cpu);
6709
6710	/ update pod affinity of unbound workqueues /
6711	mutex_lock(&wq_pool_mutex);
6712
6713	cpumask_clear_cpu(cpu, dstp: wq_online_cpumask);
6714
6715	list_for_each_entry(wq, &workqueues, list) {
6716	struct workqueue_attrs *attrs = wq->unbound_attrs;
6717
6718	if (attrs) {
6719	const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
6720	int tcpu;
6721
6722	for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
6723	unbound_wq_update_pwq(wq, cpu: tcpu);
6724
6725	mutex_lock(&wq->mutex);
6726	wq_update_node_max_active(wq, off_cpu: cpu);
6727	mutex_unlock(lock: &wq->mutex);
6728	}
6729	}
6730	mutex_unlock(lock: &wq_pool_mutex);
6731
6732	return `0`;
6733	}
6734
6735	struct work_for_cpu {
6736	struct work_struct work;
6737	long (fn)(void* *);
6738	void *arg;
6739	long ret;
6740	};
6741
6742	static void work_for_cpu_fn(struct work_struct *work)
6743	{
6744	struct work_for_cpu wfc = container_of(work, struct* work_for_cpu, work);
6745
6746	wfc->ret = wfc->fn(wfc->arg);
6747	}
6748
6749	/**
6750	* work_on_cpu_key - run a function in thread context on a particular cpu
6751	* @cpu: the cpu to run on
6752	* @fn: the function to run
6753	* @arg: the function arg
6754	* @key: The lock class key for lock debugging purposes
6755	*
6756	* It is up to the caller to ensure that the cpu doesn't go offline.
6757	* The caller must not hold any locks which would prevent @fn from completing.
6758	*
6759	* Return: The value @fn returns.
6760	*/
6761	long work_on_cpu_key(int cpu, long (fn)(void* *),
6762	void arg, struct* lock_class_key *key)
6763	{
6764	struct work_for_cpu wfc = { .fn = fn, .arg = arg };
6765
6766	INIT_WORK_ONSTACK_KEY(&wfc.work, work_for_cpu_fn, key);
6767	schedule_work_on(cpu, work: &wfc.work);
6768	flush_work(&wfc.work);
6769	destroy_work_on_stack(&wfc.work);
6770	return wfc.ret;
6771	}
6772	EXPORT_SYMBOL_GPL(work_on_cpu_key);
6773
6774	/**
6775	* work_on_cpu_safe_key - run a function in thread context on a particular cpu
6776	* @cpu: the cpu to run on
6777	* @fn: the function to run
6778	* @arg: the function argument
6779	* @key: The lock class key for lock debugging purposes
6780	*
6781	* Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
6782	* any locks which would prevent @fn from completing.
6783	*
6784	* Return: The value @fn returns.
6785	*/
6786	long work_on_cpu_safe_key(int cpu, long (fn)(void* *),
6787	void arg, struct* lock_class_key *key)
6788	{
6789	long ret = -ENODEV;
6790
6791	cpus_read_lock();
6792	if (cpu_online(cpu))
6793	ret = work_on_cpu_key(cpu, fn, arg, key);
6794	cpus_read_unlock();
6795	return ret;
6796	}
6797	EXPORT_SYMBOL_GPL(work_on_cpu_safe_key);
6798	#endif /* CONFIG_SMP */
6799
6800	#ifdef CONFIG_FREEZER
6801
6802	/**
6803	* freeze_workqueues_begin - begin freezing workqueues
6804	*
6805	* Start freezing workqueues. After this function returns, all freezable
6806	* workqueues will queue new works to their inactive_works list instead of
6807	* pool->worklist.
6808	*
6809	* CONTEXT:
6810	* Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6811	*/
6812	void freeze_workqueues_begin(void)
6813	{
6814	struct workqueue_struct *wq;
6815
6816	mutex_lock(&wq_pool_mutex);
6817
6818	WARN_ON_ONCE(workqueue_freezing);
6819	workqueue_freezing = true;
6820
6821	list_for_each_entry(wq, &workqueues, list) {
6822	mutex_lock(&wq->mutex);
6823	wq_adjust_max_active(wq);
6824	mutex_unlock(lock: &wq->mutex);
6825	}
6826
6827	mutex_unlock(lock: &wq_pool_mutex);
6828	}
6829
6830	/**
6831	* freeze_workqueues_busy - are freezable workqueues still busy?
6832	*
6833	* Check whether freezing is complete. This function must be called
6834	* between freeze_workqueues_begin() and thaw_workqueues().
6835	*
6836	* CONTEXT:
6837	* Grabs and releases wq_pool_mutex.
6838	*
6839	* Return:
6840	* %true if some freezable workqueues are still busy. %false if freezing
6841	* is complete.
6842	*/
6843	bool freeze_workqueues_busy(void)
6844	{
6845	bool busy = false;
6846	struct workqueue_struct *wq;
6847	struct pool_workqueue *pwq;
6848
6849	mutex_lock(&wq_pool_mutex);
6850
6851	WARN_ON_ONCE(!workqueue_freezing);
6852
6853	list_for_each_entry(wq, &workqueues, list) {
6854	if (!(wq->flags & WQ_FREEZABLE))
6855	continue;
6856	/*
6857	* nr_active is monotonically decreasing. It's safe
6858	* to peek without lock.
6859	*/
6860	rcu_read_lock();
6861	for_each_pwq(pwq, wq) {
6862	WARN_ON_ONCE(pwq->nr_active < `0`);
6863	if (pwq->nr_active) {
6864	busy = true;
6865	rcu_read_unlock();
6866	goto out_unlock;
6867	}
6868	}
6869	rcu_read_unlock();
6870	}
6871	out_unlock:
6872	mutex_unlock(lock: &wq_pool_mutex);
6873	return busy;
6874	}
6875
6876	/**
6877	* thaw_workqueues - thaw workqueues
6878	*
6879	* Thaw workqueues. Normal queueing is restored and all collected
6880	* frozen works are transferred to their respective pool worklists.
6881	*
6882	* CONTEXT:
6883	* Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6884	*/
6885	void thaw_workqueues(void)
6886	{
6887	struct workqueue_struct *wq;
6888
6889	mutex_lock(&wq_pool_mutex);
6890
6891	if (!workqueue_freezing)
6892	goto out_unlock;
6893
6894	workqueue_freezing = false;
6895
6896	/ restore max_active and repopulate worklist /
6897	list_for_each_entry(wq, &workqueues, list) {
6898	mutex_lock(&wq->mutex);
6899	wq_adjust_max_active(wq);
6900	mutex_unlock(lock: &wq->mutex);
6901	}
6902
6903	out_unlock:
6904	mutex_unlock(lock: &wq_pool_mutex);
6905	}
6906	#endif /* CONFIG_FREEZER */
6907
6908	static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
6909	{
6910	LIST_HEAD(ctxs);
6911	int ret = `0`;
6912	struct workqueue_struct *wq;
6913	struct apply_wqattrs_ctx ctx, n;
6914
6915	lockdep_assert_held(&wq_pool_mutex);
6916
6917	list_for_each_entry(wq, &workqueues, list) {
6918	if (!(wq->flags & WQ_UNBOUND) \|\| (wq->flags & __WQ_DESTROYING))
6919	continue;
6920
6921	ctx = apply_wqattrs_prepare(wq, attrs: wq->unbound_attrs, unbound_cpumask);
6922	if (IS_ERR(ptr: ctx)) {
6923	ret = PTR_ERR(ptr: ctx);
6924	break;
6925	}
6926
6927	list_add_tail(new: &ctx->list, head: &ctxs);
6928	}
6929
6930	list_for_each_entry_safe(ctx, n, &ctxs, list) {
6931	if (!ret)
6932	apply_wqattrs_commit(ctx);
6933	apply_wqattrs_cleanup(ctx);
6934	}
6935
6936	if (!ret) {
6937	mutex_lock(&wq_pool_attach_mutex);
6938	cpumask_copy(dstp: wq_unbound_cpumask, srcp: unbound_cpumask);
6939	mutex_unlock(lock: &wq_pool_attach_mutex);
6940	}
6941	return ret;
6942	}
6943
6944	/**
6945	* workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask
6946	* @exclude_cpumask: the cpumask to be excluded from wq_unbound_cpumask
6947	*
6948	* This function can be called from cpuset code to provide a set of isolated
6949	* CPUs that should be excluded from wq_unbound_cpumask.
6950	*/
6951	int workqueue_unbound_exclude_cpumask(cpumask_var_t exclude_cpumask)
6952	{
6953	cpumask_var_t cpumask;
6954	int ret = `0`;
6955
6956	if (!zalloc_cpumask_var(mask: &cpumask, GFP_KERNEL))
6957	return -ENOMEM;
6958
6959	mutex_lock(&wq_pool_mutex);
6960
6961	/*
6962	* If the operation fails, it will fall back to
6963	* wq_requested_unbound_cpumask which is initially set to
6964	* (HK_TYPE_WQ ∩ HK_TYPE_DOMAIN) house keeping mask and rewritten
6965	* by any subsequent write to workqueue/cpumask sysfs file.
6966	*/
6967	if (!cpumask_andnot(dstp: cpumask, src1p: wq_requested_unbound_cpumask, src2p: exclude_cpumask))
6968	cpumask_copy(dstp: cpumask, srcp: wq_requested_unbound_cpumask);
6969	if (!cpumask_equal(src1p: cpumask, src2p: wq_unbound_cpumask))
6970	ret = workqueue_apply_unbound_cpumask(unbound_cpumask: cpumask);
6971
6972	/ Save the current isolated cpumask & export it via sysfs /
6973	if (!ret)
6974	cpumask_copy(dstp: wq_isolated_cpumask, srcp: exclude_cpumask);
6975
6976	mutex_unlock(lock: &wq_pool_mutex);
6977	free_cpumask_var(mask: cpumask);
6978	return ret;
6979	}
6980
6981	static int parse_affn_scope(const char *val)
6982	{
6983	int i;
6984
6985	for (i = `0`; i < ARRAY_SIZE(wq_affn_names); i++) {
6986	if (!strncasecmp(s1: val, s2: wq_affn_names[i], strlen(wq_affn_names[i])))
6987	return i;
6988	}
6989	return -EINVAL;
6990	}
6991
6992	static int wq_affn_dfl_set(const char val, const* struct kernel_param *kp)
6993	{
6994	struct workqueue_struct *wq;
6995	int affn, cpu;
6996
6997	affn = parse_affn_scope(val);
6998	if (affn < `0`)
6999	return affn;
7000	if (affn == WQ_AFFN_DFL)
7001	return -EINVAL;
7002
7003	cpus_read_lock();
7004	mutex_lock(&wq_pool_mutex);
7005
7006	wq_affn_dfl = affn;
7007
7008	list_for_each_entry(wq, &workqueues, list) {
7009	for_each_online_cpu(cpu)
7010	unbound_wq_update_pwq(wq, cpu);
7011	}
7012
7013	mutex_unlock(lock: &wq_pool_mutex);
7014	cpus_read_unlock();
7015
7016	return `0`;
7017	}
7018
7019	static int wq_affn_dfl_get(char buffer, const* struct kernel_param *kp)
7020	{
7021	return scnprintf(buf: buffer, PAGE_SIZE, fmt: "%s\n", wq_affn_names[wq_affn_dfl]);
7022	}
7023
7024	static const struct kernel_param_ops wq_affn_dfl_ops = {
7025	.set = wq_affn_dfl_set,
7026	.get = wq_affn_dfl_get,
7027	};
7028
7029	module_param_cb(default_affinity_scope, &wq_affn_dfl_ops, NULL, `0644`);
7030
7031	#ifdef CONFIG_SYSFS
7032	/*
7033	* Workqueues with WQ_SYSFS flag set is visible to userland via
7034	* /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
7035	* following attributes.
7036	*
7037	* per_cpu RO bool : whether the workqueue is per-cpu or unbound
7038	* max_active RW int : maximum number of in-flight work items
7039	*
7040	* Unbound workqueues have the following extra attributes.
7041	*
7042	* nice RW int : nice value of the workers
7043	* cpumask RW mask : bitmask of allowed CPUs for the workers
7044	* affinity_scope RW str : worker CPU affinity scope (cache, numa, none)
7045	* affinity_strict RW bool : worker CPU affinity is strict
7046	*/
7047	struct wq_device {
7048	struct workqueue_struct *wq;
7049	struct device dev;
7050	};
7051
7052	static struct workqueue_struct dev_to_wq(struct* device *dev)
7053	{
7054	struct wq_device wq_dev = container_of(dev, struct* wq_device, dev);
7055
7056	return wq_dev->wq;
7057	}
7058
7059	static ssize_t per_cpu_show(struct device dev, struct* device_attribute *attr,
7060	char *buf)
7061	{
7062	struct workqueue_struct *wq = dev_to_wq(dev);
7063
7064	return scnprintf(buf, PAGE_SIZE, fmt: "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
7065	}
7066	static DEVICE_ATTR_RO(per_cpu);
7067
7068	static ssize_t max_active_show(struct device *dev,
7069	struct device_attribute attr, char* *buf)
7070	{
7071	struct workqueue_struct *wq = dev_to_wq(dev);
7072
7073	return scnprintf(buf, PAGE_SIZE, fmt: "%d\n", wq->saved_max_active);
7074	}
7075
7076	static ssize_t max_active_store(struct device *dev,
7077	struct device_attribute attr, const* char *buf,
7078	size_t count)
7079	{
7080	struct workqueue_struct *wq = dev_to_wq(dev);
7081	int val;
7082
7083	if (sscanf(buf, "%d", &val) != `1` \|\| val <= `0`)
7084	return -EINVAL;
7085
7086	workqueue_set_max_active(wq, val);
7087	return count;
7088	}
7089	static DEVICE_ATTR_RW(max_active);
7090
7091	static struct attribute *wq_sysfs_attrs[] = {
7092	&dev_attr_per_cpu.attr,
7093	&dev_attr_max_active.attr,
7094	NULL,
7095	};
7096	ATTRIBUTE_GROUPS(wq_sysfs);
7097
7098	static ssize_t wq_nice_show(struct device dev, struct* device_attribute *attr,
7099	char *buf)
7100	{
7101	struct workqueue_struct *wq = dev_to_wq(dev);
7102	int written;
7103
7104	mutex_lock(&wq->mutex);
7105	written = scnprintf(buf, PAGE_SIZE, fmt: "%d\n", wq->unbound_attrs->nice);
7106	mutex_unlock(lock: &wq->mutex);
7107
7108	return written;
7109	}
7110
7111	/ prepare workqueue_attrs for sysfs store operations /
7112	static struct workqueue_attrs wq_sysfs_prep_attrs(struct* workqueue_struct *wq)
7113	{
7114	struct workqueue_attrs *attrs;
7115
7116	lockdep_assert_held(&wq_pool_mutex);
7117
7118	attrs = alloc_workqueue_attrs();
7119	if (!attrs)
7120	return NULL;
7121
7122	copy_workqueue_attrs(to: attrs, from: wq->unbound_attrs);
7123	return attrs;
7124	}
7125
7126	static ssize_t wq_nice_store(struct device dev, struct* device_attribute *attr,
7127	const char *buf, size_t count)
7128	{
7129	struct workqueue_struct *wq = dev_to_wq(dev);
7130	struct workqueue_attrs *attrs;
7131	int ret = -ENOMEM;
7132
7133	apply_wqattrs_lock();
7134
7135	attrs = wq_sysfs_prep_attrs(wq);
7136	if (!attrs)
7137	goto out_unlock;
7138
7139	if (sscanf(buf, "%d", &attrs->nice) == `1` &&
7140	attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
7141	ret = apply_workqueue_attrs_locked(wq, attrs);
7142	else
7143	ret = -EINVAL;
7144
7145	out_unlock:
7146	apply_wqattrs_unlock();
7147	free_workqueue_attrs(attrs);
7148	return ret ?: count;
7149	}
7150
7151	static ssize_t wq_cpumask_show(struct device *dev,
7152	struct device_attribute attr, char* *buf)
7153	{
7154	struct workqueue_struct *wq = dev_to_wq(dev);
7155	int written;
7156
7157	mutex_lock(&wq->mutex);
7158	written = scnprintf(buf, PAGE_SIZE, fmt: "%*pb\n",
7159	cpumask_pr_args(wq->unbound_attrs->cpumask));
7160	mutex_unlock(lock: &wq->mutex);
7161	return written;
7162	}
7163
7164	static ssize_t wq_cpumask_store(struct device *dev,
7165	struct device_attribute *attr,
7166	const char *buf, size_t count)
7167	{
7168	struct workqueue_struct *wq = dev_to_wq(dev);
7169	struct workqueue_attrs *attrs;
7170	int ret = -ENOMEM;
7171
7172	apply_wqattrs_lock();
7173
7174	attrs = wq_sysfs_prep_attrs(wq);
7175	if (!attrs)
7176	goto out_unlock;
7177
7178	ret = cpumask_parse(buf, dstp: attrs->cpumask);
7179	if (!ret)
7180	ret = apply_workqueue_attrs_locked(wq, attrs);
7181
7182	out_unlock:
7183	apply_wqattrs_unlock();
7184	free_workqueue_attrs(attrs);
7185	return ret ?: count;
7186	}
7187
7188	static ssize_t wq_affn_scope_show(struct device *dev,
7189	struct device_attribute attr, char* *buf)
7190	{
7191	struct workqueue_struct *wq = dev_to_wq(dev);
7192	int written;
7193
7194	mutex_lock(&wq->mutex);
7195	if (wq->unbound_attrs->affn_scope == WQ_AFFN_DFL)
7196	written = scnprintf(buf, PAGE_SIZE, fmt: "%s (%s)\n",
7197	wq_affn_names[WQ_AFFN_DFL],
7198	wq_affn_names[wq_affn_dfl]);
7199	else
7200	written = scnprintf(buf, PAGE_SIZE, fmt: "%s\n",
7201	wq_affn_names[wq->unbound_attrs->affn_scope]);
7202	mutex_unlock(lock: &wq->mutex);
7203
7204	return written;
7205	}
7206
7207	static ssize_t wq_affn_scope_store(struct device *dev,
7208	struct device_attribute *attr,
7209	const char *buf, size_t count)
7210	{
7211	struct workqueue_struct *wq = dev_to_wq(dev);
7212	struct workqueue_attrs *attrs;
7213	int affn, ret = -ENOMEM;
7214
7215	affn = parse_affn_scope(val: buf);
7216	if (affn < `0`)
7217	return affn;
7218
7219	apply_wqattrs_lock();
7220	attrs = wq_sysfs_prep_attrs(wq);
7221	if (attrs) {
7222	attrs->affn_scope = affn;
7223	ret = apply_workqueue_attrs_locked(wq, attrs);
7224	}
7225	apply_wqattrs_unlock();
7226	free_workqueue_attrs(attrs);
7227	return ret ?: count;
7228	}
7229
7230	static ssize_t wq_affinity_strict_show(struct device *dev,
7231	struct device_attribute attr, char* *buf)
7232	{
7233	struct workqueue_struct *wq = dev_to_wq(dev);
7234
7235	return scnprintf(buf, PAGE_SIZE, fmt: "%d\n",
7236	wq->unbound_attrs->affn_strict);
7237	}
7238
7239	static ssize_t wq_affinity_strict_store(struct device *dev,
7240	struct device_attribute *attr,
7241	const char *buf, size_t count)
7242	{
7243	struct workqueue_struct *wq = dev_to_wq(dev);
7244	struct workqueue_attrs *attrs;
7245	int v, ret = -ENOMEM;
7246
7247	if (sscanf(buf, "%d", &v) != `1`)
7248	return -EINVAL;
7249
7250	apply_wqattrs_lock();
7251	attrs = wq_sysfs_prep_attrs(wq);
7252	if (attrs) {
7253	attrs->affn_strict = (bool)v;
7254	ret = apply_workqueue_attrs_locked(wq, attrs);
7255	}
7256	apply_wqattrs_unlock();
7257	free_workqueue_attrs(attrs);
7258	return ret ?: count;
7259	}
7260
7261	static struct device_attribute wq_sysfs_unbound_attrs[] = {
7262	__ATTR(nice, `0644`, wq_nice_show, wq_nice_store),
7263	__ATTR(cpumask, `0644`, wq_cpumask_show, wq_cpumask_store),
7264	__ATTR(affinity_scope, `0644`, wq_affn_scope_show, wq_affn_scope_store),
7265	__ATTR(affinity_strict, `0644`, wq_affinity_strict_show, wq_affinity_strict_store),
7266	__ATTR_NULL,
7267	};
7268
7269	static const struct bus_type wq_subsys = {
7270	.name = "workqueue",
7271	.dev_groups = wq_sysfs_groups,
7272	};
7273
7274	/**
7275	* workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
7276	* @cpumask: the cpumask to set
7277	*
7278	* The low-level workqueues cpumask is a global cpumask that limits
7279	* the affinity of all unbound workqueues. This function check the @cpumask
7280	* and apply it to all unbound workqueues and updates all pwqs of them.
7281	*
7282	* Return: 0 - Success
7283	* -EINVAL - Invalid @cpumask
7284	* -ENOMEM - Failed to allocate memory for attrs or pwqs.
7285	*/
7286	static int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
7287	{
7288	int ret = -EINVAL;
7289
7290	/*
7291	* Not excluding isolated cpus on purpose.
7292	* If the user wishes to include them, we allow that.
7293	*/
7294	cpumask_and(dstp: cpumask, src1p: cpumask, cpu_possible_mask);
7295	if (!cpumask_empty(srcp: cpumask)) {
7296	ret = `0`;
7297	apply_wqattrs_lock();
7298	if (!cpumask_equal(src1p: cpumask, src2p: wq_unbound_cpumask))
7299	ret = workqueue_apply_unbound_cpumask(unbound_cpumask: cpumask);
7300	if (!ret)
7301	cpumask_copy(dstp: wq_requested_unbound_cpumask, srcp: cpumask);
7302	apply_wqattrs_unlock();
7303	}
7304
7305	return ret;
7306	}
7307
7308	static ssize_t __wq_cpumask_show(struct device *dev,
7309	struct device_attribute attr, char* *buf, cpumask_var_t mask)
7310	{
7311	int written;
7312
7313	mutex_lock(&wq_pool_mutex);
7314	written = scnprintf(buf, PAGE_SIZE, fmt: "%*pb\n", cpumask_pr_args(mask));
7315	mutex_unlock(lock: &wq_pool_mutex);
7316
7317	return written;
7318	}
7319
7320	static ssize_t cpumask_requested_show(struct device *dev,
7321	struct device_attribute attr, char* *buf)
7322	{
7323	return __wq_cpumask_show(dev, attr, buf, mask: wq_requested_unbound_cpumask);
7324	}
7325	static DEVICE_ATTR_RO(cpumask_requested);
7326
7327	static ssize_t cpumask_isolated_show(struct device *dev,
7328	struct device_attribute attr, char* *buf)
7329	{
7330	return __wq_cpumask_show(dev, attr, buf, mask: wq_isolated_cpumask);
7331	}
7332	static DEVICE_ATTR_RO(cpumask_isolated);
7333
7334	static ssize_t cpumask_show(struct device *dev,
7335	struct device_attribute attr, char* *buf)
7336	{
7337	return __wq_cpumask_show(dev, attr, buf, mask: wq_unbound_cpumask);
7338	}
7339
7340	static ssize_t cpumask_store(struct device *dev,
7341	struct device_attribute attr, const* char *buf, size_t count)
7342	{
7343	cpumask_var_t cpumask;
7344	int ret;
7345
7346	if (!zalloc_cpumask_var(mask: &cpumask, GFP_KERNEL))
7347	return -ENOMEM;
7348
7349	ret = cpumask_parse(buf, dstp: cpumask);
7350	if (!ret)
7351	ret = workqueue_set_unbound_cpumask(cpumask);
7352
7353	free_cpumask_var(mask: cpumask);
7354	return ret ? ret : count;
7355	}
7356	static DEVICE_ATTR_RW(cpumask);
7357
7358	static struct attribute *wq_sysfs_cpumask_attrs[] = {
7359	&dev_attr_cpumask.attr,
7360	&dev_attr_cpumask_requested.attr,
7361	&dev_attr_cpumask_isolated.attr,
7362	NULL,
7363	};
7364	ATTRIBUTE_GROUPS(wq_sysfs_cpumask);
7365
7366	static int __init wq_sysfs_init(void)
7367	{
7368	return subsys_virtual_register(subsys: &wq_subsys, groups: wq_sysfs_cpumask_groups);
7369	}
7370	core_initcall(wq_sysfs_init);
7371
7372	static void wq_device_release(struct device *dev)
7373	{
7374	struct wq_device wq_dev = container_of(dev, struct* wq_device, dev);
7375
7376	kfree(objp: wq_dev);
7377	}
7378
7379	/**
7380	* workqueue_sysfs_register - make a workqueue visible in sysfs
7381	* @wq: the workqueue to register
7382	*
7383	* Expose @wq in sysfs under /sys/bus/workqueue/devices.
7384	* alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
7385	* which is the preferred method.
7386	*
7387	* Workqueue user should use this function directly iff it wants to apply
7388	* workqueue_attrs before making the workqueue visible in sysfs; otherwise,
7389	* apply_workqueue_attrs() may race against userland updating the
7390	* attributes.
7391	*
7392	* Return: 0 on success, -errno on failure.
7393	*/
7394	int workqueue_sysfs_register(struct workqueue_struct *wq)
7395	{
7396	struct wq_device *wq_dev;
7397	int ret;
7398
7399	/*
7400	* Adjusting max_active breaks ordering guarantee. Disallow exposing
7401	* ordered workqueues.
7402	*/
7403	if (WARN_ON(wq->flags & __WQ_ORDERED))
7404	return -EINVAL;
7405
7406	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
7407	if (!wq_dev)
7408	return -ENOMEM;
7409
7410	wq_dev->wq = wq;
7411	wq_dev->dev.bus = &wq_subsys;
7412	wq_dev->dev.release = wq_device_release;
7413	dev_set_name(dev: &wq_dev->dev, name: "%s", wq->name);
7414
7415	/*
7416	* unbound_attrs are created separately. Suppress uevent until
7417	* everything is ready.
7418	*/
7419	dev_set_uevent_suppress(dev: &wq_dev->dev, val: true);
7420
7421	ret = device_register(dev: &wq_dev->dev);
7422	if (ret) {
7423	put_device(dev: &wq_dev->dev);
7424	wq->wq_dev = NULL;
7425	return ret;
7426	}
7427
7428	if (wq->flags & WQ_UNBOUND) {
7429	struct device_attribute *attr;
7430
7431	for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
7432	ret = device_create_file(device: &wq_dev->dev, entry: attr);
7433	if (ret) {
7434	device_unregister(dev: &wq_dev->dev);
7435	wq->wq_dev = NULL;
7436	return ret;
7437	}
7438	}
7439	}
7440
7441	dev_set_uevent_suppress(dev: &wq_dev->dev, val: false);
7442	kobject_uevent(kobj: &wq_dev->dev.kobj, action: KOBJ_ADD);
7443	return `0`;
7444	}
7445
7446	/**
7447	* workqueue_sysfs_unregister - undo workqueue_sysfs_register()
7448	* @wq: the workqueue to unregister
7449	*
7450	* If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
7451	*/
7452	static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
7453	{
7454	struct wq_device *wq_dev = wq->wq_dev;
7455
7456	if (!wq->wq_dev)
7457	return;
7458
7459	wq->wq_dev = NULL;
7460	device_unregister(dev: &wq_dev->dev);
7461	}
7462	#else /* CONFIG_SYSFS */
7463	static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
7464	#endif /* CONFIG_SYSFS */
7465
7466	/*
7467	* Workqueue watchdog.
7468	*
7469	* Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
7470	* flush dependency, a concurrency managed work item which stays RUNNING
7471	* indefinitely. Workqueue stalls can be very difficult to debug as the
7472	* usual warning mechanisms don't trigger and internal workqueue state is
7473	* largely opaque.
7474	*
7475	* Workqueue watchdog monitors all worker pools periodically and dumps
7476	* state if some pools failed to make forward progress for a while where
7477	* forward progress is defined as the first item on ->worklist changing.
7478	*
7479	* This mechanism is controlled through the kernel parameter
7480	* "workqueue.watchdog_thresh" which can be updated at runtime through the
7481	* corresponding sysfs parameter file.
7482	*/
7483	#ifdef CONFIG_WQ_WATCHDOG
7484
7485	static unsigned long wq_watchdog_thresh = `30`;
7486	static struct timer_list wq_watchdog_timer;
7487
7488	static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
7489	static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
7490
7491	static unsigned int wq_panic_on_stall;
7492	module_param_named(panic_on_stall, wq_panic_on_stall, uint, `0644`);
7493
7494	/*
7495	* Show workers that might prevent the processing of pending work items.
7496	* The only candidates are CPU-bound workers in the running state.
7497	* Pending work items should be handled by another idle worker
7498	* in all other situations.
7499	*/
7500	static void show_cpu_pool_hog(struct worker_pool *pool)
7501	{
7502	struct worker *worker;
7503	unsigned long irq_flags;
7504	int bkt;
7505
7506	raw_spin_lock_irqsave(&pool->lock, irq_flags);
7507
7508	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
7509	if (task_is_running(worker->task)) {
7510	/*
7511	* Defer printing to avoid deadlocks in console
7512	* drivers that queue work while holding locks
7513	* also taken in their write paths.
7514	*/
7515	printk_deferred_enter();
7516
7517	pr_info("pool %d:\n", pool->id);
7518	sched_show_task(p: worker->task);
7519
7520	printk_deferred_exit();
7521	}
7522	}
7523
7524	raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
7525	}
7526
7527	static void show_cpu_pools_hogs(void)
7528	{
7529	struct worker_pool *pool;
7530	int pi;
7531
7532	pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");
7533
7534	rcu_read_lock();
7535
7536	for_each_pool(pool, pi) {
7537	if (pool->cpu_stall)
7538	show_cpu_pool_hog(pool);
7539
7540	}
7541
7542	rcu_read_unlock();
7543	}
7544
7545	static void panic_on_wq_watchdog(void)
7546	{
7547	static unsigned int wq_stall;
7548
7549	if (wq_panic_on_stall) {
7550	wq_stall++;
7551	BUG_ON(wq_stall >= wq_panic_on_stall);
7552	}
7553	}
7554
7555	static void wq_watchdog_reset_touched(void)
7556	{
7557	int cpu;
7558
7559	wq_watchdog_touched = jiffies;
7560	for_each_possible_cpu(cpu)
7561	per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
7562	}
7563
7564	static void wq_watchdog_timer_fn(struct timer_list *unused)
7565	{
7566	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
7567	bool lockup_detected = false;
7568	bool cpu_pool_stall = false;
7569	unsigned long now = jiffies;
7570	struct worker_pool *pool;
7571	int pi;
7572
7573	if (!thresh)
7574	return;
7575
7576	rcu_read_lock();
7577
7578	for_each_pool(pool, pi) {
7579	unsigned long pool_ts, touched, ts;
7580
7581	pool->cpu_stall = false;
7582	if (list_empty(head: &pool->worklist))
7583	continue;
7584
7585	/*
7586	* If a virtual machine is stopped by the host it can look to
7587	* the watchdog like a stall.
7588	*/
7589	kvm_check_and_clear_guest_paused();
7590
7591	/ get the latest of pool and touched timestamps /
7592	if (pool->cpu >= `0`)
7593	touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
7594	else
7595	touched = READ_ONCE(wq_watchdog_touched);
7596	pool_ts = READ_ONCE(pool->watchdog_ts);
7597
7598	if (time_after(pool_ts, touched))
7599	ts = pool_ts;
7600	else
7601	ts = touched;
7602
7603	/ did we stall? /
7604	if (time_after(now, ts + thresh)) {
7605	lockup_detected = true;
7606	if (pool->cpu >= `0` && !(pool->flags & POOL_BH)) {
7607	pool->cpu_stall = true;
7608	cpu_pool_stall = true;
7609	}
7610	pr_emerg("BUG: workqueue lockup - pool");
7611	pr_cont_pool_info(pool);
7612	pr_cont(" stuck for %us!\n",
7613	jiffies_to_msecs(now - pool_ts) / `1000`);
7614	}
7615
7616
7617	}
7618
7619	rcu_read_unlock();
7620
7621	if (lockup_detected)
7622	show_all_workqueues();
7623
7624	if (cpu_pool_stall)
7625	show_cpu_pools_hogs();
7626
7627	if (lockup_detected)
7628	panic_on_wq_watchdog();
7629
7630	wq_watchdog_reset_touched();
7631	mod_timer(timer: &wq_watchdog_timer, expires: jiffies + thresh);
7632	}
7633
7634	notrace void wq_watchdog_touch(int cpu)
7635	{
7636	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
7637	unsigned long touch_ts = READ_ONCE(wq_watchdog_touched);
7638	unsigned long now = jiffies;
7639
7640	if (cpu >= `0`)
7641	per_cpu(wq_watchdog_touched_cpu, cpu) = now;
7642	else
7643	WARN_ONCE(`1`, "%s should be called with valid CPU", __func__);
7644
7645	/ Don't unnecessarily store to global cacheline /
7646	if (time_after(now, touch_ts + thresh / `4`))
7647	WRITE_ONCE(wq_watchdog_touched, jiffies);
7648	}
7649
7650	static void wq_watchdog_set_thresh(unsigned long thresh)
7651	{
7652	wq_watchdog_thresh = `0`;
7653	timer_delete_sync(timer: &wq_watchdog_timer);
7654
7655	if (thresh) {
7656	wq_watchdog_thresh = thresh;
7657	wq_watchdog_reset_touched();
7658	mod_timer(timer: &wq_watchdog_timer, expires: jiffies + thresh * HZ);
7659	}
7660	}
7661
7662	static int wq_watchdog_param_set_thresh(const char *val,
7663	const struct kernel_param *kp)
7664	{
7665	unsigned long thresh;
7666	int ret;
7667
7668	ret = kstrtoul(s: val, base: `0`, res: &thresh);
7669	if (ret)
7670	return ret;
7671
7672	if (system_wq)
7673	wq_watchdog_set_thresh(thresh);
7674	else
7675	wq_watchdog_thresh = thresh;
7676
7677	return `0`;
7678	}
7679
7680	static const struct kernel_param_ops wq_watchdog_thresh_ops = {
7681	.set = wq_watchdog_param_set_thresh,
7682	.get = param_get_ulong,
7683	};
7684
7685	module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
7686	`0644`);
7687
7688	static void wq_watchdog_init(void)
7689	{
7690	timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
7691	wq_watchdog_set_thresh(thresh: wq_watchdog_thresh);
7692	}
7693
7694	#else /* CONFIG_WQ_WATCHDOG */
7695
7696	static inline void wq_watchdog_init(void) { }
7697
7698	#endif /* CONFIG_WQ_WATCHDOG */
7699
7700	static void bh_pool_kick_normal(struct irq_work *irq_work)
7701	{
7702	raise_softirq_irqoff(nr: TASKLET_SOFTIRQ);
7703	}
7704
7705	static void bh_pool_kick_highpri(struct irq_work *irq_work)
7706	{
7707	raise_softirq_irqoff(nr: HI_SOFTIRQ);
7708	}
7709
7710	static void __init restrict_unbound_cpumask(const char name, const* struct cpumask *mask)
7711	{
7712	if (!cpumask_intersects(src1p: wq_unbound_cpumask, src2p: mask)) {
7713	pr_warn("workqueue: Restricting unbound_cpumask (%pb) with %s (%pb) leaves no CPU, ignoring\n",
7714	cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask));
7715	return;
7716	}
7717
7718	cpumask_and(dstp: wq_unbound_cpumask, src1p: wq_unbound_cpumask, src2p: mask);
7719	}
7720
7721	static void __init init_cpu_worker_pool(struct worker_pool pool, int* cpu, int nice)
7722	{
7723	BUG_ON(init_worker_pool(pool));
7724	pool->cpu = cpu;
7725	cpumask_copy(dstp: pool->attrs->cpumask, cpumask_of(cpu));
7726	cpumask_copy(dstp: pool->attrs->__pod_cpumask, cpumask_of(cpu));
7727	pool->attrs->nice = nice;
7728	pool->attrs->affn_strict = true;
7729	pool->node = cpu_to_node(cpu);
7730
7731	/ alloc pool ID /
7732	mutex_lock(&wq_pool_mutex);
7733	BUG_ON(worker_pool_assign_id(pool));
7734	mutex_unlock(lock: &wq_pool_mutex);
7735	}
7736
7737	/**
7738	* workqueue_init_early - early init for workqueue subsystem
7739	*
7740	* This is the first step of three-staged workqueue subsystem initialization and
7741	* invoked as soon as the bare basics - memory allocation, cpumasks and idr are
7742	* up. It sets up all the data structures and system workqueues and allows early
7743	* boot code to create workqueues and queue/cancel work items. Actual work item
7744	* execution starts only after kthreads can be created and scheduled right
7745	* before early initcalls.
7746	*/
7747	void __init workqueue_init_early(void)
7748	{
7749	struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM];
7750	int std_nice[NR_STD_WORKER_POOLS] = { `0`, HIGHPRI_NICE_LEVEL };
7751	void (irq_work_fns[`2`])(struct* irq_work *) = { bh_pool_kick_normal,
7752	bh_pool_kick_highpri };
7753	int i, cpu;
7754
7755	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
7756
7757	BUG_ON(!alloc_cpumask_var(&wq_online_cpumask, GFP_KERNEL));
7758	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
7759	BUG_ON(!alloc_cpumask_var(&wq_requested_unbound_cpumask, GFP_KERNEL));
7760	BUG_ON(!zalloc_cpumask_var(&wq_isolated_cpumask, GFP_KERNEL));
7761
7762	cpumask_copy(dstp: wq_online_cpumask, cpu_online_mask);
7763	cpumask_copy(dstp: wq_unbound_cpumask, cpu_possible_mask);
7764	restrict_unbound_cpumask(name: "HK_TYPE_WQ", mask: housekeeping_cpumask(type: HK_TYPE_WQ));
7765	restrict_unbound_cpumask(name: "HK_TYPE_DOMAIN", mask: housekeeping_cpumask(type: HK_TYPE_DOMAIN));
7766	if (!cpumask_empty(srcp: &wq_cmdline_cpumask))
7767	restrict_unbound_cpumask(name: "workqueue.unbound_cpus", mask: &wq_cmdline_cpumask);
7768
7769	cpumask_copy(dstp: wq_requested_unbound_cpumask, srcp: wq_unbound_cpumask);
7770
7771	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
7772
7773	unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs();
7774	BUG_ON(!unbound_wq_update_pwq_attrs_buf);
7775
7776	/*
7777	* If nohz_full is enabled, set power efficient workqueue as unbound.
7778	* This allows workqueue items to be moved to HK CPUs.
7779	*/
7780	if (housekeeping_enabled(type: HK_TYPE_TICK))
7781	wq_power_efficient = true;
7782
7783	/ initialize WQ_AFFN_SYSTEM pods /
7784	pt->pod_cpus = kcalloc(`1`, sizeof(pt->pod_cpus[`0`]), GFP_KERNEL);
7785	pt->pod_node = kcalloc(`1`, sizeof(pt->pod_node[`0`]), GFP_KERNEL);
7786	pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[`0`]), GFP_KERNEL);
7787	BUG_ON(!pt->pod_cpus \|\| !pt->pod_node \|\| !pt->cpu_pod);
7788
7789	BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[`0`], GFP_KERNEL, NUMA_NO_NODE));
7790
7791	pt->nr_pods = `1`;
7792	cpumask_copy(dstp: pt->pod_cpus[`0`], cpu_possible_mask);
7793	pt->pod_node[`0`] = NUMA_NO_NODE;
7794	pt->cpu_pod[`0`] = `0`;
7795
7796	/ initialize BH and CPU pools /
7797	for_each_possible_cpu(cpu) {
7798	struct worker_pool *pool;
7799
7800	i = `0`;
7801	for_each_bh_worker_pool(pool, cpu) {
7802	init_cpu_worker_pool(pool, cpu, nice: std_nice[i]);
7803	pool->flags \|= POOL_BH;
7804	init_irq_work(work: bh_pool_irq_work(pool), func: irq_work_fns[i]);
7805	i++;
7806	}
7807
7808	i = `0`;
7809	for_each_cpu_worker_pool(pool, cpu)
7810	init_cpu_worker_pool(pool, cpu, nice: std_nice[i++]);
7811	}
7812
7813	/ create default unbound and ordered wq attrs /
7814	for (i = `0`; i < NR_STD_WORKER_POOLS; i++) {
7815	struct workqueue_attrs *attrs;
7816
7817	BUG_ON(!(attrs = alloc_workqueue_attrs()));
7818	attrs->nice = std_nice[i];
7819	unbound_std_wq_attrs[i] = attrs;
7820
7821	/*
7822	* An ordered wq should have only one pwq as ordering is
7823	* guaranteed by max_active which is enforced by pwqs.
7824	*/
7825	BUG_ON(!(attrs = alloc_workqueue_attrs()));
7826	attrs->nice = std_nice[i];
7827	attrs->ordered = true;
7828	ordered_wq_attrs[i] = attrs;
7829	}
7830
7831	system_wq = alloc_workqueue("events", `0`, `0`);
7832	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, `0`);
7833	system_long_wq = alloc_workqueue("events_long", `0`, `0`);
7834	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
7835	WQ_MAX_ACTIVE);
7836	system_freezable_wq = alloc_workqueue("events_freezable",
7837	WQ_FREEZABLE, `0`);
7838	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
7839	WQ_POWER_EFFICIENT, `0`);
7840	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient",
7841	WQ_FREEZABLE \| WQ_POWER_EFFICIENT,
7842	`0`);
7843	system_bh_wq = alloc_workqueue("events_bh", WQ_BH, `0`);
7844	system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
7845	WQ_BH \| WQ_HIGHPRI, `0`);
7846	BUG_ON(!system_wq \|\| !system_highpri_wq \|\| !system_long_wq \|\|
7847	!system_unbound_wq \|\| !system_freezable_wq \|\|
7848	!system_power_efficient_wq \|\|
7849	!system_freezable_power_efficient_wq \|\|
7850	!system_bh_wq \|\| !system_bh_highpri_wq);
7851	}
7852
7853	static void __init wq_cpu_intensive_thresh_init(void)
7854	{
7855	unsigned long thresh;
7856	unsigned long bogo;
7857
7858	pwq_release_worker = kthread_run_worker(`0`, "pool_workqueue_release");
7859	BUG_ON(IS_ERR(pwq_release_worker));
7860
7861	/ if the user set it to a specific value, keep it /
7862	if (wq_cpu_intensive_thresh_us != ULONG_MAX)
7863	return;
7864
7865	/*
7866	* The default of 10ms is derived from the fact that most modern (as of
7867	* 2023) processors can do a lot in 10ms and that it's just below what
7868	* most consider human-perceivable. However, the kernel also runs on a
7869	* lot slower CPUs including microcontrollers where the threshold is way
7870	* too low.
7871	*
7872	* Let's scale up the threshold upto 1 second if BogoMips is below 4000.
7873	* This is by no means accurate but it doesn't have to be. The mechanism
7874	* is still useful even when the threshold is fully scaled up. Also, as
7875	* the reports would usually be applicable to everyone, some machines
7876	* operating on longer thresholds won't significantly diminish their
7877	* usefulness.
7878	*/
7879	thresh = `10` * USEC_PER_MSEC;
7880
7881	/ see init/calibrate.c for lpj -> BogoMIPS calculation /
7882	bogo = max_t(unsigned long, loops_per_jiffy / `500000` * HZ, `1`);
7883	if (bogo < `4000`)
7884	thresh = min_t(unsigned long, thresh * `4000` / bogo, USEC_PER_SEC);
7885
7886	pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n",
7887	loops_per_jiffy, bogo, thresh);
7888
7889	wq_cpu_intensive_thresh_us = thresh;
7890	}
7891
7892	/**
7893	* workqueue_init - bring workqueue subsystem fully online
7894	*
7895	* This is the second step of three-staged workqueue subsystem initialization
7896	* and invoked as soon as kthreads can be created and scheduled. Workqueues have
7897	* been created and work items queued on them, but there are no kworkers
7898	* executing the work items yet. Populate the worker pools with the initial
7899	* workers and enable future kworker creations.
7900	*/
7901	void __init workqueue_init(void)
7902	{
7903	struct workqueue_struct *wq;
7904	struct worker_pool *pool;
7905	int cpu, bkt;
7906
7907	wq_cpu_intensive_thresh_init();
7908
7909	mutex_lock(&wq_pool_mutex);
7910
7911	/*
7912	* Per-cpu pools created earlier could be missing node hint. Fix them
7913	* up. Also, create a rescuer for workqueues that requested it.
7914	*/
7915	for_each_possible_cpu(cpu) {
7916	for_each_bh_worker_pool(pool, cpu)
7917	pool->node = cpu_to_node(cpu);
7918	for_each_cpu_worker_pool(pool, cpu)
7919	pool->node = cpu_to_node(cpu);
7920	}
7921
7922	list_for_each_entry(wq, &workqueues, list) {
7923	WARN(init_rescuer(wq),
7924	"workqueue: failed to create early rescuer for %s",
7925	wq->name);
7926	}
7927
7928	mutex_unlock(lock: &wq_pool_mutex);
7929
7930	/*
7931	* Create the initial workers. A BH pool has one pseudo worker that
7932	* represents the shared BH execution context and thus doesn't get
7933	* affected by hotplug events. Create the BH pseudo workers for all
7934	* possible CPUs here.
7935	*/
7936	for_each_possible_cpu(cpu)
7937	for_each_bh_worker_pool(pool, cpu)
7938	BUG_ON(!create_worker(pool));
7939
7940	for_each_online_cpu(cpu) {
7941	for_each_cpu_worker_pool(pool, cpu) {
7942	pool->flags &= ~POOL_DISASSOCIATED;
7943	BUG_ON(!create_worker(pool));
7944	}
7945	}
7946
7947	hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
7948	BUG_ON(!create_worker(pool));
7949
7950	wq_online = true;
7951	wq_watchdog_init();
7952	}
7953
7954	/*
7955	* Initialize @pt by first initializing @pt->cpu_pod[] with pod IDs according to
7956	* @cpu_shares_pod(). Each subset of CPUs that share a pod is assigned a unique
7957	* and consecutive pod ID. The rest of @pt is initialized accordingly.
7958	*/
7959	static void __init init_pod_type(struct wq_pod_type *pt,
7960	bool (cpus_share_pod)(int, int*))
7961	{
7962	int cur, pre, cpu, pod;
7963
7964	pt->nr_pods = `0`;
7965
7966	/ init @pt->cpu_pod[] according to @cpus_share_pod() /
7967	pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[`0`]), GFP_KERNEL);
7968	BUG_ON(!pt->cpu_pod);
7969
7970	for_each_possible_cpu(cur) {
7971	for_each_possible_cpu(pre) {
7972	if (pre >= cur) {
7973	pt->cpu_pod[cur] = pt->nr_pods++;
7974	break;
7975	}
7976	if (cpus_share_pod(cur, pre)) {
7977	pt->cpu_pod[cur] = pt->cpu_pod[pre];
7978	break;
7979	}
7980	}
7981	}
7982
7983	/ init the rest to match @pt->cpu_pod[] /
7984	pt->pod_cpus = kcalloc(pt->nr_pods, sizeof(pt->pod_cpus[`0`]), GFP_KERNEL);
7985	pt->pod_node = kcalloc(pt->nr_pods, sizeof(pt->pod_node[`0`]), GFP_KERNEL);
7986	BUG_ON(!pt->pod_cpus \|\| !pt->pod_node);
7987
7988	for (pod = `0`; pod < pt->nr_pods; pod++)
7989	BUG_ON(!zalloc_cpumask_var(&pt->pod_cpus[pod], GFP_KERNEL));
7990
7991	for_each_possible_cpu(cpu) {
7992	cpumask_set_cpu(cpu, dstp: pt->pod_cpus[pt->cpu_pod[cpu]]);
7993	pt->pod_node[pt->cpu_pod[cpu]] = cpu_to_node(cpu);
7994	}
7995	}
7996
7997	static bool __init cpus_dont_share(int cpu0, int cpu1)
7998	{
7999	return false;
8000	}
8001
8002	static bool __init cpus_share_smt(int cpu0, int cpu1)
8003	{
8004	#ifdef CONFIG_SCHED_SMT
8005	return cpumask_test_cpu(cpu: cpu0, cpumask: cpu_smt_mask(cpu: cpu1));
8006	#else
8007	return false;
8008	#endif
8009	}
8010
8011	static bool __init cpus_share_numa(int cpu0, int cpu1)
8012	{
8013	return cpu_to_node(cpu: cpu0) == cpu_to_node(cpu: cpu1);
8014	}
8015
8016	/**
8017	* workqueue_init_topology - initialize CPU pods for unbound workqueues
8018	*
8019	* This is the third step of three-staged workqueue subsystem initialization and
8020	* invoked after SMP and topology information are fully initialized. It
8021	* initializes the unbound CPU pods accordingly.
8022	*/
8023	void __init workqueue_init_topology(void)
8024	{
8025	struct workqueue_struct *wq;
8026	int cpu;
8027
8028	init_pod_type(pt: &wq_pod_types[WQ_AFFN_CPU], cpus_share_pod: cpus_dont_share);
8029	init_pod_type(pt: &wq_pod_types[WQ_AFFN_SMT], cpus_share_pod: cpus_share_smt);
8030	init_pod_type(pt: &wq_pod_types[WQ_AFFN_CACHE], cpus_share_pod: cpus_share_cache);
8031	init_pod_type(pt: &wq_pod_types[WQ_AFFN_NUMA], cpus_share_pod: cpus_share_numa);
8032
8033	wq_topo_initialized = true;
8034
8035	mutex_lock(&wq_pool_mutex);
8036
8037	/*
8038	* Workqueues allocated earlier would have all CPUs sharing the default
8039	* worker pool. Explicitly call unbound_wq_update_pwq() on all workqueue
8040	* and CPU combinations to apply per-pod sharing.
8041	*/
8042	list_for_each_entry(wq, &workqueues, list) {
8043	for_each_online_cpu(cpu)
8044	unbound_wq_update_pwq(wq, cpu);
8045	if (wq->flags & WQ_UNBOUND) {
8046	mutex_lock(&wq->mutex);
8047	wq_update_node_max_active(wq, off_cpu: -`1`);
8048	mutex_unlock(lock: &wq->mutex);
8049	}
8050	}
8051
8052	mutex_unlock(lock: &wq_pool_mutex);
8053	}
8054
8055	void __warn_flushing_systemwide_wq(void)
8056	{
8057	pr_warn("WARNING: Flushing system-wide workqueues will be prohibited in near future.\n");
8058	dump_stack();
8059	}
8060	EXPORT_SYMBOL(__warn_flushing_systemwide_wq);
8061
8062	static int __init workqueue_unbound_cpus_setup(char *str)
8063	{
8064	if (cpulist_parse(buf: str, dstp: &wq_cmdline_cpumask) < `0`) {
8065	cpumask_clear(dstp: &wq_cmdline_cpumask);
8066	pr_warn("workqueue.unbound_cpus: incorrect CPU range, using default\n");
8067	}
8068
8069	return `1`;
8070	}
8071	__setup("workqueue.unbound_cpus=", workqueue_unbound_cpus_setup);
8072

Provided by KDAB

Definitions

worker_pool_flags
worker_flags
work_cancel_flags
wq_internal_consts
worker_pool
pool_workqueue_stats
pool_workqueue
wq_flusher
wq_node_nr_active
workqueue_struct
wq_pod_type
work_offq_data
wq_affn_names
wq_cpu_intensive_thresh_us
wq_cpu_intensive_warning_thresh
wq_power_efficient
wq_online
wq_topo_initialized
pwq_cache
wq_pod_types
wq_affn_dfl
unbound_wq_update_pwq_attrs_buf
wq_pool_mutex
wq_pool_attach_mutex
wq_mayday_lock
manager_wait
workqueues
workqueue_freezing
wq_online_cpumask
wq_unbound_cpumask
wq_requested_unbound_cpumask
wq_isolated_cpumask
wq_cmdline_cpumask
wq_rr_cpu_last
wq_debug_force_rr_cpu
bh_pool_irq_works
bh_worker_pools
cpu_worker_pools
worker_pool_idr
unbound_pool_hash
unbound_std_wq_attrs
ordered_wq_attrs
pwq_release_worker
system_wq
system_highpri_wq
system_long_wq
system_unbound_wq
system_freezable_wq
system_power_efficient_wq
system_freezable_power_efficient_wq
system_bh_wq
system_bh_highpri_wq
work_debug_descr
work_debug_hint
work_is_static_object
work_fixup_init
work_fixup_free
work_debug_descr
debug_work_activate
debug_work_deactivate
__init_work
destroy_work_on_stack
destroy_delayed_work_on_stack
worker_pool_assign_id
unbound_pwq_slot
unbound_pwq
unbound_effective_cpumask
work_color_to_flags
get_work_color
work_next_color
pool_offq_flags
set_work_data
set_work_pwq
set_work_pool_and_keep_pending
set_work_pool_and_clear_pending
work_struct_pwq
get_work_pwq
get_work_pool
shift_and_mask
work_offqd_unpack
work_offqd_pack_flags
need_more_worker
may_start_working
keep_working
need_to_create_worker
too_many_workers
worker_set_flags
worker_clr_flags
first_idle_worker
worker_enter_idle
worker_leave_idle
find_worker_executing_work
move_linked_works
assign_work
bh_pool_irq_work
kick_bh_pool
kick_pool
wci_ent
wci_ents
wci_nr_ents
wci_lock
wci_hash
wci_find_ent
wq_cpu_intensive_report
wq_worker_running
wq_worker_sleeping
wq_worker_tick
wq_worker_last_func
wq_node_nr_active
wq_update_node_max_active
get_pwq
put_pwq
put_pwq_unlocked
pwq_is_empty
__pwq_activate_work
tryinc_node_nr_active
pwq_tryinc_nr_active
pwq_activate_first_inactive
unplug_oldest_pwq
node_activate_pending_pwq
pwq_dec_nr_active
pwq_dec_nr_in_flight
try_to_grab_pending
work_grab_pending
insert_work
is_chained_work
wq_select_unbound_cpu
__queue_work
clear_pending_if_disabled
queue_work_on
select_numa_node_cpu
queue_work_node
delayed_work_timer_fn
__queue_delayed_work
queue_delayed_work_on
mod_delayed_work_on
rcu_work_rcufn
queue_rcu_work
alloc_worker
pool_allowed_cpus
worker_attach_to_pool
unbind_worker
detach_worker
worker_detach_from_pool
format_worker_id
create_worker
detach_dying_workers
reap_dying_workers
set_worker_dying
idle_worker_timeout
idle_cull_fn
send_mayday
pool_mayday_timeout
maybe_create_worker
manage_workers
process_one_work
process_scheduled_works
set_pf_worker
worker_thread
rescuer_thread
bh_worker
workqueue_softirq_action
wq_drain_dead_softirq_work
drain_dead_softirq_workfn
workqueue_softirq_dead
check_flush_dependency
wq_barrier
wq_barrier_func
insert_wq_barrier
flush_workqueue_prep_pwqs
touch_wq_lockdep_map
touch_work_lockdep_map
__flush_workqueue
drain_workqueue
start_flush_work
__flush_work
flush_work
flush_delayed_work
flush_rcu_work
work_offqd_disable
work_offqd_enable
__cancel_work
__cancel_work_sync
cancel_work
cancel_work_sync
cancel_delayed_work
cancel_delayed_work_sync
disable_work
disable_work_sync
enable_work
disable_delayed_work
disable_delayed_work_sync
enable_delayed_work
schedule_on_each_cpu
execute_in_process_context
free_workqueue_attrs
alloc_workqueue_attrs
copy_workqueue_attrs
wqattrs_clear_for_pool
wqattrs_hash
wqattrs_equal
wqattrs_actualize_cpumask
wqattrs_pod_type
init_worker_pool
wq_init_lockdep
wq_unregister_lockdep
wq_free_lockdep
free_node_nr_active
init_node_nr_active
alloc_node_nr_active
rcu_free_wq
rcu_free_pool
put_unbound_pool
get_unbound_pool
pwq_release_workfn
init_pwq
link_pwq
alloc_unbound_pwq
apply_wqattrs_lock
apply_wqattrs_unlock
wq_calc_pod_cpumask
install_unbound_pwq
apply_wqattrs_ctx
apply_wqattrs_cleanup
apply_wqattrs_prepare
apply_wqattrs_commit
apply_workqueue_attrs_locked
apply_workqueue_attrs
unbound_wq_update_pwq
alloc_and_link_pwqs
wq_clamp_max_active
init_rescuer
wq_adjust_max_active
__alloc_workqueue
alloc_workqueue
alloc_workqueue_lockdep_map
pwq_busy
destroy_workqueue
workqueue_set_max_active
workqueue_set_min_active
current_work
current_is_workqueue_rescuer
workqueue_congested
work_busy
set_worker_desc
print_worker_info
pr_cont_pool_info
pr_cont_worker_id
pr_cont_work_struct
pr_cont_work_flush
pr_cont_work
show_pwq
show_one_workqueue
show_one_worker_pool
show_all_workqueues
show_freezable_workqueues
wq_worker_comm
unbind_workers
rebind_workers
restore_unbound_workers_cpumask
workqueue_prepare_cpu
workqueue_online_cpu
workqueue_offline_cpu
work_for_cpu
work_for_cpu_fn
work_on_cpu_key
work_on_cpu_safe_key
freeze_workqueues_begin
freeze_workqueues_busy
thaw_workqueues
workqueue_apply_unbound_cpumask
workqueue_unbound_exclude_cpumask
parse_affn_scope
wq_affn_dfl_set
wq_affn_dfl_get
wq_affn_dfl_ops
wq_device
dev_to_wq
per_cpu_show
max_active_show
max_active_store
wq_sysfs_attrs
wq_nice_show
wq_sysfs_prep_attrs
wq_nice_store
wq_cpumask_show
wq_cpumask_store
wq_affn_scope_show
wq_affn_scope_store
wq_affinity_strict_show
wq_affinity_strict_store
wq_sysfs_unbound_attrs
wq_subsys
workqueue_set_unbound_cpumask
__wq_cpumask_show
cpumask_requested_show
cpumask_isolated_show
cpumask_show
cpumask_store
wq_sysfs_cpumask_attrs
wq_sysfs_init
wq_device_release
workqueue_sysfs_register
workqueue_sysfs_unregister
wq_watchdog_thresh
wq_watchdog_timer
wq_watchdog_touched
wq_watchdog_touched_cpu
wq_panic_on_stall
show_cpu_pool_hog
show_cpu_pools_hogs
panic_on_wq_watchdog
wq_watchdog_reset_touched
wq_watchdog_timer_fn
wq_watchdog_touch
wq_watchdog_set_thresh
wq_watchdog_param_set_thresh
wq_watchdog_thresh_ops
wq_watchdog_init
bh_pool_kick_normal
bh_pool_kick_highpri
restrict_unbound_cpumask
init_cpu_worker_pool
workqueue_init_early
wq_cpu_intensive_thresh_init
workqueue_init
init_pod_type
cpus_dont_share
cpus_share_smt
cpus_share_numa
workqueue_init_topology
__warn_flushing_systemwide_wq

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/kernel/workqueue.c