// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */
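/*
 * Minimal usage sketch, assuming only the helpers declared in
 * <linux/irq_work.h>; my_callback and my_work are hypothetical names.
 * The callback runs later in hardirq context (or, on PREEMPT_RT, in the
 * per-CPU irq_work kthread unless the item is marked IRQ_WORK_HARD_IRQ):
 *
 *        static void my_callback(struct irq_work *work)
 *        {
 *                ... do the deferred work ...
 *        }
 *
 *        static DEFINE_IRQ_WORK(my_work, my_callback);
 *
 *        irq_work_queue(&my_work);        - NMI-safe enqueue on this CPU
 */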

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <asm/processor.h>
#include <linux/kasan.h>

#include <trace/events/ipi.h>

static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
static DEFINE_PER_CPU(struct task_struct *, irq_workd);
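/*
 * On PREEMPT_RT the lazy_list is not run from the tick but by the per-CPU
 * "irq_work/%u" kthread stored in irq_workd (see irq_work_init_threads()
 * at the bottom of this file); wake_irq_workd() below kicks that thread
 * whenever the list is non-empty.
 */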

static void wake_irq_workd(void)
{
        struct task_struct *tsk = __this_cpu_read(irq_workd);

        if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
                wake_up_process(tsk);
}

#ifdef CONFIG_SMP
static void irq_work_wake(struct irq_work *entry)
{
        wake_irq_workd();
}

static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
        IRQ_WORK_INIT_HARD(irq_work_wake);
#endif

static int irq_workd_should_run(unsigned int cpu)
{
        return !llist_empty(this_cpu_ptr(&lazy_list));
}

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
        int oflags;

        oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
        /*
         * If the work is already pending, no need to raise the IPI.
         * The pairing smp_mb() in irq_work_single() makes sure
         * everything we did before is visible.
         */
        if (oflags & IRQ_WORK_PENDING)
                return false;
        return true;
}
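/*
 * Flag lifecycle: claiming sets IRQ_WORK_CLAIMED (PENDING | BUSY) plus
 * CSD_TYPE_IRQ_WORK, which lets the generic SMP code tell this entry
 * apart from other call_single_data users. irq_work_single() clears
 * PENDING before invoking the callback (so the work may be re-queued
 * from then on) and clears BUSY afterwards, unless the work was
 * re-claimed in the meantime.
 */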

void __weak arch_irq_work_raise(void)
{
        /*
         * Lame architectures will get the timer tick callback
         */
}

static __always_inline void irq_work_raise(struct irq_work *work)
{
        if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt())
                trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func);

        arch_irq_work_raise();
}

/* Enqueue on current CPU, work must already be claimed and preempt disabled */
static void __irq_work_queue_local(struct irq_work *work)
{
        struct llist_head *list;
        bool rt_lazy_work = false;
        bool lazy_work = false;
        int work_flags;

        work_flags = atomic_read(&work->node.a_flags);
        if (work_flags & IRQ_WORK_LAZY)
                lazy_work = true;
        else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
                 !(work_flags & IRQ_WORK_HARD_IRQ))
                rt_lazy_work = true;

        if (lazy_work || rt_lazy_work)
                list = this_cpu_ptr(&lazy_list);
        else
                list = this_cpu_ptr(&raised_list);

        if (!llist_add(&work->node.llist, list))
                return;

        /* If the work is "lazy", handle it from the next tick if any */
        if (!lazy_work || tick_nohz_tick_stopped())
                irq_work_raise(work);
}

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
        /* Only queue if not already pending */
        if (!irq_work_claim(work))
                return false;

        /* Queue the entry and raise the IPI if needed. */
        preempt_disable();
        __irq_work_queue_local(work);
        preempt_enable();

        return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);

/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
#ifndef CONFIG_SMP
        return irq_work_queue(work);

#else /* CONFIG_SMP: */
        /* All work should have been flushed before going offline */
        WARN_ON_ONCE(cpu_is_offline(cpu));

        /* Only queue if not already pending */
        if (!irq_work_claim(work))
                return false;

        kasan_record_aux_stack_noalloc(work);

        preempt_disable();
        if (cpu != smp_processor_id()) {
                /* Arch remote IPI send/receive backends aren't NMI safe */
                WARN_ON_ONCE(in_nmi());

                /*
                 * On PREEMPT_RT the items which are not marked as
                 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
                 * item is used on the remote CPU to wake the thread.
                 */
                if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
                    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {

                        if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
                                goto out;

                        work = &per_cpu(irq_work_wakeup, cpu);
                        if (!irq_work_claim(work))
                                goto out;
                }

                __smp_call_single_queue(cpu, &work->node.llist);
        } else {
                __irq_work_queue_local(work);
        }
out:
        preempt_enable();

        return true;
#endif /* CONFIG_SMP */
}
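/*
 * Usage sketch for the remote case, with hypothetical names (my_work is
 * set up as in the irq_work_queue() sketch at the top of this file):
 *
 *        if (!irq_work_queue_on(&my_work, target_cpu))
 *                pr_debug("already pending on some CPU\n");
 *
 * A false return means the work was already claimed. Unlike
 * irq_work_queue(), queueing on a remote CPU must not be done from NMI
 * context; the WARN_ON_ONCE(in_nmi()) above enforces that.
 */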

bool irq_work_needs_cpu(void)
{
        struct llist_head *raised, *lazy;

        raised = this_cpu_ptr(&raised_list);
        lazy = this_cpu_ptr(&lazy_list);

        if (llist_empty(raised) || arch_irq_work_has_interrupt())
                if (llist_empty(lazy))
                        return false;

        /* All work should have been flushed before going offline */
        WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

        return true;
}
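/*
 * irq_work_needs_cpu() is what the nohz code consults when deciding
 * whether the tick can be stopped: it returns true while this CPU still
 * has raised or lazy work that would otherwise only be processed from
 * the tick.
 */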

void irq_work_single(void *arg)
{
        struct irq_work *work = arg;
        int flags;

        /*
         * Clear the PENDING bit, after this point the @work can be re-used.
         * The PENDING bit acts as a lock, and we own it, so we can clear it
         * without atomic ops.
         */
        flags = atomic_read(&work->node.a_flags);
        flags &= ~IRQ_WORK_PENDING;
        atomic_set(&work->node.a_flags, flags);

        /*
         * See irq_work_claim().
         */
        smp_mb();

        lockdep_irq_work_enter(flags);
        work->func(work);
        lockdep_irq_work_exit(flags);

        /*
         * Clear the BUSY bit, if set, and return to the free state if no-one
         * else claimed it meanwhile.
         */
        (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);

        if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
            !arch_irq_work_has_interrupt())
                rcuwait_wake_up(&work->irqwait);
}
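/*
 * Besides the list walks below, irq_work_single() is also invoked by the
 * generic SMP code when it finds a CSD_TYPE_IRQ_WORK entry while flushing
 * the call function queue; that is what the CSD_TYPE_IRQ_WORK bit set in
 * irq_work_claim() is for.
 */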

static void irq_work_run_list(struct llist_head *list)
{
        struct irq_work *work, *tmp;
        struct llist_node *llnode;

        /*
         * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
         * in a per-CPU thread in preemptible context. Only the items which are
         * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
         */
        BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));

        if (llist_empty(list))
                return;

        llnode = llist_del_all(list);
        llist_for_each_entry_safe(work, tmp, llnode, node.llist)
                irq_work_single(work);
}

/*
 * hotplug calls this through:
 *  hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
        irq_work_run_list(this_cpu_ptr(&raised_list));
        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                irq_work_run_list(this_cpu_ptr(&lazy_list));
        else
                wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);

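/*
 * Invoked from the periodic tick: architectures without an irq_work
 * interrupt get their raised list run from here, and the lazy list is
 * either run directly or, on PREEMPT_RT, handed off to irq_workd.
 */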
void irq_work_tick(void)
{
        struct llist_head *raised = this_cpu_ptr(&raised_list);

        if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
                irq_work_run_list(raised);

        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                irq_work_run_list(this_cpu_ptr(&lazy_list));
        else
                wake_irq_workd();
}

/*
 * Synchronize against the irq_work @work, ensures the entry is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
        lockdep_assert_irqs_enabled();
        might_sleep();

        if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
            !arch_irq_work_has_interrupt()) {
                rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
                                   TASK_UNINTERRUPTIBLE);
                return;
        }

        while (irq_work_is_busy(work))
                cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
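/*
 * Typical pattern before freeing an object that embeds an irq_work
 * (hypothetical names):
 *
 *        irq_work_sync(&obj->work);
 *        kfree(obj);
 *
 * This must be called from sleepable context with IRQs enabled, as the
 * lockdep_assert_irqs_enabled()/might_sleep() checks above document.
 */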

static void run_irq_workd(unsigned int cpu)
{
        irq_work_run_list(this_cpu_ptr(&lazy_list));
}

static void irq_workd_setup(unsigned int cpu)
{
        sched_set_fifo_low(current);
}

static struct smp_hotplug_thread irqwork_threads = {
        .store                  = &irq_workd,
        .setup                  = irq_workd_setup,
        .thread_should_run      = irq_workd_should_run,
        .thread_fn              = run_irq_workd,
        .thread_comm            = "irq_work/%u",
};

static __init int irq_work_init_threads(void)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
                BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
        return 0;
}
early_initcall(irq_work_init_threads);