1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * RCU CPU stall warnings for normal RCU grace periods |
4 | * |
5 | * Copyright IBM Corporation, 2019 |
6 | * |
7 | * Author: Paul E. McKenney <paulmck@linux.ibm.com> |
8 | */ |
9 | |
10 | #include <linux/console.h> |
11 | #include <linux/kvm_para.h> |
12 | #include <linux/rcu_notifier.h> |
13 | #include <linux/smp.h> |
14 | |
15 | ////////////////////////////////////////////////////////////////////////////// |
16 | // |
17 | // Controlling CPU stall warnings, including delay calculation. |
18 | |
19 | /* panic() on RCU Stall sysctl. */ |
20 | int sysctl_panic_on_rcu_stall __read_mostly; |
21 | int sysctl_max_rcu_stall_to_panic __read_mostly; |
22 | |
23 | #ifdef CONFIG_SYSFS |
24 | |
25 | static unsigned int rcu_stall_count; |
26 | |
27 | static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr, |
28 | char *page) |
29 | { |
30 | return sysfs_emit(page, "%u\n", rcu_stall_count); |
31 | } |
32 | |
33 | static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count); |
34 | |
35 | static __init int kernel_rcu_stall_sysfs_init(void) |
36 | { |
37 | sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL); |
38 | return 0; |
39 | } |
40 | |
41 | late_initcall(kernel_rcu_stall_sysfs_init); |
42 | |
43 | #endif // CONFIG_SYSFS |
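/*
 * Illustrative userspace sketch (not kernel code, not built here): with the
 * attribute added to kernel_kobj and a NULL group name above, the counter is
 * expected to appear as /sys/kernel/rcu_stall_count.  Error handling is kept
 * minimal for brevity.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int stalls = 0;
	FILE *f = fopen("/sys/kernel/rcu_stall_count", "r");

	if (f && fscanf(f, "%u", &stalls) == 1)
		printf("RCU CPU stalls since boot: %u\n", stalls);
	if (f)
		fclose(f);
	return 0;
}
#endif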
44 | |
45 | #ifdef CONFIG_PROVE_RCU |
46 | #define RCU_STALL_DELAY_DELTA (5 * HZ) |
47 | #else |
48 | #define RCU_STALL_DELAY_DELTA 0 |
49 | #endif |
50 | #define RCU_STALL_MIGHT_DIV 8 |
51 | #define RCU_STALL_MIGHT_MIN (2 * HZ) |
52 | |
53 | int rcu_exp_jiffies_till_stall_check(void) |
54 | { |
55 | int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout); |
56 | int exp_stall_delay_delta = 0; |
57 | int till_stall_check; |
58 | |
59 | // Zero says to use rcu_cpu_stall_timeout, but in milliseconds. |
60 | if (!cpu_stall_timeout) |
61 | cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check()); |
62 | |
63 | // Limit check must be consistent with the Kconfig limits for |
64 | // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range. |
65 | // The minimum clamped value is "2UL", because at least one full |
66 | // tick has to be guaranteed. |
67 | till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 300UL * HZ); |
68 | |
69 | if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout) |
70 | WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check)); |
71 | |
72 | #ifdef CONFIG_PROVE_RCU |
73 | /* Add an extra ~25% of till_stall_check. */ |
74 | exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1; |
75 | #endif |
76 | |
77 | return till_stall_check + exp_stall_delay_delta; |
78 | } |
79 | EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check); |
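/*
 * Minimal sketch of the clamp above, assuming HZ=1000 so that one jiffy is
 * one millisecond: a 1 ms setting is raised to 2 jiffies, and anything past
 * 300000 ms is capped at 300 * HZ jiffies.  The helper name is hypothetical
 * and exists only for illustration.
 */
#if 0
static unsigned long example_exp_stall_jiffies(int stall_ms)
{
	return clamp(msecs_to_jiffies(stall_ms), 2UL, 300UL * HZ);
}
#endif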
80 | |
81 | /* Limit-check stall timeouts specified at boottime and runtime. */ |
82 | int rcu_jiffies_till_stall_check(void) |
83 | { |
84 | int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); |
85 | |
86 | /* |
87 | * Limit check must be consistent with the Kconfig limits |
88 | * for CONFIG_RCU_CPU_STALL_TIMEOUT. |
89 | */ |
90 | if (till_stall_check < 3) { |
91 | WRITE_ONCE(rcu_cpu_stall_timeout, 3); |
92 | till_stall_check = 3; |
93 | } else if (till_stall_check > 300) { |
94 | WRITE_ONCE(rcu_cpu_stall_timeout, 300); |
95 | till_stall_check = 300; |
96 | } |
97 | return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; |
98 | } |
99 | EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); |
100 | |
101 | /* Don't do RCU CPU stall warnings during long sysrq printouts. */ |
102 | void rcu_sysrq_start(void) |
103 | { |
104 | if (!rcu_cpu_stall_suppress) |
105 | rcu_cpu_stall_suppress = 2; |
106 | } |
107 | |
108 | void rcu_sysrq_end(void) |
109 | { |
110 | if (rcu_cpu_stall_suppress == 2) |
111 | rcu_cpu_stall_suppress = 0; |
112 | } |
113 | |
114 | /* Don't print RCU CPU stall warnings during a kernel panic. */ |
115 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
116 | { |
117 | rcu_cpu_stall_suppress = 1; |
118 | return NOTIFY_DONE; |
119 | } |
120 | |
121 | static struct notifier_block rcu_panic_block = { |
122 | .notifier_call = rcu_panic, |
123 | }; |
124 | |
125 | static int __init check_cpu_stall_init(void) |
126 | { |
127 | atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); |
128 | return 0; |
129 | } |
130 | early_initcall(check_cpu_stall_init); |
131 | |
132 | /* If so specified via sysctl, panic, yielding cleaner stall-warning output. */ |
133 | static void panic_on_rcu_stall(void) |
134 | { |
135 | static int cpu_stall; |
136 | |
137 | if (++cpu_stall < sysctl_max_rcu_stall_to_panic) |
138 | return; |
139 | |
140 | if (sysctl_panic_on_rcu_stall) |
141 | panic("RCU Stall\n"); |
142 | } |
143 | |
144 | /** |
145 | * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period |
146 | * |
147 | * To perform the reset request from the caller, disable stall detection until |
148 | * 3 fqs loops have passed. This is required to ensure a fresh jiffies is |
149 | * loaded. It should be safe to do from the fqs loop as enough timer |
150 | * interrupts and context switches should have passed. |
151 | * |
152 | * The caller must disable hard irqs. |
153 | */ |
154 | void rcu_cpu_stall_reset(void) |
155 | { |
156 | WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3); |
157 | WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX); |
158 | } |
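/*
 * Hypothetical caller sketch: code that stops the machine for a long time
 * (a kernel debugger resume path, for example) can reset the stall timeout
 * before letting the system run again.  Hard irqs must already be disabled,
 * as noted above; the function name below is made up for illustration.
 */
#if 0
static void example_resume_after_long_stop(void)
{
	lockdep_assert_irqs_disabled();
	rcu_cpu_stall_reset();
}
#endif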
159 | |
160 | ////////////////////////////////////////////////////////////////////////////// |
161 | // |
162 | // Interaction with RCU grace periods |
163 | |
164 | /* Start of new grace period, so record stall time (and forcing times). */ |
165 | static void record_gp_stall_check_time(void) |
166 | { |
167 | unsigned long j = jiffies; |
168 | unsigned long j1; |
169 | |
170 | WRITE_ONCE(rcu_state.gp_start, j); |
171 | j1 = rcu_jiffies_till_stall_check(); |
172 | smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq. |
173 | WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0); |
174 | WRITE_ONCE(rcu_state.jiffies_stall, j + j1); |
175 | rcu_state.jiffies_resched = j + j1 / 2; |
176 | rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); |
177 | } |
178 | |
179 | /* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ |
180 | static void zero_cpu_stall_ticks(struct rcu_data *rdp) |
181 | { |
182 | rdp->ticks_this_gp = 0; |
183 | rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); |
184 | WRITE_ONCE(rdp->last_fqs_resched, jiffies); |
185 | } |
186 | |
187 | /* |
188 | * If too much time has passed in the current grace period, and if |
189 | * so configured, go kick the relevant kthreads. |
190 | */ |
191 | static void rcu_stall_kick_kthreads(void) |
192 | { |
193 | unsigned long j; |
194 | |
195 | if (!READ_ONCE(rcu_kick_kthreads)) |
196 | return; |
197 | j = READ_ONCE(rcu_state.jiffies_kick_kthreads); |
198 | if (time_after(jiffies, j) && rcu_state.gp_kthread && |
199 | (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { |
200 | WARN_ONCE(1, "Kicking %s grace-period kthread\n", |
201 | rcu_state.name); |
202 | rcu_ftrace_dump(DUMP_ALL); |
203 | wake_up_process(rcu_state.gp_kthread); |
204 | WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); |
205 | } |
206 | } |
207 | |
208 | /* |
209 | * Handler for the irq_work request posted about halfway into the RCU CPU |
210 | * stall timeout, and used to detect excessive irq disabling. Set state |
211 | * appropriately, but just complain if there is unexpected state on entry. |
212 | */ |
213 | static void rcu_iw_handler(struct irq_work *iwp) |
214 | { |
215 | struct rcu_data *rdp; |
216 | struct rcu_node *rnp; |
217 | |
218 | rdp = container_of(iwp, struct rcu_data, rcu_iw); |
219 | rnp = rdp->mynode; |
220 | raw_spin_lock_rcu_node(rnp); |
221 | if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { |
222 | rdp->rcu_iw_gp_seq = rnp->gp_seq; |
223 | rdp->rcu_iw_pending = false; |
224 | } |
225 | raw_spin_unlock_rcu_node(rnp); |
226 | } |
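/*
 * Hedged sketch of the posting side: the stall-check path arms a hard-irq
 * irq_work on the suspect CPU, and a handler that never runs suggests that
 * CPU has had interrupts disabled for a very long time.  This is only an
 * illustration of the irq_work API; the real arming logic lives elsewhere
 * in the RCU core, and the function name below is made up.
 */
#if 0
static void example_arm_rcu_iw(struct rcu_data *rdp, struct rcu_node *rnp)
{
	if (IS_ENABLED(CONFIG_IRQ_WORK) && !rdp->rcu_iw_pending) {
		rdp->rcu_iw_pending = true;
		rdp->rcu_iw_gp_seq = rnp->gp_seq;
		irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
	}
}
#endif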
227 | |
228 | ////////////////////////////////////////////////////////////////////////////// |
229 | // |
230 | // Printing RCU CPU stall warnings |
231 | |
232 | #ifdef CONFIG_PREEMPT_RCU |
233 | |
234 | /* |
235 | * Dump detailed information for all tasks blocking the current RCU |
236 | * grace period on the specified rcu_node structure. |
237 | */ |
238 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
239 | { |
240 | unsigned long flags; |
241 | struct task_struct *t; |
242 | |
243 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
244 | if (!rcu_preempt_blocked_readers_cgp(rnp)) { |
245 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
246 | return; |
247 | } |
248 | t = list_entry(rnp->gp_tasks->prev, |
249 | struct task_struct, rcu_node_entry); |
250 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
251 | /* |
252 | * We could be printing a lot while holding a spinlock. |
253 | * Avoid triggering hard lockup. |
254 | */ |
255 | touch_nmi_watchdog(); |
256 | sched_show_task(t); |
257 | } |
258 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
259 | } |
260 | |
261 | // Communicate task state back to the RCU CPU stall warning request. |
262 | struct rcu_stall_chk_rdr { |
263 | int nesting; |
264 | union rcu_special rs; |
265 | bool on_blkd_list; |
266 | }; |
267 | |
268 | /* |
269 | * Report out the state of a not-running task that is stalling the |
270 | * current RCU grace period. |
271 | */ |
272 | static int check_slow_task(struct task_struct *t, void *arg) |
273 | { |
274 | struct rcu_stall_chk_rdr *rscrp = arg; |
275 | |
276 | if (task_curr(t)) |
277 | return -EBUSY; // It is running, so decline to inspect it. |
278 | rscrp->nesting = t->rcu_read_lock_nesting; |
279 | rscrp->rs = t->rcu_read_unlock_special; |
280 | rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry); |
281 | return 0; |
282 | } |
283 | |
284 | /* |
285 | * Scan the current list of tasks blocked within RCU read-side critical |
286 | * sections, printing out the tid of each of the first few of them. |
287 | */ |
288 | static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) |
289 | __releases(rnp->lock) |
290 | { |
291 | int i = 0; |
292 | int ndetected = 0; |
293 | struct rcu_stall_chk_rdr rscr; |
294 | struct task_struct *t; |
295 | struct task_struct *ts[8]; |
296 | |
297 | lockdep_assert_irqs_disabled(); |
298 | if (!rcu_preempt_blocked_readers_cgp(rnp)) { |
299 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
300 | return 0; |
301 | } |
302 | pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", |
303 | rnp->level, rnp->grplo, rnp->grphi); |
304 | t = list_entry(rnp->gp_tasks->prev, |
305 | struct task_struct, rcu_node_entry); |
306 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
307 | get_task_struct(t); |
308 | ts[i++] = t; |
309 | if (i >= ARRAY_SIZE(ts)) |
310 | break; |
311 | } |
312 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
313 | while (i) { |
314 | t = ts[--i]; |
315 | if (task_call_func(t, check_slow_task, &rscr)) |
316 | pr_cont(" P%d", t->pid); |
317 | else |
318 | pr_cont(" P%d/%d:%c%c%c%c", |
319 | t->pid, rscr.nesting, |
320 | ".b"[rscr.rs.b.blocked], |
321 | ".q"[rscr.rs.b.need_qs], |
322 | ".e"[rscr.rs.b.exp_hint], |
323 | ".l"[rscr.on_blkd_list]); |
324 | lockdep_assert_irqs_disabled(); |
325 | put_task_struct(t); |
326 | ndetected++; |
327 | } |
328 | pr_cont("\n"); |
329 | return ndetected; |
330 | } |
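/*
 * Hedged usage sketch for the helper above: task_call_func() invokes the
 * callback with the target task's state stable, and a nonzero return here
 * means check_slow_task() declined because the task was running.  The
 * function name below is hypothetical.
 */
#if 0
static void example_inspect_blocked_task(struct task_struct *t)
{
	struct rcu_stall_chk_rdr rscr;

	if (task_call_func(t, check_slow_task, &rscr))
		pr_info("task %d is running, not inspected\n", t->pid);
	else
		pr_info("task %d: rcu_read_lock nesting %d\n", t->pid, rscr.nesting);
}
#endif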
331 | |
332 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
333 | |
334 | /* |
335 | * Because preemptible RCU does not exist, we never have to check for |
336 | * tasks blocked within RCU read-side critical sections. |
337 | */ |
338 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
339 | { |
340 | } |
341 | |
342 | /* |
343 | * Because preemptible RCU does not exist, we never have to check for |
344 | * tasks blocked within RCU read-side critical sections. |
345 | */ |
346 | static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) |
347 | __releases(rnp->lock) |
348 | { |
349 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
350 | return 0; |
351 | } |
352 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ |
353 | |
354 | /* |
355 | * Dump stacks of all tasks running on stalled CPUs. First try using |
356 | * NMIs, but fall back to manual remote stack tracing on architectures |
357 | * that don't support NMI-based stack dumps. The NMI-triggered stack |
358 | * traces are more accurate because they are printed by the target CPU. |
359 | */ |
360 | static void rcu_dump_cpu_stacks(unsigned long gp_seq) |
361 | { |
362 | int cpu; |
363 | unsigned long flags; |
364 | struct rcu_node *rnp; |
365 | |
366 | rcu_for_each_leaf_node(rnp) { |
367 | printk_deferred_enter(); |
368 | for_each_leaf_node_possible_cpu(rnp, cpu) { |
369 | if (gp_seq != data_race(rcu_state.gp_seq)) { |
370 | printk_deferred_exit(); |
371 | pr_err("INFO: Stall ended during stack backtracing.\n"); |
372 | return; |
373 | } |
374 | if (!(data_race(rnp->qsmask) & leaf_node_cpu_bit(rnp, cpu))) |
375 | continue; |
376 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
377 | if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { |
378 | if (cpu_is_offline(cpu)) |
379 | pr_err("Offline CPU %d blocking current GP.\n", cpu); |
380 | else |
381 | dump_cpu_task(cpu); |
382 | } |
383 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
384 | } |
385 | printk_deferred_exit(); |
386 | } |
387 | } |
388 | |
389 | static const char * const gp_state_names[] = { |
390 | [RCU_GP_IDLE] = "RCU_GP_IDLE", |
391 | [RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS", |
392 | [RCU_GP_DONE_GPS] = "RCU_GP_DONE_GPS", |
393 | [RCU_GP_ONOFF] = "RCU_GP_ONOFF", |
394 | [RCU_GP_INIT] = "RCU_GP_INIT", |
395 | [RCU_GP_WAIT_FQS] = "RCU_GP_WAIT_FQS", |
396 | [RCU_GP_DOING_FQS] = "RCU_GP_DOING_FQS", |
397 | [RCU_GP_CLEANUP] = "RCU_GP_CLEANUP", |
398 | [RCU_GP_CLEANED] = "RCU_GP_CLEANED", |
399 | }; |
400 | |
401 | /* |
402 | * Convert a ->gp_state value to a character string. |
403 | */ |
404 | static const char *gp_state_getname(short gs) |
405 | { |
406 | if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) |
407 | return "???"; |
408 | return gp_state_names[gs]; |
409 | } |
410 | |
411 | /* Is the RCU grace-period kthread being starved of CPU time? */ |
412 | static bool rcu_is_gp_kthread_starving(unsigned long *jp) |
413 | { |
414 | unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity); |
415 | |
416 | if (jp) |
417 | *jp = j; |
418 | return j > 2 * HZ; |
419 | } |
420 | |
421 | static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp) |
422 | { |
423 | int cpu; |
424 | struct task_struct *rcuc; |
425 | unsigned long j; |
426 | |
427 | rcuc = rdp->rcu_cpu_kthread_task; |
428 | if (!rcuc) |
429 | return false; |
430 | |
431 | cpu = task_cpu(rcuc); |
432 | if (cpu_is_offline(cpu) || idle_cpu(cpu)) |
433 | return false; |
434 | |
435 | j = jiffies - READ_ONCE(rdp->rcuc_activity); |
436 | |
437 | if (jp) |
438 | *jp = j; |
439 | return j > 2 * HZ; |
440 | } |
441 | |
442 | static void print_cpu_stat_info(int cpu) |
443 | { |
444 | struct rcu_snap_record rsr, *rsrp; |
445 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
446 | struct kernel_cpustat *kcsp = &kcpustat_cpu(cpu); |
447 | |
448 | if (!rcu_cpu_stall_cputime) |
449 | return; |
450 | |
451 | rsrp = &rdp->snap_record; |
452 | if (rsrp->gp_seq != rdp->gp_seq) |
453 | return; |
454 | |
455 | rsr.cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); |
456 | rsr.cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); |
457 | rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); |
458 | |
459 | pr_err("\t hardirqs softirqs csw/system\n"); |
460 | pr_err("\t number: %8lld %10d %12lld\n", |
461 | kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs, |
462 | kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, |
463 | nr_context_switches_cpu(cpu) - rsrp->nr_csw); |
464 | pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n", |
465 | div_u64(rsr.cputime_irq - rsrp->cputime_irq, NSEC_PER_MSEC), |
466 | div_u64(rsr.cputime_softirq - rsrp->cputime_softirq, NSEC_PER_MSEC), |
467 | div_u64(rsr.cputime_system - rsrp->cputime_system, NSEC_PER_MSEC), |
468 | jiffies_to_msecs(jiffies - rsrp->jiffies)); |
469 | } |
470 | |
471 | /* |
472 | * Print out diagnostic information for the specified stalled CPU. |
473 | * |
474 | * If the specified CPU is aware of the current RCU grace period, then |
475 | * print the number of scheduling clock interrupts the CPU has taken |
476 | * during the time that it has been aware. Otherwise, print the number |
477 | * of RCU grace periods that this CPU is ignorant of, for example, "1" |
478 | * if the CPU was aware of the previous grace period. |
479 | * |
480 | * Also print out idle info. |
481 | */ |
482 | static void print_cpu_stall_info(int cpu) |
483 | { |
484 | unsigned long delta; |
485 | bool falsepositive; |
486 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
487 | char *ticks_title; |
488 | unsigned long ticks_value; |
489 | bool rcuc_starved; |
490 | unsigned long j; |
491 | char buf[32]; |
492 | |
493 | /* |
494 | * We could be printing a lot while holding a spinlock. Avoid |
495 | * triggering hard lockup. |
496 | */ |
497 | touch_nmi_watchdog(); |
498 | |
499 | ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq); |
500 | if (ticks_value) { |
501 | ticks_title = "GPs behind"; |
502 | } else { |
503 | ticks_title = "ticks this GP"; |
504 | ticks_value = rdp->ticks_this_gp; |
505 | } |
506 | delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); |
507 | falsepositive = rcu_is_gp_kthread_starving(NULL) && |
508 | rcu_watching_snap_in_eqs(ct_rcu_watching_cpu(cpu)); |
509 | rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j); |
510 | if (rcuc_starved) |
511 | // Print signed value, as negative values indicate a probable bug. |
512 | snprintf(buf, sizeof(buf), " rcuc=%ld jiffies(starved)", j); |
513 | pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%04x/%ld/%#lx softirq=%u/%u fqs=%ld%s%s\n", |
514 | cpu, |
515 | "O."[!!cpu_online(cpu)], |
516 | "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], |
517 | "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], |
518 | !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' : |
519 | rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : |
520 | "!."[!delta], |
521 | ticks_value, ticks_title, |
522 | ct_rcu_watching_cpu(cpu) & 0xffff, |
523 | ct_nesting_cpu(cpu), ct_nmi_nesting_cpu(cpu), |
524 | rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), |
525 | data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, |
526 | rcuc_starved ? buf : "", |
527 | falsepositive ? " (false positive?)": ""); |
528 | |
529 | print_cpu_stat_info(cpu); |
530 | } |
531 | |
532 | /* Complain about starvation of grace-period kthread. */ |
533 | static void rcu_check_gp_kthread_starvation(void) |
534 | { |
535 | int cpu; |
536 | struct task_struct *gpk = rcu_state.gp_kthread; |
537 | unsigned long j; |
538 | |
539 | if (rcu_is_gp_kthread_starving(&j)) { |
540 | cpu = gpk ? task_cpu(gpk) : -1; |
541 | pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n", |
542 | rcu_state.name, j, |
543 | (long)rcu_seq_current(&rcu_state.gp_seq), |
544 | data_race(READ_ONCE(rcu_state.gp_flags)), |
545 | gp_state_getname(rcu_state.gp_state), |
546 | data_race(READ_ONCE(rcu_state.gp_state)), |
547 | gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu); |
548 | if (gpk) { |
549 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
550 | |
551 | pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); |
552 | pr_err("RCU grace-period kthread stack dump:\n"); |
553 | sched_show_task(gpk); |
554 | if (cpu_is_offline(cpu)) { |
555 | pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); |
556 | } else if (!(data_race(READ_ONCE(rdp->mynode->qsmask)) & rdp->grpmask)) { |
557 | pr_err("Stack dump where RCU GP kthread last ran:\n"); |
558 | dump_cpu_task(cpu); |
559 | } |
560 | wake_up_process(gpk); |
561 | } |
562 | } |
563 | } |
564 | |
565 | /* Complain about missing wakeups from expired fqs wait timer */ |
566 | static void rcu_check_gp_kthread_expired_fqs_timer(void) |
567 | { |
568 | struct task_struct *gpk = rcu_state.gp_kthread; |
569 | short gp_state; |
570 | unsigned long jiffies_fqs; |
571 | int cpu; |
572 | |
573 | /* |
574 | * Order reads of .gp_state and .jiffies_force_qs. |
575 | * Matching smp_wmb() is present in rcu_gp_fqs_loop(). |
576 | */ |
577 | gp_state = smp_load_acquire(&rcu_state.gp_state); |
578 | jiffies_fqs = READ_ONCE(rcu_state.jiffies_force_qs); |
579 | |
580 | if (gp_state == RCU_GP_WAIT_FQS && |
581 | time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) && |
582 | gpk && !READ_ONCE(gpk->on_rq)) { |
583 | cpu = task_cpu(gpk); |
584 | pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x\n", |
585 | rcu_state.name, (jiffies - jiffies_fqs), |
586 | (long)rcu_seq_current(&rcu_state.gp_seq), |
587 | data_race(READ_ONCE(rcu_state.gp_flags)), // Diagnostic read |
588 | gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS, |
589 | data_race(READ_ONCE(gpk->__state))); |
590 | pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n", |
591 | cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); |
592 | } |
593 | } |
594 | |
595 | static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) |
596 | { |
597 | int cpu; |
598 | unsigned long flags; |
599 | unsigned long gpa; |
600 | unsigned long j; |
601 | int ndetected = 0; |
602 | struct rcu_node *rnp; |
603 | long totqlen = 0; |
604 | |
605 | lockdep_assert_irqs_disabled(); |
606 | |
607 | /* Kick and suppress, if so configured. */ |
608 | rcu_stall_kick_kthreads(); |
609 | if (rcu_stall_is_suppressed()) |
610 | return; |
611 | |
612 | nbcon_cpu_emergency_enter(); |
613 | |
614 | /* |
615 | * OK, time to rat on our buddy... |
616 | * See Documentation/RCU/stallwarn.rst for info on how to debug |
617 | * RCU CPU stall warnings. |
618 | */ |
619 | trace_rcu_stall_warning(rcu_state.name, TPS("StallDetected")); |
620 | pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name); |
621 | rcu_for_each_leaf_node(rnp) { |
622 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
623 | if (rnp->qsmask != 0) { |
624 | for_each_leaf_node_possible_cpu(rnp, cpu) |
625 | if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { |
626 | print_cpu_stall_info(cpu); |
627 | ndetected++; |
628 | } |
629 | } |
630 | ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock. |
631 | lockdep_assert_irqs_disabled(); |
632 | } |
633 | |
634 | for_each_possible_cpu(cpu) |
635 | totqlen += rcu_get_n_cbs_cpu(cpu); |
636 | pr_err("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n", |
637 | smp_processor_id(), (long)(jiffies - gps), |
638 | (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, |
639 | data_race(rcu_state.n_online_cpus)); // Diagnostic read |
640 | if (ndetected) { |
641 | rcu_dump_cpu_stacks(gp_seq); |
642 | |
643 | /* Complain about tasks blocking the grace period. */ |
644 | rcu_for_each_leaf_node(rnp) |
645 | rcu_print_detail_task_stall_rnp(rnp); |
646 | } else { |
647 | if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { |
648 | pr_err("INFO: Stall ended before state dump start\n"); |
649 | } else { |
650 | j = jiffies; |
651 | gpa = data_race(READ_ONCE(rcu_state.gp_activity)); |
652 | pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", |
653 | rcu_state.name, j - gpa, j, gpa, |
654 | data_race(READ_ONCE(jiffies_till_next_fqs)), |
655 | data_race(READ_ONCE(rcu_get_root()->qsmask))); |
656 | } |
657 | } |
658 | /* Rewrite if needed in case of slow consoles. */ |
659 | if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) |
660 | WRITE_ONCE(rcu_state.jiffies_stall, |
661 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
662 | |
663 | rcu_check_gp_kthread_expired_fqs_timer(); |
664 | rcu_check_gp_kthread_starvation(); |
665 | |
666 | nbcon_cpu_emergency_exit(); |
667 | |
668 | panic_on_rcu_stall(); |
669 | |
670 | rcu_force_quiescent_state(); /* Kick them all. */ |
671 | } |
672 | |
673 | static void print_cpu_stall(unsigned long gp_seq, unsigned long gps) |
674 | { |
675 | int cpu; |
676 | unsigned long flags; |
677 | struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
678 | struct rcu_node *rnp = rcu_get_root(); |
679 | long totqlen = 0; |
680 | |
681 | lockdep_assert_irqs_disabled(); |
682 | |
683 | /* Kick and suppress, if so configured. */ |
684 | rcu_stall_kick_kthreads(); |
685 | if (rcu_stall_is_suppressed()) |
686 | return; |
687 | |
688 | nbcon_cpu_emergency_enter(); |
689 | |
690 | /* |
691 | * OK, time to rat on ourselves... |
692 | * See Documentation/RCU/stallwarn.rst for info on how to debug |
693 | * RCU CPU stall warnings. |
694 | */ |
695 | trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); |
696 | pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); |
697 | raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); |
698 | print_cpu_stall_info(smp_processor_id()); |
699 | raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); |
700 | for_each_possible_cpu(cpu) |
701 | totqlen += rcu_get_n_cbs_cpu(cpu); |
702 | pr_err("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n", |
703 | jiffies - gps, |
704 | (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, |
705 | data_race(rcu_state.n_online_cpus)); // Diagnostic read |
706 | |
707 | rcu_check_gp_kthread_expired_fqs_timer(); |
708 | rcu_check_gp_kthread_starvation(); |
709 | |
710 | rcu_dump_cpu_stacks(gp_seq); |
711 | |
712 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
713 | /* Rewrite if needed in case of slow consoles. */ |
714 | if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) |
715 | WRITE_ONCE(rcu_state.jiffies_stall, |
716 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
717 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
718 | |
719 | nbcon_cpu_emergency_exit(); |
720 | |
721 | panic_on_rcu_stall(); |
722 | |
723 | /* |
724 | * Attempt to revive the RCU machinery by forcing a context switch. |
725 | * |
726 | * A context switch would normally allow the RCU state machine to make |
727 | * progress and it could be we're stuck in kernel space without context |
728 | * switches for an entirely unreasonable amount of time. |
729 | */ |
730 | set_tsk_need_resched(current); |
731 | set_preempt_need_resched(); |
732 | } |
733 | |
734 | static bool csd_lock_suppress_rcu_stall; |
735 | module_param(csd_lock_suppress_rcu_stall, bool, 0644); |
736 | |
737 | static void check_cpu_stall(struct rcu_data *rdp) |
738 | { |
739 | bool self_detected; |
740 | unsigned long gs1; |
741 | unsigned long gs2; |
742 | unsigned long gps; |
743 | unsigned long j; |
744 | unsigned long jn; |
745 | unsigned long js; |
746 | struct rcu_node *rnp; |
747 | |
748 | lockdep_assert_irqs_disabled(); |
749 | if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) || |
750 | !rcu_gp_in_progress()) |
751 | return; |
752 | rcu_stall_kick_kthreads(); |
753 | |
754 | /* |
755 | * Check if it was requested (via rcu_cpu_stall_reset()) that the FQS |
756 | * loop has to set jiffies to ensure a non-stale jiffies value. This |
757 | * is required to have a good jiffies value after coming out of long |
758 | * breaks of jiffies updates. Not doing so can cause false positives. |
759 | */ |
760 | if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0) |
761 | return; |
762 | |
763 | j = jiffies; |
764 | |
765 | /* |
766 | * Lots of memory barriers to reject false positives. |
767 | * |
768 | * The idea is to pick up rcu_state.gp_seq, then |
769 | * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally |
770 | * another copy of rcu_state.gp_seq. These values are updated in |
771 | * the opposite order with memory barriers (or equivalent) during |
772 | * grace-period initialization and cleanup. Now, a false positive |
773 | * can occur if we get a new value of rcu_state.gp_start and an old |
774 | * value of rcu_state.jiffies_stall. But given the memory barriers, |
775 | * the only way that this can happen is if one grace period ends |
776 | * and another starts between these two fetches. This is detected |
777 | * by comparing the second fetch of rcu_state.gp_seq with the |
778 | * previous fetch from rcu_state.gp_seq. |
779 | * |
780 | * Given this check, comparisons of jiffies, rcu_state.jiffies_stall, |
781 | * and rcu_state.gp_start suffice to forestall false positives. |
782 | */ |
783 | gs1 = READ_ONCE(rcu_state.gp_seq); |
784 | smp_rmb(); /* Pick up ->gp_seq first... */ |
785 | js = READ_ONCE(rcu_state.jiffies_stall); |
786 | smp_rmb(); /* ...then ->jiffies_stall before the rest... */ |
787 | gps = READ_ONCE(rcu_state.gp_start); |
788 | smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ |
789 | gs2 = READ_ONCE(rcu_state.gp_seq); |
790 | if (gs1 != gs2 || |
791 | ULONG_CMP_LT(j, js) || |
792 | ULONG_CMP_GE(gps, js) || |
793 | !rcu_seq_state(gs2)) |
794 | return; /* No stall or GP completed since entering function. */ |
795 | rnp = rdp->mynode; |
796 | jn = jiffies + ULONG_MAX / 2; |
797 | self_detected = READ_ONCE(rnp->qsmask) & rdp->grpmask; |
798 | if (rcu_gp_in_progress() && |
799 | (self_detected || ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) && |
800 | cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { |
801 | /* |
802 | * If a virtual machine is stopped by the host it can look to |
803 | * the watchdog like an RCU stall. Check to see if the host |
804 | * stopped the vm. |
805 | */ |
806 | if (kvm_check_and_clear_guest_paused()) |
807 | return; |
808 | |
809 | #ifdef CONFIG_SYSFS |
810 | ++rcu_stall_count; |
811 | #endif |
812 | |
813 | rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps); |
814 | if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) { |
815 | pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name); |
816 | } else if (self_detected) { |
817 | /* We haven't checked in, so go dump stack. */ |
818 | print_cpu_stall(gs2, gps); |
819 | } else { |
820 | /* They had a few time units to dump stack, so complain. */ |
821 | print_other_cpu_stall(gs2, gps); |
822 | } |
823 | |
824 | if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) |
825 | rcu_ftrace_dump(DUMP_ALL); |
826 | |
827 | if (READ_ONCE(rcu_state.jiffies_stall) == jn) { |
828 | jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; |
829 | WRITE_ONCE(rcu_state.jiffies_stall, jn); |
830 | } |
831 | } |
832 | } |
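/*
 * The reporter election above relies on cmpxchg() against ->jiffies_stall:
 * the one CPU that successfully moves the deadline far into the future gets
 * to print the warning, and every other CPU sees the cmpxchg fail and simply
 * returns.  A minimal, hypothetical rendition of the same pattern:
 */
#if 0
static unsigned long example_deadline;

static bool example_claim_report(unsigned long old_deadline)
{
	/* Only one caller can win the transition away from old_deadline. */
	return cmpxchg(&example_deadline, old_deadline,
		       jiffies + ULONG_MAX / 2) == old_deadline;
}
#endif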
833 | |
834 | ////////////////////////////////////////////////////////////////////////////// |
835 | // |
836 | // RCU forward-progress mechanisms, including for callback invocation. |
837 | |
838 | |
839 | /* |
840 | * Check to see if a failure to end RCU priority inversion was due to |
841 | * a CPU not passing through a quiescent state. When this happens, there |
842 | * is nothing that RCU priority boosting can do to help, so we shouldn't |
843 | * count this as an RCU priority boosting failure. A return of true says |
844 | * RCU priority boosting is to blame, and false says otherwise. If false |
845 | * is returned, the first of the CPUs to blame is stored through cpup. |
846 | * If there was no CPU blocking the current grace period, but also nothing |
847 | * in need of being boosted, *cpup is set to -1. This can happen in case |
848 | * of vCPU preemption while the last CPU is reporting its quiescent state, |
849 | * for example. |
850 | * |
851 | * If cpup is NULL, then a lockless quick check is carried out, suitable |
852 | * for high-rate usage. On the other hand, if cpup is non-NULL, each |
853 | * rcu_node structure's ->lock is acquired, ruling out high-rate usage. |
854 | */ |
855 | bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) |
856 | { |
857 | bool atb = false; |
858 | int cpu; |
859 | unsigned long flags; |
860 | struct rcu_node *rnp; |
861 | |
862 | rcu_for_each_leaf_node(rnp) { |
863 | if (!cpup) { |
864 | if (data_race(READ_ONCE(rnp->qsmask))) { |
865 | return false; |
866 | } else { |
867 | if (READ_ONCE(rnp->gp_tasks)) |
868 | atb = true; |
869 | continue; |
870 | } |
871 | } |
872 | *cpup = -1; |
873 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
874 | if (rnp->gp_tasks) |
875 | atb = true; |
876 | if (!rnp->qsmask) { |
877 | // No CPUs without quiescent states for this rnp. |
878 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
879 | continue; |
880 | } |
881 | // Find the first holdout CPU. |
882 | for_each_leaf_node_possible_cpu(rnp, cpu) { |
883 | if (rnp->qsmask & (1UL << (cpu - rnp->grplo))) { |
884 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
885 | *cpup = cpu; |
886 | return false; |
887 | } |
888 | } |
889 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
890 | } |
891 | // Can't blame CPUs, so must blame RCU priority boosting. |
892 | return atb; |
893 | } |
894 | EXPORT_SYMBOL_GPL(rcu_check_boost_fail); |
895 | |
896 | /* |
897 | * Show the state of the grace-period kthreads. |
898 | */ |
899 | void show_rcu_gp_kthreads(void) |
900 | { |
901 | unsigned long cbs = 0; |
902 | int cpu; |
903 | unsigned long j; |
904 | unsigned long ja; |
905 | unsigned long jr; |
906 | unsigned long js; |
907 | unsigned long jw; |
908 | struct rcu_data *rdp; |
909 | struct rcu_node *rnp; |
910 | struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); |
911 | |
912 | j = jiffies; |
913 | ja = j - data_race(READ_ONCE(rcu_state.gp_activity)); |
914 | jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity)); |
915 | js = j - data_race(READ_ONCE(rcu_state.gp_start)); |
916 | jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time)); |
917 | pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n", |
918 | rcu_state.name, gp_state_getname(rcu_state.gp_state), |
919 | data_race(READ_ONCE(rcu_state.gp_state)), |
920 | t ? data_race(READ_ONCE(t->__state)) : 0x1ffff, t ? t->rt_priority : 0xffU, |
921 | js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)), |
922 | (long)data_race(READ_ONCE(rcu_state.gp_seq)), |
923 | (long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)), |
924 | data_race(READ_ONCE(rcu_state.gp_max)), |
925 | data_race(READ_ONCE(rcu_state.gp_flags))); |
926 | rcu_for_each_node_breadth_first(rnp) { |
927 | if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) && |
928 | !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) && |
929 | !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks))) |
930 | continue; |
931 | pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n", |
932 | rnp->grplo, rnp->grphi, |
933 | (long)data_race(READ_ONCE(rnp->gp_seq)), |
934 | (long)data_race(READ_ONCE(rnp->gp_seq_needed)), |
935 | data_race(READ_ONCE(rnp->qsmask)), |
936 | ".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))], |
937 | ".B"[!!data_race(READ_ONCE(rnp->boost_tasks))], |
938 | ".E"[!!data_race(READ_ONCE(rnp->exp_tasks))], |
939 | ".G"[!!data_race(READ_ONCE(rnp->gp_tasks))], |
940 | data_race(READ_ONCE(rnp->n_boosts))); |
941 | if (!rcu_is_leaf_node(rnp)) |
942 | continue; |
943 | for_each_leaf_node_possible_cpu(rnp, cpu) { |
944 | rdp = per_cpu_ptr(&rcu_data, cpu); |
945 | if (READ_ONCE(rdp->gpwrap) || |
946 | ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), |
947 | READ_ONCE(rdp->gp_seq_needed))) |
948 | continue; |
949 | pr_info("\tcpu %d ->gp_seq_needed %ld\n", |
950 | cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed))); |
951 | } |
952 | } |
953 | for_each_possible_cpu(cpu) { |
954 | rdp = per_cpu_ptr(&rcu_data, cpu); |
955 | cbs += data_race(READ_ONCE(rdp->n_cbs_invoked)); |
956 | if (rcu_segcblist_is_offloaded(&rdp->cblist)) |
957 | show_rcu_nocb_state(rdp); |
958 | } |
959 | pr_info("RCU callbacks invoked since boot: %lu\n", cbs); |
960 | show_rcu_tasks_gp_kthreads(); |
961 | } |
962 | EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); |
963 | |
964 | /* |
965 | * This function checks for grace-period requests that fail to motivate |
966 | * RCU to come out of its idle mode. |
967 | */ |
968 | static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, |
969 | const unsigned long gpssdelay) |
970 | { |
971 | unsigned long flags; |
972 | unsigned long j; |
973 | struct rcu_node *rnp_root = rcu_get_root(); |
974 | static atomic_t warned = ATOMIC_INIT(0); |
975 | |
976 | if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || |
977 | ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), |
978 | READ_ONCE(rnp_root->gp_seq_needed)) || |
979 | !smp_load_acquire(&rcu_state.gp_kthread)) // Get stable kthread. |
980 | return; |
981 | j = jiffies; /* Expensive access, and in common case don't get here. */ |
982 | if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || |
983 | time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || |
984 | atomic_read(&warned)) |
985 | return; |
986 | |
987 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
988 | j = jiffies; |
989 | if (rcu_gp_in_progress() || |
990 | ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), |
991 | READ_ONCE(rnp_root->gp_seq_needed)) || |
992 | time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || |
993 | time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || |
994 | atomic_read(&warned)) { |
995 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
996 | return; |
997 | } |
998 | /* Hold onto the leaf lock to make others see warned==1. */ |
999 | |
1000 | if (rnp_root != rnp) |
1001 | raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ |
1002 | j = jiffies; |
1003 | if (rcu_gp_in_progress() || |
1004 | ULONG_CMP_GE(READ_ONCE(rnp_root->gp_seq), |
1005 | READ_ONCE(rnp_root->gp_seq_needed)) || |
1006 | time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || |
1007 | time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || |
1008 | atomic_xchg(&warned, 1)) { |
1009 | if (rnp_root != rnp) |
1010 | /* irqs remain disabled. */ |
1011 | raw_spin_unlock_rcu_node(rnp_root); |
1012 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1013 | return; |
1014 | } |
1015 | WARN_ON(1); |
1016 | if (rnp_root != rnp) |
1017 | raw_spin_unlock_rcu_node(rnp_root); |
1018 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1019 | show_rcu_gp_kthreads(); |
1020 | } |
1021 | |
1022 | /* |
1023 | * Do a forward-progress check for rcutorture. This is normally invoked |
1024 | * due to an OOM event. The argument "j" gives the time period during |
1025 | * which rcutorture would like progress to have been made. |
1026 | */ |
1027 | void rcu_fwd_progress_check(unsigned long j) |
1028 | { |
1029 | unsigned long cbs; |
1030 | int cpu; |
1031 | unsigned long max_cbs = 0; |
1032 | int max_cpu = -1; |
1033 | struct rcu_data *rdp; |
1034 | |
1035 | if (rcu_gp_in_progress()) { |
1036 | pr_info("%s: GP age %lu jiffies\n", |
1037 | __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_start))); |
1038 | show_rcu_gp_kthreads(); |
1039 | } else { |
1040 | pr_info("%s: Last GP end %lu jiffies ago\n", |
1041 | __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_end))); |
1042 | preempt_disable(); |
1043 | rdp = this_cpu_ptr(&rcu_data); |
1044 | rcu_check_gp_start_stall(rdp->mynode, rdp, j); |
1045 | preempt_enable(); |
1046 | } |
1047 | for_each_possible_cpu(cpu) { |
1048 | cbs = rcu_get_n_cbs_cpu(cpu); |
1049 | if (!cbs) |
1050 | continue; |
1051 | if (max_cpu < 0) |
1052 | pr_info("%s: callbacks", __func__); |
1053 | pr_cont(" %d: %lu", cpu, cbs); |
1054 | if (cbs <= max_cbs) |
1055 | continue; |
1056 | max_cbs = cbs; |
1057 | max_cpu = cpu; |
1058 | } |
1059 | if (max_cpu >= 0) |
1060 | pr_cont("\n"); |
1061 | } |
1062 | EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); |
1063 | |
1064 | /* Commandeer a sysrq key to dump RCU's tree. */ |
1065 | static bool sysrq_rcu; |
1066 | module_param(sysrq_rcu, bool, 0444); |
1067 | |
1068 | /* Dump grace-period-request information due to commandeered sysrq. */ |
1069 | static void sysrq_show_rcu(u8 key) |
1070 | { |
1071 | show_rcu_gp_kthreads(); |
1072 | } |
1073 | |
1074 | static const struct sysrq_key_op sysrq_rcudump_op = { |
1075 | .handler = sysrq_show_rcu, |
1076 | .help_msg = "show-rcu(y)", |
1077 | .action_msg = "Show RCU tree", |
1078 | .enable_mask = SYSRQ_ENABLE_DUMP, |
1079 | }; |
1080 | |
1081 | static int __init rcu_sysrq_init(void) |
1082 | { |
1083 | if (sysrq_rcu) |
1084 | return register_sysrq_key('y', &sysrq_rcudump_op); |
1085 | return 0; |
1086 | } |
1087 | early_initcall(rcu_sysrq_init); |
1088 | |
1089 | #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER |
1090 | |
1091 | ////////////////////////////////////////////////////////////////////////////// |
1092 | // |
1093 | // RCU CPU stall-warning notifiers |
1094 | |
1095 | static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list); |
1096 | |
1097 | /** |
1098 | * rcu_stall_chain_notifier_register - Add an RCU CPU stall notifier |
1099 | * @n: Entry to add. |
1100 | * |
1101 | * Adds an RCU CPU stall notifier to an atomic notifier chain. |
1102 | * The @action passed to a notifier will be @RCU_STALL_NOTIFY_NORM or |
1103 | * friends. The @data will be the duration of the stalled grace period, |
1104 | * in jiffies, coerced to a void* pointer. |
1105 | * |
1106 | * Returns 0 on success, %-EEXIST on error. |
1107 | */ |
1108 | int rcu_stall_chain_notifier_register(struct notifier_block *n) |
1109 | { |
1110 | int rcsn = rcu_cpu_stall_notifiers; |
1111 | |
1112 | WARN(1, "Adding %pS() to RCU stall notifier list (%s).\n", n->notifier_call, |
1113 | rcsn ? "possibly suppressing RCU CPU stall warnings": "failed, so all is well"); |
1114 | if (rcsn) |
1115 | return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n); |
1116 | return -EEXIST; |
1117 | } |
1118 | EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register); |
1119 | |
1120 | /** |
1121 | * rcu_stall_chain_notifier_unregister - Remove an RCU CPU stall notifier |
1122 | * @n: Entry to add. |
1123 | * |
1124 | * Removes an RCU CPU stall notifier from an atomic notifier chain. |
1125 | * |
1126 | * Returns zero on success, %-ENOENT on failure. |
1127 | */ |
1128 | int rcu_stall_chain_notifier_unregister(struct notifier_block *n) |
1129 | { |
1130 | return atomic_notifier_chain_unregister(&rcu_cpu_stall_notifier_list, n); |
1131 | } |
1132 | EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_unregister); |
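/*
 * Hedged sketch of a client of the two functions above.  The names below are
 * made up for illustration: the notifier receives RCU_STALL_NOTIFY_NORM (or a
 * friend) as the action and the stall duration in jiffies coerced into the
 * data pointer, as described in the kernel-doc above.
 */
#if 0
static int example_rcu_stall_cb(struct notifier_block *nb,
				unsigned long action, void *data)
{
	pr_info("RCU stall notification %lu, duration %lu jiffies\n",
		action, (unsigned long)data);
	return NOTIFY_OK;
}

static struct notifier_block example_rcu_stall_nb = {
	.notifier_call = example_rcu_stall_cb,
};

/* Registration: rcu_stall_chain_notifier_register(&example_rcu_stall_nb); */
/* Removal:      rcu_stall_chain_notifier_unregister(&example_rcu_stall_nb); */
#endif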
1133 | |
1134 | /* |
1135 | * rcu_stall_notifier_call_chain - Call functions in an RCU CPU stall notifier chain |
1136 | * @val: Value passed unmodified to notifier function |
1137 | * @v: Pointer passed unmodified to notifier function |
1138 | * |
1139 | * Calls each function in the RCU CPU stall notifier chain in turn, which |
1140 | * is an atomic call chain. See atomic_notifier_call_chain() for more |
1141 | * information. |
1142 | * |
1143 | * This is for use within RCU, hence the omission of the extra asterisk |
1144 | * to indicate a non-kerneldoc format header comment. |
1145 | */ |
1146 | int rcu_stall_notifier_call_chain(unsigned long val, void *v) |
1147 | { |
1148 | return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v); |
1149 | } |
1150 | |
1151 | #endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER |
1152 |