1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include <linux/cpu.h> |
4 | #include <linux/cpumask.h> |
5 | #include <linux/kernel.h> |
6 | #include <linux/nmi.h> |
7 | #include <linux/percpu-defs.h> |
8 | |
9 | static cpumask_t __read_mostly watchdog_cpus; |
10 | |
11 | static unsigned int watchdog_next_cpu(unsigned int cpu) |
12 | { |
13 | unsigned int next_cpu; |
14 | |
15 | next_cpu = cpumask_next(n: cpu, srcp: &watchdog_cpus); |
16 | if (next_cpu >= nr_cpu_ids) |
17 | next_cpu = cpumask_first(srcp: &watchdog_cpus); |
18 | |
19 | if (next_cpu == cpu) |
20 | return nr_cpu_ids; |
21 | |
22 | return next_cpu; |
23 | } |
24 | |
25 | int __init watchdog_hardlockup_probe(void) |
26 | { |
27 | return 0; |
28 | } |
29 | |
30 | void watchdog_hardlockup_enable(unsigned int cpu) |
31 | { |
32 | unsigned int next_cpu; |
33 | |
34 | /* |
35 | * The new CPU will be marked online before the hrtimer interrupt |
36 | * gets a chance to run on it. If another CPU tests for a |
37 | * hardlockup on the new CPU before it has run its the hrtimer |
38 | * interrupt, it will get a false positive. Touch the watchdog on |
39 | * the new CPU to delay the check for at least 3 sampling periods |
40 | * to guarantee one hrtimer has run on the new CPU. |
41 | */ |
42 | watchdog_hardlockup_touch_cpu(cpu); |
43 | |
44 | /* |
45 | * We are going to check the next CPU. Our watchdog_hrtimer |
46 | * need not be zero if the CPU has already been online earlier. |
47 | * Touch the watchdog on the next CPU to avoid false positive |
48 | * if we try to check it in less then 3 interrupts. |
49 | */ |
50 | next_cpu = watchdog_next_cpu(cpu); |
51 | if (next_cpu < nr_cpu_ids) |
52 | watchdog_hardlockup_touch_cpu(cpu: next_cpu); |
53 | |
54 | /* |
55 | * Makes sure that watchdog is touched on this CPU before |
56 | * other CPUs could see it in watchdog_cpus. The counter |
57 | * part is in watchdog_buddy_check_hardlockup(). |
58 | */ |
59 | smp_wmb(); |
60 | |
61 | cpumask_set_cpu(cpu, dstp: &watchdog_cpus); |
62 | } |
63 | |
64 | void watchdog_hardlockup_disable(unsigned int cpu) |
65 | { |
66 | unsigned int next_cpu = watchdog_next_cpu(cpu); |
67 | |
68 | /* |
69 | * Offlining this CPU will cause the CPU before this one to start |
70 | * checking the one after this one. If this CPU just finished checking |
71 | * the next CPU and updating hrtimer_interrupts_saved, and then the |
72 | * previous CPU checks it within one sample period, it will trigger a |
73 | * false positive. Touch the watchdog on the next CPU to prevent it. |
74 | */ |
75 | if (next_cpu < nr_cpu_ids) |
76 | watchdog_hardlockup_touch_cpu(cpu: next_cpu); |
77 | |
78 | /* |
79 | * Makes sure that watchdog is touched on the next CPU before |
80 | * this CPU disappear in watchdog_cpus. The counter part is in |
81 | * watchdog_buddy_check_hardlockup(). |
82 | */ |
83 | smp_wmb(); |
84 | |
85 | cpumask_clear_cpu(cpu, dstp: &watchdog_cpus); |
86 | } |
87 | |
88 | void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) |
89 | { |
90 | unsigned int next_cpu; |
91 | |
92 | /* |
93 | * Test for hardlockups every 3 samples. The sample period is |
94 | * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over |
95 | * watchdog_thresh (over by 20%). |
96 | */ |
97 | if (hrtimer_interrupts % 3 != 0) |
98 | return; |
99 | |
100 | /* check for a hardlockup on the next CPU */ |
101 | next_cpu = watchdog_next_cpu(smp_processor_id()); |
102 | if (next_cpu >= nr_cpu_ids) |
103 | return; |
104 | |
105 | /* |
106 | * Make sure that the watchdog was touched on next CPU when |
107 | * watchdog_next_cpu() returned another one because of |
108 | * a change in watchdog_hardlockup_enable()/disable(). |
109 | */ |
110 | smp_rmb(); |
111 | |
112 | watchdog_hardlockup_check(cpu: next_cpu, NULL); |
113 | } |
114 | |