1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Pseudo NMI support on sparc64 systems. |
3 | * |
4 | * Copyright (C) 2009 David S. Miller <davem@davemloft.net> |
5 | * |
6 | * The NMI watchdog support and infrastructure is based almost |
7 | * entirely upon the x86 NMI support code. |
8 | */ |
9 | #include <linux/kernel.h> |
10 | #include <linux/param.h> |
11 | #include <linux/init.h> |
12 | #include <linux/percpu.h> |
13 | #include <linux/nmi.h> |
14 | #include <linux/export.h> |
15 | #include <linux/kprobes.h> |
16 | #include <linux/kernel_stat.h> |
17 | #include <linux/reboot.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/kdebug.h> |
20 | #include <linux/delay.h> |
21 | #include <linux/smp.h> |
22 | |
23 | #include <asm/perf_event.h> |
24 | #include <asm/ptrace.h> |
25 | #include <asm/pcr.h> |
26 | |
27 | #include "kstack.h" |
28 | |
29 | /* We don't have a real NMI on sparc64, but we can fake one |
30 | * up using profiling counter overflow interrupts and interrupt |
31 | * levels. |
32 | * |
33 | * The profile overflow interrupts at level 15, so we use |
34 | * level 14 as our IRQ off level. |
35 | */ |
36 | |
37 | static int panic_on_timeout; |
38 | |
39 | /* nmi_active: |
40 | * >0: the NMI watchdog is active, but can be disabled |
41 | * <0: the NMI watchdog has not been set up, and cannot be enabled |
42 | * 0: the NMI watchdog is disabled, but can be enabled |
43 | */ |
44 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
45 | EXPORT_SYMBOL(nmi_active); |
46 | static int nmi_init_done; |
47 | static unsigned int nmi_hz = HZ; |
48 | static DEFINE_PER_CPU(short, wd_enabled); |
49 | static int endflag __initdata; |
50 | |
51 | static DEFINE_PER_CPU(unsigned int, last_irq_sum); |
52 | static DEFINE_PER_CPU(long, alert_counter); |
53 | static DEFINE_PER_CPU(int, nmi_touch); |
54 | |
55 | void arch_touch_nmi_watchdog(void) |
56 | { |
57 | if (atomic_read(v: &nmi_active)) { |
58 | int cpu; |
59 | |
60 | for_each_present_cpu(cpu) { |
61 | if (per_cpu(nmi_touch, cpu) != 1) |
62 | per_cpu(nmi_touch, cpu) = 1; |
63 | } |
64 | } |
65 | } |
66 | EXPORT_SYMBOL(arch_touch_nmi_watchdog); |
67 | |
68 | int __init watchdog_hardlockup_probe(void) |
69 | { |
70 | return 0; |
71 | } |
72 | |
73 | static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) |
74 | { |
75 | int this_cpu = smp_processor_id(); |
76 | |
77 | if (notify_die(val: DIE_NMIWATCHDOG, str, regs, err: 0, |
78 | trap: pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) |
79 | return; |
80 | |
81 | if (do_panic || panic_on_oops) |
82 | panic(fmt: "Watchdog detected hard LOCKUP on cpu %d" , this_cpu); |
83 | else |
84 | WARN(1, "Watchdog detected hard LOCKUP on cpu %d" , this_cpu); |
85 | } |
86 | |
87 | notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) |
88 | { |
89 | unsigned int sum, touched = 0; |
90 | void *orig_sp; |
91 | |
92 | clear_softint(1 << irq); |
93 | |
94 | local_cpu_data().__nmi_count++; |
95 | |
96 | nmi_enter(); |
97 | |
98 | orig_sp = set_hardirq_stack(); |
99 | |
100 | if (notify_die(val: DIE_NMI, str: "nmi" , regs, err: 0, |
101 | trap: pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) |
102 | touched = 1; |
103 | else |
104 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
105 | |
106 | sum = local_cpu_data().irq0_irqs; |
107 | if (__this_cpu_read(nmi_touch)) { |
108 | __this_cpu_write(nmi_touch, 0); |
109 | touched = 1; |
110 | } |
111 | if (!touched && __this_cpu_read(last_irq_sum) == sum) { |
112 | __this_cpu_inc(alert_counter); |
113 | if (__this_cpu_read(alert_counter) == 30 * nmi_hz) |
114 | die_nmi(str: "BUG: NMI Watchdog detected LOCKUP" , |
115 | regs, do_panic: panic_on_timeout); |
116 | } else { |
117 | __this_cpu_write(last_irq_sum, sum); |
118 | __this_cpu_write(alert_counter, 0); |
119 | } |
120 | if (__this_cpu_read(wd_enabled)) { |
121 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
122 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
123 | } |
124 | |
125 | restore_hardirq_stack(orig_sp); |
126 | |
127 | nmi_exit(); |
128 | } |
129 | |
130 | static inline unsigned int get_nmi_count(int cpu) |
131 | { |
132 | return cpu_data(cpu).__nmi_count; |
133 | } |
134 | |
135 | static __init void nmi_cpu_busy(void *data) |
136 | { |
137 | while (endflag == 0) |
138 | mb(); |
139 | } |
140 | |
141 | static void report_broken_nmi(int cpu, int *prev_nmi_count) |
142 | { |
143 | printk(KERN_CONT "\n" ); |
144 | |
145 | printk(KERN_WARNING |
146 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n" , |
147 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); |
148 | |
149 | printk(KERN_WARNING |
150 | "Please report this to bugzilla.kernel.org,\n" ); |
151 | printk(KERN_WARNING |
152 | "and attach the output of the 'dmesg' command.\n" ); |
153 | |
154 | per_cpu(wd_enabled, cpu) = 0; |
155 | atomic_dec(v: &nmi_active); |
156 | } |
157 | |
158 | void stop_nmi_watchdog(void *unused) |
159 | { |
160 | if (!__this_cpu_read(wd_enabled)) |
161 | return; |
162 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
163 | __this_cpu_write(wd_enabled, 0); |
164 | atomic_dec(v: &nmi_active); |
165 | } |
166 | |
167 | static int __init check_nmi_watchdog(void) |
168 | { |
169 | unsigned int *prev_nmi_count; |
170 | int cpu, err; |
171 | |
172 | if (!atomic_read(v: &nmi_active)) |
173 | return 0; |
174 | |
175 | prev_nmi_count = kmalloc_array(n: nr_cpu_ids, size: sizeof(unsigned int), |
176 | GFP_KERNEL); |
177 | if (!prev_nmi_count) { |
178 | err = -ENOMEM; |
179 | goto error; |
180 | } |
181 | |
182 | printk(KERN_INFO "Testing NMI watchdog ... " ); |
183 | |
184 | smp_call_function(func: nmi_cpu_busy, info: (void *)&endflag, wait: 0); |
185 | |
186 | for_each_possible_cpu(cpu) |
187 | prev_nmi_count[cpu] = get_nmi_count(cpu); |
188 | local_irq_enable(); |
189 | mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ |
190 | |
191 | for_each_online_cpu(cpu) { |
192 | if (!per_cpu(wd_enabled, cpu)) |
193 | continue; |
194 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) |
195 | report_broken_nmi(cpu, prev_nmi_count); |
196 | } |
197 | endflag = 1; |
198 | if (!atomic_read(v: &nmi_active)) { |
199 | kfree(objp: prev_nmi_count); |
200 | atomic_set(v: &nmi_active, i: -1); |
201 | err = -ENODEV; |
202 | goto error; |
203 | } |
204 | printk("OK.\n" ); |
205 | |
206 | nmi_hz = 1; |
207 | |
208 | kfree(objp: prev_nmi_count); |
209 | return 0; |
210 | error: |
211 | on_each_cpu(func: stop_nmi_watchdog, NULL, wait: 1); |
212 | return err; |
213 | } |
214 | |
215 | void start_nmi_watchdog(void *unused) |
216 | { |
217 | if (__this_cpu_read(wd_enabled)) |
218 | return; |
219 | |
220 | __this_cpu_write(wd_enabled, 1); |
221 | atomic_inc(v: &nmi_active); |
222 | |
223 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
224 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
225 | |
226 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
227 | } |
228 | |
229 | static void nmi_adjust_hz_one(void *unused) |
230 | { |
231 | if (!__this_cpu_read(wd_enabled)) |
232 | return; |
233 | |
234 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
235 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
236 | |
237 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
238 | } |
239 | |
240 | void nmi_adjust_hz(unsigned int new_hz) |
241 | { |
242 | nmi_hz = new_hz; |
243 | on_each_cpu(func: nmi_adjust_hz_one, NULL, wait: 1); |
244 | } |
245 | EXPORT_SYMBOL_GPL(nmi_adjust_hz); |
246 | |
247 | static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p) |
248 | { |
249 | on_each_cpu(func: stop_nmi_watchdog, NULL, wait: 1); |
250 | return 0; |
251 | } |
252 | |
253 | static struct notifier_block nmi_reboot_notifier = { |
254 | .notifier_call = nmi_shutdown, |
255 | }; |
256 | |
257 | int __init nmi_init(void) |
258 | { |
259 | int err; |
260 | |
261 | on_each_cpu(func: start_nmi_watchdog, NULL, wait: 1); |
262 | |
263 | err = check_nmi_watchdog(); |
264 | if (!err) { |
265 | err = register_reboot_notifier(&nmi_reboot_notifier); |
266 | if (err) { |
267 | on_each_cpu(func: stop_nmi_watchdog, NULL, wait: 1); |
268 | atomic_set(v: &nmi_active, i: -1); |
269 | } |
270 | } |
271 | |
272 | nmi_init_done = 1; |
273 | |
274 | return err; |
275 | } |
276 | |
277 | static int __init setup_nmi_watchdog(char *str) |
278 | { |
279 | if (!strncmp(str, "panic" , 5)) |
280 | panic_on_timeout = 1; |
281 | |
282 | return 1; |
283 | } |
284 | __setup("nmi_watchdog=" , setup_nmi_watchdog); |
285 | |
286 | /* |
287 | * sparc specific NMI watchdog enable function. |
288 | * Enables watchdog if it is not enabled already. |
289 | */ |
290 | void watchdog_hardlockup_enable(unsigned int cpu) |
291 | { |
292 | if (atomic_read(v: &nmi_active) == -1) { |
293 | pr_warn("NMI watchdog cannot be enabled or disabled\n" ); |
294 | return; |
295 | } |
296 | |
297 | /* |
298 | * watchdog thread could start even before nmi_init is called. |
299 | * Just Return in that case. Let nmi_init finish the init |
300 | * process first. |
301 | */ |
302 | if (!nmi_init_done) |
303 | return; |
304 | |
305 | smp_call_function_single(cpuid: cpu, func: start_nmi_watchdog, NULL, wait: 1); |
306 | } |
307 | /* |
308 | * sparc specific NMI watchdog disable function. |
309 | * Disables watchdog if it is not disabled already. |
310 | */ |
311 | void watchdog_hardlockup_disable(unsigned int cpu) |
312 | { |
313 | if (atomic_read(v: &nmi_active) == -1) |
314 | pr_warn_once("NMI watchdog cannot be enabled or disabled\n" ); |
315 | else |
316 | smp_call_function_single(cpuid: cpu, func: stop_nmi_watchdog, NULL, wait: 1); |
317 | } |
318 | |