1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, |
4 | * using the CPU's debug registers. Derived from |
5 | * "arch/x86/kernel/hw_breakpoint.c" |
6 | * |
7 | * Copyright 2010 IBM Corporation |
8 | * Author: K.Prasad <prasad@linux.vnet.ibm.com> |
9 | */ |
10 | |
11 | #include <linux/hw_breakpoint.h> |
12 | #include <linux/notifier.h> |
13 | #include <linux/kprobes.h> |
14 | #include <linux/percpu.h> |
15 | #include <linux/kernel.h> |
16 | #include <linux/sched.h> |
17 | #include <linux/smp.h> |
18 | #include <linux/spinlock.h> |
19 | #include <linux/debugfs.h> |
20 | #include <linux/init.h> |
21 | |
22 | #include <asm/hw_breakpoint.h> |
23 | #include <asm/processor.h> |
24 | #include <asm/sstep.h> |
25 | #include <asm/debug.h> |
26 | #include <asm/hvcall.h> |
27 | #include <asm/inst.h> |
28 | #include <linux/uaccess.h> |
29 | |
/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for every cpu.
 *
 * Entry i holds the perf event occupying slot i on that CPU, or NULL while
 * the slot is free: arch_install_hw_breakpoint() fills the first NULL entry
 * and arch_uninstall_hw_breakpoint() resets the matching entry to NULL.
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
35 | |
36 | /* |
37 | * Returns total number of data or instruction breakpoints available. |
38 | */ |
39 | int hw_breakpoint_slots(int type) |
40 | { |
41 | if (type == TYPE_DATA) |
42 | return nr_wp_slots(); |
43 | return 0; /* no instruction breakpoints available */ |
44 | } |
45 | |
46 | |
47 | /* |
48 | * Install a perf counter breakpoint. |
49 | * |
50 | * We seek a free debug address register and use it for this |
51 | * breakpoint. |
52 | * |
53 | * Atomic: we hold the counter->ctx->lock and we only handle variables |
54 | * and registers local to this cpu. |
55 | */ |
56 | int arch_install_hw_breakpoint(struct perf_event *bp) |
57 | { |
58 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
59 | struct perf_event **slot; |
60 | int i; |
61 | |
62 | for (i = 0; i < nr_wp_slots(); i++) { |
63 | slot = this_cpu_ptr(&bp_per_reg[i]); |
64 | if (!*slot) { |
65 | *slot = bp; |
66 | break; |
67 | } |
68 | } |
69 | |
70 | if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot" )) |
71 | return -EBUSY; |
72 | |
73 | /* |
74 | * Do not install DABR values if the instruction must be single-stepped. |
75 | * If so, DABR will be populated in single_step_dabr_instruction(). |
76 | */ |
77 | if (!info->perf_single_step) |
78 | __set_breakpoint(i, info); |
79 | |
80 | return 0; |
81 | } |
82 | |
83 | /* |
84 | * Uninstall the breakpoint contained in the given counter. |
85 | * |
86 | * First we search the debug address register it uses and then we disable |
87 | * it. |
88 | * |
89 | * Atomic: we hold the counter->ctx->lock and we only handle variables |
90 | * and registers local to this cpu. |
91 | */ |
92 | void arch_uninstall_hw_breakpoint(struct perf_event *bp) |
93 | { |
94 | struct arch_hw_breakpoint null_brk = {0}; |
95 | struct perf_event **slot; |
96 | int i; |
97 | |
98 | for (i = 0; i < nr_wp_slots(); i++) { |
99 | slot = this_cpu_ptr(&bp_per_reg[i]); |
100 | if (*slot == bp) { |
101 | *slot = NULL; |
102 | break; |
103 | } |
104 | } |
105 | |
106 | if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot" )) |
107 | return; |
108 | |
109 | __set_breakpoint(i, &null_brk); |
110 | } |
111 | |
112 | static bool is_ptrace_bp(struct perf_event *bp) |
113 | { |
114 | return bp->overflow_handler == ptrace_triggered; |
115 | } |
116 | |
/*
 * Check for virtual address in kernel space.
 *
 * Returns the result of is_kernel_addr() applied to the breakpoint's
 * address; the length of the watched range is not considered.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
	return is_kernel_addr(hw->address);
}
124 | |
125 | int arch_bp_generic_fields(int type, int *gen_bp_type) |
126 | { |
127 | *gen_bp_type = 0; |
128 | if (type & HW_BRK_TYPE_READ) |
129 | *gen_bp_type |= HW_BREAKPOINT_R; |
130 | if (type & HW_BRK_TYPE_WRITE) |
131 | *gen_bp_type |= HW_BREAKPOINT_W; |
132 | if (*gen_bp_type == 0) |
133 | return -EINVAL; |
134 | return 0; |
135 | } |
136 | |
137 | /* |
138 | * Watchpoint match range is always doubleword(8 bytes) aligned on |
139 | * powerpc. If the given range is crossing doubleword boundary, we |
140 | * need to increase the length such that next doubleword also get |
141 | * covered. Ex, |
142 | * |
143 | * address len = 6 bytes |
144 | * |=========. |
145 | * |------------v--|------v--------| |
146 | * | | | | | | | | | | | | | | | | | |
147 | * |---------------|---------------| |
148 | * <---8 bytes---> |
149 | * |
150 | * In this case, we should configure hw as: |
151 | * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1) |
152 | * len = 16 bytes |
153 | * |
154 | * @start_addr is inclusive but @end_addr is exclusive. |
155 | */ |
156 | static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) |
157 | { |
158 | u16 max_len = DABR_MAX_LEN; |
159 | u16 hw_len; |
160 | unsigned long start_addr, end_addr; |
161 | |
162 | start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE); |
163 | end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE); |
164 | hw_len = end_addr - start_addr; |
165 | |
166 | if (dawr_enabled()) { |
167 | max_len = DAWR_MAX_LEN; |
168 | /* DAWR region can't cross 512 bytes boundary on p10 predecessors */ |
169 | if (!cpu_has_feature(CPU_FTR_ARCH_31) && |
170 | (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))) |
171 | return -EINVAL; |
172 | } else if (IS_ENABLED(CONFIG_PPC_8xx)) { |
173 | /* 8xx can setup a range without limitation */ |
174 | max_len = U16_MAX; |
175 | } |
176 | |
177 | if (hw_len > max_len) |
178 | return -EINVAL; |
179 | |
180 | hw->hw_len = hw_len; |
181 | return 0; |
182 | } |
183 | |
184 | /* |
185 | * Validate the arch-specific HW Breakpoint register settings |
186 | */ |
187 | int hw_breakpoint_arch_parse(struct perf_event *bp, |
188 | const struct perf_event_attr *attr, |
189 | struct arch_hw_breakpoint *hw) |
190 | { |
191 | int ret = -EINVAL; |
192 | |
193 | if (!bp || !attr->bp_len) |
194 | return ret; |
195 | |
196 | hw->type = HW_BRK_TYPE_TRANSLATE; |
197 | if (attr->bp_type & HW_BREAKPOINT_R) |
198 | hw->type |= HW_BRK_TYPE_READ; |
199 | if (attr->bp_type & HW_BREAKPOINT_W) |
200 | hw->type |= HW_BRK_TYPE_WRITE; |
201 | if (hw->type == HW_BRK_TYPE_TRANSLATE) |
202 | /* must set alteast read or write */ |
203 | return ret; |
204 | if (!attr->exclude_user) |
205 | hw->type |= HW_BRK_TYPE_USER; |
206 | if (!attr->exclude_kernel) |
207 | hw->type |= HW_BRK_TYPE_KERNEL; |
208 | if (!attr->exclude_hv) |
209 | hw->type |= HW_BRK_TYPE_HYP; |
210 | hw->address = attr->bp_addr; |
211 | hw->len = attr->bp_len; |
212 | |
213 | if (!ppc_breakpoint_available()) |
214 | return -ENODEV; |
215 | |
216 | return hw_breakpoint_validate_len(hw); |
217 | } |
218 | |
219 | /* |
220 | * Restores the breakpoint on the debug registers. |
221 | * Invoke this function if it is known that the execution context is |
222 | * about to change to cause loss of MSR_SE settings. |
223 | * |
224 | * The perf watchpoint will simply re-trigger once the thread is started again, |
225 | * and the watchpoint handler will set up MSR_SE and perf_single_step as |
226 | * needed. |
227 | */ |
228 | void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) |
229 | { |
230 | struct arch_hw_breakpoint *info; |
231 | int i; |
232 | |
233 | preempt_disable(); |
234 | |
235 | for (i = 0; i < nr_wp_slots(); i++) { |
236 | struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); |
237 | |
238 | if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) |
239 | goto reset; |
240 | } |
241 | goto out; |
242 | |
243 | reset: |
244 | regs_set_return_msr(regs, regs->msr & ~MSR_SE); |
245 | for (i = 0; i < nr_wp_slots(); i++) { |
246 | info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); |
247 | __set_breakpoint(i, info); |
248 | info->perf_single_step = false; |
249 | } |
250 | |
251 | out: |
252 | preempt_enable(); |
253 | } |
254 | |
255 | static bool is_larx_stcx_instr(int type) |
256 | { |
257 | return type == LARX || type == STCX; |
258 | } |
259 | |
260 | static bool is_octword_vsx_instr(int type, int size) |
261 | { |
262 | return ((type == LOAD_VSX || type == STORE_VSX) && size == 32); |
263 | } |
264 | |
265 | /* |
266 | * We've failed in reliably handling the hw-breakpoint. Unregister |
267 | * it and throw a warning message to let the user know about it. |
268 | */ |
269 | static void handler_error(struct perf_event *bp) |
270 | { |
271 | WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled." , |
272 | counter_arch_bp(bp)->address); |
273 | perf_event_disable_inatomic(event: bp); |
274 | } |
275 | |
276 | static void larx_stcx_err(struct perf_event *bp) |
277 | { |
278 | printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n" , |
279 | counter_arch_bp(bp)->address); |
280 | perf_event_disable_inatomic(event: bp); |
281 | } |
282 | |
283 | static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, |
284 | int *hit, ppc_inst_t instr) |
285 | { |
286 | int i; |
287 | int stepped; |
288 | |
289 | /* Do not emulate user-space instructions, instead single-step them */ |
290 | if (user_mode(regs)) { |
291 | for (i = 0; i < nr_wp_slots(); i++) { |
292 | if (!hit[i]) |
293 | continue; |
294 | |
295 | counter_arch_bp(bp: bp[i])->perf_single_step = true; |
296 | bp[i] = NULL; |
297 | } |
298 | regs_set_return_msr(regs, regs->msr | MSR_SE); |
299 | return false; |
300 | } |
301 | |
302 | stepped = emulate_step(regs, instr); |
303 | if (!stepped) { |
304 | for (i = 0; i < nr_wp_slots(); i++) { |
305 | if (!hit[i]) |
306 | continue; |
307 | handler_error(bp: bp[i]); |
308 | bp[i] = NULL; |
309 | } |
310 | return false; |
311 | } |
312 | return true; |
313 | } |
314 | |
315 | static void handle_p10dd1_spurious_exception(struct perf_event **bp, |
316 | int *hit, unsigned long ea) |
317 | { |
318 | int i; |
319 | unsigned long hw_end_addr; |
320 | |
321 | /* |
322 | * Handle spurious exception only when any bp_per_reg is set. |
323 | * Otherwise this might be created by xmon and not actually a |
324 | * spurious exception. |
325 | */ |
326 | for (i = 0; i < nr_wp_slots(); i++) { |
327 | struct arch_hw_breakpoint *info; |
328 | |
329 | if (!bp[i]) |
330 | continue; |
331 | |
332 | info = counter_arch_bp(bp: bp[i]); |
333 | |
334 | hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); |
335 | |
336 | /* |
337 | * Ending address of DAWR range is less than starting |
338 | * address of op. |
339 | */ |
340 | if ((hw_end_addr - 1) >= ea) |
341 | continue; |
342 | |
343 | /* |
344 | * Those addresses need to be in the same or in two |
345 | * consecutive 512B blocks; |
346 | */ |
347 | if (((hw_end_addr - 1) >> 10) != (ea >> 10)) |
348 | continue; |
349 | |
350 | /* |
351 | * 'op address + 64B' generates an address that has a |
352 | * carry into bit 52 (crosses 2K boundary). |
353 | */ |
354 | if ((ea & 0x800) == ((ea + 64) & 0x800)) |
355 | continue; |
356 | |
357 | break; |
358 | } |
359 | |
360 | if (i == nr_wp_slots()) |
361 | return; |
362 | |
363 | for (i = 0; i < nr_wp_slots(); i++) { |
364 | if (bp[i]) { |
365 | hit[i] = 1; |
366 | counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; |
367 | } |
368 | } |
369 | } |
370 | |
371 | /* |
372 | * Handle a DABR or DAWR exception. |
373 | * |
374 | * Called in atomic context. |
375 | */ |
376 | int hw_breakpoint_handler(struct die_args *args) |
377 | { |
378 | bool err = false; |
379 | int rc = NOTIFY_STOP; |
380 | struct perf_event *bp[HBP_NUM_MAX] = { NULL }; |
381 | struct pt_regs *regs = args->regs; |
382 | int i; |
383 | int hit[HBP_NUM_MAX] = {0}; |
384 | int nr_hit = 0; |
385 | bool ptrace_bp = false; |
386 | ppc_inst_t instr = ppc_inst(0); |
387 | int type = 0; |
388 | int size = 0; |
389 | unsigned long ea = 0; |
390 | |
391 | /* Disable breakpoints during exception handling */ |
392 | hw_breakpoint_disable(); |
393 | |
394 | /* |
395 | * The counter may be concurrently released but that can only |
396 | * occur from a call_rcu() path. We can then safely fetch |
397 | * the breakpoint, use its callback, touch its counter |
398 | * while we are in an rcu_read_lock() path. |
399 | */ |
400 | rcu_read_lock(); |
401 | |
402 | if (!IS_ENABLED(CONFIG_PPC_8xx)) |
403 | wp_get_instr_detail(regs, &instr, &type, &size, &ea); |
404 | |
405 | for (i = 0; i < nr_wp_slots(); i++) { |
406 | struct arch_hw_breakpoint *info; |
407 | |
408 | bp[i] = __this_cpu_read(bp_per_reg[i]); |
409 | if (!bp[i]) |
410 | continue; |
411 | |
412 | info = counter_arch_bp(bp: bp[i]); |
413 | info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; |
414 | |
415 | if (wp_check_constraints(regs, instr, ea, type, size, info)) { |
416 | if (!IS_ENABLED(CONFIG_PPC_8xx) && |
417 | ppc_inst_equal(instr, ppc_inst(0))) { |
418 | handler_error(bp: bp[i]); |
419 | bp[i] = NULL; |
420 | err = 1; |
421 | continue; |
422 | } |
423 | |
424 | if (is_ptrace_bp(bp: bp[i])) |
425 | ptrace_bp = true; |
426 | hit[i] = 1; |
427 | nr_hit++; |
428 | } |
429 | } |
430 | |
431 | if (err) |
432 | goto reset; |
433 | |
434 | if (!nr_hit) { |
435 | /* Workaround for Power10 DD1 */ |
436 | if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && |
437 | is_octword_vsx_instr(type, size)) { |
438 | handle_p10dd1_spurious_exception(bp: bp, hit: hit, ea); |
439 | } else { |
440 | rc = NOTIFY_DONE; |
441 | goto out; |
442 | } |
443 | } |
444 | |
445 | /* |
446 | * Return early after invoking user-callback function without restoring |
447 | * DABR if the breakpoint is from ptrace which always operates in |
448 | * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal |
449 | * generated in do_dabr(). |
450 | */ |
451 | if (ptrace_bp) { |
452 | for (i = 0; i < nr_wp_slots(); i++) { |
453 | if (!hit[i] || !is_ptrace_bp(bp: bp[i])) |
454 | continue; |
455 | perf_bp_event(event: bp[i], data: regs); |
456 | bp[i] = NULL; |
457 | } |
458 | rc = NOTIFY_DONE; |
459 | goto reset; |
460 | } |
461 | |
462 | if (!IS_ENABLED(CONFIG_PPC_8xx)) { |
463 | if (is_larx_stcx_instr(type)) { |
464 | for (i = 0; i < nr_wp_slots(); i++) { |
465 | if (!hit[i]) |
466 | continue; |
467 | larx_stcx_err(bp: bp[i]); |
468 | bp[i] = NULL; |
469 | } |
470 | goto reset; |
471 | } |
472 | |
473 | if (!stepping_handler(regs, bp, hit, instr)) |
474 | goto reset; |
475 | } |
476 | |
477 | /* |
478 | * As a policy, the callback is invoked in a 'trigger-after-execute' |
479 | * fashion |
480 | */ |
481 | for (i = 0; i < nr_wp_slots(); i++) { |
482 | if (!hit[i]) |
483 | continue; |
484 | if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) |
485 | perf_bp_event(event: bp[i], data: regs); |
486 | } |
487 | |
488 | reset: |
489 | for (i = 0; i < nr_wp_slots(); i++) { |
490 | if (!bp[i]) |
491 | continue; |
492 | __set_breakpoint(i, counter_arch_bp(bp: bp[i])); |
493 | } |
494 | |
495 | out: |
496 | rcu_read_unlock(); |
497 | return rc; |
498 | } |
499 | NOKPROBE_SYMBOL(hw_breakpoint_handler); |
500 | |
501 | /* |
502 | * Handle single-step exceptions following a DABR hit. |
503 | * |
504 | * Called in atomic context. |
505 | */ |
506 | static int single_step_dabr_instruction(struct die_args *args) |
507 | { |
508 | struct pt_regs *regs = args->regs; |
509 | bool found = false; |
510 | |
511 | /* |
512 | * Check if we are single-stepping as a result of a |
513 | * previous HW Breakpoint exception |
514 | */ |
515 | for (int i = 0; i < nr_wp_slots(); i++) { |
516 | struct perf_event *bp; |
517 | struct arch_hw_breakpoint *info; |
518 | |
519 | bp = __this_cpu_read(bp_per_reg[i]); |
520 | |
521 | if (!bp) |
522 | continue; |
523 | |
524 | info = counter_arch_bp(bp); |
525 | |
526 | if (!info->perf_single_step) |
527 | continue; |
528 | |
529 | found = true; |
530 | |
531 | /* |
532 | * We shall invoke the user-defined callback function in the |
533 | * single stepping handler to confirm to 'trigger-after-execute' |
534 | * semantics |
535 | */ |
536 | if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) |
537 | perf_bp_event(event: bp, data: regs); |
538 | |
539 | info->perf_single_step = false; |
540 | __set_breakpoint(i, counter_arch_bp(bp)); |
541 | } |
542 | |
543 | /* |
544 | * If the process was being single-stepped by ptrace, let the |
545 | * other single-step actions occur (e.g. generate SIGTRAP). |
546 | */ |
547 | if (!found || test_thread_flag(TIF_SINGLESTEP)) |
548 | return NOTIFY_DONE; |
549 | |
550 | return NOTIFY_STOP; |
551 | } |
552 | NOKPROBE_SYMBOL(single_step_dabr_instruction); |
553 | |
554 | /* |
555 | * Handle debug exception notifications. |
556 | * |
557 | * Called in atomic context. |
558 | */ |
559 | int hw_breakpoint_exceptions_notify( |
560 | struct notifier_block *unused, unsigned long val, void *data) |
561 | { |
562 | int ret = NOTIFY_DONE; |
563 | |
564 | switch (val) { |
565 | case DIE_DABR_MATCH: |
566 | ret = hw_breakpoint_handler(args: data); |
567 | break; |
568 | case DIE_SSTEP: |
569 | ret = single_step_dabr_instruction(args: data); |
570 | break; |
571 | } |
572 | |
573 | return ret; |
574 | } |
575 | NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify); |
576 | |
577 | /* |
578 | * Release the user breakpoints used by ptrace |
579 | */ |
580 | void flush_ptrace_hw_breakpoint(struct task_struct *tsk) |
581 | { |
582 | int i; |
583 | struct thread_struct *t = &tsk->thread; |
584 | |
585 | for (i = 0; i < nr_wp_slots(); i++) { |
586 | unregister_hw_breakpoint(bp: t->ptrace_bps[i]); |
587 | t->ptrace_bps[i] = NULL; |
588 | } |
589 | } |
590 | |
/*
 * Read hook for breakpoint events. Currently an empty stub: @bp is not
 * examined and nothing is updated.
 */
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}
595 | |
596 | void ptrace_triggered(struct perf_event *bp, |
597 | struct perf_sample_data *data, struct pt_regs *regs) |
598 | { |
599 | struct perf_event_attr attr; |
600 | |
601 | /* |
602 | * Disable the breakpoint request here since ptrace has defined a |
603 | * one-shot behaviour for breakpoint exceptions in PPC64. |
604 | * The SIGTRAP signal is generated automatically for us in do_dabr(). |
605 | * We don't have to do anything about that here |
606 | */ |
607 | attr = bp->attr; |
608 | attr.disabled = true; |
609 | modify_user_hw_breakpoint(bp, attr: &attr); |
610 | } |
611 | |