1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * uprobes-based tracing events |
4 | * |
5 | * Copyright (C) IBM Corporation, 2010-2012 |
6 | * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> |
7 | */ |
8 | #define pr_fmt(fmt) "trace_uprobe: " fmt |
9 | |
10 | #include <linux/bpf-cgroup.h> |
11 | #include <linux/security.h> |
12 | #include <linux/ctype.h> |
13 | #include <linux/module.h> |
14 | #include <linux/uaccess.h> |
15 | #include <linux/uprobes.h> |
16 | #include <linux/namei.h> |
17 | #include <linux/string.h> |
18 | #include <linux/rculist.h> |
19 | #include <linux/filter.h> |
20 | |
21 | #include "trace_dynevent.h" |
22 | #include "trace_probe.h" |
23 | #include "trace_probe_tmpl.h" |
24 | |
25 | #define UPROBE_EVENT_SYSTEM "uprobes" |
26 | |
27 | struct uprobe_trace_entry_head { |
28 | struct trace_entry ent; |
29 | unsigned long vaddr[]; |
30 | }; |
31 | |
/*
 * Size of the record header: the fixed head plus one recorded address for an
 * entry probe, or two for a return probe. The argument is parenthesized so
 * that any expression can be passed safely.
 */
#define SIZEOF_TRACE_ENTRY(is_return)			\
	(sizeof(struct uprobe_trace_entry_head) +	\
	 sizeof(unsigned long) * ((is_return) ? 2 : 1))

/* Address of the argument data that follows the record header. */
#define DATAOF_TRACE_ENTRY(entry, is_return)		\
	((void *)(entry) + SIZEOF_TRACE_ENTRY(is_return))
38 | |
39 | static int trace_uprobe_create(const char *raw_command); |
40 | static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); |
41 | static int trace_uprobe_release(struct dyn_event *ev); |
42 | static bool trace_uprobe_is_busy(struct dyn_event *ev); |
43 | static bool trace_uprobe_match(const char *system, const char *event, |
44 | int argc, const char **argv, struct dyn_event *ev); |
45 | |
46 | static struct dyn_event_operations trace_uprobe_ops = { |
47 | .create = trace_uprobe_create, |
48 | .show = trace_uprobe_show, |
49 | .is_busy = trace_uprobe_is_busy, |
50 | .free = trace_uprobe_release, |
51 | .match = trace_uprobe_match, |
52 | }; |
53 | |
54 | /* |
55 | * uprobe event core functions |
56 | */ |
57 | struct trace_uprobe { |
58 | struct dyn_event devent; |
59 | struct uprobe_consumer consumer; |
60 | struct path path; |
61 | struct inode *inode; |
62 | char *filename; |
63 | unsigned long offset; |
64 | unsigned long ref_ctr_offset; |
65 | unsigned long nhit; |
66 | struct trace_probe tp; |
67 | }; |
68 | |
69 | static bool is_trace_uprobe(struct dyn_event *ev) |
70 | { |
71 | return ev->ops == &trace_uprobe_ops; |
72 | } |
73 | |
74 | static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) |
75 | { |
76 | return container_of(ev, struct trace_uprobe, devent); |
77 | } |
78 | |
/**
 * for_each_trace_uprobe - walk the dyn_event list, visiting only trace_uprobes
 * @pos:	struct trace_uprobe * assigned for each matching entry
 * @dpos:	struct dyn_event * used as the underlying loop cursor
 *
 * Entries whose ops are not trace_uprobe_ops are skipped.
 */
#define for_each_trace_uprobe(pos, dpos)	\
	for_each_dyn_event(dpos)		\
		if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos)))
87 | |
/* Event (un)registration helpers, defined later in this file. */
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

/* uprobe_consumer callbacks installed by alloc_trace_uprobe(). */
static int uprobe_dispatcher(struct uprobe_consumer *con,
			     struct pt_regs *regs);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
				unsigned long func, struct pt_regs *regs);
94 | |
/*
 * Return the address @n long-sized slots away from @addr. The step is
 * subtracted when CONFIG_STACK_GROWSUP is set and added otherwise.
 */
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
{
	unsigned long delta = n * sizeof(long);

#ifdef CONFIG_STACK_GROWSUP
	return addr - delta;
#else
	return addr + delta;
#endif
}
106 | |
107 | static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n) |
108 | { |
109 | unsigned long ret; |
110 | unsigned long addr = user_stack_pointer(regs); |
111 | |
112 | addr = adjust_stack_addr(addr, n); |
113 | |
114 | if (copy_from_user(to: &ret, from: (void __force __user *) addr, n: sizeof(ret))) |
115 | return 0; |
116 | |
117 | return ret; |
118 | } |
119 | |
120 | /* |
121 | * Uprobes-specific fetch functions |
122 | */ |
123 | static nokprobe_inline int |
124 | probe_mem_read(void *dest, void *src, size_t size) |
125 | { |
126 | void __user *vaddr = (void __force __user *)src; |
127 | |
128 | return copy_from_user(to: dest, from: vaddr, n: size) ? -EFAULT : 0; |
129 | } |
130 | |
131 | static nokprobe_inline int |
132 | probe_mem_read_user(void *dest, void *src, size_t size) |
133 | { |
134 | return probe_mem_read(dest, src, size); |
135 | } |
136 | |
137 | /* |
138 | * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max |
139 | * length and relative data location. |
140 | */ |
141 | static nokprobe_inline int |
142 | fetch_store_string(unsigned long addr, void *dest, void *base) |
143 | { |
144 | long ret; |
145 | u32 loc = *(u32 *)dest; |
146 | int maxlen = get_loc_len(loc); |
147 | u8 *dst = get_loc_data(dl: dest, ent: base); |
148 | void __user *src = (void __force __user *) addr; |
149 | |
150 | if (unlikely(!maxlen)) |
151 | return -ENOMEM; |
152 | |
153 | if (addr == FETCH_TOKEN_COMM) |
154 | ret = strscpy(dst, current->comm, maxlen); |
155 | else |
156 | ret = strncpy_from_user(dst, src, count: maxlen); |
157 | if (ret >= 0) { |
158 | if (ret == maxlen) |
159 | dst[ret - 1] = '\0'; |
160 | else |
161 | /* |
162 | * Include the terminating null byte. In this case it |
163 | * was copied by strncpy_from_user but not accounted |
164 | * for in ret. |
165 | */ |
166 | ret++; |
167 | *(u32 *)dest = make_data_loc(ret, (void *)dst - base); |
168 | } else |
169 | *(u32 *)dest = make_data_loc(0, (void *)dst - base); |
170 | |
171 | return ret; |
172 | } |
173 | |
174 | static nokprobe_inline int |
175 | fetch_store_string_user(unsigned long addr, void *dest, void *base) |
176 | { |
177 | return fetch_store_string(addr, dest, base); |
178 | } |
179 | |
180 | /* Return the length of string -- including null terminal byte */ |
181 | static nokprobe_inline int |
182 | fetch_store_strlen(unsigned long addr) |
183 | { |
184 | int len; |
185 | void __user *vaddr = (void __force __user *) addr; |
186 | |
187 | if (addr == FETCH_TOKEN_COMM) |
188 | len = strlen(current->comm) + 1; |
189 | else |
190 | len = strnlen_user(str: vaddr, MAX_STRING_SIZE); |
191 | |
192 | return (len > MAX_STRING_SIZE) ? 0 : len; |
193 | } |
194 | |
195 | static nokprobe_inline int |
196 | fetch_store_strlen_user(unsigned long addr) |
197 | { |
198 | return fetch_store_strlen(addr); |
199 | } |
200 | |
201 | static unsigned long translate_user_vaddr(unsigned long file_offset) |
202 | { |
203 | unsigned long base_addr; |
204 | struct uprobe_dispatch_data *udd; |
205 | |
206 | udd = (void *) current->utask->vaddr; |
207 | |
208 | base_addr = udd->bp_addr - udd->tu->offset; |
209 | return base_addr + file_offset; |
210 | } |
211 | |
212 | /* Note that we don't verify it, since the code does not come from user space */ |
213 | static int |
214 | process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, |
215 | void *dest, void *base) |
216 | { |
217 | struct pt_regs *regs = rec; |
218 | unsigned long val; |
219 | int ret; |
220 | |
221 | /* 1st stage: get value from context */ |
222 | switch (code->op) { |
223 | case FETCH_OP_REG: |
224 | val = regs_get_register(regs, offset: code->param); |
225 | break; |
226 | case FETCH_OP_STACK: |
227 | val = get_user_stack_nth(regs, n: code->param); |
228 | break; |
229 | case FETCH_OP_STACKP: |
230 | val = user_stack_pointer(regs); |
231 | break; |
232 | case FETCH_OP_RETVAL: |
233 | val = regs_return_value(regs); |
234 | break; |
235 | case FETCH_OP_COMM: |
236 | val = FETCH_TOKEN_COMM; |
237 | break; |
238 | case FETCH_OP_FOFFS: |
239 | val = translate_user_vaddr(file_offset: code->immediate); |
240 | break; |
241 | default: |
242 | ret = process_common_fetch_insn(code, val: &val); |
243 | if (ret < 0) |
244 | return ret; |
245 | } |
246 | code++; |
247 | |
248 | return process_fetch_insn_bottom(code, val, dest, base); |
249 | } |
250 | NOKPROBE_SYMBOL(process_fetch_insn) |
251 | |
252 | static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) |
253 | { |
254 | rwlock_init(&filter->rwlock); |
255 | filter->nr_systemwide = 0; |
256 | INIT_LIST_HEAD(list: &filter->perf_events); |
257 | } |
258 | |
259 | static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter) |
260 | { |
261 | return !filter->nr_systemwide && list_empty(head: &filter->perf_events); |
262 | } |
263 | |
264 | static inline bool is_ret_probe(struct trace_uprobe *tu) |
265 | { |
266 | return tu->consumer.ret_handler != NULL; |
267 | } |
268 | |
269 | static bool trace_uprobe_is_busy(struct dyn_event *ev) |
270 | { |
271 | struct trace_uprobe *tu = to_trace_uprobe(ev); |
272 | |
273 | return trace_probe_is_enabled(tp: &tu->tp); |
274 | } |
275 | |
276 | static bool trace_uprobe_match_command_head(struct trace_uprobe *tu, |
277 | int argc, const char **argv) |
278 | { |
279 | char buf[MAX_ARGSTR_LEN + 1]; |
280 | int len; |
281 | |
282 | if (!argc) |
283 | return true; |
284 | |
285 | len = strlen(tu->filename); |
286 | if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':') |
287 | return false; |
288 | |
289 | if (tu->ref_ctr_offset == 0) |
290 | snprintf(buf, size: sizeof(buf), fmt: "0x%0*lx" , |
291 | (int)(sizeof(void *) * 2), tu->offset); |
292 | else |
293 | snprintf(buf, size: sizeof(buf), fmt: "0x%0*lx(0x%lx)" , |
294 | (int)(sizeof(void *) * 2), tu->offset, |
295 | tu->ref_ctr_offset); |
296 | if (strcmp(buf, &argv[0][len + 1])) |
297 | return false; |
298 | |
299 | argc--; argv++; |
300 | |
301 | return trace_probe_match_command_args(tp: &tu->tp, argc, argv); |
302 | } |
303 | |
304 | static bool trace_uprobe_match(const char *system, const char *event, |
305 | int argc, const char **argv, struct dyn_event *ev) |
306 | { |
307 | struct trace_uprobe *tu = to_trace_uprobe(ev); |
308 | |
309 | return (event[0] == '\0' || |
310 | strcmp(trace_probe_name(tp: &tu->tp), event) == 0) && |
311 | (!system || strcmp(trace_probe_group_name(tp: &tu->tp), system) == 0) && |
312 | trace_uprobe_match_command_head(tu, argc, argv); |
313 | } |
314 | |
315 | static nokprobe_inline struct trace_uprobe * |
316 | trace_uprobe_primary_from_call(struct trace_event_call *call) |
317 | { |
318 | struct trace_probe *tp; |
319 | |
320 | tp = trace_probe_primary_from_call(call); |
321 | if (WARN_ON_ONCE(!tp)) |
322 | return NULL; |
323 | |
324 | return container_of(tp, struct trace_uprobe, tp); |
325 | } |
326 | |
327 | /* |
328 | * Allocate new trace_uprobe and initialize it (including uprobes). |
329 | */ |
330 | static struct trace_uprobe * |
331 | alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) |
332 | { |
333 | struct trace_uprobe *tu; |
334 | int ret; |
335 | |
336 | tu = kzalloc(struct_size(tu, tp.args, nargs), GFP_KERNEL); |
337 | if (!tu) |
338 | return ERR_PTR(error: -ENOMEM); |
339 | |
340 | ret = trace_probe_init(tp: &tu->tp, event, group, alloc_filter: true, nargs); |
341 | if (ret < 0) |
342 | goto error; |
343 | |
344 | dyn_event_init(ev: &tu->devent, ops: &trace_uprobe_ops); |
345 | tu->consumer.handler = uprobe_dispatcher; |
346 | if (is_ret) |
347 | tu->consumer.ret_handler = uretprobe_dispatcher; |
348 | init_trace_uprobe_filter(filter: tu->tp.event->filter); |
349 | return tu; |
350 | |
351 | error: |
352 | kfree(objp: tu); |
353 | |
354 | return ERR_PTR(error: ret); |
355 | } |
356 | |
357 | static void free_trace_uprobe(struct trace_uprobe *tu) |
358 | { |
359 | if (!tu) |
360 | return; |
361 | |
362 | path_put(&tu->path); |
363 | trace_probe_cleanup(tp: &tu->tp); |
364 | kfree(objp: tu->filename); |
365 | kfree(objp: tu); |
366 | } |
367 | |
368 | static struct trace_uprobe *find_probe_event(const char *event, const char *group) |
369 | { |
370 | struct dyn_event *pos; |
371 | struct trace_uprobe *tu; |
372 | |
373 | for_each_trace_uprobe(tu, pos) |
374 | if (strcmp(trace_probe_name(tp: &tu->tp), event) == 0 && |
375 | strcmp(trace_probe_group_name(tp: &tu->tp), group) == 0) |
376 | return tu; |
377 | |
378 | return NULL; |
379 | } |
380 | |
381 | /* Unregister a trace_uprobe and probe_event */ |
382 | static int unregister_trace_uprobe(struct trace_uprobe *tu) |
383 | { |
384 | int ret; |
385 | |
386 | if (trace_probe_has_sibling(tp: &tu->tp)) |
387 | goto unreg; |
388 | |
389 | /* If there's a reference to the dynamic event */ |
390 | if (trace_event_dyn_busy(call: trace_probe_event_call(tp: &tu->tp))) |
391 | return -EBUSY; |
392 | |
393 | ret = unregister_uprobe_event(tu); |
394 | if (ret) |
395 | return ret; |
396 | |
397 | unreg: |
398 | dyn_event_remove(ev: &tu->devent); |
399 | trace_probe_unlink(tp: &tu->tp); |
400 | free_trace_uprobe(tu); |
401 | return 0; |
402 | } |
403 | |
404 | static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, |
405 | struct trace_uprobe *comp) |
406 | { |
407 | struct trace_probe_event *tpe = orig->tp.event; |
408 | struct inode *comp_inode = d_real_inode(dentry: comp->path.dentry); |
409 | int i; |
410 | |
411 | list_for_each_entry(orig, &tpe->probes, tp.list) { |
412 | if (comp_inode != d_real_inode(dentry: orig->path.dentry) || |
413 | comp->offset != orig->offset) |
414 | continue; |
415 | |
416 | /* |
417 | * trace_probe_compare_arg_type() ensured that nr_args and |
418 | * each argument name and type are same. Let's compare comm. |
419 | */ |
420 | for (i = 0; i < orig->tp.nr_args; i++) { |
421 | if (strcmp(orig->tp.args[i].comm, |
422 | comp->tp.args[i].comm)) |
423 | break; |
424 | } |
425 | |
426 | if (i == orig->tp.nr_args) |
427 | return true; |
428 | } |
429 | |
430 | return false; |
431 | } |
432 | |
433 | static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) |
434 | { |
435 | int ret; |
436 | |
437 | ret = trace_probe_compare_arg_type(a: &tu->tp, b: &to->tp); |
438 | if (ret) { |
439 | /* Note that argument starts index = 2 */ |
440 | trace_probe_log_set_index(index: ret + 1); |
441 | trace_probe_log_err(0, DIFF_ARG_TYPE); |
442 | return -EEXIST; |
443 | } |
444 | if (trace_uprobe_has_same_uprobe(orig: to, comp: tu)) { |
445 | trace_probe_log_set_index(index: 0); |
446 | trace_probe_log_err(0, SAME_PROBE); |
447 | return -EEXIST; |
448 | } |
449 | |
450 | /* Append to existing event */ |
451 | ret = trace_probe_append(tp: &tu->tp, to: &to->tp); |
452 | if (!ret) |
453 | dyn_event_add(ev: &tu->devent, call: trace_probe_event_call(tp: &tu->tp)); |
454 | |
455 | return ret; |
456 | } |
457 | |
458 | /* |
459 | * Uprobe with multiple reference counter is not allowed. i.e. |
460 | * If inode and offset matches, reference counter offset *must* |
461 | * match as well. Though, there is one exception: If user is |
462 | * replacing old trace_uprobe with new one(same group/event), |
463 | * then we allow same uprobe with new reference counter as far |
464 | * as the new one does not conflict with any other existing |
465 | * ones. |
466 | */ |
467 | static int validate_ref_ctr_offset(struct trace_uprobe *new) |
468 | { |
469 | struct dyn_event *pos; |
470 | struct trace_uprobe *tmp; |
471 | struct inode *new_inode = d_real_inode(dentry: new->path.dentry); |
472 | |
473 | for_each_trace_uprobe(tmp, pos) { |
474 | if (new_inode == d_real_inode(dentry: tmp->path.dentry) && |
475 | new->offset == tmp->offset && |
476 | new->ref_ctr_offset != tmp->ref_ctr_offset) { |
477 | pr_warn("Reference counter offset mismatch." ); |
478 | return -EINVAL; |
479 | } |
480 | } |
481 | return 0; |
482 | } |
483 | |
484 | /* Register a trace_uprobe and probe_event */ |
485 | static int register_trace_uprobe(struct trace_uprobe *tu) |
486 | { |
487 | struct trace_uprobe *old_tu; |
488 | int ret; |
489 | |
490 | mutex_lock(&event_mutex); |
491 | |
492 | ret = validate_ref_ctr_offset(new: tu); |
493 | if (ret) |
494 | goto end; |
495 | |
496 | /* register as an event */ |
497 | old_tu = find_probe_event(event: trace_probe_name(tp: &tu->tp), |
498 | group: trace_probe_group_name(tp: &tu->tp)); |
499 | if (old_tu) { |
500 | if (is_ret_probe(tu) != is_ret_probe(tu: old_tu)) { |
501 | trace_probe_log_set_index(index: 0); |
502 | trace_probe_log_err(0, DIFF_PROBE_TYPE); |
503 | ret = -EEXIST; |
504 | } else { |
505 | ret = append_trace_uprobe(tu, to: old_tu); |
506 | } |
507 | goto end; |
508 | } |
509 | |
510 | ret = register_uprobe_event(tu); |
511 | if (ret) { |
512 | if (ret == -EEXIST) { |
513 | trace_probe_log_set_index(index: 0); |
514 | trace_probe_log_err(0, EVENT_EXIST); |
515 | } else |
516 | pr_warn("Failed to register probe event(%d)\n" , ret); |
517 | goto end; |
518 | } |
519 | |
520 | dyn_event_add(ev: &tu->devent, call: trace_probe_event_call(tp: &tu->tp)); |
521 | |
522 | end: |
523 | mutex_unlock(lock: &event_mutex); |
524 | |
525 | return ret; |
526 | } |
527 | |
528 | /* |
529 | * Argument syntax: |
530 | * - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS] |
531 | */ |
532 | static int __trace_uprobe_create(int argc, const char **argv) |
533 | { |
534 | struct trace_uprobe *tu; |
535 | const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; |
536 | char *arg, *filename, *rctr, *rctr_end, *tmp; |
537 | char buf[MAX_EVENT_NAME_LEN]; |
538 | char gbuf[MAX_EVENT_NAME_LEN]; |
539 | enum probe_print_type ptype; |
540 | struct path path; |
541 | unsigned long offset, ref_ctr_offset; |
542 | bool is_return = false; |
543 | int i, ret; |
544 | |
545 | ref_ctr_offset = 0; |
546 | |
547 | switch (argv[0][0]) { |
548 | case 'r': |
549 | is_return = true; |
550 | break; |
551 | case 'p': |
552 | break; |
553 | default: |
554 | return -ECANCELED; |
555 | } |
556 | |
557 | if (argc < 2) |
558 | return -ECANCELED; |
559 | |
560 | if (argv[0][1] == ':') |
561 | event = &argv[0][2]; |
562 | |
563 | if (!strchr(argv[1], '/')) |
564 | return -ECANCELED; |
565 | |
566 | filename = kstrdup(s: argv[1], GFP_KERNEL); |
567 | if (!filename) |
568 | return -ENOMEM; |
569 | |
570 | /* Find the last occurrence, in case the path contains ':' too. */ |
571 | arg = strrchr(filename, ':'); |
572 | if (!arg || !isdigit(c: arg[1])) { |
573 | kfree(objp: filename); |
574 | return -ECANCELED; |
575 | } |
576 | |
577 | trace_probe_log_init(subsystem: "trace_uprobe" , argc, argv); |
578 | trace_probe_log_set_index(index: 1); /* filename is the 2nd argument */ |
579 | |
580 | *arg++ = '\0'; |
581 | ret = kern_path(filename, LOOKUP_FOLLOW, &path); |
582 | if (ret) { |
583 | trace_probe_log_err(0, FILE_NOT_FOUND); |
584 | kfree(objp: filename); |
585 | trace_probe_log_clear(); |
586 | return ret; |
587 | } |
588 | if (!d_is_reg(dentry: path.dentry)) { |
589 | trace_probe_log_err(0, NO_REGULAR_FILE); |
590 | ret = -EINVAL; |
591 | goto fail_address_parse; |
592 | } |
593 | |
594 | /* Parse reference counter offset if specified. */ |
595 | rctr = strchr(arg, '('); |
596 | if (rctr) { |
597 | rctr_end = strchr(rctr, ')'); |
598 | if (!rctr_end) { |
599 | ret = -EINVAL; |
600 | rctr_end = rctr + strlen(rctr); |
601 | trace_probe_log_err(rctr_end - filename, |
602 | REFCNT_OPEN_BRACE); |
603 | goto fail_address_parse; |
604 | } else if (rctr_end[1] != '\0') { |
605 | ret = -EINVAL; |
606 | trace_probe_log_err(rctr_end + 1 - filename, |
607 | BAD_REFCNT_SUFFIX); |
608 | goto fail_address_parse; |
609 | } |
610 | |
611 | *rctr++ = '\0'; |
612 | *rctr_end = '\0'; |
613 | ret = kstrtoul(s: rctr, base: 0, res: &ref_ctr_offset); |
614 | if (ret) { |
615 | trace_probe_log_err(rctr - filename, BAD_REFCNT); |
616 | goto fail_address_parse; |
617 | } |
618 | } |
619 | |
620 | /* Check if there is %return suffix */ |
621 | tmp = strchr(arg, '%'); |
622 | if (tmp) { |
623 | if (!strcmp(tmp, "%return" )) { |
624 | *tmp = '\0'; |
625 | is_return = true; |
626 | } else { |
627 | trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX); |
628 | ret = -EINVAL; |
629 | goto fail_address_parse; |
630 | } |
631 | } |
632 | |
633 | /* Parse uprobe offset. */ |
634 | ret = kstrtoul(s: arg, base: 0, res: &offset); |
635 | if (ret) { |
636 | trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS); |
637 | goto fail_address_parse; |
638 | } |
639 | |
640 | /* setup a probe */ |
641 | trace_probe_log_set_index(index: 0); |
642 | if (event) { |
643 | ret = traceprobe_parse_event_name(pevent: &event, pgroup: &group, buf: gbuf, |
644 | offset: event - argv[0]); |
645 | if (ret) |
646 | goto fail_address_parse; |
647 | } |
648 | |
649 | if (!event) { |
650 | char *tail; |
651 | char *ptr; |
652 | |
653 | tail = kstrdup(s: kbasename(path: filename), GFP_KERNEL); |
654 | if (!tail) { |
655 | ret = -ENOMEM; |
656 | goto fail_address_parse; |
657 | } |
658 | |
659 | ptr = strpbrk(tail, ".-_" ); |
660 | if (ptr) |
661 | *ptr = '\0'; |
662 | |
663 | snprintf(buf, MAX_EVENT_NAME_LEN, fmt: "%c_%s_0x%lx" , 'p', tail, offset); |
664 | event = buf; |
665 | kfree(objp: tail); |
666 | } |
667 | |
668 | argc -= 2; |
669 | argv += 2; |
670 | |
671 | tu = alloc_trace_uprobe(group, event, nargs: argc, is_ret: is_return); |
672 | if (IS_ERR(ptr: tu)) { |
673 | ret = PTR_ERR(ptr: tu); |
674 | /* This must return -ENOMEM otherwise there is a bug */ |
675 | WARN_ON_ONCE(ret != -ENOMEM); |
676 | goto fail_address_parse; |
677 | } |
678 | tu->offset = offset; |
679 | tu->ref_ctr_offset = ref_ctr_offset; |
680 | tu->path = path; |
681 | tu->filename = filename; |
682 | |
683 | /* parse arguments */ |
684 | for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { |
685 | struct traceprobe_parse_context ctx = { |
686 | .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER, |
687 | }; |
688 | |
689 | trace_probe_log_set_index(index: i + 2); |
690 | ret = traceprobe_parse_probe_arg(tp: &tu->tp, i, argv: argv[i], ctx: &ctx); |
691 | traceprobe_finish_parse(ctx: &ctx); |
692 | if (ret) |
693 | goto error; |
694 | } |
695 | |
696 | ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; |
697 | ret = traceprobe_set_print_fmt(tp: &tu->tp, ptype); |
698 | if (ret < 0) |
699 | goto error; |
700 | |
701 | ret = register_trace_uprobe(tu); |
702 | if (!ret) |
703 | goto out; |
704 | |
705 | error: |
706 | free_trace_uprobe(tu); |
707 | out: |
708 | trace_probe_log_clear(); |
709 | return ret; |
710 | |
711 | fail_address_parse: |
712 | trace_probe_log_clear(); |
713 | path_put(&path); |
714 | kfree(objp: filename); |
715 | |
716 | return ret; |
717 | } |
718 | |
719 | int trace_uprobe_create(const char *raw_command) |
720 | { |
721 | return trace_probe_create(raw_command, createfn: __trace_uprobe_create); |
722 | } |
723 | |
724 | static int create_or_delete_trace_uprobe(const char *raw_command) |
725 | { |
726 | int ret; |
727 | |
728 | if (raw_command[0] == '-') |
729 | return dyn_event_release(raw_command, type: &trace_uprobe_ops); |
730 | |
731 | ret = trace_uprobe_create(raw_command); |
732 | return ret == -ECANCELED ? -EINVAL : ret; |
733 | } |
734 | |
/* dyn_event ->free: unregister and free the trace_uprobe behind @ev. */
static int trace_uprobe_release(struct dyn_event *ev)
{
	return unregister_trace_uprobe(to_trace_uprobe(ev));
}
741 | |
742 | /* Probes listing interfaces */ |
743 | static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev) |
744 | { |
745 | struct trace_uprobe *tu = to_trace_uprobe(ev); |
746 | char c = is_ret_probe(tu) ? 'r' : 'p'; |
747 | int i; |
748 | |
749 | seq_printf(m, fmt: "%c:%s/%s %s:0x%0*lx" , c, trace_probe_group_name(tp: &tu->tp), |
750 | trace_probe_name(tp: &tu->tp), tu->filename, |
751 | (int)(sizeof(void *) * 2), tu->offset); |
752 | |
753 | if (tu->ref_ctr_offset) |
754 | seq_printf(m, fmt: "(0x%lx)" , tu->ref_ctr_offset); |
755 | |
756 | for (i = 0; i < tu->tp.nr_args; i++) |
757 | seq_printf(m, fmt: " %s=%s" , tu->tp.args[i].name, tu->tp.args[i].comm); |
758 | |
759 | seq_putc(m, c: '\n'); |
760 | return 0; |
761 | } |
762 | |
/* seq_file ->show: print @v only if it is a trace_uprobe dyn_event. */
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct dyn_event *ev = v;

	if (is_trace_uprobe(ev))
		return trace_uprobe_show(m, ev);

	return 0;
}
772 | |
773 | static const struct seq_operations probes_seq_op = { |
774 | .start = dyn_event_seq_start, |
775 | .next = dyn_event_seq_next, |
776 | .stop = dyn_event_seq_stop, |
777 | .show = probes_seq_show |
778 | }; |
779 | |
780 | static int probes_open(struct inode *inode, struct file *file) |
781 | { |
782 | int ret; |
783 | |
784 | ret = security_locked_down(what: LOCKDOWN_TRACEFS); |
785 | if (ret) |
786 | return ret; |
787 | |
788 | if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { |
789 | ret = dyn_events_release_all(type: &trace_uprobe_ops); |
790 | if (ret) |
791 | return ret; |
792 | } |
793 | |
794 | return seq_open(file, &probes_seq_op); |
795 | } |
796 | |
797 | static ssize_t probes_write(struct file *file, const char __user *buffer, |
798 | size_t count, loff_t *ppos) |
799 | { |
800 | return trace_parse_run_command(file, buffer, count, ppos, |
801 | createfn: create_or_delete_trace_uprobe); |
802 | } |
803 | |
804 | static const struct file_operations uprobe_events_ops = { |
805 | .owner = THIS_MODULE, |
806 | .open = probes_open, |
807 | .read = seq_read, |
808 | .llseek = seq_lseek, |
809 | .release = seq_release, |
810 | .write = probes_write, |
811 | }; |
812 | |
813 | /* Probes profiling interfaces */ |
814 | static int probes_profile_seq_show(struct seq_file *m, void *v) |
815 | { |
816 | struct dyn_event *ev = v; |
817 | struct trace_uprobe *tu; |
818 | |
819 | if (!is_trace_uprobe(ev)) |
820 | return 0; |
821 | |
822 | tu = to_trace_uprobe(ev); |
823 | seq_printf(m, fmt: " %s %-44s %15lu\n" , tu->filename, |
824 | trace_probe_name(tp: &tu->tp), tu->nhit); |
825 | return 0; |
826 | } |
827 | |
828 | static const struct seq_operations profile_seq_op = { |
829 | .start = dyn_event_seq_start, |
830 | .next = dyn_event_seq_next, |
831 | .stop = dyn_event_seq_stop, |
832 | .show = probes_profile_seq_show |
833 | }; |
834 | |
835 | static int profile_open(struct inode *inode, struct file *file) |
836 | { |
837 | int ret; |
838 | |
839 | ret = security_locked_down(what: LOCKDOWN_TRACEFS); |
840 | if (ret) |
841 | return ret; |
842 | |
843 | return seq_open(file, &profile_seq_op); |
844 | } |
845 | |
846 | static const struct file_operations uprobe_profile_ops = { |
847 | .owner = THIS_MODULE, |
848 | .open = profile_open, |
849 | .read = seq_read, |
850 | .llseek = seq_lseek, |
851 | .release = seq_release, |
852 | }; |
853 | |
854 | struct uprobe_cpu_buffer { |
855 | struct mutex mutex; |
856 | void *buf; |
857 | }; |
858 | static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; |
859 | static int uprobe_buffer_refcnt; |
860 | |
861 | static int uprobe_buffer_init(void) |
862 | { |
863 | int cpu, err_cpu; |
864 | |
865 | uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); |
866 | if (uprobe_cpu_buffer == NULL) |
867 | return -ENOMEM; |
868 | |
869 | for_each_possible_cpu(cpu) { |
870 | struct page *p = alloc_pages_node(cpu_to_node(cpu), |
871 | GFP_KERNEL, order: 0); |
872 | if (p == NULL) { |
873 | err_cpu = cpu; |
874 | goto err; |
875 | } |
876 | per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); |
877 | mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); |
878 | } |
879 | |
880 | return 0; |
881 | |
882 | err: |
883 | for_each_possible_cpu(cpu) { |
884 | if (cpu == err_cpu) |
885 | break; |
886 | free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); |
887 | } |
888 | |
889 | free_percpu(pdata: uprobe_cpu_buffer); |
890 | return -ENOMEM; |
891 | } |
892 | |
893 | static int uprobe_buffer_enable(void) |
894 | { |
895 | int ret = 0; |
896 | |
897 | BUG_ON(!mutex_is_locked(&event_mutex)); |
898 | |
899 | if (uprobe_buffer_refcnt++ == 0) { |
900 | ret = uprobe_buffer_init(); |
901 | if (ret < 0) |
902 | uprobe_buffer_refcnt--; |
903 | } |
904 | |
905 | return ret; |
906 | } |
907 | |
908 | static void uprobe_buffer_disable(void) |
909 | { |
910 | int cpu; |
911 | |
912 | BUG_ON(!mutex_is_locked(&event_mutex)); |
913 | |
914 | if (--uprobe_buffer_refcnt == 0) { |
915 | for_each_possible_cpu(cpu) |
916 | free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, |
917 | cpu)->buf); |
918 | |
919 | free_percpu(pdata: uprobe_cpu_buffer); |
920 | uprobe_cpu_buffer = NULL; |
921 | } |
922 | } |
923 | |
924 | static struct uprobe_cpu_buffer *uprobe_buffer_get(void) |
925 | { |
926 | struct uprobe_cpu_buffer *ucb; |
927 | int cpu; |
928 | |
929 | cpu = raw_smp_processor_id(); |
930 | ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); |
931 | |
932 | /* |
933 | * Use per-cpu buffers for fastest access, but we might migrate |
934 | * so the mutex makes sure we have sole access to it. |
935 | */ |
936 | mutex_lock(&ucb->mutex); |
937 | |
938 | return ucb; |
939 | } |
940 | |
941 | static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) |
942 | { |
943 | mutex_unlock(lock: &ucb->mutex); |
944 | } |
945 | |
946 | static void __uprobe_trace_func(struct trace_uprobe *tu, |
947 | unsigned long func, struct pt_regs *regs, |
948 | struct uprobe_cpu_buffer *ucb, int dsize, |
949 | struct trace_event_file *trace_file) |
950 | { |
951 | struct uprobe_trace_entry_head *entry; |
952 | struct trace_event_buffer fbuffer; |
953 | void *data; |
954 | int size, esize; |
955 | struct trace_event_call *call = trace_probe_event_call(tp: &tu->tp); |
956 | |
957 | WARN_ON(call != trace_file->event_call); |
958 | |
959 | if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) |
960 | return; |
961 | |
962 | if (trace_trigger_soft_disabled(file: trace_file)) |
963 | return; |
964 | |
965 | esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
966 | size = esize + tu->tp.size + dsize; |
967 | entry = trace_event_buffer_reserve(fbuffer: &fbuffer, trace_file, len: size); |
968 | if (!entry) |
969 | return; |
970 | |
971 | if (is_ret_probe(tu)) { |
972 | entry->vaddr[0] = func; |
973 | entry->vaddr[1] = instruction_pointer(regs); |
974 | data = DATAOF_TRACE_ENTRY(entry, true); |
975 | } else { |
976 | entry->vaddr[0] = instruction_pointer(regs); |
977 | data = DATAOF_TRACE_ENTRY(entry, false); |
978 | } |
979 | |
980 | memcpy(data, ucb->buf, tu->tp.size + dsize); |
981 | |
982 | trace_event_buffer_commit(fbuffer: &fbuffer); |
983 | } |
984 | |
985 | /* uprobe handler */ |
986 | static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, |
987 | struct uprobe_cpu_buffer *ucb, int dsize) |
988 | { |
989 | struct event_file_link *link; |
990 | |
991 | if (is_ret_probe(tu)) |
992 | return 0; |
993 | |
994 | rcu_read_lock(); |
995 | trace_probe_for_each_link_rcu(link, &tu->tp) |
996 | __uprobe_trace_func(tu, func: 0, regs, ucb, dsize, trace_file: link->file); |
997 | rcu_read_unlock(); |
998 | |
999 | return 0; |
1000 | } |
1001 | |
1002 | static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, |
1003 | struct pt_regs *regs, |
1004 | struct uprobe_cpu_buffer *ucb, int dsize) |
1005 | { |
1006 | struct event_file_link *link; |
1007 | |
1008 | rcu_read_lock(); |
1009 | trace_probe_for_each_link_rcu(link, &tu->tp) |
1010 | __uprobe_trace_func(tu, func, regs, ucb, dsize, trace_file: link->file); |
1011 | rcu_read_unlock(); |
1012 | } |
1013 | |
1014 | /* Event entry printers */ |
1015 | static enum print_line_t |
1016 | print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) |
1017 | { |
1018 | struct uprobe_trace_entry_head *entry; |
1019 | struct trace_seq *s = &iter->seq; |
1020 | struct trace_uprobe *tu; |
1021 | u8 *data; |
1022 | |
1023 | entry = (struct uprobe_trace_entry_head *)iter->ent; |
1024 | tu = trace_uprobe_primary_from_call( |
1025 | container_of(event, struct trace_event_call, event)); |
1026 | if (unlikely(!tu)) |
1027 | goto out; |
1028 | |
1029 | if (is_ret_probe(tu)) { |
1030 | trace_seq_printf(s, fmt: "%s: (0x%lx <- 0x%lx)" , |
1031 | trace_probe_name(tp: &tu->tp), |
1032 | entry->vaddr[1], entry->vaddr[0]); |
1033 | data = DATAOF_TRACE_ENTRY(entry, true); |
1034 | } else { |
1035 | trace_seq_printf(s, fmt: "%s: (0x%lx)" , |
1036 | trace_probe_name(tp: &tu->tp), |
1037 | entry->vaddr[0]); |
1038 | data = DATAOF_TRACE_ENTRY(entry, false); |
1039 | } |
1040 | |
1041 | if (trace_probe_print_args(s, args: tu->tp.args, nr_args: tu->tp.nr_args, data, field: entry) < 0) |
1042 | goto out; |
1043 | |
1044 | trace_seq_putc(s, c: '\n'); |
1045 | |
1046 | out: |
1047 | return trace_handle_return(s); |
1048 | } |
1049 | |
1050 | typedef bool (*filter_func_t)(struct uprobe_consumer *self, |
1051 | enum uprobe_filter_ctx ctx, |
1052 | struct mm_struct *mm); |
1053 | |
1054 | static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) |
1055 | { |
1056 | int ret; |
1057 | |
1058 | tu->consumer.filter = filter; |
1059 | tu->inode = d_real_inode(dentry: tu->path.dentry); |
1060 | |
1061 | if (tu->ref_ctr_offset) |
1062 | ret = uprobe_register_refctr(inode: tu->inode, offset: tu->offset, |
1063 | ref_ctr_offset: tu->ref_ctr_offset, uc: &tu->consumer); |
1064 | else |
1065 | ret = uprobe_register(inode: tu->inode, offset: tu->offset, uc: &tu->consumer); |
1066 | |
1067 | if (ret) |
1068 | tu->inode = NULL; |
1069 | |
1070 | return ret; |
1071 | } |
1072 | |
1073 | static void __probe_event_disable(struct trace_probe *tp) |
1074 | { |
1075 | struct trace_uprobe *tu; |
1076 | |
1077 | tu = container_of(tp, struct trace_uprobe, tp); |
1078 | WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); |
1079 | |
1080 | list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { |
1081 | if (!tu->inode) |
1082 | continue; |
1083 | |
1084 | uprobe_unregister(inode: tu->inode, offset: tu->offset, uc: &tu->consumer); |
1085 | tu->inode = NULL; |
1086 | } |
1087 | } |
1088 | |
1089 | static int probe_event_enable(struct trace_event_call *call, |
1090 | struct trace_event_file *file, filter_func_t filter) |
1091 | { |
1092 | struct trace_probe *tp; |
1093 | struct trace_uprobe *tu; |
1094 | bool enabled; |
1095 | int ret; |
1096 | |
1097 | tp = trace_probe_primary_from_call(call); |
1098 | if (WARN_ON_ONCE(!tp)) |
1099 | return -ENODEV; |
1100 | enabled = trace_probe_is_enabled(tp); |
1101 | |
1102 | /* This may also change "enabled" state */ |
1103 | if (file) { |
1104 | if (trace_probe_test_flag(tp, TP_FLAG_PROFILE)) |
1105 | return -EINTR; |
1106 | |
1107 | ret = trace_probe_add_file(tp, file); |
1108 | if (ret < 0) |
1109 | return ret; |
1110 | } else { |
1111 | if (trace_probe_test_flag(tp, TP_FLAG_TRACE)) |
1112 | return -EINTR; |
1113 | |
1114 | trace_probe_set_flag(tp, TP_FLAG_PROFILE); |
1115 | } |
1116 | |
1117 | tu = container_of(tp, struct trace_uprobe, tp); |
1118 | WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); |
1119 | |
1120 | if (enabled) |
1121 | return 0; |
1122 | |
1123 | ret = uprobe_buffer_enable(); |
1124 | if (ret) |
1125 | goto err_flags; |
1126 | |
1127 | list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { |
1128 | ret = trace_uprobe_enable(tu, filter); |
1129 | if (ret) { |
1130 | __probe_event_disable(tp); |
1131 | goto err_buffer; |
1132 | } |
1133 | } |
1134 | |
1135 | return 0; |
1136 | |
1137 | err_buffer: |
1138 | uprobe_buffer_disable(); |
1139 | |
1140 | err_flags: |
1141 | if (file) |
1142 | trace_probe_remove_file(tp, file); |
1143 | else |
1144 | trace_probe_clear_flag(tp, TP_FLAG_PROFILE); |
1145 | |
1146 | return ret; |
1147 | } |
1148 | |
1149 | static void probe_event_disable(struct trace_event_call *call, |
1150 | struct trace_event_file *file) |
1151 | { |
1152 | struct trace_probe *tp; |
1153 | |
1154 | tp = trace_probe_primary_from_call(call); |
1155 | if (WARN_ON_ONCE(!tp)) |
1156 | return; |
1157 | |
1158 | if (!trace_probe_is_enabled(tp)) |
1159 | return; |
1160 | |
1161 | if (file) { |
1162 | if (trace_probe_remove_file(tp, file) < 0) |
1163 | return; |
1164 | |
1165 | if (trace_probe_is_enabled(tp)) |
1166 | return; |
1167 | } else |
1168 | trace_probe_clear_flag(tp, TP_FLAG_PROFILE); |
1169 | |
1170 | __probe_event_disable(tp); |
1171 | uprobe_buffer_disable(); |
1172 | } |
1173 | |
1174 | static int uprobe_event_define_fields(struct trace_event_call *event_call) |
1175 | { |
1176 | int ret, size; |
1177 | struct uprobe_trace_entry_head field; |
1178 | struct trace_uprobe *tu; |
1179 | |
1180 | tu = trace_uprobe_primary_from_call(call: event_call); |
1181 | if (unlikely(!tu)) |
1182 | return -ENODEV; |
1183 | |
1184 | if (is_ret_probe(tu)) { |
1185 | DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); |
1186 | DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0); |
1187 | size = SIZEOF_TRACE_ENTRY(true); |
1188 | } else { |
1189 | DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); |
1190 | size = SIZEOF_TRACE_ENTRY(false); |
1191 | } |
1192 | |
1193 | return traceprobe_define_arg_fields(event_call, offset: size, tp: &tu->tp); |
1194 | } |
1195 | |
1196 | #ifdef CONFIG_PERF_EVENTS |
1197 | static bool |
1198 | __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) |
1199 | { |
1200 | struct perf_event *event; |
1201 | |
1202 | if (filter->nr_systemwide) |
1203 | return true; |
1204 | |
1205 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { |
1206 | if (event->hw.target->mm == mm) |
1207 | return true; |
1208 | } |
1209 | |
1210 | return false; |
1211 | } |
1212 | |
1213 | static inline bool |
1214 | trace_uprobe_filter_event(struct trace_uprobe_filter *filter, |
1215 | struct perf_event *event) |
1216 | { |
1217 | return __uprobe_perf_filter(filter, mm: event->hw.target->mm); |
1218 | } |
1219 | |
1220 | static bool trace_uprobe_filter_remove(struct trace_uprobe_filter *filter, |
1221 | struct perf_event *event) |
1222 | { |
1223 | bool done; |
1224 | |
1225 | write_lock(&filter->rwlock); |
1226 | if (event->hw.target) { |
1227 | list_del(entry: &event->hw.tp_list); |
1228 | done = filter->nr_systemwide || |
1229 | (event->hw.target->flags & PF_EXITING) || |
1230 | trace_uprobe_filter_event(filter, event); |
1231 | } else { |
1232 | filter->nr_systemwide--; |
1233 | done = filter->nr_systemwide; |
1234 | } |
1235 | write_unlock(&filter->rwlock); |
1236 | |
1237 | return done; |
1238 | } |
1239 | |
1240 | /* This returns true if the filter always covers target mm */ |
1241 | static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter, |
1242 | struct perf_event *event) |
1243 | { |
1244 | bool done; |
1245 | |
1246 | write_lock(&filter->rwlock); |
1247 | if (event->hw.target) { |
1248 | /* |
1249 | * event->parent != NULL means copy_process(), we can avoid |
1250 | * uprobe_apply(). current->mm must be probed and we can rely |
1251 | * on dup_mmap() which preserves the already installed bp's. |
1252 | * |
1253 | * attr.enable_on_exec means that exec/mmap will install the |
1254 | * breakpoints we need. |
1255 | */ |
1256 | done = filter->nr_systemwide || |
1257 | event->parent || event->attr.enable_on_exec || |
1258 | trace_uprobe_filter_event(filter, event); |
1259 | list_add(new: &event->hw.tp_list, head: &filter->perf_events); |
1260 | } else { |
1261 | done = filter->nr_systemwide; |
1262 | filter->nr_systemwide++; |
1263 | } |
1264 | write_unlock(&filter->rwlock); |
1265 | |
1266 | return done; |
1267 | } |
1268 | |
1269 | static int uprobe_perf_close(struct trace_event_call *call, |
1270 | struct perf_event *event) |
1271 | { |
1272 | struct trace_probe *tp; |
1273 | struct trace_uprobe *tu; |
1274 | int ret = 0; |
1275 | |
1276 | tp = trace_probe_primary_from_call(call); |
1277 | if (WARN_ON_ONCE(!tp)) |
1278 | return -ENODEV; |
1279 | |
1280 | tu = container_of(tp, struct trace_uprobe, tp); |
1281 | if (trace_uprobe_filter_remove(filter: tu->tp.event->filter, event)) |
1282 | return 0; |
1283 | |
1284 | list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { |
1285 | ret = uprobe_apply(inode: tu->inode, offset: tu->offset, uc: &tu->consumer, false); |
1286 | if (ret) |
1287 | break; |
1288 | } |
1289 | |
1290 | return ret; |
1291 | } |
1292 | |
1293 | static int uprobe_perf_open(struct trace_event_call *call, |
1294 | struct perf_event *event) |
1295 | { |
1296 | struct trace_probe *tp; |
1297 | struct trace_uprobe *tu; |
1298 | int err = 0; |
1299 | |
1300 | tp = trace_probe_primary_from_call(call); |
1301 | if (WARN_ON_ONCE(!tp)) |
1302 | return -ENODEV; |
1303 | |
1304 | tu = container_of(tp, struct trace_uprobe, tp); |
1305 | if (trace_uprobe_filter_add(filter: tu->tp.event->filter, event)) |
1306 | return 0; |
1307 | |
1308 | list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { |
1309 | err = uprobe_apply(inode: tu->inode, offset: tu->offset, uc: &tu->consumer, true); |
1310 | if (err) { |
1311 | uprobe_perf_close(call, event); |
1312 | break; |
1313 | } |
1314 | } |
1315 | |
1316 | return err; |
1317 | } |
1318 | |
1319 | static bool uprobe_perf_filter(struct uprobe_consumer *uc, |
1320 | enum uprobe_filter_ctx ctx, struct mm_struct *mm) |
1321 | { |
1322 | struct trace_uprobe_filter *filter; |
1323 | struct trace_uprobe *tu; |
1324 | int ret; |
1325 | |
1326 | tu = container_of(uc, struct trace_uprobe, consumer); |
1327 | filter = tu->tp.event->filter; |
1328 | |
1329 | read_lock(&filter->rwlock); |
1330 | ret = __uprobe_perf_filter(filter, mm); |
1331 | read_unlock(&filter->rwlock); |
1332 | |
1333 | return ret; |
1334 | } |
1335 | |
1336 | static void __uprobe_perf_func(struct trace_uprobe *tu, |
1337 | unsigned long func, struct pt_regs *regs, |
1338 | struct uprobe_cpu_buffer *ucb, int dsize) |
1339 | { |
1340 | struct trace_event_call *call = trace_probe_event_call(tp: &tu->tp); |
1341 | struct uprobe_trace_entry_head *entry; |
1342 | struct hlist_head *head; |
1343 | void *data; |
1344 | int size, esize; |
1345 | int rctx; |
1346 | |
1347 | #ifdef CONFIG_BPF_EVENTS |
1348 | if (bpf_prog_array_valid(call)) { |
1349 | u32 ret; |
1350 | |
1351 | ret = bpf_prog_run_array_uprobe(array_rcu: call->prog_array, ctx: regs, run_prog: bpf_prog_run); |
1352 | if (!ret) |
1353 | return; |
1354 | } |
1355 | #endif /* CONFIG_BPF_EVENTS */ |
1356 | |
1357 | esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
1358 | |
1359 | size = esize + tu->tp.size + dsize; |
1360 | size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); |
1361 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough" )) |
1362 | return; |
1363 | |
1364 | preempt_disable(); |
1365 | head = this_cpu_ptr(call->perf_events); |
1366 | if (hlist_empty(h: head)) |
1367 | goto out; |
1368 | |
1369 | entry = perf_trace_buf_alloc(size, NULL, rctxp: &rctx); |
1370 | if (!entry) |
1371 | goto out; |
1372 | |
1373 | if (is_ret_probe(tu)) { |
1374 | entry->vaddr[0] = func; |
1375 | entry->vaddr[1] = instruction_pointer(regs); |
1376 | data = DATAOF_TRACE_ENTRY(entry, true); |
1377 | } else { |
1378 | entry->vaddr[0] = instruction_pointer(regs); |
1379 | data = DATAOF_TRACE_ENTRY(entry, false); |
1380 | } |
1381 | |
1382 | memcpy(data, ucb->buf, tu->tp.size + dsize); |
1383 | |
1384 | if (size - esize > tu->tp.size + dsize) { |
1385 | int len = tu->tp.size + dsize; |
1386 | |
1387 | memset(data + len, 0, size - esize - len); |
1388 | } |
1389 | |
1390 | perf_trace_buf_submit(raw_data: entry, size, rctx, type: call->event.type, count: 1, regs, |
1391 | head, NULL); |
1392 | out: |
1393 | preempt_enable(); |
1394 | } |
1395 | |
1396 | /* uprobe profile handler */ |
1397 | static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, |
1398 | struct uprobe_cpu_buffer *ucb, int dsize) |
1399 | { |
1400 | if (!uprobe_perf_filter(uc: &tu->consumer, ctx: 0, current->mm)) |
1401 | return UPROBE_HANDLER_REMOVE; |
1402 | |
1403 | if (!is_ret_probe(tu)) |
1404 | __uprobe_perf_func(tu, func: 0, regs, ucb, dsize); |
1405 | return 0; |
1406 | } |
1407 | |
/*
 * uretprobe profile handler: forwards unconditionally to
 * __uprobe_perf_func(), passing @func through.
 */
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
				struct pt_regs *regs,
				struct uprobe_cpu_buffer *ucb, int dsize)
{
	__uprobe_perf_func(tu, func, regs, ucb, dsize);
}
1414 | |
1415 | int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, |
1416 | const char **filename, u64 *probe_offset, |
1417 | u64 *probe_addr, bool perf_type_tracepoint) |
1418 | { |
1419 | const char *pevent = trace_event_name(call: event->tp_event); |
1420 | const char *group = event->tp_event->class->system; |
1421 | struct trace_uprobe *tu; |
1422 | |
1423 | if (perf_type_tracepoint) |
1424 | tu = find_probe_event(event: pevent, group); |
1425 | else |
1426 | tu = trace_uprobe_primary_from_call(call: event->tp_event); |
1427 | if (!tu) |
1428 | return -EINVAL; |
1429 | |
1430 | *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE |
1431 | : BPF_FD_TYPE_UPROBE; |
1432 | *filename = tu->filename; |
1433 | *probe_offset = tu->offset; |
1434 | *probe_addr = 0; |
1435 | return 0; |
1436 | } |
1437 | #endif /* CONFIG_PERF_EVENTS */ |
1438 | |
1439 | static int |
1440 | trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, |
1441 | void *data) |
1442 | { |
1443 | struct trace_event_file *file = data; |
1444 | |
1445 | switch (type) { |
1446 | case TRACE_REG_REGISTER: |
1447 | return probe_event_enable(call: event, file, NULL); |
1448 | |
1449 | case TRACE_REG_UNREGISTER: |
1450 | probe_event_disable(call: event, file); |
1451 | return 0; |
1452 | |
1453 | #ifdef CONFIG_PERF_EVENTS |
1454 | case TRACE_REG_PERF_REGISTER: |
1455 | return probe_event_enable(call: event, NULL, filter: uprobe_perf_filter); |
1456 | |
1457 | case TRACE_REG_PERF_UNREGISTER: |
1458 | probe_event_disable(call: event, NULL); |
1459 | return 0; |
1460 | |
1461 | case TRACE_REG_PERF_OPEN: |
1462 | return uprobe_perf_open(call: event, event: data); |
1463 | |
1464 | case TRACE_REG_PERF_CLOSE: |
1465 | return uprobe_perf_close(call: event, event: data); |
1466 | |
1467 | #endif |
1468 | default: |
1469 | return 0; |
1470 | } |
1471 | } |
1472 | |
1473 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) |
1474 | { |
1475 | struct trace_uprobe *tu; |
1476 | struct uprobe_dispatch_data udd; |
1477 | struct uprobe_cpu_buffer *ucb; |
1478 | int dsize, esize; |
1479 | int ret = 0; |
1480 | |
1481 | |
1482 | tu = container_of(con, struct trace_uprobe, consumer); |
1483 | tu->nhit++; |
1484 | |
1485 | udd.tu = tu; |
1486 | udd.bp_addr = instruction_pointer(regs); |
1487 | |
1488 | current->utask->vaddr = (unsigned long) &udd; |
1489 | |
1490 | if (WARN_ON_ONCE(!uprobe_cpu_buffer)) |
1491 | return 0; |
1492 | |
1493 | dsize = __get_data_size(tp: &tu->tp, regs, NULL); |
1494 | esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
1495 | |
1496 | ucb = uprobe_buffer_get(); |
1497 | store_trace_args(data: ucb->buf, tp: &tu->tp, rec: regs, NULL, header_size: esize, maxlen: dsize); |
1498 | |
1499 | if (trace_probe_test_flag(tp: &tu->tp, TP_FLAG_TRACE)) |
1500 | ret |= uprobe_trace_func(tu, regs, ucb, dsize); |
1501 | |
1502 | #ifdef CONFIG_PERF_EVENTS |
1503 | if (trace_probe_test_flag(tp: &tu->tp, TP_FLAG_PROFILE)) |
1504 | ret |= uprobe_perf_func(tu, regs, ucb, dsize); |
1505 | #endif |
1506 | uprobe_buffer_put(ucb); |
1507 | return ret; |
1508 | } |
1509 | |
1510 | static int uretprobe_dispatcher(struct uprobe_consumer *con, |
1511 | unsigned long func, struct pt_regs *regs) |
1512 | { |
1513 | struct trace_uprobe *tu; |
1514 | struct uprobe_dispatch_data udd; |
1515 | struct uprobe_cpu_buffer *ucb; |
1516 | int dsize, esize; |
1517 | |
1518 | tu = container_of(con, struct trace_uprobe, consumer); |
1519 | |
1520 | udd.tu = tu; |
1521 | udd.bp_addr = func; |
1522 | |
1523 | current->utask->vaddr = (unsigned long) &udd; |
1524 | |
1525 | if (WARN_ON_ONCE(!uprobe_cpu_buffer)) |
1526 | return 0; |
1527 | |
1528 | dsize = __get_data_size(tp: &tu->tp, regs, NULL); |
1529 | esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
1530 | |
1531 | ucb = uprobe_buffer_get(); |
1532 | store_trace_args(data: ucb->buf, tp: &tu->tp, rec: regs, NULL, header_size: esize, maxlen: dsize); |
1533 | |
1534 | if (trace_probe_test_flag(tp: &tu->tp, TP_FLAG_TRACE)) |
1535 | uretprobe_trace_func(tu, func, regs, ucb, dsize); |
1536 | |
1537 | #ifdef CONFIG_PERF_EVENTS |
1538 | if (trace_probe_test_flag(tp: &tu->tp, TP_FLAG_PROFILE)) |
1539 | uretprobe_perf_func(tu, func, regs, ucb, dsize); |
1540 | #endif |
1541 | uprobe_buffer_put(ucb); |
1542 | return 0; |
1543 | } |
1544 | |
1545 | static struct trace_event_functions uprobe_funcs = { |
1546 | .trace = print_uprobe_event |
1547 | }; |
1548 | |
1549 | static struct trace_event_fields uprobe_fields_array[] = { |
1550 | { .type = TRACE_FUNCTION_TYPE, |
1551 | .define_fields = uprobe_event_define_fields }, |
1552 | {} |
1553 | }; |
1554 | |
1555 | static inline void init_trace_event_call(struct trace_uprobe *tu) |
1556 | { |
1557 | struct trace_event_call *call = trace_probe_event_call(tp: &tu->tp); |
1558 | call->event.funcs = &uprobe_funcs; |
1559 | call->class->fields_array = uprobe_fields_array; |
1560 | |
1561 | call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; |
1562 | call->class->reg = trace_uprobe_register; |
1563 | } |
1564 | |
1565 | static int register_uprobe_event(struct trace_uprobe *tu) |
1566 | { |
1567 | init_trace_event_call(tu); |
1568 | |
1569 | return trace_probe_register_event_call(tp: &tu->tp); |
1570 | } |
1571 | |
1572 | static int unregister_uprobe_event(struct trace_uprobe *tu) |
1573 | { |
1574 | return trace_probe_unregister_event_call(tp: &tu->tp); |
1575 | } |
1576 | |
1577 | #ifdef CONFIG_PERF_EVENTS |
1578 | struct trace_event_call * |
1579 | create_local_trace_uprobe(char *name, unsigned long offs, |
1580 | unsigned long ref_ctr_offset, bool is_return) |
1581 | { |
1582 | enum probe_print_type ptype; |
1583 | struct trace_uprobe *tu; |
1584 | struct path path; |
1585 | int ret; |
1586 | |
1587 | ret = kern_path(name, LOOKUP_FOLLOW, &path); |
1588 | if (ret) |
1589 | return ERR_PTR(error: ret); |
1590 | |
1591 | if (!d_is_reg(dentry: path.dentry)) { |
1592 | path_put(&path); |
1593 | return ERR_PTR(error: -EINVAL); |
1594 | } |
1595 | |
1596 | /* |
1597 | * local trace_kprobes are not added to dyn_event, so they are never |
1598 | * searched in find_trace_kprobe(). Therefore, there is no concern of |
1599 | * duplicated name "DUMMY_EVENT" here. |
1600 | */ |
1601 | tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, event: "DUMMY_EVENT" , nargs: 0, |
1602 | is_ret: is_return); |
1603 | |
1604 | if (IS_ERR(ptr: tu)) { |
1605 | pr_info("Failed to allocate trace_uprobe.(%d)\n" , |
1606 | (int)PTR_ERR(tu)); |
1607 | path_put(&path); |
1608 | return ERR_CAST(ptr: tu); |
1609 | } |
1610 | |
1611 | tu->offset = offs; |
1612 | tu->path = path; |
1613 | tu->ref_ctr_offset = ref_ctr_offset; |
1614 | tu->filename = kstrdup(s: name, GFP_KERNEL); |
1615 | if (!tu->filename) { |
1616 | ret = -ENOMEM; |
1617 | goto error; |
1618 | } |
1619 | |
1620 | init_trace_event_call(tu); |
1621 | |
1622 | ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; |
1623 | if (traceprobe_set_print_fmt(tp: &tu->tp, ptype) < 0) { |
1624 | ret = -ENOMEM; |
1625 | goto error; |
1626 | } |
1627 | |
1628 | return trace_probe_event_call(tp: &tu->tp); |
1629 | error: |
1630 | free_trace_uprobe(tu); |
1631 | return ERR_PTR(error: ret); |
1632 | } |
1633 | |
/*
 * Free the trace_uprobe that owns @event_call (the primary probe of that
 * call), as created by create_local_trace_uprobe().
 */
void destroy_local_trace_uprobe(struct trace_event_call *event_call)
{
	free_trace_uprobe(trace_uprobe_primary_from_call(event_call));
}
1642 | #endif /* CONFIG_PERF_EVENTS */ |
1643 | |
1644 | /* Make a trace interface for controlling probe points */ |
1645 | static __init int init_uprobe_trace(void) |
1646 | { |
1647 | int ret; |
1648 | |
1649 | ret = dyn_event_register(ops: &trace_uprobe_ops); |
1650 | if (ret) |
1651 | return ret; |
1652 | |
1653 | ret = tracing_init_dentry(); |
1654 | if (ret) |
1655 | return 0; |
1656 | |
1657 | trace_create_file(name: "uprobe_events" , TRACE_MODE_WRITE, NULL, |
1658 | NULL, fops: &uprobe_events_ops); |
1659 | /* Profile interface */ |
1660 | trace_create_file(name: "uprobe_profile" , TRACE_MODE_READ, NULL, |
1661 | NULL, fops: &uprobe_profile_ops); |
1662 | return 0; |
1663 | } |
1664 | |
1665 | fs_initcall(init_uprobe_trace); |
1666 | |