/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PTRACE_H
#define _LINUX_PTRACE_H

#include <linux/compiler.h> /* For unlikely. */
#include <linux/sched.h> /* For struct task_struct. */
#include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */
#include <linux/err.h> /* For IS_ERR_VALUE. */
#include <linux/bug.h> /* For BUG_ON. */
#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
#include <linux/seccomp.h>

/* Add sp to seccomp_data; since seccomp is a user API, we don't want to modify it. */
struct syscall_info {
	__u64 sp;
	struct seccomp_data data;
};

extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
			    void *buf, int len, unsigned int gup_flags);

/*
 * Ptrace flags
 *
 * The ownership rules for task->ptrace, which holds the ptrace
 * flags, are simple.  When a task is running, it owns its task->ptrace
 * flags.  When a task is stopped, the ptracer owns task->ptrace.
 */

#define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
#define PT_PTRACED 0x00000001

#define PT_OPT_FLAG_SHIFT 3
/* PT_TRACE_* event enable flags */
#define PT_EVENT_FLAG(event) (1 << (PT_OPT_FLAG_SHIFT + (event)))
#define PT_TRACESYSGOOD PT_EVENT_FLAG(0)
#define PT_TRACE_FORK PT_EVENT_FLAG(PTRACE_EVENT_FORK)
#define PT_TRACE_VFORK PT_EVENT_FLAG(PTRACE_EVENT_VFORK)
#define PT_TRACE_CLONE PT_EVENT_FLAG(PTRACE_EVENT_CLONE)
#define PT_TRACE_EXEC PT_EVENT_FLAG(PTRACE_EVENT_EXEC)
#define PT_TRACE_VFORK_DONE PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE)
#define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
#define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)

#define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
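
/*
 * Illustrative note (editor's sketch, not part of the kernel API): the PT_*
 * option bits above are simply the userspace PTRACE_O_* bits shifted left by
 * PT_OPT_FLAG_SHIFT.  For example, PTRACE_O_TRACEFORK is bit 1 (0x00000002),
 * so PT_TRACE_FORK == PT_EVENT_FLAG(PTRACE_EVENT_FORK) == 1 << (3 + 1) ==
 * 0x00000010 == PTRACE_O_TRACEFORK << 3.  A hypothetical sanity check inside
 * some init function could spell the invariant out:
 *
 *	BUILD_BUG_ON(PT_TRACE_FORK != (PTRACE_O_TRACEFORK << PT_OPT_FLAG_SHIFT));
 *	BUILD_BUG_ON(PT_EXITKILL != (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT));
 */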

extern long arch_ptrace(struct task_struct *child, long request,
			unsigned long addr, unsigned long data);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
			  unsigned long addr, unsigned long data);
extern int ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
			  struct task_struct *new_parent,
			  const struct cred *ptracer_cred);
extern void __ptrace_unlink(struct task_struct *child);
extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
#define PTRACE_MODE_READ 0x01
#define PTRACE_MODE_ATTACH 0x02
#define PTRACE_MODE_NOAUDIT 0x04
#define PTRACE_MODE_FSCREDS 0x08
#define PTRACE_MODE_REALCREDS 0x10

/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)

/**
 * ptrace_may_access - check whether the caller is permitted to access
 * a target task.
 * @task: target task
 * @mode: selects type of access and caller credentials
 *
 * Returns true on success, false on denial.
 *
 * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
 * be set in @mode to specify whether the access was requested through
 * a filesystem syscall (should use effective capabilities and fsuid
 * of the caller) or through an explicit syscall such as
 * process_vm_writev or ptrace (and should use the real credentials).
 */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
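
/*
 * Usage sketch (editor's illustration; example_show_task_info() is
 * hypothetical, not kernel code): a /proc-style read handler would pick
 * PTRACE_MODE_READ_FSCREDS because the access arrives through a filesystem
 * syscall, and it bails out before touching the target task:
 *
 *	static int example_show_task_info(struct seq_file *m,
 *					  struct task_struct *task)
 *	{
 *		if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
 *			return -EACCES;
 *		seq_printf(m, "pid: %d\n", task_pid_nr(task));
 *		return 0;
 *	}
 */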

static inline int ptrace_reparented(struct task_struct *child)
{
	return !same_thread_group(child->real_parent, child->parent);
}

static inline void ptrace_unlink(struct task_struct *child)
{
	if (unlikely(child->ptrace))
		__ptrace_unlink(child);
}

int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);

/**
 * ptrace_parent - return the task that is tracing the given task
 * @task: task to consider
 *
 * Returns %NULL if no one is tracing @task, or the &struct task_struct
 * pointer to its tracer.
 *
 * Must be called under rcu_read_lock().  The pointer returned might be kept
 * live only by RCU.  During exec, this may be called with task_lock() held
 * on @task, still held from when check_unsafe_exec() was called.
 */
static inline struct task_struct *ptrace_parent(struct task_struct *task)
{
	if (unlikely(task->ptrace))
		return rcu_dereference(task->parent);
	return NULL;
}
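
/*
 * Usage sketch (editor's illustration; example_tracer_pid() is made up):
 * because the returned pointer may be kept live only by RCU, callers hold
 * rcu_read_lock() across both the lookup and any use of the result:
 *
 *	static pid_t example_tracer_pid(struct task_struct *task)
 *	{
 *		struct task_struct *tracer;
 *		pid_t pid = 0;
 *
 *		rcu_read_lock();
 *		tracer = ptrace_parent(task);
 *		if (tracer)
 *			pid = task_pid_nr(tracer);
 *		rcu_read_unlock();
 *		return pid;
 *	}
 */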

/**
 * ptrace_event_enabled - test whether a ptrace event is enabled
 * @task: ptracee of interest
 * @event: %PTRACE_EVENT_* to test
 *
 * Test whether @event is enabled for ptracee @task.
 *
 * Returns %true if @event is enabled, %false otherwise.
 */
static inline bool ptrace_event_enabled(struct task_struct *task, int event)
{
	return task->ptrace & PT_EVENT_FLAG(event);
}

/**
 * ptrace_event - possibly stop for a ptrace event notification
 * @event: %PTRACE_EVENT_* value to report
 * @message: value for %PTRACE_GETEVENTMSG to return
 *
 * Check whether @event is enabled and, if so, report @event and @message
 * to the ptrace parent.
 *
 * Called without locks.
 */
static inline void ptrace_event(int event, unsigned long message)
{
	if (unlikely(ptrace_event_enabled(current, event))) {
		ptrace_notify((event << 8) | SIGTRAP, message);
	} else if (event == PTRACE_EVENT_EXEC) {
		/* legacy EXEC report via SIGTRAP */
		if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
			send_sig(SIGTRAP, current, 0);
	}
}
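
/*
 * Illustrative sketch (editor's addition, not a quote of the exec path): a
 * caller reporting an exec event passes PTRACE_EVENT_EXEC together with
 * whatever value the tracer should later read via PTRACE_GETEVENTMSG, e.g.
 * a pre-exec pid:
 *
 *	ptrace_event(PTRACE_EVENT_EXEC, old_pid);
 *
 * If the event is not enabled, ptrace_event() falls back to the legacy
 * plain-SIGTRAP report for PTRACE_EVENT_EXEC and does nothing for other
 * events.
 */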

/**
 * ptrace_event_pid - possibly stop for a ptrace event notification
 * @event: %PTRACE_EVENT_* value to report
 * @pid: process identifier for %PTRACE_GETEVENTMSG to return
 *
 * Check whether @event is enabled and, if so, report @event and @pid
 * to the ptrace parent.  @pid is reported as the pid_t seen from the
 * ptrace parent's pid namespace.
 *
 * Called without locks.
 */
static inline void ptrace_event_pid(int event, struct pid *pid)
{
	/*
	 * FIXME: There's a potential race if a ptracer in a different pid
	 * namespace than parent attaches between computing message below and
	 * when we acquire tasklist_lock in ptrace_stop().  If this happens,
	 * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
	 */
	unsigned long message = 0;
	struct pid_namespace *ns;

	rcu_read_lock();
	ns = task_active_pid_ns(rcu_dereference(current->parent));
	if (ns)
		message = pid_nr_ns(pid, ns);
	rcu_read_unlock();

	ptrace_event(event, message);
}
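
/*
 * Usage sketch (editor's illustration, not a verbatim quote of the fork
 * path): a caller that has just created a child reports the child's
 * struct pid, and the helper translates it into the tracer's pid namespace
 * before it becomes the PTRACE_GETEVENTMSG value:
 *
 *	if (clone_flags & CLONE_VFORK)
 *		ptrace_event_pid(PTRACE_EVENT_VFORK, task_pid(child));
 *	else
 *		ptrace_event_pid(PTRACE_EVENT_FORK, task_pid(child));
 */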

/**
 * ptrace_init_task - initialize ptrace state for a new child
 * @child: new child task
 * @ptrace: true if child should be ptrace'd by parent's tracer
 *
 * This is called immediately after adding @child to its parent's children
 * list.  @ptrace is false in the normal case, and true to ptrace @child.
 *
 * Called with current's siglock and write_lock_irq(&tasklist_lock) held.
 */
static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
{
	INIT_LIST_HEAD(&child->ptrace_entry);
	INIT_LIST_HEAD(&child->ptraced);
	child->jobctl = 0;
	child->ptrace = 0;
	child->parent = child->real_parent;

	if (unlikely(ptrace) && current->ptrace) {
		child->ptrace = current->ptrace;
		__ptrace_link(child, current->parent, current->ptracer_cred);

		if (child->ptrace & PT_SEIZED)
			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
		else
			sigaddset(&child->pending.signal, SIGSTOP);
	} else
		child->ptracer_cred = NULL;
}
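
/*
 * Illustrative note (editor's sketch): the fork path derives the @ptrace
 * argument from the clone flags and from whether the forking task is itself
 * being traced with the relevant event enabled, roughly along these lines:
 *
 *	bool trace = (clone_flags & CLONE_PTRACE) ||
 *		     ptrace_event_enabled(current, PTRACE_EVENT_FORK);
 *	ptrace_init_task(child, trace);
 *
 * The exact policy lives in the fork code; the snippet only shows how the
 * boolean feeds into this helper.
 */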

/**
 * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped
 * @task: task in %EXIT_DEAD state
 *
 * Called with write_lock(&tasklist_lock) held.
 */
static inline void ptrace_release_task(struct task_struct *task)
{
	BUG_ON(!list_empty(&task->ptraced));
	ptrace_unlink(task);
	BUG_ON(!list_empty(&task->ptrace_entry));
}

#ifndef force_successful_syscall_return
/*
 * System call handlers that, upon successful completion, need to return a
 * negative value should call force_successful_syscall_return() right before
 * returning.  On architectures where the syscall convention provides for a
 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly
 * others), this macro can be used to ensure that the error flag will not get
 * set.  On architectures which do not support a separate error flag, the macro
 * is a no-op and the spurious error condition needs to be filtered out by some
 * other means (e.g., in user-level, by passing an extra argument to the
 * syscall handler, or something along those lines).
 */
#define force_successful_syscall_return() do { } while (0)
#endif
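
/*
 * Usage sketch (editor's illustration; example_sys_get_cookie() is
 * hypothetical, and -0x1000 stands in for a legitimate negative result):
 * a handler whose successful return value happens to look like a negative
 * errno calls the macro right before returning, so that architectures with
 * a separate error flag do not mark the syscall as having failed.
 *
 *	static long example_sys_get_cookie(void)
 *	{
 *		long cookie = -0x1000;
 *
 *		force_successful_syscall_return();
 *		return cookie;
 *	}
 */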

#ifndef is_syscall_success
/*
 * On most systems we can tell whether a syscall succeeded by checking whether
 * the return value is an error value.  Some systems, such as ia64 and powerpc,
 * have different indicators of success/failure and must define their own.
 */
#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
#endif
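
/*
 * Worked example (editor's addition): with the generic definition above, a
 * return value of -ENOENT (-2) falls in the error range recognized by
 * IS_ERR_VALUE(), so is_syscall_success() yields 0, while a return value of
 * 3 (say, a new file descriptor) yields 1.  Architectures such as ia64 and
 * powerpc override this because they report failure out of band rather than
 * through the return-value range.
 */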

/*
 * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
 *
 * These do-nothing inlines are used when the arch does not
 * implement single-step.  The kerneldoc comments are here
 * to document the interface for all arch definitions.
 */

#ifndef arch_has_single_step
/**
 * arch_has_single_step - does this CPU support user-mode single-step?
 *
 * If this is defined, then there must be function declarations or
 * inlines for user_enable_single_step() and user_disable_single_step().
 * arch_has_single_step() should evaluate to nonzero iff the machine
 * supports instruction single-step for user mode.
 * It can be a constant or it can test a CPU feature bit.
 */
#define arch_has_single_step() (0)

/**
 * user_enable_single_step - single-step in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_single_step() has returned nonzero.
 * Set @task so that when it returns to user mode, it will trap after the
 * next single instruction executes.  If arch_has_block_step() is defined,
 * this must clear the effects of user_enable_block_step() too.
 */
static inline void user_enable_single_step(struct task_struct *task)
{
	BUG();	/* This can never be called. */
}

/**
 * user_disable_single_step - cancel user-mode single-step
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * Clear @task of the effects of user_enable_single_step() and
 * user_enable_block_step().  This can be called whether or not either
 * of those was ever called on @task, and even if arch_has_single_step()
 * returned zero.
 */
static inline void user_disable_single_step(struct task_struct *task)
{
}
#else
extern void user_enable_single_step(struct task_struct *);
extern void user_disable_single_step(struct task_struct *);
#endif /* arch_has_single_step */
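
/*
 * Usage sketch (editor's illustration; the request handling is simplified):
 * a generic resume path guards the single-step helpers with
 * arch_has_single_step(), enabling stepping only for step-style requests and
 * clearing it for a plain continue:
 *
 *	if (request == PTRACE_SINGLESTEP) {
 *		if (!arch_has_single_step())
 *			return -EIO;
 *		user_enable_single_step(child);
 *	} else {
 *		user_disable_single_step(child);
 *	}
 */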

#ifndef arch_has_block_step
/**
 * arch_has_block_step - does this CPU support user-mode block-step?
 *
 * If this is defined, then there must be a function declaration or inline
 * for user_enable_block_step(), and arch_has_single_step() must be defined
 * too.  arch_has_block_step() should evaluate to nonzero iff the machine
 * supports step-until-branch for user mode.  It can be a constant or it
 * can test a CPU feature bit.
 */
#define arch_has_block_step() (0)

/**
 * user_enable_block_step - step until branch in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_block_step() has returned nonzero,
 * and will never be called when single-instruction stepping is being used.
 * Set @task so that when it returns to user mode, it will trap after the
 * next branch or trap taken.
 */
static inline void user_enable_block_step(struct task_struct *task)
{
	BUG();	/* This can never be called. */
}
#else
extern void user_enable_block_step(struct task_struct *);
#endif /* arch_has_block_step */

#ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT
extern void user_single_step_report(struct pt_regs *regs);
#else
static inline void user_single_step_report(struct pt_regs *regs)
{
	kernel_siginfo_t info;
	clear_siginfo(&info);
	info.si_signo = SIGTRAP;
	info.si_errno = 0;
	info.si_code = SI_USER;
	info.si_pid = 0;
	info.si_uid = 0;
	force_sig_info(&info);
}
#endif

#ifndef arch_ptrace_stop_needed
/**
 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
 *
 * This is called with the siglock held, to decide whether or not it's
 * necessary to release the siglock and call arch_ptrace_stop().  It can be
 * defined to a constant if arch_ptrace_stop() is never required, or always
 * is.  On machines where this makes sense, it should be defined to a quick
 * test to optimize out calling arch_ptrace_stop() when it would be
 * superfluous.  For example, if the thread has not been back to user mode
 * since the last stop, the thread state might indicate that nothing needs
 * to be done.
 *
 * This is guaranteed to be invoked once before a task stops for ptrace and
 * may include arch-specific operations necessary prior to a ptrace stop.
 */
#define arch_ptrace_stop_needed() (0)
#endif

#ifndef arch_ptrace_stop
/**
 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
 *
 * This is called with no locks held when arch_ptrace_stop_needed() has
 * just returned nonzero.  It is allowed to block, e.g. for user memory
 * access.  The arch can have machine-specific work to be done before
 * ptrace stops.  On ia64, register backing store gets written back to user
 * memory here.  Since this can be costly (requires dropping the siglock),
 * we only do it when the arch requires it for this particular stop, as
 * indicated by arch_ptrace_stop_needed().
 */
#define arch_ptrace_stop() do { } while (0)
#endif

#ifndef current_pt_regs
#define current_pt_regs() task_pt_regs(current)
#endif

#ifndef current_user_stack_pointer
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
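
/*
 * Illustrative sketch (editor's addition; example_fill_sp() is hypothetical):
 * these fallbacks let generic code read the current task's registers and
 * user stack pointer without arch-specific #ifdefs, e.g. when filling in the
 * sp field of a struct syscall_info for the running task:
 *
 *	static void example_fill_sp(struct syscall_info *info)
 *	{
 *		info->sp = current_user_stack_pointer();
 *	}
 */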

#ifndef exception_ip
#define exception_ip(x) instruction_pointer(x)
#endif

extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
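
/*
 * Usage sketch (editor's illustration): task_current_syscall() fills a
 * struct syscall_info with the seccomp_data view of the target's current
 * syscall plus the stack pointer added above; it returns 0 on success.
 * A hypothetical caller:
 *
 *	struct syscall_info info;
 *
 *	if (!task_current_syscall(task, &info))
 *		pr_debug("in syscall %d, sp=%llx\n",
 *			 info.data.nr, (unsigned long long)info.sp);
 */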

extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);

/*
 * ptrace report for syscall entry and exit looks identical.
 */
static inline int ptrace_report_syscall(unsigned long message)
{
	int ptrace = current->ptrace;
	int signr;

	if (!(ptrace & PT_PTRACED))
		return 0;

	signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0),
			      message);

	/*
	 * This isn't the same as continuing with a signal, but it will do
	 * for normal use.  strace only continues with a signal if the
	 * stopping signal is not SIGTRAP.  -brl
	 */
	if (signr)
		send_sig(signr, current, 1);

	return fatal_signal_pending(current);
}
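
/*
 * Worked example (editor's addition): with PTRACE_O_TRACESYSGOOD set, the
 * tracee stops with exit code SIGTRAP | 0x80, so a tracer using waitpid()
 * sees WSTOPSIG(status) == 0x85 and can distinguish a syscall stop from an
 * ordinary SIGTRAP; without the option both stops look like WSTOPSIG() ==
 * SIGTRAP.
 */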

/**
 * ptrace_report_syscall_entry - task is about to attempt a system call
 * @regs: user register state of current task
 *
 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
 * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
 * entered the kernel for a system call.  Full user register state is
 * available here.  Changing the values in @regs can affect the system
 * call number and arguments to be tried.  It is safe to block here,
 * preventing the system call from beginning.
 *
 * Returns zero normally, or nonzero if the calling arch code should abort
 * the system call.  That must prevent normal entry so no system call is
 * made.  If @task ever returns to user mode after this, its register state
 * is unspecified, but should be something harmless like an %ENOSYS error
 * return.  It should preserve enough information so that syscall_rollback()
 * can work (see asm-generic/syscall.h).
 *
 * Called without locks, just after entering kernel mode.
 */
static inline __must_check int ptrace_report_syscall_entry(
	struct pt_regs *regs)
{
	return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
}

/**
 * ptrace_report_syscall_exit - task has just finished a system call
 * @regs: user register state of current task
 * @step: nonzero if simulating single-step or block-step
 *
 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when
 * the current task has just finished an attempted system call.  Full
 * user register state is available here.  It is safe to block here,
 * preventing signals from being processed.
 *
 * If @step is nonzero, this report is also in lieu of the normal
 * trap that would follow the system call instruction because
 * user_enable_block_step() or user_enable_single_step() was used.
 * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set.
 *
 * Called without locks, just before checking for pending signals.
 */
static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step)
{
	if (step)
		user_single_step_report(regs);
	else
		ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
}
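
/*
 * Illustrative sketch (editor's addition, not the actual entry/exit code):
 * generic syscall work handling calls the two reports roughly like this,
 * aborting the syscall when the entry report asks for it:
 *
 *	// on entry, with SYSCALL_WORK_SYSCALL_TRACE or _EMU set:
 *	if (ptrace_report_syscall_entry(regs))
 *		return -1L;	// arch code then skips the syscall
 *
 *	// on exit, where 'step' reflects single-/block-stepping:
 *	ptrace_report_syscall_exit(regs, step);
 */
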
#endif /* _LINUX_PTRACE_H */