| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
| 4 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs |
| 5 | * |
| 6 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson |
| 7 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
| 8 | * 2000-2002 x86-64 support by Andi Kleen |
| 9 | */ |
| 10 | |
| 11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 12 | |
| 13 | #include <linux/sched.h> |
| 14 | #include <linux/sched/task_stack.h> |
| 15 | #include <linux/mm.h> |
| 16 | #include <linux/smp.h> |
| 17 | #include <linux/kernel.h> |
| 18 | #include <linux/kstrtox.h> |
| 19 | #include <linux/errno.h> |
| 20 | #include <linux/wait.h> |
| 21 | #include <linux/unistd.h> |
| 22 | #include <linux/stddef.h> |
| 23 | #include <linux/personality.h> |
| 24 | #include <linux/uaccess.h> |
| 25 | #include <linux/user-return-notifier.h> |
| 26 | #include <linux/uprobes.h> |
| 27 | #include <linux/context_tracking.h> |
| 28 | #include <linux/entry-common.h> |
| 29 | #include <linux/syscalls.h> |
| 30 | #include <linux/rseq.h> |
| 31 | |
| 32 | #include <asm/processor.h> |
| 33 | #include <asm/ucontext.h> |
| 34 | #include <asm/fpu/signal.h> |
| 35 | #include <asm/fpu/xstate.h> |
| 36 | #include <asm/vdso.h> |
| 37 | #include <asm/mce.h> |
| 38 | #include <asm/sighandling.h> |
| 39 | #include <asm/vm86.h> |
| 40 | |
| 41 | #include <asm/syscall.h> |
| 42 | #include <asm/sigframe.h> |
| 43 | #include <asm/signal.h> |
| 44 | #include <asm/shstk.h> |
| 45 | |
| 46 | static inline int is_ia32_compat_frame(struct ksignal *ksig) |
| 47 | { |
| 48 | return IS_ENABLED(CONFIG_IA32_EMULATION) && |
| 49 | ksig->ka.sa.sa_flags & SA_IA32_ABI; |
| 50 | } |
| 51 | |
| 52 | static inline int is_ia32_frame(struct ksignal *ksig) |
| 53 | { |
| 54 | return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig); |
| 55 | } |
| 56 | |
| 57 | static inline int is_x32_frame(struct ksignal *ksig) |
| 58 | { |
| 59 | return IS_ENABLED(CONFIG_X86_X32_ABI) && |
| 60 | ksig->ka.sa.sa_flags & SA_X32_ABI; |
| 61 | } |
| 62 | |
| 63 | /* |
| 64 | * Enable all pkeys temporarily, so as to ensure that both the current |
| 65 | * execution stack as well as the alternate signal stack are writeable. |
| 66 | * The application can use any of the available pkeys to protect the |
| 67 | * alternate signal stack, and we don't know which one it is, so enable |
| 68 | * all. The PKRU register will be reset to init_pkru later in the flow, |
| 69 | * in fpu__clear_user_states(), and it is the application's responsibility |
| 70 | * to enable the appropriate pkey as the first step in the signal handler |
| 71 | * so that the handler does not segfault. |
| 72 | */ |
| 73 | static inline u32 sig_prepare_pkru(void) |
| 74 | { |
| 75 | u32 orig_pkru = read_pkru(); |
| 76 | |
| 77 | write_pkru(pkru: 0); |
| 78 | return orig_pkru; |
| 79 | } |
| 80 | |
| 81 | /* |
| 82 | * Set up a signal frame. |
| 83 | */ |
| 84 | |
/* The x86 ABI requires signal frames to be 16-byte aligned. */
#define FRAME_ALIGNMENT		16UL

/* Most bytes that aligning to FRAME_ALIGNMENT can ever consume. */
#define MAX_FRAME_PADDING	(FRAME_ALIGNMENT - 1)
| 89 | |
| 90 | /* |
| 91 | * Determine which stack to use.. |
| 92 | */ |
| 93 | void __user * |
| 94 | get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, |
| 95 | void __user **fpstate) |
| 96 | { |
| 97 | struct k_sigaction *ka = &ksig->ka; |
| 98 | int ia32_frame = is_ia32_frame(ksig); |
| 99 | /* Default to using normal stack */ |
| 100 | bool nested_altstack = on_sig_stack(sp: regs->sp); |
| 101 | bool entering_altstack = false; |
| 102 | unsigned long math_size = 0; |
| 103 | unsigned long sp = regs->sp; |
| 104 | unsigned long buf_fx = 0; |
| 105 | u32 pkru; |
| 106 | |
| 107 | /* redzone */ |
| 108 | if (!ia32_frame) |
| 109 | sp -= 128; |
| 110 | |
| 111 | /* This is the X/Open sanctioned signal stack switching. */ |
| 112 | if (ka->sa.sa_flags & SA_ONSTACK) { |
| 113 | /* |
| 114 | * This checks nested_altstack via sas_ss_flags(). Sensible |
| 115 | * programs use SS_AUTODISARM, which disables that check, and |
| 116 | * programs that don't use SS_AUTODISARM get compatible. |
| 117 | */ |
| 118 | if (sas_ss_flags(sp) == 0) { |
| 119 | sp = current->sas_ss_sp + current->sas_ss_size; |
| 120 | entering_altstack = true; |
| 121 | } |
| 122 | } else if (ia32_frame && |
| 123 | !nested_altstack && |
| 124 | regs->ss != __USER_DS && |
| 125 | !(ka->sa.sa_flags & SA_RESTORER) && |
| 126 | ka->sa.sa_restorer) { |
| 127 | /* This is the legacy signal stack switching. */ |
| 128 | sp = (unsigned long) ka->sa.sa_restorer; |
| 129 | entering_altstack = true; |
| 130 | } |
| 131 | |
| 132 | sp = fpu__alloc_mathframe(sp, ia32_frame, buf_fx: &buf_fx, size: &math_size); |
| 133 | *fpstate = (void __user *)sp; |
| 134 | |
| 135 | sp -= frame_size; |
| 136 | |
| 137 | if (ia32_frame) |
| 138 | /* |
| 139 | * Align the stack pointer according to the i386 ABI, |
| 140 | * i.e. so that on function entry ((sp + 4) & 15) == 0. |
| 141 | */ |
| 142 | sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4; |
| 143 | else |
| 144 | sp = round_down(sp, FRAME_ALIGNMENT) - 8; |
| 145 | |
| 146 | /* |
| 147 | * If we are on the alternate signal stack and would overflow it, don't. |
| 148 | * Return an always-bogus address instead so we will die with SIGSEGV. |
| 149 | */ |
| 150 | if (unlikely((nested_altstack || entering_altstack) && |
| 151 | !__on_sig_stack(sp))) { |
| 152 | |
| 153 | if (show_unhandled_signals && printk_ratelimit()) |
| 154 | pr_info("%s[%d] overflowed sigaltstack\n" , |
| 155 | current->comm, task_pid_nr(current)); |
| 156 | |
| 157 | return (void __user *)-1L; |
| 158 | } |
| 159 | |
| 160 | /* Update PKRU to enable access to the alternate signal stack. */ |
| 161 | pkru = sig_prepare_pkru(); |
| 162 | /* save i387 and extended state */ |
| 163 | if (!copy_fpstate_to_sigframe(buf: *fpstate, fp: (void __user *)buf_fx, size: math_size, pkru)) { |
| 164 | /* |
| 165 | * Restore PKRU to the original, user-defined value; disable |
| 166 | * extra pkeys enabled for the alternate signal stack, if any. |
| 167 | */ |
| 168 | write_pkru(pkru); |
| 169 | return (void __user *)-1L; |
| 170 | } |
| 171 | |
| 172 | return (void __user *)sp; |
| 173 | } |
| 174 | |
/*
 * There are four different struct types for signal frame: sigframe_ia32,
 * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case
 * -- the largest size. It means the size for 64-bit apps is a bit more
 * than needed, but this keeps the code simple.
 */
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
# define MAX_FRAME_SIGINFO_UCTXT_SIZE	sizeof(struct sigframe_ia32)
#else
# define MAX_FRAME_SIGINFO_UCTXT_SIZE	sizeof(struct rt_sigframe)
#endif

/*
 * The FP state frame contains an XSAVE buffer which must be 64-byte aligned.
 * If a signal frame starts at an unaligned address, extra space is required.
 * This is the max alignment padding, conservatively.
 */
#define MAX_XSAVE_PADDING	63UL
| 193 | |
| 194 | /* |
| 195 | * The frame data is composed of the following areas and laid out as: |
| 196 | * |
| 197 | * ------------------------- |
| 198 | * | alignment padding | |
| 199 | * ------------------------- |
| 200 | * | (f)xsave frame | |
| 201 | * ------------------------- |
| 202 | * | fsave header | |
| 203 | * ------------------------- |
| 204 | * | alignment padding | |
| 205 | * ------------------------- |
| 206 | * | siginfo + ucontext | |
| 207 | * ------------------------- |
| 208 | */ |
| 209 | |
| 210 | /* max_frame_size tells userspace the worst case signal stack size. */ |
| 211 | static unsigned long __ro_after_init max_frame_size; |
| 212 | static unsigned int __ro_after_init fpu_default_state_size; |
| 213 | |
| 214 | static int __init init_sigframe_size(void) |
| 215 | { |
| 216 | fpu_default_state_size = fpu__get_fpstate_size(); |
| 217 | |
| 218 | max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING; |
| 219 | |
| 220 | max_frame_size += fpu_default_state_size + MAX_XSAVE_PADDING; |
| 221 | |
| 222 | /* Userspace expects an aligned size. */ |
| 223 | max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); |
| 224 | |
| 225 | pr_info("max sigframe size: %lu\n" , max_frame_size); |
| 226 | return 0; |
| 227 | } |
| 228 | early_initcall(init_sigframe_size); |
| 229 | |
| 230 | unsigned long get_sigframe_size(void) |
| 231 | { |
| 232 | return max_frame_size; |
| 233 | } |
| 234 | |
| 235 | static int |
| 236 | setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) |
| 237 | { |
| 238 | /* Perform fixup for the pre-signal frame. */ |
| 239 | rseq_signal_deliver(ksig, regs); |
| 240 | |
| 241 | /* Set up the stack frame */ |
| 242 | if (is_ia32_frame(ksig)) { |
| 243 | if (ksig->ka.sa.sa_flags & SA_SIGINFO) |
| 244 | return ia32_setup_rt_frame(ksig, regs); |
| 245 | else |
| 246 | return ia32_setup_frame(ksig, regs); |
| 247 | } else if (is_x32_frame(ksig)) { |
| 248 | return x32_setup_rt_frame(ksig, regs); |
| 249 | } else { |
| 250 | return x64_setup_rt_frame(ksig, regs); |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | static void |
| 255 | handle_signal(struct ksignal *ksig, struct pt_regs *regs) |
| 256 | { |
| 257 | bool stepping, failed; |
| 258 | struct fpu *fpu = x86_task_fpu(current); |
| 259 | |
| 260 | if (v8086_mode(regs)) |
| 261 | save_v86_state(a: (struct kernel_vm86_regs *) regs, VM86_SIGNAL); |
| 262 | |
| 263 | /* Are we from a system call? */ |
| 264 | if (syscall_get_nr(current, regs) != -1) { |
| 265 | /* If so, check system call restarting.. */ |
| 266 | switch (syscall_get_error(current, regs)) { |
| 267 | case -ERESTART_RESTARTBLOCK: |
| 268 | case -ERESTARTNOHAND: |
| 269 | regs->ax = -EINTR; |
| 270 | break; |
| 271 | |
| 272 | case -ERESTARTSYS: |
| 273 | if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { |
| 274 | regs->ax = -EINTR; |
| 275 | break; |
| 276 | } |
| 277 | fallthrough; |
| 278 | case -ERESTARTNOINTR: |
| 279 | regs->ax = regs->orig_ax; |
| 280 | regs->ip -= 2; |
| 281 | break; |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | /* |
| 286 | * If TF is set due to a debugger (TIF_FORCED_TF), clear TF now |
| 287 | * so that register information in the sigcontext is correct and |
| 288 | * then notify the tracer before entering the signal handler. |
| 289 | */ |
| 290 | stepping = test_thread_flag(TIF_SINGLESTEP); |
| 291 | if (stepping) |
| 292 | user_disable_single_step(current); |
| 293 | |
| 294 | failed = (setup_rt_frame(ksig, regs) < 0); |
| 295 | if (!failed) { |
| 296 | /* |
| 297 | * Clear the direction flag as per the ABI for function entry. |
| 298 | * |
| 299 | * Clear RF when entering the signal handler, because |
| 300 | * it might disable possible debug exception from the |
| 301 | * signal handler. |
| 302 | * |
| 303 | * Clear TF for the case when it wasn't set by debugger to |
| 304 | * avoid the recursive send_sigtrap() in SIGTRAP handler. |
| 305 | */ |
| 306 | regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); |
| 307 | /* |
| 308 | * Ensure the signal handler starts with the new fpu state. |
| 309 | */ |
| 310 | fpu__clear_user_states(fpu); |
| 311 | } |
| 312 | signal_setup_done(failed, ksig, stepping); |
| 313 | } |
| 314 | |
| 315 | static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
| 316 | { |
| 317 | #ifdef CONFIG_IA32_EMULATION |
| 318 | if (current->restart_block.arch_data & TS_COMPAT) |
| 319 | return __NR_ia32_restart_syscall; |
| 320 | #endif |
| 321 | #ifdef CONFIG_X86_X32_ABI |
| 322 | return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); |
| 323 | #else |
| 324 | return __NR_restart_syscall; |
| 325 | #endif |
| 326 | } |
| 327 | |
| 328 | /* |
| 329 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
| 330 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
| 331 | * mistake. |
| 332 | */ |
| 333 | void arch_do_signal_or_restart(struct pt_regs *regs) |
| 334 | { |
| 335 | struct ksignal ksig; |
| 336 | |
| 337 | if (get_signal(ksig: &ksig)) { |
| 338 | /* Whee! Actually deliver the signal. */ |
| 339 | handle_signal(ksig: &ksig, regs); |
| 340 | return; |
| 341 | } |
| 342 | |
| 343 | /* Did we come from a system call? */ |
| 344 | if (syscall_get_nr(current, regs) != -1) { |
| 345 | /* Restart the system call - no handlers present */ |
| 346 | switch (syscall_get_error(current, regs)) { |
| 347 | case -ERESTARTNOHAND: |
| 348 | case -ERESTARTSYS: |
| 349 | case -ERESTARTNOINTR: |
| 350 | regs->ax = regs->orig_ax; |
| 351 | regs->ip -= 2; |
| 352 | break; |
| 353 | |
| 354 | case -ERESTART_RESTARTBLOCK: |
| 355 | regs->ax = get_nr_restart_syscall(regs); |
| 356 | regs->ip -= 2; |
| 357 | break; |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | /* |
| 362 | * If there's no signal to deliver, we just put the saved sigmask |
| 363 | * back. |
| 364 | */ |
| 365 | restore_saved_sigmask(); |
| 366 | } |
| 367 | |
| 368 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
| 369 | { |
| 370 | struct task_struct *me = current; |
| 371 | |
| 372 | if (show_unhandled_signals && printk_ratelimit()) { |
| 373 | printk("%s" |
| 374 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx" , |
| 375 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, |
| 376 | me->comm, me->pid, where, frame, |
| 377 | regs->ip, regs->sp, regs->orig_ax); |
| 378 | print_vma_addr(KERN_CONT " in " , rip: regs->ip); |
| 379 | pr_cont("\n" ); |
| 380 | } |
| 381 | |
| 382 | force_sig(SIGSEGV); |
| 383 | } |
| 384 | |
| 385 | #ifdef CONFIG_DYNAMIC_SIGFRAME |
| 386 | #ifdef CONFIG_STRICT_SIGALTSTACK_SIZE |
| 387 | static bool strict_sigaltstack_size __ro_after_init = true; |
| 388 | #else |
| 389 | static bool strict_sigaltstack_size __ro_after_init = false; |
| 390 | #endif |
| 391 | |
| 392 | static int __init strict_sas_size(char *arg) |
| 393 | { |
| 394 | return kstrtobool(s: arg, res: &strict_sigaltstack_size) == 0; |
| 395 | } |
| 396 | __setup("strict_sas_size" , strict_sas_size); |
| 397 | |
| 398 | /* |
| 399 | * MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512 |
| 400 | * exceeds that size already. As such programs might never use the |
| 401 | * sigaltstack they just continued to work. While always checking against |
| 402 | * the real size would be correct, this might be considered a regression. |
| 403 | * |
| 404 | * Therefore avoid the sanity check, unless enforced by kernel |
| 405 | * configuration or command line option. |
| 406 | * |
| 407 | * When dynamic FPU features are supported, the check is also enforced when |
| 408 | * the task has permissions to use dynamic features. Tasks which have no |
| 409 | * permission are checked against the size of the non-dynamic feature set |
| 410 | * if strict checking is enabled. This avoids forcing all tasks on the |
| 411 | * system to allocate large sigaltstacks even if they are never going |
| 412 | * to use a dynamic feature. As this is serialized via sighand::siglock |
| 413 | * any permission request for a dynamic feature either happened already |
| 414 | * or will see the newly install sigaltstack size in the permission checks. |
| 415 | */ |
| 416 | bool sigaltstack_size_valid(size_t ss_size) |
| 417 | { |
| 418 | unsigned long fsize = max_frame_size - fpu_default_state_size; |
| 419 | u64 mask; |
| 420 | |
| 421 | lockdep_assert_held(¤t->sighand->siglock); |
| 422 | |
| 423 | if (!fpu_state_size_dynamic() && !strict_sigaltstack_size) |
| 424 | return true; |
| 425 | |
| 426 | fsize += x86_task_fpu(current->group_leader)->perm.__user_state_size; |
| 427 | if (likely(ss_size > fsize)) |
| 428 | return true; |
| 429 | |
| 430 | if (strict_sigaltstack_size) |
| 431 | return ss_size > fsize; |
| 432 | |
| 433 | mask = x86_task_fpu(current->group_leader)->perm.__state_perm; |
| 434 | if (mask & XFEATURE_MASK_USER_DYNAMIC) |
| 435 | return ss_size > fsize; |
| 436 | |
| 437 | return true; |
| 438 | } |
| 439 | #endif /* CONFIG_DYNAMIC_SIGFRAME */ |
| 440 | |