| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | |
| 3 | #include <linux/compat.h> |
| 4 | #include <linux/context_tracking.h> |
| 5 | #include <linux/randomize_kstack.h> |
| 6 | |
| 7 | #include <asm/interrupt.h> |
| 8 | #include <asm/kup.h> |
| 9 | #include <asm/syscall.h> |
| 10 | #include <asm/time.h> |
| 11 | #include <asm/tm.h> |
| 12 | #include <asm/unistd.h> |
| 13 | |
| 14 | |
| 15 | /* Has to run notrace because it is entered not completely "reconciled" */ |
| 16 | notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) |
| 17 | { |
| 18 | long ret; |
| 19 | syscall_fn f; |
| 20 | |
| 21 | kuap_lock(); |
| 22 | |
| 23 | add_random_kstack_offset(); |
| 24 | |
| 25 | if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) |
| 26 | BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); |
| 27 | |
| 28 | trace_hardirqs_off(); /* finish reconciling */ |
| 29 | |
| 30 | CT_WARN_ON(ct_state() == CT_STATE_KERNEL); |
| 31 | user_exit_irqoff(); |
| 32 | |
| 33 | BUG_ON(regs_is_unrecoverable(regs)); |
| 34 | BUG_ON(!user_mode(regs)); |
| 35 | BUG_ON(arch_irq_disabled_regs(regs)); |
| 36 | |
| 37 | #ifdef CONFIG_PPC_PKEY |
| 38 | if (mmu_has_feature(MMU_FTR_PKEY)) { |
| 39 | unsigned long amr, iamr; |
| 40 | bool flush_needed = false; |
| 41 | /* |
| 42 | * When entering from userspace we mostly have the AMR/IAMR |
| 43 | * different from kernel default values. Hence don't compare. |
| 44 | */ |
| 45 | amr = mfspr(SPRN_AMR); |
| 46 | iamr = mfspr(SPRN_IAMR); |
| 47 | regs->amr = amr; |
| 48 | regs->iamr = iamr; |
| 49 | if (mmu_has_feature(MMU_FTR_KUAP)) { |
| 50 | mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); |
| 51 | flush_needed = true; |
| 52 | } |
| 53 | if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { |
| 54 | mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); |
| 55 | flush_needed = true; |
| 56 | } |
| 57 | if (flush_needed) |
| 58 | isync(); |
| 59 | } else |
| 60 | #endif |
| 61 | kuap_assert_locked(); |
| 62 | |
| 63 | booke_restore_dbcr0(); |
| 64 | |
| 65 | account_cpu_user_entry(); |
| 66 | |
| 67 | account_stolen_time(); |
| 68 | |
| 69 | /* |
| 70 | * This is not required for the syscall exit path, but makes the |
| 71 | * stack frame look nicer. If this was initialised in the first stack |
| 72 | * frame, or if the unwinder was taught the first stack frame always |
| 73 | * returns to user with IRQS_ENABLED, this store could be avoided! |
| 74 | */ |
| 75 | irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); |
| 76 | |
| 77 | /* |
| 78 | * If system call is called with TM active, set _TIF_RESTOREALL to |
| 79 | * prevent RFSCV being used to return to userspace, because POWER9 |
| 80 | * TM implementation has problems with this instruction returning to |
| 81 | * transactional state. Final register values are not relevant because |
| 82 | * the transaction will be aborted upon return anyway. Or in the case |
| 83 | * of unsupported_scv SIGILL fault, the return state does not much |
| 84 | * matter because it's an edge case. |
| 85 | */ |
| 86 | if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && |
| 87 | unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) |
| 88 | set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); |
| 89 | |
| 90 | /* |
| 91 | * If the system call was made with a transaction active, doom it and |
| 92 | * return without performing the system call. Unless it was an |
| 93 | * unsupported scv vector, in which case it's treated like an illegal |
| 94 | * instruction. |
| 95 | */ |
| 96 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
| 97 | if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && |
| 98 | !trap_is_unsupported_scv(regs)) { |
| 99 | /* Enable TM in the kernel, and disable EE (for scv) */ |
| 100 | hard_irq_disable(); |
| 101 | mtmsr(mfmsr() | MSR_TM); |
| 102 | |
| 103 | /* tabort, this dooms the transaction, nothing else */ |
| 104 | asm volatile(".long 0x7c00071d | ((%0) << 16)" |
| 105 | :: "r" (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); |
| 106 | |
| 107 | /* |
| 108 | * Userspace will never see the return value. Execution will |
| 109 | * resume after the tbegin. of the aborted transaction with the |
| 110 | * checkpointed register state. A context switch could occur |
| 111 | * or signal delivered to the process before resuming the |
| 112 | * doomed transaction context, but that should all be handled |
| 113 | * as expected. |
| 114 | */ |
| 115 | return -ENOSYS; |
| 116 | } |
| 117 | #endif // CONFIG_PPC_TRANSACTIONAL_MEM |
| 118 | |
| 119 | local_irq_enable(); |
| 120 | |
| 121 | if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { |
| 122 | if (unlikely(trap_is_unsupported_scv(regs))) { |
| 123 | /* Unsupported scv vector */ |
| 124 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); |
| 125 | return regs->gpr[3]; |
| 126 | } |
| 127 | /* |
| 128 | * We use the return value of do_syscall_trace_enter() as the |
| 129 | * syscall number. If the syscall was rejected for any reason |
| 130 | * do_syscall_trace_enter() returns an invalid syscall number |
| 131 | * and the test against NR_syscalls will fail and the return |
| 132 | * value to be used is in regs->gpr[3]. |
| 133 | */ |
| 134 | r0 = do_syscall_trace_enter(regs); |
| 135 | if (unlikely(r0 >= NR_syscalls)) |
| 136 | return regs->gpr[3]; |
| 137 | |
| 138 | } else if (unlikely(r0 >= NR_syscalls)) { |
| 139 | if (unlikely(trap_is_unsupported_scv(regs))) { |
| 140 | /* Unsupported scv vector */ |
| 141 | _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); |
| 142 | return regs->gpr[3]; |
| 143 | } |
| 144 | return -ENOSYS; |
| 145 | } |
| 146 | |
| 147 | /* May be faster to do array_index_nospec? */ |
| 148 | barrier_nospec(); |
| 149 | |
| 150 | #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER |
| 151 | // No COMPAT if we have SYSCALL_WRAPPER, see Kconfig |
| 152 | f = (void *)sys_call_table[r0]; |
| 153 | ret = f(regs); |
| 154 | #else |
| 155 | if (unlikely(is_compat_task())) { |
| 156 | unsigned long r3, r4, r5, r6, r7, r8; |
| 157 | |
| 158 | f = (void *)compat_sys_call_table[r0]; |
| 159 | |
| 160 | r3 = regs->gpr[3] & 0x00000000ffffffffULL; |
| 161 | r4 = regs->gpr[4] & 0x00000000ffffffffULL; |
| 162 | r5 = regs->gpr[5] & 0x00000000ffffffffULL; |
| 163 | r6 = regs->gpr[6] & 0x00000000ffffffffULL; |
| 164 | r7 = regs->gpr[7] & 0x00000000ffffffffULL; |
| 165 | r8 = regs->gpr[8] & 0x00000000ffffffffULL; |
| 166 | |
| 167 | ret = f(r3, r4, r5, r6, r7, r8); |
| 168 | } else { |
| 169 | f = (void *)sys_call_table[r0]; |
| 170 | |
| 171 | ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5], |
| 172 | regs->gpr[6], regs->gpr[7], regs->gpr[8]); |
| 173 | } |
| 174 | #endif |
| 175 | |
| 176 | /* |
| 177 | * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), |
| 178 | * so the maximum stack offset is 1k bytes (10 bits). |
| 179 | * |
| 180 | * The actual entropy will be further reduced by the compiler when |
| 181 | * applying stack alignment constraints: the powerpc architecture |
| 182 | * may have two kinds of stack alignment (16-bytes and 8-bytes). |
| 183 | * |
| 184 | * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3]. |
| 185 | */ |
| 186 | choose_random_kstack_offset(mftb()); |
| 187 | |
| 188 | return ret; |
| 189 | } |
| 190 | |