1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | #include <linux/extable.h> |
3 | #include <linux/uaccess.h> |
4 | #include <linux/sched/debug.h> |
5 | #include <linux/bitfield.h> |
6 | #include <xen/xen.h> |
7 | |
8 | #include <asm/fpu/api.h> |
9 | #include <asm/sev.h> |
10 | #include <asm/traps.h> |
11 | #include <asm/kdebug.h> |
12 | #include <asm/insn-eval.h> |
13 | #include <asm/sgx.h> |
14 | |
/*
 * Map register number @nr to the address of its slot inside @regs.
 *
 * The slot is located by adding the byte offset reported by
 * pt_regs_offset() to the base address of @regs.  A negative offset
 * triggers a one-time warning and yields a pointer to a static scratch
 * word instead, so the caller always gets something it may dereference.
 */
static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
{
	static unsigned long scratch_slot;
	int slot_offset = pt_regs_offset(regs, nr);

	if (!WARN_ON_ONCE(slot_offset < 0))
		return (unsigned long *)((unsigned long)regs + slot_offset);

	return &scratch_slot;
}
25 | |
26 | static inline unsigned long |
27 | ex_fixup_addr(const struct exception_table_entry *x) |
28 | { |
29 | return (unsigned long)&x->fixup + x->fixup; |
30 | } |
31 | |
32 | static bool ex_handler_default(const struct exception_table_entry *e, |
33 | struct pt_regs *regs) |
34 | { |
35 | if (e->data & EX_FLAG_CLEAR_AX) |
36 | regs->ax = 0; |
37 | if (e->data & EX_FLAG_CLEAR_DX) |
38 | regs->dx = 0; |
39 | |
40 | regs->ip = ex_fixup_addr(x: e); |
41 | return true; |
42 | } |
43 | |
44 | /* |
45 | * This is the *very* rare case where we do a "load_unaligned_zeropad()" |
46 | * and it's a page crosser into a non-existent page. |
47 | * |
48 | * This happens when we optimistically load a pathname a word-at-a-time |
49 | * and the name is less than the full word and the next page is not |
50 | * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC. |
51 | * |
52 | * NOTE! The faulting address is always a 'mov mem,reg' type instruction |
53 | * of size 'long', and the exception fixup must always point to right |
54 | * after the instruction. |
55 | */ |
56 | static bool ex_handler_zeropad(const struct exception_table_entry *e, |
57 | struct pt_regs *regs, |
58 | unsigned long fault_addr) |
59 | { |
60 | struct insn insn; |
61 | const unsigned long mask = sizeof(long) - 1; |
62 | unsigned long offset, addr, next_ip, len; |
63 | unsigned long *reg; |
64 | |
65 | next_ip = ex_fixup_addr(x: e); |
66 | len = next_ip - regs->ip; |
67 | if (len > MAX_INSN_SIZE) |
68 | return false; |
69 | |
70 | if (insn_decode(insn: &insn, kaddr: (void *) regs->ip, buf_len: len, m: INSN_MODE_KERN)) |
71 | return false; |
72 | if (insn.length != len) |
73 | return false; |
74 | |
75 | if (insn.opcode.bytes[0] != 0x8b) |
76 | return false; |
77 | if (insn.opnd_bytes != sizeof(long)) |
78 | return false; |
79 | |
80 | addr = (unsigned long) insn_get_addr_ref(insn: &insn, regs); |
81 | if (addr == ~0ul) |
82 | return false; |
83 | |
84 | offset = addr & mask; |
85 | addr = addr & ~mask; |
86 | if (fault_addr != addr + sizeof(long)) |
87 | return false; |
88 | |
89 | reg = insn_get_modrm_reg_ptr(insn: &insn, regs); |
90 | if (!reg) |
91 | return false; |
92 | |
93 | *reg = *(unsigned long *)addr >> (offset * 8); |
94 | return ex_handler_default(e, regs); |
95 | } |
96 | |
97 | static bool ex_handler_fault(const struct exception_table_entry *fixup, |
98 | struct pt_regs *regs, int trapnr) |
99 | { |
100 | regs->ax = trapnr; |
101 | return ex_handler_default(e: fixup, regs); |
102 | } |
103 | |
104 | static bool ex_handler_sgx(const struct exception_table_entry *fixup, |
105 | struct pt_regs *regs, int trapnr) |
106 | { |
107 | regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG; |
108 | return ex_handler_default(e: fixup, regs); |
109 | } |
110 | |
111 | /* |
112 | * Handler for when we fail to restore a task's FPU state. We should never get |
113 | * here because the FPU state of a task using the FPU (task->thread.fpu.state) |
114 | * should always be valid. However, past bugs have allowed userspace to set |
115 | * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn(). |
116 | * These caused XRSTOR to fail when switching to the task, leaking the FPU |
117 | * registers of the task previously executing on the CPU. Mitigate this class |
118 | * of vulnerability by restoring from the initial state (essentially, zeroing |
119 | * out all the FPU registers) if we can't restore from the task's FPU state. |
120 | */ |
121 | static bool ex_handler_fprestore(const struct exception_table_entry *fixup, |
122 | struct pt_regs *regs) |
123 | { |
124 | regs->ip = ex_fixup_addr(x: fixup); |
125 | |
126 | WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers." , |
127 | (void *)instruction_pointer(regs)); |
128 | |
129 | fpu_reset_from_exception_fixup(); |
130 | return true; |
131 | } |
132 | |
/*
 * Decide whether a #GP in a user access hit an address that access_ok()
 * is known to let through.
 *
 * On x86-64, access_ok() is deliberately imprecise: non-canonical user
 * addresses are allowed so that the range comparisons stay simple and LAM
 * needs no special care.  On top of that, up to one page of "slop" is
 * tolerated at the sign boundary, which lets the common small-access case
 * be checked by looking at the sign of the pointer alone.
 *
 * Without CONFIG_X86_64 no address qualifies and the result is false.
 */
static bool gp_fault_address_ok(unsigned long fault_address)
{
#ifdef CONFIG_X86_64
	/*
	 * Either inside the "user space" part of the non-canonical range,
	 * or no more than one page above it.
	 */
	return valid_user_address(fault_address) ||
	       valid_user_address(fault_address - PAGE_SIZE);
#else
	return false;
#endif
}
156 | |
157 | static bool ex_handler_uaccess(const struct exception_table_entry *fixup, |
158 | struct pt_regs *regs, int trapnr, |
159 | unsigned long fault_address) |
160 | { |
161 | WARN_ONCE(trapnr == X86_TRAP_GP && !gp_fault_address_ok(fault_address), |
162 | "General protection fault in user access. Non-canonical address?" ); |
163 | return ex_handler_default(e: fixup, regs); |
164 | } |
165 | |
166 | static bool ex_handler_copy(const struct exception_table_entry *fixup, |
167 | struct pt_regs *regs, int trapnr) |
168 | { |
169 | WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?" ); |
170 | return ex_handler_fault(fixup, regs, trapnr); |
171 | } |
172 | |
173 | static bool ex_handler_msr(const struct exception_table_entry *fixup, |
174 | struct pt_regs *regs, bool wrmsr, bool safe, int reg) |
175 | { |
176 | if (__ONCE_LITE_IF(!safe && wrmsr)) { |
177 | pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n" , |
178 | (unsigned int)regs->cx, (unsigned int)regs->dx, |
179 | (unsigned int)regs->ax, regs->ip, (void *)regs->ip); |
180 | show_stack_regs(regs); |
181 | } |
182 | |
183 | if (__ONCE_LITE_IF(!safe && !wrmsr)) { |
184 | pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n" , |
185 | (unsigned int)regs->cx, regs->ip, (void *)regs->ip); |
186 | show_stack_regs(regs); |
187 | } |
188 | |
189 | if (!wrmsr) { |
190 | /* Pretend that the read succeeded and returned 0. */ |
191 | regs->ax = 0; |
192 | regs->dx = 0; |
193 | } |
194 | |
195 | if (safe) |
196 | *pt_regs_nr(regs, nr: reg) = -EIO; |
197 | |
198 | return ex_handler_default(e: fixup, regs); |
199 | } |
200 | |
201 | static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, |
202 | struct pt_regs *regs) |
203 | { |
204 | if (static_cpu_has(X86_BUG_NULL_SEG)) |
205 | asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); |
206 | asm volatile ("mov %0, %%fs" : : "rm" (0)); |
207 | return ex_handler_default(e: fixup, regs); |
208 | } |
209 | |
210 | static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, |
211 | struct pt_regs *regs, int reg, int imm) |
212 | { |
213 | *pt_regs_nr(regs, nr: reg) = (long)imm; |
214 | return ex_handler_default(e: fixup, regs); |
215 | } |
216 | |
217 | static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup, |
218 | struct pt_regs *regs, int trapnr, |
219 | unsigned long fault_address, |
220 | int reg, int imm) |
221 | { |
222 | regs->cx = imm * regs->cx + *pt_regs_nr(regs, nr: reg); |
223 | return ex_handler_uaccess(fixup, regs, trapnr, fault_address); |
224 | } |
225 | |
226 | int ex_get_fixup_type(unsigned long ip) |
227 | { |
228 | const struct exception_table_entry *e = search_exception_tables(add: ip); |
229 | |
230 | return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE; |
231 | } |
232 | |
233 | int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, |
234 | unsigned long fault_addr) |
235 | { |
236 | const struct exception_table_entry *e; |
237 | int type, reg, imm; |
238 | |
239 | #ifdef CONFIG_PNPBIOS |
240 | if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { |
241 | extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; |
242 | extern u32 pnp_bios_is_utter_crap; |
243 | pnp_bios_is_utter_crap = 1; |
244 | printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n" ); |
245 | __asm__ volatile( |
246 | "movl %0, %%esp\n\t" |
247 | "jmp *%1\n\t" |
248 | : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); |
249 | panic("do_trap: can't hit this" ); |
250 | } |
251 | #endif |
252 | |
253 | e = search_exception_tables(add: regs->ip); |
254 | if (!e) |
255 | return 0; |
256 | |
257 | type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); |
258 | reg = FIELD_GET(EX_DATA_REG_MASK, e->data); |
259 | imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); |
260 | |
261 | switch (type) { |
262 | case EX_TYPE_DEFAULT: |
263 | case EX_TYPE_DEFAULT_MCE_SAFE: |
264 | return ex_handler_default(e, regs); |
265 | case EX_TYPE_FAULT: |
266 | case EX_TYPE_FAULT_MCE_SAFE: |
267 | return ex_handler_fault(fixup: e, regs, trapnr); |
268 | case EX_TYPE_UACCESS: |
269 | return ex_handler_uaccess(fixup: e, regs, trapnr, fault_address: fault_addr); |
270 | case EX_TYPE_COPY: |
271 | return ex_handler_copy(fixup: e, regs, trapnr); |
272 | case EX_TYPE_CLEAR_FS: |
273 | return ex_handler_clear_fs(fixup: e, regs); |
274 | case EX_TYPE_FPU_RESTORE: |
275 | return ex_handler_fprestore(fixup: e, regs); |
276 | case EX_TYPE_BPF: |
277 | return ex_handler_bpf(x: e, regs); |
278 | case EX_TYPE_WRMSR: |
279 | return ex_handler_msr(fixup: e, regs, wrmsr: true, safe: false, reg); |
280 | case EX_TYPE_RDMSR: |
281 | return ex_handler_msr(fixup: e, regs, wrmsr: false, safe: false, reg); |
282 | case EX_TYPE_WRMSR_SAFE: |
283 | return ex_handler_msr(fixup: e, regs, wrmsr: true, safe: true, reg); |
284 | case EX_TYPE_RDMSR_SAFE: |
285 | return ex_handler_msr(fixup: e, regs, wrmsr: false, safe: true, reg); |
286 | case EX_TYPE_WRMSR_IN_MCE: |
287 | ex_handler_msr_mce(regs, wrmsr: true); |
288 | break; |
289 | case EX_TYPE_RDMSR_IN_MCE: |
290 | ex_handler_msr_mce(regs, wrmsr: false); |
291 | break; |
292 | case EX_TYPE_POP_REG: |
293 | regs->sp += sizeof(long); |
294 | fallthrough; |
295 | case EX_TYPE_IMM_REG: |
296 | return ex_handler_imm_reg(fixup: e, regs, reg, imm); |
297 | case EX_TYPE_FAULT_SGX: |
298 | return ex_handler_sgx(fixup: e, regs, trapnr); |
299 | case EX_TYPE_UCOPY_LEN: |
300 | return ex_handler_ucopy_len(fixup: e, regs, trapnr, fault_address: fault_addr, reg, imm); |
301 | case EX_TYPE_ZEROPAD: |
302 | return ex_handler_zeropad(e, regs, fault_addr); |
303 | } |
304 | BUG(); |
305 | } |
306 | |
307 | extern unsigned int early_recursion_flag; |
308 | |
309 | /* Restricted version used during very early boot */ |
310 | void __init early_fixup_exception(struct pt_regs *regs, int trapnr) |
311 | { |
312 | /* Ignore early NMIs. */ |
313 | if (trapnr == X86_TRAP_NMI) |
314 | return; |
315 | |
316 | if (early_recursion_flag > 2) |
317 | goto halt_loop; |
318 | |
319 | /* |
320 | * Old CPUs leave the high bits of CS on the stack |
321 | * undefined. I'm not sure which CPUs do this, but at least |
322 | * the 486 DX works this way. |
323 | * Xen pv domains are not using the default __KERNEL_CS. |
324 | */ |
325 | if (!xen_pv_domain() && regs->cs != __KERNEL_CS) |
326 | goto fail; |
327 | |
328 | /* |
329 | * The full exception fixup machinery is available as soon as |
330 | * the early IDT is loaded. This means that it is the |
331 | * responsibility of extable users to either function correctly |
332 | * when handlers are invoked early or to simply avoid causing |
333 | * exceptions before they're ready to handle them. |
334 | * |
335 | * This is better than filtering which handlers can be used, |
336 | * because refusing to call a handler here is guaranteed to |
337 | * result in a hard-to-debug panic. |
338 | * |
339 | * Keep in mind that not all vectors actually get here. Early |
340 | * page faults, for example, are special. |
341 | */ |
342 | if (fixup_exception(regs, trapnr, error_code: regs->orig_ax, fault_addr: 0)) |
343 | return; |
344 | |
345 | if (trapnr == X86_TRAP_UD) { |
346 | if (report_bug(bug_addr: regs->ip, regs) == BUG_TRAP_TYPE_WARN) { |
347 | /* Skip the ud2. */ |
348 | regs->ip += LEN_UD2; |
349 | return; |
350 | } |
351 | |
352 | /* |
353 | * If this was a BUG and report_bug returns or if this |
354 | * was just a normal #UD, we want to continue onward and |
355 | * crash. |
356 | */ |
357 | } |
358 | |
359 | fail: |
360 | early_printk(fmt: "PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n" , |
361 | (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, |
362 | regs->orig_ax, read_cr2()); |
363 | |
364 | show_regs(regs); |
365 | |
366 | halt_loop: |
367 | while (true) |
368 | halt(); |
369 | } |
370 | |