// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/kvm/handle_exit.c:
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/kvm.h>
#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>

#include <kvm/arm_hypercalls.h>

#define CREATE_TRACE_POINTS
#include "trace_handle_exit.h"

typedef int (*exit_handle_fn)(struct kvm_vcpu *);

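/*
 * An SError that isn't attributable to RAS, or one that the RAS
 * machinery considers fatal, is forwarded to the guest as a virtual
 * abort. Non-fatal RAS SErrors need no further action.
 */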
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
		kvm_inject_vabt(vcpu);
}

static int handle_hvc(struct kvm_vcpu *vcpu)
{
	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
			    kvm_vcpu_hvc_get_imm(vcpu));
	vcpu->stat.hvc_exit_stat++;

	/* Forward hvc instructions to the virtual EL2 if the guest has EL2. */
	if (vcpu_has_nv(vcpu)) {
		if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD)
			kvm_inject_undefined(vcpu);
		else
			kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));

		return 1;
	}

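	/* Not a nested hypervisor: treat the HVC as an SMCCC/PSCI call. */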
	return kvm_smccc_call_handler(vcpu);
}

static int handle_smc(struct kvm_vcpu *vcpu)
{
	/*
	 * "If an SMC instruction executed at Non-secure EL1 is
	 * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
	 * Trap exception, not a Secure Monitor Call exception [...]"
	 *
	 * We need to advance the PC after the trap, as it would
	 * otherwise return to the same address. Furthermore, pre-incrementing
	 * the PC before potentially exiting to userspace maintains the same
	 * abstraction for both SMCs and HVCs.
	 */
	kvm_incr_pc(vcpu);

	/*
	 * SMCs with a nonzero immediate are reserved according to DEN0028E 2.9
	 * "SMC and HVC immediate value".
	 */
	if (kvm_vcpu_hvc_get_imm(vcpu)) {
		vcpu_set_reg(vcpu, 0, ~0UL);
		return 1;
	}

	/*
	 * If imm is zero then it is likely an SMCCC call.
	 *
	 * Note that on ARMv8.3, even if EL3 is not implemented, SMC executed
	 * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
	 * being treated as UNDEFINED.
	 */
	return kvm_smccc_call_handler(vcpu);
}

/*
 * Guest access to FP/ASIMD registers is routed to this handler only
 * when the system doesn't support FP/ASIMD.
 */
static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);
	return 1;
}

/**
 * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
 *		    instruction executed by a guest
 *
 * @vcpu: the vcpu pointer
 *
 * WFE[T]: Yield the CPU and come back to this vcpu when the scheduler
 * decides to.
 * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
 * world-switches and schedule other host processes until there is an
 * incoming IRQ or FIQ to the VM.
 * WFIT: Same as WFI, with a timed wakeup implemented as a background timer
 *
 * WF{I,E}T can immediately return if the deadline has already expired.
 */
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
	u64 esr = kvm_vcpu_get_esr(vcpu);

	if (esr & ESR_ELx_WFx_ISS_WFE) {
		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
		vcpu->stat.wfe_exit_stat++;
	} else {
		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
		vcpu->stat.wfi_exit_stat++;
	}

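	/*
	 * For WFET/WFIT with a valid register operand, compare the deadline
	 * in Rt against the guest's virtual counter: if it has already
	 * passed, don't block at all.
	 */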
	if (esr & ESR_ELx_WFx_ISS_WFxT) {
		if (esr & ESR_ELx_WFx_ISS_RV) {
			u64 val, now;

			now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
			val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));

			if (now >= val)
				goto out;
		} else {
			/* Treat WFxT as WFx if RN is invalid */
			esr &= ~ESR_ELx_WFx_ISS_WFxT;
		}
	}

	if (esr & ESR_ELx_WFx_ISS_WFE) {
		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
	} else {
		if (esr & ESR_ELx_WFx_ISS_WFxT)
			vcpu_set_flag(vcpu, IN_WFIT);

		kvm_vcpu_wfi(vcpu);
	}
out:
	kvm_incr_pc(vcpu);

	return 1;
}

/**
 * kvm_handle_guest_debug - handle a debug exception instruction
 *
 * @vcpu: the vcpu pointer
 *
 * We route all debug exceptions through the same handler. If both the
 * guest and host are using the same debug facilities it will be up to
 * userspace to re-inject the correct exception for guest delivery.
 *
 * @return: 0 (while setting vcpu->run->exit_reason)
 */
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	u64 esr = kvm_vcpu_get_esr(vcpu);

	run->exit_reason = KVM_EXIT_DEBUG;
	run->debug.arch.hsr = lower_32_bits(esr);
	run->debug.arch.hsr_high = upper_32_bits(esr);
	run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;

	switch (ESR_ELx_EC(esr)) {
	case ESR_ELx_EC_WATCHPT_LOW:
		run->debug.arch.far = vcpu->arch.fault.far_el2;
		break;
	case ESR_ELx_EC_SOFTSTP_LOW:
		vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
		break;
	}

	return 0;
}

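/*
 * Catch-all for exception classes with no dedicated handler: log the
 * unexpected ESR and give the guest an UNDEF.
 */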
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
	u64 esr = kvm_vcpu_get_esr(vcpu);

	kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n",
		      esr, esr_get_class_string(esr));

	kvm_inject_undefined(vcpu);
	return 1;
}

/*
 * Guest access to SVE registers should be routed to this handler only
 * when the system doesn't support SVE.
 */
static int handle_sve(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);
	return 1;
}

/*
 * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into
 * a NOP). If we get here, it means we didn't fix up ptrauth on exit, and all
 * we can do is give the guest an UNDEF.
 */
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);
	return 1;
}

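/*
 * ESR_ELx_ERET_ISS_ERET being set denotes the pointer authentication
 * variants (ERETAA/ERETAB), which take the same path as any other
 * ptrauth instruction we failed to handle.
 */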
static int kvm_handle_eret(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
		return kvm_handle_ptrauth(vcpu);

	/*
	 * If we got here, two possibilities:
	 *
	 * - the guest is in EL2, and we need to fully emulate ERET
	 *
	 * - the guest is in EL1, and we need to reinject the
	 *   exception into the L1 hypervisor.
	 *
	 * If KVM ever traps ERET for its own use, we'll have to
	 * revisit this.
	 */
	if (is_hyp_ctxt(vcpu))
		kvm_emulate_nested_eret(vcpu);
	else
		kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));

	return 1;
}

static int handle_svc(struct kvm_vcpu *vcpu)
{
	/*
	 * So far, SVC traps only for NV via HFGITR_EL2. An SVC from a
	 * 32bit guest would be caught by vcpu_mode_is_bad_32bit(), so
	 * we should only have to deal with a 64 bit exception.
	 */
	kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
	return 1;
}

static exit_handle_fn arm_exit_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
	[ESR_ELx_EC_CP14_MR]	= kvm_handle_cp14_32,
	[ESR_ELx_EC_CP14_LS]	= kvm_handle_cp14_load_store,
	[ESR_ELx_EC_CP10_ID]	= kvm_handle_cp10_id,
	[ESR_ELx_EC_CP14_64]	= kvm_handle_cp14_64,
	[ESR_ELx_EC_HVC32]	= handle_hvc,
	[ESR_ELx_EC_SMC32]	= handle_smc,
	[ESR_ELx_EC_HVC64]	= handle_hvc,
	[ESR_ELx_EC_SMC64]	= handle_smc,
	[ESR_ELx_EC_SVC64]	= handle_svc,
	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
	[ESR_ELx_EC_SVE]	= handle_sve,
	[ESR_ELx_EC_ERET]	= kvm_handle_eret,
	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
	[ESR_ELx_EC_DABT_LOW]	= kvm_handle_guest_abort,
	[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
	[ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
	[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
	[ESR_ELx_EC_BKPT32]	= kvm_handle_guest_debug,
	[ESR_ELx_EC_BRK64]	= kvm_handle_guest_debug,
	[ESR_ELx_EC_FP_ASIMD]	= handle_no_fpsimd,
	[ESR_ELx_EC_PAC]	= kvm_handle_ptrauth,
};

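/*
 * Select the handler matching the exception class in ESR_EL2; classes
 * without a dedicated entry fall back to kvm_handle_unknown_ec via the
 * table's default initializer.
 */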
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
	u64 esr = kvm_vcpu_get_esr(vcpu);
	u8 esr_ec = ESR_ELx_EC(esr);

	return arm_exit_handlers[esr_ec];
}

/*
 * We may be single-stepping an emulated instruction. If the emulation
 * has been completed in the kernel, we can return to userspace with a
 * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
 * emulation first.
 */
static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
	int handled;

	/*
	 * See ARM ARM B1.14.1: "Hyp traps on instructions
	 * that fail their condition code check"
	 */
	if (!kvm_condition_valid(vcpu)) {
		kvm_incr_pc(vcpu);
		handled = 1;
	} else {
		exit_handle_fn exit_handler;

		exit_handler = kvm_get_exit_handler(vcpu);
		handled = exit_handler(vcpu);
	}

	return handled;
}

/*
 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
 * proper exit to userspace.
 */
int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
	struct kvm_run *run = vcpu->run;

	if (ARM_SERROR_PENDING(exception_index)) {
		/*
		 * The SError is handled by handle_exit_early(). If the guest
		 * survives it will re-execute the original instruction.
		 */
		return 1;
	}

	exception_index = ARM_EXCEPTION_CODE(exception_index);

	switch (exception_index) {
	case ARM_EXCEPTION_IRQ:
		return 1;
	case ARM_EXCEPTION_EL1_SERROR:
		return 1;
	case ARM_EXCEPTION_TRAP:
		return handle_trap_exceptions(vcpu);
	case ARM_EXCEPTION_HYP_GONE:
		/*
		 * EL2 has been reset to the hyp-stub. This happens when a guest
		 * is pre-empted by kvm_reboot()'s shutdown call.
		 */
		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		return 0;
	case ARM_EXCEPTION_IL:
		/*
		 * We attempted an illegal exception return. Guest state must
		 * have been corrupted somehow. Give up.
		 */
		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		return -EINVAL;
	default:
		kvm_pr_unimpl("Unsupported exception type: %d",
			      exception_index);
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return 0;
	}
}

/* For exit types that need handling before we can be preempted */
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
	if (ARM_SERROR_PENDING(exception_index)) {
		if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
			u64 disr = kvm_vcpu_get_disr(vcpu);

			kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
		} else {
			kvm_inject_vabt(vcpu);
		}

		return;
	}

	exception_index = ARM_EXCEPTION_CODE(exception_index);

	if (exception_index == ARM_EXCEPTION_EL1_SERROR)
		kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
}

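/*
 * Handle a panic taken in the nVHE hypervisor: translate the hyp ELR
 * back into a kernel image address so BUG() metadata and symbols can be
 * resolved, print a report and backtrace, then panic the host kernel.
 */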
void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
					      u64 elr_virt, u64 elr_phys,
					      u64 par, uintptr_t vcpu,
					      u64 far, u64 hpfar)
{
	u64 elr_in_kimg = __phys_to_kimg(elr_phys);
	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
	u64 mode = spsr & PSR_MODE_MASK;
	u64 panic_addr = elr_virt + hyp_offset;

	if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
		kvm_err("Invalid host exception to nVHE hyp!\n");
	} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
		   (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
		const char *file = NULL;
		unsigned int line = 0;

		/* All hyp bugs, including warnings, are treated as fatal. */
		if (!is_protected_kvm_enabled() ||
		    IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
			struct bug_entry *bug = find_bug(elr_in_kimg);

			if (bug)
				bug_get_file_line(bug, &file, &line);
		}

		if (file)
			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
		else
			kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
					(void *)(panic_addr + kaslr_offset()));
	} else {
		kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
				(void *)(panic_addr + kaslr_offset()));
	}

	/* Dump the nVHE hypervisor backtrace */
	kvm_nvhe_dump_backtrace(hyp_offset);

	/*
	 * Hyp has panicked and we're going to handle that by panicking the
	 * kernel. The kernel offset will be revealed in the panic so we're
	 * also safe to reveal the hyp offset as a debugging aid for translating
	 * hyp VAs to vmlinux addresses.
	 */
	kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);

	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
	      spsr, elr_virt, esr, far, hpfar, par, vcpu);
}