/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
#include "kvm-asm-offsets.h"
#include "run_flags.h"

#define WORD_SIZE (BITS_PER_LONG / 8)

#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE

#ifdef CONFIG_X86_64
#define VCPU_R8		__VCPU_REGS_R8  * WORD_SIZE
#define VCPU_R9		__VCPU_REGS_R9  * WORD_SIZE
#define VCPU_R10	__VCPU_REGS_R10 * WORD_SIZE
#define VCPU_R11	__VCPU_REGS_R11 * WORD_SIZE
#define VCPU_R12	__VCPU_REGS_R12 * WORD_SIZE
#define VCPU_R13	__VCPU_REGS_R13 * WORD_SIZE
#define VCPU_R14	__VCPU_REGS_R14 * WORD_SIZE
#define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
#endif
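
/*
 * The VCPU_* values are byte offsets into the @regs array handed to
 * __vmx_vcpu_run() (in practice vcpu->arch.regs), e.g. VCPU_RCX(%_ASM_AX)
 * addresses regs[__VCPU_REGS_RCX] when %_ASM_AX holds the @regs pointer.
 */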

.macro VMX_DO_EVENT_IRQOFF call_insn call_target
	/*
	 * Unconditionally create a stack frame, getting the correct RSP on the
	 * stack (for x86-64) would take two instructions anyways, and RBP can
	 * be used to restore RSP to make objtool happy (see below).
	 */
	push %_ASM_BP
	mov %_ASM_SP, %_ASM_BP

#ifdef CONFIG_X86_64
	/*
	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
	 */
	and  $-16, %rsp
	push $__KERNEL_DS
	push %rbp
#endif
	pushf
	push $__KERNEL_CS
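	/*
	 * The stack now mirrors a hardware-generated IRQ/NMI frame: on 64-bit,
	 * SS (__KERNEL_DS) and the RSP to restore (held in RBP) were pushed
	 * above, followed by EFLAGS and CS; the CALL below supplies the return
	 * RIP, so the callee can IRET as if the event had been delivered by
	 * the CPU.
	 */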
	\call_insn \call_target

	/*
	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
	 * the correct value.  objtool doesn't know the callee will IRET and,
	 * without the explicit restore, thinks the stack is getting walloped.
	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
	 */
	mov %_ASM_BP, %_ASM_SP
	pop %_ASM_BP
	RET
.endm

.section .noinstr.text, "ax"

/**
 * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
 * @vmx:	struct vcpu_vmx *
 * @regs:	unsigned long * (to guest registers)
 * @flags:	VMX_RUN_VMRESUME:	use VMRESUME instead of VMLAUNCH
 *		VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
 *
 * Returns:
 *	0 on VM-Exit, 1 on VM-Fail
 */
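/*
 * For reference, the C-side declaration (in vmx.h) is roughly:
 *
 *	bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
 *			    unsigned int flags);
 */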
SYM_FUNC_START(__vmx_vcpu_run)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX

	/* Save @vmx for SPEC_CTRL handling */
	push %_ASM_ARG1

	/* Save @flags for SPEC_CTRL handling */
	push %_ASM_ARG3

	/*
	 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
	 * @regs is needed after VM-Exit to save the guest's register values.
	 */
	push %_ASM_ARG2
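
	/*
	 * Stack layout after the three pushes above:
	 *
	 *	2*WORD_SIZE(%_ASM_SP) = @vmx
	 *	1*WORD_SIZE(%_ASM_SP) = @flags
	 *	0*WORD_SIZE(%_ASM_SP) = @regs
	 *
	 * The SPEC_CTRL code below and the post-VM-Exit path rely on these
	 * fixed offsets.
	 */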

	/* Copy @flags to EBX, _ASM_ARG3 is volatile. */
	mov %_ASM_ARG3L, %ebx

	lea (%_ASM_SP), %_ASM_ARG2
	call vmx_update_host_rsp

	ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

	/*
	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
	 * host's, write the MSR.
	 *
	 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
	 * there must not be any returns or indirect branches between this code
	 * and vmentry.
	 */
	mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
	movl VMX_spec_ctrl(%_ASM_DI), %edi
	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
	cmp %edi, %esi
	je .Lspec_ctrl_done
	mov $MSR_IA32_SPEC_CTRL, %ecx
	xor %edx, %edx
	mov %edi, %eax
	wrmsr
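
	/*
	 * The sequence above is roughly the open-coded equivalent of:
	 *
	 *	if (vmx->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
	 *		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
	 *
	 * kept in asm so that no CALL/RET or indirect branch occurs between
	 * the write and VM-Enter.
	 */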

.Lspec_ctrl_done:

	/*
	 * Since vmentry is serializing on affected CPUs, there's no need for
	 * an LFENCE to stop speculation from skipping the wrmsr.
	 */

	/* Load @regs to RAX. */
	mov (%_ASM_SP), %_ASM_AX

	/* Check if vmlaunch or vmresume is needed */
	bt  $VMX_RUN_VMRESUME_SHIFT, %ebx
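	/*
	 * BT copies bit VMX_RUN_VMRESUME_SHIFT of EBX into EFLAGS.CF without
	 * modifying any GPRs, so CF survives the guest register loads below
	 * and is consumed by the JNC after CLEAR_CPU_BUFFERS.
	 */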

	/* Load guest registers.  Don't clobber flags. */
	mov VCPU_RCX(%_ASM_AX), %_ASM_CX
	mov VCPU_RDX(%_ASM_AX), %_ASM_DX
	mov VCPU_RBX(%_ASM_AX), %_ASM_BX
	mov VCPU_RBP(%_ASM_AX), %_ASM_BP
	mov VCPU_RSI(%_ASM_AX), %_ASM_SI
	mov VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VCPU_R8 (%_ASM_AX),  %r8
	mov VCPU_R9 (%_ASM_AX),  %r9
	mov VCPU_R10(%_ASM_AX), %r10
	mov VCPU_R11(%_ASM_AX), %r11
	mov VCPU_R12(%_ASM_AX), %r12
	mov VCPU_R13(%_ASM_AX), %r13
	mov VCPU_R14(%_ASM_AX), %r14
	mov VCPU_R15(%_ASM_AX), %r15
#endif
	/* Load guest RAX.  This kills the @regs pointer! */
	mov VCPU_RAX(%_ASM_AX), %_ASM_AX

	/* Clobbers EFLAGS.ZF */
	CLEAR_CPU_BUFFERS

	/* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
	jnc .Lvmlaunch

	/*
	 * After a successful VMRESUME/VMLAUNCH, control flow "magically"
	 * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
	 * So this isn't a typical function and objtool needs to be told to
	 * save the unwind state here and restore it below.
	 */
	UNWIND_HINT_SAVE

	/*
	 * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution
	 * resumes at the 'vmx_vmexit' label below.
	 */
.Lvmresume:
	vmresume
	jmp .Lvmfail

.Lvmlaunch:
	vmlaunch
	jmp .Lvmfail

_ASM_EXTABLE(.Lvmresume, .Lfixup)
_ASM_EXTABLE(.Lvmlaunch, .Lfixup)

SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

	/* Restore unwind state from before the VMRESUME/VMLAUNCH. */
	UNWIND_HINT_RESTORE
	ENDBR
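
	/*
	 * On VM-Exit the CPU reloaded host state from the VMCS host-state
	 * area, including RSP, which vmx_update_host_rsp() pointed at the
	 * @regs/@flags/@vmx saves above, so the fixed stack offsets from
	 * VM-Enter still apply here.
	 */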

	/* Temporarily save guest's RAX. */
	push %_ASM_AX

	/* Reload @regs to RAX. */
	mov WORD_SIZE(%_ASM_SP), %_ASM_AX

	/* Save all guest registers, including RAX from the stack */
	pop           VCPU_RAX(%_ASM_AX)
	mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
	mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
	mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
	mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
	mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
	mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov %r8,  VCPU_R8 (%_ASM_AX)
	mov %r9,  VCPU_R9 (%_ASM_AX)
	mov %r10, VCPU_R10(%_ASM_AX)
	mov %r11, VCPU_R11(%_ASM_AX)
	mov %r12, VCPU_R12(%_ASM_AX)
	mov %r13, VCPU_R13(%_ASM_AX)
	mov %r14, VCPU_R14(%_ASM_AX)
	mov %r15, VCPU_R15(%_ASM_AX)
#endif

	/* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
	xor %ebx, %ebx

.Lclear_regs:
	/* Discard @regs.  The register is irrelevant, it just can't be RBX. */
	pop %_ASM_AX

	/*
	 * Clear all general purpose registers except RSP and RBX to prevent
	 * speculative use of the guest's values, even those that are reloaded
	 * via the stack.  In theory, an L1 cache miss when restoring registers
	 * could lead to speculative execution with the guest's values.
	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
	 * free.  RSP and RBX are exempt as RSP is restored by hardware during
	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
	 * value.
	 */
	xor %eax, %eax
	xor %ecx, %ecx
	xor %edx, %edx
	xor %ebp, %ebp
	xor %esi, %esi
	xor %edi, %edi
#ifdef CONFIG_X86_64
	xor %r8d,  %r8d
	xor %r9d,  %r9d
	xor %r10d, %r10d
	xor %r11d, %r11d
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#endif

	/*
	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
	 * the first unbalanced RET after vmexit!
	 *
	 * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
	 * entries and (in some cases) RSB underflow.
	 *
	 * eIBRS has its own protection against poisoned RSB, so it doesn't
	 * need the RSB filling sequence.  But it does need to be enabled, and a
	 * single call to retire, before the first unbalanced RET.
	 */
	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
			   X86_FEATURE_RSB_VMEXIT_LITE
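	/*
	 * Note: FILL_RETURN_BUFFER uses %_ASM_CX purely as scratch (it was
	 * zeroed above anyway), and the stuffing sequence is patched in only
	 * when one of the two RSB_VMEXIT features is enabled.
	 */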

	pop %_ASM_ARG2	/* @flags */
	pop %_ASM_ARG1	/* @vmx */

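	/*
	 * vmx_spec_ctrl_restore_host() saves the guest's SPEC_CTRL into
	 * vmx->spec_ctrl if @flags has VMX_RUN_SAVE_SPEC_CTRL set, then
	 * restores the host's SPEC_CTRL value as needed.
	 */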
	call vmx_spec_ctrl_restore_host

	CLEAR_BRANCH_HISTORY_VMEXIT

	/* Put return value in AX */
	mov %_ASM_BX, %_ASM_AX

	pop %_ASM_BX
#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET

.Lfixup:
	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne .Lvmfail
	ud2
.Lvmfail:
	/* VM-Fail: set return value to 1 */
	mov $1, %_ASM_BX
	jmp .Lclear_regs

SYM_FUNC_END(__vmx_vcpu_run)

SYM_FUNC_START(vmx_do_nmi_irqoff)
	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)
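
/*
 * vmx_do_nmi_irqoff() invokes the dedicated NMI entry point with a direct
 * CALL, while vmx_do_interrupt_irqoff() (in .text, below) dispatches via
 * CALL_NOSPEC to the IRQ handler entry point passed in its first argument.
 */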

#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

/**
 * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
 * @field:	VMCS field encoding that failed
 * @fault:	%true if the VMREAD faulted, %false if it failed
 *
 * Save and restore volatile registers across a call to vmread_error().  Note,
 * all parameters are passed on the stack.
 */
SYM_FUNC_START(vmread_error_trampoline)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP

	push %_ASM_AX
	push %_ASM_CX
	push %_ASM_DX
#ifdef CONFIG_X86_64
	push %rdi
	push %rsi
	push %r8
	push %r9
	push %r10
	push %r11
#endif

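	/*
	 * With the frame set up above, 0(%_ASM_BP) holds the saved RBP and
	 * WORD_SIZE(%_ASM_BP) the return RIP, so the stack-passed parameters
	 * start at 2*WORD_SIZE(%_ASM_BP): @field, then @fault.
	 */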
	/* Load @field and @fault to arg1 and arg2 respectively. */
	mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
	mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1

	call vmread_error_trampoline2

	/* Zero out @fault, which will be popped into the result register. */
	_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

#ifdef CONFIG_X86_64
	pop %r11
	pop %r10
	pop %r9
	pop %r8
	pop %rsi
	pop %rdi
#endif
	pop %_ASM_DX
	pop %_ASM_CX
	pop %_ASM_AX
	pop %_ASM_BP

	RET
SYM_FUNC_END(vmread_error_trampoline)
#endif

.section .text, "ax"

SYM_FUNC_START(vmx_do_interrupt_irqoff)
	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)