1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Kernel-based Virtual Machine driver for Linux |
4 | * |
5 | * AMD SVM-SEV support |
6 | * |
7 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. |
8 | */ |
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | |
11 | #include <linux/kvm_types.h> |
12 | #include <linux/kvm_host.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/highmem.h> |
15 | #include <linux/psp.h> |
16 | #include <linux/psp-sev.h> |
17 | #include <linux/pagemap.h> |
18 | #include <linux/swap.h> |
19 | #include <linux/misc_cgroup.h> |
20 | #include <linux/processor.h> |
21 | #include <linux/trace_events.h> |
22 | |
23 | #include <asm/pkru.h> |
24 | #include <asm/trapnr.h> |
25 | #include <asm/fpu/xcr.h> |
26 | #include <asm/debugreg.h> |
27 | |
28 | #include "mmu.h" |
29 | #include "x86.h" |
30 | #include "svm.h" |
31 | #include "svm_ops.h" |
32 | #include "cpuid.h" |
33 | #include "trace.h" |
34 | |
35 | #ifndef CONFIG_KVM_AMD_SEV |
36 | /* |
 * When this config is not defined, the SEV feature is not supported and the
 * APIs in this file are not used, but this file still gets compiled into the
 * KVM AMD module.
 *
 * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
 * misc_res_type {} defined in linux/misc_cgroup.h.
 *
 * The below macros allow compilation to succeed.
45 | */ |
46 | #define MISC_CG_RES_SEV MISC_CG_RES_TYPES |
47 | #define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES |
48 | #endif |
49 | |
50 | #ifdef CONFIG_KVM_AMD_SEV |
51 | /* enable/disable SEV support */ |
52 | static bool sev_enabled = true; |
53 | module_param_named(sev, sev_enabled, bool, 0444); |
54 | |
55 | /* enable/disable SEV-ES support */ |
56 | static bool sev_es_enabled = true; |
57 | module_param_named(sev_es, sev_es_enabled, bool, 0444); |
58 | |
59 | /* enable/disable SEV-ES DebugSwap support */ |
60 | static bool sev_es_debug_swap_enabled = false; |
61 | module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); |
62 | #else |
63 | #define sev_enabled false |
64 | #define sev_es_enabled false |
65 | #define sev_es_debug_swap_enabled false |
66 | #endif /* CONFIG_KVM_AMD_SEV */ |
67 | |
68 | static u8 sev_enc_bit; |
69 | static DECLARE_RWSEM(sev_deactivate_lock); |
70 | static DEFINE_MUTEX(sev_bitmap_lock); |
71 | unsigned int max_sev_asid; |
72 | static unsigned int min_sev_asid; |
73 | static unsigned long sev_me_mask; |
74 | static unsigned int nr_asids; |
75 | static unsigned long *sev_asid_bitmap; |
76 | static unsigned long *sev_reclaim_asid_bitmap; |
77 | |
78 | struct enc_region { |
79 | struct list_head list; |
80 | unsigned long npages; |
81 | struct page **pages; |
82 | unsigned long uaddr; |
83 | unsigned long size; |
84 | }; |
85 | |
86 | /* Called with the sev_bitmap_lock held, or on shutdown */ |
87 | static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) |
88 | { |
89 | int ret, error = 0; |
90 | unsigned int asid; |
91 | |
92 | /* Check if there are any ASIDs to reclaim before performing a flush */ |
	asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
	if (asid > max_asid)
		return -EBUSY;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();
	ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
110 | |
111 | return ret; |
112 | } |
113 | |
114 | static inline bool is_mirroring_enc_context(struct kvm *kvm) |
115 | { |
116 | return !!to_kvm_svm(kvm)->sev_info.enc_context_owner; |
117 | } |
118 | |
119 | /* Must be called with the sev_bitmap_lock held */ |
120 | static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) |
121 | { |
122 | if (sev_flush_asids(min_asid, max_asid)) |
123 | return false; |
124 | |
125 | /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ |
	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
		   nr_asids);
	bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
129 | |
130 | return true; |
131 | } |
132 | |
133 | static int sev_misc_cg_try_charge(struct kvm_sev_info *sev) |
134 | { |
135 | enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; |
	return misc_cg_try_charge(type, sev->misc_cg, 1);
137 | } |
138 | |
139 | static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) |
140 | { |
141 | enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; |
	misc_cg_uncharge(type, sev->misc_cg, 1);
143 | } |
144 | |
145 | static int sev_asid_new(struct kvm_sev_info *sev) |
146 | { |
147 | /* |
	 * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
	 * SEV-ES-enabled guests can use ASIDs from 1 to min_sev_asid - 1.
150 | * Note: min ASID can end up larger than the max if basic SEV support is |
151 | * effectively disabled by disallowing use of ASIDs for SEV guests. |
152 | */ |
153 | unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; |
154 | unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; |
155 | unsigned int asid; |
156 | bool retry = true; |
157 | int ret; |
158 | |
159 | if (min_asid > max_asid) |
160 | return -ENOTTY; |
161 | |
162 | WARN_ON(sev->misc_cg); |
163 | sev->misc_cg = get_current_misc_cg(); |
164 | ret = sev_misc_cg_try_charge(sev); |
165 | if (ret) { |
		put_misc_cg(sev->misc_cg);
167 | sev->misc_cg = NULL; |
168 | return ret; |
169 | } |
170 | |
171 | mutex_lock(&sev_bitmap_lock); |
172 | |
173 | again: |
	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
175 | if (asid > max_asid) { |
176 | if (retry && __sev_recycle_asids(min_asid, max_asid)) { |
177 | retry = false; |
178 | goto again; |
179 | } |
		mutex_unlock(&sev_bitmap_lock);
181 | ret = -EBUSY; |
182 | goto e_uncharge; |
183 | } |
184 | |
185 | __set_bit(asid, sev_asid_bitmap); |
186 | |
	mutex_unlock(&sev_bitmap_lock);
188 | |
189 | sev->asid = asid; |
190 | return 0; |
191 | e_uncharge: |
192 | sev_misc_cg_uncharge(sev); |
	put_misc_cg(sev->misc_cg);
194 | sev->misc_cg = NULL; |
195 | return ret; |
196 | } |
197 | |
198 | static unsigned int sev_get_asid(struct kvm *kvm) |
199 | { |
200 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
201 | |
202 | return sev->asid; |
203 | } |
204 | |
205 | static void sev_asid_free(struct kvm_sev_info *sev) |
206 | { |
207 | struct svm_cpu_data *sd; |
208 | int cpu; |
209 | |
210 | mutex_lock(&sev_bitmap_lock); |
211 | |
212 | __set_bit(sev->asid, sev_reclaim_asid_bitmap); |
213 | |
214 | for_each_possible_cpu(cpu) { |
215 | sd = per_cpu_ptr(&svm_data, cpu); |
216 | sd->sev_vmcbs[sev->asid] = NULL; |
217 | } |
218 | |
	mutex_unlock(&sev_bitmap_lock);

	sev_misc_cg_uncharge(sev);
	put_misc_cg(sev->misc_cg);
223 | sev->misc_cg = NULL; |
224 | } |
225 | |
226 | static void sev_decommission(unsigned int handle) |
227 | { |
228 | struct sev_data_decommission decommission; |
229 | |
230 | if (!handle) |
231 | return; |
232 | |
233 | decommission.handle = handle; |
	sev_guest_decommission(&decommission, NULL);
235 | } |
236 | |
237 | static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) |
238 | { |
239 | struct sev_data_deactivate deactivate; |
240 | |
241 | if (!handle) |
242 | return; |
243 | |
244 | deactivate.handle = handle; |
245 | |
246 | /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ |
	down_read(&sev_deactivate_lock);
	sev_guest_deactivate(&deactivate, NULL);
	up_read(&sev_deactivate_lock);
250 | |
251 | sev_decommission(handle); |
252 | } |
253 | |
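/*
 * Handle KVM_SEV_INIT / KVM_SEV_ES_INIT: allocate an ASID, initialize the
 * SEV platform and mark the VM as SEV (or SEV-ES) active. Must be issued
 * before any vCPUs are created.
 */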
254 | static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) |
255 | { |
256 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
257 | struct sev_platform_init_args init_args = {0}; |
258 | int ret; |
259 | |
260 | if (kvm->created_vcpus) |
261 | return -EINVAL; |
262 | |
263 | if (unlikely(sev->active)) |
264 | return -EINVAL; |
265 | |
266 | sev->active = true; |
267 | sev->es_active = argp->id == KVM_SEV_ES_INIT; |
268 | ret = sev_asid_new(sev); |
269 | if (ret) |
270 | goto e_no_asid; |
271 | |
272 | init_args.probe = false; |
	ret = sev_platform_init(&init_args);
274 | if (ret) |
275 | goto e_free; |
276 | |
	INIT_LIST_HEAD(&sev->regions_list);
	INIT_LIST_HEAD(&sev->mirror_vms);

	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);
281 | |
282 | return 0; |
283 | |
284 | e_free: |
285 | argp->error = init_args.error; |
286 | sev_asid_free(sev); |
287 | sev->asid = 0; |
288 | e_no_asid: |
289 | sev->es_active = false; |
290 | sev->active = false; |
291 | return ret; |
292 | } |
293 | |
294 | static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) |
295 | { |
296 | unsigned int asid = sev_get_asid(kvm); |
297 | struct sev_data_activate activate; |
298 | int ret; |
299 | |
300 | /* activate ASID on the given handle */ |
301 | activate.handle = handle; |
302 | activate.asid = asid; |
	ret = sev_guest_activate(&activate, error);
304 | |
305 | return ret; |
306 | } |
307 | |
308 | static int __sev_issue_cmd(int fd, int id, void *data, int *error) |
309 | { |
310 | struct fd f; |
311 | int ret; |
312 | |
313 | f = fdget(fd); |
314 | if (!f.file) |
315 | return -EBADF; |
316 | |
	ret = sev_issue_cmd_external_user(f.file, id, data, error);

	fdput(f);
320 | return ret; |
321 | } |
322 | |
323 | static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) |
324 | { |
325 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
326 | |
	return __sev_issue_cmd(sev->fd, id, data, error);
328 | } |
329 | |
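/*
 * Handle KVM_SEV_LAUNCH_START: copy the optional DH certificate and session
 * blobs from userspace, issue LAUNCH_START to create the memory encryption
 * context, and bind the VM's ASID to the returned firmware handle.
 */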
330 | static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) |
331 | { |
332 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
333 | struct sev_data_launch_start start; |
334 | struct kvm_sev_launch_start params; |
335 | void *dh_blob, *session_blob; |
336 | int *error = &argp->error; |
337 | int ret; |
338 | |
339 | if (!sev_guest(kvm)) |
340 | return -ENOTTY; |
341 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
343 | return -EFAULT; |
344 | |
345 | memset(&start, 0, sizeof(start)); |
346 | |
347 | dh_blob = NULL; |
348 | if (params.dh_uaddr) { |
		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
		if (IS_ERR(dh_blob))
			return PTR_ERR(dh_blob);
352 | |
353 | start.dh_cert_address = __sme_set(__pa(dh_blob)); |
354 | start.dh_cert_len = params.dh_len; |
355 | } |
356 | |
357 | session_blob = NULL; |
358 | if (params.session_uaddr) { |
		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
		if (IS_ERR(session_blob)) {
			ret = PTR_ERR(session_blob);
362 | goto e_free_dh; |
363 | } |
364 | |
365 | start.session_address = __sme_set(__pa(session_blob)); |
366 | start.session_len = params.session_len; |
367 | } |
368 | |
369 | start.handle = params.handle; |
370 | start.policy = params.policy; |
371 | |
372 | /* create memory encryption context */ |
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
374 | if (ret) |
375 | goto e_free_session; |
376 | |
377 | /* Bind ASID to this guest */ |
	ret = sev_bind_asid(kvm, start.handle, error);
	if (ret) {
		sev_decommission(start.handle);
381 | goto e_free_session; |
382 | } |
383 | |
384 | /* return handle to userspace */ |
385 | params.handle = start.handle; |
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
		sev_unbind_asid(kvm, start.handle);
388 | ret = -EFAULT; |
389 | goto e_free_session; |
390 | } |
391 | |
392 | sev->handle = start.handle; |
393 | sev->fd = argp->sev_fd; |
394 | |
395 | e_free_session: |
	kfree(session_blob);
e_free_dh:
	kfree(dh_blob);
399 | return ret; |
400 | } |
401 | |
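/*
 * Pin a userspace address range and account the pages against the caller's
 * RLIMIT_MEMLOCK. Returns the array of pinned pages on success, or an
 * ERR_PTR on failure.
 */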
402 | static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, |
403 | unsigned long ulen, unsigned long *n, |
404 | int write) |
405 | { |
406 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
407 | unsigned long npages, size; |
408 | int npinned; |
409 | unsigned long locked, lock_limit; |
410 | struct page **pages; |
411 | unsigned long first, last; |
412 | int ret; |
413 | |
414 | lockdep_assert_held(&kvm->lock); |
415 | |
416 | if (ulen == 0 || uaddr + ulen < uaddr) |
		return ERR_PTR(-EINVAL);
418 | |
419 | /* Calculate number of pages. */ |
420 | first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; |
421 | last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT; |
422 | npages = (last - first + 1); |
423 | |
424 | locked = sev->pages_locked + npages; |
425 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; |
426 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { |
427 | pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n" , locked, lock_limit); |
428 | return ERR_PTR(error: -ENOMEM); |
429 | } |
430 | |
431 | if (WARN_ON_ONCE(npages > INT_MAX)) |
		return ERR_PTR(-EINVAL);
433 | |
434 | /* Avoid using vmalloc for smaller buffers. */ |
435 | size = npages * sizeof(struct page *); |
436 | if (size > PAGE_SIZE) |
437 | pages = __vmalloc(size, GFP_KERNEL_ACCOUNT); |
438 | else |
439 | pages = kmalloc(size, GFP_KERNEL_ACCOUNT); |
440 | |
441 | if (!pages) |
		return ERR_PTR(-ENOMEM);
443 | |
444 | /* Pin the user virtual address. */ |
	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
	if (npinned != npages) {
		pr_err("SEV: Failure locking %lu pages.\n", npages);
448 | ret = -ENOMEM; |
449 | goto err; |
450 | } |
451 | |
452 | *n = npages; |
453 | sev->pages_locked = locked; |
454 | |
455 | return pages; |
456 | |
457 | err: |
458 | if (npinned > 0) |
		unpin_user_pages(pages, npinned);

	kvfree(pages);
	return ERR_PTR(ret);
463 | } |
464 | |
465 | static void sev_unpin_memory(struct kvm *kvm, struct page **pages, |
466 | unsigned long npages) |
467 | { |
468 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
469 | |
470 | unpin_user_pages(pages, npages); |
	kvfree(pages);
472 | sev->pages_locked -= npages; |
473 | } |
474 | |
475 | static void sev_clflush_pages(struct page *pages[], unsigned long npages) |
476 | { |
477 | uint8_t *page_virtual; |
478 | unsigned long i; |
479 | |
480 | if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 || |
481 | pages == NULL) |
482 | return; |
483 | |
484 | for (i = 0; i < npages; i++) { |
		page_virtual = kmap_local_page(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
487 | kunmap_local(page_virtual); |
488 | cond_resched(); |
489 | } |
490 | } |
491 | |
492 | static unsigned long get_num_contig_pages(unsigned long idx, |
493 | struct page **inpages, unsigned long npages) |
494 | { |
495 | unsigned long paddr, next_paddr; |
496 | unsigned long i = idx + 1, pages = 1; |
497 | |
498 | /* find the number of contiguous pages starting from idx */ |
499 | paddr = __sme_page_pa(inpages[idx]); |
500 | while (i < npages) { |
501 | next_paddr = __sme_page_pa(inpages[i++]); |
502 | if ((paddr + PAGE_SIZE) == next_paddr) { |
503 | pages++; |
504 | paddr = next_paddr; |
505 | continue; |
506 | } |
507 | break; |
508 | } |
509 | |
510 | return pages; |
511 | } |
512 | |
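/*
 * Handle KVM_SEV_LAUNCH_UPDATE_DATA: pin the userspace range, flush caches
 * on non-coherent CPUs, and issue LAUNCH_UPDATE_DATA for each physically
 * contiguous chunk so the firmware encrypts the guest memory in place.
 */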
513 | static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) |
514 | { |
515 | unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; |
516 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
517 | struct kvm_sev_launch_update_data params; |
518 | struct sev_data_launch_update_data data; |
519 | struct page **inpages; |
520 | int ret; |
521 | |
522 | if (!sev_guest(kvm)) |
523 | return -ENOTTY; |
524 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
526 | return -EFAULT; |
527 | |
528 | vaddr = params.uaddr; |
529 | size = params.len; |
530 | vaddr_end = vaddr + size; |
531 | |
532 | /* Lock the user memory. */ |
	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
	if (IS_ERR(inpages))
		return PTR_ERR(inpages);
536 | |
537 | /* |
538 | * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in |
539 | * place; the cache may contain the data that was written unencrypted. |
540 | */ |
	sev_clflush_pages(inpages, npages);
542 | |
543 | data.reserved = 0; |
544 | data.handle = sev->handle; |
545 | |
546 | for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) { |
547 | int offset, len; |
548 | |
549 | /* |
550 | * If the user buffer is not page-aligned, calculate the offset |
551 | * within the page. |
552 | */ |
553 | offset = vaddr & (PAGE_SIZE - 1); |
554 | |
555 | /* Calculate the number of pages that can be encrypted in one go. */ |
		pages = get_num_contig_pages(i, inpages, npages);
557 | |
558 | len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size); |
559 | |
560 | data.len = len; |
561 | data.address = __sme_page_pa(inpages[i]) + offset; |
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
563 | if (ret) |
564 | goto e_unpin; |
565 | |
566 | size -= len; |
567 | next_vaddr = vaddr + len; |
568 | } |
569 | |
570 | e_unpin: |
571 | /* content of memory is updated, mark pages dirty */ |
572 | for (i = 0; i < npages; i++) { |
573 | set_page_dirty_lock(inpages[i]); |
574 | mark_page_accessed(inpages[i]); |
575 | } |
576 | /* unlock the user pages */ |
	sev_unpin_memory(kvm, inpages, npages);
578 | return ret; |
579 | } |
580 | |
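/*
 * Copy the current VMCB save area and synced GPRs into the SEV-ES VMSA that
 * will be encrypted by LAUNCH_UPDATE_VMSA.
 */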
581 | static int sev_es_sync_vmsa(struct vcpu_svm *svm) |
582 | { |
583 | struct sev_es_save_area *save = svm->sev_es.vmsa; |
584 | |
585 | /* Check some debug related fields before encrypting the VMSA */ |
586 | if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1)) |
587 | return -EINVAL; |
588 | |
589 | /* |
590 | * SEV-ES will use a VMSA that is pointed to by the VMCB, not |
591 | * the traditional VMSA that is part of the VMCB. Copy the |
592 | * traditional VMSA as it has been built so far (in prep |
593 | * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. |
594 | */ |
595 | memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save)); |
596 | |
	/* Sync registers */
598 | save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX]; |
599 | save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX]; |
600 | save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
601 | save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX]; |
602 | save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP]; |
603 | save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; |
604 | save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; |
605 | save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; |
606 | #ifdef CONFIG_X86_64 |
607 | save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; |
608 | save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; |
609 | save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; |
610 | save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11]; |
611 | save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12]; |
612 | save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; |
613 | save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; |
614 | save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; |
615 | #endif |
616 | save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; |
617 | |
618 | /* Sync some non-GPR registers before encrypting */ |
619 | save->xcr0 = svm->vcpu.arch.xcr0; |
620 | save->pkru = svm->vcpu.arch.pkru; |
621 | save->xss = svm->vcpu.arch.ia32_xss; |
622 | save->dr6 = svm->vcpu.arch.dr6; |
623 | |
624 | if (sev_es_debug_swap_enabled) { |
625 | save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; |
626 | pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. " |
627 | "This will not work starting with Linux 6.10\n" ); |
628 | } |
629 | |
630 | pr_debug("Virtual Machine Save Area (VMSA):\n" ); |
631 | print_hex_dump_debug("" , DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); |
632 | |
633 | return 0; |
634 | } |
635 | |
636 | static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, |
637 | int *error) |
638 | { |
639 | struct sev_data_launch_update_vmsa vmsa; |
640 | struct vcpu_svm *svm = to_svm(vcpu); |
641 | int ret; |
642 | |
643 | if (vcpu->guest_debug) { |
644 | pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported" ); |
645 | return -EINVAL; |
646 | } |
647 | |
648 | /* Perform some pre-encryption checks against the VMSA */ |
649 | ret = sev_es_sync_vmsa(svm); |
650 | if (ret) |
651 | return ret; |
652 | |
653 | /* |
654 | * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of |
655 | * the VMSA memory content (i.e it will write the same memory region |
656 | * with the guest's key), so invalidate it first. |
657 | */ |
	clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
659 | |
660 | vmsa.reserved = 0; |
661 | vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; |
662 | vmsa.address = __sme_pa(svm->sev_es.vmsa); |
663 | vmsa.len = PAGE_SIZE; |
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
665 | if (ret) |
666 | return ret; |
667 | |
668 | vcpu->arch.guest_state_protected = true; |
669 | return 0; |
670 | } |
671 | |
672 | static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) |
673 | { |
674 | struct kvm_vcpu *vcpu; |
675 | unsigned long i; |
676 | int ret; |
677 | |
678 | if (!sev_es_guest(kvm)) |
679 | return -ENOTTY; |
680 | |
681 | kvm_for_each_vcpu(i, vcpu, kvm) { |
682 | ret = mutex_lock_killable(&vcpu->mutex); |
683 | if (ret) |
684 | return ret; |
685 | |
		ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);

		mutex_unlock(&vcpu->mutex);
689 | if (ret) |
690 | return ret; |
691 | } |
692 | |
693 | return 0; |
694 | } |
695 | |
696 | static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) |
697 | { |
698 | void __user *measure = (void __user *)(uintptr_t)argp->data; |
699 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
700 | struct sev_data_launch_measure data; |
701 | struct kvm_sev_launch_measure params; |
702 | void __user *p = NULL; |
703 | void *blob = NULL; |
704 | int ret; |
705 | |
706 | if (!sev_guest(kvm)) |
707 | return -ENOTTY; |
708 | |
	if (copy_from_user(&params, measure, sizeof(params)))
710 | return -EFAULT; |
711 | |
712 | memset(&data, 0, sizeof(data)); |
713 | |
714 | /* User wants to query the blob length */ |
715 | if (!params.len) |
716 | goto cmd; |
717 | |
718 | p = (void __user *)(uintptr_t)params.uaddr; |
719 | if (p) { |
720 | if (params.len > SEV_FW_BLOB_MAX_SIZE) |
721 | return -EINVAL; |
722 | |
		blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
724 | if (!blob) |
725 | return -ENOMEM; |
726 | |
727 | data.address = __psp_pa(blob); |
728 | data.len = params.len; |
729 | } |
730 | |
731 | cmd: |
732 | data.handle = sev->handle; |
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
734 | |
735 | /* |
736 | * If we query the session length, FW responded with expected data. |
737 | */ |
738 | if (!params.len) |
739 | goto done; |
740 | |
741 | if (ret) |
742 | goto e_free_blob; |
743 | |
744 | if (blob) { |
		if (copy_to_user(p, blob, params.len))
746 | ret = -EFAULT; |
747 | } |
748 | |
749 | done: |
750 | params.len = data.len; |
	if (copy_to_user(measure, &params, sizeof(params)))
752 | ret = -EFAULT; |
753 | e_free_blob: |
	kfree(blob);
755 | return ret; |
756 | } |
757 | |
758 | static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) |
759 | { |
760 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
761 | struct sev_data_launch_finish data; |
762 | |
763 | if (!sev_guest(kvm)) |
764 | return -ENOTTY; |
765 | |
766 | data.handle = sev->handle; |
	return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
768 | } |
769 | |
770 | static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) |
771 | { |
772 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
773 | struct kvm_sev_guest_status params; |
774 | struct sev_data_guest_status data; |
775 | int ret; |
776 | |
777 | if (!sev_guest(kvm)) |
778 | return -ENOTTY; |
779 | |
780 | memset(&data, 0, sizeof(data)); |
781 | |
782 | data.handle = sev->handle; |
	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
784 | if (ret) |
785 | return ret; |
786 | |
787 | params.policy = data.policy; |
788 | params.state = data.state; |
789 | params.handle = data.handle; |
790 | |
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
792 | ret = -EFAULT; |
793 | |
794 | return ret; |
795 | } |
796 | |
797 | static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, |
798 | unsigned long dst, int size, |
799 | int *error, bool enc) |
800 | { |
801 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
802 | struct sev_data_dbg data; |
803 | |
804 | data.reserved = 0; |
805 | data.handle = sev->handle; |
806 | data.dst_addr = dst; |
807 | data.src_addr = src; |
808 | data.len = size; |
809 | |
	return sev_issue_cmd(kvm,
			     enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
			     &data, error);
813 | } |
814 | |
815 | static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, |
816 | unsigned long dst_paddr, int sz, int *err) |
817 | { |
818 | int offset; |
819 | |
820 | /* |
821 | * Its safe to read more than we are asked, caller should ensure that |
822 | * destination has enough space. |
823 | */ |
824 | offset = src_paddr & 15; |
825 | src_paddr = round_down(src_paddr, 16); |
826 | sz = round_up(sz + offset, 16); |
827 | |
	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
829 | } |
830 | |
831 | static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, |
832 | void __user *dst_uaddr, |
833 | unsigned long dst_paddr, |
834 | int size, int *err) |
835 | { |
836 | struct page *tpage = NULL; |
837 | int ret, offset; |
838 | |
	/* If the inputs are not 16-byte aligned then use an intermediate buffer. */
840 | if (!IS_ALIGNED(dst_paddr, 16) || |
841 | !IS_ALIGNED(paddr, 16) || |
842 | !IS_ALIGNED(size, 16)) { |
843 | tpage = (void *)alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
844 | if (!tpage) |
845 | return -ENOMEM; |
846 | |
847 | dst_paddr = __sme_page_pa(tpage); |
848 | } |
849 | |
	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
851 | if (ret) |
852 | goto e_free; |
853 | |
854 | if (tpage) { |
855 | offset = paddr & 15; |
		if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
857 | ret = -EFAULT; |
858 | } |
859 | |
860 | e_free: |
861 | if (tpage) |
862 | __free_page(tpage); |
863 | |
864 | return ret; |
865 | } |
866 | |
867 | static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, |
868 | void __user *vaddr, |
869 | unsigned long dst_paddr, |
870 | void __user *dst_vaddr, |
871 | int size, int *error) |
872 | { |
873 | struct page *src_tpage = NULL; |
874 | struct page *dst_tpage = NULL; |
875 | int ret, len = size; |
876 | |
877 | /* If source buffer is not aligned then use an intermediate buffer */ |
878 | if (!IS_ALIGNED((unsigned long)vaddr, 16)) { |
879 | src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); |
880 | if (!src_tpage) |
881 | return -ENOMEM; |
882 | |
		if (copy_from_user(page_address(src_tpage), vaddr, size)) {
884 | __free_page(src_tpage); |
885 | return -EFAULT; |
886 | } |
887 | |
888 | paddr = __sme_page_pa(src_tpage); |
889 | } |
890 | |
891 | /* |
892 | * If destination buffer or length is not aligned then do read-modify-write: |
893 | * - decrypt destination in an intermediate buffer |
894 | * - copy the source buffer in an intermediate buffer |
895 | * - use the intermediate buffer as source buffer |
896 | */ |
897 | if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { |
898 | int dst_offset; |
899 | |
900 | dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); |
901 | if (!dst_tpage) { |
902 | ret = -ENOMEM; |
903 | goto e_free; |
904 | } |
905 | |
		ret = __sev_dbg_decrypt(kvm, dst_paddr,
					__sme_page_pa(dst_tpage), size, error);
908 | if (ret) |
909 | goto e_free; |
910 | |
911 | /* |
912 | * If source is kernel buffer then use memcpy() otherwise |
913 | * copy_from_user(). |
914 | */ |
915 | dst_offset = dst_paddr & 15; |
916 | |
917 | if (src_tpage) |
918 | memcpy(page_address(dst_tpage) + dst_offset, |
919 | page_address(src_tpage), size); |
920 | else { |
			if (copy_from_user(page_address(dst_tpage) + dst_offset,
					   vaddr, size)) {
923 | ret = -EFAULT; |
924 | goto e_free; |
925 | } |
926 | } |
927 | |
928 | paddr = __sme_page_pa(dst_tpage); |
929 | dst_paddr = round_down(dst_paddr, 16); |
930 | len = round_up(size, 16); |
931 | } |
932 | |
	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
934 | |
935 | e_free: |
936 | if (src_tpage) |
937 | __free_page(src_tpage); |
938 | if (dst_tpage) |
939 | __free_page(dst_tpage); |
940 | return ret; |
941 | } |
942 | |
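/*
 * Handle KVM_SEV_DBG_DECRYPT and KVM_SEV_DBG_ENCRYPT: walk the source and
 * destination ranges one page at a time, pinning both sides and issuing the
 * corresponding DBG_* firmware command for each chunk.
 */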
943 | static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) |
944 | { |
945 | unsigned long vaddr, vaddr_end, next_vaddr; |
946 | unsigned long dst_vaddr; |
947 | struct page **src_p, **dst_p; |
948 | struct kvm_sev_dbg debug; |
949 | unsigned long n; |
950 | unsigned int size; |
951 | int ret; |
952 | |
953 | if (!sev_guest(kvm)) |
954 | return -ENOTTY; |
955 | |
	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
957 | return -EFAULT; |
958 | |
959 | if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) |
960 | return -EINVAL; |
961 | if (!debug.dst_uaddr) |
962 | return -EINVAL; |
963 | |
964 | vaddr = debug.src_uaddr; |
965 | size = debug.len; |
966 | vaddr_end = vaddr + size; |
967 | dst_vaddr = debug.dst_uaddr; |
968 | |
969 | for (; vaddr < vaddr_end; vaddr = next_vaddr) { |
970 | int len, s_off, d_off; |
971 | |
972 | /* lock userspace source and destination page */ |
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
981 | } |
982 | |
983 | /* |
984 | * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify |
985 | * the pages; flush the destination too so that future accesses do not |
986 | * see stale data. |
987 | */ |
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);
990 | |
991 | /* |
992 | * Since user buffer may not be page aligned, calculate the |
993 | * offset within the page. |
994 | */ |
995 | s_off = vaddr & ~PAGE_MASK; |
996 | d_off = dst_vaddr & ~PAGE_MASK; |
997 | len = min_t(size_t, (PAGE_SIZE - s_off), size); |
998 | |
999 | if (dec) |
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     (void __user *)dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);
1015 | |
1016 | if (ret) |
1017 | goto err; |
1018 | |
1019 | next_vaddr = vaddr + len; |
1020 | dst_vaddr = dst_vaddr + len; |
1021 | size -= len; |
1022 | } |
1023 | err: |
1024 | return ret; |
1025 | } |
1026 | |
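/*
 * Handle KVM_SEV_LAUNCH_SECRET: pin the target guest pages, verify they are
 * physically contiguous, and inject the secret via LAUNCH_UPDATE_SECRET.
 */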
1027 | static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1028 | { |
1029 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1030 | struct sev_data_launch_secret data; |
1031 | struct kvm_sev_launch_secret params; |
1032 | struct page **pages; |
1033 | void *blob, *hdr; |
1034 | unsigned long n, i; |
1035 | int ret, offset; |
1036 | |
1037 | if (!sev_guest(kvm)) |
1038 | return -ENOTTY; |
1039 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
1041 | return -EFAULT; |
1042 | |
	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);
1046 | |
1047 | /* |
1048 | * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in |
1049 | * place; the cache may contain the data that was written unencrypted. |
1050 | */ |
	sev_clflush_pages(pages, n);
1052 | |
1053 | /* |
1054 | * The secret must be copied into contiguous memory region, lets verify |
1055 | * that userspace memory pages are contiguous before we issue command. |
1056 | */ |
1057 | if (get_num_contig_pages(idx: 0, inpages: pages, npages: n) != n) { |
1058 | ret = -EINVAL; |
1059 | goto e_unpin_memory; |
1060 | } |
1061 | |
1062 | memset(&data, 0, sizeof(data)); |
1063 | |
1064 | offset = params.guest_uaddr & (PAGE_SIZE - 1); |
1065 | data.guest_address = __sme_page_pa(pages[0]) + offset; |
1066 | data.guest_len = params.guest_len; |
1067 | |
	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
1071 | goto e_unpin_memory; |
1072 | } |
1073 | |
1074 | data.trans_address = __psp_pa(blob); |
1075 | data.trans_len = params.trans_len; |
1076 | |
	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
1080 | goto e_free_blob; |
1081 | } |
1082 | data.hdr_address = __psp_pa(hdr); |
1083 | data.hdr_len = params.hdr_len; |
1084 | |
1085 | data.handle = sev->handle; |
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);

	kfree(hdr);
1089 | |
1090 | e_free_blob: |
	kfree(blob);
1092 | e_unpin_memory: |
1093 | /* content of memory is updated, mark pages dirty */ |
1094 | for (i = 0; i < n; i++) { |
1095 | set_page_dirty_lock(pages[i]); |
1096 | mark_page_accessed(pages[i]); |
1097 | } |
	sev_unpin_memory(kvm, pages, n);
1099 | return ret; |
1100 | } |
1101 | |
1102 | static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1103 | { |
1104 | void __user *report = (void __user *)(uintptr_t)argp->data; |
1105 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1106 | struct sev_data_attestation_report data; |
1107 | struct kvm_sev_attestation_report params; |
1108 | void __user *p; |
1109 | void *blob = NULL; |
1110 | int ret; |
1111 | |
1112 | if (!sev_guest(kvm)) |
1113 | return -ENOTTY; |
1114 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
1116 | return -EFAULT; |
1117 | |
1118 | memset(&data, 0, sizeof(data)); |
1119 | |
1120 | /* User wants to query the blob length */ |
1121 | if (!params.len) |
1122 | goto cmd; |
1123 | |
1124 | p = (void __user *)(uintptr_t)params.uaddr; |
1125 | if (p) { |
1126 | if (params.len > SEV_FW_BLOB_MAX_SIZE) |
1127 | return -EINVAL; |
1128 | |
		blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
1130 | if (!blob) |
1131 | return -ENOMEM; |
1132 | |
1133 | data.address = __psp_pa(blob); |
1134 | data.len = params.len; |
1135 | memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce)); |
1136 | } |
1137 | cmd: |
1138 | data.handle = sev->handle; |
	ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
	/*
	 * If we only queried the blob length, the firmware has filled in the
	 * expected length in data.len; skip copying the report blob.
	 */
1143 | if (!params.len) |
1144 | goto done; |
1145 | |
1146 | if (ret) |
1147 | goto e_free_blob; |
1148 | |
1149 | if (blob) { |
		if (copy_to_user(p, blob, params.len))
1151 | ret = -EFAULT; |
1152 | } |
1153 | |
1154 | done: |
1155 | params.len = data.len; |
	if (copy_to_user(report, &params, sizeof(params)))
1157 | ret = -EFAULT; |
1158 | e_free_blob: |
	kfree(blob);
1160 | return ret; |
1161 | } |
1162 | |
1163 | /* Userspace wants to query session length. */ |
1164 | static int |
1165 | __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, |
1166 | struct kvm_sev_send_start *params) |
1167 | { |
1168 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1169 | struct sev_data_send_start data; |
1170 | int ret; |
1171 | |
1172 | memset(&data, 0, sizeof(data)); |
1173 | data.handle = sev->handle; |
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);

	params->session_len = data.session_len;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
			 sizeof(struct kvm_sev_send_start)))
1179 | ret = -EFAULT; |
1180 | |
1181 | return ret; |
1182 | } |
1183 | |
1184 | static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1185 | { |
1186 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1187 | struct sev_data_send_start data; |
1188 | struct kvm_sev_send_start params; |
1189 | void *amd_certs, *session_data; |
1190 | void *pdh_cert, *plat_certs; |
1191 | int ret; |
1192 | |
1193 | if (!sev_guest(kvm)) |
1194 | return -ENOTTY; |
1195 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_send_start)))
1198 | return -EFAULT; |
1199 | |
1200 | /* if session_len is zero, userspace wants to query the session length */ |
1201 | if (!params.session_len) |
		return __sev_send_start_query_session_length(kvm, argp,
							     &params);
1204 | |
1205 | /* some sanity checks */ |
1206 | if (!params.pdh_cert_uaddr || !params.pdh_cert_len || |
1207 | !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE) |
1208 | return -EINVAL; |
1209 | |
1210 | /* allocate the memory to hold the session data blob */ |
	session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
1212 | if (!session_data) |
1213 | return -ENOMEM; |
1214 | |
1215 | /* copy the certificate blobs from userspace */ |
	pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
				      params.pdh_cert_len);
	if (IS_ERR(pdh_cert)) {
		ret = PTR_ERR(pdh_cert);
1220 | goto e_free_session; |
1221 | } |
1222 | |
	plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
					params.plat_certs_len);
	if (IS_ERR(plat_certs)) {
		ret = PTR_ERR(plat_certs);
1227 | goto e_free_pdh; |
1228 | } |
1229 | |
	amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
				       params.amd_certs_len);
	if (IS_ERR(amd_certs)) {
		ret = PTR_ERR(amd_certs);
1234 | goto e_free_plat_cert; |
1235 | } |
1236 | |
1237 | /* populate the FW SEND_START field with system physical address */ |
1238 | memset(&data, 0, sizeof(data)); |
1239 | data.pdh_cert_address = __psp_pa(pdh_cert); |
1240 | data.pdh_cert_len = params.pdh_cert_len; |
1241 | data.plat_certs_address = __psp_pa(plat_certs); |
1242 | data.plat_certs_len = params.plat_certs_len; |
1243 | data.amd_certs_address = __psp_pa(amd_certs); |
1244 | data.amd_certs_len = params.amd_certs_len; |
1245 | data.session_address = __psp_pa(session_data); |
1246 | data.session_len = params.session_len; |
1247 | data.handle = sev->handle; |
1248 | |
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);

	if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
				 session_data, params.session_len)) {
1253 | ret = -EFAULT; |
1254 | goto e_free_amd_cert; |
1255 | } |
1256 | |
1257 | params.policy = data.policy; |
1258 | params.session_len = data.session_len; |
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
			 sizeof(struct kvm_sev_send_start)))
1261 | ret = -EFAULT; |
1262 | |
1263 | e_free_amd_cert: |
	kfree(amd_certs);
e_free_plat_cert:
	kfree(plat_certs);
e_free_pdh:
	kfree(pdh_cert);
e_free_session:
	kfree(session_data);
1271 | return ret; |
1272 | } |
1273 | |
1274 | /* Userspace wants to query either header or trans length. */ |
1275 | static int |
1276 | __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, |
1277 | struct kvm_sev_send_update_data *params) |
1278 | { |
1279 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1280 | struct sev_data_send_update_data data; |
1281 | int ret; |
1282 | |
1283 | memset(&data, 0, sizeof(data)); |
1284 | data.handle = sev->handle; |
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
1286 | |
1287 | params->hdr_len = data.hdr_len; |
1288 | params->trans_len = data.trans_len; |
1289 | |
	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
			 sizeof(struct kvm_sev_send_update_data)))
1292 | ret = -EFAULT; |
1293 | |
1294 | return ret; |
1295 | } |
1296 | |
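/*
 * Handle KVM_SEV_SEND_UPDATE_DATA: pin the (at most page-sized) guest range
 * and have the firmware re-encrypt it for transport, copying the resulting
 * packet header and transport buffer back to userspace.
 */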
1297 | static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1298 | { |
1299 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1300 | struct sev_data_send_update_data data; |
1301 | struct kvm_sev_send_update_data params; |
1302 | void *hdr, *trans_data; |
1303 | struct page **guest_page; |
1304 | unsigned long n; |
1305 | int ret, offset; |
1306 | |
1307 | if (!sev_guest(kvm)) |
1308 | return -ENOTTY; |
1309 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_send_update_data)))
1312 | return -EFAULT; |
1313 | |
1314 | /* userspace wants to query either header or trans length */ |
1315 | if (!params.trans_len || !params.hdr_len) |
		return __sev_send_update_data_query_lengths(kvm, argp, &params);
1317 | |
1318 | if (!params.trans_uaddr || !params.guest_uaddr || |
1319 | !params.guest_len || !params.hdr_uaddr) |
1320 | return -EINVAL; |
1321 | |
1322 | /* Check if we are crossing the page boundary */ |
1323 | offset = params.guest_uaddr & (PAGE_SIZE - 1); |
1324 | if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE) |
1325 | return -EINVAL; |
1326 | |
1327 | /* Pin guest memory */ |
	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
				    PAGE_SIZE, &n, 0);
	if (IS_ERR(guest_page))
		return PTR_ERR(guest_page);
1332 | |
1333 | /* allocate memory for header and transport buffer */ |
1334 | ret = -ENOMEM; |
	hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
1336 | if (!hdr) |
1337 | goto e_unpin; |
1338 | |
	trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
1340 | if (!trans_data) |
1341 | goto e_free_hdr; |
1342 | |
1343 | memset(&data, 0, sizeof(data)); |
1344 | data.hdr_address = __psp_pa(hdr); |
1345 | data.hdr_len = params.hdr_len; |
1346 | data.trans_address = __psp_pa(trans_data); |
1347 | data.trans_len = params.trans_len; |
1348 | |
1349 | /* The SEND_UPDATE_DATA command requires C-bit to be always set. */ |
1350 | data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; |
1351 | data.guest_address |= sev_me_mask; |
1352 | data.guest_len = params.guest_len; |
1353 | data.handle = sev->handle; |
1354 | |
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
1356 | |
1357 | if (ret) |
1358 | goto e_free_trans_data; |
1359 | |
1360 | /* copy transport buffer to user space */ |
	if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
			 trans_data, params.trans_len)) {
1363 | ret = -EFAULT; |
1364 | goto e_free_trans_data; |
1365 | } |
1366 | |
1367 | /* Copy packet header to userspace. */ |
	if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
			 params.hdr_len))
1370 | ret = -EFAULT; |
1371 | |
1372 | e_free_trans_data: |
	kfree(trans_data);
e_free_hdr:
	kfree(hdr);
e_unpin:
	sev_unpin_memory(kvm, guest_page, n);
1378 | |
1379 | return ret; |
1380 | } |
1381 | |
1382 | static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1383 | { |
1384 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1385 | struct sev_data_send_finish data; |
1386 | |
1387 | if (!sev_guest(kvm)) |
1388 | return -ENOTTY; |
1389 | |
1390 | data.handle = sev->handle; |
	return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
1392 | } |
1393 | |
1394 | static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1395 | { |
1396 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1397 | struct sev_data_send_cancel data; |
1398 | |
1399 | if (!sev_guest(kvm)) |
1400 | return -ENOTTY; |
1401 | |
1402 | data.handle = sev->handle; |
	return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
1404 | } |
1405 | |
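/*
 * Handle KVM_SEV_RECEIVE_START: create the memory encryption context for an
 * incoming migration from the PDH certificate and session blobs, then bind
 * the VM's ASID to the new firmware handle.
 */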
1406 | static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1407 | { |
1408 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1409 | struct sev_data_receive_start start; |
1410 | struct kvm_sev_receive_start params; |
1411 | int *error = &argp->error; |
1412 | void *session_data; |
1413 | void *pdh_data; |
1414 | int ret; |
1415 | |
1416 | if (!sev_guest(kvm)) |
1417 | return -ENOTTY; |
1418 | |
1419 | /* Get parameter from the userspace */ |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_receive_start)))
1422 | return -EFAULT; |
1423 | |
1424 | /* some sanity checks */ |
1425 | if (!params.pdh_uaddr || !params.pdh_len || |
1426 | !params.session_uaddr || !params.session_len) |
1427 | return -EINVAL; |
1428 | |
	pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
	if (IS_ERR(pdh_data))
		return PTR_ERR(pdh_data);
1432 | |
	session_data = psp_copy_user_blob(params.session_uaddr,
					  params.session_len);
	if (IS_ERR(session_data)) {
		ret = PTR_ERR(session_data);
1437 | goto e_free_pdh; |
1438 | } |
1439 | |
1440 | memset(&start, 0, sizeof(start)); |
1441 | start.handle = params.handle; |
1442 | start.policy = params.policy; |
1443 | start.pdh_cert_address = __psp_pa(pdh_data); |
1444 | start.pdh_cert_len = params.pdh_len; |
1445 | start.session_address = __psp_pa(session_data); |
1446 | start.session_len = params.session_len; |
1447 | |
1448 | /* create memory encryption context */ |
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
			      error);
1451 | if (ret) |
1452 | goto e_free_session; |
1453 | |
1454 | /* Bind ASID to this guest */ |
	ret = sev_bind_asid(kvm, start.handle, error);
	if (ret) {
		sev_decommission(start.handle);
1458 | goto e_free_session; |
1459 | } |
1460 | |
1461 | params.handle = start.handle; |
	if (copy_to_user((void __user *)(uintptr_t)argp->data,
			 &params, sizeof(struct kvm_sev_receive_start))) {
		ret = -EFAULT;
		sev_unbind_asid(kvm, start.handle);
1466 | goto e_free_session; |
1467 | } |
1468 | |
1469 | sev->handle = start.handle; |
1470 | sev->fd = argp->sev_fd; |
1471 | |
1472 | e_free_session: |
	kfree(session_data);
e_free_pdh:
	kfree(pdh_data);
1476 | |
1477 | return ret; |
1478 | } |
1479 | |
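/*
 * Handle KVM_SEV_RECEIVE_UPDATE_DATA: copy the packet header and transport
 * buffer from userspace, pin the target guest page, and have the firmware
 * decrypt the transport data and re-encrypt it with the guest's key.
 */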
1480 | static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1481 | { |
1482 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1483 | struct kvm_sev_receive_update_data params; |
1484 | struct sev_data_receive_update_data data; |
1485 | void *hdr = NULL, *trans = NULL; |
1486 | struct page **guest_page; |
1487 | unsigned long n; |
1488 | int ret, offset; |
1489 | |
1490 | if (!sev_guest(kvm)) |
1491 | return -EINVAL; |
1492 | |
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_receive_update_data)))
1495 | return -EFAULT; |
1496 | |
1497 | if (!params.hdr_uaddr || !params.hdr_len || |
1498 | !params.guest_uaddr || !params.guest_len || |
1499 | !params.trans_uaddr || !params.trans_len) |
1500 | return -EINVAL; |
1501 | |
1502 | /* Check if we are crossing the page boundary */ |
1503 | offset = params.guest_uaddr & (PAGE_SIZE - 1); |
1504 | if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE) |
1505 | return -EINVAL; |
1506 | |
	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr))
		return PTR_ERR(hdr);
1510 | |
	trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
1514 | goto e_free_hdr; |
1515 | } |
1516 | |
1517 | memset(&data, 0, sizeof(data)); |
1518 | data.hdr_address = __psp_pa(hdr); |
1519 | data.hdr_len = params.hdr_len; |
1520 | data.trans_address = __psp_pa(trans); |
1521 | data.trans_len = params.trans_len; |
1522 | |
1523 | /* Pin guest memory */ |
	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
				    PAGE_SIZE, &n, 1);
	if (IS_ERR(guest_page)) {
		ret = PTR_ERR(guest_page);
1528 | goto e_free_trans; |
1529 | } |
1530 | |
1531 | /* |
1532 | * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP |
1533 | * encrypts the written data with the guest's key, and the cache may |
1534 | * contain dirty, unencrypted data. |
1535 | */ |
	sev_clflush_pages(guest_page, n);
1537 | |
1538 | /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ |
1539 | data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; |
1540 | data.guest_address |= sev_me_mask; |
1541 | data.guest_len = params.guest_len; |
1542 | data.handle = sev->handle; |
1543 | |
	ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
			    &argp->error);

	sev_unpin_memory(kvm, guest_page, n);
1548 | |
1549 | e_free_trans: |
	kfree(trans);
e_free_hdr:
	kfree(hdr);
1553 | |
1554 | return ret; |
1555 | } |
1556 | |
1557 | static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) |
1558 | { |
1559 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1560 | struct sev_data_receive_finish data; |
1561 | |
1562 | if (!sev_guest(kvm)) |
1563 | return -ENOTTY; |
1564 | |
1565 | data.handle = sev->handle; |
	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
1567 | } |
1568 | |
1569 | static bool is_cmd_allowed_from_mirror(u32 cmd_id) |
1570 | { |
1571 | /* |
1572 | * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES |
1573 | * active mirror VMs. Also allow the debugging and status commands. |
1574 | */ |
1575 | if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA || |
1576 | cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT || |
1577 | cmd_id == KVM_SEV_DBG_ENCRYPT) |
1578 | return true; |
1579 | |
1580 | return false; |
1581 | } |
1582 | |
1583 | static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) |
1584 | { |
1585 | struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm: dst_kvm)->sev_info; |
1586 | struct kvm_sev_info *src_sev = &to_kvm_svm(kvm: src_kvm)->sev_info; |
1587 | int r = -EBUSY; |
1588 | |
1589 | if (dst_kvm == src_kvm) |
1590 | return -EINVAL; |
1591 | |
1592 | /* |
1593 | * Bail if these VMs are already involved in a migration to avoid |
1594 | * deadlock between two VMs trying to migrate to/from each other. |
1595 | */ |
	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
		return -EBUSY;

	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
1600 | goto release_dst; |
1601 | |
1602 | r = -EINTR; |
1603 | if (mutex_lock_killable(&dst_kvm->lock)) |
1604 | goto release_src; |
	if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING))
1606 | goto unlock_dst; |
1607 | return 0; |
1608 | |
1609 | unlock_dst: |
	mutex_unlock(&dst_kvm->lock);
release_src:
	atomic_set_release(&src_sev->migration_in_progress, 0);
release_dst:
	atomic_set_release(&dst_sev->migration_in_progress, 0);
1615 | return r; |
1616 | } |
1617 | |
1618 | static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) |
1619 | { |
1620 | struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm: dst_kvm)->sev_info; |
1621 | struct kvm_sev_info *src_sev = &to_kvm_svm(kvm: src_kvm)->sev_info; |
1622 | |
1623 | mutex_unlock(lock: &dst_kvm->lock); |
1624 | mutex_unlock(lock: &src_kvm->lock); |
1625 | atomic_set_release(v: &dst_sev->migration_in_progress, i: 0); |
1626 | atomic_set_release(v: &src_sev->migration_in_progress, i: 0); |
1627 | } |
1628 | |
1629 | /* vCPU mutex subclasses. */ |
1630 | enum sev_migration_role { |
1631 | SEV_MIGRATION_SOURCE = 0, |
1632 | SEV_MIGRATION_TARGET, |
1633 | SEV_NR_MIGRATION_ROLES, |
1634 | }; |
1635 | |
1636 | static int sev_lock_vcpus_for_migration(struct kvm *kvm, |
1637 | enum sev_migration_role role) |
1638 | { |
1639 | struct kvm_vcpu *vcpu; |
1640 | unsigned long i, j; |
1641 | |
1642 | kvm_for_each_vcpu(i, vcpu, kvm) { |
		if (mutex_lock_killable_nested(&vcpu->mutex, role))
1644 | goto out_unlock; |
1645 | |
1646 | #ifdef CONFIG_PROVE_LOCKING |
1647 | if (!i) |
1648 | /* |
1649 | * Reset the role to one that avoids colliding with |
1650 | * the role used for the first vcpu mutex. |
1651 | */ |
1652 | role = SEV_NR_MIGRATION_ROLES; |
1653 | else |
1654 | mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); |
1655 | #endif |
1656 | } |
1657 | |
1658 | return 0; |
1659 | |
1660 | out_unlock: |
1661 | |
1662 | kvm_for_each_vcpu(j, vcpu, kvm) { |
1663 | if (i == j) |
1664 | break; |
1665 | |
1666 | #ifdef CONFIG_PROVE_LOCKING |
1667 | if (j) |
1668 | mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); |
1669 | #endif |
1670 | |
		mutex_unlock(&vcpu->mutex);
1672 | } |
1673 | return -EINTR; |
1674 | } |
1675 | |
1676 | static void sev_unlock_vcpus_for_migration(struct kvm *kvm) |
1677 | { |
1678 | struct kvm_vcpu *vcpu; |
1679 | unsigned long i; |
1680 | bool first = true; |
1681 | |
1682 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1683 | if (first) |
1684 | first = false; |
1685 | else |
1686 | mutex_acquire(&vcpu->mutex.dep_map, |
1687 | SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); |
1688 | |
		mutex_unlock(&vcpu->mutex);
1690 | } |
1691 | } |
1692 | |
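/*
 * Transfer all SEV state from the source VM to the destination VM as part of
 * intra-host migration: ASID, firmware handle, locked-page accounting,
 * region and mirror lists, and (for SEV-ES) the per-vCPU VMSA/GHCB state.
 */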
1693 | static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) |
1694 | { |
1695 | struct kvm_sev_info *dst = &to_kvm_svm(kvm: dst_kvm)->sev_info; |
1696 | struct kvm_sev_info *src = &to_kvm_svm(kvm: src_kvm)->sev_info; |
1697 | struct kvm_vcpu *dst_vcpu, *src_vcpu; |
1698 | struct vcpu_svm *dst_svm, *src_svm; |
1699 | struct kvm_sev_info *mirror; |
1700 | unsigned long i; |
1701 | |
1702 | dst->active = true; |
1703 | dst->asid = src->asid; |
1704 | dst->handle = src->handle; |
1705 | dst->pages_locked = src->pages_locked; |
1706 | dst->enc_context_owner = src->enc_context_owner; |
1707 | dst->es_active = src->es_active; |
1708 | |
1709 | src->asid = 0; |
1710 | src->active = false; |
1711 | src->handle = 0; |
1712 | src->pages_locked = 0; |
1713 | src->enc_context_owner = NULL; |
1714 | src->es_active = false; |
1715 | |
1716 | list_cut_before(list: &dst->regions_list, head: &src->regions_list, entry: &src->regions_list); |
1717 | |
1718 | /* |
1719 | * If this VM has mirrors, "transfer" each mirror's refcount of the |
1720 | * source to the destination (this KVM). The caller holds a reference |
1721 | * to the source, so there's no danger of use-after-free. |
1722 | */ |
1723 | list_cut_before(list: &dst->mirror_vms, head: &src->mirror_vms, entry: &src->mirror_vms); |
1724 | list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) { |
1725 | kvm_get_kvm(kvm: dst_kvm); |
1726 | kvm_put_kvm(kvm: src_kvm); |
1727 | mirror->enc_context_owner = dst_kvm; |
1728 | } |
1729 | |
1730 | /* |
1731 | * If this VM is a mirror, remove the old mirror from the owners list |
1732 | * and add the new mirror to the list. |
1733 | */ |
1734 | if (is_mirroring_enc_context(kvm: dst_kvm)) { |
1735 | struct kvm_sev_info *owner_sev_info = |
1736 | &to_kvm_svm(kvm: dst->enc_context_owner)->sev_info; |
1737 | |
1738 | list_del(entry: &src->mirror_entry); |
1739 | list_add_tail(new: &dst->mirror_entry, head: &owner_sev_info->mirror_vms); |
1740 | } |
1741 | |
1742 | kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { |
1743 | dst_svm = to_svm(vcpu: dst_vcpu); |
1744 | |
1745 | sev_init_vmcb(svm: dst_svm); |
1746 | |
1747 | if (!dst->es_active) |
1748 | continue; |
1749 | |
1750 | /* |
1751 | * Note, the source is not required to have the same number of |
1752 | * vCPUs as the destination when migrating a vanilla SEV VM. |
1753 | */ |
1754 | src_vcpu = kvm_get_vcpu(kvm: src_kvm, i); |
1755 | src_svm = to_svm(vcpu: src_vcpu); |
1756 | |
1757 | /* |
1758 | * Transfer VMSA and GHCB state to the destination. Nullify and |
1759 | * clear source fields as appropriate, the state now belongs to |
1760 | * the destination. |
1761 | */ |
1762 | memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es)); |
1763 | dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa; |
1764 | dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa; |
1765 | dst_vcpu->arch.guest_state_protected = true; |
1766 | |
1767 | memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es)); |
1768 | src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE; |
1769 | src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; |
1770 | src_vcpu->arch.guest_state_protected = false; |
1771 | } |
1772 | } |
1773 | |
1774 | static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) |
1775 | { |
1776 | struct kvm_vcpu *src_vcpu; |
1777 | unsigned long i; |
1778 | |
1779 | if (!sev_es_guest(kvm: src)) |
1780 | return 0; |
1781 | |
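	/*
	 * Each destination vCPU inherits the corresponding source vCPU's
	 * encrypted VMSA, so for SEV-ES the vCPU counts must match and every
	 * source vCPU must already have its guest state protected.
	 */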
1782 | if (atomic_read(v: &src->online_vcpus) != atomic_read(v: &dst->online_vcpus)) |
1783 | return -EINVAL; |
1784 | |
1785 | kvm_for_each_vcpu(i, src_vcpu, src) { |
1786 | if (!src_vcpu->arch.guest_state_protected) |
1787 | return -EINVAL; |
1788 | } |
1789 | |
1790 | return 0; |
1791 | } |
1792 | |
1793 | int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) |
1794 | { |
1795 | struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; |
1796 | struct kvm_sev_info *src_sev, *cg_cleanup_sev; |
1797 | struct fd f = fdget(fd: source_fd); |
1798 | struct kvm *source_kvm; |
1799 | bool charged = false; |
1800 | int ret; |
1801 | |
1802 | if (!f.file) |
1803 | return -EBADF; |
1804 | |
1805 | if (!file_is_kvm(file: f.file)) { |
1806 | ret = -EBADF; |
1807 | goto out_fput; |
1808 | } |
1809 | |
1810 | source_kvm = f.file->private_data; |
1811 | ret = sev_lock_two_vms(dst_kvm: kvm, src_kvm: source_kvm); |
1812 | if (ret) |
1813 | goto out_fput; |
1814 | |
1815 | if (sev_guest(kvm) || !sev_guest(kvm: source_kvm)) { |
1816 | ret = -EINVAL; |
1817 | goto out_unlock; |
1818 | } |
1819 | |
1820 | src_sev = &to_kvm_svm(kvm: source_kvm)->sev_info; |
1821 | |
1822 | dst_sev->misc_cg = get_current_misc_cg(); |
1823 | cg_cleanup_sev = dst_sev; |
1824 | if (dst_sev->misc_cg != src_sev->misc_cg) { |
1825 | ret = sev_misc_cg_try_charge(sev: dst_sev); |
1826 | if (ret) |
1827 | goto out_dst_cgroup; |
1828 | charged = true; |
1829 | } |
1830 | |
1831 | ret = sev_lock_vcpus_for_migration(kvm, role: SEV_MIGRATION_SOURCE); |
1832 | if (ret) |
1833 | goto out_dst_cgroup; |
1834 | ret = sev_lock_vcpus_for_migration(kvm: source_kvm, role: SEV_MIGRATION_TARGET); |
1835 | if (ret) |
1836 | goto out_dst_vcpu; |
1837 | |
1838 | ret = sev_check_source_vcpus(dst: kvm, src: source_kvm); |
1839 | if (ret) |
1840 | goto out_source_vcpu; |
1841 | |
1842 | sev_migrate_from(dst_kvm: kvm, src_kvm: source_kvm); |
1843 | kvm_vm_dead(kvm: source_kvm); |
1844 | cg_cleanup_sev = src_sev; |
1845 | ret = 0; |
1846 | |
1847 | out_source_vcpu: |
1848 | sev_unlock_vcpus_for_migration(kvm: source_kvm); |
1849 | out_dst_vcpu: |
1850 | sev_unlock_vcpus_for_migration(kvm); |
1851 | out_dst_cgroup: |
1852 | /* Operates on the source on success, on the destination on failure. */ |
1853 | if (charged) |
1854 | sev_misc_cg_uncharge(sev: cg_cleanup_sev); |
1855 | put_misc_cg(cg: cg_cleanup_sev->misc_cg); |
1856 | cg_cleanup_sev->misc_cg = NULL; |
1857 | out_unlock: |
1858 | sev_unlock_two_vms(dst_kvm: kvm, src_kvm: source_kvm); |
1859 | out_fput: |
1860 | fdput(fd: f); |
1861 | return ret; |
1862 | } |
1863 | |
1864 | int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) |
1865 | { |
1866 | struct kvm_sev_cmd sev_cmd; |
1867 | int r; |
1868 | |
1869 | if (!sev_enabled) |
1870 | return -ENOTTY; |
1871 | |
1872 | if (!argp) |
1873 | return 0; |
1874 | |
1875 | if (copy_from_user(to: &sev_cmd, from: argp, n: sizeof(struct kvm_sev_cmd))) |
1876 | return -EFAULT; |
1877 | |
1878 | mutex_lock(&kvm->lock); |
1879 | |
1880 | /* Only the enc_context_owner handles some memory enc operations. */ |
1881 | if (is_mirroring_enc_context(kvm) && |
1882 | !is_cmd_allowed_from_mirror(cmd_id: sev_cmd.id)) { |
1883 | r = -EINVAL; |
1884 | goto out; |
1885 | } |
1886 | |
1887 | switch (sev_cmd.id) { |
1888 | case KVM_SEV_ES_INIT: |
1889 | if (!sev_es_enabled) { |
1890 | r = -ENOTTY; |
1891 | goto out; |
1892 | } |
1893 | fallthrough; |
1894 | case KVM_SEV_INIT: |
1895 | r = sev_guest_init(kvm, argp: &sev_cmd); |
1896 | break; |
1897 | case KVM_SEV_LAUNCH_START: |
1898 | r = sev_launch_start(kvm, argp: &sev_cmd); |
1899 | break; |
1900 | case KVM_SEV_LAUNCH_UPDATE_DATA: |
1901 | r = sev_launch_update_data(kvm, argp: &sev_cmd); |
1902 | break; |
1903 | case KVM_SEV_LAUNCH_UPDATE_VMSA: |
1904 | r = sev_launch_update_vmsa(kvm, argp: &sev_cmd); |
1905 | break; |
1906 | case KVM_SEV_LAUNCH_MEASURE: |
1907 | r = sev_launch_measure(kvm, argp: &sev_cmd); |
1908 | break; |
1909 | case KVM_SEV_LAUNCH_FINISH: |
1910 | r = sev_launch_finish(kvm, argp: &sev_cmd); |
1911 | break; |
1912 | case KVM_SEV_GUEST_STATUS: |
1913 | r = sev_guest_status(kvm, argp: &sev_cmd); |
1914 | break; |
1915 | case KVM_SEV_DBG_DECRYPT: |
1916 | r = sev_dbg_crypt(kvm, argp: &sev_cmd, dec: true); |
1917 | break; |
1918 | case KVM_SEV_DBG_ENCRYPT: |
1919 | r = sev_dbg_crypt(kvm, argp: &sev_cmd, dec: false); |
1920 | break; |
1921 | case KVM_SEV_LAUNCH_SECRET: |
1922 | r = sev_launch_secret(kvm, argp: &sev_cmd); |
1923 | break; |
1924 | case KVM_SEV_GET_ATTESTATION_REPORT: |
1925 | r = sev_get_attestation_report(kvm, argp: &sev_cmd); |
1926 | break; |
1927 | case KVM_SEV_SEND_START: |
1928 | r = sev_send_start(kvm, argp: &sev_cmd); |
1929 | break; |
1930 | case KVM_SEV_SEND_UPDATE_DATA: |
1931 | r = sev_send_update_data(kvm, argp: &sev_cmd); |
1932 | break; |
1933 | case KVM_SEV_SEND_FINISH: |
1934 | r = sev_send_finish(kvm, argp: &sev_cmd); |
1935 | break; |
1936 | case KVM_SEV_SEND_CANCEL: |
1937 | r = sev_send_cancel(kvm, argp: &sev_cmd); |
1938 | break; |
1939 | case KVM_SEV_RECEIVE_START: |
1940 | r = sev_receive_start(kvm, argp: &sev_cmd); |
1941 | break; |
1942 | case KVM_SEV_RECEIVE_UPDATE_DATA: |
1943 | r = sev_receive_update_data(kvm, argp: &sev_cmd); |
1944 | break; |
1945 | case KVM_SEV_RECEIVE_FINISH: |
1946 | r = sev_receive_finish(kvm, argp: &sev_cmd); |
1947 | break; |
1948 | default: |
1949 | r = -EINVAL; |
1950 | goto out; |
1951 | } |
1952 | |
1953 | if (copy_to_user(to: argp, from: &sev_cmd, n: sizeof(struct kvm_sev_cmd))) |
1954 | r = -EFAULT; |
1955 | |
1956 | out: |
1957 | mutex_unlock(lock: &kvm->lock); |
1958 | return r; |
1959 | } |
1960 | |
1961 | int sev_mem_enc_register_region(struct kvm *kvm, |
1962 | struct kvm_enc_region *range) |
1963 | { |
1964 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
1965 | struct enc_region *region; |
1966 | int ret = 0; |
1967 | |
1968 | if (!sev_guest(kvm)) |
1969 | return -ENOTTY; |
1970 | |
1971 | /* If kvm is mirroring encryption context it isn't responsible for it */ |
1972 | if (is_mirroring_enc_context(kvm)) |
1973 | return -EINVAL; |
1974 | |
1975 | if (range->addr > ULONG_MAX || range->size > ULONG_MAX) |
1976 | return -EINVAL; |
1977 | |
1978 | region = kzalloc(size: sizeof(*region), GFP_KERNEL_ACCOUNT); |
1979 | if (!region) |
1980 | return -ENOMEM; |
1981 | |
1982 | mutex_lock(&kvm->lock); |
1983 | region->pages = sev_pin_memory(kvm, uaddr: range->addr, ulen: range->size, n: ®ion->npages, write: 1); |
1984 | if (IS_ERR(ptr: region->pages)) { |
1985 | ret = PTR_ERR(ptr: region->pages); |
1986 | mutex_unlock(lock: &kvm->lock); |
1987 | goto e_free; |
1988 | } |
1989 | |
1990 | /* |
1991 | * The guest may change the memory encryption attribute from C=0 -> C=1 |
	 * or vice versa for this memory range. Let's make sure caches are
1993 | * flushed to ensure that guest data gets written into memory with |
1994 | * correct C-bit. Note, this must be done before dropping kvm->lock, |
1995 | * as region and its array of pages can be freed by a different task |
1996 | * once kvm->lock is released. |
1997 | */ |
1998 | sev_clflush_pages(pages: region->pages, npages: region->npages); |
1999 | |
2000 | region->uaddr = range->addr; |
2001 | region->size = range->size; |
2002 | |
2003 | list_add_tail(new: ®ion->list, head: &sev->regions_list); |
2004 | mutex_unlock(lock: &kvm->lock); |
2005 | |
2006 | return ret; |
2007 | |
2008 | e_free: |
2009 | kfree(objp: region); |
2010 | return ret; |
2011 | } |
2012 | |
2013 | static struct enc_region * |
2014 | find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) |
2015 | { |
2016 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
2017 | struct list_head *head = &sev->regions_list; |
2018 | struct enc_region *i; |
2019 | |
2020 | list_for_each_entry(i, head, list) { |
2021 | if (i->uaddr == range->addr && |
2022 | i->size == range->size) |
2023 | return i; |
2024 | } |
2025 | |
2026 | return NULL; |
2027 | } |
2028 | |
2029 | static void __unregister_enc_region_locked(struct kvm *kvm, |
2030 | struct enc_region *region) |
2031 | { |
2032 | sev_unpin_memory(kvm, pages: region->pages, npages: region->npages); |
2033 | list_del(entry: ®ion->list); |
2034 | kfree(objp: region); |
2035 | } |
2036 | |
2037 | int sev_mem_enc_unregister_region(struct kvm *kvm, |
2038 | struct kvm_enc_region *range) |
2039 | { |
2040 | struct enc_region *region; |
2041 | int ret; |
2042 | |
2043 | /* If kvm is mirroring encryption context it isn't responsible for it */ |
2044 | if (is_mirroring_enc_context(kvm)) |
2045 | return -EINVAL; |
2046 | |
2047 | mutex_lock(&kvm->lock); |
2048 | |
2049 | if (!sev_guest(kvm)) { |
2050 | ret = -ENOTTY; |
2051 | goto failed; |
2052 | } |
2053 | |
2054 | region = find_enc_region(kvm, range); |
2055 | if (!region) { |
2056 | ret = -EINVAL; |
2057 | goto failed; |
2058 | } |
2059 | |
2060 | /* |
2061 | * Ensure that all guest tagged cache entries are flushed before |
2062 | * releasing the pages back to the system for use. CLFLUSH will |
2063 | * not do this, so issue a WBINVD. |
2064 | */ |
2065 | wbinvd_on_all_cpus(); |
2066 | |
2067 | __unregister_enc_region_locked(kvm, region); |
2068 | |
2069 | mutex_unlock(lock: &kvm->lock); |
2070 | return 0; |
2071 | |
2072 | failed: |
2073 | mutex_unlock(lock: &kvm->lock); |
2074 | return ret; |
2075 | } |
2076 | |
2077 | int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) |
2078 | { |
2079 | struct fd f = fdget(fd: source_fd); |
2080 | struct kvm *source_kvm; |
2081 | struct kvm_sev_info *source_sev, *mirror_sev; |
2082 | int ret; |
2083 | |
2084 | if (!f.file) |
2085 | return -EBADF; |
2086 | |
2087 | if (!file_is_kvm(file: f.file)) { |
2088 | ret = -EBADF; |
2089 | goto e_source_fput; |
2090 | } |
2091 | |
2092 | source_kvm = f.file->private_data; |
2093 | ret = sev_lock_two_vms(dst_kvm: kvm, src_kvm: source_kvm); |
2094 | if (ret) |
2095 | goto e_source_fput; |
2096 | |
2097 | /* |
2098 | * Mirrors of mirrors should work, but let's not get silly. Also |
2099 | * disallow out-of-band SEV/SEV-ES init if the target is already an |
2100 | * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being |
2101 | * created after SEV/SEV-ES initialization, e.g. to init intercepts. |
2102 | */ |
2103 | if (sev_guest(kvm) || !sev_guest(kvm: source_kvm) || |
2104 | is_mirroring_enc_context(kvm: source_kvm) || kvm->created_vcpus) { |
2105 | ret = -EINVAL; |
2106 | goto e_unlock; |
2107 | } |
2108 | |
2109 | /* |
2110 | * The mirror kvm holds an enc_context_owner ref so its asid can't |
2111 | * disappear until we're done with it |
2112 | */ |
2113 | source_sev = &to_kvm_svm(kvm: source_kvm)->sev_info; |
2114 | kvm_get_kvm(kvm: source_kvm); |
2115 | mirror_sev = &to_kvm_svm(kvm)->sev_info; |
2116 | list_add_tail(new: &mirror_sev->mirror_entry, head: &source_sev->mirror_vms); |
2117 | |
2118 | /* Set enc_context_owner and copy its encryption context over */ |
2119 | mirror_sev->enc_context_owner = source_kvm; |
2120 | mirror_sev->active = true; |
2121 | mirror_sev->asid = source_sev->asid; |
2122 | mirror_sev->fd = source_sev->fd; |
2123 | mirror_sev->es_active = source_sev->es_active; |
2124 | mirror_sev->handle = source_sev->handle; |
2125 | INIT_LIST_HEAD(list: &mirror_sev->regions_list); |
2126 | INIT_LIST_HEAD(list: &mirror_sev->mirror_vms); |
2127 | ret = 0; |
2128 | |
2129 | /* |
	 * Do not copy ap_jump_table: the mirror does not share the same KVM
	 * context as the original, and the two may have different memory
	 * views.
2133 | */ |
2134 | |
2135 | e_unlock: |
2136 | sev_unlock_two_vms(dst_kvm: kvm, src_kvm: source_kvm); |
2137 | e_source_fput: |
2138 | fdput(fd: f); |
2139 | return ret; |
2140 | } |
2141 | |
2142 | void sev_vm_destroy(struct kvm *kvm) |
2143 | { |
2144 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
2145 | struct list_head *head = &sev->regions_list; |
2146 | struct list_head *pos, *q; |
2147 | |
2148 | if (!sev_guest(kvm)) |
2149 | return; |
2150 | |
2151 | WARN_ON(!list_empty(&sev->mirror_vms)); |
2152 | |
2153 | /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */ |
2154 | if (is_mirroring_enc_context(kvm)) { |
2155 | struct kvm *owner_kvm = sev->enc_context_owner; |
2156 | |
2157 | mutex_lock(&owner_kvm->lock); |
2158 | list_del(entry: &sev->mirror_entry); |
2159 | mutex_unlock(lock: &owner_kvm->lock); |
2160 | kvm_put_kvm(kvm: owner_kvm); |
2161 | return; |
2162 | } |
2163 | |
2164 | /* |
2165 | * Ensure that all guest tagged cache entries are flushed before |
2166 | * releasing the pages back to the system for use. CLFLUSH will |
2167 | * not do this, so issue a WBINVD. |
2168 | */ |
2169 | wbinvd_on_all_cpus(); |
2170 | |
2171 | /* |
	 * If userspace was terminated before unregistering the memory regions,
	 * unpin all of the registered memory.
2174 | */ |
2175 | if (!list_empty(head)) { |
2176 | list_for_each_safe(pos, q, head) { |
2177 | __unregister_enc_region_locked(kvm, |
2178 | list_entry(pos, struct enc_region, list)); |
2179 | cond_resched(); |
2180 | } |
2181 | } |
2182 | |
2183 | sev_unbind_asid(kvm, handle: sev->handle); |
2184 | sev_asid_free(sev); |
2185 | } |
2186 | |
2187 | void __init sev_set_cpu_caps(void) |
2188 | { |
2189 | if (!sev_enabled) |
2190 | kvm_cpu_cap_clear(X86_FEATURE_SEV); |
2191 | if (!sev_es_enabled) |
2192 | kvm_cpu_cap_clear(X86_FEATURE_SEV_ES); |
2193 | } |
2194 | |
2195 | void __init sev_hardware_setup(void) |
2196 | { |
2197 | #ifdef CONFIG_KVM_AMD_SEV |
2198 | unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; |
2199 | bool sev_es_supported = false; |
2200 | bool sev_supported = false; |
2201 | |
2202 | if (!sev_enabled || !npt_enabled || !nrips) |
2203 | goto out; |
2204 | |
2205 | /* |
2206 | * SEV must obviously be supported in hardware. Sanity check that the |
2207 | * CPU supports decode assists, which is mandatory for SEV guests to |
2208 | * support instruction emulation. Ditto for flushing by ASID, as SEV |
2209 | * guests are bound to a single ASID, i.e. KVM can't rotate to a new |
2210 | * ASID to effect a TLB flush. |
2211 | */ |
2212 | if (!boot_cpu_has(X86_FEATURE_SEV) || |
2213 | WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) || |
2214 | WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID))) |
2215 | goto out; |
2216 | |
2217 | /* Retrieve SEV CPUID information */ |
2218 | cpuid(op: 0x8000001f, eax: &eax, ebx: &ebx, ecx: &ecx, edx: &edx); |
2219 | |
2220 | /* Set encryption bit location for SEV-ES guests */ |
2221 | sev_enc_bit = ebx & 0x3f; |
2222 | |
2223 | /* Maximum number of encrypted guests supported simultaneously */ |
2224 | max_sev_asid = ecx; |
2225 | if (!max_sev_asid) |
2226 | goto out; |
2227 | |
2228 | /* Minimum ASID value that should be used for SEV guest */ |
2229 | min_sev_asid = edx; |
2230 | sev_me_mask = 1UL << (ebx & 0x3f); |
2231 | |
2232 | /* |
2233 | * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, |
2234 | * even though it's never used, so that the bitmap is indexed by the |
2235 | * actual ASID. |
2236 | */ |
2237 | nr_asids = max_sev_asid + 1; |
2238 | sev_asid_bitmap = bitmap_zalloc(nbits: nr_asids, GFP_KERNEL); |
2239 | if (!sev_asid_bitmap) |
2240 | goto out; |
2241 | |
2242 | sev_reclaim_asid_bitmap = bitmap_zalloc(nbits: nr_asids, GFP_KERNEL); |
2243 | if (!sev_reclaim_asid_bitmap) { |
2244 | bitmap_free(bitmap: sev_asid_bitmap); |
2245 | sev_asid_bitmap = NULL; |
2246 | goto out; |
2247 | } |
2248 | |
2249 | if (min_sev_asid <= max_sev_asid) { |
2250 | sev_asid_count = max_sev_asid - min_sev_asid + 1; |
2251 | WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); |
2252 | } |
2253 | sev_supported = true; |
2254 | |
2255 | /* SEV-ES support requested? */ |
2256 | if (!sev_es_enabled) |
2257 | goto out; |
2258 | |
2259 | /* |
2260 | * SEV-ES requires MMIO caching as KVM doesn't have access to the guest |
2261 | * instruction stream, i.e. can't emulate in response to a #NPF and |
2262 | * instead relies on #NPF(RSVD) being reflected into the guest as #VC |
2263 | * (the guest can then do a #VMGEXIT to request MMIO emulation). |
2264 | */ |
2265 | if (!enable_mmio_caching) |
2266 | goto out; |
2267 | |
2268 | /* Does the CPU support SEV-ES? */ |
2269 | if (!boot_cpu_has(X86_FEATURE_SEV_ES)) |
2270 | goto out; |
2271 | |
2272 | /* Has the system been allocated ASIDs for SEV-ES? */ |
2273 | if (min_sev_asid == 1) |
2274 | goto out; |
2275 | |
2276 | sev_es_asid_count = min_sev_asid - 1; |
2277 | WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)); |
2278 | sev_es_supported = true; |
2279 | |
2280 | out: |
2281 | if (boot_cpu_has(X86_FEATURE_SEV)) |
		pr_info("SEV %s (ASIDs %u - %u)\n",
			sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
								       "unusable" :
								       "disabled",
			min_sev_asid, max_sev_asid);
	if (boot_cpu_has(X86_FEATURE_SEV_ES))
		pr_info("SEV-ES %s (ASIDs %u - %u)\n",
			sev_es_supported ? "enabled" : "disabled",
			min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1);
2291 | |
2292 | sev_enabled = sev_supported; |
2293 | sev_es_enabled = sev_es_supported; |
2294 | if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || |
2295 | !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) |
2296 | sev_es_debug_swap_enabled = false; |
2297 | #endif |
2298 | } |
2299 | |
2300 | void sev_hardware_unsetup(void) |
2301 | { |
2302 | if (!sev_enabled) |
2303 | return; |
2304 | |
2305 | /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ |
2306 | sev_flush_asids(min_asid: 1, max_asid: max_sev_asid); |
2307 | |
2308 | bitmap_free(bitmap: sev_asid_bitmap); |
2309 | bitmap_free(bitmap: sev_reclaim_asid_bitmap); |
2310 | |
2311 | misc_cg_set_capacity(type: MISC_CG_RES_SEV, capacity: 0); |
2312 | misc_cg_set_capacity(type: MISC_CG_RES_SEV_ES, capacity: 0); |
2313 | } |
2314 | |
2315 | int sev_cpu_init(struct svm_cpu_data *sd) |
2316 | { |
2317 | if (!sev_enabled) |
2318 | return 0; |
2319 | |
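	/*
	 * sev_vmcbs[] tracks, per ASID, the VMCB most recently run on this
	 * CPU; pre_sev_run() uses it to decide whether a TLB flush for the
	 * ASID is needed before VMRUN.
	 */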
2320 | sd->sev_vmcbs = kcalloc(n: nr_asids, size: sizeof(void *), GFP_KERNEL); |
2321 | if (!sd->sev_vmcbs) |
2322 | return -ENOMEM; |
2323 | |
2324 | return 0; |
2325 | } |
2326 | |
2327 | /* |
2328 | * Pages used by hardware to hold guest encrypted state must be flushed before |
2329 | * returning them to the system. |
2330 | */ |
2331 | static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) |
2332 | { |
2333 | unsigned int asid = sev_get_asid(kvm: vcpu->kvm); |
2334 | |
2335 | /* |
2336 | * Note! The address must be a kernel address, as regular page walk |
2337 | * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user |
2338 | * address is non-deterministic and unsafe. This function deliberately |
2339 | * takes a pointer to deter passing in a user address. |
2340 | */ |
2341 | unsigned long addr = (unsigned long)va; |
2342 | |
2343 | /* |
2344 | * If CPU enforced cache coherency for encrypted mappings of the |
2345 | * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache |
2346 | * flush is still needed in order to work properly with DMA devices. |
2347 | */ |
2348 | if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) { |
2349 | clflush_cache_range(addr: va, PAGE_SIZE); |
2350 | return; |
2351 | } |
2352 | |
2353 | /* |
2354 | * VM Page Flush takes a host virtual address and a guest ASID. Fall |
2355 | * back to WBINVD if this faults so as not to make any problems worse |
2356 | * by leaving stale encrypted data in the cache. |
2357 | */ |
2358 | if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) |
2359 | goto do_wbinvd; |
2360 | |
2361 | return; |
2362 | |
2363 | do_wbinvd: |
2364 | wbinvd_on_all_cpus(); |
2365 | } |
2366 | |
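/*
 * Guest pages may hold data encrypted with the guest's key; flush the caches
 * before the host reclaims and reuses those pages so that stale encrypted
 * cache lines can't be written back over the new contents.
 */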
2367 | void sev_guest_memory_reclaimed(struct kvm *kvm) |
2368 | { |
2369 | if (!sev_guest(kvm)) |
2370 | return; |
2371 | |
2372 | wbinvd_on_all_cpus(); |
2373 | } |
2374 | |
2375 | void sev_free_vcpu(struct kvm_vcpu *vcpu) |
2376 | { |
2377 | struct vcpu_svm *svm; |
2378 | |
2379 | if (!sev_es_guest(kvm: vcpu->kvm)) |
2380 | return; |
2381 | |
2382 | svm = to_svm(vcpu); |
2383 | |
2384 | if (vcpu->arch.guest_state_protected) |
2385 | sev_flush_encrypted_page(vcpu, va: svm->sev_es.vmsa); |
2386 | |
2387 | __free_page(virt_to_page(svm->sev_es.vmsa)); |
2388 | |
2389 | if (svm->sev_es.ghcb_sa_free) |
2390 | kvfree(addr: svm->sev_es.ghcb_sa); |
2391 | } |
2392 | |
2393 | static void dump_ghcb(struct vcpu_svm *svm) |
2394 | { |
2395 | struct ghcb *ghcb = svm->sev_es.ghcb; |
2396 | unsigned int nbits; |
2397 | |
2398 | /* Re-use the dump_invalid_vmcb module parameter */ |
2399 | if (!dump_invalid_vmcb) { |
		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
2401 | return; |
2402 | } |
2403 | |
2404 | nbits = sizeof(ghcb->save.valid_bitmap) * 8; |
2405 | |
	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
2416 | } |
2417 | |
2418 | static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) |
2419 | { |
2420 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2421 | struct ghcb *ghcb = svm->sev_es.ghcb; |
2422 | |
2423 | /* |
2424 | * The GHCB protocol so far allows for the following data |
2425 | * to be returned: |
2426 | * GPRs RAX, RBX, RCX, RDX |
2427 | * |
2428 | * Copy their values, even if they may not have been written during the |
2429 | * VM-Exit. It's the guest's responsibility to not consume random data. |
2430 | */ |
2431 | ghcb_set_rax(ghcb, value: vcpu->arch.regs[VCPU_REGS_RAX]); |
2432 | ghcb_set_rbx(ghcb, value: vcpu->arch.regs[VCPU_REGS_RBX]); |
2433 | ghcb_set_rcx(ghcb, value: vcpu->arch.regs[VCPU_REGS_RCX]); |
2434 | ghcb_set_rdx(ghcb, value: vcpu->arch.regs[VCPU_REGS_RDX]); |
2435 | } |
2436 | |
2437 | static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) |
2438 | { |
2439 | struct vmcb_control_area *control = &svm->vmcb->control; |
2440 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2441 | struct ghcb *ghcb = svm->sev_es.ghcb; |
2442 | u64 exit_code; |
2443 | |
2444 | /* |
2445 | * The GHCB protocol so far allows for the following data |
2446 | * to be supplied: |
2447 | * GPRs RAX, RBX, RCX, RDX |
2448 | * XCR0 |
2449 | * CPL |
2450 | * |
2451 | * VMMCALL allows the guest to provide extra registers. KVM also |
2452 | * expects RSI for hypercalls, so include that, too. |
2453 | * |
2454 | * Copy their values to the appropriate location if supplied. |
2455 | */ |
2456 | memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); |
2457 | |
2458 | BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap)); |
2459 | memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap)); |
2460 | |
2461 | vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb); |
2462 | vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb); |
2463 | vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb); |
2464 | vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb); |
2465 | vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb); |
2466 | |
2467 | svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb); |
2468 | |
2469 | if (kvm_ghcb_xcr0_is_valid(svm)) { |
2470 | vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); |
2471 | kvm_update_cpuid_runtime(vcpu); |
2472 | } |
2473 | |
2474 | /* Copy the GHCB exit information into the VMCB fields */ |
2475 | exit_code = ghcb_get_sw_exit_code(ghcb); |
2476 | control->exit_code = lower_32_bits(exit_code); |
2477 | control->exit_code_hi = upper_32_bits(exit_code); |
2478 | control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); |
2479 | control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); |
2480 | svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb); |
2481 | |
2482 | /* Clear the valid entries fields */ |
2483 | memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); |
2484 | } |
2485 | |
2486 | static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) |
2487 | { |
2488 | return (((u64)control->exit_code_hi) << 32) | control->exit_code; |
2489 | } |
2490 | |
2491 | static int sev_es_validate_vmgexit(struct vcpu_svm *svm) |
2492 | { |
2493 | struct vmcb_control_area *control = &svm->vmcb->control; |
2494 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2495 | u64 exit_code; |
2496 | u64 reason; |
2497 | |
2498 | /* |
2499 | * Retrieve the exit code now even though it may not be marked valid |
2500 | * as it could help with debugging. |
2501 | */ |
2502 | exit_code = kvm_ghcb_get_sw_exit_code(control); |
2503 | |
2504 | /* Only GHCB Usage code 0 is supported */ |
2505 | if (svm->sev_es.ghcb->ghcb_usage) { |
2506 | reason = GHCB_ERR_INVALID_USAGE; |
2507 | goto vmgexit_err; |
2508 | } |
2509 | |
2510 | reason = GHCB_ERR_MISSING_INPUT; |
2511 | |
2512 | if (!kvm_ghcb_sw_exit_code_is_valid(svm) || |
2513 | !kvm_ghcb_sw_exit_info_1_is_valid(svm) || |
2514 | !kvm_ghcb_sw_exit_info_2_is_valid(svm)) |
2515 | goto vmgexit_err; |
2516 | |
2517 | switch (exit_code) { |
2518 | case SVM_EXIT_READ_DR7: |
2519 | break; |
2520 | case SVM_EXIT_WRITE_DR7: |
2521 | if (!kvm_ghcb_rax_is_valid(svm)) |
2522 | goto vmgexit_err; |
2523 | break; |
2524 | case SVM_EXIT_RDTSC: |
2525 | break; |
2526 | case SVM_EXIT_RDPMC: |
2527 | if (!kvm_ghcb_rcx_is_valid(svm)) |
2528 | goto vmgexit_err; |
2529 | break; |
2530 | case SVM_EXIT_CPUID: |
2531 | if (!kvm_ghcb_rax_is_valid(svm) || |
2532 | !kvm_ghcb_rcx_is_valid(svm)) |
2533 | goto vmgexit_err; |
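		/*
		 * CPUID leaf 0xD describes XSAVE state, and its output depends
		 * on the current value of XCR0.
		 */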
2534 | if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd) |
2535 | if (!kvm_ghcb_xcr0_is_valid(svm)) |
2536 | goto vmgexit_err; |
2537 | break; |
2538 | case SVM_EXIT_INVD: |
2539 | break; |
2540 | case SVM_EXIT_IOIO: |
2541 | if (control->exit_info_1 & SVM_IOIO_STR_MASK) { |
2542 | if (!kvm_ghcb_sw_scratch_is_valid(svm)) |
2543 | goto vmgexit_err; |
2544 | } else { |
2545 | if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK)) |
2546 | if (!kvm_ghcb_rax_is_valid(svm)) |
2547 | goto vmgexit_err; |
2548 | } |
2549 | break; |
2550 | case SVM_EXIT_MSR: |
2551 | if (!kvm_ghcb_rcx_is_valid(svm)) |
2552 | goto vmgexit_err; |
2553 | if (control->exit_info_1) { |
2554 | if (!kvm_ghcb_rax_is_valid(svm) || |
2555 | !kvm_ghcb_rdx_is_valid(svm)) |
2556 | goto vmgexit_err; |
2557 | } |
2558 | break; |
2559 | case SVM_EXIT_VMMCALL: |
2560 | if (!kvm_ghcb_rax_is_valid(svm) || |
2561 | !kvm_ghcb_cpl_is_valid(svm)) |
2562 | goto vmgexit_err; |
2563 | break; |
2564 | case SVM_EXIT_RDTSCP: |
2565 | break; |
2566 | case SVM_EXIT_WBINVD: |
2567 | break; |
2568 | case SVM_EXIT_MONITOR: |
2569 | if (!kvm_ghcb_rax_is_valid(svm) || |
2570 | !kvm_ghcb_rcx_is_valid(svm) || |
2571 | !kvm_ghcb_rdx_is_valid(svm)) |
2572 | goto vmgexit_err; |
2573 | break; |
2574 | case SVM_EXIT_MWAIT: |
2575 | if (!kvm_ghcb_rax_is_valid(svm) || |
2576 | !kvm_ghcb_rcx_is_valid(svm)) |
2577 | goto vmgexit_err; |
2578 | break; |
2579 | case SVM_VMGEXIT_MMIO_READ: |
2580 | case SVM_VMGEXIT_MMIO_WRITE: |
2581 | if (!kvm_ghcb_sw_scratch_is_valid(svm)) |
2582 | goto vmgexit_err; |
2583 | break; |
2584 | case SVM_VMGEXIT_NMI_COMPLETE: |
2585 | case SVM_VMGEXIT_AP_HLT_LOOP: |
2586 | case SVM_VMGEXIT_AP_JUMP_TABLE: |
2587 | case SVM_VMGEXIT_UNSUPPORTED_EVENT: |
2588 | break; |
2589 | default: |
2590 | reason = GHCB_ERR_INVALID_EVENT; |
2591 | goto vmgexit_err; |
2592 | } |
2593 | |
2594 | return 0; |
2595 | |
2596 | vmgexit_err: |
2597 | if (reason == GHCB_ERR_INVALID_USAGE) { |
		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
			    svm->sev_es.ghcb->ghcb_usage);
	} else if (reason == GHCB_ERR_INVALID_EVENT) {
		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
			    exit_code);
	} else {
		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
			    exit_code);
2606 | dump_ghcb(svm); |
2607 | } |
2608 | |
2609 | ghcb_set_sw_exit_info_1(ghcb: svm->sev_es.ghcb, value: 2); |
2610 | ghcb_set_sw_exit_info_2(ghcb: svm->sev_es.ghcb, value: reason); |
2611 | |
2612 | /* Resume the guest to "return" the error code. */ |
2613 | return 1; |
2614 | } |
2615 | |
2616 | void sev_es_unmap_ghcb(struct vcpu_svm *svm) |
2617 | { |
2618 | if (!svm->sev_es.ghcb) |
2619 | return; |
2620 | |
2621 | if (svm->sev_es.ghcb_sa_free) { |
2622 | /* |
2623 | * The scratch area lives outside the GHCB, so there is a |
2624 | * buffer that, depending on the operation performed, may |
2625 | * need to be synced, then freed. |
2626 | */ |
2627 | if (svm->sev_es.ghcb_sa_sync) { |
2628 | kvm_write_guest(kvm: svm->vcpu.kvm, |
2629 | gpa: svm->sev_es.sw_scratch, |
2630 | data: svm->sev_es.ghcb_sa, |
2631 | len: svm->sev_es.ghcb_sa_len); |
2632 | svm->sev_es.ghcb_sa_sync = false; |
2633 | } |
2634 | |
2635 | kvfree(addr: svm->sev_es.ghcb_sa); |
2636 | svm->sev_es.ghcb_sa = NULL; |
2637 | svm->sev_es.ghcb_sa_free = false; |
2638 | } |
2639 | |
2640 | trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb); |
2641 | |
2642 | sev_es_sync_to_ghcb(svm); |
2643 | |
2644 | kvm_vcpu_unmap(vcpu: &svm->vcpu, map: &svm->sev_es.ghcb_map, dirty: true); |
2645 | svm->sev_es.ghcb = NULL; |
2646 | } |
2647 | |
2648 | void pre_sev_run(struct vcpu_svm *svm, int cpu) |
2649 | { |
2650 | struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); |
2651 | unsigned int asid = sev_get_asid(kvm: svm->vcpu.kvm); |
2652 | |
2653 | /* Assign the asid allocated with this SEV guest */ |
2654 | svm->asid = asid; |
2655 | |
2656 | /* |
2657 | * Flush guest TLB: |
2658 | * |
2659 | * 1) when different VMCB for the same ASID is to be run on the same host CPU. |
2660 | * 2) or this VMCB was executed on different host CPU in previous VMRUNs. |
2661 | */ |
2662 | if (sd->sev_vmcbs[asid] == svm->vmcb && |
2663 | svm->vcpu.arch.last_vmentry_cpu == cpu) |
2664 | return; |
2665 | |
2666 | sd->sev_vmcbs[asid] = svm->vmcb; |
2667 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; |
2668 | vmcb_mark_dirty(vmcb: svm->vmcb, bit: VMCB_ASID); |
2669 | } |
2670 | |
2671 | #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) |
2672 | static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) |
2673 | { |
2674 | struct vmcb_control_area *control = &svm->vmcb->control; |
2675 | u64 ghcb_scratch_beg, ghcb_scratch_end; |
2676 | u64 scratch_gpa_beg, scratch_gpa_end; |
2677 | void *scratch_va; |
2678 | |
2679 | scratch_gpa_beg = svm->sev_es.sw_scratch; |
2680 | if (!scratch_gpa_beg) { |
		pr_err("vmgexit: scratch gpa not provided\n");
2682 | goto e_scratch; |
2683 | } |
2684 | |
2685 | scratch_gpa_end = scratch_gpa_beg + len; |
2686 | if (scratch_gpa_end < scratch_gpa_beg) { |
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		       len, scratch_gpa_beg);
2689 | goto e_scratch; |
2690 | } |
2691 | |
2692 | if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { |
2693 | /* Scratch area begins within GHCB */ |
2694 | ghcb_scratch_beg = control->ghcb_gpa + |
2695 | offsetof(struct ghcb, shared_buffer); |
2696 | ghcb_scratch_end = control->ghcb_gpa + |
2697 | offsetof(struct ghcb, reserved_0xff0); |
2698 | |
2699 | /* |
2700 | * If the scratch area begins within the GHCB, it must be |
2701 | * completely contained in the GHCB shared buffer area. |
2702 | */ |
2703 | if (scratch_gpa_beg < ghcb_scratch_beg || |
2704 | scratch_gpa_end > ghcb_scratch_end) { |
			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
			       scratch_gpa_beg, scratch_gpa_end);
2707 | goto e_scratch; |
2708 | } |
2709 | |
2710 | scratch_va = (void *)svm->sev_es.ghcb; |
2711 | scratch_va += (scratch_gpa_beg - control->ghcb_gpa); |
2712 | } else { |
2713 | /* |
2714 | * The guest memory must be read into a kernel buffer, so |
2715 | * limit the size |
2716 | */ |
2717 | if (len > GHCB_SCRATCH_AREA_LIMIT) { |
			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
			       len, GHCB_SCRATCH_AREA_LIMIT);
2720 | goto e_scratch; |
2721 | } |
2722 | scratch_va = kvzalloc(size: len, GFP_KERNEL_ACCOUNT); |
2723 | if (!scratch_va) |
2724 | return -ENOMEM; |
2725 | |
2726 | if (kvm_read_guest(kvm: svm->vcpu.kvm, gpa: scratch_gpa_beg, data: scratch_va, len)) { |
2727 | /* Unable to copy scratch area from guest */ |
			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
2729 | |
2730 | kvfree(addr: scratch_va); |
2731 | return -EFAULT; |
2732 | } |
2733 | |
2734 | /* |
2735 | * The scratch area is outside the GHCB. The operation will |
2736 | * dictate whether the buffer needs to be synced before running |
2737 | * the vCPU next time (i.e. a read was requested so the data |
2738 | * must be written back to the guest memory). |
2739 | */ |
2740 | svm->sev_es.ghcb_sa_sync = sync; |
2741 | svm->sev_es.ghcb_sa_free = true; |
2742 | } |
2743 | |
2744 | svm->sev_es.ghcb_sa = scratch_va; |
2745 | svm->sev_es.ghcb_sa_len = len; |
2746 | |
2747 | return 0; |
2748 | |
2749 | e_scratch: |
2750 | ghcb_set_sw_exit_info_1(ghcb: svm->sev_es.ghcb, value: 2); |
2751 | ghcb_set_sw_exit_info_2(ghcb: svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); |
2752 | |
2753 | return 1; |
2754 | } |
2755 | |
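/*
 * The value of the GHCB MSR is tracked in vmcb->control.ghcb_gpa.  These
 * helpers read and update individual bit fields of that value for the
 * MSR-based GHCB protocol.
 */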
2756 | static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, |
2757 | unsigned int pos) |
2758 | { |
2759 | svm->vmcb->control.ghcb_gpa &= ~(mask << pos); |
2760 | svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; |
2761 | } |
2762 | |
2763 | static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) |
2764 | { |
2765 | return (svm->vmcb->control.ghcb_gpa >> pos) & mask; |
2766 | } |
2767 | |
2768 | static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) |
2769 | { |
2770 | svm->vmcb->control.ghcb_gpa = value; |
2771 | } |
2772 | |
2773 | static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) |
2774 | { |
2775 | struct vmcb_control_area *control = &svm->vmcb->control; |
2776 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2777 | u64 ghcb_info; |
2778 | int ret = 1; |
2779 | |
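	/* Bits 11:0 of the GHCB MSR (GHCBInfo) identify the requested operation. */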
2780 | ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; |
2781 | |
2782 | trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, |
2783 | control->ghcb_gpa); |
2784 | |
2785 | switch (ghcb_info) { |
2786 | case GHCB_MSR_SEV_INFO_REQ: |
2787 | set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, |
2788 | GHCB_VERSION_MIN, |
2789 | sev_enc_bit)); |
2790 | break; |
2791 | case GHCB_MSR_CPUID_REQ: { |
2792 | u64 cpuid_fn, cpuid_reg, cpuid_value; |
2793 | |
2794 | cpuid_fn = get_ghcb_msr_bits(svm, |
2795 | GHCB_MSR_CPUID_FUNC_MASK, |
2796 | GHCB_MSR_CPUID_FUNC_POS); |
2797 | |
2798 | /* Initialize the registers needed by the CPUID intercept */ |
2799 | vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn; |
2800 | vcpu->arch.regs[VCPU_REGS_RCX] = 0; |
2801 | |
2802 | ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID); |
2803 | if (!ret) { |
2804 | /* Error, keep GHCB MSR value as-is */ |
2805 | break; |
2806 | } |
2807 | |
2808 | cpuid_reg = get_ghcb_msr_bits(svm, |
2809 | GHCB_MSR_CPUID_REG_MASK, |
2810 | GHCB_MSR_CPUID_REG_POS); |
2811 | if (cpuid_reg == 0) |
2812 | cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX]; |
2813 | else if (cpuid_reg == 1) |
2814 | cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX]; |
2815 | else if (cpuid_reg == 2) |
2816 | cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX]; |
2817 | else |
2818 | cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX]; |
2819 | |
2820 | set_ghcb_msr_bits(svm, value: cpuid_value, |
2821 | GHCB_MSR_CPUID_VALUE_MASK, |
2822 | GHCB_MSR_CPUID_VALUE_POS); |
2823 | |
2824 | set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP, |
2825 | GHCB_MSR_INFO_MASK, |
2826 | GHCB_MSR_INFO_POS); |
2827 | break; |
2828 | } |
2829 | case GHCB_MSR_TERM_REQ: { |
2830 | u64 reason_set, reason_code; |
2831 | |
2832 | reason_set = get_ghcb_msr_bits(svm, |
2833 | GHCB_MSR_TERM_REASON_SET_MASK, |
2834 | GHCB_MSR_TERM_REASON_SET_POS); |
2835 | reason_code = get_ghcb_msr_bits(svm, |
2836 | GHCB_MSR_TERM_REASON_MASK, |
2837 | GHCB_MSR_TERM_REASON_POS); |
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);
2840 | |
2841 | vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; |
2842 | vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM; |
2843 | vcpu->run->system_event.ndata = 1; |
2844 | vcpu->run->system_event.data[0] = control->ghcb_gpa; |
2845 | |
2846 | return 0; |
2847 | } |
2848 | default: |
2849 | /* Error, keep GHCB MSR value as-is */ |
2850 | break; |
2851 | } |
2852 | |
2853 | trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, |
2854 | control->ghcb_gpa, ret); |
2855 | |
2856 | return ret; |
2857 | } |
2858 | |
2859 | int sev_handle_vmgexit(struct kvm_vcpu *vcpu) |
2860 | { |
2861 | struct vcpu_svm *svm = to_svm(vcpu); |
2862 | struct vmcb_control_area *control = &svm->vmcb->control; |
2863 | u64 ghcb_gpa, exit_code; |
2864 | int ret; |
2865 | |
2866 | /* Validate the GHCB */ |
2867 | ghcb_gpa = control->ghcb_gpa; |
2868 | if (ghcb_gpa & GHCB_MSR_INFO_MASK) |
2869 | return sev_handle_vmgexit_msr_protocol(svm); |
2870 | |
2871 | if (!ghcb_gpa) { |
		vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
2873 | |
2874 | /* Without a GHCB, just return right back to the guest */ |
2875 | return 1; |
2876 | } |
2877 | |
2878 | if (kvm_vcpu_map(vcpu, gpa: ghcb_gpa >> PAGE_SHIFT, map: &svm->sev_es.ghcb_map)) { |
2879 | /* Unable to map GHCB from guest */ |
		vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
			    ghcb_gpa);
2882 | |
2883 | /* Without a GHCB, just return right back to the guest */ |
2884 | return 1; |
2885 | } |
2886 | |
2887 | svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; |
2888 | |
2889 | trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb); |
2890 | |
2891 | sev_es_sync_from_ghcb(svm); |
2892 | ret = sev_es_validate_vmgexit(svm); |
2893 | if (ret) |
2894 | return ret; |
2895 | |
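	/*
	 * Prime the GHCB return fields with a successful (zero) response;
	 * handlers that fail will overwrite them with error information.
	 */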
2896 | ghcb_set_sw_exit_info_1(ghcb: svm->sev_es.ghcb, value: 0); |
2897 | ghcb_set_sw_exit_info_2(ghcb: svm->sev_es.ghcb, value: 0); |
2898 | |
2899 | exit_code = kvm_ghcb_get_sw_exit_code(control); |
2900 | switch (exit_code) { |
2901 | case SVM_VMGEXIT_MMIO_READ: |
2902 | ret = setup_vmgexit_scratch(svm, sync: true, len: control->exit_info_2); |
2903 | if (ret) |
2904 | break; |
2905 | |
2906 | ret = kvm_sev_es_mmio_read(vcpu, |
2907 | control->exit_info_1, |
2908 | control->exit_info_2, |
2909 | svm->sev_es.ghcb_sa); |
2910 | break; |
2911 | case SVM_VMGEXIT_MMIO_WRITE: |
2912 | ret = setup_vmgexit_scratch(svm, sync: false, len: control->exit_info_2); |
2913 | if (ret) |
2914 | break; |
2915 | |
2916 | ret = kvm_sev_es_mmio_write(vcpu, |
2917 | control->exit_info_1, |
2918 | control->exit_info_2, |
2919 | svm->sev_es.ghcb_sa); |
2920 | break; |
2921 | case SVM_VMGEXIT_NMI_COMPLETE: |
2922 | ++vcpu->stat.nmi_window_exits; |
2923 | svm->nmi_masked = false; |
2924 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
2925 | ret = 1; |
2926 | break; |
2927 | case SVM_VMGEXIT_AP_HLT_LOOP: |
2928 | ret = kvm_emulate_ap_reset_hold(vcpu); |
2929 | break; |
2930 | case SVM_VMGEXIT_AP_JUMP_TABLE: { |
2931 | struct kvm_sev_info *sev = &to_kvm_svm(kvm: vcpu->kvm)->sev_info; |
2932 | |
2933 | switch (control->exit_info_1) { |
2934 | case 0: |
2935 | /* Set AP jump table address */ |
2936 | sev->ap_jump_table = control->exit_info_2; |
2937 | break; |
2938 | case 1: |
2939 | /* Get AP jump table address */ |
2940 | ghcb_set_sw_exit_info_2(ghcb: svm->sev_es.ghcb, value: sev->ap_jump_table); |
2941 | break; |
2942 | default: |
			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
			       control->exit_info_1);
2945 | ghcb_set_sw_exit_info_1(ghcb: svm->sev_es.ghcb, value: 2); |
2946 | ghcb_set_sw_exit_info_2(ghcb: svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); |
2947 | } |
2948 | |
2949 | ret = 1; |
2950 | break; |
2951 | } |
2952 | case SVM_VMGEXIT_UNSUPPORTED_EVENT: |
		vcpu_unimpl(vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
2956 | ret = -EINVAL; |
2957 | break; |
2958 | default: |
2959 | ret = svm_invoke_exit_handler(vcpu, exit_code); |
2960 | } |
2961 | |
2962 | return ret; |
2963 | } |
2964 | |
2965 | int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) |
2966 | { |
2967 | int count; |
2968 | int bytes; |
2969 | int r; |
2970 | |
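	/*
	 * exit_info_2 holds the repeat count for the string I/O; bound it and
	 * guard against overflow when sizing the scratch buffer in bytes.
	 */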
2971 | if (svm->vmcb->control.exit_info_2 > INT_MAX) |
2972 | return -EINVAL; |
2973 | |
2974 | count = svm->vmcb->control.exit_info_2; |
2975 | if (unlikely(check_mul_overflow(count, size, &bytes))) |
2976 | return -EINVAL; |
2977 | |
2978 | r = setup_vmgexit_scratch(svm, sync: in, len: bytes); |
2979 | if (r) |
2980 | return r; |
2981 | |
2982 | return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, |
2983 | count, in); |
2984 | } |
2985 | |
2986 | static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) |
2987 | { |
2988 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2989 | |
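	/*
	 * When the CPU virtualizes TSC_AUX for SEV-ES guests (V_TSC_AUX),
	 * pass TSC_AUX through if the guest can consume it via RDTSCP or
	 * RDPID; otherwise keep intercepting the MSR.
	 */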
2990 | if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { |
2991 | bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || |
2992 | guest_cpuid_has(vcpu, X86_FEATURE_RDPID); |
2993 | |
2994 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_TSC_AUX, read: v_tsc_aux, write: v_tsc_aux); |
2995 | } |
2996 | |
2997 | /* |
2998 | * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if |
2999 | * the host/guest supports its use. |
3000 | * |
3001 | * guest_can_use() checks a number of requirements on the host/guest to |
3002 | * ensure that MSR_IA32_XSS is available, but it might report true even |
3003 | * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host |
3004 | * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better |
3005 | * to further check that the guest CPUID actually supports |
3006 | * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved |
3007 | * guests will still get intercepted and caught in the normal |
	 * kvm_emulate_rdmsr()/kvm_emulate_wrmsr() paths.
3009 | */ |
3010 | if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && |
3011 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) |
3012 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_XSS, read: 1, write: 1); |
3013 | else |
3014 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_XSS, read: 0, write: 0); |
3015 | } |
3016 | |
3017 | void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) |
3018 | { |
3019 | struct kvm_vcpu *vcpu = &svm->vcpu; |
3020 | struct kvm_cpuid_entry2 *best; |
3021 | |
3022 | /* For sev guests, the memory encryption bit is not reserved in CR3. */ |
3023 | best = kvm_find_cpuid_entry(vcpu, 0x8000001F); |
3024 | if (best) |
3025 | vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); |
3026 | |
3027 | if (sev_es_guest(kvm: svm->vcpu.kvm)) |
3028 | sev_es_vcpu_after_set_cpuid(svm); |
3029 | } |
3030 | |
3031 | static void sev_es_init_vmcb(struct vcpu_svm *svm) |
3032 | { |
3033 | struct vmcb *vmcb = svm->vmcb01.ptr; |
3034 | struct kvm_vcpu *vcpu = &svm->vcpu; |
3035 | |
3036 | svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; |
3037 | svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; |
3038 | |
3039 | /* |
	 * An SEV-ES guest requires a VMSA area that is separate from the
3041 | * VMCB page. Do not include the encryption mask on the VMSA physical |
3042 | * address since hardware will access it using the guest key. Note, |
3043 | * the VMSA will be NULL if this vCPU is the destination for intrahost |
3044 | * migration, and will be copied later. |
3045 | */ |
3046 | if (svm->sev_es.vmsa) |
3047 | svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); |
3048 | |
3049 | /* Can't intercept CR register access, HV can't modify CR registers */ |
3050 | svm_clr_intercept(svm, bit: INTERCEPT_CR0_READ); |
3051 | svm_clr_intercept(svm, bit: INTERCEPT_CR4_READ); |
3052 | svm_clr_intercept(svm, bit: INTERCEPT_CR8_READ); |
3053 | svm_clr_intercept(svm, bit: INTERCEPT_CR0_WRITE); |
3054 | svm_clr_intercept(svm, bit: INTERCEPT_CR4_WRITE); |
3055 | svm_clr_intercept(svm, bit: INTERCEPT_CR8_WRITE); |
3056 | |
3057 | svm_clr_intercept(svm, bit: INTERCEPT_SELECTIVE_CR0); |
3058 | |
3059 | /* Track EFER/CR register changes */ |
3060 | svm_set_intercept(svm, bit: TRAP_EFER_WRITE); |
3061 | svm_set_intercept(svm, bit: TRAP_CR0_WRITE); |
3062 | svm_set_intercept(svm, bit: TRAP_CR4_WRITE); |
3063 | svm_set_intercept(svm, bit: TRAP_CR8_WRITE); |
3064 | |
3065 | vmcb->control.intercepts[INTERCEPT_DR] = 0; |
3066 | if (!sev_es_debug_swap_enabled) { |
3067 | vmcb_set_intercept(control: &vmcb->control, bit: INTERCEPT_DR7_READ); |
3068 | vmcb_set_intercept(control: &vmcb->control, bit: INTERCEPT_DR7_WRITE); |
3069 | recalc_intercepts(svm); |
3070 | } else { |
3071 | /* |
3072 | * Disable #DB intercept iff DebugSwap is enabled. KVM doesn't |
3073 | * allow debugging SEV-ES guests, and enables DebugSwap iff |
3074 | * NO_NESTED_DATA_BP is supported, so there's no reason to |
3075 | * intercept #DB when DebugSwap is enabled. For simplicity |
3076 | * with respect to guest debug, intercept #DB for other VMs |
3077 | * even if NO_NESTED_DATA_BP is supported, i.e. even if the |
3078 | * guest can't DoS the CPU with infinite #DB vectoring. |
3079 | */ |
3080 | clr_exception_intercept(svm, DB_VECTOR); |
3081 | } |
3082 | |
3083 | /* Can't intercept XSETBV, HV can't modify XCR0 directly */ |
3084 | svm_clr_intercept(svm, bit: INTERCEPT_XSETBV); |
3085 | |
3086 | /* Clear intercepts on selected MSRs */ |
3087 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_EFER, read: 1, write: 1); |
3088 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_CR_PAT, read: 1, write: 1); |
3089 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, read: 1, write: 1); |
3090 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_LASTBRANCHTOIP, read: 1, write: 1); |
3091 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_LASTINTFROMIP, read: 1, write: 1); |
3092 | set_msr_interception(vcpu, msrpm: svm->msrpm, MSR_IA32_LASTINTTOIP, read: 1, write: 1); |
3093 | } |
3094 | |
3095 | void sev_init_vmcb(struct vcpu_svm *svm) |
3096 | { |
3097 | svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; |
3098 | clr_exception_intercept(svm, UD_VECTOR); |
3099 | |
3100 | /* |
3101 | * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as |
3102 | * KVM can't decrypt guest memory to decode the faulting instruction. |
3103 | */ |
3104 | clr_exception_intercept(svm, GP_VECTOR); |
3105 | |
3106 | if (sev_es_guest(kvm: svm->vcpu.kvm)) |
3107 | sev_es_init_vmcb(svm); |
3108 | } |
3109 | |
3110 | void sev_es_vcpu_reset(struct vcpu_svm *svm) |
3111 | { |
3112 | /* |
3113 | * Set the GHCB MSR value as per the GHCB specification when emulating |
3114 | * vCPU RESET for an SEV-ES guest. |
3115 | */ |
3116 | set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, |
3117 | GHCB_VERSION_MIN, |
3118 | sev_enc_bit)); |
3119 | } |
3120 | |
3121 | void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) |
3122 | { |
3123 | /* |
3124 | * All host state for SEV-ES guests is categorized into three swap types |
3125 | * based on how it is handled by hardware during a world switch: |
3126 | * |
3127 | * A: VMRUN: Host state saved in host save area |
3128 | * VMEXIT: Host state loaded from host save area |
3129 | * |
3130 | * B: VMRUN: Host state _NOT_ saved in host save area |
3131 | * VMEXIT: Host state loaded from host save area |
3132 | * |
3133 | * C: VMRUN: Host state _NOT_ saved in host save area |
3134 | * VMEXIT: Host state initialized to default(reset) values |
3135 | * |
3136 | * Manually save type-B state, i.e. state that is loaded by VMEXIT but |
3137 | * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed |
3138 | * by common SVM code). |
3139 | */ |
3140 | hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); |
3141 | hostsa->pkru = read_pkru(); |
3142 | hostsa->xss = host_xss; |
3143 | |
3144 | /* |
3145 | * If DebugSwap is enabled, debug registers are loaded but NOT saved by |
3146 | * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both |
3147 | * saves and loads debug registers (Type-A). |
3148 | */ |
3149 | if (sev_es_debug_swap_enabled) { |
3150 | hostsa->dr0 = native_get_debugreg(regno: 0); |
3151 | hostsa->dr1 = native_get_debugreg(regno: 1); |
3152 | hostsa->dr2 = native_get_debugreg(regno: 2); |
3153 | hostsa->dr3 = native_get_debugreg(regno: 3); |
3154 | hostsa->dr0_addr_mask = amd_get_dr_addr_mask(dr: 0); |
3155 | hostsa->dr1_addr_mask = amd_get_dr_addr_mask(dr: 1); |
3156 | hostsa->dr2_addr_mask = amd_get_dr_addr_mask(dr: 2); |
3157 | hostsa->dr3_addr_mask = amd_get_dr_addr_mask(dr: 3); |
3158 | } |
3159 | } |
3160 | |
3161 | void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) |
3162 | { |
3163 | struct vcpu_svm *svm = to_svm(vcpu); |
3164 | |
3165 | /* First SIPI: Use the values as initially set by the VMM */ |
3166 | if (!svm->sev_es.received_first_sipi) { |
3167 | svm->sev_es.received_first_sipi = true; |
3168 | return; |
3169 | } |
3170 | |
3171 | /* |
3172 | * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where |
3173 | * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a |
3174 | * non-zero value. |
3175 | */ |
3176 | if (!svm->sev_es.ghcb) |
3177 | return; |
3178 | |
3179 | ghcb_set_sw_exit_info_2(ghcb: svm->sev_es.ghcb, value: 1); |
3180 | } |
3181 | |
3182 | struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) |
3183 | { |
3184 | unsigned long pfn; |
3185 | struct page *p; |
3186 | |
3187 | if (!cc_platform_has(attr: CC_ATTR_HOST_SEV_SNP)) |
3188 | return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
3189 | |
3190 | /* |
	 * Allocate an SNP-safe page to work around the SNP erratum where
3192 | * the CPU will incorrectly signal an RMP violation #PF if a |
3193 | * hugepage (2MB or 1GB) collides with the RMP entry of a |
3194 | * 2MB-aligned VMCB, VMSA, or AVIC backing page. |
3195 | * |
3196 | * Allocate one extra page, choose a page which is not |
3197 | * 2MB-aligned, and free the other. |
3198 | */ |
3199 | p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, order: 1); |
3200 | if (!p) |
3201 | return NULL; |
3202 | |
3203 | split_page(page: p, order: 1); |
3204 | |
3205 | pfn = page_to_pfn(p); |
3206 | if (IS_ALIGNED(pfn, PTRS_PER_PMD)) |
3207 | __free_page(p++); |
3208 | else |
3209 | __free_page(p + 1); |
3210 | |
3211 | return p; |
3212 | } |
3213 | |