// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Google LLC
 * Author: Fuad Tabba <tabba@google.com>
 */

#include <linux/kvm_host.h>
#include <linux/mm.h>

#include <asm/kvm_emulate.h>

#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>

/* Used by icache_is_aliasing(). */
unsigned long __icache_flags;

/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;

unsigned int kvm_host_sve_max_vl;

/*
 * The currently loaded hyp vCPU for each physical CPU. Used only when
 * protected KVM is enabled, but for both protected and non-protected VMs.
 */
static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);

static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;

	if (has_hvhe())
		vcpu->arch.hcr_el2 |= HCR_E2H;

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
		/* route synchronous external abort exceptions to EL2 */
		vcpu->arch.hcr_el2 |= HCR_TEA;
		/* trap error record accesses */
		vcpu->arch.hcr_el2 |= HCR_TERR;
	}

	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		vcpu->arch.hcr_el2 |= HCR_FWB;

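	/*
	 * FEAT_EVT's HCR_TID4 traps the cache ID registers but, unlike
	 * HCR_TID2, leaves CTR_EL0 untrapped. Only use it when the CTR_EL0
	 * value presented to the guest is guaranteed to match this CPU's;
	 * otherwise fall back to trapping CTR_EL0 as well via HCR_TID2.
	 */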
	if (cpus_have_final_cap(ARM64_HAS_EVT) &&
	    !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE) &&
	    kvm_read_vm_id_reg(vcpu->kvm, SYS_CTR_EL0) == read_cpuid(CTR_EL0))
		vcpu->arch.hcr_el2 |= HCR_TID4;
	else
		vcpu->arch.hcr_el2 |= HCR_TID2;

	if (vcpu_has_ptrauth(vcpu))
		vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);

	if (kvm_has_mte(vcpu->kvm))
		vcpu->arch.hcr_el2 |= HCR_ATA;
}

static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	u64 val = vcpu->arch.hcr_el2;

	/* No support for AArch32. */
	val |= HCR_RW;

	/*
	 * Always trap:
	 * - Feature id registers: to control features exposed to guests
	 * - Implementation-defined features
	 */
	val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1;

	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
		val |= HCR_TERR | HCR_TEA;
		val &= ~(HCR_FIEN);
	}

	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
		val &= ~(HCR_AMVOFFEN);

	if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
		val |= HCR_TID5;
		val &= ~(HCR_DCT | HCR_ATA);
	}

	if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
		val |= HCR_TLOR;

	vcpu->arch.hcr_el2 = val;
}

static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	u64 val = vcpu->arch.mdcr_el2;

	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) {
		val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
		val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK);
	}

	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP))
		val |= MDCR_EL2_TDRA | MDCR_EL2_TDA;

	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
		val |= MDCR_EL2_TDOSA;

	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) {
		val |= MDCR_EL2_TPMS;
		val &= ~MDCR_EL2_E2PB_MASK;
	}

	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
		val |= MDCR_EL2_TTRF;

	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
		val |= MDCR_EL2_E2TB_MASK;

	/* Trap Debug Communications Channel registers */
	if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
		val |= MDCR_EL2_TDCC;

	vcpu->arch.mdcr_el2 = val;
}
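
/*
 * Note the common pattern in pvm_init_traps_hcr() and pvm_init_traps_mdcr():
 * for each feature that is not exposed to the protected guest, either trap
 * the registers associated with it so that accesses are handled at EL2, or
 * clear the control bits that would enable its behaviour for the guest.
 */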

/*
 * Check that cpu features that are neither trapped nor supported are not
 * enabled for protected VMs.
 */
static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;

	/* Protected KVM does not support AArch32 guests. */
	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
	    kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
		return -EINVAL;

	/*
	 * Linux guests assume support for floating-point and Advanced SIMD. Do
	 * not change the trapping behavior for these from the KVM default.
	 */
	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) ||
	    !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP))
		return -EINVAL;

	/* No SME support in KVM right now. Check to catch if it changes. */
	if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
		return -EINVAL;

	return 0;
}

/*
 * Initialize trap register values in protected mode.
 */
static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	int ret;

	vcpu->arch.mdcr_el2 = 0;

	pkvm_vcpu_reset_hcr(vcpu);

	if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
		struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

		/* Trust the host for non-protected vcpu features. */
		vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2;
		return 0;
	}

	ret = pkvm_check_pvm_cpu_features(vcpu);
	if (ret)
		return ret;

	pvm_init_traps_hcr(vcpu);
	pvm_init_traps_mdcr(vcpu);
	vcpu_set_hcrx(vcpu);

	return 0;
}

/*
 * Start the VM table handle at the offset defined instead of at 0.
 * Mainly for sanity checking and debugging.
 */
#define HANDLE_OFFSET 0x1000

static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
	return handle - HANDLE_OFFSET;
}

static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
{
	return idx + HANDLE_OFFSET;
}
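
/*
 * For example, the first table entry (index 0) corresponds to handle 0x1000.
 * Handles below HANDLE_OFFSET wrap around on conversion and are rejected by
 * the bounds check in get_vm_by_handle().
 */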

/*
 * Spinlock for protecting state related to the VM table. Protects writes
 * to 'vm_table', 'nr_table_entries', and other per-vm state on initialization.
 * Also protects reads and writes to 'last_hyp_vcpu_lookup'.
 */
DEFINE_HYP_SPINLOCK(vm_table_lock);

/*
 * The table of VM entries for protected VMs in hyp.
 * Allocated at hyp initialization and setup.
 */
static struct pkvm_hyp_vm **vm_table;

void pkvm_hyp_vm_table_init(void *tbl)
{
	WARN_ON(vm_table);
	vm_table = tbl;
}

/*
 * Return the hyp vm structure corresponding to the handle.
 */
static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
{
	unsigned int idx = vm_handle_to_idx(handle);

	if (unlikely(idx >= KVM_MAX_PVMS))
		return NULL;

	return vm_table[idx];
}

struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
					 unsigned int vcpu_idx)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
	struct pkvm_hyp_vm *hyp_vm;

	/* Cannot load a new vcpu without putting the old one first. */
	if (__this_cpu_read(loaded_hyp_vcpu))
		return NULL;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx)
		goto unlock;

	hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
	if (!hyp_vcpu)
		goto unlock;

	/* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
	if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) {
		hyp_vcpu = NULL;
		goto unlock;
	}

	hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu);
	hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
	hyp_spin_unlock(&vm_table_lock);

	if (hyp_vcpu)
		__this_cpu_write(loaded_hyp_vcpu, hyp_vcpu);
	return hyp_vcpu;
}
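
/*
 * Expected usage from the host's hypercall path (sketched for illustration;
 * the actual callers live outside this file): load a vCPU, run it, then put
 * it on the same physical CPU:
 *
 *	hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
 *	if (hyp_vcpu) {
 *		... run the guest ...
 *		pkvm_put_hyp_vcpu(hyp_vcpu);
 *	}
 *
 * Loading also takes a reference on the VM's page so that the VM cannot be
 * torn down while one of its vCPUs is loaded.
 */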

void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

	hyp_spin_lock(&vm_table_lock);
	hyp_vcpu->loaded_hyp_vcpu = NULL;
	__this_cpu_write(loaded_hyp_vcpu, NULL);
	hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
	hyp_spin_unlock(&vm_table_lock);
}

struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void)
{
	return __this_cpu_read(loaded_hyp_vcpu);
}

struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
{
	struct pkvm_hyp_vm *hyp_vm;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (hyp_vm)
		hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
	hyp_spin_unlock(&vm_table_lock);

	return hyp_vm;
}

void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm)
{
	hyp_spin_lock(&vm_table_lock);
	hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
	hyp_spin_unlock(&vm_table_lock);
}

struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle)
{
	struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle);

	if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) {
		put_pkvm_hyp_vm(hyp_vm);
		hyp_vm = NULL;
	}

	return hyp_vm;
}
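
/*
 * Every successful get_pkvm_hyp_vm() or get_np_pkvm_hyp_vm() must be paired
 * with put_pkvm_hyp_vm() once the caller is done with the VM; the reference
 * taken here is what makes __pkvm_teardown_vm() bail out with -EBUSY while
 * the VM is still in use. get_np_pkvm_hyp_vm() additionally rejects
 * protected VMs, dropping the reference and returning NULL for them.
 */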

static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
{
	struct kvm *kvm = &hyp_vm->kvm;
	unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
	DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);

	/* CTR_EL0 is always under host control, even for protected VMs. */
	hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0;

	if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
		set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);

	/* No restrictions for non-protected VMs. */
	if (!kvm_vm_is_protected(kvm)) {
		hyp_vm->kvm.arch.flags = host_arch_flags;

		bitmap_copy(kvm->arch.vcpu_features,
			    host_kvm->arch.vcpu_features,
			    KVM_VCPU_MAX_FEATURES);

		if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &host_arch_flags))
			hyp_vm->kvm.arch.midr_el1 = host_kvm->arch.midr_el1;

		return;
	}

	bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);

	set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);

	if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3))
		set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);

	if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS))
		set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);

	if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC))
		set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);

	if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) {
		set_bit(KVM_ARM_VCPU_SVE, allowed_features);
		kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE);
	}

	bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
		   allowed_features, KVM_VCPU_MAX_FEATURES);
}

static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
	if (host_vcpu)
		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
}

static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	void *sve_state;

	if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
		return;

	sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
	hyp_unpin_shared_mem(sve_state,
			     sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
}

static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			     unsigned int nr_vcpus)
{
	int i;

	for (i = 0; i < nr_vcpus; i++) {
		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vcpus[i];

		if (!hyp_vcpu)
			continue;

		unpin_host_vcpu(hyp_vcpu->host_vcpu);
		unpin_host_sve_state(hyp_vcpu);
	}
}

static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
			     unsigned int nr_vcpus)
{
	hyp_vm->host_kvm = host_kvm;
	hyp_vm->kvm.created_vcpus = nr_vcpus;
	hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
	hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
	hyp_vm->kvm.arch.flags = 0;
	pkvm_init_features_from_host(hyp_vm, host_kvm);
}

static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	unsigned int sve_max_vl;
	size_t sve_state_size;
	void *sve_state;
	int ret = 0;

	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
		vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
		return 0;
	}

	/* Limit guest vector length to the maximum supported by the host. */
	sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl);
	sve_state_size = sve_state_size_from_vl(sve_max_vl);
	sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state));

	if (!sve_state || !sve_state_size) {
		ret = -EINVAL;
		goto err;
	}

	ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size);
	if (ret)
		goto err;

	vcpu->arch.sve_state = sve_state;
	vcpu->arch.sve_max_vl = sve_max_vl;

	return 0;
err:
	clear_bit(KVM_ARM_VCPU_SVE, vcpu->kvm->arch.vcpu_features);
	return ret;
}

static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
			      struct pkvm_hyp_vm *hyp_vm,
			      struct kvm_vcpu *host_vcpu)
{
	int ret = 0;

	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
		return -EBUSY;

	hyp_vcpu->host_vcpu = host_vcpu;

	hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
	hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
	hyp_vcpu->vcpu.vcpu_idx = READ_ONCE(host_vcpu->vcpu_idx);

	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
	hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
	hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;

	if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
		kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);

	ret = pkvm_vcpu_init_traps(hyp_vcpu);
	if (ret)
		goto done;

	ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
done:
	if (ret)
		unpin_host_vcpu(host_vcpu);
	return ret;
}

static int find_free_vm_table_entry(struct kvm *host_kvm)
{
	int i;

	for (i = 0; i < KVM_MAX_PVMS; ++i) {
		if (!vm_table[i])
			return i;
	}

	return -ENOMEM;
}

/*
 * Allocate a VM table entry and insert a pointer to the new vm.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
					   struct pkvm_hyp_vm *hyp_vm)
{
	struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
	int idx;

	hyp_assert_lock_held(&vm_table_lock);

	/*
	 * Initializing protected state might have failed, yet a malicious
	 * host could trigger this function. Thus, ensure that 'vm_table'
	 * exists.
	 */
	if (unlikely(!vm_table))
		return -EINVAL;

	idx = find_free_vm_table_entry(host_kvm);
	if (idx < 0)
		return idx;

	hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);

	/* VMID 0 is reserved for the host */
	atomic64_set(&mmu->vmid.id, idx + 1);

	mmu->arch = &hyp_vm->kvm.arch;
	mmu->pgt = &hyp_vm->pgt;

	vm_table[idx] = hyp_vm;
	return hyp_vm->kvm.arch.pkvm.handle;
}
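
/*
 * Worked example: the first VM inserted lands in slot 0, is identified by
 * handle HANDLE_OFFSET (0x1000) and runs with VMID 1; slot 1 maps to handle
 * 0x1001 and VMID 2, and so on. The VMID is therefore tied to the table slot
 * for the lifetime of the entry rather than allocated dynamically.
 */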

/*
 * Deallocate and remove the VM table entry corresponding to the handle.
 */
static void remove_vm_table_entry(pkvm_handle_t handle)
{
	hyp_assert_lock_held(&vm_table_lock);
	vm_table[vm_handle_to_idx(handle)] = NULL;
}

static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
{
	return size_add(sizeof(struct pkvm_hyp_vm),
			size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
}
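
/*
 * For illustration, assuming 'vcpus[]' is the trailing array of vCPU
 * pointers in struct pkvm_hyp_vm: a VM with 4 vCPUs needs
 * sizeof(struct pkvm_hyp_vm) + 4 * sizeof(struct pkvm_hyp_vcpu *) bytes,
 * which the donation path below rounds up to whole pages.
 */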

static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
{
	void *va = (void *)kern_hyp_va(host_va);

	if (!PAGE_ALIGNED(va))
		return NULL;

	if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
				   PAGE_ALIGN(size) >> PAGE_SHIFT))
		return NULL;

	return va;
}

static void *map_donated_memory(unsigned long host_va, size_t size)
{
	void *va = map_donated_memory_noclear(host_va, size);

	if (va)
		memset(va, 0, size);

	return va;
}

static void __unmap_donated_memory(void *va, size_t size)
{
	kvm_flush_dcache_to_poc(va, size);
	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
				       PAGE_ALIGN(size) >> PAGE_SHIFT));
}

static void unmap_donated_memory(void *va, size_t size)
{
	if (!va)
		return;

	memset(va, 0, size);
	__unmap_donated_memory(va, size);
}

static void unmap_donated_memory_noclear(void *va, size_t size)
{
	if (!va)
		return;

	__unmap_donated_memory(va, size);
}
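
/*
 * Every map_donated_memory*() call transfers ownership of the host's pages
 * to the hypervisor and must eventually be balanced by unmap_donated_memory*()
 * or teardown_donated_memory(), which hand the pages back. The '_noclear'
 * variants skip the memset(): on map because the caller initialises the
 * contents itself (e.g. the stage-2 PGD), and on unmap because the contents
 * have already been wiped or are being reclaimed through the teardown
 * memcache.
 */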

/*
 * Initialize the hypervisor copy of the protected VM state using the
 * memory donated by the host.
 *
 * Unmaps the donated memory from the host at stage 2.
 *
 * host_kvm: A pointer to the host's struct kvm.
 * vm_hva: The host va of the area being donated for the VM state.
 *	   Must be page aligned.
 * pgd_hva: The host va of the area being donated for the stage-2 PGD for
 *	    the VM. Must be page aligned. Its size is implied by the VM's
 *	    VTCR.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
		   unsigned long pgd_hva)
{
	struct pkvm_hyp_vm *hyp_vm = NULL;
	size_t vm_size, pgd_size;
	unsigned int nr_vcpus;
	void *pgd = NULL;
	int ret;

	ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
	if (ret)
		return ret;

	nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
	if (nr_vcpus < 1) {
		ret = -EINVAL;
		goto err_unpin_kvm;
	}

	vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);

	ret = -ENOMEM;

	hyp_vm = map_donated_memory(vm_hva, vm_size);
	if (!hyp_vm)
		goto err_remove_mappings;

	pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
	if (!pgd)
		goto err_remove_mappings;

	init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);

	hyp_spin_lock(&vm_table_lock);
	ret = insert_vm_table_entry(host_kvm, hyp_vm);
	if (ret < 0)
		goto err_unlock;

	ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
	if (ret)
		goto err_remove_vm_table_entry;
	hyp_spin_unlock(&vm_table_lock);

	return hyp_vm->kvm.arch.pkvm.handle;

err_remove_vm_table_entry:
	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
err_unlock:
	hyp_spin_unlock(&vm_table_lock);
err_remove_mappings:
	unmap_donated_memory(hyp_vm, vm_size);
	unmap_donated_memory(pgd, pgd_size);
err_unpin_kvm:
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return ret;
}
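
/*
 * A sketch of the overall creation flow, as driven from the host side (the
 * host-side callers live outside this file):
 *
 *	1. The host allocates page-aligned buffers for the hyp VM state and
 *	   the stage-2 PGD and issues the __pkvm_init_vm hypercall with
 *	   their addresses, receiving a pkvm_handle_t back.
 *	2. For each vCPU it has created, the host donates a buffer for the
 *	   corresponding struct pkvm_hyp_vcpu via __pkvm_init_vcpu.
 *	3. The handle identifies the VM in all later hypercalls, up to and
 *	   including __pkvm_teardown_vm.
 */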

/*
 * Initialize the hypervisor copy of the protected vCPU state using the
 * memory donated by the host.
 *
 * handle: The handle for the protected vm.
 * host_vcpu: A pointer to the corresponding host vcpu.
 * vcpu_hva: The host va of the area being donated for the vcpu state.
 *	     Must be page aligned. The size of the area must be equal to
 *	     the page-aligned size of 'struct pkvm_hyp_vcpu'.
 * Return 0 on success, negative error code on failure.
 */
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
		     unsigned long vcpu_hva)
{
	struct pkvm_hyp_vcpu *hyp_vcpu;
	struct pkvm_hyp_vm *hyp_vm;
	unsigned int idx;
	int ret;

	hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
	if (!hyp_vcpu)
		return -ENOMEM;

	hyp_spin_lock(&vm_table_lock);

	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu);
	if (ret)
		goto unlock;

	idx = hyp_vcpu->vcpu.vcpu_idx;
	if (idx >= hyp_vm->kvm.created_vcpus) {
		ret = -EINVAL;
		goto unlock;
	}

	if (hyp_vm->vcpus[idx]) {
		ret = -EINVAL;
		goto unlock;
	}

	hyp_vm->vcpus[idx] = hyp_vcpu;
unlock:
	hyp_spin_unlock(&vm_table_lock);

	if (ret)
		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
	return ret;
}

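/*
 * Wipe the donated memory, push its pages to the teardown memcache so the
 * host can reclaim them, and hand ownership back to the host.
 */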
static void
teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
{
	size = PAGE_ALIGN(size);
	memset(addr, 0, size);

	for (void *start = addr; start < addr + size; start += PAGE_SIZE)
		push_hyp_memcache(mc, start, hyp_virt_to_phys);

	unmap_donated_memory_noclear(addr, size);
}

int __pkvm_teardown_vm(pkvm_handle_t handle)
{
	struct kvm_hyp_memcache *mc, *stage2_mc;
	struct pkvm_hyp_vm *hyp_vm;
	struct kvm *host_kvm;
	unsigned int idx;
	size_t vm_size;
	int err;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (WARN_ON(hyp_page_count(hyp_vm))) {
		err = -EBUSY;
		goto err_unlock;
	}

	host_kvm = hyp_vm->host_kvm;

	/* Ensure the VMID is clean before it can be reallocated */
	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
	remove_vm_table_entry(handle);
	hyp_spin_unlock(&vm_table_lock);

	/* Reclaim guest pages (including page-table pages) */
	mc = &host_kvm->arch.pkvm.teardown_mc;
	stage2_mc = &host_kvm->arch.pkvm.stage2_teardown_mc;
	reclaim_pgtable_pages(hyp_vm, stage2_mc);
	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus);

	/* Push the metadata pages to the teardown memcache */
	for (idx = 0; idx < hyp_vm->kvm.created_vcpus; ++idx) {
		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
		struct kvm_hyp_memcache *vcpu_mc;

		if (!hyp_vcpu)
			continue;

		vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache;

		while (vcpu_mc->nr_pages) {
			void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt);

			push_hyp_memcache(stage2_mc, addr, hyp_virt_to_phys);
			unmap_donated_memory_noclear(addr, PAGE_SIZE);
		}

		teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
	}

	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
	teardown_donated_memory(mc, hyp_vm, vm_size);
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return 0;

err_unlock:
	hyp_spin_unlock(&vm_table_lock);
	return err;
}