// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};
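
/*
 * Illustrative lifecycle (a sketch based only on the functions in this
 * file): kvm_s390_pv_set_aside() fills in one of these structs and stores
 * it in kvm->arch.pv.set_aside; kvm_s390_pv_deinit_aside_vm() later
 * consumes it via kvm_s390_pv_dispose_one_leftover(). If the teardown is
 * interrupted by a fatal signal, the struct is queued on
 * kvm->arch.pv.need_cleanup and handled by kvm_s390_pv_deinit_cleanup_all().
 */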

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? -EIO : 0;
}

int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
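	/*
	 * Worked example (illustrative numbers): for a 4 GiB guest,
	 * npages * PAGE_SIZE is 4 GiB, which spans 4096 1 MB blocks,
	 * so vlen is 4096 * guest_virt_var_stor_len rounded up to a
	 * page, plus the fixed guest_virt_base_stor_len.
	 */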
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

/**
 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
 * @kvm: the VM whose memory is to be cleared.
 *
 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
 * The CPUs of the protected VM need to be destroyed beforehand.
 */
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
{
	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
	struct kvm_memory_slot *slot;
	unsigned long len;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* Take the memslot containing guest absolute address 0 */
	slot = gfn_to_memslot(kvm, 0);
	/* Clear all slots or parts thereof that are below 2GB */
	while (slot && slot->base_gfn < pages_2g) {
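		/*
		 * Illustrative: a slot covering [1 GiB, 3 GiB) is cleared
		 * only for its first 1 GiB (pages_2g - base_gfn pages),
		 * while a slot starting at or above 2 GiB ends the loop.
		 */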
		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
		/* Take the next memslot */
		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
	}

	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

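/*
 * Illustrative userspace flow (a sketch, assuming the KVM_PV_ASYNC_CLEANUP_*
 * subcommands of the KVM_S390_PV_COMMAND ioctl; vm_fd stands for the VM
 * file descriptor):
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ASYNC_CLEANUP_PREPARE };
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * reaches kvm_s390_pv_set_aside() below; after the VM has been rebooted as
 * a non-secure guest,
 *
 *	cmd.cmd = KVM_PV_ASYNC_CLEANUP_PERFORM;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * reaches kvm_s390_pv_deinit_aside_vm(), ideally from a separate thread.
 */
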
/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	kvm_s390_destroy_lower_2g(kvm);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling s390_uv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

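/*
 * Note (illustrative, based on the KVM_S390_PV_COMMAND interface): userspace
 * typically reaches kvm_s390_pv_init_vm() below through the KVM_PV_ENABLE
 * subcommand, then donates the SE header with KVM_PV_SET_SEC_PARMS (see
 * kvm_s390_pv_set_sec_parms()) and unpacks the image with KVM_PV_UNPACK.
 */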
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
	}
	return 0;
}

int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

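/*
 * Illustrative example (a sketch of the caller's side, assuming the
 * KVM_PV_UNPACK subcommand): userspace passes one struct kvm_s390_pv_unp
 * (fields addr, size, tweak) per image component; addr and size must be
 * page aligned, as checked at the top of kvm_s390_pv_unpack() below.
 */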
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state - Dump guest storage state data.
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

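	/*
	 * Worked example (illustrative, hypothetical sizes): with
	 * conf_dump_storage_state_len == 64 and buff_user_len == 4 MB,
	 * the request covers 65536 1 MB blocks of guest storage, the
	 * kernel buffer is capped at DUMP_BUFF_LEN (1 MB), and the loop
	 * below performs four copy_to_user() calls of 1 MB each.
	 */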
	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

/**
 * kvm_s390_pv_dump_complete - Complete a previously started dump operation.
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}