| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * Copyright (C) 2020 Google LLC |
| 4 | * Author: Quentin Perret <qperret@google.com> |
| 5 | */ |
| 6 | |
| 7 | #include <linux/kvm_host.h> |
| 8 | #include <asm/kvm_emulate.h> |
| 9 | #include <asm/kvm_hyp.h> |
| 10 | #include <asm/kvm_mmu.h> |
| 11 | #include <asm/kvm_pgtable.h> |
| 12 | #include <asm/kvm_pkvm.h> |
| 13 | #include <asm/stage2_pgtable.h> |
| 14 | |
| 15 | #include <hyp/fault.h> |
| 16 | |
| 17 | #include <nvhe/gfp.h> |
| 18 | #include <nvhe/memory.h> |
| 19 | #include <nvhe/mem_protect.h> |
| 20 | #include <nvhe/mm.h> |
| 21 | |
| 22 | #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) |
| 23 | |
| 24 | struct host_mmu host_mmu; |
| 25 | |
| 26 | static struct hyp_pool host_s2_pool; |
| 27 | |
| 28 | static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); |
| 29 | #define current_vm (*this_cpu_ptr(&__current_vm)) |
| 30 | |
| 31 | static void guest_lock_component(struct pkvm_hyp_vm *vm) |
| 32 | { |
| 33 | hyp_spin_lock(&vm->lock); |
| 34 | current_vm = vm; |
| 35 | } |
| 36 | |
| 37 | static void guest_unlock_component(struct pkvm_hyp_vm *vm) |
| 38 | { |
| 39 | current_vm = NULL; |
| 40 | hyp_spin_unlock(&vm->lock); |
| 41 | } |
| 42 | |
| 43 | static void host_lock_component(void) |
| 44 | { |
| 45 | hyp_spin_lock(&host_mmu.lock); |
| 46 | } |
| 47 | |
| 48 | static void host_unlock_component(void) |
| 49 | { |
| 50 | hyp_spin_unlock(&host_mmu.lock); |
| 51 | } |
| 52 | |
| 53 | static void hyp_lock_component(void) |
| 54 | { |
| 55 | hyp_spin_lock(&pkvm_pgd_lock); |
| 56 | } |
| 57 | |
| 58 | static void hyp_unlock_component(void) |
| 59 | { |
| 60 | hyp_spin_unlock(&pkvm_pgd_lock); |
| 61 | } |
| 62 | |
/*
 * Walk the struct hyp_page entries that describe the physical range starting
 * at @__phys and spanning @__size bytes. @__page is declared by the macro as
 * the loop cursor. The page count is @__size >> PAGE_SHIFT, so any trailing
 * partial page is not visited.
 */
#define for_each_hyp_page(__page, __phys, __size)			\
	for (struct hyp_page *__page = hyp_phys_to_page(__phys),	\
			     *__e = __page + ((__size) >> PAGE_SHIFT);	\
	     __page < __e; __page++)
| 67 | |
| 68 | static void *host_s2_zalloc_pages_exact(size_t size) |
| 69 | { |
| 70 | void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); |
| 71 | |
| 72 | hyp_split_page(hyp_virt_to_page(addr)); |
| 73 | |
| 74 | /* |
| 75 | * The size of concatenated PGDs is always a power of two of PAGE_SIZE, |
| 76 | * so there should be no need to free any of the tail pages to make the |
| 77 | * allocation exact. |
| 78 | */ |
| 79 | WARN_ON(size != (PAGE_SIZE << get_order(size))); |
| 80 | |
| 81 | return addr; |
| 82 | } |
| 83 | |
/*
 * mm_ops->zalloc_page callback for the host stage-2: take a single order-0
 * page from @pool and return whatever hyp_alloc_pages() returns.
 */
static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}
| 88 | |
| 89 | static void host_s2_get_page(void *addr) |
| 90 | { |
| 91 | hyp_get_page(&host_s2_pool, addr); |
| 92 | } |
| 93 | |
| 94 | static void host_s2_put_page(void *addr) |
| 95 | { |
| 96 | hyp_put_page(&host_s2_pool, addr); |
| 97 | } |
| 98 | |
| 99 | static void host_s2_free_unlinked_table(void *addr, s8 level) |
| 100 | { |
| 101 | kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level); |
| 102 | } |
| 103 | |
| 104 | static int prepare_s2_pool(void *pgt_pool_base) |
| 105 | { |
| 106 | unsigned long nr_pages, pfn; |
| 107 | int ret; |
| 108 | |
| 109 | pfn = hyp_virt_to_pfn(pgt_pool_base); |
| 110 | nr_pages = host_s2_pgtable_pages(); |
| 111 | ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0); |
| 112 | if (ret) |
| 113 | return ret; |
| 114 | |
| 115 | host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) { |
| 116 | .zalloc_pages_exact = host_s2_zalloc_pages_exact, |
| 117 | .zalloc_page = host_s2_zalloc_page, |
| 118 | .free_unlinked_table = host_s2_free_unlinked_table, |
| 119 | .phys_to_virt = hyp_phys_to_virt, |
| 120 | .virt_to_phys = hyp_virt_to_phys, |
| 121 | .page_count = hyp_page_count, |
| 122 | .get_page = host_s2_get_page, |
| 123 | .put_page = host_s2_put_page, |
| 124 | }; |
| 125 | |
| 126 | return 0; |
| 127 | } |
| 128 | |
| 129 | static void prepare_host_vtcr(void) |
| 130 | { |
| 131 | u32 parange, phys_shift; |
| 132 | |
| 133 | /* The host stage 2 is id-mapped, so use parange for T0SZ */ |
| 134 | parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); |
| 135 | phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); |
| 136 | |
| 137 | host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, |
| 138 | id_aa64mmfr1_el1_sys_val, phys_shift); |
| 139 | } |
| 140 | |
| 141 | static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); |
| 142 | |
| 143 | int kvm_host_prepare_stage2(void *pgt_pool_base) |
| 144 | { |
| 145 | struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; |
| 146 | int ret; |
| 147 | |
| 148 | prepare_host_vtcr(); |
| 149 | hyp_spin_lock_init(&host_mmu.lock); |
| 150 | mmu->arch = &host_mmu.arch; |
| 151 | |
| 152 | ret = prepare_s2_pool(pgt_pool_base); |
| 153 | if (ret) |
| 154 | return ret; |
| 155 | |
| 156 | ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu, |
| 157 | &host_mmu.mm_ops, KVM_HOST_S2_FLAGS, |
| 158 | host_stage2_force_pte_cb); |
| 159 | if (ret) |
| 160 | return ret; |
| 161 | |
| 162 | mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd); |
| 163 | mmu->pgt = &host_mmu.pgt; |
| 164 | atomic64_set(v: &mmu->vmid.id, i: 0); |
| 165 | |
| 166 | return 0; |
| 167 | } |
| 168 | |
| 169 | static void *guest_s2_zalloc_pages_exact(size_t size) |
| 170 | { |
| 171 | void *addr = hyp_alloc_pages(¤t_vm->pool, get_order(size)); |
| 172 | |
| 173 | WARN_ON(size != (PAGE_SIZE << get_order(size))); |
| 174 | hyp_split_page(hyp_virt_to_page(addr)); |
| 175 | |
| 176 | return addr; |
| 177 | } |
| 178 | |
| 179 | static void guest_s2_free_pages_exact(void *addr, unsigned long size) |
| 180 | { |
| 181 | u8 order = get_order(size); |
| 182 | unsigned int i; |
| 183 | |
| 184 | for (i = 0; i < (1 << order); i++) |
| 185 | hyp_put_page(¤t_vm->pool, addr + (i * PAGE_SIZE)); |
| 186 | } |
| 187 | |
| 188 | static void *guest_s2_zalloc_page(void *mc) |
| 189 | { |
| 190 | struct hyp_page *p; |
| 191 | void *addr; |
| 192 | |
| 193 | addr = hyp_alloc_pages(¤t_vm->pool, 0); |
| 194 | if (addr) |
| 195 | return addr; |
| 196 | |
| 197 | addr = pop_hyp_memcache(mc, hyp_phys_to_virt); |
| 198 | if (!addr) |
| 199 | return addr; |
| 200 | |
| 201 | memset(addr, 0, PAGE_SIZE); |
| 202 | p = hyp_virt_to_page(addr); |
| 203 | p->refcount = 1; |
| 204 | p->order = 0; |
| 205 | |
| 206 | return addr; |
| 207 | } |
| 208 | |
| 209 | static void guest_s2_get_page(void *addr) |
| 210 | { |
| 211 | hyp_get_page(¤t_vm->pool, addr); |
| 212 | } |
| 213 | |
| 214 | static void guest_s2_put_page(void *addr) |
| 215 | { |
| 216 | hyp_put_page(¤t_vm->pool, addr); |
| 217 | } |
| 218 | |
| 219 | static void __apply_guest_page(void *va, size_t size, |
| 220 | void (*func)(void *addr, size_t size)) |
| 221 | { |
| 222 | size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE); |
| 223 | va = PTR_ALIGN_DOWN(va, PAGE_SIZE); |
| 224 | size = PAGE_ALIGN(size); |
| 225 | |
| 226 | while (size) { |
| 227 | size_t map_size = PAGE_SIZE; |
| 228 | void *map; |
| 229 | |
| 230 | if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE) |
| 231 | map = hyp_fixblock_map(__hyp_pa(va), &map_size); |
| 232 | else |
| 233 | map = hyp_fixmap_map(__hyp_pa(va)); |
| 234 | |
| 235 | func(map, map_size); |
| 236 | |
| 237 | if (map_size == PMD_SIZE) |
| 238 | hyp_fixblock_unmap(); |
| 239 | else |
| 240 | hyp_fixmap_unmap(); |
| 241 | |
| 242 | size -= map_size; |
| 243 | va += map_size; |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | static void clean_dcache_guest_page(void *va, size_t size) |
| 248 | { |
| 249 | __apply_guest_page(va, size, __clean_dcache_guest_page); |
| 250 | } |
| 251 | |
| 252 | static void invalidate_icache_guest_page(void *va, size_t size) |
| 253 | { |
| 254 | __apply_guest_page(va, size, __invalidate_icache_guest_page); |
| 255 | } |
| 256 | |
| 257 | int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) |
| 258 | { |
| 259 | struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu; |
| 260 | unsigned long nr_pages; |
| 261 | int ret; |
| 262 | |
| 263 | nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT; |
| 264 | ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); |
| 265 | if (ret) |
| 266 | return ret; |
| 267 | |
| 268 | hyp_spin_lock_init(&vm->lock); |
| 269 | vm->mm_ops = (struct kvm_pgtable_mm_ops) { |
| 270 | .zalloc_pages_exact = guest_s2_zalloc_pages_exact, |
| 271 | .free_pages_exact = guest_s2_free_pages_exact, |
| 272 | .zalloc_page = guest_s2_zalloc_page, |
| 273 | .phys_to_virt = hyp_phys_to_virt, |
| 274 | .virt_to_phys = hyp_virt_to_phys, |
| 275 | .page_count = hyp_page_count, |
| 276 | .get_page = guest_s2_get_page, |
| 277 | .put_page = guest_s2_put_page, |
| 278 | .dcache_clean_inval_poc = clean_dcache_guest_page, |
| 279 | .icache_inval_pou = invalidate_icache_guest_page, |
| 280 | }; |
| 281 | |
| 282 | guest_lock_component(vm); |
| 283 | ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL); |
| 284 | guest_unlock_component(vm); |
| 285 | if (ret) |
| 286 | return ret; |
| 287 | |
| 288 | vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); |
| 289 | |
| 290 | return 0; |
| 291 | } |
| 292 | |
| 293 | void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) |
| 294 | { |
| 295 | struct hyp_page *page; |
| 296 | void *addr; |
| 297 | |
| 298 | /* Dump all pgtable pages in the hyp_pool */ |
| 299 | guest_lock_component(vm); |
| 300 | kvm_pgtable_stage2_destroy(&vm->pgt); |
| 301 | vm->kvm.arch.mmu.pgd_phys = 0ULL; |
| 302 | guest_unlock_component(vm); |
| 303 | |
| 304 | /* Drain the hyp_pool into the memcache */ |
| 305 | addr = hyp_alloc_pages(&vm->pool, 0); |
| 306 | while (addr) { |
| 307 | page = hyp_virt_to_page(addr); |
| 308 | page->refcount = 0; |
| 309 | page->order = 0; |
| 310 | push_hyp_memcache(mc, addr, hyp_virt_to_phys); |
| 311 | WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); |
| 312 | addr = hyp_alloc_pages(&vm->pool, 0); |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | int __pkvm_prot_finalize(void) |
| 317 | { |
| 318 | struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; |
| 319 | struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); |
| 320 | |
| 321 | if (params->hcr_el2 & HCR_VM) |
| 322 | return -EPERM; |
| 323 | |
| 324 | params->vttbr = kvm_get_vttbr(mmu); |
| 325 | params->vtcr = mmu->vtcr; |
| 326 | params->hcr_el2 |= HCR_VM; |
| 327 | |
| 328 | /* |
| 329 | * The CMO below not only cleans the updated params to the |
| 330 | * PoC, but also provides the DSB that ensures ongoing |
| 331 | * page-table walks that have started before we trapped to EL2 |
| 332 | * have completed. |
| 333 | */ |
| 334 | kvm_flush_dcache_to_poc(params, sizeof(*params)); |
| 335 | |
| 336 | write_sysreg_hcr(params->hcr_el2); |
| 337 | __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); |
| 338 | |
| 339 | /* |
| 340 | * Make sure to have an ISB before the TLB maintenance below but only |
| 341 | * when __load_stage2() doesn't include one already. |
| 342 | */ |
| 343 | asm(ALTERNATIVE("isb" , "nop" , ARM64_WORKAROUND_SPECULATIVE_AT)); |
| 344 | |
| 345 | /* Invalidate stale HCR bits that may be cached in TLBs */ |
| 346 | __tlbi(vmalls12e1); |
| 347 | dsb(nsh); |
| 348 | isb(); |
| 349 | |
| 350 | return 0; |
| 351 | } |
| 352 | |
| 353 | static int host_stage2_unmap_dev_all(void) |
| 354 | { |
| 355 | struct kvm_pgtable *pgt = &host_mmu.pgt; |
| 356 | struct memblock_region *reg; |
| 357 | u64 addr = 0; |
| 358 | int i, ret; |
| 359 | |
| 360 | /* Unmap all non-memory regions to recycle the pages */ |
| 361 | for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) { |
| 362 | reg = &hyp_memory[i]; |
| 363 | ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr); |
| 364 | if (ret) |
| 365 | return ret; |
| 366 | } |
| 367 | return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); |
| 368 | } |
| 369 | |
| 370 | /* |
| 371 | * Ensure the PFN range is contained within PA-range. |
| 372 | * |
| 373 | * This check is also robust to overflows and is therefore a requirement before |
| 374 | * using a pfn/nr_pages pair from an untrusted source. |
| 375 | */ |
| 376 | static bool pfn_range_is_valid(u64 pfn, u64 nr_pages) |
| 377 | { |
| 378 | u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT); |
| 379 | |
| 380 | return pfn < limit && ((limit - pfn) >= nr_pages); |
| 381 | } |
| 382 | |
| 383 | struct kvm_mem_range { |
| 384 | u64 start; |
| 385 | u64 end; |
| 386 | }; |
| 387 | |
| 388 | static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) |
| 389 | { |
| 390 | int cur, left = 0, right = hyp_memblock_nr; |
| 391 | struct memblock_region *reg; |
| 392 | phys_addr_t end; |
| 393 | |
| 394 | range->start = 0; |
| 395 | range->end = ULONG_MAX; |
| 396 | |
| 397 | /* The list of memblock regions is sorted, binary search it */ |
| 398 | while (left < right) { |
| 399 | cur = (left + right) >> 1; |
| 400 | reg = &hyp_memory[cur]; |
| 401 | end = reg->base + reg->size; |
| 402 | if (addr < reg->base) { |
| 403 | right = cur; |
| 404 | range->end = reg->base; |
| 405 | } else if (addr >= end) { |
| 406 | left = cur + 1; |
| 407 | range->start = end; |
| 408 | } else { |
| 409 | range->start = reg->base; |
| 410 | range->end = end; |
| 411 | return reg; |
| 412 | } |
| 413 | } |
| 414 | |
| 415 | return NULL; |
| 416 | } |
| 417 | |
| 418 | bool addr_is_memory(phys_addr_t phys) |
| 419 | { |
| 420 | struct kvm_mem_range range; |
| 421 | |
| 422 | return !!find_mem_range(addr: phys, range: &range); |
| 423 | } |
| 424 | |
| 425 | static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) |
| 426 | { |
| 427 | return range->start <= addr && addr < range->end; |
| 428 | } |
| 429 | |
| 430 | static int check_range_allowed_memory(u64 start, u64 end) |
| 431 | { |
| 432 | struct memblock_region *reg; |
| 433 | struct kvm_mem_range range; |
| 434 | |
| 435 | /* |
| 436 | * Callers can't check the state of a range that overlaps memory and |
| 437 | * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range. |
| 438 | */ |
| 439 | reg = find_mem_range(addr: start, range: &range); |
| 440 | if (!is_in_mem_range(addr: end - 1, range: &range)) |
| 441 | return -EINVAL; |
| 442 | |
| 443 | if (!reg || reg->flags & MEMBLOCK_NOMAP) |
| 444 | return -EPERM; |
| 445 | |
| 446 | return 0; |
| 447 | } |
| 448 | |
| 449 | static bool range_is_memory(u64 start, u64 end) |
| 450 | { |
| 451 | struct kvm_mem_range r; |
| 452 | |
| 453 | if (!find_mem_range(addr: start, range: &r)) |
| 454 | return false; |
| 455 | |
| 456 | return is_in_mem_range(addr: end - 1, range: &r); |
| 457 | } |
| 458 | |
| 459 | static inline int __host_stage2_idmap(u64 start, u64 end, |
| 460 | enum kvm_pgtable_prot prot) |
| 461 | { |
| 462 | return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start, |
| 463 | prot, &host_s2_pool, 0); |
| 464 | } |
| 465 | |
/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 *
 * Calls fn(...) with host_mmu.lock held. On -ENOMEM, all non-memory ranges
 * are unmapped and fn(...) is called a second time, so the arguments may be
 * evaluated twice.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM)					\
			__ret = host_stage2_unmap_dev_all() ?:		\
				fn(__VA_ARGS__);			\
		__ret;							\
	})
| 484 | |
| 485 | static inline bool range_included(struct kvm_mem_range *child, |
| 486 | struct kvm_mem_range *parent) |
| 487 | { |
| 488 | return parent->start <= child->start && child->end <= parent->end; |
| 489 | } |
| 490 | |
| 491 | static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) |
| 492 | { |
| 493 | struct kvm_mem_range cur; |
| 494 | kvm_pte_t pte; |
| 495 | u64 granule; |
| 496 | s8 level; |
| 497 | int ret; |
| 498 | |
| 499 | hyp_assert_lock_held(&host_mmu.lock); |
| 500 | ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); |
| 501 | if (ret) |
| 502 | return ret; |
| 503 | |
| 504 | if (kvm_pte_valid(pte)) |
| 505 | return -EAGAIN; |
| 506 | |
| 507 | if (pte) { |
| 508 | WARN_ON(addr_is_memory(addr) && |
| 509 | get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE); |
| 510 | return -EPERM; |
| 511 | } |
| 512 | |
| 513 | for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { |
| 514 | if (!kvm_level_supports_block_mapping(level)) |
| 515 | continue; |
| 516 | granule = kvm_granule_size(level); |
| 517 | cur.start = ALIGN_DOWN(addr, granule); |
| 518 | cur.end = cur.start + granule; |
| 519 | if (!range_included(&cur, range)) |
| 520 | continue; |
| 521 | *range = cur; |
| 522 | return 0; |
| 523 | } |
| 524 | |
| 525 | WARN_ON(1); |
| 526 | |
| 527 | return -EINVAL; |
| 528 | } |
| 529 | |
| 530 | int host_stage2_idmap_locked(phys_addr_t addr, u64 size, |
| 531 | enum kvm_pgtable_prot prot) |
| 532 | { |
| 533 | return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); |
| 534 | } |
| 535 | |
| 536 | static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state) |
| 537 | { |
| 538 | for_each_hyp_page(page, addr, size) |
| 539 | set_host_state(page, state); |
| 540 | } |
| 541 | |
| 542 | int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) |
| 543 | { |
| 544 | int ret; |
| 545 | |
| 546 | if (!range_is_memory(start: addr, end: addr + size)) |
| 547 | return -EPERM; |
| 548 | |
| 549 | ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, |
| 550 | addr, size, &host_s2_pool, owner_id); |
| 551 | if (ret) |
| 552 | return ret; |
| 553 | |
| 554 | /* Don't forget to update the vmemmap tracking for the host */ |
| 555 | if (owner_id == PKVM_ID_HOST) |
| 556 | __host_update_page_state(addr, size, PKVM_PAGE_OWNED); |
| 557 | else |
| 558 | __host_update_page_state(addr, size, PKVM_NOPAGE); |
| 559 | |
| 560 | return 0; |
| 561 | } |
| 562 | |
| 563 | static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) |
| 564 | { |
| 565 | /* |
| 566 | * Block mappings must be used with care in the host stage-2 as a |
| 567 | * kvm_pgtable_stage2_map() operation targeting a page in the range of |
| 568 | * an existing block will delete the block under the assumption that |
| 569 | * mappings in the rest of the block range can always be rebuilt lazily. |
| 570 | * That assumption is correct for the host stage-2 with RWX mappings |
| 571 | * targeting memory or RW mappings targeting MMIO ranges (see |
| 572 | * host_stage2_idmap() below which implements some of the host memory |
| 573 | * abort logic). However, this is not safe for any other mappings where |
| 574 | * the host stage-2 page-table is in fact the only place where this |
| 575 | * state is stored. In all those cases, it is safer to use page-level |
| 576 | * mappings, hence avoiding to lose the state because of side-effects in |
| 577 | * kvm_pgtable_stage2_map(). |
| 578 | */ |
| 579 | if (range_is_memory(addr, end)) |
| 580 | return prot != PKVM_HOST_MEM_PROT; |
| 581 | else |
| 582 | return prot != PKVM_HOST_MMIO_PROT; |
| 583 | } |
| 584 | |
| 585 | static int host_stage2_idmap(u64 addr) |
| 586 | { |
| 587 | struct kvm_mem_range range; |
| 588 | bool is_memory = !!find_mem_range(addr, range: &range); |
| 589 | enum kvm_pgtable_prot prot; |
| 590 | int ret; |
| 591 | |
| 592 | prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; |
| 593 | |
| 594 | host_lock_component(); |
| 595 | ret = host_stage2_adjust_range(addr, range: &range); |
| 596 | if (ret) |
| 597 | goto unlock; |
| 598 | |
| 599 | ret = host_stage2_idmap_locked(addr: range.start, size: range.end - range.start, prot: prot); |
| 600 | unlock: |
| 601 | host_unlock_component(); |
| 602 | |
| 603 | return ret; |
| 604 | } |
| 605 | |
| 606 | void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) |
| 607 | { |
| 608 | struct kvm_vcpu_fault_info fault; |
| 609 | u64 esr, addr; |
| 610 | int ret = 0; |
| 611 | |
| 612 | esr = read_sysreg_el2(SYS_ESR); |
| 613 | if (!__get_fault_info(esr, &fault)) { |
| 614 | /* |
| 615 | * We've presumably raced with a page-table change which caused |
| 616 | * AT to fail, try again. |
| 617 | */ |
| 618 | return; |
| 619 | } |
| 620 | |
| 621 | |
| 622 | /* |
| 623 | * Yikes, we couldn't resolve the fault IPA. This should reinject an |
| 624 | * abort into the host when we figure out how to do that. |
| 625 | */ |
| 626 | BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); |
| 627 | addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; |
| 628 | |
| 629 | ret = host_stage2_idmap(addr); |
| 630 | BUG_ON(ret && ret != -EAGAIN); |
| 631 | } |
| 632 | |
| 633 | struct check_walk_data { |
| 634 | enum pkvm_page_state desired; |
| 635 | enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr); |
| 636 | }; |
| 637 | |
| 638 | static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, |
| 639 | enum kvm_pgtable_walk_flags visit) |
| 640 | { |
| 641 | struct check_walk_data *d = ctx->arg; |
| 642 | |
| 643 | return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM; |
| 644 | } |
| 645 | |
| 646 | static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, |
| 647 | struct check_walk_data *data) |
| 648 | { |
| 649 | struct kvm_pgtable_walker walker = { |
| 650 | .cb = __check_page_state_visitor, |
| 651 | .arg = data, |
| 652 | .flags = KVM_PGTABLE_WALK_LEAF, |
| 653 | }; |
| 654 | |
| 655 | return kvm_pgtable_walk(pgt, addr, size, &walker); |
| 656 | } |
| 657 | |
| 658 | static int __host_check_page_state_range(u64 addr, u64 size, |
| 659 | enum pkvm_page_state state) |
| 660 | { |
| 661 | int ret; |
| 662 | |
| 663 | ret = check_range_allowed_memory(start: addr, end: addr + size); |
| 664 | if (ret) |
| 665 | return ret; |
| 666 | |
| 667 | hyp_assert_lock_held(&host_mmu.lock); |
| 668 | |
| 669 | for_each_hyp_page(page, addr, size) { |
| 670 | if (get_host_state(page) != state) |
| 671 | return -EPERM; |
| 672 | } |
| 673 | |
| 674 | return 0; |
| 675 | } |
| 676 | |
| 677 | static int __host_set_page_state_range(u64 addr, u64 size, |
| 678 | enum pkvm_page_state state) |
| 679 | { |
| 680 | if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) { |
| 681 | int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT); |
| 682 | |
| 683 | if (ret) |
| 684 | return ret; |
| 685 | } |
| 686 | |
| 687 | __host_update_page_state(addr, size, state: state); |
| 688 | |
| 689 | return 0; |
| 690 | } |
| 691 | |
| 692 | static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state) |
| 693 | { |
| 694 | for_each_hyp_page(page, phys, size) |
| 695 | set_hyp_state(page, state); |
| 696 | } |
| 697 | |
| 698 | static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state) |
| 699 | { |
| 700 | for_each_hyp_page(page, phys, size) { |
| 701 | if (get_hyp_state(page) != state) |
| 702 | return -EPERM; |
| 703 | } |
| 704 | |
| 705 | return 0; |
| 706 | } |
| 707 | |
| 708 | static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr) |
| 709 | { |
| 710 | if (!kvm_pte_valid(pte)) |
| 711 | return PKVM_NOPAGE; |
| 712 | |
| 713 | return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); |
| 714 | } |
| 715 | |
| 716 | static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr, |
| 717 | u64 size, enum pkvm_page_state state) |
| 718 | { |
| 719 | struct check_walk_data d = { |
| 720 | .desired = state, |
| 721 | .get_page_state = guest_get_page_state, |
| 722 | }; |
| 723 | |
| 724 | hyp_assert_lock_held(&vm->lock); |
| 725 | return check_page_state_range(pgt: &vm->pgt, addr, size, data: &d); |
| 726 | } |
| 727 | |
| 728 | int __pkvm_host_share_hyp(u64 pfn) |
| 729 | { |
| 730 | u64 phys = hyp_pfn_to_phys(pfn); |
| 731 | u64 size = PAGE_SIZE; |
| 732 | int ret; |
| 733 | |
| 734 | host_lock_component(); |
| 735 | hyp_lock_component(); |
| 736 | |
| 737 | ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); |
| 738 | if (ret) |
| 739 | goto unlock; |
| 740 | ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE); |
| 741 | if (ret) |
| 742 | goto unlock; |
| 743 | |
| 744 | __hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); |
| 745 | WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED)); |
| 746 | |
| 747 | unlock: |
| 748 | hyp_unlock_component(); |
| 749 | host_unlock_component(); |
| 750 | |
| 751 | return ret; |
| 752 | } |
| 753 | |
| 754 | int __pkvm_host_unshare_hyp(u64 pfn) |
| 755 | { |
| 756 | u64 phys = hyp_pfn_to_phys(pfn); |
| 757 | u64 virt = (u64)__hyp_va(phys); |
| 758 | u64 size = PAGE_SIZE; |
| 759 | int ret; |
| 760 | |
| 761 | host_lock_component(); |
| 762 | hyp_lock_component(); |
| 763 | |
| 764 | ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); |
| 765 | if (ret) |
| 766 | goto unlock; |
| 767 | ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); |
| 768 | if (ret) |
| 769 | goto unlock; |
| 770 | if (hyp_page_count((void *)virt)) { |
| 771 | ret = -EBUSY; |
| 772 | goto unlock; |
| 773 | } |
| 774 | |
| 775 | __hyp_set_page_state_range(phys, size, PKVM_NOPAGE); |
| 776 | WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED)); |
| 777 | |
| 778 | unlock: |
| 779 | hyp_unlock_component(); |
| 780 | host_unlock_component(); |
| 781 | |
| 782 | return ret; |
| 783 | } |
| 784 | |
| 785 | int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) |
| 786 | { |
| 787 | u64 phys = hyp_pfn_to_phys(pfn); |
| 788 | u64 size = PAGE_SIZE * nr_pages; |
| 789 | void *virt = __hyp_va(phys); |
| 790 | int ret; |
| 791 | |
| 792 | if (!pfn_range_is_valid(pfn, nr_pages)) |
| 793 | return -EINVAL; |
| 794 | |
| 795 | host_lock_component(); |
| 796 | hyp_lock_component(); |
| 797 | |
| 798 | ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); |
| 799 | if (ret) |
| 800 | goto unlock; |
| 801 | ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE); |
| 802 | if (ret) |
| 803 | goto unlock; |
| 804 | |
| 805 | __hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED); |
| 806 | WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP)); |
| 807 | WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP)); |
| 808 | |
| 809 | unlock: |
| 810 | hyp_unlock_component(); |
| 811 | host_unlock_component(); |
| 812 | |
| 813 | return ret; |
| 814 | } |
| 815 | |
| 816 | int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) |
| 817 | { |
| 818 | u64 phys = hyp_pfn_to_phys(pfn); |
| 819 | u64 size = PAGE_SIZE * nr_pages; |
| 820 | u64 virt = (u64)__hyp_va(phys); |
| 821 | int ret; |
| 822 | |
| 823 | if (!pfn_range_is_valid(pfn, nr_pages)) |
| 824 | return -EINVAL; |
| 825 | |
| 826 | host_lock_component(); |
| 827 | hyp_lock_component(); |
| 828 | |
| 829 | ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED); |
| 830 | if (ret) |
| 831 | goto unlock; |
| 832 | ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE); |
| 833 | if (ret) |
| 834 | goto unlock; |
| 835 | |
| 836 | __hyp_set_page_state_range(phys, size, PKVM_NOPAGE); |
| 837 | WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); |
| 838 | WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST)); |
| 839 | |
| 840 | unlock: |
| 841 | hyp_unlock_component(); |
| 842 | host_unlock_component(); |
| 843 | |
| 844 | return ret; |
| 845 | } |
| 846 | |
| 847 | int hyp_pin_shared_mem(void *from, void *to) |
| 848 | { |
| 849 | u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); |
| 850 | u64 end = PAGE_ALIGN((u64)to); |
| 851 | u64 phys = __hyp_pa(start); |
| 852 | u64 size = end - start; |
| 853 | struct hyp_page *p; |
| 854 | int ret; |
| 855 | |
| 856 | host_lock_component(); |
| 857 | hyp_lock_component(); |
| 858 | |
| 859 | ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); |
| 860 | if (ret) |
| 861 | goto unlock; |
| 862 | |
| 863 | ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); |
| 864 | if (ret) |
| 865 | goto unlock; |
| 866 | |
| 867 | for (cur = start; cur < end; cur += PAGE_SIZE) { |
| 868 | p = hyp_virt_to_page(cur); |
| 869 | hyp_page_ref_inc(p); |
| 870 | if (p->refcount == 1) |
| 871 | WARN_ON(pkvm_create_mappings_locked((void *)cur, |
| 872 | (void *)cur + PAGE_SIZE, |
| 873 | PAGE_HYP)); |
| 874 | } |
| 875 | |
| 876 | unlock: |
| 877 | hyp_unlock_component(); |
| 878 | host_unlock_component(); |
| 879 | |
| 880 | return ret; |
| 881 | } |
| 882 | |
| 883 | void hyp_unpin_shared_mem(void *from, void *to) |
| 884 | { |
| 885 | u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); |
| 886 | u64 end = PAGE_ALIGN((u64)to); |
| 887 | struct hyp_page *p; |
| 888 | |
| 889 | host_lock_component(); |
| 890 | hyp_lock_component(); |
| 891 | |
| 892 | for (cur = start; cur < end; cur += PAGE_SIZE) { |
| 893 | p = hyp_virt_to_page(cur); |
| 894 | if (p->refcount == 1) |
| 895 | WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE); |
| 896 | hyp_page_ref_dec(p); |
| 897 | } |
| 898 | |
| 899 | hyp_unlock_component(); |
| 900 | host_unlock_component(); |
| 901 | } |
| 902 | |
| 903 | int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) |
| 904 | { |
| 905 | u64 phys = hyp_pfn_to_phys(pfn); |
| 906 | u64 size = PAGE_SIZE * nr_pages; |
| 907 | int ret; |
| 908 | |
| 909 | if (!pfn_range_is_valid(pfn, nr_pages)) |
| 910 | return -EINVAL; |
| 911 | |
| 912 | host_lock_component(); |
| 913 | ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); |
| 914 | if (!ret) |
| 915 | ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); |
| 916 | host_unlock_component(); |
| 917 | |
| 918 | return ret; |
| 919 | } |
| 920 | |
| 921 | int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) |
| 922 | { |
| 923 | u64 phys = hyp_pfn_to_phys(pfn); |
| 924 | u64 size = PAGE_SIZE * nr_pages; |
| 925 | int ret; |
| 926 | |
| 927 | if (!pfn_range_is_valid(pfn, nr_pages)) |
| 928 | return -EINVAL; |
| 929 | |
| 930 | host_lock_component(); |
| 931 | ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); |
| 932 | if (!ret) |
| 933 | ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED); |
| 934 | host_unlock_component(); |
| 935 | |
| 936 | return ret; |
| 937 | } |
| 938 | |
| 939 | static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size) |
| 940 | { |
| 941 | size_t block_size; |
| 942 | |
| 943 | if (nr_pages == 1) { |
| 944 | *size = PAGE_SIZE; |
| 945 | return 0; |
| 946 | } |
| 947 | |
| 948 | /* We solely support second to last level huge mapping */ |
| 949 | block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1); |
| 950 | |
| 951 | if (nr_pages != block_size >> PAGE_SHIFT) |
| 952 | return -EINVAL; |
| 953 | |
| 954 | if (!IS_ALIGNED(phys | ipa, block_size)) |
| 955 | return -EINVAL; |
| 956 | |
| 957 | *size = block_size; |
| 958 | return 0; |
| 959 | } |
| 960 | |
| 961 | int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu, |
| 962 | enum kvm_pgtable_prot prot) |
| 963 | { |
| 964 | struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); |
| 965 | u64 phys = hyp_pfn_to_phys(pfn); |
| 966 | u64 ipa = hyp_pfn_to_phys(gfn); |
| 967 | u64 size; |
| 968 | int ret; |
| 969 | |
| 970 | if (prot & ~KVM_PGTABLE_PROT_RWX) |
| 971 | return -EINVAL; |
| 972 | |
| 973 | if (!pfn_range_is_valid(pfn, nr_pages)) |
| 974 | return -EINVAL; |
| 975 | |
| 976 | ret = __guest_check_transition_size(phys, ipa, nr_pages, size: &size); |
| 977 | if (ret) |
| 978 | return ret; |
| 979 | |
| 980 | ret = check_range_allowed_memory(start: phys, end: phys + size); |
| 981 | if (ret) |
| 982 | return ret; |
| 983 | |
| 984 | host_lock_component(); |
| 985 | guest_lock_component(vm); |
| 986 | |
| 987 | ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE); |
| 988 | if (ret) |
| 989 | goto unlock; |
| 990 | |
| 991 | for_each_hyp_page(page, phys, size) { |
| 992 | switch (get_host_state(page)) { |
| 993 | case PKVM_PAGE_OWNED: |
| 994 | continue; |
| 995 | case PKVM_PAGE_SHARED_OWNED: |
| 996 | if (page->host_share_guest_count == U32_MAX) { |
| 997 | ret = -EBUSY; |
| 998 | goto unlock; |
| 999 | } |
| 1000 | |
| 1001 | /* Only host to np-guest multi-sharing is tolerated */ |
| 1002 | if (page->host_share_guest_count) |
| 1003 | continue; |
| 1004 | |
| 1005 | fallthrough; |
| 1006 | default: |
| 1007 | ret = -EPERM; |
| 1008 | goto unlock; |
| 1009 | } |
| 1010 | } |
| 1011 | |
| 1012 | for_each_hyp_page(page, phys, size) { |
| 1013 | set_host_state(page, PKVM_PAGE_SHARED_OWNED); |
| 1014 | page->host_share_guest_count++; |
| 1015 | } |
| 1016 | |
| 1017 | WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, |
| 1018 | pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED), |
| 1019 | &vcpu->vcpu.arch.pkvm_memcache, 0)); |
| 1020 | |
| 1021 | unlock: |
| 1022 | guest_unlock_component(vm); |
| 1023 | host_unlock_component(); |
| 1024 | |
| 1025 | return ret; |
| 1026 | } |
| 1027 | |
| 1028 | static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size) |
| 1029 | { |
| 1030 | enum pkvm_page_state state; |
| 1031 | kvm_pte_t pte; |
| 1032 | u64 phys; |
| 1033 | s8 level; |
| 1034 | int ret; |
| 1035 | |
| 1036 | ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); |
| 1037 | if (ret) |
| 1038 | return ret; |
| 1039 | if (!kvm_pte_valid(pte)) |
| 1040 | return -ENOENT; |
| 1041 | if (size && kvm_granule_size(level) != size) |
| 1042 | return -E2BIG; |
| 1043 | |
| 1044 | if (!size) |
| 1045 | size = kvm_granule_size(level); |
| 1046 | |
| 1047 | state = guest_get_page_state(pte, ipa); |
| 1048 | if (state != PKVM_PAGE_SHARED_BORROWED) |
| 1049 | return -EPERM; |
| 1050 | |
| 1051 | phys = kvm_pte_to_phys(pte); |
| 1052 | ret = check_range_allowed_memory(start: phys, end: phys + size); |
| 1053 | if (WARN_ON(ret)) |
| 1054 | return ret; |
| 1055 | |
| 1056 | for_each_hyp_page(page, phys, size) { |
| 1057 | if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED) |
| 1058 | return -EPERM; |
| 1059 | if (WARN_ON(!page->host_share_guest_count)) |
| 1060 | return -EINVAL; |
| 1061 | } |
| 1062 | |
| 1063 | *__phys = phys; |
| 1064 | |
| 1065 | return 0; |
| 1066 | } |
| 1067 | |
| 1068 | int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm) |
| 1069 | { |
| 1070 | u64 ipa = hyp_pfn_to_phys(gfn); |
| 1071 | u64 size, phys; |
| 1072 | int ret; |
| 1073 | |
| 1074 | ret = __guest_check_transition_size(phys: 0, ipa, nr_pages, size: &size); |
| 1075 | if (ret) |
| 1076 | return ret; |
| 1077 | |
| 1078 | host_lock_component(); |
| 1079 | guest_lock_component(vm); |
| 1080 | |
| 1081 | ret = __check_host_shared_guest(vm, phys: &phys, ipa, size); |
| 1082 | if (ret) |
| 1083 | goto unlock; |
| 1084 | |
| 1085 | ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size); |
| 1086 | if (ret) |
| 1087 | goto unlock; |
| 1088 | |
| 1089 | for_each_hyp_page(page, phys, size) { |
| 1090 | /* __check_host_shared_guest() protects against underflow */ |
| 1091 | page->host_share_guest_count--; |
| 1092 | if (!page->host_share_guest_count) |
| 1093 | set_host_state(page, PKVM_PAGE_OWNED); |
| 1094 | } |
| 1095 | |
| 1096 | unlock: |
| 1097 | guest_unlock_component(vm); |
| 1098 | host_unlock_component(); |
| 1099 | |
| 1100 | return ret; |
| 1101 | } |
| 1102 | |
| 1103 | static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size) |
| 1104 | { |
| 1105 | u64 phys; |
| 1106 | int ret; |
| 1107 | |
| 1108 | if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) |
| 1109 | return; |
| 1110 | |
| 1111 | host_lock_component(); |
| 1112 | guest_lock_component(vm); |
| 1113 | |
| 1114 | ret = __check_host_shared_guest(vm, phys: &phys, ipa, size); |
| 1115 | |
| 1116 | guest_unlock_component(vm); |
| 1117 | host_unlock_component(); |
| 1118 | |
| 1119 | WARN_ON(ret && ret != -ENOENT); |
| 1120 | } |
| 1121 | |
| 1122 | int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) |
| 1123 | { |
| 1124 | struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); |
| 1125 | u64 ipa = hyp_pfn_to_phys(gfn); |
| 1126 | int ret; |
| 1127 | |
| 1128 | if (pkvm_hyp_vm_is_protected(vm)) |
| 1129 | return -EPERM; |
| 1130 | |
| 1131 | if (prot & ~KVM_PGTABLE_PROT_RWX) |
| 1132 | return -EINVAL; |
| 1133 | |
| 1134 | assert_host_shared_guest(vm, ipa, size: 0); |
| 1135 | guest_lock_component(vm); |
| 1136 | ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); |
| 1137 | guest_unlock_component(vm); |
| 1138 | |
| 1139 | return ret; |
| 1140 | } |
| 1141 | |
| 1142 | int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm) |
| 1143 | { |
| 1144 | u64 size, ipa = hyp_pfn_to_phys(gfn); |
| 1145 | int ret; |
| 1146 | |
| 1147 | if (pkvm_hyp_vm_is_protected(vm)) |
| 1148 | return -EPERM; |
| 1149 | |
| 1150 | ret = __guest_check_transition_size(phys: 0, ipa, nr_pages, size: &size); |
| 1151 | if (ret) |
| 1152 | return ret; |
| 1153 | |
| 1154 | assert_host_shared_guest(vm, ipa, size); |
| 1155 | guest_lock_component(vm); |
| 1156 | ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size); |
| 1157 | guest_unlock_component(vm); |
| 1158 | |
| 1159 | return ret; |
| 1160 | } |
| 1161 | |
| 1162 | int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm) |
| 1163 | { |
| 1164 | u64 size, ipa = hyp_pfn_to_phys(gfn); |
| 1165 | int ret; |
| 1166 | |
| 1167 | if (pkvm_hyp_vm_is_protected(vm)) |
| 1168 | return -EPERM; |
| 1169 | |
| 1170 | ret = __guest_check_transition_size(phys: 0, ipa, nr_pages, size: &size); |
| 1171 | if (ret) |
| 1172 | return ret; |
| 1173 | |
| 1174 | assert_host_shared_guest(vm, ipa, size); |
| 1175 | guest_lock_component(vm); |
| 1176 | ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold); |
| 1177 | guest_unlock_component(vm); |
| 1178 | |
| 1179 | return ret; |
| 1180 | } |
| 1181 | |
| 1182 | int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) |
| 1183 | { |
| 1184 | struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); |
| 1185 | u64 ipa = hyp_pfn_to_phys(gfn); |
| 1186 | |
| 1187 | if (pkvm_hyp_vm_is_protected(vm)) |
| 1188 | return -EPERM; |
| 1189 | |
| 1190 | assert_host_shared_guest(vm, ipa, size: 0); |
| 1191 | guest_lock_component(vm); |
| 1192 | kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); |
| 1193 | guest_unlock_component(vm); |
| 1194 | |
| 1195 | return 0; |
| 1196 | } |
| 1197 | |
| 1198 | #ifdef CONFIG_NVHE_EL2_DEBUG |
| 1199 | struct pkvm_expected_state { |
| 1200 | enum pkvm_page_state host; |
| 1201 | enum pkvm_page_state hyp; |
| 1202 | enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */ |
| 1203 | }; |
| 1204 | |
| 1205 | static struct pkvm_expected_state selftest_state; |
| 1206 | static struct hyp_page *selftest_page; |
| 1207 | |
| 1208 | static struct pkvm_hyp_vm selftest_vm = { |
| 1209 | .kvm = { |
| 1210 | .arch = { |
| 1211 | .mmu = { |
| 1212 | .arch = &selftest_vm.kvm.arch, |
| 1213 | .pgt = &selftest_vm.pgt, |
| 1214 | }, |
| 1215 | }, |
| 1216 | }, |
| 1217 | }; |
| 1218 | |
| 1219 | static struct pkvm_hyp_vcpu selftest_vcpu = { |
| 1220 | .vcpu = { |
| 1221 | .arch = { |
| 1222 | .hw_mmu = &selftest_vm.kvm.arch.mmu, |
| 1223 | }, |
| 1224 | .kvm = &selftest_vm.kvm, |
| 1225 | }, |
| 1226 | }; |
| 1227 | |
| 1228 | static void init_selftest_vm(void *virt) |
| 1229 | { |
| 1230 | struct hyp_page *p = hyp_virt_to_page(virt); |
| 1231 | int i; |
| 1232 | |
| 1233 | selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; |
| 1234 | WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt)); |
| 1235 | |
| 1236 | for (i = 0; i < pkvm_selftest_pages(); i++) { |
| 1237 | if (p[i].refcount) |
| 1238 | continue; |
| 1239 | p[i].refcount = 1; |
| 1240 | hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i])); |
| 1241 | } |
| 1242 | } |
| 1243 | |
| 1244 | static u64 selftest_ipa(void) |
| 1245 | { |
| 1246 | return BIT(selftest_vm.pgt.ia_bits - 1); |
| 1247 | } |
| 1248 | |
| 1249 | static void assert_page_state(void) |
| 1250 | { |
| 1251 | void *virt = hyp_page_to_virt(selftest_page); |
| 1252 | u64 size = PAGE_SIZE << selftest_page->order; |
| 1253 | struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu; |
| 1254 | u64 phys = hyp_virt_to_phys(virt); |
| 1255 | u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE }; |
| 1256 | struct pkvm_hyp_vm *vm; |
| 1257 | |
| 1258 | vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); |
| 1259 | |
| 1260 | host_lock_component(); |
| 1261 | WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host)); |
| 1262 | host_unlock_component(); |
| 1263 | |
| 1264 | hyp_lock_component(); |
| 1265 | WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp)); |
| 1266 | hyp_unlock_component(); |
| 1267 | |
| 1268 | guest_lock_component(&selftest_vm); |
| 1269 | WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0])); |
| 1270 | WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1])); |
| 1271 | guest_unlock_component(&selftest_vm); |
| 1272 | } |
| 1273 | |
/*
 * Call @fn with the remaining arguments, warn if it does not return @res,
 * then verify that the selftest page is in the state currently recorded in
 * selftest_state.
 */
#define assert_transition_res(res, fn, ...)			\
	do {							\
		WARN_ON(fn(__VA_ARGS__) != (res));		\
		assert_page_state();				\
	} while (0)
| 1279 | |
| 1280 | void pkvm_ownership_selftest(void *base) |
| 1281 | { |
| 1282 | enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX; |
| 1283 | void *virt = hyp_alloc_pages(&host_s2_pool, 0); |
| 1284 | struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu; |
| 1285 | struct pkvm_hyp_vm *vm = &selftest_vm; |
| 1286 | u64 phys, size, pfn, gfn; |
| 1287 | |
| 1288 | WARN_ON(!virt); |
| 1289 | selftest_page = hyp_virt_to_page(virt); |
| 1290 | selftest_page->refcount = 0; |
| 1291 | init_selftest_vm(base); |
| 1292 | |
| 1293 | size = PAGE_SIZE << selftest_page->order; |
| 1294 | phys = hyp_virt_to_phys(virt); |
| 1295 | pfn = hyp_phys_to_pfn(phys); |
| 1296 | gfn = hyp_phys_to_pfn(selftest_ipa()); |
| 1297 | |
| 1298 | selftest_state.host = PKVM_NOPAGE; |
| 1299 | selftest_state.hyp = PKVM_PAGE_OWNED; |
| 1300 | selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE; |
| 1301 | assert_page_state(); |
| 1302 | assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); |
| 1303 | assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); |
| 1304 | assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); |
| 1305 | assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); |
| 1306 | assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1); |
| 1307 | assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); |
| 1308 | assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); |
| 1309 | assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); |
| 1310 | |
| 1311 | selftest_state.host = PKVM_PAGE_OWNED; |
| 1312 | selftest_state.hyp = PKVM_NOPAGE; |
| 1313 | assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1); |
| 1314 | assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); |
| 1315 | assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); |
| 1316 | assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1); |
| 1317 | assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); |
| 1318 | assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); |
| 1319 | |
| 1320 | selftest_state.host = PKVM_PAGE_SHARED_OWNED; |
| 1321 | selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED; |
| 1322 | assert_transition_res(0, __pkvm_host_share_hyp, pfn); |
| 1323 | assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); |
| 1324 | assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); |
| 1325 | assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); |
| 1326 | assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); |
| 1327 | assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); |
| 1328 | assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); |
| 1329 | |
| 1330 | assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size); |
| 1331 | assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size); |
| 1332 | hyp_unpin_shared_mem(virt, virt + size); |
| 1333 | WARN_ON(hyp_page_count(virt) != 1); |
| 1334 | assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn); |
| 1335 | assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); |
| 1336 | assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); |
| 1337 | assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); |
| 1338 | assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); |
| 1339 | assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); |
| 1340 | assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); |
| 1341 | |
| 1342 | hyp_unpin_shared_mem(virt, virt + size); |
| 1343 | assert_page_state(); |
| 1344 | WARN_ON(hyp_page_count(virt)); |
| 1345 | |
| 1346 | selftest_state.host = PKVM_PAGE_OWNED; |
| 1347 | selftest_state.hyp = PKVM_NOPAGE; |
| 1348 | assert_transition_res(0, __pkvm_host_unshare_hyp, pfn); |
| 1349 | |
| 1350 | selftest_state.host = PKVM_PAGE_SHARED_OWNED; |
| 1351 | selftest_state.hyp = PKVM_NOPAGE; |
| 1352 | assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1); |
| 1353 | assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); |
| 1354 | assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); |
| 1355 | assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); |
| 1356 | assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); |
| 1357 | assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); |
| 1358 | assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); |
| 1359 | assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); |
| 1360 | assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); |
| 1361 | |
| 1362 | selftest_state.host = PKVM_PAGE_OWNED; |
| 1363 | selftest_state.hyp = PKVM_NOPAGE; |
| 1364 | assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1); |
| 1365 | assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1); |
| 1366 | |
| 1367 | selftest_state.host = PKVM_PAGE_SHARED_OWNED; |
| 1368 | selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED; |
| 1369 | assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); |
| 1370 | assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); |
| 1371 | assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); |
| 1372 | assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); |
| 1373 | assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); |
| 1374 | assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); |
| 1375 | assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); |
| 1376 | assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); |
| 1377 | |
| 1378 | selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED; |
| 1379 | assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot); |
| 1380 | WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2); |
| 1381 | |
| 1382 | selftest_state.guest[0] = PKVM_NOPAGE; |
| 1383 | assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm); |
| 1384 | |
| 1385 | selftest_state.guest[1] = PKVM_NOPAGE; |
| 1386 | selftest_state.host = PKVM_PAGE_OWNED; |
| 1387 | assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm); |
| 1388 | |
| 1389 | selftest_state.host = PKVM_NOPAGE; |
| 1390 | selftest_state.hyp = PKVM_PAGE_OWNED; |
| 1391 | assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1); |
| 1392 | |
| 1393 | selftest_page->refcount = 1; |
| 1394 | hyp_put_page(&host_s2_pool, virt); |
| 1395 | } |
| 1396 | #endif |
| 1397 | |