// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

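/*
 * hyp_memory and hyp_memblock_nr live in the nVHE hyp image;
 * kvm_nvhe_sym() gives the kernel-side aliases so the array can be
 * populated before the hypervisor takes over.
 */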
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

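/* sort() comparator: order hyp memblock regions by ascending base address. */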
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

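/*
 * Copy the kernel's memblock map into the hyp array, bounded by
 * HYP_MEMBLOCK_REGIONS, so the hypervisor knows the layout of the
 * memory it has to protect.
 */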
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

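/*
 * Carve out the hypervisor's private memory. This runs early in boot,
 * while memblock is still the active allocator, and only when protected
 * mode was requested on the command line (kvm-arm.mode=protected).
 */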
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

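	/*
	 * Budget the pages needed for the hyp stage-1 and host stage-2
	 * page-tables, the VM handle table, the hyp vmemmap and the FF-A
	 * proxy buffers.
	 */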
	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

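	/* On success, the non-negative return value is the pKVM VM handle. */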
	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* vcpu indices are expected to be sequential, starting at 0 */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

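	/*
	 * Past __pkvm_init_vm(), the donated pages belong to the hypervisor:
	 * unwind through the teardown hypercall rather than freeing them
	 * directly.
	 */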
destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

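/*
 * Instantiate the hyp VM on first use; the handle check under
 * host_kvm->lock makes repeat calls no-ops.
 */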
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);

	return ret;
}

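/*
 * Tear down the EL2 side of the VM and reclaim the pages that were
 * donated to the hypervisor.
 */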
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	mutex_init(&host_kvm->lock);
	return 0;
}

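/* Runs on every CPU: ask the hypervisor to deprivilege the host there. */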
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

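/*
 * Runs as a late initcall (device_initcall_sync() below) so that KVM
 * itself has had a chance to initialize, cf. the is_kvm_arm_initialised()
 * check, before the host is deprivileged.
 */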
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

Source code of linux/arch/arm64/kvm/pkvm.c