1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Support KVM guest page tracking |
4 | * |
5 | * This feature allows us to track page access in guest. Currently, only |
6 | * write access is tracked. |
7 | * |
8 | * Copyright(C) 2015 Intel Corporation. |
9 | * |
10 | * Author: |
11 | * Xiao Guangrong <guangrong.xiao@linux.intel.com> |
12 | */ |
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
14 | |
15 | #include <linux/lockdep.h> |
16 | #include <linux/kvm_host.h> |
17 | #include <linux/rculist.h> |
18 | |
19 | #include "mmu.h" |
20 | #include "mmu_internal.h" |
21 | #include "page_track.h" |
22 | |
/*
 * Report whether external write tracking has been switched on for @kvm.
 *
 * Always false when CONFIG_KVM_EXTERNAL_WRITE_TRACKING is not set.
 */
static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
{
#ifndef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
	return false;
#else
	/*
	 * The acquire load orders the read of the flag ahead of any later
	 * reads of the related pointers.  It is the counterpart of the
	 * smp_store_release() in kvm_enable_external_write_tracking().
	 */
	return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
#endif
}
35 | |
36 | bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) |
37 | { |
38 | return kvm_external_write_tracking_enabled(kvm) || |
39 | kvm_shadow_root_allocated(kvm) || !tdp_enabled; |
40 | } |
41 | |
42 | void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) |
43 | { |
44 | kvfree(addr: slot->arch.gfn_write_track); |
45 | slot->arch.gfn_write_track = NULL; |
46 | } |
47 | |
48 | static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, |
49 | unsigned long npages) |
50 | { |
51 | const size_t size = sizeof(*slot->arch.gfn_write_track); |
52 | |
53 | if (!slot->arch.gfn_write_track) |
54 | slot->arch.gfn_write_track = __vcalloc(n: npages, size, |
55 | GFP_KERNEL_ACCOUNT); |
56 | |
57 | return slot->arch.gfn_write_track ? 0 : -ENOMEM; |
58 | } |
59 | |
/*
 * Set up write-tracking metadata for a memslot that is being created.
 *
 * The counter array for @slot (@npages entries) is allocated only when write
 * tracking is already enabled for @kvm; otherwise nothing is allocated.
 *
 * Returns 0 on success or when no allocation was needed, or the error from
 * the allocation helper.
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
69 | |
70 | int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) |
71 | { |
72 | return __kvm_page_track_write_tracking_alloc(slot, npages: slot->npages); |
73 | } |
74 | |
75 | static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, |
76 | short count) |
77 | { |
78 | int index, val; |
79 | |
80 | index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); |
81 | |
82 | val = slot->arch.gfn_write_track[index]; |
83 | |
84 | if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) |
85 | return; |
86 | |
87 | slot->arch.gfn_write_track[index] += count; |
88 | } |
89 | |
90 | void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, |
91 | gfn_t gfn) |
92 | { |
93 | lockdep_assert_held_write(&kvm->mmu_lock); |
94 | |
95 | lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || |
96 | srcu_read_lock_held(&kvm->srcu)); |
97 | |
98 | if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) |
99 | return; |
100 | |
101 | update_gfn_write_track(slot, gfn, count: 1); |
102 | |
103 | /* |
104 | * new track stops large page mapping for the |
105 | * tracked page. |
106 | */ |
107 | kvm_mmu_gfn_disallow_lpage(slot, gfn); |
108 | |
109 | if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, min_level: PG_LEVEL_4K)) |
110 | kvm_flush_remote_tlbs(kvm); |
111 | } |
112 | |
113 | void __kvm_write_track_remove_gfn(struct kvm *kvm, |
114 | struct kvm_memory_slot *slot, gfn_t gfn) |
115 | { |
116 | lockdep_assert_held_write(&kvm->mmu_lock); |
117 | |
118 | lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || |
119 | srcu_read_lock_held(&kvm->srcu)); |
120 | |
121 | if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) |
122 | return; |
123 | |
124 | update_gfn_write_track(slot, gfn, count: -1); |
125 | |
126 | /* |
127 | * allow large page mapping for the tracked page |
128 | * after the tracker is gone. |
129 | */ |
130 | kvm_mmu_gfn_allow_lpage(slot, gfn); |
131 | } |
132 | |
133 | /* |
134 | * check if the corresponding access on the specified guest page is tracked. |
135 | */ |
136 | bool kvm_gfn_is_write_tracked(struct kvm *kvm, |
137 | const struct kvm_memory_slot *slot, gfn_t gfn) |
138 | { |
139 | int index; |
140 | |
141 | if (!slot) |
142 | return false; |
143 | |
144 | if (!kvm_page_track_write_tracking_enabled(kvm)) |
145 | return false; |
146 | |
147 | index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); |
148 | return !!READ_ONCE(slot->arch.gfn_write_track[index]); |
149 | } |
150 | |
151 | #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING |
152 | void kvm_page_track_cleanup(struct kvm *kvm) |
153 | { |
154 | struct kvm_page_track_notifier_head *head; |
155 | |
156 | head = &kvm->arch.track_notifier_head; |
157 | cleanup_srcu_struct(ssp: &head->track_srcu); |
158 | } |
159 | |
160 | int kvm_page_track_init(struct kvm *kvm) |
161 | { |
162 | struct kvm_page_track_notifier_head *head; |
163 | |
164 | head = &kvm->arch.track_notifier_head; |
165 | INIT_HLIST_HEAD(&head->track_notifier_list); |
166 | return init_srcu_struct(&head->track_srcu); |
167 | } |
168 | |
169 | static int kvm_enable_external_write_tracking(struct kvm *kvm) |
170 | { |
171 | struct kvm_memslots *slots; |
172 | struct kvm_memory_slot *slot; |
173 | int r = 0, i, bkt; |
174 | |
175 | mutex_lock(&kvm->slots_arch_lock); |
176 | |
177 | /* |
178 | * Check for *any* write tracking user (not just external users) under |
179 | * lock. This avoids unnecessary work, e.g. if KVM itself is using |
180 | * write tracking, or if two external users raced when registering. |
181 | */ |
182 | if (kvm_page_track_write_tracking_enabled(kvm)) |
183 | goto out_success; |
184 | |
185 | for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { |
186 | slots = __kvm_memslots(kvm, as_id: i); |
187 | kvm_for_each_memslot(slot, bkt, slots) { |
188 | /* |
189 | * Intentionally do NOT free allocations on failure to |
190 | * avoid having to track which allocations were made |
191 | * now versus when the memslot was created. The |
192 | * metadata is guaranteed to be freed when the slot is |
193 | * freed, and will be kept/used if userspace retries |
194 | * the failed ioctl() instead of killing the VM. |
195 | */ |
196 | r = kvm_page_track_write_tracking_alloc(slot); |
197 | if (r) |
198 | goto out_unlock; |
199 | } |
200 | } |
201 | |
202 | out_success: |
203 | /* |
204 | * Ensure that external_write_tracking_enabled becomes true strictly |
205 | * after all the related pointers are set. |
206 | */ |
207 | smp_store_release(&kvm->arch.external_write_tracking_enabled, true); |
208 | out_unlock: |
209 | mutex_unlock(lock: &kvm->slots_arch_lock); |
210 | return r; |
211 | } |
212 | |
213 | /* |
214 | * register the notifier so that event interception for the tracked guest |
215 | * pages can be received. |
216 | */ |
217 | int kvm_page_track_register_notifier(struct kvm *kvm, |
218 | struct kvm_page_track_notifier_node *n) |
219 | { |
220 | struct kvm_page_track_notifier_head *head; |
221 | int r; |
222 | |
223 | if (!kvm || kvm->mm != current->mm) |
224 | return -ESRCH; |
225 | |
226 | if (!kvm_external_write_tracking_enabled(kvm)) { |
227 | r = kvm_enable_external_write_tracking(kvm); |
228 | if (r) |
229 | return r; |
230 | } |
231 | |
232 | kvm_get_kvm(kvm); |
233 | |
234 | head = &kvm->arch.track_notifier_head; |
235 | |
236 | write_lock(&kvm->mmu_lock); |
237 | hlist_add_head_rcu(n: &n->node, h: &head->track_notifier_list); |
238 | write_unlock(&kvm->mmu_lock); |
239 | return 0; |
240 | } |
241 | EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); |
242 | |
243 | /* |
244 | * stop receiving the event interception. It is the opposed operation of |
245 | * kvm_page_track_register_notifier(). |
246 | */ |
247 | void kvm_page_track_unregister_notifier(struct kvm *kvm, |
248 | struct kvm_page_track_notifier_node *n) |
249 | { |
250 | struct kvm_page_track_notifier_head *head; |
251 | |
252 | head = &kvm->arch.track_notifier_head; |
253 | |
254 | write_lock(&kvm->mmu_lock); |
255 | hlist_del_rcu(n: &n->node); |
256 | write_unlock(&kvm->mmu_lock); |
257 | synchronize_srcu(ssp: &head->track_srcu); |
258 | |
259 | kvm_put_kvm(kvm); |
260 | } |
261 | EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); |
262 | |
263 | /* |
264 | * Notify the node that write access is intercepted and write emulation is |
265 | * finished at this time. |
266 | * |
267 | * The node should figure out if the written page is the one that node is |
268 | * interested in by itself. |
269 | */ |
270 | void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) |
271 | { |
272 | struct kvm_page_track_notifier_head *head; |
273 | struct kvm_page_track_notifier_node *n; |
274 | int idx; |
275 | |
276 | head = &kvm->arch.track_notifier_head; |
277 | |
278 | if (hlist_empty(h: &head->track_notifier_list)) |
279 | return; |
280 | |
281 | idx = srcu_read_lock(ssp: &head->track_srcu); |
282 | hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, |
283 | srcu_read_lock_held(&head->track_srcu)) |
284 | if (n->track_write) |
285 | n->track_write(gpa, new, bytes, n); |
286 | srcu_read_unlock(ssp: &head->track_srcu, idx); |
287 | } |
288 | |
289 | /* |
290 | * Notify external page track nodes that a memory region is being removed from |
291 | * the VM, e.g. so that users can free any associated metadata. |
292 | */ |
293 | void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) |
294 | { |
295 | struct kvm_page_track_notifier_head *head; |
296 | struct kvm_page_track_notifier_node *n; |
297 | int idx; |
298 | |
299 | head = &kvm->arch.track_notifier_head; |
300 | |
301 | if (hlist_empty(h: &head->track_notifier_list)) |
302 | return; |
303 | |
304 | idx = srcu_read_lock(ssp: &head->track_srcu); |
305 | hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, |
306 | srcu_read_lock_held(&head->track_srcu)) |
307 | if (n->track_remove_region) |
308 | n->track_remove_region(slot->base_gfn, slot->npages, n); |
309 | srcu_read_unlock(ssp: &head->track_srcu, idx); |
310 | } |
311 | |
312 | /* |
313 | * add guest page to the tracking pool so that corresponding access on that |
314 | * page will be intercepted. |
315 | * |
316 | * @kvm: the guest instance we are interested in. |
317 | * @gfn: the guest page. |
318 | */ |
319 | int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) |
320 | { |
321 | struct kvm_memory_slot *slot; |
322 | int idx; |
323 | |
324 | idx = srcu_read_lock(ssp: &kvm->srcu); |
325 | |
326 | slot = gfn_to_memslot(kvm, gfn); |
327 | if (!slot) { |
328 | srcu_read_unlock(ssp: &kvm->srcu, idx); |
329 | return -EINVAL; |
330 | } |
331 | |
332 | write_lock(&kvm->mmu_lock); |
333 | __kvm_write_track_add_gfn(kvm, slot, gfn); |
334 | write_unlock(&kvm->mmu_lock); |
335 | |
336 | srcu_read_unlock(ssp: &kvm->srcu, idx); |
337 | |
338 | return 0; |
339 | } |
340 | EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); |
341 | |
342 | /* |
343 | * remove the guest page from the tracking pool which stops the interception |
344 | * of corresponding access on that page. |
345 | * |
346 | * @kvm: the guest instance we are interested in. |
347 | * @gfn: the guest page. |
348 | */ |
349 | int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) |
350 | { |
351 | struct kvm_memory_slot *slot; |
352 | int idx; |
353 | |
354 | idx = srcu_read_lock(ssp: &kvm->srcu); |
355 | |
356 | slot = gfn_to_memslot(kvm, gfn); |
357 | if (!slot) { |
358 | srcu_read_unlock(ssp: &kvm->srcu, idx); |
359 | return -EINVAL; |
360 | } |
361 | |
362 | write_lock(&kvm->mmu_lock); |
363 | __kvm_write_track_remove_gfn(kvm, slot, gfn); |
364 | write_unlock(&kvm->mmu_lock); |
365 | |
366 | srcu_read_unlock(ssp: &kvm->srcu, idx); |
367 | |
368 | return 0; |
369 | } |
370 | EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); |
371 | #endif |
372 | |