// SPDX-License-Identifier: GPL-2.0
/*
 * A memslot-related performance benchmark.
 *
 * Copyright (C) 2021 Oracle and/or its affiliates.
 *
 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
 */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#include <linux/compiler.h>
#include <linux/sizes.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

#define MEM_EXTRA_SIZE		SZ_64K

#define MEM_SIZE		(SZ_512M + MEM_EXTRA_SIZE)
#define MEM_GPA			SZ_256M
#define MEM_AUX_GPA		MEM_GPA
#define MEM_SYNC_GPA		MEM_AUX_GPA
#define MEM_TEST_GPA		(MEM_AUX_GPA + MEM_EXTRA_SIZE)
#define MEM_TEST_SIZE		(MEM_SIZE - MEM_EXTRA_SIZE)
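
/*
 * A sketch of the resulting guest-physical layout implied by the defines
 * above (with MEM_EXTRA_SIZE == 64 KiB):
 *
 *   MEM_GPA (256 MiB):             sync/aux area (MEM_SYNC_GPA == MEM_AUX_GPA)
 *   MEM_TEST_GPA (256 MiB + 64K):  test area proper, MEM_TEST_SIZE bytes
 */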

/*
 * 32 MiB is the maximum size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs to have at least one page, up to
 * 8194 slots in use can then be tested (although with slightly
 * limited resolution).
 */
#define MEM_SIZE_MAP		(SZ_32M + MEM_EXTRA_SIZE)
#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - MEM_EXTRA_SIZE)

/*
 * 128 MiB is the minimum size that fills 32k slots with at least one page
 * each while still getting 100+ iterations in such a test.
 *
 * 2 MiB chunk size, like a typical huge page.
 */
#define MEM_TEST_UNMAP_SIZE		SZ_128M
#define MEM_TEST_UNMAP_CHUNK_SIZE	SZ_2M
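
/*
 * Worked example, assuming 4 KiB guest pages: MEM_TEST_UNMAP_SIZE covers
 * 32768 pages and each 2 MiB chunk covers 512 pages, so the chunked unmap
 * loop below issues 32 madvise() calls per test-area half.
 */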

/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * The number of memory slots, excluding the reserved memory slot 0,
 * differs across architectures and configurations. The memory size in
 * this test is calculated by picking the maximal last memory slot's
 * memory size, with alignment to the largest supported page size (64KB).
 * This way, the selected memory size for this test is compatible with
 * test_memslot_move_prepare().
 *
 * architecture   slots    memory-per-slot    memory-on-last-slot
 * --------------------------------------------------------------
 * x86-4KB        32763    16KB               160KB
 * arm64-4KB      32766    16KB               112KB
 * arm64-16KB     32766    16KB               112KB
 * arm64-64KB     8192     64KB               128KB
 */
#define MEM_TEST_MOVE_SIZE		(3 * SZ_64K)
#define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
	      "invalid move test region size");

#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00

struct vm_data {
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	pthread_t vcpu_thread;
	uint32_t nslots;
	uint64_t npages;
	uint64_t pages_per_slot;
	void **hva_slots;
	bool mmio_ok;
	uint64_t mmio_gpa_min;
	uint64_t mmio_gpa_max;
};

struct sync_area {
	uint32_t    guest_page_size;
	atomic_bool start_flag;
	atomic_bool exit_flag;
	atomic_bool sync_flag;
	void *move_area_ptr;
};

/*
 * Technically, the atomic bool also needs to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");

static sem_t vcpu_ready;

static bool map_unmap_verify;

static bool verbose;
#define pr_info_v(...)				\
	do {					\
		if (verbose)			\
			pr_info(__VA_ARGS__);	\
	} while (0)

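/*
 * While a memslot move is in flight, guest accesses to the affected GPA
 * range may not be backed by any slot and can thus exit to userspace as
 * MMIO; the "move active area" test expects such exits and validates
 * them here.
 */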
static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
{
	TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
	TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
	TEST_ASSERT(run->mmio.len == 8,
		    "Unexpected exit mmio size = %u", run->mmio.len);
	TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
		    run->mmio.phys_addr <= data->mmio_gpa_max,
		    "Unexpected exit mmio address = 0x%llx",
		    run->mmio.phys_addr);
}

static void *vcpu_worker(void *__data)
{
	struct vm_data *data = __data;
	struct kvm_vcpu *vcpu = data->vcpu;
	struct kvm_run *run = vcpu->run;
	struct ucall uc;

	while (1) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			TEST_ASSERT(uc.args[1] == 0,
				    "Unexpected sync ucall, got %lx",
				    (ulong)uc.args[1]);
			sem_post(&vcpu_ready);
			continue;
		case UCALL_NONE:
			if (run->exit_reason == KVM_EXIT_MMIO)
				check_mmio_access(data, run);
			else
				goto done;
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}
	}

done:
	return NULL;
}

static void wait_for_vcpu(void)
{
	struct timespec ts;

	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
		    "clock_gettime() failed: %d", errno);

	ts.tv_sec += 2;
	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
		    "sem_timedwait() failed: %d", errno);
}

static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
	uint64_t gpage, pgoffs;
	uint32_t slot, slotoffs;
	void *base;
	uint32_t guest_page_size = data->vm->page_size;

	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
	TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
		    "Too high gpa to translate");
	gpa -= MEM_GPA;

	gpage = gpa / guest_page_size;
	pgoffs = gpa % guest_page_size;
	slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
	slotoffs = gpage - (slot * data->pages_per_slot);

	if (rempages) {
		uint64_t slotpages;

		if (slot == data->nslots - 1)
			slotpages = data->npages - slot * data->pages_per_slot;
		else
			slotpages = data->pages_per_slot;

		TEST_ASSERT(!pgoffs,
			    "Asking for remaining pages in slot but gpa not page aligned");
		*rempages = slotpages - slotoffs;
	}

	base = data->hva_slots[slot];
	return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
}
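
/*
 * Illustrative numbers (hypothetical, just for reading the math above):
 * with 4 KiB guest pages and pages_per_slot == 8, gpa == MEM_GPA + 9 * 4096
 * yields gpage == 9, pgoffs == 0, slot == 1 and slotoffs == 1, i.e. the
 * second page of the second test memslot.
 */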

static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
{
	uint32_t guest_page_size = data->vm->page_size;

	TEST_ASSERT(slot < data->nslots, "Too high slot number");

	return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
}

static struct vm_data *alloc_vm(void)
{
	struct vm_data *data;

	data = malloc(sizeof(*data));
	TEST_ASSERT(data, "malloc(vmdata) failed");

	data->vm = NULL;
	data->vcpu = NULL;
	data->hva_slots = NULL;

	return data;
}

static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
			     uint64_t pages_per_slot, uint64_t rempages)
{
	if (!pages_per_slot)
		return false;

	if ((pages_per_slot * guest_page_size) % host_page_size)
		return false;

	if ((rempages * guest_page_size) % host_page_size)
		return false;

	return true;
}
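
/*
 * The host-page-size checks above matter when the guest page size is
 * smaller than the host's: each slot is backed by its own host mapping,
 * so a per-slot size that is not a multiple of the host page size could
 * not be backed (or madvise()d) exactly.
 */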

static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t mempages, pages_per_slot, rempages;
	uint64_t slots;

	mempages = data->npages;
	slots = data->nslots;
	while (--slots > 1) {
		pages_per_slot = mempages / slots;
		if (!pages_per_slot)
			continue;

		rempages = mempages % pages_per_slot;
		if (check_slot_pages(host_page_size, guest_page_size,
				     pages_per_slot, rempages))
			return slots + 1;	/* slot 0 is reserved */
	}

	return 0;
}

static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
		       void *guest_code, uint64_t mem_size,
		       struct timespec *slot_runtime)
{
	uint64_t mempages, rempages;
	uint64_t guest_addr;
	uint32_t slot, host_page_size, guest_page_size;
	struct timespec tstart;
	struct sync_area *sync;

	host_page_size = getpagesize();
	guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
	mempages = mem_size / guest_page_size;

	data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
	TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");

	data->npages = mempages;
	TEST_ASSERT(data->npages > 1, "Can't test without any memory");
	data->nslots = nslots;
	data->pages_per_slot = data->npages / data->nslots;
	rempages = data->npages % data->nslots;
	if (!check_slot_pages(host_page_size, guest_page_size,
			      data->pages_per_slot, rempages)) {
		*maxslots = get_max_slots(data, host_page_size);
		return false;
	}

	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
	TEST_ASSERT(data->hva_slots, "malloc() fail");

	pr_info_v("Adding slots 1..%i, each slot with %" PRIu64 " pages + %" PRIu64 " extra pages last\n",
		  data->nslots, data->pages_per_slot, rempages);

	clock_gettime(CLOCK_MONOTONIC, &tstart);
	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
		uint64_t npages;

		npages = data->pages_per_slot;
		if (slot == data->nslots)
			npages += rempages;

		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
					    guest_addr, slot, npages, 0);
		guest_addr += npages * guest_page_size;
	}
	*slot_runtime = timespec_elapsed(tstart);

	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
		uint64_t npages;
		uint64_t gpa;

		npages = data->pages_per_slot;
		if (slot == data->nslots)
			npages += rempages;

		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
		TEST_ASSERT(gpa == guest_addr,
			    "vm_phy_pages_alloc() failed");

		data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
		memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);

		guest_addr += npages * guest_page_size;
	}

	virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);

	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
	sync->guest_page_size = data->vm->page_size;
	atomic_init(&sync->start_flag, false);
	atomic_init(&sync->exit_flag, false);
	atomic_init(&sync->sync_flag, false);

	data->mmio_ok = false;

	return true;
}

static void launch_vm(struct vm_data *data)
{
	pr_info_v("Launching the test VM\n");

	pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);

	/* Ensure the guest thread is spun up. */
	wait_for_vcpu();
}

static void free_vm(struct vm_data *data)
{
	kvm_vm_free(data->vm);
	free(data->hva_slots);
	free(data);
}

static void wait_guest_exit(struct vm_data *data)
{
	pthread_join(data->vcpu_thread, NULL);
}

static void let_guest_run(struct sync_area *sync)
{
	atomic_store_explicit(&sync->start_flag, true, memory_order_release);
}

static void guest_spin_until_start(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
		;
}

static void make_guest_exit(struct sync_area *sync)
{
	atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
}

static bool _guest_should_exit(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
}

#define guest_should_exit() unlikely(_guest_should_exit())

/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
	alarm(2);

	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
	while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
		;

	alarm(0);
}

static bool guest_perform_sync(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	bool expected;

	do {
		if (guest_should_exit())
			return false;

		expected = true;
	} while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
							&expected, false,
							memory_order_acq_rel,
							memory_order_relaxed));

	return true;
}
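
/*
 * The sync handshake, in short: the host raises sync_flag with release
 * semantics and spins until it reads false again, while the guest spins
 * on a weak compare-exchange that flips true -> false. Each
 * host_perform_sync() call therefore pairs with exactly one successful
 * guest_perform_sync().
 */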

static void guest_code_test_memslot_move(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (!guest_should_exit()) {
		uintptr_t ptr;

		for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
		     ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		/*
		 * No host sync here since the MMIO exits are so expensive
		 * that the host would spend most of its time waiting for
		 * the guest and so instead of measuring memslot move
		 * performance we would measure the performance and
		 * likelihood of MMIO exits.
		 */
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_map(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr;

		for (ptr = MEM_TEST_GPA;
		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
		     ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
		     ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_2;

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_unmap(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr = MEM_TEST_GPA;

		/*
		 * We can afford to access (map) just a small number of pages
		 * per host sync as otherwise the host will spend
		 * a significant amount of its time waiting for the guest
		 * (instead of doing unmap operations), so this would
		 * effectively turn this test into a map performance test.
		 *
		 * Just access a single page to be on the safe side.
		 */
		*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		ptr += MEM_TEST_UNMAP_SIZE / 2;
		*(uint64_t *)ptr = MEM_TEST_VAL_2;

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_rw(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr;

		for (ptr = MEM_TEST_GPA;
		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		for (ptr = MEM_TEST_GPA + page_size / 2;
		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
			uint64_t val = *(uint64_t *)ptr;

			GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
			*(uint64_t *)ptr = 0;
		}

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static bool test_memslot_move_prepare(struct vm_data *data,
				      struct sync_area *sync,
				      uint64_t *maxslots, bool isactive)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t movesrcgpa, movetestgpa;

	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);

	if (isactive) {
		uint64_t lastpages;

		vm_gpa2hva(data, movesrcgpa, &lastpages);
		if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
			*maxslots = 0;
			return false;
		}
	}

	movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
	sync->move_area_ptr = (void *)movetestgpa;

	if (isactive) {
		data->mmio_ok = true;
		data->mmio_gpa_min = movesrcgpa;
		data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
	}

	return true;
}
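
/*
 * Placement recap for the active case: the MEM_TEST_MOVE_SIZE test window
 * starts MEM_TEST_MOVE_SIZE / 2 bytes before the start of the last
 * memslot, so exactly half of the guest's writes land in the memslot
 * being moved. Only that half may legitimately trigger MMIO exits, hence
 * the [movesrcgpa, movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1] window
 * checked in check_mmio_access().
 */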

static bool test_memslot_move_prepare_active(struct vm_data *data,
					     struct sync_area *sync,
					     uint64_t *maxslots)
{
	return test_memslot_move_prepare(data, sync, maxslots, true);
}

static bool test_memslot_move_prepare_inactive(struct vm_data *data,
					       struct sync_area *sync,
					       uint64_t *maxslots)
{
	return test_memslot_move_prepare(data, sync, maxslots, false);
}

static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
	uint64_t movesrcgpa;

	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
	vm_mem_region_move(data->vm, data->nslots - 1 + 1,
			   MEM_TEST_MOVE_GPA_DEST);
	vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
}
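
/*
 * The "- 1 + 1" above is deliberate: vm_slot2gpa() takes a zero-based
 * index into the test slots, while the KVM memslot IDs here are one-based
 * because slot 0 is reserved, so the last test slot has memslot ID
 * data->nslots.
 */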

static void test_memslot_do_unmap(struct vm_data *data,
				  uint64_t offsp, uint64_t count)
{
	uint64_t gpa, ctr;
	uint32_t guest_page_size = data->vm->page_size;

	for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
		uint64_t npages;
		void *hva;
		int ret;

		hva = vm_gpa2hva(data, gpa, &npages);
		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%" PRIx64, gpa);
		npages = min(npages, count - ctr);
		ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
		TEST_ASSERT(!ret,
			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%" PRIx64,
			    hva, gpa);
		ctr += npages;
		gpa += npages * guest_page_size;
	}
	TEST_ASSERT(ctr == count,
		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
}
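
/*
 * madvise(MADV_DONTNEED) on this anonymous backing memory discards the
 * host pages and, via MMU notifiers, makes KVM zap the corresponding
 * guest mappings; that zap path is the "unmap" operation being
 * benchmarked, and the guest's next access faults the pages back in.
 */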

static void test_memslot_map_unmap_check(struct vm_data *data,
					 uint64_t offsp, uint64_t valexp)
{
	uint64_t gpa;
	uint64_t *val;
	uint32_t guest_page_size = data->vm->page_size;

	if (!map_unmap_verify)
		return;

	gpa = MEM_TEST_GPA + offsp * guest_page_size;
	val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
	TEST_ASSERT(*val == valexp,
		    "Guest written values should read back correctly before unmap (%" PRIu64 " vs %" PRIu64 " @ %" PRIx64 ")",
		    *val, valexp, gpa);
	*val = 0;
}

static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;

	/*
	 * Unmap the second half of the test area while guest writes to (maps)
	 * the first half.
	 */
	test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);

	/*
	 * Wait for the guest to finish writing the first half of the test
	 * area, verify the written value on the first and the last page of
	 * this area and then unmap it.
	 * Meanwhile, the guest is writing to (mapping) the second half of
	 * the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
	test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
	test_memslot_do_unmap(data, 0, guest_pages / 2);

	/*
	 * Wait for the guest to finish writing the second half of the test
	 * area and verify the written value on the first and the last page
	 * of this area.
	 * The area will be unmapped at the beginning of the next loop
	 * iteration.
	 * Meanwhile, the guest is writing to (mapping) the first half of
	 * the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
	test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
}

static void test_memslot_unmap_loop_common(struct vm_data *data,
					   struct sync_area *sync,
					   uint64_t chunk)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
	uint64_t ctr;

	/*
	 * Wait for the guest to finish mapping page(s) in the first half
	 * of the test area, verify the written value and then perform unmap
	 * of this area.
	 * Meanwhile, the guest is writing to (mapping) page(s) in the second
	 * half of the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
	for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
		test_memslot_do_unmap(data, ctr, chunk);

	/* Likewise, but for the opposite host / guest areas */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
	for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
		test_memslot_do_unmap(data, ctr, chunk);
}

static void test_memslot_unmap_loop(struct vm_data *data,
				    struct sync_area *sync)
{
	uint32_t host_page_size = getpagesize();
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
					1 : host_page_size / guest_page_size;

	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
}

static void test_memslot_unmap_loop_chunked(struct vm_data *data,
					    struct sync_area *sync)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;

	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
}

static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
	uint64_t gptr;
	uint32_t guest_page_size = data->vm->page_size;

	for (gptr = MEM_TEST_GPA + guest_page_size / 2;
	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
		*(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;

	host_perform_sync(sync);

	for (gptr = MEM_TEST_GPA;
	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
		uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
		uint64_t val = *vptr;

		TEST_ASSERT(val == MEM_TEST_VAL_1,
			    "Guest written values should read back correctly (is %" PRIu64 " @ %" PRIx64 ")",
			    val, gptr);
		*vptr = 0;
	}

	host_perform_sync(sync);
}
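
/*
 * Note how host and guest touch disjoint offsets of every page in the RW
 * test: the guest writes MEM_TEST_VAL_1 at offset 0 and reads back
 * MEM_TEST_VAL_2 at offset page_size / 2, while the host does the mirror
 * image above, so each side verifies the other's writes without racing
 * on the same qword.
 */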

struct test_data {
	const char *name;
	uint64_t mem_size;
	void (*guest_code)(void);
	bool (*prepare)(struct vm_data *data, struct sync_area *sync,
			uint64_t *maxslots);
	void (*loop)(struct vm_data *data, struct sync_area *sync);
};

static bool test_execute(int nslots, uint64_t *maxslots,
			 unsigned int maxtime,
			 const struct test_data *tdata,
			 uint64_t *nloops,
			 struct timespec *slot_runtime,
			 struct timespec *guest_runtime)
{
	uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
	struct vm_data *data;
	struct sync_area *sync;
	struct timespec tstart;
	bool ret = true;

	data = alloc_vm();
	if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
			mem_size, slot_runtime)) {
		ret = false;
		goto exit_free;
	}

	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
	if (tdata->prepare &&
	    !tdata->prepare(data, sync, maxslots)) {
		ret = false;
		goto exit_free;
	}

	launch_vm(data);

	clock_gettime(CLOCK_MONOTONIC, &tstart);
	let_guest_run(sync);

	while (1) {
		*guest_runtime = timespec_elapsed(tstart);
		if (guest_runtime->tv_sec >= maxtime)
			break;

		tdata->loop(data, sync);

		(*nloops)++;
	}

	make_guest_exit(sync);
	wait_guest_exit(data);

exit_free:
	free_vm(data);

	return ret;
}

static const struct test_data tests[] = {
	{
		.name = "map",
		.mem_size = MEM_SIZE_MAP,
		.guest_code = guest_code_test_memslot_map,
		.loop = test_memslot_map_loop,
	},
	{
		.name = "unmap",
		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop,
	},
	{
		.name = "unmap chunked",
		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop_chunked,
	},
	{
		.name = "move active area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_active,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "move inactive area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_inactive,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "RW",
		.guest_code = guest_code_test_memslot_rw,
		.loop = test_memslot_rw_loop,
	},
};

#define NTESTS ARRAY_SIZE(tests)

struct test_args {
	int tfirst;
	int tlast;
	int nslots;
	int seconds;
	int runs;
};

static void help(char *name, struct test_args *targs)
{
	int ctr;

	pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
		name);
	pr_info(" -h: print this help screen.\n");
	pr_info(" -v: enable verbose mode (not for benchmarking).\n");
	pr_info(" -d: enable extra debug checks.\n");
	pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
		targs->nslots);
	pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
		targs->tfirst, NTESTS - 1);
	pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
		targs->tlast, NTESTS - 1);
	pr_info(" -l: specify the test length in seconds (currently: %i)\n",
		targs->seconds);
	pr_info(" -r: specify the number of runs per test (currently: %i)\n",
		targs->runs);

	pr_info("\nAvailable tests:\n");
	for (ctr = 0; ctr < NTESTS; ctr++)
		pr_info("%d: %s\n", ctr, tests[ctr].name);
}
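
/*
 * Illustrative invocation (hypothetical numbers): run only the two unmap
 * tests (indices 1 and 2), capped at 512 memslots, 10 seconds and 5 runs
 * each:
 *
 *	./memslot_perf_test -s 512 -f 1 -e 2 -l 10 -r 5
 */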

static bool check_memory_sizes(void)
{
	uint32_t host_page_size = getpagesize();
	uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;

	if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
		pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
			host_page_size, guest_page_size);
		return false;
	}

	if (MEM_SIZE % guest_page_size ||
	    MEM_TEST_SIZE % guest_page_size) {
		pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
		return false;
	}

	if (MEM_SIZE_MAP % guest_page_size ||
	    MEM_TEST_MAP_SIZE % guest_page_size ||
	    (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 ||
	    (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
		pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
		return false;
	}

	if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE ||
	    MEM_TEST_UNMAP_SIZE % guest_page_size ||
	    (MEM_TEST_UNMAP_SIZE / guest_page_size) %
	    (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
		pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
		return false;
	}

	return true;
}

static bool parse_args(int argc, char *argv[],
		       struct test_args *targs)
{
	uint32_t max_mem_slots;
	int opt;

	while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
		switch (opt) {
		case 'h':
		default:
			help(argv[0], targs);
			return false;
		case 'v':
			verbose = true;
			break;
		case 'd':
			map_unmap_verify = true;
			break;
		case 's':
			targs->nslots = atoi_paranoid(optarg);
			if (targs->nslots <= 1 && targs->nslots != -1) {
				pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
				return false;
			}
			break;
		case 'f':
			targs->tfirst = atoi_non_negative("First test", optarg);
			break;
		case 'e':
			targs->tlast = atoi_non_negative("Last test", optarg);
			if (targs->tlast >= NTESTS) {
				pr_info("Last test to run has to be non-negative and less than %zu\n",
					NTESTS);
				return false;
			}
			break;
		case 'l':
			targs->seconds = atoi_non_negative("Test length", optarg);
			break;
		case 'r':
			targs->runs = atoi_positive("Runs per test", optarg);
			break;
		}
	}

	if (optind < argc) {
		help(argv[0], targs);
		return false;
	}

	if (targs->tfirst > targs->tlast) {
		pr_info("First test to run cannot be greater than the last test to run\n");
		return false;
	}

	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
	if (max_mem_slots <= 1) {
		pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
		return false;
	}

	/* Memory slot 0 is reserved */
	if (targs->nslots == -1)
		targs->nslots = max_mem_slots - 1;
	else
		targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;

	pr_info_v("Allowed Number of memory slots: %" PRIu32 "\n",
		  targs->nslots + 1);

	return true;
}

struct test_result {
	struct timespec slot_runtime, guest_runtime, iter_runtime;
	int64_t slottimens, runtimens;
	uint64_t nloops;
};

static bool test_loop(const struct test_data *data,
		      const struct test_args *targs,
		      struct test_result *rbestslottime,
		      struct test_result *rbestruntime)
{
	uint64_t maxslots;
	struct test_result result = {};

	if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
			  &result.nloops,
			  &result.slot_runtime, &result.guest_runtime)) {
		if (maxslots)
			pr_info("Memslot count too high for this test, decrease the cap (max is %" PRIu64 ")\n",
				maxslots);
		else
			pr_info("Memslot count may be too high for this test, try adjusting the cap\n");

		return false;
	}

	pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
		result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
		result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
	if (!result.nloops) {
		pr_info("No full loops done - too short test time or system too loaded?\n");
		return true;
	}

	result.iter_runtime = timespec_div(result.guest_runtime,
					   result.nloops);
	pr_info("Done %" PRIu64 " iterations, avg %ld.%.9lds each\n",
		result.nloops,
		result.iter_runtime.tv_sec,
		result.iter_runtime.tv_nsec);
	result.slottimens = timespec_to_ns(result.slot_runtime);
	result.runtimens = timespec_to_ns(result.iter_runtime);

	/*
	 * Only rank the slot setup time for tests using the whole test memory
	 * area so they are comparable
	 */
	if (!data->mem_size &&
	    (!rbestslottime->slottimens ||
	     result.slottimens < rbestslottime->slottimens))
		*rbestslottime = result;
	if (!rbestruntime->runtimens ||
	    result.runtimens < rbestruntime->runtimens)
		*rbestruntime = result;

	return true;
}

int main(int argc, char *argv[])
{
	struct test_args targs = {
		.tfirst = 0,
		.tlast = NTESTS - 1,
		.nslots = -1,
		.seconds = 5,
		.runs = 1,
	};
	struct test_result rbestslottime = {};
	int tctr;

	if (!check_memory_sizes())
		return -1;

	if (!parse_args(argc, argv, &targs))
		return -1;

	for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
		const struct test_data *data = &tests[tctr];
		unsigned int runctr;
		struct test_result rbestruntime = {};

		if (tctr > targs.tfirst)
			pr_info("\n");

		pr_info("Testing %s performance with %i runs, %d seconds each\n",
			data->name, targs.runs, targs.seconds);

		for (runctr = 0; runctr < targs.runs; runctr++)
			if (!test_loop(data, &targs,
				       &rbestslottime, &rbestruntime))
				break;

		if (rbestruntime.runtimens)
			pr_info("Best runtime result was %ld.%.9lds per iteration (with %" PRIu64 " iterations)\n",
				rbestruntime.iter_runtime.tv_sec,
				rbestruntime.iter_runtime.tv_nsec,
				rbestruntime.nloops);
	}

	if (rbestslottime.slottimens)
		pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
			rbestslottime.slot_runtime.tv_sec,
			rbestslottime.slot_runtime.tv_nsec);

	return 0;
}