1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * KVM demand paging test |
4 | * Adapted from dirty_log_test.c |
5 | * |
6 | * Copyright (C) 2018, Red Hat, Inc. |
7 | * Copyright (C) 2019, Google, Inc. |
8 | */ |
9 | |
10 | #define _GNU_SOURCE /* for pipe2 */ |
11 | |
12 | #include <inttypes.h> |
13 | #include <stdio.h> |
14 | #include <stdlib.h> |
15 | #include <time.h> |
16 | #include <poll.h> |
17 | #include <pthread.h> |
18 | #include <linux/userfaultfd.h> |
19 | #include <sys/syscall.h> |
20 | |
21 | #include "kvm_util.h" |
22 | #include "test_util.h" |
23 | #include "memstress.h" |
24 | #include "guest_modes.h" |
25 | #include "userfaultfd_util.h" |
26 | |
27 | #ifdef __NR_userfaultfd |
28 | |
29 | static int nr_vcpus = 1; |
30 | static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; |
31 | |
32 | static size_t demand_paging_size; |
33 | static char *guest_data_prototype; |
34 | |
35 | static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) |
36 | { |
37 | struct kvm_vcpu *vcpu = vcpu_args->vcpu; |
38 | int vcpu_idx = vcpu_args->vcpu_idx; |
39 | struct kvm_run *run = vcpu->run; |
40 | struct timespec start; |
41 | struct timespec ts_diff; |
42 | int ret; |
43 | |
44 | clock_gettime(CLOCK_MONOTONIC, &start); |
45 | |
46 | /* Let the guest access its memory */ |
47 | ret = _vcpu_run(vcpu); |
48 | TEST_ASSERT(ret == 0, "vcpu_run failed: %d" , ret); |
49 | if (get_ucall(vcpu, NULL) != UCALL_SYNC) { |
50 | TEST_ASSERT(false, |
51 | "Invalid guest sync status: exit_reason=%s" , |
52 | exit_reason_str(run->exit_reason)); |
53 | } |
54 | |
55 | ts_diff = timespec_elapsed(start); |
56 | PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n" , vcpu_idx, |
57 | ts_diff.tv_sec, ts_diff.tv_nsec); |
58 | } |
59 | |
60 | static int handle_uffd_page_request(int uffd_mode, int uffd, |
61 | struct uffd_msg *msg) |
62 | { |
63 | pid_t tid = syscall(__NR_gettid); |
64 | uint64_t addr = msg->arg.pagefault.address; |
65 | struct timespec start; |
66 | struct timespec ts_diff; |
67 | int r; |
68 | |
69 | clock_gettime(CLOCK_MONOTONIC, &start); |
70 | |
71 | if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { |
72 | struct uffdio_copy copy; |
73 | |
74 | copy.src = (uint64_t)guest_data_prototype; |
75 | copy.dst = addr; |
76 | copy.len = demand_paging_size; |
77 | copy.mode = 0; |
78 | |
79 | r = ioctl(uffd, UFFDIO_COPY, ©); |
80 | if (r == -1) { |
81 | pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n" , |
82 | addr, tid, errno); |
83 | return r; |
84 | } |
85 | } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { |
86 | struct uffdio_continue cont = {0}; |
87 | |
88 | cont.range.start = addr; |
89 | cont.range.len = demand_paging_size; |
90 | |
91 | r = ioctl(uffd, UFFDIO_CONTINUE, &cont); |
92 | if (r == -1) { |
93 | pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n" , |
94 | addr, tid, errno); |
95 | return r; |
96 | } |
97 | } else { |
98 | TEST_FAIL("Invalid uffd mode %d" , uffd_mode); |
99 | } |
100 | |
101 | ts_diff = timespec_elapsed(start); |
102 | |
103 | PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n" , tid, |
104 | timespec_to_ns(ts_diff)); |
105 | PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n" , |
106 | demand_paging_size, addr, tid); |
107 | |
108 | return 0; |
109 | } |
110 | |
111 | struct test_params { |
112 | int uffd_mode; |
113 | useconds_t uffd_delay; |
114 | enum vm_mem_backing_src_type src_type; |
115 | bool partition_vcpu_memory_access; |
116 | }; |
117 | |
118 | static void prefault_mem(void *alias, uint64_t len) |
119 | { |
120 | size_t p; |
121 | |
122 | TEST_ASSERT(alias != NULL, "Alias required for minor faults" ); |
123 | for (p = 0; p < (len / demand_paging_size); ++p) { |
124 | memcpy(alias + (p * demand_paging_size), |
125 | guest_data_prototype, demand_paging_size); |
126 | } |
127 | } |
128 | |
129 | static void run_test(enum vm_guest_mode mode, void *arg) |
130 | { |
131 | struct memstress_vcpu_args *vcpu_args; |
132 | struct test_params *p = arg; |
133 | struct uffd_desc **uffd_descs = NULL; |
134 | struct timespec start; |
135 | struct timespec ts_diff; |
136 | struct kvm_vm *vm; |
137 | int i; |
138 | |
139 | vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, |
140 | p->src_type, p->partition_vcpu_memory_access); |
141 | |
142 | demand_paging_size = get_backing_src_pagesz(p->src_type); |
143 | |
144 | guest_data_prototype = malloc(demand_paging_size); |
145 | TEST_ASSERT(guest_data_prototype, |
146 | "Failed to allocate buffer for guest data pattern" ); |
147 | memset(guest_data_prototype, 0xAB, demand_paging_size); |
148 | |
149 | if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { |
150 | for (i = 0; i < nr_vcpus; i++) { |
151 | vcpu_args = &memstress_args.vcpu_args[i]; |
152 | prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), |
153 | vcpu_args->pages * memstress_args.guest_page_size); |
154 | } |
155 | } |
156 | |
157 | if (p->uffd_mode) { |
158 | uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); |
159 | TEST_ASSERT(uffd_descs, "Memory allocation failed" ); |
160 | for (i = 0; i < nr_vcpus; i++) { |
161 | void *vcpu_hva; |
162 | |
163 | vcpu_args = &memstress_args.vcpu_args[i]; |
164 | |
165 | /* Cache the host addresses of the region */ |
166 | vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); |
167 | /* |
168 | * Set up user fault fd to handle demand paging |
169 | * requests. |
170 | */ |
171 | uffd_descs[i] = uffd_setup_demand_paging( |
172 | p->uffd_mode, p->uffd_delay, vcpu_hva, |
173 | vcpu_args->pages * memstress_args.guest_page_size, |
174 | &handle_uffd_page_request); |
175 | } |
176 | } |
177 | |
178 | pr_info("Finished creating vCPUs and starting uffd threads\n" ); |
179 | |
180 | clock_gettime(CLOCK_MONOTONIC, &start); |
181 | memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); |
182 | pr_info("Started all vCPUs\n" ); |
183 | |
184 | memstress_join_vcpu_threads(nr_vcpus); |
185 | ts_diff = timespec_elapsed(start); |
186 | pr_info("All vCPU threads joined\n" ); |
187 | |
188 | if (p->uffd_mode) { |
189 | /* Tell the user fault fd handler threads to quit */ |
190 | for (i = 0; i < nr_vcpus; i++) |
191 | uffd_stop_demand_paging(uffd_descs[i]); |
192 | } |
193 | |
194 | pr_info("Total guest execution time: %ld.%.9lds\n" , |
195 | ts_diff.tv_sec, ts_diff.tv_nsec); |
196 | pr_info("Overall demand paging rate: %f pgs/sec\n" , |
197 | memstress_args.vcpu_args[0].pages * nr_vcpus / |
198 | ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC)); |
199 | |
200 | memstress_destroy_vm(vm); |
201 | |
202 | free(guest_data_prototype); |
203 | if (p->uffd_mode) |
204 | free(uffd_descs); |
205 | } |
206 | |
/*
 * Print the usage text to stdout, with @name as the program name, and
 * exit with status 0.  Does not return.
 */
static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
	       " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
	guest_modes_help();

	/* Constant text needs no format processing. */
	fputs(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
	      " UFFD registration mode: 'MISSING' or 'MINOR'.\n", stdout);
	kvm_print_vcpu_pinning_help();

	fputs(" -d: add a delay in usec to the User Fault\n"
	      " FD handler to simulate demand paging\n"
	      " overheads. Ignored without -u.\n", stdout);
	fputs(" -b: specify the size of the memory region which should be\n"
	      " demand paged by each vCPU. e.g. 10M or 3G.\n"
	      " Default: 1G\n", stdout);
	backing_src_help("-s");

	fputs(" -v: specify the number of vCPUs to run.\n"
	      " -o: Overlap guest memory accesses instead of partitioning\n"
	      " them into a separate region of memory for each vCPU.\n", stdout);
	puts("");
	exit(0);
}
229 | |
230 | int main(int argc, char *argv[]) |
231 | { |
232 | int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); |
233 | const char *cpulist = NULL; |
234 | struct test_params p = { |
235 | .src_type = DEFAULT_VM_MEM_SRC, |
236 | .partition_vcpu_memory_access = true, |
237 | }; |
238 | int opt; |
239 | |
240 | guest_modes_append_default(); |
241 | |
242 | while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o" )) != -1) { |
243 | switch (opt) { |
244 | case 'm': |
245 | guest_modes_cmdline(optarg); |
246 | break; |
247 | case 'u': |
248 | if (!strcmp("MISSING" , optarg)) |
249 | p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING; |
250 | else if (!strcmp("MINOR" , optarg)) |
251 | p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; |
252 | TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'." ); |
253 | break; |
254 | case 'd': |
255 | p.uffd_delay = strtoul(optarg, NULL, 0); |
256 | TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported." ); |
257 | break; |
258 | case 'b': |
259 | guest_percpu_mem_size = parse_size(optarg); |
260 | break; |
261 | case 's': |
262 | p.src_type = parse_backing_src_type(optarg); |
263 | break; |
264 | case 'v': |
265 | nr_vcpus = atoi_positive("Number of vCPUs" , optarg); |
266 | TEST_ASSERT(nr_vcpus <= max_vcpus, |
267 | "Invalid number of vcpus, must be between 1 and %d" , max_vcpus); |
268 | break; |
269 | case 'c': |
270 | cpulist = optarg; |
271 | break; |
272 | case 'o': |
273 | p.partition_vcpu_memory_access = false; |
274 | break; |
275 | case 'h': |
276 | default: |
277 | help(argv[0]); |
278 | break; |
279 | } |
280 | } |
281 | |
282 | if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR && |
283 | !backing_src_is_shared(p.src_type)) { |
284 | TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s" ); |
285 | } |
286 | |
287 | if (cpulist) { |
288 | kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu, |
289 | nr_vcpus); |
290 | memstress_args.pin_vcpus = true; |
291 | } |
292 | |
293 | for_each_guest_mode(run_test, &p); |
294 | |
295 | return 0; |
296 | } |
297 | |
298 | #else /* __NR_userfaultfd */ |
299 | |
300 | #warning "missing __NR_userfaultfd definition" |
301 | |
302 | int main(void) |
303 | { |
304 | print_skip("__NR_userfaultfd must be present for userfaultfd test" ); |
305 | return KSFT_SKIP; |
306 | } |
307 | |
308 | #endif /* __NR_userfaultfd */ |
309 | |