// SPDX-License-Identifier: GPL-2.0-only
/*
 * GUP long-term page pinning tests.
 *
 * Copyright 2023, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
9 | #define _GNU_SOURCE |
10 | #include <stdlib.h> |
11 | #include <string.h> |
12 | #include <stdbool.h> |
13 | #include <stdint.h> |
14 | #include <unistd.h> |
15 | #include <errno.h> |
16 | #include <fcntl.h> |
17 | #include <assert.h> |
18 | #include <sys/mman.h> |
19 | #include <sys/ioctl.h> |
20 | #include <sys/vfs.h> |
21 | #include <linux/magic.h> |
22 | #include <linux/memfd.h> |
23 | |
24 | #include "local_config.h" |
25 | #ifdef LOCAL_CONFIG_HAVE_LIBURING |
26 | #include <liburing.h> |
27 | #endif /* LOCAL_CONFIG_HAVE_LIBURING */ |
28 | |
29 | #include "../../../../mm/gup_test.h" |
30 | #include "../kselftest.h" |
31 | #include "vm_util.h" |
32 | |
/* Base page size; set once in main() from getpagesize(). */
static size_t pagesize;

/* File descriptor of the gup_test debugfs file; negative if it could not be opened. */
static int gup_fd;

/* Hugetlb page sizes filled in by detect_hugetlb_page_sizes() in main(). */
static size_t hugetlbsizes[10];
/* Number of valid entries in hugetlbsizes[]. */
static int nr_hugetlbsizes;
37 | |
/*
 * Look up the filesystem magic (statfs f_type) of the file behind @fd.
 *
 * Returns 0 if fstatfs() fails for any reason other than EINTR; an
 * interrupted call is simply retried.
 */
static __fsword_t get_fs_type(int fd)
{
	struct statfs stbuf;

	while (fstatfs(fd, &stbuf)) {
		if (errno != EINTR)
			return 0;
	}

	return stbuf.f_type;
}
49 | |
/*
 * Tell whether @fs_type is a filesystem these tests have no expectation for.
 *
 * We only support some filesystems in our tests when dealing with R/W
 * long-term pinning. For the filesystems listed below we can be fairly sure
 * whether they support it or not; everything else is reported as unknown.
 */
static bool fs_is_unknown(__fsword_t fs_type)
{
	static const __fsword_t known_fs[] = {
		TMPFS_MAGIC,
		HUGETLBFS_MAGIC,
		BTRFS_SUPER_MAGIC,
		EXT4_SUPER_MAGIC,
		XFS_SUPER_MAGIC,
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(known_fs) / sizeof(known_fs[0]); idx++) {
		if (known_fs[idx] == fs_type)
			return false;
	}

	return true;
}
68 | |
/*
 * Tell whether R/W long-term pinning is expected to succeed on @fs_type.
 *
 * Must only be called for filesystems fs_is_unknown() does not reject; of
 * those, only tmpfs and hugetlbfs are expected to allow it.
 */
static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
{
	assert(!fs_is_unknown(fs_type));

	return fs_type == TMPFS_MAGIC || fs_type == HUGETLBFS_MAGIC;
}
80 | |
/* How do_test() attempts to long-term pin the mapped range. */
enum test_type {
	/* gup_test ioctl without the WRITE flag. */
	TEST_TYPE_RO,
	/* As above, additionally requesting the GUP-fast path. */
	TEST_TYPE_RO_FAST,
	/* gup_test ioctl with the WRITE flag. */
	TEST_TYPE_RW,
	/* As above, additionally requesting the GUP-fast path. */
	TEST_TYPE_RW_FAST,
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/* Register the range as an io_uring fixed buffer. */
	TEST_TYPE_IOURING,
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
};
90 | |
91 | static void do_test(int fd, size_t size, enum test_type type, bool shared) |
92 | { |
93 | __fsword_t fs_type = get_fs_type(fd); |
94 | bool should_work; |
95 | char *mem; |
96 | int ret; |
97 | |
98 | if (ftruncate(fd, size)) { |
99 | ksft_test_result_fail(msg: "ftruncate() failed\n" ); |
100 | return; |
101 | } |
102 | |
103 | if (fallocate(fd, 0, 0, size)) { |
104 | if (size == pagesize) |
105 | ksft_test_result_fail(msg: "fallocate() failed\n" ); |
106 | else |
107 | ksft_test_result_skip(msg: "need more free huge pages\n" ); |
108 | return; |
109 | } |
110 | |
111 | mem = mmap(NULL, size, PROT_READ | PROT_WRITE, |
112 | shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); |
113 | if (mem == MAP_FAILED) { |
114 | if (size == pagesize || shared) |
115 | ksft_test_result_fail(msg: "mmap() failed\n" ); |
116 | else |
117 | ksft_test_result_skip(msg: "need more free huge pages\n" ); |
118 | return; |
119 | } |
120 | |
121 | /* |
122 | * Fault in the page writable such that GUP-fast can eventually pin |
123 | * it immediately. |
124 | */ |
125 | memset(mem, 0, size); |
126 | |
127 | switch (type) { |
128 | case TEST_TYPE_RO: |
129 | case TEST_TYPE_RO_FAST: |
130 | case TEST_TYPE_RW: |
131 | case TEST_TYPE_RW_FAST: { |
132 | struct pin_longterm_test args; |
133 | const bool fast = type == TEST_TYPE_RO_FAST || |
134 | type == TEST_TYPE_RW_FAST; |
135 | const bool rw = type == TEST_TYPE_RW || |
136 | type == TEST_TYPE_RW_FAST; |
137 | |
138 | if (gup_fd < 0) { |
139 | ksft_test_result_skip(msg: "gup_test not available\n" ); |
140 | break; |
141 | } |
142 | |
143 | if (rw && shared && fs_is_unknown(fs_type)) { |
144 | ksft_test_result_skip(msg: "Unknown filesystem\n" ); |
145 | return; |
146 | } |
147 | /* |
148 | * R/O pinning or pinning in a private mapping is always |
149 | * expected to work. Otherwise, we expect long-term R/W pinning |
150 | * to only succeed for special fielesystems. |
151 | */ |
152 | should_work = !shared || !rw || |
153 | fs_supports_writable_longterm_pinning(fs_type); |
154 | |
155 | args.addr = (__u64)(uintptr_t)mem; |
156 | args.size = size; |
157 | args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; |
158 | args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; |
159 | ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); |
160 | if (ret && errno == EINVAL) { |
161 | ksft_test_result_skip(msg: "PIN_LONGTERM_TEST_START failed\n" ); |
162 | break; |
163 | } else if (ret && errno == EFAULT) { |
164 | ksft_test_result(!should_work, "Should have failed\n" ); |
165 | break; |
166 | } else if (ret) { |
167 | ksft_test_result_fail(msg: "PIN_LONGTERM_TEST_START failed\n" ); |
168 | break; |
169 | } |
170 | |
171 | if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) |
172 | ksft_print_msg(msg: "[INFO] PIN_LONGTERM_TEST_STOP failed\n" ); |
173 | |
174 | /* |
175 | * TODO: if the kernel ever supports long-term R/W pinning on |
176 | * some previously unsupported filesystems, we might want to |
177 | * perform some additional tests for possible data corruptions. |
178 | */ |
179 | ksft_test_result(should_work, "Should have worked\n" ); |
180 | break; |
181 | } |
182 | #ifdef LOCAL_CONFIG_HAVE_LIBURING |
183 | case TEST_TYPE_IOURING: { |
184 | struct io_uring ring; |
185 | struct iovec iov; |
186 | |
187 | /* io_uring always pins pages writable. */ |
188 | if (shared && fs_is_unknown(fs_type)) { |
189 | ksft_test_result_skip("Unknown filesystem\n" ); |
190 | return; |
191 | } |
192 | should_work = !shared || |
193 | fs_supports_writable_longterm_pinning(fs_type); |
194 | |
195 | /* Skip on errors, as we might just lack kernel support. */ |
196 | ret = io_uring_queue_init(1, &ring, 0); |
197 | if (ret < 0) { |
198 | ksft_test_result_skip("io_uring_queue_init() failed\n" ); |
199 | break; |
200 | } |
201 | /* |
202 | * Register the range as a fixed buffer. This will FOLL_WRITE | |
203 | * FOLL_PIN | FOLL_LONGTERM the range. |
204 | */ |
205 | iov.iov_base = mem; |
206 | iov.iov_len = size; |
207 | ret = io_uring_register_buffers(&ring, &iov, 1); |
208 | /* Only new kernels return EFAULT. */ |
209 | if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || |
210 | errno == EFAULT)) { |
211 | ksft_test_result(!should_work, "Should have failed\n" ); |
212 | } else if (ret) { |
213 | /* |
214 | * We might just lack support or have insufficient |
215 | * MEMLOCK limits. |
216 | */ |
217 | ksft_test_result_skip("io_uring_register_buffers() failed\n" ); |
218 | } else { |
219 | ksft_test_result(should_work, "Should have worked\n" ); |
220 | io_uring_unregister_buffers(&ring); |
221 | } |
222 | |
223 | io_uring_queue_exit(&ring); |
224 | break; |
225 | } |
226 | #endif /* LOCAL_CONFIG_HAVE_LIBURING */ |
227 | default: |
228 | assert(false); |
229 | } |
230 | |
231 | munmap(mem, size); |
232 | } |
233 | |
/* A test body: runs against the file @fd using a mapping of @size bytes. */
typedef void (*test_fn)(int fd, size_t size);
235 | |
236 | static void run_with_memfd(test_fn fn, const char *desc) |
237 | { |
238 | int fd; |
239 | |
240 | ksft_print_msg(msg: "[RUN] %s ... with memfd\n" , desc); |
241 | |
242 | fd = memfd_create("test" , 0); |
243 | if (fd < 0) { |
244 | ksft_test_result_fail(msg: "memfd_create() failed\n" ); |
245 | return; |
246 | } |
247 | |
248 | fn(fd, pagesize); |
249 | close(fd); |
250 | } |
251 | |
252 | static void run_with_tmpfile(test_fn fn, const char *desc) |
253 | { |
254 | FILE *file; |
255 | int fd; |
256 | |
257 | ksft_print_msg(msg: "[RUN] %s ... with tmpfile\n" , desc); |
258 | |
259 | file = tmpfile(); |
260 | if (!file) { |
261 | ksft_test_result_fail(msg: "tmpfile() failed\n" ); |
262 | return; |
263 | } |
264 | |
265 | fd = fileno(file); |
266 | if (fd < 0) { |
267 | ksft_test_result_fail(msg: "fileno() failed\n" ); |
268 | goto close; |
269 | } |
270 | |
271 | fn(fd, pagesize); |
272 | close: |
273 | fclose(file); |
274 | } |
275 | |
276 | static void run_with_local_tmpfile(test_fn fn, const char *desc) |
277 | { |
278 | char filename[] = __FILE__"_tmpfile_XXXXXX" ; |
279 | int fd; |
280 | |
281 | ksft_print_msg(msg: "[RUN] %s ... with local tmpfile\n" , desc); |
282 | |
283 | fd = mkstemp(filename); |
284 | if (fd < 0) { |
285 | ksft_test_result_fail(msg: "mkstemp() failed\n" ); |
286 | return; |
287 | } |
288 | |
289 | if (unlink(filename)) { |
290 | ksft_test_result_fail(msg: "unlink() failed\n" ); |
291 | goto close; |
292 | } |
293 | |
294 | fn(fd, pagesize); |
295 | close: |
296 | close(fd); |
297 | } |
298 | |
299 | static void run_with_memfd_hugetlb(test_fn fn, const char *desc, |
300 | size_t hugetlbsize) |
301 | { |
302 | int flags = MFD_HUGETLB; |
303 | int fd; |
304 | |
305 | ksft_print_msg(msg: "[RUN] %s ... with memfd hugetlb (%zu kB)\n" , desc, |
306 | hugetlbsize / 1024); |
307 | |
308 | flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; |
309 | |
310 | fd = memfd_create("test" , flags); |
311 | if (fd < 0) { |
312 | ksft_test_result_skip(msg: "memfd_create() failed\n" ); |
313 | return; |
314 | } |
315 | |
316 | fn(fd, hugetlbsize); |
317 | close(fd); |
318 | } |
319 | |
320 | struct test_case { |
321 | const char *desc; |
322 | test_fn fn; |
323 | }; |
324 | |
325 | static void test_shared_rw_pin(int fd, size_t size) |
326 | { |
327 | do_test(fd, size, type: TEST_TYPE_RW, shared: true); |
328 | } |
329 | |
330 | static void test_shared_rw_fast_pin(int fd, size_t size) |
331 | { |
332 | do_test(fd, size, type: TEST_TYPE_RW_FAST, shared: true); |
333 | } |
334 | |
335 | static void test_shared_ro_pin(int fd, size_t size) |
336 | { |
337 | do_test(fd, size, type: TEST_TYPE_RO, shared: true); |
338 | } |
339 | |
340 | static void test_shared_ro_fast_pin(int fd, size_t size) |
341 | { |
342 | do_test(fd, size, type: TEST_TYPE_RO_FAST, shared: true); |
343 | } |
344 | |
345 | static void test_private_rw_pin(int fd, size_t size) |
346 | { |
347 | do_test(fd, size, type: TEST_TYPE_RW, shared: false); |
348 | } |
349 | |
350 | static void test_private_rw_fast_pin(int fd, size_t size) |
351 | { |
352 | do_test(fd, size, type: TEST_TYPE_RW_FAST, shared: false); |
353 | } |
354 | |
355 | static void test_private_ro_pin(int fd, size_t size) |
356 | { |
357 | do_test(fd, size, type: TEST_TYPE_RO, shared: false); |
358 | } |
359 | |
360 | static void test_private_ro_fast_pin(int fd, size_t size) |
361 | { |
362 | do_test(fd, size, type: TEST_TYPE_RO_FAST, shared: false); |
363 | } |
364 | |
#ifdef LOCAL_CONFIG_HAVE_LIBURING
/* Register a MAP_SHARED mapping of @fd as an io_uring fixed buffer. */
static void test_shared_iouring(int fd, size_t size)
{
	const bool shared = true;

	do_test(fd, size, TEST_TYPE_IOURING, shared);
}

/* Register a MAP_PRIVATE mapping of @fd as an io_uring fixed buffer. */
static void test_private_iouring(int fd, size_t size)
{
	const bool shared = false;

	do_test(fd, size, TEST_TYPE_IOURING, shared);
}
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
376 | |
377 | static const struct test_case test_cases[] = { |
378 | { |
379 | "R/W longterm GUP pin in MAP_SHARED file mapping" , |
380 | test_shared_rw_pin, |
381 | }, |
382 | { |
383 | "R/W longterm GUP-fast pin in MAP_SHARED file mapping" , |
384 | test_shared_rw_fast_pin, |
385 | }, |
386 | { |
387 | "R/O longterm GUP pin in MAP_SHARED file mapping" , |
388 | test_shared_ro_pin, |
389 | }, |
390 | { |
391 | "R/O longterm GUP-fast pin in MAP_SHARED file mapping" , |
392 | test_shared_ro_fast_pin, |
393 | }, |
394 | { |
395 | "R/W longterm GUP pin in MAP_PRIVATE file mapping" , |
396 | test_private_rw_pin, |
397 | }, |
398 | { |
399 | "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping" , |
400 | test_private_rw_fast_pin, |
401 | }, |
402 | { |
403 | "R/O longterm GUP pin in MAP_PRIVATE file mapping" , |
404 | test_private_ro_pin, |
405 | }, |
406 | { |
407 | "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping" , |
408 | test_private_ro_fast_pin, |
409 | }, |
410 | #ifdef LOCAL_CONFIG_HAVE_LIBURING |
411 | { |
412 | "io_uring fixed buffer with MAP_SHARED file mapping" , |
413 | test_shared_iouring, |
414 | }, |
415 | { |
416 | "io_uring fixed buffer with MAP_PRIVATE file mapping" , |
417 | test_private_iouring, |
418 | }, |
419 | #endif /* LOCAL_CONFIG_HAVE_LIBURING */ |
420 | }; |
421 | |
422 | static void run_test_case(struct test_case const *test_case) |
423 | { |
424 | int i; |
425 | |
426 | run_with_memfd(fn: test_case->fn, desc: test_case->desc); |
427 | run_with_tmpfile(fn: test_case->fn, desc: test_case->desc); |
428 | run_with_local_tmpfile(fn: test_case->fn, desc: test_case->desc); |
429 | for (i = 0; i < nr_hugetlbsizes; i++) |
430 | run_with_memfd_hugetlb(fn: test_case->fn, desc: test_case->desc, |
431 | hugetlbsize: hugetlbsizes[i]); |
432 | } |
433 | |
434 | static int tests_per_test_case(void) |
435 | { |
436 | return 3 + nr_hugetlbsizes; |
437 | } |
438 | |
439 | int main(int argc, char **argv) |
440 | { |
441 | int i, err; |
442 | |
443 | pagesize = getpagesize(); |
444 | nr_hugetlbsizes = detect_hugetlb_page_sizes(sizes: hugetlbsizes, |
445 | ARRAY_SIZE(hugetlbsizes)); |
446 | |
447 | ksft_print_header(); |
448 | ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case()); |
449 | |
450 | gup_fd = open("/sys/kernel/debug/gup_test" , O_RDWR); |
451 | |
452 | for (i = 0; i < ARRAY_SIZE(test_cases); i++) |
453 | run_test_case(test_case: &test_cases[i]); |
454 | |
455 | err = ksft_get_fail_cnt(); |
456 | if (err) |
457 | ksft_exit_fail_msg(msg: "%d out of %d tests failed\n" , |
458 | err, ksft_test_num()); |
459 | return ksft_exit_pass(); |
460 | } |
461 | |