// SPDX-License-Identifier: GPL-2.0-only
/*
 * Test handling of code that might set PTE/PMD dirty in read-only VMAs.
 * Setting a PTE/PMD dirty must not accidentally set the PTE/PMD writable.
 *
 * Copyright 2023, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>
#include <setjmp.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/mempolicy.h>

#include "../kselftest.h"
#include "vm_util.h"

static size_t pagesize;
static size_t thpsize;
static int mem_fd;
static int pagemap_fd;
static sigjmp_buf env;

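/*
 * Jump back to the sigsetjmp() in do_test_write_sigsegv(): 1 signals the
 * expected SIGSEGV, 2 any other (unexpected) signal.
 */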
static void signal_handler(int sig)
{
	if (sig == SIGSEGV)
		siglongjmp(env, 1);
	siglongjmp(env, 2);
}

static void do_test_write_sigsegv(char *mem)
{
	char orig = *mem;
	int ret;

	if (signal(SIGSEGV, signal_handler) == SIG_ERR) {
		ksft_test_result_fail("signal() failed\n");
		return;
	}

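	/*
	 * sigsetjmp() returns 0 when called directly. If the write below
	 * faults, signal_handler() jumps back here and sigsetjmp() returns
	 * 1, with the page left unmodified.
	 */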
	ret = sigsetjmp(env, 1);
	if (!ret)
		*mem = orig + 1;

	if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
		ksft_test_result_fail("signal() failed\n");

	ksft_test_result(ret == 1 && *mem == orig,
			 "SIGSEGV generated, page not modified\n");
}

static char *mmap_thp_range(int prot, char **_mmap_mem, size_t *_mmap_size)
{
	const size_t mmap_size = 2 * thpsize;
	char *mem, *mmap_mem;

	mmap_mem = mmap(NULL, mmap_size, prot, MAP_PRIVATE|MAP_ANON,
			-1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return MAP_FAILED;
	}
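	/*
	 * Mapping twice the THP size guarantees that a naturally aligned
	 * range of thpsize bytes fits inside; round up to the next
	 * thpsize-aligned address.
	 */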
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	if (madvise(mem, thpsize, MADV_HUGEPAGE)) {
		ksft_test_result_skip("MADV_HUGEPAGE failed\n");
		munmap(mmap_mem, mmap_size);
		return MAP_FAILED;
	}

	*_mmap_mem = mmap_mem;
	*_mmap_size = mmap_size;
	return mem;
}

static void test_ptrace_write(void)
{
	char data = 1;
	char *mem;
	int ret;

	ksft_print_msg("[INFO] PTRACE write access\n");

	mem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* Fault in the shared zeropage. */
	if (*mem != 0) {
		ksft_test_result_fail("Memory not zero\n");
		goto munmap;
	}

	/*
	 * Unshare the page (populating a fresh anon page that might be set
	 * dirty in the PTE) in the read-only VMA using ptrace (FOLL_FORCE).
	 */
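	/* Offsets into /proc/self/mem are virtual addresses of this process. */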
	lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
	ret = write(mem_fd, &data, 1);
	if (ret != 1 || *mem != data) {
		ksft_test_result_fail("write() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mem, pagesize);
}

static void test_ptrace_write_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;
	char data = 1;
	int ret;

	ksft_print_msg("[INFO] PTRACE write access to THP\n");

	mem = mmap_thp_range(PROT_READ, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first subpage in the read-only VMA using
	 * ptrace(FOLL_FORCE), eventually placing a fresh THP that is marked
	 * dirty in the PMD.
	 */
	lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
	ret = write(mem_fd, &data, 1);
	if (ret != 1 || *mem != data) {
		ksft_test_result_fail("write() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

static void test_page_migration(void)
{
	char *mem;

	ksft_print_msg("[INFO] Page migration\n");

	mem = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON,
		   -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* Populate a fresh page and dirty it. */
	memset(mem, 1, pagesize);
	if (mprotect(mem, pagesize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* Trigger page migration. Might not be available or fail. */
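	/*
	 * MPOL_LOCAL expects an empty (NULL) nodemask, so the maxnode
	 * argument (0x7fful) is effectively unused; MPOL_MF_MOVE requests
	 * moving already-populated pages to conform to the policy, which
	 * remaps them via fresh, possibly dirty, PTEs.
	 */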
	if (syscall(__NR_mbind, mem, pagesize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		ksft_test_result_skip("mbind() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mem, pagesize);
}

static void test_page_migration_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;

	ksft_print_msg("[INFO] Page migration of THP\n");

	mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first page, which might populate a fresh anon THP
	 * and dirty it.
	 */
	memset(mem, 1, pagesize);
	if (mprotect(mem, thpsize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	/* Trigger page migration. Might not be available or fail. */
	if (syscall(__NR_mbind, mem, thpsize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		ksft_test_result_skip("mbind() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

static void test_pte_mapped_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;

	ksft_print_msg("[INFO] PTE-mapping a THP\n");

	mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first page, which might populate a fresh anon THP
	 * and dirty it.
	 */
	memset(mem, 1, pagesize);
	if (mprotect(mem, thpsize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

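	/*
	 * Changing the protection of a single subpage forces the kernel to
	 * remap the THP using PTEs; a dirty PMD must not end up as writable
	 * PTEs covering the read-only range.
	 */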
	/* Trigger PTE-mapping the THP by mprotect'ing the last subpage. */
	if (mprotect(mem + thpsize - pagesize, pagesize,
		     PROT_READ|PROT_WRITE)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

#ifdef __NR_userfaultfd
static void test_uffdio_copy(void)
{
	struct uffdio_register uffdio_register;
	struct uffdio_copy uffdio_copy;
	struct uffdio_api uffdio_api;
	char *dst, *src;
	int uffd;

	ksft_print_msg("[INFO] UFFDIO_COPY\n");

	src = malloc(pagesize);
	memset(src, 1, pagesize);
	dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (dst == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) {
		ksft_test_result_skip("__NR_userfaultfd failed\n");
		goto munmap;
	}

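	/*
	 * UFFDIO_API is the mandatory handshake before any other userfaultfd
	 * ioctl; no optional features are requested.
	 */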
	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
		ksft_test_result_fail("UFFDIO_API failed\n");
		goto close_uffd;
	}

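	/*
	 * Register the destination page for missing-page handling so that
	 * UFFDIO_COPY may place a page there.
	 */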
	uffdio_register.range.start = (unsigned long) dst;
	uffdio_register.range.len = pagesize;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
		ksft_test_result_fail("UFFDIO_REGISTER failed\n");
		goto close_uffd;
	}

	/* Place a page in a read-only VMA, which might set the PTE dirty. */
	uffdio_copy.dst = (unsigned long) dst;
	uffdio_copy.src = (unsigned long) src;
	uffdio_copy.len = pagesize;
	uffdio_copy.mode = 0;
	if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
		ksft_test_result_fail("UFFDIO_COPY failed\n");
		goto close_uffd;
	}

	do_test_write_sigsegv(dst);
close_uffd:
	close(uffd);
munmap:
	munmap(dst, pagesize);
	free(src);
}
#endif /* __NR_userfaultfd */

int main(void)
{
	int err, tests = 2;

	pagesize = getpagesize();
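	/* thpsize == 0 means THP is not supported; THP tests are skipped. */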
	thpsize = read_pmd_pagesize();
	if (thpsize) {
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
			       thpsize / 1024);
		tests += 3;
	}
#ifdef __NR_userfaultfd
	tests += 1;
#endif /* __NR_userfaultfd */

	ksft_print_header();
	ksft_set_plan(tests);

	mem_fd = open("/proc/self/mem", O_RDWR);
	if (mem_fd < 0)
		ksft_exit_fail_msg("opening /proc/self/mem failed\n");
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening /proc/self/pagemap failed\n");

	/*
	 * On some ptrace(FOLL_FORCE) write access via /proc/self/mem in
	 * read-only VMAs, the kernel may set the PTE/PMD dirty.
	 */
	test_ptrace_write();
	if (thpsize)
		test_ptrace_write_thp();
	/*
	 * On page migration, the kernel may set the PTE/PMD dirty when
	 * remapping the page.
	 */
	test_page_migration();
	if (thpsize)
		test_page_migration_thp();
	/* PTE-mapping a THP might propagate the dirty PMD bit to the PTEs. */
	if (thpsize)
		test_pte_mapped_thp();
	/* Placing a fresh page via userfaultfd may set the PTE dirty. */
#ifdef __NR_userfaultfd
	test_uffdio_copy();
#endif /* __NR_userfaultfd */

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}