// SPDX-License-Identifier: GPL-2.0-only
/*
 * Test handling of code that might set PTE/PMD dirty in read-only VMAs.
 * Setting a PTE/PMD dirty must not accidentally set the PTE/PMD writable.
 *
 * Copyright 2023, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>
#include <setjmp.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/mempolicy.h>

#include "../kselftest.h"
#include "vm_util.h"

static size_t pagesize;
static size_t thpsize;
static int mem_fd;
static int pagemap_fd;
static sigjmp_buf env;

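/*
 * SIGSEGV handler installed around the write attempt in
 * do_test_write_sigsegv(): jump back to the sigsetjmp() site, returning 1
 * for SIGSEGV and 2 for any other signal.
 */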
static void signal_handler(int sig)
{
	if (sig == SIGSEGV)
		siglongjmp(env, 1);
	siglongjmp(env, 2);
}

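/*
 * Try writing to the (expected read-only) page. The test passes if the
 * write triggers SIGSEGV and the memory content is left unmodified.
 */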
static void do_test_write_sigsegv(char *mem)
{
	char orig = *mem;
	int ret;

	if (signal(SIGSEGV, signal_handler) == SIG_ERR) {
		ksft_test_result_fail("signal() failed\n");
		return;
	}

	ret = sigsetjmp(env, 1);
	if (!ret)
		*mem = orig + 1;

	if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
		ksft_test_result_fail("signal() failed\n");

	ksft_test_result(ret == 1 && *mem == orig,
			 "SIGSEGV generated, page not modified\n");
}

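/*
 * Map 2 * thpsize of anonymous memory and return a pointer aligned to a
 * THP boundary within that mapping, with MADV_HUGEPAGE applied so the
 * kernel may place a THP there.
 */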
static char *mmap_thp_range(int prot, char **_mmap_mem, size_t *_mmap_size)
{
	const size_t mmap_size = 2 * thpsize;
	char *mem, *mmap_mem;

	mmap_mem = mmap(NULL, mmap_size, prot, MAP_PRIVATE|MAP_ANON,
			-1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return MAP_FAILED;
	}
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	if (madvise(mem, thpsize, MADV_HUGEPAGE)) {
		ksft_test_result_skip("MADV_HUGEPAGE failed\n");
		munmap(mmap_mem, mmap_size);
		return MAP_FAILED;
	}

	*_mmap_mem = mmap_mem;
	*_mmap_size = mmap_size;
	return mem;
}

static void test_ptrace_write(void)
{
	char data = 1;
	char *mem;
	int ret;

	ksft_print_msg("[INFO] PTRACE write access\n");

	mem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* Fault in the shared zeropage. */
	if (*mem != 0) {
		ksft_test_result_fail("Memory not zero\n");
		goto munmap;
	}

	/*
	 * Unshare the page (populating a fresh anon page that might be set
	 * dirty in the PTE) in the read-only VMA using ptrace (FOLL_FORCE).
	 */
	lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
	ret = write(mem_fd, &data, 1);
	if (ret != 1 || *mem != data) {
		ksft_test_result_fail("write() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mem, pagesize);
}

static void test_ptrace_write_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;
	char data = 1;
	int ret;

	ksft_print_msg("[INFO] PTRACE write access to THP\n");

	mem = mmap_thp_range(PROT_READ, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first subpage in the read-only VMA using
	 * ptrace(FOLL_FORCE), eventually placing a fresh THP that is marked
	 * dirty in the PMD.
	 */
	lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
	ret = write(mem_fd, &data, 1);
	if (ret != 1 || *mem != data) {
		ksft_test_result_fail("write() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

static void test_page_migration(void)
{
	char *mem;

	ksft_print_msg("[INFO] Page migration\n");

	mem = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON,
		   -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* Populate a fresh page and dirty it. */
	memset(mem, 1, pagesize);
	if (mprotect(mem, pagesize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* Trigger page migration. Might not be available or fail. */
	if (syscall(__NR_mbind, mem, pagesize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		ksft_test_result_skip("mbind() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mem, pagesize);
}

static void test_page_migration_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;

	ksft_print_msg("[INFO] Page migration of THP\n");

	mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first page, which might populate a fresh anon THP
	 * and dirty it.
	 */
	memset(mem, 1, pagesize);
	if (mprotect(mem, thpsize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	/* Trigger page migration. Might not be available or fail. */
	if (syscall(__NR_mbind, mem, thpsize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		ksft_test_result_skip("mbind() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

static void test_pte_mapped_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;

	ksft_print_msg("[INFO] PTE-mapping a THP\n");

	mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first page, which might populate a fresh anon THP
	 * and dirty it.
	 */
	memset(mem, 1, pagesize);
	if (mprotect(mem, thpsize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	/* Trigger PTE-mapping the THP by mprotect'ing the last subpage. */
	if (mprotect(mem + thpsize - pagesize, pagesize,
		     PROT_READ|PROT_WRITE)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

#ifdef __NR_userfaultfd
static void test_uffdio_copy(void)
{
	struct uffdio_register uffdio_register;
	struct uffdio_copy uffdio_copy;
	struct uffdio_api uffdio_api;
	char *dst, *src;
	int uffd;

	ksft_print_msg("[INFO] UFFDIO_COPY\n");

	src = malloc(pagesize);
	memset(src, 1, pagesize);
	dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (dst == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) {
		ksft_test_result_skip("__NR_userfaultfd failed\n");
		goto munmap;
	}

	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
		ksft_test_result_fail("UFFDIO_API failed\n");
		goto close_uffd;
	}

	uffdio_register.range.start = (unsigned long) dst;
	uffdio_register.range.len = pagesize;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
		ksft_test_result_fail("UFFDIO_REGISTER failed\n");
		goto close_uffd;
	}

	/* Place a page in a read-only VMA, which might set the PTE dirty. */
	uffdio_copy.dst = (unsigned long) dst;
	uffdio_copy.src = (unsigned long) src;
	uffdio_copy.len = pagesize;
	uffdio_copy.mode = 0;
	if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
		ksft_test_result_fail("UFFDIO_COPY failed\n");
		goto close_uffd;
	}

	do_test_write_sigsegv(dst);
close_uffd:
	close(uffd);
munmap:
	munmap(dst, pagesize);
	free(src);
}
#endif /* __NR_userfaultfd */

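/*
 * Run all scenarios that might mark a PTE/PMD dirty in a read-only VMA,
 * adjusting the kselftest plan based on THP and userfaultfd availability.
 */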
int main(void)
{
	int err, tests = 2;

	pagesize = getpagesize();
	thpsize = read_pmd_pagesize();
	if (thpsize) {
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
			       thpsize / 1024);
		tests += 3;
	}
#ifdef __NR_userfaultfd
	tests += 1;
#endif /* __NR_userfaultfd */

	ksft_print_header();
	ksft_set_plan(tests);

	mem_fd = open("/proc/self/mem", O_RDWR);
	if (mem_fd < 0)
		ksft_exit_fail_msg("opening /proc/self/mem failed\n");
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening /proc/self/pagemap failed\n");

	/*
	 * On some ptrace(FOLL_FORCE) write access via /proc/self/mem in
	 * read-only VMAs, the kernel may set the PTE/PMD dirty.
	 */
	test_ptrace_write();
	if (thpsize)
		test_ptrace_write_thp();
	/*
	 * On page migration, the kernel may set the PTE/PMD dirty when
	 * remapping the page.
	 */
	test_page_migration();
	if (thpsize)
		test_page_migration_thp();
	/* PTE-mapping a THP might propagate the dirty PMD bit to the PTEs. */
	if (thpsize)
		test_pte_mapped_thp();
	/* Placing a fresh page via userfaultfd may set the PTE dirty. */
#ifdef __NR_userfaultfd
	test_uffdio_copy();
#endif /* __NR_userfaultfd */

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}
