1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual |
4 | * address range in a process via <debugfs>/split_huge_pages interface. |
5 | */ |
6 | |
7 | #define _GNU_SOURCE |
8 | #include <stdio.h> |
9 | #include <stdlib.h> |
10 | #include <stdarg.h> |
11 | #include <unistd.h> |
12 | #include <inttypes.h> |
13 | #include <string.h> |
14 | #include <fcntl.h> |
15 | #include <sys/mman.h> |
16 | #include <sys/mount.h> |
17 | #include <malloc.h> |
18 | #include <stdbool.h> |
19 | #include <time.h> |
20 | #include "vm_util.h" |
21 | #include "../kselftest.h" |
22 | |
/*
 * Page geometry shared by the tests below. main() fills these in before any
 * test runs: pagesize from getpagesize(), pageshift as ffs(pagesize) - 1 and
 * pmd_pagesize from read_pmd_pagesize().
 */
uint64_t pagesize;
unsigned int pageshift;
uint64_t pmd_pagesize;

#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
#define SMAP_PATH "/proc/self/smaps"
/* Size of the buffers used for debugfs commands and test file paths. */
#define INPUT_MAX 80

/*
 * Command formats written to SPLIT_DEBUGFS. The two hexadecimal fields are
 * always passed as uint64_t by the callers, so use the matching <inttypes.h>
 * conversion rather than "%lx", which is only correct where unsigned long is
 * 64 bits wide. The text produced is unchanged.
 */
#define PID_FMT "%d,0x%" PRIx64 ",0x%" PRIx64 ",%d"
#define PATH_FMT "%s,0x%" PRIx64 ",0x%" PRIx64 ",%d"

/*
 * PFN_MASK reduces a 64-bit pagemap entry to the value used to index
 * /proc/kpageflags; KPF_THP is the bit tested in a kpageflags entry.
 * UINT64_C keeps the shifts 64 bits wide on every ABI.
 */
#define PFN_MASK ((UINT64_C(1) << 55) - 1)
#define KPF_THP (UINT64_C(1) << 22)
36 | |
37 | int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) |
38 | { |
39 | uint64_t paddr; |
40 | uint64_t page_flags; |
41 | |
42 | if (pagemap_file) { |
43 | pread(pagemap_file, &paddr, sizeof(paddr), |
44 | ((long)vaddr >> pageshift) * sizeof(paddr)); |
45 | |
46 | if (kpageflags_file) { |
47 | pread(kpageflags_file, &page_flags, sizeof(page_flags), |
48 | (paddr & PFN_MASK) * sizeof(page_flags)); |
49 | |
50 | return !!(page_flags & KPF_THP); |
51 | } |
52 | } |
53 | return 0; |
54 | } |
55 | |
/*
 * Write a string to an existing file, failing the test run on error.
 *
 * @path:   file to open write-only
 * @buf:    data to write
 * @buflen: length of @buf including its trailing NUL; the NUL is not written
 *
 * Any positive write() result is accepted, i.e. a return value smaller than
 * the requested length is not treated as an error.
 */
static void write_file(const char *path, const char *buf, size_t buflen)
{
	int fd;
	ssize_t numwritten;

	fd = open(path, O_WRONLY);
	if (fd == -1)
		ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));

	/* Keep "buflen - 1" from wrapping to SIZE_MAX when buflen is 0. */
	numwritten = buflen ? write(fd, buf, buflen - 1) : 0;
	close(fd);
	if (numwritten < 1)
		ksft_exit_fail_msg("Write failed\n");
}
70 | |
71 | static void write_debugfs(const char *fmt, ...) |
72 | { |
73 | char input[INPUT_MAX]; |
74 | int ret; |
75 | va_list argp; |
76 | |
77 | va_start(argp, fmt); |
78 | ret = vsnprintf(input, INPUT_MAX, fmt, argp); |
79 | va_end(argp); |
80 | |
81 | if (ret >= INPUT_MAX) |
82 | ksft_exit_fail_msg(msg: "%s: Debugfs input is too long\n" , __func__); |
83 | |
84 | write_file(SPLIT_DEBUGFS, input, ret + 1); |
85 | } |
86 | |
87 | void split_pmd_thp(void) |
88 | { |
89 | char *one_page; |
90 | size_t len = 4 * pmd_pagesize; |
91 | size_t i; |
92 | |
93 | one_page = memalign(pmd_pagesize, len); |
94 | if (!one_page) |
95 | ksft_exit_fail_msg("Fail to allocate memory: %s\n" , strerror(errno)); |
96 | |
97 | madvise(one_page, len, MADV_HUGEPAGE); |
98 | |
99 | for (i = 0; i < len; i++) |
100 | one_page[i] = (char)i; |
101 | |
102 | if (!check_huge_anon(one_page, 4, pmd_pagesize)) |
103 | ksft_exit_fail_msg(msg: "No THP is allocated\n" ); |
104 | |
105 | /* split all THPs */ |
106 | write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, |
107 | (uint64_t)one_page + len, 0); |
108 | |
109 | for (i = 0; i < len; i++) |
110 | if (one_page[i] != (char)i) |
111 | ksft_exit_fail_msg("%ld byte corrupted\n" , i); |
112 | |
113 | |
114 | if (!check_huge_anon(one_page, 0, pmd_pagesize)) |
115 | ksft_exit_fail_msg(msg: "Still AnonHugePages not split\n" ); |
116 | |
117 | ksft_test_result_pass(msg: "Split huge pages successful\n" ); |
118 | free(one_page); |
119 | } |
120 | |
121 | void split_pte_mapped_thp(void) |
122 | { |
123 | char *one_page, *pte_mapped, *pte_mapped2; |
124 | size_t len = 4 * pmd_pagesize; |
125 | uint64_t thp_size; |
126 | size_t i; |
127 | const char *pagemap_template = "/proc/%d/pagemap" ; |
128 | const char *kpageflags_proc = "/proc/kpageflags" ; |
129 | char pagemap_proc[255]; |
130 | int pagemap_fd; |
131 | int kpageflags_fd; |
132 | |
133 | if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) |
134 | ksft_exit_fail_msg("get pagemap proc error: %s\n" , strerror(errno)); |
135 | |
136 | pagemap_fd = open(pagemap_proc, O_RDONLY); |
137 | if (pagemap_fd == -1) |
138 | ksft_exit_fail_msg("read pagemap: %s\n" , strerror(errno)); |
139 | |
140 | kpageflags_fd = open(kpageflags_proc, O_RDONLY); |
141 | if (kpageflags_fd == -1) |
142 | ksft_exit_fail_msg("read kpageflags: %s\n" , strerror(errno)); |
143 | |
144 | one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, |
145 | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); |
146 | if (one_page == MAP_FAILED) |
147 | ksft_exit_fail_msg("Fail to allocate memory: %s\n" , strerror(errno)); |
148 | |
149 | madvise(one_page, len, MADV_HUGEPAGE); |
150 | |
151 | for (i = 0; i < len; i++) |
152 | one_page[i] = (char)i; |
153 | |
154 | if (!check_huge_anon(one_page, 4, pmd_pagesize)) |
155 | ksft_exit_fail_msg(msg: "No THP is allocated\n" ); |
156 | |
157 | /* remap the first pagesize of first THP */ |
158 | pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); |
159 | |
160 | /* remap the Nth pagesize of Nth THP */ |
161 | for (i = 1; i < 4; i++) { |
162 | pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, |
163 | pagesize, pagesize, |
164 | MREMAP_MAYMOVE|MREMAP_FIXED, |
165 | pte_mapped + pagesize * i); |
166 | if (pte_mapped2 == MAP_FAILED) |
167 | ksft_exit_fail_msg("mremap failed: %s\n" , strerror(errno)); |
168 | } |
169 | |
170 | /* smap does not show THPs after mremap, use kpageflags instead */ |
171 | thp_size = 0; |
172 | for (i = 0; i < pagesize * 4; i++) |
173 | if (i % pagesize == 0 && |
174 | is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) |
175 | thp_size++; |
176 | |
177 | if (thp_size != 4) |
178 | ksft_exit_fail_msg(msg: "Some THPs are missing during mremap\n" ); |
179 | |
180 | /* split all remapped THPs */ |
181 | write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, |
182 | (uint64_t)pte_mapped + pagesize * 4, 0); |
183 | |
184 | /* smap does not show THPs after mremap, use kpageflags instead */ |
185 | thp_size = 0; |
186 | for (i = 0; i < pagesize * 4; i++) { |
187 | if (pte_mapped[i] != (char)i) |
188 | ksft_exit_fail_msg("%ld byte corrupted\n" , i); |
189 | |
190 | if (i % pagesize == 0 && |
191 | is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) |
192 | thp_size++; |
193 | } |
194 | |
195 | if (thp_size) |
196 | ksft_exit_fail_msg("Still %ld THPs not split\n" , thp_size); |
197 | |
198 | ksft_test_result_pass(msg: "Split PTE-mapped huge pages successful\n" ); |
199 | munmap(one_page, len); |
200 | close(pagemap_fd); |
201 | close(kpageflags_fd); |
202 | } |
203 | |
204 | void split_file_backed_thp(void) |
205 | { |
206 | int status; |
207 | int fd; |
208 | ssize_t num_written; |
209 | char tmpfs_template[] = "/tmp/thp_split_XXXXXX" ; |
210 | const char *tmpfs_loc = mkdtemp(tmpfs_template); |
211 | char testfile[INPUT_MAX]; |
212 | uint64_t pgoff_start = 0, pgoff_end = 1024; |
213 | |
214 | ksft_print_msg(msg: "Please enable pr_debug in split_huge_pages_in_file() for more info.\n" ); |
215 | |
216 | status = mount("tmpfs" , tmpfs_loc, "tmpfs" , 0, "huge=always,size=4m" ); |
217 | |
218 | if (status) |
219 | ksft_exit_fail_msg(msg: "Unable to create a tmpfs for testing\n" ); |
220 | |
221 | status = snprintf(testfile, INPUT_MAX, "%s/thp_file" , tmpfs_loc); |
222 | if (status >= INPUT_MAX) { |
223 | ksft_exit_fail_msg(msg: "Fail to create file-backed THP split testing file\n" ); |
224 | } |
225 | |
226 | fd = open(testfile, O_CREAT|O_WRONLY, 0664); |
227 | if (fd == -1) { |
228 | ksft_perror(msg: "Cannot open testing file" ); |
229 | goto cleanup; |
230 | } |
231 | |
232 | /* write something to the file, so a file-backed THP can be allocated */ |
233 | num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); |
234 | close(fd); |
235 | |
236 | if (num_written < 1) { |
237 | ksft_perror(msg: "Fail to write data to testing file" ); |
238 | goto cleanup; |
239 | } |
240 | |
241 | /* split the file-backed THP */ |
242 | write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); |
243 | |
244 | status = unlink(testfile); |
245 | if (status) { |
246 | ksft_perror(msg: "Cannot remove testing file" ); |
247 | goto cleanup; |
248 | } |
249 | |
250 | status = umount(tmpfs_loc); |
251 | if (status) { |
252 | rmdir(tmpfs_loc); |
253 | ksft_exit_fail_msg(msg: "Unable to umount %s\n" , tmpfs_loc); |
254 | } |
255 | |
256 | status = rmdir(tmpfs_loc); |
257 | if (status) |
258 | ksft_exit_fail_msg("cannot remove tmp dir: %s\n" , strerror(errno)); |
259 | |
260 | ksft_print_msg(msg: "Please check dmesg for more information\n" ); |
261 | ksft_test_result_pass(msg: "File-backed THP split test done\n" ); |
262 | return; |
263 | |
264 | cleanup: |
265 | umount(tmpfs_loc); |
266 | rmdir(tmpfs_loc); |
267 | ksft_exit_fail_msg(msg: "Error occurred\n" ); |
268 | } |
269 | |
/*
 * Choose the directory that will hold the pagecache test file.
 *
 * @xfs_path:        caller-supplied directory, or NULL
 * @thp_fs_template: mkdtemp() template, used only when @xfs_path is NULL
 * @thp_fs_loc:      out: the directory to use
 *
 * Returns false when @xfs_path was used as is, true when a temporary
 * directory was created from @thp_fs_template.
 */
bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
		    const char **thp_fs_loc)
{
	const char *tmp_dir;

	if (xfs_path != NULL) {
		*thp_fs_loc = xfs_path;
		return false;
	}

	tmp_dir = mkdtemp(thp_fs_template);
	*thp_fs_loc = tmp_dir;
	if (tmp_dir == NULL)
		ksft_exit_fail_msg("cannot create temp folder\n");

	return true;
}
285 | |
/*
 * Remove the directory at @thp_fs_loc, but only when @created_tmp says it was
 * created for this run; a failing rmdir() is reported as a test failure.
 */
void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
{
	if (created_tmp && rmdir(thp_fs_loc) != 0)
		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
				   strerror(errno));
}
298 | |
299 | int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, |
300 | char **addr) |
301 | { |
302 | size_t i; |
303 | int dummy; |
304 | |
305 | srand(time(NULL)); |
306 | |
307 | *fd = open(testfile, O_CREAT | O_RDWR, 0664); |
308 | if (*fd == -1) |
309 | ksft_exit_fail_msg(msg: "Failed to create a file at %s\n" , testfile); |
310 | |
311 | for (i = 0; i < fd_size; i++) { |
312 | unsigned char byte = (unsigned char)i; |
313 | |
314 | write(*fd, &byte, sizeof(byte)); |
315 | } |
316 | close(*fd); |
317 | sync(); |
318 | *fd = open("/proc/sys/vm/drop_caches" , O_WRONLY); |
319 | if (*fd == -1) { |
320 | ksft_perror(msg: "open drop_caches" ); |
321 | goto err_out_unlink; |
322 | } |
323 | if (write(*fd, "3" , 1) != 1) { |
324 | ksft_perror(msg: "write to drop_caches" ); |
325 | goto err_out_unlink; |
326 | } |
327 | close(*fd); |
328 | |
329 | *fd = open(testfile, O_RDWR); |
330 | if (*fd == -1) { |
331 | ksft_perror(msg: "Failed to open testfile\n" ); |
332 | goto err_out_unlink; |
333 | } |
334 | |
335 | *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); |
336 | if (*addr == (char *)-1) { |
337 | ksft_perror(msg: "cannot mmap" ); |
338 | goto err_out_close; |
339 | } |
340 | madvise(*addr, fd_size, MADV_HUGEPAGE); |
341 | |
342 | for (size_t i = 0; i < fd_size; i++) |
343 | dummy += *(*addr + i); |
344 | |
345 | if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { |
346 | ksft_print_msg(msg: "No large pagecache folio generated, please provide a filesystem supporting large folio\n" ); |
347 | munmap(*addr, fd_size); |
348 | close(*fd); |
349 | unlink(testfile); |
350 | ksft_test_result_skip(msg: "Pagecache folio split skipped\n" ); |
351 | return -2; |
352 | } |
353 | return 0; |
354 | err_out_close: |
355 | close(*fd); |
356 | err_out_unlink: |
357 | unlink(testfile); |
358 | ksft_exit_fail_msg(msg: "Failed to create large pagecache folios\n" ); |
359 | return -1; |
360 | } |
361 | |
362 | void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) |
363 | { |
364 | int fd; |
365 | char *addr; |
366 | size_t i; |
367 | char testfile[INPUT_MAX]; |
368 | int err = 0; |
369 | |
370 | err = snprintf(testfile, INPUT_MAX, "%s/test" , fs_loc); |
371 | |
372 | if (err < 0) |
373 | ksft_exit_fail_msg(msg: "cannot generate right test file name\n" ); |
374 | |
375 | err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); |
376 | if (err) |
377 | return; |
378 | err = 0; |
379 | |
380 | write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); |
381 | |
382 | for (i = 0; i < fd_size; i++) |
383 | if (*(addr + i) != (char)i) { |
384 | ksft_print_msg("%lu byte corrupted in the file\n" , i); |
385 | err = EXIT_FAILURE; |
386 | goto out; |
387 | } |
388 | |
389 | if (!check_huge_file(addr, 0, pmd_pagesize)) { |
390 | ksft_print_msg(msg: "Still FilePmdMapped not split\n" ); |
391 | err = EXIT_FAILURE; |
392 | goto out; |
393 | } |
394 | |
395 | out: |
396 | munmap(addr, fd_size); |
397 | close(fd); |
398 | unlink(testfile); |
399 | if (err) |
400 | ksft_exit_fail_msg(msg: "Split PMD-mapped pagecache folio to order %d failed\n" , order); |
401 | ksft_test_result_pass(msg: "Split PMD-mapped pagecache folio to order %d passed\n" , order); |
402 | } |
403 | |
404 | int main(int argc, char **argv) |
405 | { |
406 | int i; |
407 | size_t fd_size; |
408 | char *optional_xfs_path = NULL; |
409 | char fs_loc_template[] = "/tmp/thp_fs_XXXXXX" ; |
410 | const char *fs_loc; |
411 | bool created_tmp; |
412 | |
413 | ksft_print_header(); |
414 | |
415 | if (geteuid() != 0) { |
416 | ksft_print_msg(msg: "Please run the benchmark as root\n" ); |
417 | ksft_finished(); |
418 | } |
419 | |
420 | if (argc > 1) |
421 | optional_xfs_path = argv[1]; |
422 | |
423 | ksft_set_plan(plan: 3+9); |
424 | |
425 | pagesize = getpagesize(); |
426 | pageshift = ffs(pagesize) - 1; |
427 | pmd_pagesize = read_pmd_pagesize(); |
428 | if (!pmd_pagesize) |
429 | ksft_exit_fail_msg(msg: "Reading PMD pagesize failed\n" ); |
430 | |
431 | fd_size = 2 * pmd_pagesize; |
432 | |
433 | split_pmd_thp(); |
434 | split_pte_mapped_thp(); |
435 | split_file_backed_thp(); |
436 | |
437 | created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, |
438 | &fs_loc); |
439 | for (i = 8; i >= 0; i--) |
440 | split_thp_in_pagecache_to_order(fd_size, i, fs_loc); |
441 | cleanup_thp_fs(fs_loc, created_tmp); |
442 | |
443 | ksft_finished(); |
444 | |
445 | return 0; |
446 | } |
447 | |