1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <string.h> |
3 | #include <fcntl.h> |
4 | #include <dirent.h> |
5 | #include <sys/ioctl.h> |
6 | #include <linux/userfaultfd.h> |
7 | #include <linux/fs.h> |
8 | #include <sys/syscall.h> |
9 | #include <unistd.h> |
10 | #include "../kselftest.h" |
11 | #include "vm_util.h" |
12 | |
/* sysfs file from which read_pmd_pagesize() parses a decimal size. */
#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
/* Per-process mapping report that __check_huge() searches line by line. */
#define SMAP_FILE_PATH "/proc/self/smaps"
/* Capacity of the line/pattern buffers used while scanning smaps. */
#define MAX_LINE_LENGTH 500

/*
 * Page size and page shift storage.
 * NOTE(review): presumably filled in lazily by the psize()/pshift()
 * helpers declared in vm_util.h -- confirm against the header.
 */
unsigned int __page_size;
unsigned int __page_shift;
19 | |
/*
 * Read the 64-bit pagemap entry that describes the page containing `start`.
 *
 * @fd:    descriptor the entry is read from with pread() (callers are
 *         expected to pass an open pagemap file).
 * @start: virtual address whose entry is wanted.
 *
 * The entry is located at (page index * sizeof(entry)) in the file.  Any
 * short or failed read terminates the test via ksft_exit_fail_msg().
 */
uint64_t pagemap_get_entry(int fd, char *start)
{
	/* Index of the virtual page, i.e. which 8-byte slot to fetch. */
	const unsigned long page_index = (unsigned long)start / getpagesize();
	uint64_t entry;

	if (pread(fd, &entry, sizeof(entry), page_index * sizeof(entry)) !=
	    sizeof(entry))
		ksft_exit_fail_msg("reading pagemap failed\n");

	return entry;
}
31 | |
32 | static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) |
33 | { |
34 | struct pm_scan_arg arg; |
35 | |
36 | arg.start = (uintptr_t)start; |
37 | arg.end = (uintptr_t)(start + psize()); |
38 | arg.vec = (uintptr_t)r; |
39 | arg.vec_len = 1; |
40 | arg.flags = 0; |
41 | arg.size = sizeof(struct pm_scan_arg); |
42 | arg.max_pages = 0; |
43 | arg.category_inverted = 0; |
44 | arg.category_mask = 0; |
45 | arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | |
46 | PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | |
47 | PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; |
48 | arg.return_mask = arg.category_anyof_mask; |
49 | |
50 | return ioctl(fd, PAGEMAP_SCAN, &arg); |
51 | } |
52 | |
53 | static uint64_t pagemap_scan_get_categories(int fd, char *start) |
54 | { |
55 | struct page_region r; |
56 | long ret; |
57 | |
58 | ret = __pagemap_scan_get_categories(fd, start, r: &r); |
59 | if (ret < 0) |
60 | ksft_exit_fail_msg(msg: "PAGEMAP_SCAN failed: %s\n" , strerror(errno)); |
61 | if (ret == 0) |
62 | return 0; |
63 | return r.categories; |
64 | } |
65 | |
/*
 * Report whether the PAGEMAP_SCAN ioctl is usable on `fd`.
 * `start` is any valid address.
 *
 * The answer is probed once and then served from a function-local static.
 */
static bool pagemap_scan_supported(int fd, char *start)
{
	static int cached = -1;

	if (cached == -1) {
		/* Provide an invalid address in order to trigger EFAULT. */
		struct page_region *bad_vec = (struct page_region *) ~0UL;
		int ret = __pagemap_scan_get_categories(fd, start, bad_vec);

		if (ret == 0)
			ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");

		/* Only EFAULT counts as support; any other errno means no. */
		cached = errno == EFAULT;
	}

	return cached;
}
84 | |
/*
 * Check whether the page at `start` has any of the given flags set.
 *
 * @fd:             descriptor passed to both the pread()-based lookup and
 *                  the PAGEMAP_SCAN ioctl.
 * @start:          address of the page to inspect.
 * @desc:           short human-readable name of the property, used only in
 *                  the failure message.
 * @pagemap_flags:  bits tested against the raw pagemap entry.
 * @pagescan_flags: bits tested against the PAGEMAP_SCAN categories.
 *
 * The pagemap entry is always consulted.  When PAGEMAP_SCAN is supported the
 * ioctl result is consulted too and both answers must agree; a mismatch
 * terminates the test.  Returns the (agreed) boolean answer.
 */
static bool page_entry_is(int fd, char *start, const char *desc,
			  uint64_t pagemap_flags, uint64_t pagescan_flags)
{
	bool m = pagemap_get_entry(fd, start) & pagemap_flags;

	if (pagemap_scan_supported(fd, start)) {
		bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;

		if (m == s)
			return m;

		/*
		 * Terminate the line like every other failure message in this
		 * file so the "Bail out!" record is not fused with whatever
		 * is printed next.
		 */
		ksft_exit_fail_msg(
			"read and ioctl return unmatched results for %s: %d %d\n",
			desc, m, s);
	}
	return m;
}
101 | |
102 | bool pagemap_is_softdirty(int fd, char *start) |
103 | { |
104 | return page_entry_is(fd, start, desc: "soft-dirty" , |
105 | PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); |
106 | } |
107 | |
108 | bool pagemap_is_swapped(int fd, char *start) |
109 | { |
110 | return page_entry_is(fd, start, desc: "swap" , PM_SWAP, PAGE_IS_SWAPPED); |
111 | } |
112 | |
113 | bool pagemap_is_populated(int fd, char *start) |
114 | { |
115 | return page_entry_is(fd, start, desc: "populated" , |
116 | PM_PRESENT | PM_SWAP, |
117 | PAGE_IS_PRESENT | PAGE_IS_SWAPPED); |
118 | } |
119 | |
120 | unsigned long pagemap_get_pfn(int fd, char *start) |
121 | { |
122 | uint64_t entry = pagemap_get_entry(fd, start); |
123 | |
124 | /* If present (63th bit), PFN is at bit 0 -- 54. */ |
125 | if (entry & PM_PRESENT) |
126 | return entry & 0x007fffffffffffffull; |
127 | return -1ul; |
128 | } |
129 | |
/*
 * Write "4" to /proc/self/clear_refs.
 *
 * Failing to open the file, or writing fewer bytes than the command holds,
 * terminates the test.  The descriptor is closed before the write result is
 * evaluated.
 */
void clear_softdirty(void)
{
	const char cmd[] = "4";
	const size_t cmd_len = sizeof(cmd) - 1;
	ssize_t written;
	int fd;

	fd = open("/proc/self/clear_refs", O_WRONLY);
	if (fd < 0)
		ksft_exit_fail_msg("opening clear_refs failed\n");

	written = write(fd, cmd, cmd_len);
	close(fd);

	if (written != (ssize_t)cmd_len)
		ksft_exit_fail_msg("writing clear_refs failed\n");
}
143 | |
/*
 * Advance through `fp` line by line until a line begins with `pattern`.
 *
 * @fp:      stream to read from; reading continues from its current position.
 * @pattern: prefix each line is compared against.
 * @buf:     caller-provided buffer that receives each line via fgets().
 * @len:     capacity of `buf`.
 *
 * Returns true with the matching line left in `buf`, or false once fgets()
 * stops returning data.
 */
bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
{
	for (;;) {
		if (fgets(buf, len, fp) == NULL)
			return false;

		if (strncmp(buf, pattern, strlen(pattern)) == 0)
			return true;
	}
}
152 | |
153 | uint64_t read_pmd_pagesize(void) |
154 | { |
155 | int fd; |
156 | char buf[20]; |
157 | ssize_t num_read; |
158 | |
159 | fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); |
160 | if (fd == -1) |
161 | return 0; |
162 | |
163 | num_read = read(fd, buf, 19); |
164 | if (num_read < 1) { |
165 | close(fd); |
166 | return 0; |
167 | } |
168 | buf[num_read] = '\0'; |
169 | close(fd); |
170 | |
171 | return strtoul(buf, NULL, 10); |
172 | } |
173 | |
174 | bool __check_huge(void *addr, char *pattern, int nr_hpages, |
175 | uint64_t hpage_size) |
176 | { |
177 | uint64_t thp = -1; |
178 | int ret; |
179 | FILE *fp; |
180 | char buffer[MAX_LINE_LENGTH]; |
181 | char addr_pattern[MAX_LINE_LENGTH]; |
182 | |
183 | ret = snprintf(buf: addr_pattern, MAX_LINE_LENGTH, fmt: "%08lx-" , |
184 | (unsigned long) addr); |
185 | if (ret >= MAX_LINE_LENGTH) |
186 | ksft_exit_fail_msg(msg: "%s: Pattern is too long\n" , __func__); |
187 | |
188 | fp = fopen(SMAP_FILE_PATH, "r" ); |
189 | if (!fp) |
190 | ksft_exit_fail_msg(msg: "%s: Failed to open file %s\n" , __func__, SMAP_FILE_PATH); |
191 | |
192 | if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) |
193 | goto err_out; |
194 | |
195 | /* |
196 | * Fetch the pattern in the same block and check the number of |
197 | * hugepages. |
198 | */ |
199 | if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) |
200 | goto err_out; |
201 | |
202 | snprintf(buf: addr_pattern, MAX_LINE_LENGTH, fmt: "%s%%9ld kB" , pattern); |
203 | |
204 | if (sscanf(buffer, addr_pattern, &thp) != 1) |
205 | ksft_exit_fail_msg(msg: "Reading smap error\n" ); |
206 | |
207 | err_out: |
208 | fclose(fp); |
209 | return thp == (nr_hpages * (hpage_size >> 10)); |
210 | } |
211 | |
/*
 * Return true when the smaps "AnonHugePages" field of the mapping at `addr`
 * accounts for exactly `nr_hpages` huge pages of `hpage_size` bytes each.
 */
bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
{
	bool matches;

	matches = __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
	return matches;
}
216 | |
/*
 * Return true when the smaps "FilePmdMapped" field of the mapping at `addr`
 * accounts for exactly `nr_hpages` huge pages of `hpage_size` bytes each.
 */
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
{
	bool matches;

	matches = __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
	return matches;
}
221 | |
/*
 * Return true when the smaps "ShmemPmdMapped" field of the mapping at `addr`
 * accounts for exactly `nr_hpages` huge pages of `hpage_size` bytes each.
 */
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
{
	bool matches;

	matches = __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
	return matches;
}
226 | |
227 | int64_t allocate_transhuge(void *ptr, int pagemap_fd) |
228 | { |
229 | uint64_t ent[2]; |
230 | |
231 | /* drop pmd */ |
232 | if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, |
233 | MAP_FIXED | MAP_ANONYMOUS | |
234 | MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) |
235 | ksft_exit_fail_msg(msg: "mmap transhuge\n" ); |
236 | |
237 | if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) |
238 | ksft_exit_fail_msg(msg: "MADV_HUGEPAGE\n" ); |
239 | |
240 | /* allocate transparent huge page */ |
241 | *(volatile void **)ptr = ptr; |
242 | |
243 | if (pread(pagemap_fd, ent, sizeof(ent), |
244 | (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent)) |
245 | ksft_exit_fail_msg(msg: "read pagemap\n" ); |
246 | |
247 | if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && |
248 | PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && |
249 | !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1))) |
250 | return PAGEMAP_PFN(ent[0]); |
251 | |
252 | return -1; |
253 | } |
254 | |
/*
 * Look up the "Hugepagesize:" line of /proc/meminfo.
 *
 * Returns the reported size converted from kB to bytes, or 0 when the file
 * cannot be opened or no such line is found.
 */
unsigned long default_huge_page_size(void)
{
	unsigned long size_kb = 0;
	unsigned long size_bytes = 0;
	size_t capacity = 0;
	char *text = NULL;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (!meminfo)
		return 0;

	while (getline(&text, &capacity, meminfo) > 0) {
		if (sscanf(text, "Hugepagesize: %lu kB", &size_kb) != 1)
			continue;

		/* kB -> bytes */
		size_bytes = size_kb << 10;
		break;
	}

	/* getline() allocated (or grew) the line buffer; release it. */
	free(text);
	fclose(meminfo);
	return size_bytes;
}
275 | |
/*
 * Enumerate the hugetlb page sizes listed under /sys/kernel/mm/hugepages/.
 *
 * @sizes: output array; receives each detected size in bytes.
 * @max:   capacity of `sizes`; enumeration stops once it is full.
 *
 * Only directory entries whose name parses as "hugepages-<N>kB" are
 * counted, and each detected size is also logged.  Returns the number of
 * sizes stored, or 0 when the directory cannot be opened.
 */
int detect_hugetlb_page_sizes(size_t sizes[], int max)
{
	struct dirent *de;
	int found = 0;
	DIR *hp_dir;

	hp_dir = opendir("/sys/kernel/mm/hugepages/");
	if (!hp_dir)
		return 0;

	while (found < max && (de = readdir(hp_dir)) != NULL) {
		size_t kib;

		if (de->d_type == DT_DIR &&
		    sscanf(de->d_name, "hugepages-%zukB", &kib) == 1) {
			sizes[found++] = kib * 1024;
			ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
				       kib);
		}
	}

	closedir(hp_dir);
	return found;
}
301 | |
/*
 * Register [addr, addr + len) with the userfaultfd `uffd`.
 *
 * @miss, @wp, @minor: select UFFDIO_REGISTER_MODE_MISSING, _WP and _MINOR
 *                     respectively; any combination may be requested.
 * @ioctls:            if non-NULL, receives the allowed ioctls reported by a
 *                     successful registration; left untouched on failure.
 *
 * Returns 0 on success or -errno when the UFFDIO_REGISTER ioctl fails.
 */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
			      bool miss, bool wp, bool minor, uint64_t *ioctls)
{
	struct uffdio_register reg = {
		.range = {
			.start = (unsigned long)addr,
			.len = len,
		},
		.mode = (miss ? UFFDIO_REGISTER_MODE_MISSING : 0) |
			(wp ? UFFDIO_REGISTER_MODE_WP : 0) |
			(minor ? UFFDIO_REGISTER_MODE_MINOR : 0),
	};

	if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
		return -errno;

	if (ioctls)
		*ioctls = reg.ioctls;

	return 0;
}
328 | |
/*
 * Register [addr, addr + len) with the userfaultfd `uffd` in the requested
 * modes, without reporting the allowed ioctls back to the caller.
 *
 * Returns 0 on success or -errno on failure.
 */
int uffd_register(int uffd, void *addr, uint64_t len,
		  bool miss, bool wp, bool minor)
{
	uint64_t *no_ioctls = NULL;

	return uffd_register_with_ioctls(uffd, addr, len, miss, wp, minor,
					 no_ioctls);
}
335 | |
/*
 * Unregister [addr, addr + len) from the userfaultfd `uffd`.
 *
 * Returns 0 on success or -errno when the UFFDIO_UNREGISTER ioctl fails.
 */
int uffd_unregister(int uffd, void *addr, uint64_t len)
{
	struct uffdio_range range;

	range.start = (uintptr_t)addr;
	range.len = len;

	return ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1 ? -errno : 0;
}
346 | |
/*
 * Look up the "HugePages_Free:" line of /proc/meminfo.
 *
 * Returns the number found there, or 0 when the file cannot be opened or no
 * such line is found.
 */
unsigned long get_free_hugepages(void)
{
	unsigned long nr_free = 0;
	size_t capacity = 0;
	char *text = NULL;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (!meminfo)
		return nr_free;

	/* Keep reading until a line parses or the input runs out. */
	while (getline(&text, &capacity, meminfo) > 0 &&
	       sscanf(text, "HugePages_Free: %lu", &nr_free) != 1)
		continue;

	/* getline() allocated (or grew) the line buffer; release it. */
	free(text);
	fclose(meminfo);
	return nr_free;
}
365 | |