1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) |
4 | * |
5 | * There are examples in here of: |
6 | * * how to set protection keys on memory |
7 | * * how to set/clear bits in pkey registers (the rights register) |
8 | * * how to handle SEGV_PKUERR signals and extract pkey-relevant |
9 | * information from the siginfo |
10 | * |
11 | * Things to add: |
12 | * make sure KSM and KSM COW breaking works |
13 | * prefault pages in at malloc, or not |
14 | * protect MPX bounds tables with protection keys? |
15 | * make sure VMA splitting/merging is working correctly |
16 | * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys |
17 | * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel |
18 | * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks |
19 | * |
20 | * Compile like this: |
21 | * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm |
22 | * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm |
23 | */ |
24 | #define _GNU_SOURCE |
25 | #define __SANE_USERSPACE_TYPES__ |
26 | #include <errno.h> |
27 | #include <linux/elf.h> |
28 | #include <linux/futex.h> |
29 | #include <time.h> |
30 | #include <sys/time.h> |
31 | #include <sys/syscall.h> |
32 | #include <string.h> |
33 | #include <stdio.h> |
34 | #include <stdint.h> |
35 | #include <stdbool.h> |
36 | #include <signal.h> |
37 | #include <assert.h> |
38 | #include <stdlib.h> |
39 | #include <ucontext.h> |
40 | #include <sys/mman.h> |
41 | #include <sys/types.h> |
42 | #include <sys/wait.h> |
43 | #include <sys/stat.h> |
44 | #include <fcntl.h> |
45 | #include <unistd.h> |
46 | #include <sys/ptrace.h> |
47 | #include <setjmp.h> |
48 | |
49 | #include "pkey-helpers.h" |
50 | |
/*
 * Test progress counters.  Nothing in this part of the file modifies them.
 * NOTE(review): presumably consumed by the test driver and the assertion
 * macros in pkey-helpers.h -- confirm.
 */
int iteration_nr = 1;
int test_nr;

/*
 * Software copy of the pkey rights register.  The helpers below update it
 * whenever they change the hardware register.
 */
u64 shadow_pkey_reg;
/* Set to 1 by the signal handlers while they run, and cleared on exit. */
int dprint_in_signal;
/*
 * NOTE(review): presumably scratch space for the dprintf*() macros while
 * dprint_in_signal is set -- confirm in pkey-helpers.h.
 */
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
/* File-scope scratch buffer; not referenced in this part of the file. */
char buf[256];
58 | |
/*
 * Write the string 'str' into the already-existing file 'file'.
 *
 * The file is opened O_RDWR and the whole string has to go out in a
 * single write().  Any failure is fatal: a diagnostic is printed to
 * stderr and the process exits with the source line number as status.
 */
void cat_into_file(char *str, char *file)
{
	int fd = open(file, O_RDWR);
	/* the debug print below may clobber errno, so keep open()'s value */
	int open_errno = errno;
	size_t len = strlen(str);
	ssize_t ret;

	dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
	/*
	 * these need to be raw because they are called under
	 * pkey_assert()
	 */
	if (fd < 0) {
		/* name the path that could not be opened, not the payload */
		fprintf(stderr, "error opening '%s'\n", file);
		errno = open_errno;
		perror("error: ");
		exit(__LINE__);
	}

	ret = write(fd, str, len);
	/* a short write counts as a failure too */
	if (ret < 0 || (size_t)ret != len) {
		perror("write to file failed");
		fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
		exit(__LINE__);
	}
	close(fd);
}
83 | |
#if CONTROL_TRACING > 0
static int warned_tracing;
/*
 * Tracing control is only attempted as root.  Returns 1 when the
 * effective UID is 0.  Otherwise returns 0, printing a warning the
 * first time this happens.
 */
int tracing_root_ok(void)
{
	if (geteuid() == 0)
		return 1;

	/* warn only once per process */
	if (!warned_tracing)
		fprintf(stderr, "WARNING: not run as root, "
			"can not do tracing control\n");
	warned_tracing = 1;
	return 0;
}
#endif
98 | |
/*
 * When CONTROL_TRACING is enabled and we are root, reset the trace
 * buffer, select the tracer, restrict tracing to this pid and switch
 * tracing on.  Otherwise this is a no-op.
 */
void tracing_on(void)
{
#if CONTROL_TRACING > 0
#define TRACEDIR "/sys/kernel/tracing"
	/* compile-time choice between function_graph and nop tracers */
	const int use_function_graph = 1;
	char pidstr[32];

	if (!tracing_root_ok())
		return;

	sprintf(pidstr, "%d", getpid());

	/* stop tracing and empty the buffer before reconfiguring */
	cat_into_file("0", TRACEDIR "/tracing_on");
	cat_into_file("\n", TRACEDIR "/trace");

	if (use_function_graph) {
		cat_into_file("function_graph", TRACEDIR "/current_tracer");
		cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
	} else {
		cat_into_file("nop", TRACEDIR "/current_tracer");
	}

	cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
	cat_into_file("1", TRACEDIR "/tracing_on");
	dprintf1("enabled tracing\n");
#endif
}
122 | |
/*
 * Counterpart of tracing_on(): switch tracing off again.  Does nothing
 * unless CONTROL_TRACING is enabled and we are running as root.
 */
void tracing_off(void)
{
#if CONTROL_TRACING > 0
	if (tracing_root_ok())
		cat_into_file("0", "/sys/kernel/tracing/tracing_on");
#endif
}
131 | |
/*
 * Cleanup run on the abort path: announce itself on stderr, switch
 * tracing off and, when SLEEP_ON_ABORT is defined, sleep for that many
 * seconds before returning.
 */
void abort_hooks(void)
{
	fprintf(stderr, "running %s()...\n", __func__);
	tracing_off();
#ifdef SLEEP_ON_ABORT
	sleep(SLEEP_ON_ABORT);
#endif
}
140 | |
/*
 * This attempts to have roughly a page of instructions followed by a few
 * instructions that do a write, and another page of instructions.  That
 * way, we are pretty sure that the write is in the second page of
 * instructions and has at least a page of padding behind it.
 *
 * *That* lets us be sure to madvise() away the write instruction, which
 * will then fault, which makes sure that the fault code handles
 * execute-only memory properly.
 *
 * 'write_to_me' receives the store; the value written is simply the
 * source line number of the store statement.
 */
#ifdef __powerpc64__
/* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536)))
#else
__attribute__((__aligned__(PAGE_SIZE)))
#endif
void lots_o_noops_around_write(int *write_to_me)
{
	dprintf3("running %s()\n", __func__);
	/* leading padding: see the layout description above */
	__page_o_noops();
	/* Assume this happens in the second page of instructions: */
	*write_to_me = __LINE__;
	/* pad out by another page: */
	__page_o_noops();
	dprintf3("%s() done\n", __func__);
}
167 | |
168 | void dump_mem(void *dumpme, int len_bytes) |
169 | { |
170 | char *c = (void *)dumpme; |
171 | int i; |
172 | |
173 | for (i = 0; i < len_bytes; i += sizeof(u64)) { |
174 | u64 *ptr = (u64 *)(c + i); |
175 | dprintf1("dump[%03d][@%p]: %016llx\n" , i, ptr, *ptr); |
176 | } |
177 | } |
178 | |
179 | static u32 hw_pkey_get(int pkey, unsigned long flags) |
180 | { |
181 | u64 pkey_reg = __read_pkey_reg(); |
182 | |
183 | dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n" , |
184 | __func__, pkey, flags, 0, 0); |
185 | dprintf2("%s() raw pkey_reg: %016llx\n" , __func__, pkey_reg); |
186 | |
187 | return (u32) get_pkey_bits(reg: pkey_reg, pkey); |
188 | } |
189 | |
190 | static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) |
191 | { |
192 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); |
193 | u64 old_pkey_reg = __read_pkey_reg(); |
194 | u64 new_pkey_reg; |
195 | |
196 | /* make sure that 'rights' only contains the bits we expect: */ |
197 | assert(!(rights & ~mask)); |
198 | |
199 | /* modify bits accordingly in old pkey_reg and assign it */ |
200 | new_pkey_reg = set_pkey_bits(reg: old_pkey_reg, pkey, flags: rights); |
201 | |
202 | __write_pkey_reg(pkey_reg: new_pkey_reg); |
203 | |
204 | dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" |
205 | " pkey_reg now: %016llx old_pkey_reg: %016llx\n" , |
206 | __func__, pkey, rights, flags, 0, __read_pkey_reg(), |
207 | old_pkey_reg); |
208 | return 0; |
209 | } |
210 | |
211 | void pkey_disable_set(int pkey, int flags) |
212 | { |
213 | unsigned long syscall_flags = 0; |
214 | int ret; |
215 | int pkey_rights; |
216 | u64 orig_pkey_reg = read_pkey_reg(); |
217 | |
218 | dprintf1("START->%s(%d, 0x%x)\n" , __func__, |
219 | pkey, flags); |
220 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
221 | |
222 | pkey_rights = hw_pkey_get(pkey, flags: syscall_flags); |
223 | |
224 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n" , __func__, |
225 | pkey, pkey, pkey_rights); |
226 | |
227 | pkey_assert(pkey_rights >= 0); |
228 | |
229 | pkey_rights |= flags; |
230 | |
231 | ret = hw_pkey_set(pkey, rights: pkey_rights, flags: syscall_flags); |
232 | assert(!ret); |
233 | /* pkey_reg and flags have the same format */ |
234 | shadow_pkey_reg = set_pkey_bits(reg: shadow_pkey_reg, pkey, flags: pkey_rights); |
235 | dprintf1("%s(%d) shadow: 0x%016llx\n" , |
236 | __func__, pkey, shadow_pkey_reg); |
237 | |
238 | pkey_assert(ret >= 0); |
239 | |
240 | pkey_rights = hw_pkey_get(pkey, flags: syscall_flags); |
241 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n" , __func__, |
242 | pkey, pkey, pkey_rights); |
243 | |
244 | dprintf1("%s(%d) pkey_reg: 0x%016llx\n" , |
245 | __func__, pkey, read_pkey_reg()); |
246 | if (flags) |
247 | pkey_assert(read_pkey_reg() >= orig_pkey_reg); |
248 | dprintf1("END<---%s(%d, 0x%x)\n" , __func__, |
249 | pkey, flags); |
250 | } |
251 | |
252 | void pkey_disable_clear(int pkey, int flags) |
253 | { |
254 | unsigned long syscall_flags = 0; |
255 | int ret; |
256 | int pkey_rights = hw_pkey_get(pkey, flags: syscall_flags); |
257 | u64 orig_pkey_reg = read_pkey_reg(); |
258 | |
259 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
260 | |
261 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n" , __func__, |
262 | pkey, pkey, pkey_rights); |
263 | pkey_assert(pkey_rights >= 0); |
264 | |
265 | pkey_rights &= ~flags; |
266 | |
267 | ret = hw_pkey_set(pkey, rights: pkey_rights, flags: 0); |
268 | shadow_pkey_reg = set_pkey_bits(reg: shadow_pkey_reg, pkey, flags: pkey_rights); |
269 | pkey_assert(ret >= 0); |
270 | |
271 | pkey_rights = hw_pkey_get(pkey, flags: syscall_flags); |
272 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n" , __func__, |
273 | pkey, pkey, pkey_rights); |
274 | |
275 | dprintf1("%s(%d) pkey_reg: 0x%016llx\n" , __func__, |
276 | pkey, read_pkey_reg()); |
277 | if (flags) |
278 | assert(read_pkey_reg() <= orig_pkey_reg); |
279 | } |
280 | |
/* Re-enable writes through 'pkey' by clearing PKEY_DISABLE_WRITE. */
void pkey_write_allow(int pkey)
{
	pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
}
/* Forbid writes through 'pkey' by setting PKEY_DISABLE_WRITE. */
void pkey_write_deny(int pkey)
{
	pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
}
/* Re-enable access through 'pkey' by clearing PKEY_DISABLE_ACCESS. */
void pkey_access_allow(int pkey)
{
	pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
}
/* Forbid access through 'pkey' by setting PKEY_DISABLE_ACCESS. */
void pkey_access_deny(int pkey)
{
	pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
297 | |
/*
 * Map a SIGSEGV si_code to its symbolic name for log output.  Codes
 * other than the four listed here yield "UNKNOWN".
 */
static char *si_code_str(int si_code)
{
	switch (si_code) {
	case SEGV_MAPERR:
		return "SEGV_MAPERR";
	case SEGV_ACCERR:
		return "SEGV_ACCERR";
	case SEGV_BNDERR:
		return "SEGV_BNDERR";
	case SEGV_PKUERR:
		return "SEGV_PKUERR";
	default:
		return "UNKNOWN";
	}
}
310 | |
/* Number of faults that signal_handler() has run to completion for. */
int pkey_faults;
/* pkey taken from siginfo by the most recent fault (initially -1). */
int last_si_pkey = -1;
/*
 * SA_SIGINFO handler; setup_sigsegv_handler() installs it for SIGSEGV
 * and SIGALRM.
 *
 * It logs the fault, exits the process with status 4 if the si_code is
 * SEGV_MAPERR, SEGV_ACCERR or SEGV_BNDERR, and otherwise records the
 * faulting pkey in last_si_pkey and increments pkey_faults.  So that the
 * faulting instruction can be retried on return, x86 zeroes the pkey
 * register image in the saved XSAVE area, while powerpc64 re-enables
 * access for the faulting pkey.
 *
 * 'si' is the siginfo for the fault, 'vucontext' the ucontext_t of the
 * interrupted code.
 */
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
	ucontext_t *uctxt = vucontext;
	int trapno;
	unsigned long ip;
	char *fpregs;
#if defined(__i386__) || defined(__x86_64__) /* arch */
	u32 *pkey_reg_ptr;
	int pkey_reg_offset;
#endif /* arch */
	u64 siginfo_pkey;
	u32 *si_pkey_ptr;

	/* flag that the dprintf*() calls below run in signal context */
	dprint_in_signal = 1;
	dprintf1(">>>>===============SIGSEGV============================\n");
	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
			__func__, __LINE__,
			__read_pkey_reg(), shadow_pkey_reg);

	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
	fpregs = (char *) uctxt->uc_mcontext.fpregs;

	dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
			__func__, trapno, ip, si_code_str(si->si_code),
			si->si_code);

#if defined(__i386__) || defined(__x86_64__) /* arch */
#ifdef __i386__
	/*
	 * 32-bit has some extra padding so that userspace can tell whether
	 * the XSTATE header is present in addition to the "legacy" FPU
	 * state.  We just assume that it is here.
	 */
	fpregs += 0x70;
#endif /* i386 */
	/* locate the saved pkey register inside the XSAVE image */
	pkey_reg_offset = pkey_reg_xstate_offset();
	pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);

	/*
	 * If we got a PKEY fault, we *HAVE* to have at least one bit set in
	 * here.
	 */
	dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
	/* note: the offset is in u32 units, i.e. 128 * 4 bytes back */
	if (DEBUG_LEVEL > 4)
		dump_mem(pkey_reg_ptr - 128, 256);
	pkey_assert(*pkey_reg_ptr);
#endif /* arch */

	dprintf1("siginfo: %p\n", si);
	dprintf1(" fpregs: %p\n", fpregs);

	/* anything other than a protection-key fault is fatal */
	if ((si->si_code == SEGV_MAPERR) ||
	    (si->si_code == SEGV_ACCERR) ||
	    (si->si_code == SEGV_BNDERR)) {
		printf("non-PK si_code, exiting...\n");
		exit(4);
	}

	si_pkey_ptr = siginfo_get_pkey_ptr(si);
	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
	dump_mem((u8 *)si_pkey_ptr - 8, 24);
	siginfo_pkey = *si_pkey_ptr;
	pkey_assert(siginfo_pkey < NR_PKEYS);
	last_si_pkey = siginfo_pkey;

	/*
	 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
	 * checking
	 */
	dprintf1("signal pkey_reg from pkey_reg: %016llx\n",
			__read_pkey_reg());
	dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey);
#if defined(__i386__) || defined(__x86_64__) /* arch */
	dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
	/*
	 * NOTE(review): this stores 64 bits through a pointer declared as
	 * u32 * -- confirm that the slot in the XSAVE image is 8 bytes wide.
	 */
	*(u64 *)pkey_reg_ptr = 0x00000000;
	dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n");
#elif defined(__powerpc64__) /* arch */
	/* restore access and let the faulting instruction continue */
	pkey_access_allow(siginfo_pkey);
#endif /* arch */
	pkey_faults++;
	dprintf1("<<<<==================================================\n");
	dprint_in_signal = 0;
}
398 | |
/*
 * Block until any child changes state and return whatever waitpid()
 * returned.  The exit status itself is discarded.
 */
int wait_all_children(void)
{
	int ignored_status;
	pid_t reaped = waitpid(-1, &ignored_status, 0);

	return reaped;
}
404 | |
/*
 * SIGCHLD handler: only logs our pid and the signal number 'x'.
 * dprint_in_signal is raised around the log call.
 */
void sig_chld(int x)
{
	dprint_in_signal = 1;
	dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
	dprint_in_signal = 0;
}
411 | |
412 | void setup_sigsegv_handler(void) |
413 | { |
414 | int r, rs; |
415 | struct sigaction newact; |
416 | struct sigaction oldact; |
417 | |
418 | /* #PF is mapped to sigsegv */ |
419 | int signum = SIGSEGV; |
420 | |
421 | newact.sa_handler = 0; |
422 | newact.sa_sigaction = signal_handler; |
423 | |
424 | /*sigset_t - signals to block while in the handler */ |
425 | /* get the old signal mask. */ |
426 | rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); |
427 | pkey_assert(rs == 0); |
428 | |
429 | /* call sa_sigaction, not sa_handler*/ |
430 | newact.sa_flags = SA_SIGINFO; |
431 | |
432 | newact.sa_restorer = 0; /* void(*)(), obsolete */ |
433 | r = sigaction(signum, &newact, &oldact); |
434 | r = sigaction(SIGALRM, &newact, &oldact); |
435 | pkey_assert(r == 0); |
436 | } |
437 | |
/* Install sig_chld() for SIGCHLD, then the SIGSEGV/SIGALRM handler. */
void setup_handlers(void)
{
	signal(SIGCHLD, &sig_chld);
	setup_sigsegv_handler();
}
443 | |
/*
 * Fork a child that does nothing but sleep in a loop.  Returns the
 * child's pid in the parent; the child never returns from here.
 */
pid_t fork_lazy_child(void)
{
	pid_t forkret = fork();

	pkey_assert(forkret >= 0);
	dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);

	/* parent: hand the child's pid back to the caller */
	if (forkret)
		return forkret;

	/* in the child */
	for (;;) {
		dprintf1("child sleeping...\n");
		sleep(30);
	}
}
461 | |
462 | int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, |
463 | unsigned long pkey) |
464 | { |
465 | int sret; |
466 | |
467 | dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n" , __func__, |
468 | ptr, size, orig_prot, pkey); |
469 | |
470 | errno = 0; |
471 | sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); |
472 | if (errno) { |
473 | dprintf2("SYS_mprotect_key sret: %d\n" , sret); |
474 | dprintf2("SYS_mprotect_key prot: 0x%lx\n" , orig_prot); |
475 | dprintf2("SYS_mprotect_key failed, errno: %d\n" , errno); |
476 | if (DEBUG_LEVEL >= 2) |
477 | perror("SYS_mprotect_pkey" ); |
478 | } |
479 | return sret; |
480 | } |
481 | |
/* Raw pkey_alloc() system call wrapper; logs and returns the result. */
int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
{
	int ret;

	ret = syscall(SYS_pkey_alloc, flags, init_val);
	dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
			__func__, flags, init_val, ret, errno);

	return ret;
}
489 | |
490 | int alloc_pkey(void) |
491 | { |
492 | int ret; |
493 | unsigned long init_val = 0x0; |
494 | |
495 | dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n" , |
496 | __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); |
497 | ret = sys_pkey_alloc(flags: 0, init_val); |
498 | /* |
499 | * pkey_alloc() sets PKEY register, so we need to reflect it in |
500 | * shadow_pkey_reg: |
501 | */ |
502 | dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
503 | " shadow: 0x%016llx\n" , |
504 | __func__, __LINE__, ret, __read_pkey_reg(), |
505 | shadow_pkey_reg); |
506 | if (ret > 0) { |
507 | /* clear both the bits: */ |
508 | shadow_pkey_reg = set_pkey_bits(reg: shadow_pkey_reg, pkey: ret, |
509 | flags: ~PKEY_MASK); |
510 | dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
511 | " shadow: 0x%016llx\n" , |
512 | __func__, |
513 | __LINE__, ret, __read_pkey_reg(), |
514 | shadow_pkey_reg); |
515 | /* |
516 | * move the new state in from init_val |
517 | * (remember, we cheated and init_val == pkey_reg format) |
518 | */ |
519 | shadow_pkey_reg = set_pkey_bits(reg: shadow_pkey_reg, pkey: ret, |
520 | flags: init_val); |
521 | } |
522 | dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
523 | " shadow: 0x%016llx\n" , |
524 | __func__, __LINE__, ret, __read_pkey_reg(), |
525 | shadow_pkey_reg); |
526 | dprintf1("%s()::%d errno: %d\n" , __func__, __LINE__, errno); |
527 | /* for shadow checking: */ |
528 | read_pkey_reg(); |
529 | dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
530 | " shadow: 0x%016llx\n" , |
531 | __func__, __LINE__, ret, __read_pkey_reg(), |
532 | shadow_pkey_reg); |
533 | return ret; |
534 | } |
535 | |
/* Raw pkey_free() system call wrapper; logs and returns the result. */
int sys_pkey_free(unsigned long pkey)
{
	int ret;

	ret = syscall(SYS_pkey_free, pkey);
	dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);

	return ret;
}
542 | |
543 | /* |
544 | * I had a bug where pkey bits could be set by mprotect() but |
545 | * not cleared. This ensures we get lots of random bit sets |
546 | * and clears on the vma and pte pkey bits. |
547 | */ |
548 | int alloc_random_pkey(void) |
549 | { |
550 | int max_nr_pkey_allocs; |
551 | int ret; |
552 | int i; |
553 | int alloced_pkeys[NR_PKEYS]; |
554 | int nr_alloced = 0; |
555 | int random_index; |
556 | memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); |
557 | |
558 | /* allocate every possible key and make a note of which ones we got */ |
559 | max_nr_pkey_allocs = NR_PKEYS; |
560 | for (i = 0; i < max_nr_pkey_allocs; i++) { |
561 | int new_pkey = alloc_pkey(); |
562 | if (new_pkey < 0) |
563 | break; |
564 | alloced_pkeys[nr_alloced++] = new_pkey; |
565 | } |
566 | |
567 | pkey_assert(nr_alloced > 0); |
568 | /* select a random one out of the allocated ones */ |
569 | random_index = rand() % nr_alloced; |
570 | ret = alloced_pkeys[random_index]; |
571 | /* now zero it out so we don't free it next */ |
572 | alloced_pkeys[random_index] = 0; |
573 | |
574 | /* go through the allocated ones that we did not want and free them */ |
575 | for (i = 0; i < nr_alloced; i++) { |
576 | int free_ret; |
577 | if (!alloced_pkeys[i]) |
578 | continue; |
579 | free_ret = sys_pkey_free(pkey: alloced_pkeys[i]); |
580 | pkey_assert(!free_ret); |
581 | } |
582 | dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
583 | " shadow: 0x%016llx\n" , __func__, |
584 | __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); |
585 | return ret; |
586 | } |
587 | |
588 | int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, |
589 | unsigned long pkey) |
590 | { |
591 | int nr_iterations = random() % 100; |
592 | int ret; |
593 | |
594 | while (0) { |
595 | int rpkey = alloc_random_pkey(); |
596 | ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); |
597 | dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n" , |
598 | ptr, size, orig_prot, pkey, ret); |
599 | if (nr_iterations-- < 0) |
600 | break; |
601 | |
602 | dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
603 | " shadow: 0x%016llx\n" , |
604 | __func__, __LINE__, ret, __read_pkey_reg(), |
605 | shadow_pkey_reg); |
606 | sys_pkey_free(pkey: rpkey); |
607 | dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
608 | " shadow: 0x%016llx\n" , |
609 | __func__, __LINE__, ret, __read_pkey_reg(), |
610 | shadow_pkey_reg); |
611 | } |
612 | pkey_assert(pkey < NR_PKEYS); |
613 | |
614 | ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); |
615 | dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n" , |
616 | ptr, size, orig_prot, pkey, ret); |
617 | pkey_assert(!ret); |
618 | dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" |
619 | " shadow: 0x%016llx\n" , __func__, |
620 | __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); |
621 | return ret; |
622 | } |
623 | |
/*
 * Bookkeeping for the mappings handed out by the malloc_pkey_*()
 * helpers, so that free_pkey_malloc() can munmap() them later.  A slot
 * whose ->ptr is NULL is free for reuse.
 */
struct pkey_malloc_record {
	void *ptr;
	long size;
	int prot;
};
struct pkey_malloc_record *pkey_malloc_records;
struct pkey_malloc_record *pkey_last_malloc_record;
/* number of records currently in use */
long nr_pkey_malloc_records;
/* number of slots in pkey_malloc_records[], in use or not */
static long pkey_malloc_records_capacity;

/*
 * Remember the mapping [ptr, ptr + size) and its protection 'prot'.
 * Reuses a free slot when there is one, otherwise grows the array.
 * pkey_last_malloc_record is left pointing at the new record.
 */
void record_pkey_malloc(void *ptr, long size, int prot)
{
	long i;
	struct pkey_malloc_record *rec = NULL;

	/*
	 * find a free record: test the slot's contents, since the address
	 * of an array element is never NULL
	 */
	for (i = 0; i < pkey_malloc_records_capacity; i++) {
		if (!pkey_malloc_records[i].ptr) {
			rec = &pkey_malloc_records[i];
			break;
		}
	}
	if (!rec) {
		/* every record is full */
		size_t old_nr_records = pkey_malloc_records_capacity;
		size_t new_nr_records = (old_nr_records * 2 + 1);
		size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);

		dprintf2("new_nr_records: %zd\n", new_nr_records);
		dprintf2("new_size: %zd\n", new_size);
		pkey_malloc_records = realloc(pkey_malloc_records, new_size);
		pkey_assert(pkey_malloc_records != NULL);
		rec = &pkey_malloc_records[old_nr_records];
		/*
		 * realloc() does not initialize memory, so zero it from
		 * the first new record all the way to the end.
		 */
		memset(rec, 0, (new_nr_records - old_nr_records) * sizeof(*rec));
		pkey_malloc_records_capacity = new_nr_records;
	}
	dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
		(int)(rec - pkey_malloc_records), rec, ptr, size);
	rec->ptr = ptr;
	rec->size = size;
	rec->prot = prot;
	pkey_last_malloc_record = rec;
	nr_pkey_malloc_records++;
}

/*
 * munmap() the recorded mapping that contains 'ptr' and release its
 * record.  Asserts if no live record covers 'ptr'.
 */
void free_pkey_malloc(void *ptr)
{
	long i;
	int ret;

	dprintf3("%s(%p)\n", __func__, ptr);
	/* scan every slot: live records need not be packed at the front */
	for (i = 0; i < pkey_malloc_records_capacity; i++) {
		struct pkey_malloc_record *rec = &pkey_malloc_records[i];

		dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
				ptr, i, rec, rec->ptr, rec->size);
		/* a free slot still holds a stale ->size; never match it */
		if (!rec->ptr)
			continue;
		if ((ptr < rec->ptr) ||
		    ((char *)ptr >= (char *)rec->ptr + rec->size))
			continue;

		dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
				ptr, i, rec, rec->ptr, rec->size);
		nr_pkey_malloc_records--;
		ret = munmap(rec->ptr, rec->size);
		dprintf3("munmap ret: %d\n", ret);
		pkey_assert(!ret);
		dprintf3("clearing rec->ptr, rec: %p\n", rec);
		rec->ptr = NULL;
		dprintf3("done clearing rec->ptr, rec: %p\n", rec);
		return;
	}
	pkey_assert(false);
}
695 | |
696 | |
697 | void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) |
698 | { |
699 | void *ptr; |
700 | int ret; |
701 | |
702 | read_pkey_reg(); |
703 | dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n" , __func__, |
704 | size, prot, pkey); |
705 | pkey_assert(pkey < NR_PKEYS); |
706 | ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
707 | pkey_assert(ptr != (void *)-1); |
708 | ret = mprotect_pkey(ptr: (void *)ptr, PAGE_SIZE, orig_prot: prot, pkey); |
709 | pkey_assert(!ret); |
710 | record_pkey_malloc(ptr, size, prot); |
711 | read_pkey_reg(); |
712 | |
713 | dprintf1("%s() for pkey %d @ %p\n" , __func__, pkey, ptr); |
714 | return ptr; |
715 | } |
716 | |
717 | void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) |
718 | { |
719 | int ret; |
720 | void *ptr; |
721 | |
722 | dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n" , __func__, |
723 | size, prot, pkey); |
724 | /* |
725 | * Guarantee we can fit at least one huge page in the resulting |
726 | * allocation by allocating space for 2: |
727 | */ |
728 | size = ALIGN_UP(size, HPAGE_SIZE * 2); |
729 | ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
730 | pkey_assert(ptr != (void *)-1); |
731 | record_pkey_malloc(ptr, size, prot); |
732 | mprotect_pkey(ptr, size, orig_prot: prot, pkey); |
733 | |
734 | dprintf1("unaligned ptr: %p\n" , ptr); |
735 | ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); |
736 | dprintf1(" aligned ptr: %p\n" , ptr); |
737 | ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); |
738 | dprintf1("MADV_HUGEPAGE ret: %d\n" , ret); |
739 | ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); |
740 | dprintf1("MADV_WILLNEED ret: %d\n" , ret); |
741 | memset(ptr, 0, HPAGE_SIZE); |
742 | |
743 | dprintf1("mmap()'d thp for pkey %d @ %p\n" , pkey, ptr); |
744 | return ptr; |
745 | } |
746 | |
747 | int hugetlb_setup_ok; |
748 | #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" |
749 | #define GET_NR_HUGE_PAGES 10 |
750 | void setup_hugetlbfs(void) |
751 | { |
752 | int err; |
753 | int fd; |
754 | char buf[256]; |
755 | long hpagesz_kb; |
756 | long hpagesz_mb; |
757 | |
758 | if (geteuid() != 0) { |
759 | fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n" ); |
760 | return; |
761 | } |
762 | |
763 | cat_into_file(__stringify(GET_NR_HUGE_PAGES), file: "/proc/sys/vm/nr_hugepages" ); |
764 | |
765 | /* |
766 | * Now go make sure that we got the pages and that they |
767 | * are PMD-level pages. Someone might have made PUD-level |
768 | * pages the default. |
769 | */ |
770 | hpagesz_kb = HPAGE_SIZE / 1024; |
771 | hpagesz_mb = hpagesz_kb / 1024; |
772 | sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb); |
773 | fd = open(buf, O_RDONLY); |
774 | if (fd < 0) { |
775 | fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n" , |
776 | hpagesz_mb, strerror(errno)); |
777 | return; |
778 | } |
779 | |
780 | /* -1 to guarantee leaving the trailing \0 */ |
781 | err = read(fd, buf, sizeof(buf)-1); |
782 | close(fd); |
783 | if (err <= 0) { |
784 | fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n" , |
785 | hpagesz_mb, strerror(errno)); |
786 | return; |
787 | } |
788 | |
789 | if (atoi(buf) != GET_NR_HUGE_PAGES) { |
790 | fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n" , |
791 | hpagesz_mb, buf, GET_NR_HUGE_PAGES); |
792 | return; |
793 | } |
794 | |
795 | hugetlb_setup_ok = 1; |
796 | } |
797 | |
798 | void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) |
799 | { |
800 | void *ptr; |
801 | int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; |
802 | |
803 | if (!hugetlb_setup_ok) |
804 | return PTR_ERR_ENOTSUP; |
805 | |
806 | dprintf1("doing %s(%ld, %x, %x)\n" , __func__, size, prot, pkey); |
807 | size = ALIGN_UP(size, HPAGE_SIZE * 2); |
808 | pkey_assert(pkey < NR_PKEYS); |
809 | ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); |
810 | pkey_assert(ptr != (void *)-1); |
811 | mprotect_pkey(ptr, size, orig_prot: prot, pkey); |
812 | |
813 | record_pkey_malloc(ptr, size, prot); |
814 | |
815 | dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n" , pkey, ptr); |
816 | return ptr; |
817 | } |
818 | |
819 | void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) |
820 | { |
821 | void *ptr; |
822 | int fd; |
823 | |
824 | dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n" , __func__, |
825 | size, prot, pkey); |
826 | pkey_assert(pkey < NR_PKEYS); |
827 | fd = open("/dax/foo" , O_RDWR); |
828 | pkey_assert(fd >= 0); |
829 | |
830 | ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); |
831 | pkey_assert(ptr != (void *)-1); |
832 | |
833 | mprotect_pkey(ptr, size, orig_prot: prot, pkey); |
834 | |
835 | record_pkey_malloc(ptr, size, prot); |
836 | |
837 | dprintf1("mmap()'d for pkey %d @ %p\n" , pkey, ptr); |
838 | close(fd); |
839 | return ptr; |
840 | } |
841 | |
842 | void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { |
843 | |
844 | malloc_pkey_with_mprotect, |
845 | malloc_pkey_with_mprotect_subpage, |
846 | malloc_pkey_anon_huge, |
847 | malloc_pkey_hugetlb |
848 | /* can not do direct with the pkey_mprotect() API: |
849 | malloc_pkey_mmap_direct, |
850 | malloc_pkey_mmap_dax, |
851 | */ |
852 | }; |
853 | |
854 | void *malloc_pkey(long size, int prot, u16 pkey) |
855 | { |
856 | void *ret; |
857 | static int malloc_type; |
858 | int nr_malloc_types = ARRAY_SIZE(pkey_malloc); |
859 | |
860 | pkey_assert(pkey < NR_PKEYS); |
861 | |
862 | while (1) { |
863 | pkey_assert(malloc_type < nr_malloc_types); |
864 | |
865 | ret = pkey_malloc[malloc_type](size, prot, pkey); |
866 | pkey_assert(ret != (void *)-1); |
867 | |
868 | malloc_type++; |
869 | if (malloc_type >= nr_malloc_types) |
870 | malloc_type = (random()%nr_malloc_types); |
871 | |
872 | /* try again if the malloc_type we tried is unsupported */ |
873 | if (ret == PTR_ERR_ENOTSUP) |
874 | continue; |
875 | |
876 | break; |
877 | } |
878 | |
879 | dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n" , __func__, |
880 | size, prot, pkey, ret); |
881 | return ret; |
882 | } |
883 | |
884 | int last_pkey_faults; |
885 | #define UNKNOWN_PKEY -2 |
886 | void expected_pkey_fault(int pkey) |
887 | { |
888 | dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n" , |
889 | __func__, last_pkey_faults, pkey_faults); |
890 | dprintf2("%s(%d): last_si_pkey: %d\n" , __func__, pkey, last_si_pkey); |
891 | pkey_assert(last_pkey_faults + 1 == pkey_faults); |
892 | |
893 | /* |
894 | * For exec-only memory, we do not know the pkey in |
895 | * advance, so skip this check. |
896 | */ |
897 | if (pkey != UNKNOWN_PKEY) |
898 | pkey_assert(last_si_pkey == pkey); |
899 | |
900 | #if defined(__i386__) || defined(__x86_64__) /* arch */ |
901 | /* |
902 | * The signal handler shold have cleared out PKEY register to let the |
903 | * test program continue. We now have to restore it. |
904 | */ |
905 | if (__read_pkey_reg() != 0) |
906 | #else /* arch */ |
907 | if (__read_pkey_reg() != shadow_pkey_reg) |
908 | #endif /* arch */ |
909 | pkey_assert(0); |
910 | |
911 | __write_pkey_reg(pkey_reg: shadow_pkey_reg); |
912 | dprintf1("%s() set pkey_reg=%016llx to restore state after signal " |
913 | "nuked it\n" , __func__, shadow_pkey_reg); |
914 | last_pkey_faults = pkey_faults; |
915 | last_si_pkey = -1; |
916 | } |
917 | |
/*
 * Assert that no pkey fault has been counted since the last check, i.e.
 * that last_pkey_faults still equals pkey_faults.  If one has, 'msg' is
 * printed first to identify the offending access.
 */
#define do_not_expect_pkey_fault(msg) do { \
	if (last_pkey_faults != pkey_faults) \
		dprintf0("unexpected PKey fault: %s\n", msg); \
	pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
923 | |
/* File descriptors opened by tests; closed again by close_test_fds(). */
int test_fds[10] = { -1 };
int nr_test_fds;
/* Remember 'fd' for later cleanup.  Asserts when the table is full. */
void __save_test_fd(int fd)
{
	pkey_assert(fd >= 0);
	pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
	test_fds[nr_test_fds++] = fd;
}
933 | |
/*
 * Open "/etc/passwd" read-only, register the descriptor for cleanup and
 * return it.  Asserts (via __save_test_fd()) if the open failed.
 */
int get_test_read_fd(void)
{
	int fd;

	fd = open("/etc/passwd", O_RDONLY);
	__save_test_fd(fd);

	return fd;
}
940 | |
941 | void close_test_fds(void) |
942 | { |
943 | int i; |
944 | |
945 | for (i = 0; i < nr_test_fds; i++) { |
946 | if (test_fds[i] < 0) |
947 | continue; |
948 | close(test_fds[i]); |
949 | test_fds[i] = -1; |
950 | } |
951 | nr_test_fds = 0; |
952 | } |
953 | |
#define barrier() __asm__ __volatile__("": : :"memory")
/*
 * Load and return the int that 'ptr' points to.  The function is not
 * inlined, and a compiler barrier precedes the load to keep GCC from
 * optimizing it away somehow.
 */
__attribute__((noinline)) int read_ptr(int *ptr)
{
	int value;

	barrier();
	value = *ptr;

	return value;
}
963 | |
964 | void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) |
965 | { |
966 | int i, err; |
967 | int max_nr_pkey_allocs; |
968 | int alloced_pkeys[NR_PKEYS]; |
969 | int nr_alloced = 0; |
970 | long size; |
971 | |
972 | pkey_assert(pkey_last_malloc_record); |
973 | size = pkey_last_malloc_record->size; |
974 | /* |
975 | * This is a bit of a hack. But mprotect() requires |
976 | * huge-page-aligned sizes when operating on hugetlbfs. |
977 | * So, make sure that we use something that's a multiple |
978 | * of a huge page when we can. |
979 | */ |
980 | if (size >= HPAGE_SIZE) |
981 | size = HPAGE_SIZE; |
982 | |
983 | /* allocate every possible key and make sure key-0 never got allocated */ |
984 | max_nr_pkey_allocs = NR_PKEYS; |
985 | for (i = 0; i < max_nr_pkey_allocs; i++) { |
986 | int new_pkey = alloc_pkey(); |
987 | pkey_assert(new_pkey != 0); |
988 | |
989 | if (new_pkey < 0) |
990 | break; |
991 | alloced_pkeys[nr_alloced++] = new_pkey; |
992 | } |
993 | /* free all the allocated keys */ |
994 | for (i = 0; i < nr_alloced; i++) { |
995 | int free_ret; |
996 | |
997 | if (!alloced_pkeys[i]) |
998 | continue; |
999 | free_ret = sys_pkey_free(pkey: alloced_pkeys[i]); |
1000 | pkey_assert(!free_ret); |
1001 | } |
1002 | |
1003 | /* attach key-0 in various modes */ |
1004 | err = sys_mprotect_pkey(ptr, size, PROT_READ, 0); |
1005 | pkey_assert(!err); |
1006 | err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0); |
1007 | pkey_assert(!err); |
1008 | err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0); |
1009 | pkey_assert(!err); |
1010 | err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0); |
1011 | pkey_assert(!err); |
1012 | err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0); |
1013 | pkey_assert(!err); |
1014 | } |
1015 | |
1016 | void test_read_of_write_disabled_region(int *ptr, u16 pkey) |
1017 | { |
1018 | int ptr_contents; |
1019 | |
1020 | dprintf1("disabling write access to PKEY[1], doing read\n" ); |
1021 | pkey_write_deny(pkey); |
1022 | ptr_contents = read_ptr(ptr); |
1023 | dprintf1("*ptr: %d\n" , ptr_contents); |
1024 | dprintf1("\n" ); |
1025 | } |
1026 | void test_read_of_access_disabled_region(int *ptr, u16 pkey) |
1027 | { |
1028 | int ptr_contents; |
1029 | |
1030 | dprintf1("disabling access to PKEY[%02d], doing read @ %p\n" , pkey, ptr); |
1031 | read_pkey_reg(); |
1032 | pkey_access_deny(pkey); |
1033 | ptr_contents = read_ptr(ptr); |
1034 | dprintf1("*ptr: %d\n" , ptr_contents); |
1035 | expected_pkey_fault(pkey); |
1036 | } |
1037 | |
1038 | void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, |
1039 | u16 pkey) |
1040 | { |
1041 | int ptr_contents; |
1042 | |
1043 | dprintf1("disabling access to PKEY[%02d], doing read @ %p\n" , |
1044 | pkey, ptr); |
1045 | ptr_contents = read_ptr(ptr); |
1046 | dprintf1("reading ptr before disabling the read : %d\n" , |
1047 | ptr_contents); |
1048 | read_pkey_reg(); |
1049 | pkey_access_deny(pkey); |
1050 | ptr_contents = read_ptr(ptr); |
1051 | dprintf1("*ptr: %d\n" , ptr_contents); |
1052 | expected_pkey_fault(pkey); |
1053 | } |
1054 | |
/*
 * Store to 'ptr' once while nothing is restricted, deny writes through
 * 'pkey', then store again and require that a pkey fault for 'pkey' was
 * taken.
 */
void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
		u16 pkey)
{
	/* First store: made before any restriction is applied. */
	*ptr = __LINE__;
	dprintf1("disabling write access; after accessing the page, "
		"to PKEY[%02d], doing write\n", pkey);
	pkey_write_deny(pkey);
	/* The value stored is never read back; only the fault matters. */
	*ptr = __LINE__;
	expected_pkey_fault(pkey);
}
1065 | |
/*
 * Deny writes through 'pkey', store to 'ptr' and require that a pkey
 * fault for 'pkey' was taken.
 */
void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
	dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
	pkey_write_deny(pkey);
	/* The value stored is never read back; only the fault matters. */
	*ptr = __LINE__;
	expected_pkey_fault(pkey);
}
/*
 * Deny all access through 'pkey', store to 'ptr' and require that a pkey
 * fault for 'pkey' was taken.
 */
void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
	dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
	pkey_access_deny(pkey);
	/* The value stored is never read back; only the fault matters. */
	*ptr = __LINE__;
	expected_pkey_fault(pkey);
}
1080 | |
/*
 * Store to 'ptr' once while nothing is restricted, deny all access
 * through 'pkey', then store again and require that a pkey fault for
 * 'pkey' was taken.
 */
void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
		u16 pkey)
{
	/* First store: made before any restriction is applied. */
	*ptr = __LINE__;
	dprintf1("disabling access; after accessing the page, "
		" to PKEY[%02d], doing write\n", pkey);
	pkey_access_deny(pkey);
	/* The value stored is never read back; only the fault matters. */
	*ptr = __LINE__;
	expected_pkey_fault(pkey);
}
1091 | |
1092 | void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) |
1093 | { |
1094 | int ret; |
1095 | int test_fd = get_test_read_fd(); |
1096 | |
1097 | dprintf1("disabling access to PKEY[%02d], " |
1098 | "having kernel read() to buffer\n" , pkey); |
1099 | pkey_access_deny(pkey); |
1100 | ret = read(test_fd, ptr, 1); |
1101 | dprintf1("read ret: %d\n" , ret); |
1102 | pkey_assert(ret); |
1103 | } |
1104 | void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) |
1105 | { |
1106 | int ret; |
1107 | int test_fd = get_test_read_fd(); |
1108 | |
1109 | pkey_write_deny(pkey); |
1110 | ret = read(test_fd, ptr, 100); |
1111 | dprintf1("read ret: %d\n" , ret); |
1112 | if (ret < 0 && (DEBUG_LEVEL > 0)) |
1113 | perror("verbose read result (OK for this to be bad)" ); |
1114 | pkey_assert(ret); |
1115 | } |
1116 | |
1117 | void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) |
1118 | { |
1119 | int pipe_ret, vmsplice_ret; |
1120 | struct iovec iov; |
1121 | int pipe_fds[2]; |
1122 | |
1123 | pipe_ret = pipe(pipe_fds); |
1124 | |
1125 | pkey_assert(pipe_ret == 0); |
1126 | dprintf1("disabling access to PKEY[%02d], " |
1127 | "having kernel vmsplice from buffer\n" , pkey); |
1128 | pkey_access_deny(pkey); |
1129 | iov.iov_base = ptr; |
1130 | iov.iov_len = PAGE_SIZE; |
1131 | vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); |
1132 | dprintf1("vmsplice() ret: %d\n" , vmsplice_ret); |
1133 | pkey_assert(vmsplice_ret == -1); |
1134 | |
1135 | close(pipe_fds[0]); |
1136 | close(pipe_fds[1]); |
1137 | } |
1138 | |
1139 | void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) |
1140 | { |
1141 | int ignored = 0xdada; |
1142 | int futex_ret; |
1143 | int some_int = __LINE__; |
1144 | |
1145 | dprintf1("disabling write to PKEY[%02d], " |
1146 | "doing futex gunk in buffer\n" , pkey); |
1147 | *ptr = some_int; |
1148 | pkey_write_deny(pkey); |
1149 | futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, |
1150 | &ignored, ignored); |
1151 | if (DEBUG_LEVEL > 0) |
1152 | perror("futex" ); |
1153 | dprintf1("futex() ret: %d\n" , futex_ret); |
1154 | } |
1155 | |
1156 | /* Assumes that all pkeys other than 'pkey' are unallocated */ |
1157 | void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) |
1158 | { |
1159 | int err; |
1160 | int i; |
1161 | |
1162 | /* Note: 0 is the default pkey, so don't mess with it */ |
1163 | for (i = 1; i < NR_PKEYS; i++) { |
1164 | if (pkey == i) |
1165 | continue; |
1166 | |
1167 | dprintf1("trying get/set/free to non-allocated pkey: %2d\n" , i); |
1168 | err = sys_pkey_free(pkey: i); |
1169 | pkey_assert(err); |
1170 | |
1171 | err = sys_pkey_free(pkey: i); |
1172 | pkey_assert(err); |
1173 | |
1174 | err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); |
1175 | pkey_assert(err); |
1176 | } |
1177 | } |
1178 | |
1179 | /* Assumes that all pkeys other than 'pkey' are unallocated */ |
1180 | void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) |
1181 | { |
1182 | int err; |
1183 | int bad_pkey = NR_PKEYS+99; |
1184 | |
1185 | /* pass a known-invalid pkey in: */ |
1186 | err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); |
1187 | pkey_assert(err); |
1188 | } |
1189 | |
/*
 * fork() and let only the child return to the caller; the parent exits
 * with status 0 right away.
 */
void become_child(void)
{
	pid_t forkret = fork();

	pkey_assert(forkret >= 0);
	dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);

	/* Non-zero means we are the parent: our work is done. */
	if (forkret)
		exit(0);

	/* in the child */
}
1204 | |
1205 | /* Assumes that all pkeys other than 'pkey' are unallocated */ |
1206 | void test_pkey_alloc_exhaust(int *ptr, u16 pkey) |
1207 | { |
1208 | int err; |
1209 | int allocated_pkeys[NR_PKEYS] = {0}; |
1210 | int nr_allocated_pkeys = 0; |
1211 | int i; |
1212 | |
1213 | for (i = 0; i < NR_PKEYS*3; i++) { |
1214 | int new_pkey; |
1215 | dprintf1("%s() alloc loop: %d\n" , __func__, i); |
1216 | new_pkey = alloc_pkey(); |
1217 | dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx" |
1218 | " shadow: 0x%016llx\n" , |
1219 | __func__, __LINE__, err, __read_pkey_reg(), |
1220 | shadow_pkey_reg); |
1221 | read_pkey_reg(); /* for shadow checking */ |
1222 | dprintf2("%s() errno: %d ENOSPC: %d\n" , __func__, errno, ENOSPC); |
1223 | if ((new_pkey == -1) && (errno == ENOSPC)) { |
1224 | dprintf2("%s() failed to allocate pkey after %d tries\n" , |
1225 | __func__, nr_allocated_pkeys); |
1226 | } else { |
1227 | /* |
1228 | * Ensure the number of successes never |
1229 | * exceeds the number of keys supported |
1230 | * in the hardware. |
1231 | */ |
1232 | pkey_assert(nr_allocated_pkeys < NR_PKEYS); |
1233 | allocated_pkeys[nr_allocated_pkeys++] = new_pkey; |
1234 | } |
1235 | |
1236 | /* |
1237 | * Make sure that allocation state is properly |
1238 | * preserved across fork(). |
1239 | */ |
1240 | if (i == NR_PKEYS*2) |
1241 | become_child(); |
1242 | } |
1243 | |
1244 | dprintf3("%s()::%d\n" , __func__, __LINE__); |
1245 | |
1246 | /* |
1247 | * On x86: |
1248 | * There are 16 pkeys supported in hardware. Three are |
1249 | * allocated by the time we get here: |
1250 | * 1. The default key (0) |
1251 | * 2. One possibly consumed by an execute-only mapping. |
1252 | * 3. One allocated by the test code and passed in via |
1253 | * 'pkey' to this function. |
1254 | * Ensure that we can allocate at least another 13 (16-3). |
1255 | * |
1256 | * On powerpc: |
1257 | * There are either 5, 28, 29 or 32 pkeys supported in |
1258 | * hardware depending on the page size (4K or 64K) and |
1259 | * platform (powernv or powervm). Four are allocated by |
1260 | * the time we get here. These include pkey-0, pkey-1, |
1261 | * exec-only pkey and the one allocated by the test code. |
1262 | * Ensure that we can allocate the remaining. |
1263 | */ |
1264 | pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1)); |
1265 | |
1266 | for (i = 0; i < nr_allocated_pkeys; i++) { |
1267 | err = sys_pkey_free(pkey: allocated_pkeys[i]); |
1268 | pkey_assert(!err); |
1269 | read_pkey_reg(); /* for shadow checking */ |
1270 | } |
1271 | } |
1272 | |
1273 | void arch_force_pkey_reg_init(void) |
1274 | { |
1275 | #if defined(__i386__) || defined(__x86_64__) /* arch */ |
1276 | u64 *buf; |
1277 | |
1278 | /* |
1279 | * All keys should be allocated and set to allow reads and |
1280 | * writes, so the register should be all 0. If not, just |
1281 | * skip the test. |
1282 | */ |
1283 | if (read_pkey_reg()) |
1284 | return; |
1285 | |
1286 | /* |
1287 | * Just allocate an absurd about of memory rather than |
1288 | * doing the XSAVE size enumeration dance. |
1289 | */ |
1290 | buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
1291 | |
1292 | /* These __builtins require compiling with -mxsave */ |
1293 | |
1294 | /* XSAVE to build a valid buffer: */ |
1295 | __builtin_ia32_xsave(buf, XSTATE_PKEY); |
1296 | /* Clear XSTATE_BV[PKRU]: */ |
1297 | buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY; |
1298 | /* XRSTOR will likely get PKRU back to the init state: */ |
1299 | __builtin_ia32_xrstor(buf, XSTATE_PKEY); |
1300 | |
1301 | munmap(buf, 1*MB); |
1302 | #endif |
1303 | } |
1304 | |
1305 | |
1306 | /* |
1307 | * This is mostly useless on ppc for now. But it will not |
1308 | * hurt anything and should give some better coverage as |
1309 | * a long-running test that continually checks the pkey |
1310 | * register. |
1311 | */ |
1312 | void test_pkey_init_state(int *ptr, u16 pkey) |
1313 | { |
1314 | int err; |
1315 | int allocated_pkeys[NR_PKEYS] = {0}; |
1316 | int nr_allocated_pkeys = 0; |
1317 | int i; |
1318 | |
1319 | for (i = 0; i < NR_PKEYS; i++) { |
1320 | int new_pkey = alloc_pkey(); |
1321 | |
1322 | if (new_pkey < 0) |
1323 | continue; |
1324 | allocated_pkeys[nr_allocated_pkeys++] = new_pkey; |
1325 | } |
1326 | |
1327 | dprintf3("%s()::%d\n" , __func__, __LINE__); |
1328 | |
1329 | arch_force_pkey_reg_init(); |
1330 | |
1331 | /* |
1332 | * Loop for a bit, hoping to get exercise the kernel |
1333 | * context switch code. |
1334 | */ |
1335 | for (i = 0; i < 1000000; i++) |
1336 | read_pkey_reg(); |
1337 | |
1338 | for (i = 0; i < nr_allocated_pkeys; i++) { |
1339 | err = sys_pkey_free(pkey: allocated_pkeys[i]); |
1340 | pkey_assert(!err); |
1341 | read_pkey_reg(); /* for shadow checking */ |
1342 | } |
1343 | } |
1344 | |
1345 | /* |
1346 | * pkey 0 is special. It is allocated by default, so you do not |
1347 | * have to call pkey_alloc() to use it first. Make sure that it |
1348 | * is usable. |
1349 | */ |
1350 | void test_mprotect_with_pkey_0(int *ptr, u16 pkey) |
1351 | { |
1352 | long size; |
1353 | int prot; |
1354 | |
1355 | assert(pkey_last_malloc_record); |
1356 | size = pkey_last_malloc_record->size; |
1357 | /* |
1358 | * This is a bit of a hack. But mprotect() requires |
1359 | * huge-page-aligned sizes when operating on hugetlbfs. |
1360 | * So, make sure that we use something that's a multiple |
1361 | * of a huge page when we can. |
1362 | */ |
1363 | if (size >= HPAGE_SIZE) |
1364 | size = HPAGE_SIZE; |
1365 | prot = pkey_last_malloc_record->prot; |
1366 | |
1367 | /* Use pkey 0 */ |
1368 | mprotect_pkey(ptr, size, orig_prot: prot, pkey: 0); |
1369 | |
1370 | /* Make sure that we can set it back to the original pkey. */ |
1371 | mprotect_pkey(ptr, size, orig_prot: prot, pkey); |
1372 | } |
1373 | |
1374 | void test_ptrace_of_child(int *ptr, u16 pkey) |
1375 | { |
1376 | __attribute__((__unused__)) int peek_result; |
1377 | pid_t child_pid; |
1378 | void *ignored = 0; |
1379 | long ret; |
1380 | int status; |
1381 | /* |
1382 | * This is the "control" for our little expermient. Make sure |
1383 | * we can always access it when ptracing. |
1384 | */ |
1385 | int *plain_ptr_unaligned = malloc(HPAGE_SIZE); |
1386 | int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); |
1387 | |
1388 | /* |
1389 | * Fork a child which is an exact copy of this process, of course. |
1390 | * That means we can do all of our tests via ptrace() and then plain |
1391 | * memory access and ensure they work differently. |
1392 | */ |
1393 | child_pid = fork_lazy_child(); |
1394 | dprintf1("[%d] child pid: %d\n" , getpid(), child_pid); |
1395 | |
1396 | ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); |
1397 | if (ret) |
1398 | perror("attach" ); |
1399 | dprintf1("[%d] attach ret: %ld %d\n" , getpid(), ret, __LINE__); |
1400 | pkey_assert(ret != -1); |
1401 | ret = waitpid(child_pid, &status, WUNTRACED); |
1402 | if ((ret != child_pid) || !(WIFSTOPPED(status))) { |
1403 | fprintf(stderr, "weird waitpid result %ld stat %x\n" , |
1404 | ret, status); |
1405 | pkey_assert(0); |
1406 | } |
1407 | dprintf2("waitpid ret: %ld\n" , ret); |
1408 | dprintf2("waitpid status: %d\n" , status); |
1409 | |
1410 | pkey_access_deny(pkey); |
1411 | pkey_write_deny(pkey); |
1412 | |
1413 | /* Write access, untested for now: |
1414 | ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); |
1415 | pkey_assert(ret != -1); |
1416 | dprintf1("poke at %p: %ld\n", peek_at, ret); |
1417 | */ |
1418 | |
1419 | /* |
1420 | * Try to access the pkey-protected "ptr" via ptrace: |
1421 | */ |
1422 | ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); |
1423 | /* expect it to work, without an error: */ |
1424 | pkey_assert(ret != -1); |
1425 | /* Now access from the current task, and expect an exception: */ |
1426 | peek_result = read_ptr(ptr); |
1427 | expected_pkey_fault(pkey); |
1428 | |
1429 | /* |
1430 | * Try to access the NON-pkey-protected "plain_ptr" via ptrace: |
1431 | */ |
1432 | ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); |
1433 | /* expect it to work, without an error: */ |
1434 | pkey_assert(ret != -1); |
1435 | /* Now access from the current task, and expect NO exception: */ |
1436 | peek_result = read_ptr(ptr: plain_ptr); |
1437 | do_not_expect_pkey_fault("read plain pointer after ptrace" ); |
1438 | |
1439 | ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); |
1440 | pkey_assert(ret != -1); |
1441 | |
1442 | ret = kill(child_pid, SIGKILL); |
1443 | pkey_assert(ret != -1); |
1444 | |
1445 | wait(&status); |
1446 | |
1447 | free(plain_ptr_unaligned); |
1448 | } |
1449 | |
1450 | void *get_pointer_to_instructions(void) |
1451 | { |
1452 | void *p1; |
1453 | |
1454 | p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); |
1455 | dprintf3("&lots_o_noops: %p\n" , &lots_o_noops_around_write); |
1456 | /* lots_o_noops_around_write should be page-aligned already */ |
1457 | assert(p1 == &lots_o_noops_around_write); |
1458 | |
1459 | /* Point 'p1' at the *second* page of the function: */ |
1460 | p1 += PAGE_SIZE; |
1461 | |
1462 | /* |
1463 | * Try to ensure we fault this in on next touch to ensure |
1464 | * we get an instruction fault as opposed to a data one |
1465 | */ |
1466 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
1467 | |
1468 | return p1; |
1469 | } |
1470 | |
1471 | void test_executing_on_unreadable_memory(int *ptr, u16 pkey) |
1472 | { |
1473 | void *p1; |
1474 | int scratch; |
1475 | int ptr_contents; |
1476 | int ret; |
1477 | |
1478 | p1 = get_pointer_to_instructions(); |
1479 | lots_o_noops_around_write(write_to_me: &scratch); |
1480 | ptr_contents = read_ptr(ptr: p1); |
1481 | dprintf2("ptr (%p) contents@%d: %x\n" , p1, __LINE__, ptr_contents); |
1482 | |
1483 | ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); |
1484 | pkey_assert(!ret); |
1485 | pkey_access_deny(pkey); |
1486 | |
1487 | dprintf2("pkey_reg: %016llx\n" , read_pkey_reg()); |
1488 | |
1489 | /* |
1490 | * Make sure this is an *instruction* fault |
1491 | */ |
1492 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
1493 | lots_o_noops_around_write(write_to_me: &scratch); |
1494 | do_not_expect_pkey_fault("executing on PROT_EXEC memory" ); |
1495 | expect_fault_on_read_execonly_key(p1, pkey); |
1496 | } |
1497 | |
1498 | void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) |
1499 | { |
1500 | void *p1; |
1501 | int scratch; |
1502 | int ptr_contents; |
1503 | int ret; |
1504 | |
1505 | dprintf1("%s() start\n" , __func__); |
1506 | |
1507 | p1 = get_pointer_to_instructions(); |
1508 | lots_o_noops_around_write(write_to_me: &scratch); |
1509 | ptr_contents = read_ptr(ptr: p1); |
1510 | dprintf2("ptr (%p) contents@%d: %x\n" , p1, __LINE__, ptr_contents); |
1511 | |
1512 | /* Use a *normal* mprotect(), not mprotect_pkey(): */ |
1513 | ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); |
1514 | pkey_assert(!ret); |
1515 | |
1516 | /* |
1517 | * Reset the shadow, assuming that the above mprotect() |
1518 | * correctly changed PKRU, but to an unknown value since |
1519 | * the actual allocated pkey is unknown. |
1520 | */ |
1521 | shadow_pkey_reg = __read_pkey_reg(); |
1522 | |
1523 | dprintf2("pkey_reg: %016llx\n" , read_pkey_reg()); |
1524 | |
1525 | /* Make sure this is an *instruction* fault */ |
1526 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
1527 | lots_o_noops_around_write(write_to_me: &scratch); |
1528 | do_not_expect_pkey_fault("executing on PROT_EXEC memory" ); |
1529 | expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY); |
1530 | |
1531 | /* |
1532 | * Put the memory back to non-PROT_EXEC. Should clear the |
1533 | * exec-only pkey off the VMA and allow it to be readable |
1534 | * again. Go to PROT_NONE first to check for a kernel bug |
1535 | * that did not clear the pkey when doing PROT_NONE. |
1536 | */ |
1537 | ret = mprotect(p1, PAGE_SIZE, PROT_NONE); |
1538 | pkey_assert(!ret); |
1539 | |
1540 | ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); |
1541 | pkey_assert(!ret); |
1542 | ptr_contents = read_ptr(ptr: p1); |
1543 | do_not_expect_pkey_fault("plain read on recently PROT_EXEC area" ); |
1544 | } |
1545 | |
1546 | #if defined(__i386__) || defined(__x86_64__) |
1547 | void test_ptrace_modifies_pkru(int *ptr, u16 pkey) |
1548 | { |
1549 | u32 new_pkru; |
1550 | pid_t child; |
1551 | int status, ret; |
1552 | int pkey_offset = pkey_reg_xstate_offset(); |
1553 | size_t xsave_size = cpu_max_xsave_size(); |
1554 | void *xsave; |
1555 | u32 *pkey_register; |
1556 | u64 *xstate_bv; |
1557 | struct iovec iov; |
1558 | |
1559 | new_pkru = ~read_pkey_reg(); |
1560 | /* Don't make PROT_EXEC mappings inaccessible */ |
1561 | new_pkru &= ~3; |
1562 | |
1563 | child = fork(); |
1564 | pkey_assert(child >= 0); |
1565 | dprintf3("[%d] fork() ret: %d\n" , getpid(), child); |
1566 | if (!child) { |
1567 | ptrace(PTRACE_TRACEME, 0, 0, 0); |
1568 | /* Stop and allow the tracer to modify PKRU directly */ |
1569 | raise(SIGSTOP); |
1570 | |
1571 | /* |
1572 | * need __read_pkey_reg() version so we do not do shadow_pkey_reg |
1573 | * checking |
1574 | */ |
1575 | if (__read_pkey_reg() != new_pkru) |
1576 | exit(1); |
1577 | |
1578 | /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ |
1579 | raise(SIGSTOP); |
1580 | |
1581 | if (__read_pkey_reg() != 0) |
1582 | exit(1); |
1583 | |
1584 | /* Stop and allow the tracer to examine PKRU */ |
1585 | raise(SIGSTOP); |
1586 | |
1587 | exit(0); |
1588 | } |
1589 | |
1590 | pkey_assert(child == waitpid(child, &status, 0)); |
1591 | dprintf3("[%d] waitpid(%d) status: %x\n" , getpid(), child, status); |
1592 | pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); |
1593 | |
1594 | xsave = (void *)malloc(xsave_size); |
1595 | pkey_assert(xsave > 0); |
1596 | |
1597 | /* Modify the PKRU register directly */ |
1598 | iov.iov_base = xsave; |
1599 | iov.iov_len = xsave_size; |
1600 | ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1601 | pkey_assert(ret == 0); |
1602 | |
1603 | pkey_register = (u32 *)(xsave + pkey_offset); |
1604 | pkey_assert(*pkey_register == read_pkey_reg()); |
1605 | |
1606 | *pkey_register = new_pkru; |
1607 | |
1608 | ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1609 | pkey_assert(ret == 0); |
1610 | |
1611 | /* Test that the modification is visible in ptrace before any execution */ |
1612 | memset(xsave, 0xCC, xsave_size); |
1613 | ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1614 | pkey_assert(ret == 0); |
1615 | pkey_assert(*pkey_register == new_pkru); |
1616 | |
1617 | /* Execute the tracee */ |
1618 | ret = ptrace(PTRACE_CONT, child, 0, 0); |
1619 | pkey_assert(ret == 0); |
1620 | |
1621 | /* Test that the tracee saw the PKRU value change */ |
1622 | pkey_assert(child == waitpid(child, &status, 0)); |
1623 | dprintf3("[%d] waitpid(%d) status: %x\n" , getpid(), child, status); |
1624 | pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); |
1625 | |
1626 | /* Test that the modification is visible in ptrace after execution */ |
1627 | memset(xsave, 0xCC, xsave_size); |
1628 | ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1629 | pkey_assert(ret == 0); |
1630 | pkey_assert(*pkey_register == new_pkru); |
1631 | |
1632 | /* Clear the PKRU bit from XSTATE_BV */ |
1633 | xstate_bv = (u64 *)(xsave + 512); |
1634 | *xstate_bv &= ~(1 << 9); |
1635 | |
1636 | ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1637 | pkey_assert(ret == 0); |
1638 | |
1639 | /* Test that the modification is visible in ptrace before any execution */ |
1640 | memset(xsave, 0xCC, xsave_size); |
1641 | ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1642 | pkey_assert(ret == 0); |
1643 | pkey_assert(*pkey_register == 0); |
1644 | |
1645 | ret = ptrace(PTRACE_CONT, child, 0, 0); |
1646 | pkey_assert(ret == 0); |
1647 | |
1648 | /* Test that the tracee saw the PKRU value go to 0 */ |
1649 | pkey_assert(child == waitpid(child, &status, 0)); |
1650 | dprintf3("[%d] waitpid(%d) status: %x\n" , getpid(), child, status); |
1651 | pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); |
1652 | |
1653 | /* Test that the modification is visible in ptrace after execution */ |
1654 | memset(xsave, 0xCC, xsave_size); |
1655 | ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); |
1656 | pkey_assert(ret == 0); |
1657 | pkey_assert(*pkey_register == 0); |
1658 | |
1659 | ret = ptrace(PTRACE_CONT, child, 0, 0); |
1660 | pkey_assert(ret == 0); |
1661 | pkey_assert(child == waitpid(child, &status, 0)); |
1662 | dprintf3("[%d] waitpid(%d) status: %x\n" , getpid(), child, status); |
1663 | pkey_assert(WIFEXITED(status)); |
1664 | pkey_assert(WEXITSTATUS(status) == 0); |
1665 | free(xsave); |
1666 | } |
1667 | #endif |
1668 | |
1669 | void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) |
1670 | { |
1671 | int size = PAGE_SIZE; |
1672 | int sret; |
1673 | |
1674 | if (cpu_has_pkeys()) { |
1675 | dprintf1("SKIP: %s: no CPU support\n" , __func__); |
1676 | return; |
1677 | } |
1678 | |
1679 | sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey); |
1680 | pkey_assert(sret < 0); |
1681 | } |
1682 | |
/*
 * Table of test functions.  run_tests_once() walks it from the first
 * entry to the last and calls each one with a test buffer and the pkey
 * attached to that buffer.  test_mprotect_pkey_on_unsupported_cpu() is
 * not listed here; main() calls it directly.
 */
void (*pkey_tests[])(int *ptr, u16 pkey) = {
	test_read_of_write_disabled_region,
	test_read_of_access_disabled_region,
	test_read_of_access_disabled_region_with_page_already_mapped,
	test_write_of_write_disabled_region,
	test_write_of_write_disabled_region_with_page_already_mapped,
	test_write_of_access_disabled_region,
	test_write_of_access_disabled_region_with_page_already_mapped,
	test_kernel_write_of_access_disabled_region,
	test_kernel_write_of_write_disabled_region,
	test_kernel_gup_of_access_disabled_region,
	test_kernel_gup_write_to_write_disabled_region,
	test_executing_on_unreadable_memory,
	test_implicit_mprotect_exec_only_memory,
	test_mprotect_with_pkey_0,
	test_ptrace_of_child,
	test_pkey_init_state,
	test_pkey_syscalls_on_non_allocated_pkey,
	test_pkey_syscalls_bad_args,
	test_pkey_alloc_exhaust,
	test_pkey_alloc_free_attach_pkey0,
	/* Only built on x86, where the function itself is defined. */
#if defined(__i386__) || defined(__x86_64__)
	test_ptrace_modifies_pkru,
#endif
};
1708 | |
1709 | void run_tests_once(void) |
1710 | { |
1711 | int *ptr; |
1712 | int prot = PROT_READ|PROT_WRITE; |
1713 | |
1714 | for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { |
1715 | int pkey; |
1716 | int orig_pkey_faults = pkey_faults; |
1717 | |
1718 | dprintf1("======================\n" ); |
1719 | dprintf1("test %d preparing...\n" , test_nr); |
1720 | |
1721 | tracing_on(); |
1722 | pkey = alloc_random_pkey(); |
1723 | dprintf1("test %d starting with pkey: %d\n" , test_nr, pkey); |
1724 | ptr = malloc_pkey(PAGE_SIZE, prot, pkey); |
1725 | dprintf1("test %d starting...\n" , test_nr); |
1726 | pkey_tests[test_nr](ptr, pkey); |
1727 | dprintf1("freeing test memory: %p\n" , ptr); |
1728 | free_pkey_malloc(ptr); |
1729 | sys_pkey_free(pkey); |
1730 | |
1731 | dprintf1("pkey_faults: %d\n" , pkey_faults); |
1732 | dprintf1("orig_pkey_faults: %d\n" , orig_pkey_faults); |
1733 | |
1734 | tracing_off(); |
1735 | close_test_fds(); |
1736 | |
1737 | printf("test %2d PASSED (iteration %d)\n" , test_nr, iteration_nr); |
1738 | dprintf1("======================\n\n" ); |
1739 | } |
1740 | iteration_nr++; |
1741 | } |
1742 | |
/*
 * Initialize the software shadow copy of the pkey register from the
 * value currently held by the hardware register.
 */
void pkey_setup_shadow(void)
{
	shadow_pkey_reg = __read_pkey_reg();
}
1747 | |
1748 | pid_t parent_pid; |
1749 | |
1750 | void restore_settings_atexit(void) |
1751 | { |
1752 | if (parent_pid == getpid()) |
1753 | cat_into_file(str: buf, file: "/proc/sys/vm/nr_hugepages" ); |
1754 | } |
1755 | |
1756 | void save_settings(void) |
1757 | { |
1758 | int fd; |
1759 | int err; |
1760 | |
1761 | if (geteuid()) |
1762 | return; |
1763 | |
1764 | fd = open("/proc/sys/vm/nr_hugepages" , O_RDONLY); |
1765 | if (fd < 0) { |
1766 | fprintf(stderr, "error opening\n" ); |
1767 | perror("error: " ); |
1768 | exit(__LINE__); |
1769 | } |
1770 | |
1771 | /* -1 to guarantee leaving the trailing \0 */ |
1772 | err = read(fd, buf, sizeof(buf)-1); |
1773 | if (err < 0) { |
1774 | fprintf(stderr, "error reading\n" ); |
1775 | perror("error: " ); |
1776 | exit(__LINE__); |
1777 | } |
1778 | |
1779 | parent_pid = getpid(); |
1780 | atexit(restore_settings_atexit); |
1781 | close(fd); |
1782 | } |
1783 | |
1784 | int main(void) |
1785 | { |
1786 | int nr_iterations = 22; |
1787 | int pkeys_supported = is_pkeys_supported(); |
1788 | |
1789 | srand((unsigned int)time(NULL)); |
1790 | |
1791 | save_settings(); |
1792 | setup_handlers(); |
1793 | |
1794 | printf("has pkeys: %d\n" , pkeys_supported); |
1795 | |
1796 | if (!pkeys_supported) { |
1797 | int size = PAGE_SIZE; |
1798 | int *ptr; |
1799 | |
1800 | printf("running PKEY tests for unsupported CPU/OS\n" ); |
1801 | |
1802 | ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
1803 | assert(ptr != (void *)-1); |
1804 | test_mprotect_pkey_on_unsupported_cpu(ptr, pkey: 1); |
1805 | exit(0); |
1806 | } |
1807 | |
1808 | pkey_setup_shadow(); |
1809 | printf("startup pkey_reg: %016llx\n" , read_pkey_reg()); |
1810 | setup_hugetlbfs(); |
1811 | |
1812 | while (nr_iterations-- > 0) |
1813 | run_tests_once(); |
1814 | |
1815 | printf("done (all tests OK)\n" ); |
1816 | return 0; |
1817 | } |
1818 | |