1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * PowerPC Memory Protection Keys management |
4 | * |
5 | * Copyright 2017, Ram Pai, IBM Corporation. |
6 | */ |
7 | |
8 | #include <asm/mman.h> |
9 | #include <asm/mmu_context.h> |
10 | #include <asm/mmu.h> |
11 | #include <asm/setup.h> |
12 | #include <asm/smp.h> |
13 | #include <asm/firmware.h> |
14 | |
15 | #include <linux/pkeys.h> |
16 | #include <linux/of_fdt.h> |
17 | |
18 | |
int num_pkey; /* Max number of pkeys supported */
/*
 * Keys marked in the reservation list cannot be allocated by userspace.
 * Bit N of the mask stands for pkey N.
 */
u32 reserved_allocation_mask __ro_after_init;

/*
 * Bits set for the initially allocated keys. Copied into every new mm's
 * allocation map by pkey_mm_init().
 */
static u32 initial_allocation_mask __ro_after_init;

/*
 * Even if we allocate keys with sys_pkey_alloc(), we need to make sure
 * other threads still find access denied when using the same keys.
 *
 * The defaults therefore start out with every deny bit set: both AMR bits
 * per key, and the execute-disable bit per key in the IAMR. They are
 * relaxed for individual keys in pkey_early_init_devtree(). default_uamor
 * starts at zero and is filled in by the same function.
 */
u64 default_amr __ro_after_init = ~0x0UL;
u64 default_iamr __ro_after_init = 0x5555555555555555UL;
u64 default_uamor __ro_after_init;
EXPORT_SYMBOL(default_amr);
/*
 * Key used to implement PROT_EXEC mmap. Denies READ/WRITE.
 * We pick key 2 because key 0 is special and key 1 is reserved as per ISA.
 * Set to -1 at init time when an execute-only key cannot be provided.
 */
static int execute_only_key = 2;
/* Set by a PVR check at init time: false on POWER7/POWER7+, true otherwise. */
static bool pkey_execute_disable_supported;
42 | |
43 | |
/*
 * Layout helpers for the AMR/IAMR style key registers.
 *
 * Each key owns an AMR_BITS_PER_PKEY wide field; key 0 occupies the most
 * significant field of the 64-bit register. A set bit denies the
 * corresponding access (read/write in the AMR, execute in the IAMR).
 */
#define AMR_BITS_PER_PKEY	2
#define AMR_RD_BIT		0x1UL
#define AMR_WR_BIT		0x2UL
#define IAMR_EX_BIT		0x1UL
#define PKEY_REG_BITS		(sizeof(u64) * 8)
/*
 * Bit offset of the field belonging to @pkey. The argument is parenthesised
 * so that expressions such as pkeyshift(a << b) expand correctly.
 */
#define pkeyshift(pkey)	(PKEY_REG_BITS - (((pkey) + 1) * AMR_BITS_PER_PKEY))
50 | |
51 | static int __init dt_scan_storage_keys(unsigned long node, |
52 | const char *uname, int depth, |
53 | void *data) |
54 | { |
55 | const char *type = of_get_flat_dt_prop(node, name: "device_type" , NULL); |
56 | const __be32 *prop; |
57 | int *pkeys_total = (int *) data; |
58 | |
59 | /* We are scanning "cpu" nodes only */ |
60 | if (type == NULL || strcmp(type, "cpu" ) != 0) |
61 | return 0; |
62 | |
63 | prop = of_get_flat_dt_prop(node, name: "ibm,processor-storage-keys" , NULL); |
64 | if (!prop) |
65 | return 0; |
66 | *pkeys_total = be32_to_cpu(prop[0]); |
67 | return 1; |
68 | } |
69 | |
70 | static int __init scan_pkey_feature(void) |
71 | { |
72 | int ret; |
73 | int pkeys_total = 0; |
74 | |
75 | /* |
76 | * Pkey is not supported with Radix translation. |
77 | */ |
78 | if (early_radix_enabled()) |
79 | return 0; |
80 | |
81 | ret = of_scan_flat_dt(it: dt_scan_storage_keys, data: &pkeys_total); |
82 | if (ret == 0) { |
83 | /* |
84 | * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device |
85 | * tree. We make this exception since some version of skiboot forgot to |
86 | * expose this property on power8/9. |
87 | */ |
88 | if (!firmware_has_feature(FW_FEATURE_LPAR)) { |
89 | unsigned long pvr = mfspr(SPRN_PVR); |
90 | |
91 | if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || |
92 | PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9 || |
93 | PVR_VER(pvr) == PVR_HX_C2000) |
94 | pkeys_total = 32; |
95 | } |
96 | } |
97 | |
98 | #ifdef CONFIG_PPC_MEM_KEYS |
99 | /* |
100 | * Adjust the upper limit, based on the number of bits supported by |
101 | * arch-neutral code. |
102 | */ |
103 | pkeys_total = min_t(int, pkeys_total, |
104 | ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); |
105 | #endif |
106 | return pkeys_total; |
107 | } |
108 | |
109 | void __init pkey_early_init_devtree(void) |
110 | { |
111 | int pkeys_total, i; |
112 | |
113 | #ifdef CONFIG_PPC_MEM_KEYS |
114 | /* |
115 | * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral |
116 | * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. |
117 | * Ensure that the bits a distinct. |
118 | */ |
119 | BUILD_BUG_ON(PKEY_DISABLE_EXECUTE & |
120 | (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
121 | |
122 | /* |
123 | * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous |
124 | * in the vmaflag. Make sure that is really the case. |
125 | */ |
126 | BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + |
127 | __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) |
128 | != (sizeof(u64) * BITS_PER_BYTE)); |
129 | #endif |
130 | /* |
131 | * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 |
132 | */ |
133 | if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) |
134 | return; |
135 | |
136 | /* scan the device tree for pkey feature */ |
137 | pkeys_total = scan_pkey_feature(); |
138 | if (!pkeys_total) |
139 | goto out; |
140 | |
141 | /* Allow all keys to be modified by default */ |
142 | default_uamor = ~0x0UL; |
143 | |
144 | cur_cpu_spec->mmu_features |= MMU_FTR_PKEY; |
145 | |
146 | /* |
147 | * The device tree cannot be relied to indicate support for |
148 | * execute_disable support. Instead we use a PVR check. |
149 | */ |
150 | if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p)) |
151 | pkey_execute_disable_supported = false; |
152 | else |
153 | pkey_execute_disable_supported = true; |
154 | |
155 | #ifdef CONFIG_PPC_4K_PAGES |
156 | /* |
157 | * The OS can manage only 8 pkeys due to its inability to represent them |
158 | * in the Linux 4K PTE. Mark all other keys reserved. |
159 | */ |
160 | num_pkey = min(8, pkeys_total); |
161 | #else |
162 | num_pkey = pkeys_total; |
163 | #endif |
164 | |
165 | if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) { |
166 | /* |
167 | * Insufficient number of keys to support |
168 | * execute only key. Mark it unavailable. |
169 | */ |
170 | execute_only_key = -1; |
171 | } else { |
172 | /* |
173 | * Mark the execute_only_pkey as not available for |
174 | * user allocation via pkey_alloc. |
175 | */ |
176 | reserved_allocation_mask |= (0x1 << execute_only_key); |
177 | |
178 | /* |
179 | * Deny READ/WRITE for execute_only_key. |
180 | * Allow execute in IAMR. |
181 | */ |
182 | default_amr |= (0x3ul << pkeyshift(execute_only_key)); |
183 | default_iamr &= ~(0x1ul << pkeyshift(execute_only_key)); |
184 | |
185 | /* |
186 | * Clear the uamor bits for this key. |
187 | */ |
188 | default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); |
189 | } |
190 | |
191 | if (unlikely(num_pkey <= 3)) { |
192 | /* |
193 | * Insufficient number of keys to support |
194 | * KUAP/KUEP feature. |
195 | */ |
196 | disable_kuep = true; |
197 | disable_kuap = true; |
198 | WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n" , num_pkey); |
199 | } else { |
200 | /* handle key which is used by kernel for KAUP */ |
201 | reserved_allocation_mask |= (0x1 << 3); |
202 | /* |
203 | * Mark access for kup_key in default amr so that |
204 | * we continue to operate with that AMR in |
205 | * copy_to/from_user(). |
206 | */ |
207 | default_amr &= ~(0x3ul << pkeyshift(3)); |
208 | default_iamr &= ~(0x1ul << pkeyshift(3)); |
209 | default_uamor &= ~(0x3ul << pkeyshift(3)); |
210 | } |
211 | |
212 | /* |
213 | * Allow access for only key 0. And prevent any other modification. |
214 | */ |
215 | default_amr &= ~(0x3ul << pkeyshift(0)); |
216 | default_iamr &= ~(0x1ul << pkeyshift(0)); |
217 | default_uamor &= ~(0x3ul << pkeyshift(0)); |
218 | /* |
219 | * key 0 is special in that we want to consider it an allocated |
220 | * key which is preallocated. We don't allow changing AMR bits |
221 | * w.r.t key 0. But one can pkey_free(key0) |
222 | */ |
223 | initial_allocation_mask |= (0x1 << 0); |
224 | |
225 | /* |
226 | * key 1 is recommended not to be used. PowerISA(3.0) page 1015, |
227 | * programming note. |
228 | */ |
229 | reserved_allocation_mask |= (0x1 << 1); |
230 | default_uamor &= ~(0x3ul << pkeyshift(1)); |
231 | |
232 | /* |
233 | * Prevent the usage of OS reserved keys. Update UAMOR |
234 | * for those keys. Also mark the rest of the bits in the |
235 | * 32 bit mask as reserved. |
236 | */ |
237 | for (i = num_pkey; i < 32 ; i++) { |
238 | reserved_allocation_mask |= (0x1 << i); |
239 | default_uamor &= ~(0x3ul << pkeyshift(i)); |
240 | } |
241 | /* |
242 | * Prevent the allocation of reserved keys too. |
243 | */ |
244 | initial_allocation_mask |= reserved_allocation_mask; |
245 | |
246 | pr_info("Enabling pkeys with max key count %d\n" , num_pkey); |
247 | out: |
248 | /* |
249 | * Setup uamor on boot cpu |
250 | */ |
251 | mtspr(SPRN_UAMOR, default_uamor); |
252 | |
253 | return; |
254 | } |
255 | |
256 | #ifdef CONFIG_PPC_KUEP |
257 | void setup_kuep(bool disabled) |
258 | { |
259 | if (disabled) |
260 | return; |
261 | /* |
262 | * On hash if PKEY feature is not enabled, disable KUAP too. |
263 | */ |
264 | if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) |
265 | return; |
266 | |
267 | if (smp_processor_id() == boot_cpuid) { |
268 | pr_info("Activating Kernel Userspace Execution Prevention\n" ); |
269 | cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP; |
270 | } |
271 | |
272 | /* |
273 | * Radix always uses key0 of the IAMR to determine if an access is |
274 | * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction |
275 | * fetch. |
276 | */ |
277 | mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); |
278 | isync(); |
279 | } |
280 | #endif |
281 | |
282 | #ifdef CONFIG_PPC_KUAP |
283 | void setup_kuap(bool disabled) |
284 | { |
285 | if (disabled) |
286 | return; |
287 | /* |
288 | * On hash if PKEY feature is not enabled, disable KUAP too. |
289 | */ |
290 | if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) |
291 | return; |
292 | |
293 | if (smp_processor_id() == boot_cpuid) { |
294 | pr_info("Activating Kernel Userspace Access Prevention\n" ); |
295 | cur_cpu_spec->mmu_features |= MMU_FTR_KUAP; |
296 | } |
297 | |
298 | /* |
299 | * Set the default kernel AMR values on all cpus. |
300 | */ |
301 | mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); |
302 | isync(); |
303 | } |
304 | #endif |
305 | |
306 | #ifdef CONFIG_PPC_MEM_KEYS |
307 | void pkey_mm_init(struct mm_struct *mm) |
308 | { |
309 | if (!mmu_has_feature(MMU_FTR_PKEY)) |
310 | return; |
311 | mm_pkey_allocation_map(mm) = initial_allocation_mask; |
312 | mm->context.execute_only_pkey = execute_only_key; |
313 | } |
314 | |
315 | static inline void init_amr(int pkey, u8 init_bits) |
316 | { |
317 | u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); |
318 | u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); |
319 | |
320 | current->thread.regs->amr = old_amr | new_amr_bits; |
321 | } |
322 | |
323 | static inline void init_iamr(int pkey, u8 init_bits) |
324 | { |
325 | u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); |
326 | u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); |
327 | |
328 | if (!likely(pkey_execute_disable_supported)) |
329 | return; |
330 | |
331 | current->thread.regs->iamr = old_iamr | new_iamr_bits; |
332 | } |
333 | |
334 | /* |
335 | * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that |
336 | * specified in @init_val. |
337 | */ |
338 | int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, |
339 | unsigned long init_val) |
340 | { |
341 | u64 new_amr_bits = 0x0ul; |
342 | u64 new_iamr_bits = 0x0ul; |
343 | u64 pkey_bits, uamor_pkey_bits; |
344 | |
345 | /* |
346 | * Check whether the key is disabled by UAMOR. |
347 | */ |
348 | pkey_bits = 0x3ul << pkeyshift(pkey); |
349 | uamor_pkey_bits = (default_uamor & pkey_bits); |
350 | |
351 | /* |
352 | * Both the bits in UAMOR corresponding to the key should be set |
353 | */ |
354 | if (uamor_pkey_bits != pkey_bits) |
355 | return -EINVAL; |
356 | |
357 | if (init_val & PKEY_DISABLE_EXECUTE) { |
358 | if (!pkey_execute_disable_supported) |
359 | return -EINVAL; |
360 | new_iamr_bits |= IAMR_EX_BIT; |
361 | } |
362 | init_iamr(pkey, new_iamr_bits); |
363 | |
364 | /* Set the bits we need in AMR: */ |
365 | if (init_val & PKEY_DISABLE_ACCESS) |
366 | new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT; |
367 | else if (init_val & PKEY_DISABLE_WRITE) |
368 | new_amr_bits |= AMR_WR_BIT; |
369 | |
370 | init_amr(pkey, new_amr_bits); |
371 | return 0; |
372 | } |
373 | |
374 | int execute_only_pkey(struct mm_struct *mm) |
375 | { |
376 | return mm->context.execute_only_pkey; |
377 | } |
378 | |
379 | static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) |
380 | { |
381 | /* Do this check first since the vm_flags should be hot */ |
382 | if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) |
383 | return false; |
384 | |
385 | return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); |
386 | } |
387 | |
388 | /* |
389 | * This should only be called for *plain* mprotect calls. |
390 | */ |
391 | int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, |
392 | int pkey) |
393 | { |
394 | /* |
395 | * If the currently associated pkey is execute-only, but the requested |
396 | * protection is not execute-only, move it back to the default pkey. |
397 | */ |
398 | if (vma_is_pkey_exec_only(vma) && (prot != PROT_EXEC)) |
399 | return 0; |
400 | |
401 | /* |
402 | * The requested protection is execute-only. Hence let's use an |
403 | * execute-only pkey. |
404 | */ |
405 | if (prot == PROT_EXEC) { |
406 | pkey = execute_only_pkey(vma->vm_mm); |
407 | if (pkey > 0) |
408 | return pkey; |
409 | } |
410 | |
411 | /* Nothing to override. */ |
412 | return vma_pkey(vma); |
413 | } |
414 | |
415 | static bool pkey_access_permitted(int pkey, bool write, bool execute) |
416 | { |
417 | int pkey_shift; |
418 | u64 amr; |
419 | |
420 | pkey_shift = pkeyshift(pkey); |
421 | if (execute) |
422 | return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift)); |
423 | |
424 | amr = current_thread_amr(); |
425 | if (write) |
426 | return !(amr & (AMR_WR_BIT << pkey_shift)); |
427 | |
428 | return !(amr & (AMR_RD_BIT << pkey_shift)); |
429 | } |
430 | |
431 | bool arch_pte_access_permitted(u64 pte, bool write, bool execute) |
432 | { |
433 | if (!mmu_has_feature(MMU_FTR_PKEY)) |
434 | return true; |
435 | |
436 | return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); |
437 | } |
438 | |
439 | /* |
440 | * We only want to enforce protection keys on the current thread because we |
441 | * effectively have no access to AMR/IAMR for other threads or any way to tell |
442 | * which AMR/IAMR in a threaded process we could use. |
443 | * |
444 | * So do not enforce things if the VMA is not from the current mm, or if we are |
445 | * in a kernel thread. |
446 | */ |
447 | bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, |
448 | bool execute, bool foreign) |
449 | { |
450 | if (!mmu_has_feature(MMU_FTR_PKEY)) |
451 | return true; |
452 | /* |
453 | * Do not enforce our key-permissions on a foreign vma. |
454 | */ |
455 | if (foreign || vma_is_foreign(vma)) |
456 | return true; |
457 | |
458 | return pkey_access_permitted(vma_pkey(vma), write, execute); |
459 | } |
460 | |
461 | void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) |
462 | { |
463 | if (!mmu_has_feature(MMU_FTR_PKEY)) |
464 | return; |
465 | |
466 | /* Duplicate the oldmm pkey state in mm: */ |
467 | mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); |
468 | mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; |
469 | } |
470 | |
471 | #endif /* CONFIG_PPC_MEM_KEYS */ |
472 | |