// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Flexible mmap layout support
 *
 * Based on code by Ingo Molnar and Andi Kleen, copyrighted
 * as follows:
 *
 * Copyright 2003-2009 Red Hat Inc.
 * All Rights Reserved.
 * Copyright 2005 Andi Kleen, SUSE Labs.
 * Copyright 2007 Jiri Kosina, SUSE Labs.
 */

#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/limits.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/compat.h>
#include <linux/elf-randomize.h>
#include <asm/elf.h>
#include <asm/io.h>

#include "physaddr.h"

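/*
 * Extra alignment applied by the x86 get_unmapped_area() helpers.
 * flags starts out as -1, meaning no extra alignment has been requested;
 * CPU setup code may enable it later (e.g. as a cache-aliasing
 * workaround on some AMD parts).
 */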
struct va_alignment __read_mostly va_align = {
	.flags = -1,
};

unsigned long task_size_32bit(void)
{
	return IA32_PAGE_OFFSET;
}

unsigned long task_size_64bit(int full_addr_space)
{
	return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
}

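/*
 * Upper bound, in bytes, on the offset that stack randomization may add:
 * __STACK_RND_MASK() pages, scaled by PAGE_SHIFT. Zero when ASLR is
 * disabled for this task. Used below as padding when sizing the gap
 * between the stack and the top-down mmap base.
 */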
static unsigned long stack_maxrandom_size(unsigned long task_size)
{
	unsigned long max = 0;
	if (current->flags & PF_RANDOMIZE) {
		max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit());
		max <<= PAGE_SHIFT;
	}

	return max;
}

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
# define mmap32_rnd_bits  mmap_rnd_compat_bits
# define mmap64_rnd_bits  mmap_rnd_bits
#else
# define mmap32_rnd_bits  mmap_rnd_bits
# define mmap64_rnd_bits  mmap_rnd_bits
#endif

#define SIZE_128M    (128 * 1024 * 1024UL)

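/*
 * Legacy (bottom-up) layout is used when the task carries the
 * ADDR_COMPAT_LAYOUT personality or when the legacy_va_layout sysctl
 * is set; otherwise the top-down layout below the stack is used.
 */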
static int mmap_is_legacy(void)
{
	if (current->personality & ADDR_COMPAT_LAYOUT)
		return 1;

	return sysctl_legacy_va_layout;
}

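/*
 * Produce a page-aligned random offset of @rndbits bits worth of pages,
 * or 0 when randomization is disabled for the current task.
 */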
static unsigned long arch_rnd(unsigned int rndbits)
{
	if (!(current->flags & PF_RANDOMIZE))
		return 0;
	return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
}

unsigned long arch_mmap_rnd(void)
{
	return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}

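/*
 * Compute the top-down mmap base: task_size minus a gap sized from the
 * stack rlimit (padded by the stack guard gap and maximum stack
 * randomization, and clamped to [128 MB, 5/6 of task_size]), minus the
 * mmap random offset. Illustration with common defaults: a 32-bit task
 * with an 8 MB RLIMIT_STACK ends up with a gap below 128 MB, so it is
 * raised to SIZE_128M and the base lands roughly 128 MB + rnd below
 * task_size.
 */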
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
			       struct rlimit *rlim_stack)
{
	unsigned long gap = rlim_stack->rlim_cur;
	unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
	unsigned long gap_min, gap_max;

	/* Values close to RLIM_INFINITY can overflow. */
	if (gap + pad > gap)
		gap += pad;

	/*
	 * Top of mmap area (just below the process stack).
	 * Leave at least a ~128 MB hole, with room for stack randomization.
	 */
	gap_min = SIZE_128M;
	gap_max = (task_size / 6) * 5;

	if (gap < gap_min)
		gap = gap_min;
	else if (gap > gap_max)
		gap = gap_max;

	return PAGE_ALIGN(task_size - gap - rnd);
}

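/*
 * Legacy layout: mappings grow bottom-up from TASK_UNMAPPED_BASE
 * (typically around a third of the task size), offset by the random
 * factor.
 */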
static unsigned long mmap_legacy_base(unsigned long rnd,
				      unsigned long task_size)
{
	return __TASK_UNMAPPED_BASE(task_size) + rnd;
}

/*
 * This function, called very early during the creation of a new
 * process VM image, sets up which VM layout function to use:
 */
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
		unsigned long random_factor, unsigned long task_size,
		struct rlimit *rlim_stack)
{
	*legacy_base = mmap_legacy_base(random_factor, task_size);
	if (mmap_is_legacy())
		*base = *legacy_base;
	else
		*base = mmap_base(random_factor, task_size, rlim_stack);
}

void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
	if (mmap_is_legacy())
		mm->get_unmapped_area = arch_get_unmapped_area;
	else
		mm->get_unmapped_area = arch_get_unmapped_area_topdown;

	arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
			arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
			rlim_stack);

#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
	/*
	 * The mmap syscall mapping base decision depends solely on the
	 * syscall type (64-bit or compat). This applies to both 64-bit
	 * and 32-bit applications. The 64-bit syscall uses mmap_base,
	 * the compat syscall uses mmap_compat_base.
	 */
	arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
			arch_rnd(mmap32_rnd_bits), task_size_32bit(),
			rlim_stack);
#endif
}

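/*
 * Pick the mmap base for the current syscall: compat (32-bit) syscalls
 * use the compat bases set up above, native 64-bit syscalls use the
 * regular ones; @is_legacy selects the bottom-up variant.
 */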
unsigned long get_mmap_base(int is_legacy)
{
	struct mm_struct *mm = current->mm;

#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
	if (in_32bit_syscall()) {
		return is_legacy ? mm->mmap_compat_legacy_base
				 : mm->mmap_compat_base;
	}
#endif
	return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
	return NULL;
}

/**
 * mmap_address_hint_valid - Validate the address hint of mmap
 * @addr: Address hint
 * @len: Mapping length
 *
 * Check whether @addr and @addr + @len result in a valid mapping.
 *
 * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
 *
 * On 64bit with 5-level page tables another sanity check is required
 * because mappings requested by mmap(@addr, 0) which cross the 47-bit
 * virtual address boundary can cause the following theoretical issue:
 *
 * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
 * is below the border of the 47-bit address space and @addr + @len is
 * above the border.
 *
 * With 4-level paging this request succeeds, but the resulting mapping
 * address will always be within the 47-bit virtual address space, because
 * the hint address does not result in a valid mapping and is
 * ignored. Hence applications which are not prepared to handle virtual
 * addresses above 47-bit work correctly.
 *
 * With 5-level paging this request would be granted and result in a
 * mapping which crosses the border of the 47-bit virtual address
 * space. If the application cannot handle addresses above 47-bit this
 * will lead to misbehaviour and hard to diagnose failures.
 *
 * Therefore ignore address hints which would result in a mapping crossing
 * the 47-bit virtual address boundary.
 *
 * Note that in the same scenario with MAP_FIXED the behaviour is
 * different. The request with @addr < 47-bit and @addr + @len > 47-bit
 * fails on a 4-level paging machine but succeeds on a 5-level paging
 * machine. It is reasonable to expect that an application does not rely on
 * the failure of such a fixed mapping request, so the restriction is not
 * applied.
 */
bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
{
	if (TASK_SIZE - len < addr)
		return false;

	return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
}
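
/*
 * Illustration: with DEFAULT_MAP_WINDOW at the 47-bit boundary, a hint of
 * DEFAULT_MAP_WINDOW - 2 * PAGE_SIZE with len = 4 * PAGE_SIZE straddles
 * the boundary and is rejected, while a hint entirely below or entirely
 * above the boundary is accepted.
 */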

/* Can we access it for direct reading/writing? Must be RAM: */
int valid_phys_addr_range(phys_addr_t addr, size_t count)
{
	return addr + count - 1 <= __pa(high_memory - 1);
}

/* Can we access it through mmap? Must be a valid physical address: */
int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
{
	phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;

	return phys_addr_valid(addr + count - 1);
}

/*
 * Only allow root to set high MMIO mappings to PROT_NONE.
 * This prevents an unprivileged user from setting them to PROT_NONE and
 * inverting them, thus pointing them at valid memory for L1TF speculation.
 *
 * Note: locked-down kernels may want to disable the root override.
 */
bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
{
	if (!boot_cpu_has_bug(X86_BUG_L1TF))
		return true;
	if (!__pte_needs_invert(pgprot_val(prot)))
		return true;
	/* If it's real memory always allow */
	if (pfn_valid(pfn))
		return true;
	if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
		return false;
	return true;
}