// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDSO implementations.
 *
 * Copyright (C) 2012 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>

enum vdso_abi {
        VDSO_ABI_AA64,
        VDSO_ABI_AA32,
};

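/*
 * The [vvar] mapping is two pages: the vDSO data page at offset 0 and,
 * when CONFIG_TIME_NS is enabled, a per-time-namespace page at offset 1.
 * For tasks in a time namespace the two are swapped at fault time; see
 * vvar_fault() below.
 */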
enum vvar_pages {
        VVAR_DATA_PAGE_OFFSET,
        VVAR_TIMENS_PAGE_OFFSET,
        VVAR_NR_PAGES,
};

struct vdso_abi_info {
        const char *name;
        const char *vdso_code_start;
        const char *vdso_code_end;
        unsigned long vdso_pages;
        /* Data Mapping */
        struct vm_special_mapping *dm;
        /* Code Mapping */
        struct vm_special_mapping *cm;
};

static struct vdso_abi_info vdso_info[] __ro_after_init = {
        [VDSO_ABI_AA64] = {
                .name = "vdso",
                .vdso_code_start = vdso_start,
                .vdso_code_end = vdso_end,
        },
#ifdef CONFIG_COMPAT_VDSO
        [VDSO_ABI_AA32] = {
                .name = "vdso32",
                .vdso_code_start = vdso32_start,
                .vdso_code_end = vdso32_end,
        },
#endif /* CONFIG_COMPAT_VDSO */
};

/*
 * The vDSO data page.
 */
static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;

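/*
 * The vDSO mapping may be moved by userspace (e.g. by CRIU during restore);
 * keep the cached base address in the mm context up to date so consumers of
 * mm->context.vdso, such as sigreturn trampoline setup, see the new location.
 */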
static int vdso_mremap(const struct vm_special_mapping *sm,
                       struct vm_area_struct *new_vma)
{
        current->mm->context.vdso = (void *)new_vma->vm_start;

        return 0;
}

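/*
 * Validate the ELF header of the vDSO image linked into the kernel and
 * record the kernel pages backing it, so they can later be mapped into
 * each process.
 */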
static int __init __vdso_init(enum vdso_abi abi)
{
        int i;
        struct page **vdso_pagelist;
        unsigned long pfn;

        if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
                pr_err("vDSO is not a valid ELF object!\n");
                return -EINVAL;
        }

        vdso_info[abi].vdso_pages = (
                vdso_info[abi].vdso_code_end -
                vdso_info[abi].vdso_code_start) >>
                PAGE_SHIFT;

        vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
                                sizeof(struct page *),
                                GFP_KERNEL);
        if (vdso_pagelist == NULL)
                return -ENOMEM;

        /* Grab the vDSO code pages. */
        pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);

        for (i = 0; i < vdso_info[abi].vdso_pages; i++)
                vdso_pagelist[i] = pfn_to_page(pfn + i);

        vdso_info[abi].cm->pages = vdso_pagelist;

        return 0;
}

#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
        return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
        struct mm_struct *mm = task->mm;
        struct vm_area_struct *vma;
        VMA_ITERATOR(vmi, mm, 0);

        mmap_read_lock(mm);

        for_each_vma(vmi, vma) {
                if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
                        zap_vma_pages(vma);
#ifdef CONFIG_COMPAT_VDSO
                if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
                        zap_vma_pages(vma);
#endif
        }

        mmap_read_unlock(mm);
        return 0;
}
#endif

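/*
 * Fault handler for the [vvar] mapping: pages are inserted lazily rather
 * than at mmap() time, so each fault can pick the page appropriate to the
 * task's time namespace.
 */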
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
                             struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *timens_page = find_timens_vvar_page(vma);
        unsigned long pfn;

        switch (vmf->pgoff) {
        case VVAR_DATA_PAGE_OFFSET:
                if (timens_page)
                        pfn = page_to_pfn(timens_page);
                else
                        pfn = sym_to_pfn(vdso_data);
                break;
#ifdef CONFIG_TIME_NS
        case VVAR_TIMENS_PAGE_OFFSET:
                /*
                 * If a task belongs to a time namespace then a namespace
                 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
                 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
                 * offset.
                 * See also the comment near timens_setup_vdso_data().
                 */
                if (!timens_page)
                        return VM_FAULT_SIGBUS;
                pfn = sym_to_pfn(vdso_data);
                break;
#endif /* CONFIG_TIME_NS */
        default:
                return VM_FAULT_SIGBUS;
        }

        return vmf_insert_pfn(vma, vmf->address, pfn);
}

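/*
 * Map the vvar data pages and the vDSO text contiguously:
 *
 *   [ data page | timens page ][ vDSO code ]
 *
 * with mm->context.vdso pointing at the start of the code pages.
 */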
static int __setup_additional_pages(enum vdso_abi abi,
                                    struct mm_struct *mm,
                                    struct linux_binprm *bprm,
                                    int uses_interp)
{
        unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
        unsigned long gp_flags = 0;
        void *ret;

        BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

        vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
        /* Be sure to map the data page */
        vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;

        vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
        if (IS_ERR_VALUE(vdso_base)) {
                ret = ERR_PTR(vdso_base);
                goto up_fail;
        }

        ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
                                       VM_READ|VM_MAYREAD|VM_PFNMAP,
                                       vdso_info[abi].dm);
        if (IS_ERR(ret))
                goto up_fail;

        if (system_supports_bti_kernel())
                gp_flags = VM_ARM64_BTI;

        vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
        mm->context.vdso = (void *)vdso_base;
        ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
                                       VM_READ|VM_EXEC|gp_flags|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
                                       vdso_info[abi].cm);
        if (IS_ERR(ret))
                goto up_fail;

        return 0;

up_fail:
        mm->context.vdso = NULL;
        return PTR_ERR(ret);
}

#ifdef CONFIG_COMPAT
/*
 * Create and map the vectors page for AArch32 tasks.
 */
enum aarch32_map {
        AA32_MAP_VECTORS, /* kuser helpers */
        AA32_MAP_SIGPAGE,
        AA32_MAP_VVAR,
        AA32_MAP_VDSO,
};

static struct page *aarch32_vectors_page __ro_after_init;
static struct page *aarch32_sig_page __ro_after_init;

static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
                                  struct vm_area_struct *new_vma)
{
        current->mm->context.sigpage = (void *)new_vma->vm_start;

        return 0;
}

static struct vm_special_mapping aarch32_vdso_maps[] = {
        [AA32_MAP_VECTORS] = {
                .name = "[vectors]", /* ABI */
                .pages = &aarch32_vectors_page,
        },
        [AA32_MAP_SIGPAGE] = {
                .name = "[sigpage]", /* ABI */
                .pages = &aarch32_sig_page,
                .mremap = aarch32_sigpage_mremap,
        },
        [AA32_MAP_VVAR] = {
                .name = "[vvar]",
                .fault = vvar_fault,
        },
        [AA32_MAP_VDSO] = {
                .name = "[vdso]",
                .mremap = vdso_mremap,
        },
};

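/*
 * The kuser helpers are copied to the top of a zeroed page so that, once
 * the page is mapped at AARCH32_VECTORS_BASE, they end exactly at the top
 * of the first 4K, at the fixed addresses the AArch32 kuser ABI expects.
 */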
static int aarch32_alloc_kuser_vdso_page(void)
{
        extern char __kuser_helper_start[], __kuser_helper_end[];
        int kuser_sz = __kuser_helper_end - __kuser_helper_start;
        unsigned long vdso_page;

        if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
                return 0;

        vdso_page = get_zeroed_page(GFP_KERNEL);
        if (!vdso_page)
                return -ENOMEM;

        memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
               kuser_sz);
        aarch32_vectors_page = virt_to_page((void *)vdso_page);
        return 0;
}

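/*
 * 0xe7fddef1 lies in the permanently-undefined AArch32 encoding space, so
 * filling the sigpage with it ensures that a stray branch into the page
 * faults instead of silently executing leftover data.
 */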
#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1
static int aarch32_alloc_sigpage(void)
{
        extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
        int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
        __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
        void *sigpage;

        sigpage = (void *)__get_free_page(GFP_KERNEL);
        if (!sigpage)
                return -ENOMEM;

        memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
        memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
        aarch32_sig_page = virt_to_page(sigpage);
        return 0;
}

static int __init __aarch32_alloc_vdso_pages(void)
{
        if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
                return 0;

        vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
        vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];

        return __vdso_init(VDSO_ABI_AA32);
}

static int __init aarch32_alloc_vdso_pages(void)
{
        int ret;

        ret = __aarch32_alloc_vdso_pages();
        if (ret)
                return ret;

        ret = aarch32_alloc_sigpage();
        if (ret)
                return ret;

        return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);

static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
{
        void *ret;

        if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
                return 0;

        /*
         * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's
         * not safe to CoW the page containing the CPU exception vectors.
         */
        ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
                                       VM_READ | VM_EXEC |
                                       VM_MAYREAD | VM_MAYEXEC,
                                       &aarch32_vdso_maps[AA32_MAP_VECTORS]);

        return PTR_ERR_OR_ZERO(ret);
}

static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
        unsigned long addr;
        void *ret;

        addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
        if (IS_ERR_VALUE(addr)) {
                ret = ERR_PTR(addr);
                goto out;
        }

        /*
         * VM_MAYWRITE is required to allow gdb to Copy-on-Write and
         * set breakpoints.
         */
        ret = _install_special_mapping(mm, addr, PAGE_SIZE,
                                       VM_READ | VM_EXEC | VM_MAYREAD |
                                       VM_MAYWRITE | VM_MAYEXEC,
                                       &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
        if (IS_ERR(ret))
                goto out;

        mm->context.sigpage = (void *)addr;

out:
        return PTR_ERR_OR_ZERO(ret);
}

int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        struct mm_struct *mm = current->mm;
        int ret;

        if (mmap_write_lock_killable(mm))
                return -EINTR;

        ret = aarch32_kuser_helpers_setup(mm);
        if (ret)
                goto out;

        if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
                ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
                                               uses_interp);
                if (ret)
                        goto out;
        }

        ret = aarch32_sigreturn_setup(mm);
out:
        mmap_write_unlock(mm);
        return ret;
}
#endif /* CONFIG_COMPAT */

enum aarch64_map {
        AA64_MAP_VVAR,
        AA64_MAP_VDSO,
};

static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
        [AA64_MAP_VVAR] = {
                .name = "[vvar]",
                .fault = vvar_fault,
        },
        [AA64_MAP_VDSO] = {
                .name = "[vdso]",
                .mremap = vdso_mremap,
        },
};

static int __init vdso_init(void)
{
        vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
        vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];

        return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);

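/*
 * Called by the ELF loader at exec time to map the vDSO into the new
 * address space of a native AArch64 task.
 */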
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        struct mm_struct *mm = current->mm;
        int ret;

        if (mmap_write_lock_killable(mm))
                return -EINTR;

        ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
        mmap_write_unlock(mm);

        return ret;
}