1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * include/linux/userfaultfd_k.h |
4 | * |
5 | * Copyright (C) 2015 Red Hat, Inc. |
6 | * |
7 | */ |
8 | |
9 | #ifndef _LINUX_USERFAULTFD_K_H |
10 | #define _LINUX_USERFAULTFD_K_H |
11 | |
12 | #ifdef CONFIG_USERFAULTFD |
13 | |
14 | #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ |
15 | |
16 | #include <linux/fcntl.h> |
17 | #include <linux/mm.h> |
18 | #include <linux/swap.h> |
19 | #include <linux/swapops.h> |
20 | #include <asm-generic/pgtable_uffd.h> |
21 | #include <linux/hugetlb_inline.h> |
22 | |
/* Union of every VM flag that marks a VMA as userfaultfd-registered. */
#define __VM_UFFD_FLAGS (VM_UFFD_MINOR | VM_UFFD_WP | VM_UFFD_MISSING)
25 | |
26 | /* |
27 | * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining |
28 | * new flags, since they might collide with O_* ones. We want |
29 | * to re-use O_* flags that couldn't possibly have a meaning |
30 | * from userfaultfd, in order to leave a free define-space for |
31 | * shared O_* flags. |
32 | */ |
/* userfaultfd creation flags, aliased onto the matching O_* values. */
#define UFFD_CLOEXEC O_CLOEXEC
#define UFFD_NONBLOCK O_NONBLOCK

/* The O_* flags that userfaultfd shares with fcntl. */
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
/*
 * Every flag userfaultfd accepts. This must be built from the UFFD_* macro
 * defined above rather than eventfd's EFD_SHARED_FCNTL_FLAGS, which this
 * header neither defines nor includes.
 */
#define UFFD_FLAGS_SET (UFFD_SHARED_FCNTL_FLAGS)
38 | |
39 | extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); |
40 | |
41 | /* A combined operation mode + behavior flags. */ |
42 | typedef unsigned int __bitwise uffd_flags_t; |
43 | |
44 | /* Mutually exclusive modes of operation. */ |
45 | enum mfill_atomic_mode { |
46 | MFILL_ATOMIC_COPY, |
47 | MFILL_ATOMIC_ZEROPAGE, |
48 | MFILL_ATOMIC_CONTINUE, |
49 | MFILL_ATOMIC_POISON, |
50 | NR_MFILL_ATOMIC_MODES, |
51 | }; |
52 | |
53 | #define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1) |
54 | #define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr)) |
55 | #define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr)) |
56 | #define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1)) |
57 | |
58 | static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) |
59 | { |
60 | return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected); |
61 | } |
62 | |
63 | static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode) |
64 | { |
65 | flags &= ~MFILL_ATOMIC_MODE_MASK; |
66 | return flags | ((__force uffd_flags_t) mode); |
67 | } |
68 | |
/*
 * Behavior flags. They occupy the bits above the mode field and apply
 * regardless of which mode is selected.
 */
#define MFILL_ATOMIC_WP (MFILL_ATOMIC_FLAG(0))
71 | |
72 | extern int mfill_atomic_install_pte(pmd_t *dst_pmd, |
73 | struct vm_area_struct *dst_vma, |
74 | unsigned long dst_addr, struct page *page, |
75 | bool newly_allocated, uffd_flags_t flags); |
76 | |
77 | extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start, |
78 | unsigned long src_start, unsigned long len, |
79 | atomic_t *mmap_changing, uffd_flags_t flags); |
80 | extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, |
81 | unsigned long dst_start, |
82 | unsigned long len, |
83 | atomic_t *mmap_changing); |
84 | extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, |
85 | unsigned long len, atomic_t *mmap_changing, |
86 | uffd_flags_t flags); |
87 | extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, |
88 | unsigned long len, atomic_t *mmap_changing, |
89 | uffd_flags_t flags); |
90 | extern int mwriteprotect_range(struct mm_struct *dst_mm, |
91 | unsigned long start, unsigned long len, |
92 | bool enable_wp, atomic_t *mmap_changing); |
93 | extern long uffd_wp_range(struct vm_area_struct *vma, |
94 | unsigned long start, unsigned long len, bool enable_wp); |
95 | |
96 | /* mm helpers */ |
97 | static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, |
98 | struct vm_userfaultfd_ctx vm_ctx) |
99 | { |
100 | return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; |
101 | } |
102 | |
103 | /* |
104 | * Never enable huge pmd sharing on some uffd registered vmas: |
105 | * |
106 | * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. |
107 | * |
108 | * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for |
109 | * VMAs which share huge pmds. (If you have two mappings to the same |
110 | * underlying pages, and fault in the non-UFFD-registered one with a write, |
111 | * with huge pmd sharing this would *also* setup the second UFFD-registered |
112 | * mapping, and we'd not get minor faults.) |
113 | */ |
114 | static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) |
115 | { |
116 | return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); |
117 | } |
118 | |
119 | /* |
120 | * Don't do fault around for either WP or MINOR registered uffd range. For |
121 | * MINOR registered range, fault around will be a total disaster and ptes can |
122 | * be installed without notifications; for WP it should mostly be fine as long |
123 | * as the fault around checks for pte_none() before the installation, however |
124 | * to be super safe we just forbid it. |
125 | */ |
126 | static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) |
127 | { |
128 | return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); |
129 | } |
130 | |
131 | static inline bool userfaultfd_missing(struct vm_area_struct *vma) |
132 | { |
133 | return vma->vm_flags & VM_UFFD_MISSING; |
134 | } |
135 | |
136 | static inline bool userfaultfd_wp(struct vm_area_struct *vma) |
137 | { |
138 | return vma->vm_flags & VM_UFFD_WP; |
139 | } |
140 | |
141 | static inline bool userfaultfd_minor(struct vm_area_struct *vma) |
142 | { |
143 | return vma->vm_flags & VM_UFFD_MINOR; |
144 | } |
145 | |
146 | static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, |
147 | pte_t pte) |
148 | { |
149 | return userfaultfd_wp(vma) && pte_uffd_wp(pte); |
150 | } |
151 | |
152 | static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, |
153 | pmd_t pmd) |
154 | { |
155 | return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); |
156 | } |
157 | |
158 | static inline bool userfaultfd_armed(struct vm_area_struct *vma) |
159 | { |
160 | return vma->vm_flags & __VM_UFFD_FLAGS; |
161 | } |
162 | |
163 | static inline bool vma_can_userfault(struct vm_area_struct *vma, |
164 | unsigned long vm_flags, |
165 | bool wp_async) |
166 | { |
167 | vm_flags &= __VM_UFFD_FLAGS; |
168 | |
169 | if ((vm_flags & VM_UFFD_MINOR) && |
170 | (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) |
171 | return false; |
172 | |
173 | /* |
174 | * If wp async enabled, and WP is the only mode enabled, allow any |
175 | * memory type. |
176 | */ |
177 | if (wp_async && (vm_flags == VM_UFFD_WP)) |
178 | return true; |
179 | |
180 | #ifndef CONFIG_PTE_MARKER_UFFD_WP |
181 | /* |
182 | * If user requested uffd-wp but not enabled pte markers for |
183 | * uffd-wp, then shmem & hugetlbfs are not supported but only |
184 | * anonymous. |
185 | */ |
186 | if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) |
187 | return false; |
188 | #endif |
189 | |
190 | /* By default, allow any of anon|shmem|hugetlb */ |
191 | return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || |
192 | vma_is_shmem(vma); |
193 | } |
194 | |
/*
 * VMA lifecycle hooks and per-VMA uffd queries. Only the declarations are
 * provided here; builds without CONFIG_USERFAULTFD get inline stubs with
 * the same signatures instead.
 */
int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *l);
void dup_userfaultfd_complete(struct list_head *l);

void mremap_userfaultfd_prep(struct vm_area_struct *vma,
			     struct vm_userfaultfd_ctx *ctx);
void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
				 unsigned long from, unsigned long to,
				 unsigned long len);

bool userfaultfd_remove(struct vm_area_struct *vma,
			unsigned long start, unsigned long end);

int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
			   unsigned long end, struct list_head *uf);
void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf);
bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
bool userfaultfd_wp_async(struct vm_area_struct *vma);
214 | |
215 | #else /* CONFIG_USERFAULTFD */ |
216 | |
217 | /* mm helpers */ |
218 | static inline vm_fault_t handle_userfault(struct vm_fault *vmf, |
219 | unsigned long reason) |
220 | { |
221 | return VM_FAULT_SIGBUS; |
222 | } |
223 | |
/*
 * !CONFIG_USERFAULTFD stub: does nothing and returns 0.
 *
 * The return type is long, so return an integer 0 rather than the bool
 * literal `false`.
 */
static inline long uffd_wp_range(struct vm_area_struct *vma,
				 unsigned long start, unsigned long len,
				 bool enable_wp)
{
	return 0;
}
230 | |
231 | static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, |
232 | struct vm_userfaultfd_ctx vm_ctx) |
233 | { |
234 | return true; |
235 | } |
236 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
	return false;
}
241 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
	return false;
}
246 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool userfaultfd_minor(struct vm_area_struct *vma)
{
	return false;
}
251 | |
252 | static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, |
253 | pte_t pte) |
254 | { |
255 | return false; |
256 | } |
257 | |
258 | static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, |
259 | pmd_t pmd) |
260 | { |
261 | return false; |
262 | } |
263 | |
264 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
	return false;
}
269 | |
/* !CONFIG_USERFAULTFD stub: does nothing and returns 0. */
static inline int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *l)
{
	return 0;
}
275 | |
/* !CONFIG_USERFAULTFD stub: intentionally empty. */
static inline void dup_userfaultfd_complete(struct list_head *l)
{
	/* nothing to do */
}
279 | |
/* !CONFIG_USERFAULTFD stub: intentionally empty. */
static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
					   struct vm_userfaultfd_ctx *ctx)
{
	/* nothing to do */
}
284 | |
/* !CONFIG_USERFAULTFD stub: intentionally empty. */
static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
					       unsigned long from, unsigned long to,
					       unsigned long len)
{
	/* nothing to do */
}
291 | |
/* !CONFIG_USERFAULTFD stub: always returns true. */
static inline bool userfaultfd_remove(struct vm_area_struct *vma,
				      unsigned long start, unsigned long end)
{
	return true;
}
298 | |
/* !CONFIG_USERFAULTFD stub: does nothing and returns 0. */
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end,
					 struct list_head *uf)
{
	return 0;
}
305 | |
/* !CONFIG_USERFAULTFD stub: intentionally empty. */
static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
					      struct list_head *uf)
{
	/* nothing to do */
}
310 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool uffd_disable_fault_around(struct vm_area_struct *vma)
{
	return false;
}
315 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma)
{
	return false;
}
320 | |
/* !CONFIG_USERFAULTFD stub: always returns false. */
static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
{
	return false;
}
325 | |
326 | #endif /* CONFIG_USERFAULTFD */ |
327 | |
/*
 * Report whether @vma needs pte markers for uffd write-protect.
 *
 * Markers are only relevant in wr-protect mode. File-based (non-anonymous)
 * uffd-wp always needs them; anonymous uffd-wp needs them only when
 * WP_UNPOPULATED is enabled (to apply markers on zero pages).
 */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
{
	if (!userfaultfd_wp(vma))
		return false;

	return !vma_is_anonymous(vma) || userfaultfd_wp_unpopulated(vma);
}
344 | |
345 | static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) |
346 | { |
347 | #ifdef CONFIG_PTE_MARKER_UFFD_WP |
348 | return is_pte_marker_entry(entry) && |
349 | (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); |
350 | #else |
351 | return false; |
352 | #endif |
353 | } |
354 | |
355 | static inline bool pte_marker_uffd_wp(pte_t pte) |
356 | { |
357 | #ifdef CONFIG_PTE_MARKER_UFFD_WP |
358 | swp_entry_t entry; |
359 | |
360 | if (!is_swap_pte(pte)) |
361 | return false; |
362 | |
363 | entry = pte_to_swp_entry(pte); |
364 | |
365 | return pte_marker_entry_uffd_wp(entry); |
366 | #else |
367 | return false; |
368 | #endif |
369 | } |
370 | |
371 | /* |
372 | * Returns true if this is a swap pte and was uffd-wp wr-protected in either |
373 | * forms (pte marker or a normal swap pte), false otherwise. |
374 | */ |
375 | static inline bool pte_swp_uffd_wp_any(pte_t pte) |
376 | { |
377 | #ifdef CONFIG_PTE_MARKER_UFFD_WP |
378 | if (!is_swap_pte(pte)) |
379 | return false; |
380 | |
381 | if (pte_swp_uffd_wp(pte)) |
382 | return true; |
383 | |
384 | if (pte_marker_uffd_wp(pte)) |
385 | return true; |
386 | #endif |
387 | return false; |
388 | } |
389 | |
390 | #endif /* _LINUX_USERFAULTFD_K_H */ |
391 | |