1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES |
3 | */ |
4 | #ifndef __IOMMUFD_PRIVATE_H |
5 | #define __IOMMUFD_PRIVATE_H |
6 | |
7 | #include <linux/rwsem.h> |
8 | #include <linux/xarray.h> |
9 | #include <linux/refcount.h> |
10 | #include <linux/uaccess.h> |
11 | #include <linux/iommu.h> |
12 | #include <linux/iova_bitmap.h> |
13 | #include <uapi/linux/iommufd.h> |
14 | |
/*
 * Forward declarations for types that the declarations below refer to by
 * pointer before (or without) a full definition being visible here.
 */
struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;
19 | |
/*
 * Context state that most functions declared in this header operate on.
 * NOTE(review): presumably one instance exists per open iommufd file, given
 * the @file member - confirm against the allocation site.
 */
struct iommufd_ctx {
	struct file *file;
	/*
	 * NOTE(review): presumably the ID -> iommufd_object table consulted by
	 * iommufd_get_object() - confirm.
	 */
	struct xarray objects;
	/* NOTE(review): presumably holds struct iommufd_group entries - confirm */
	struct xarray groups;
	/*
	 * Woken by iommufd_put_object() when an object's shortterm_users count
	 * drops to zero.
	 */
	wait_queue_head_t destroy_wait;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};
31 | |
/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports both a design where IOAS's are 1:1 with a domain (eg because the
 * domain is HW customized), or where the IOAS is 1:N with multiple generic
 * domains. The io_pagetable holds an interval tree of iopt_areas which point
 * to shared iopt_pages which hold the pfns mapped to the page table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	/* Outermost lock in the ordering documented above */
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	/* Taken after domains_rwsem and before pages::mutex */
	struct rw_semaphore iova_rwsem;
	/* Interval tree of iopt_areas, as described above */
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};
57 | |
/*
 * io_pagetable setup/teardown, page-list lookup and map/unmap entry points.
 * The definitions live outside this header.
 */
void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
/*
 * Flag bits.
 * NOTE(review): presumably passed in the @flags argument of the map functions
 * below - confirm against their definitions.
 */
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
/* Both unmap calls take an @unmapped output pointer in addition to the return code */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
76 | |
/*
 * Dirty-tracking entry points; each takes the io_pagetable together with one
 * iommu_domain. Defined outside this header.
 */
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);
83 | |
/*
 * Unmap notification, domain add/remove, and allowed/reserved IOVA management
 * for an io_pagetable. Defined outside this header.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
/*
 * Reservations carry an opaque @owner pointer; iopt_remove_reserved_iova()
 * takes only that pointer.
 * NOTE(review): presumably it drops every range reserved with the same owner
 * - confirm.
 */
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);
102 | |
/*
 * Bundle passed to the handlers that take a struct iommufd_ucmd: the owning
 * context plus the user buffer and the kernel copy of the command.
 */
struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	/* Userspace buffer that iommufd_ucmd_respond() copies the response to */
	void __user *ubuffer;
	/* Upper bound on the number of bytes iommufd_ucmd_respond() copies back */
	u32 user_size;
	/* Kernel-side command data; the source of the copy in iommufd_ucmd_respond() */
	void *cmd;
};
109 | |
/*
 * Takes a raw ioctl command number and argument for the given context.
 * NOTE(review): presumably the VFIO-compatibility ioctl entry point - confirm
 * against the definition.
 */
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);
112 | |
113 | /* Copy the response in ucmd->cmd back to userspace. */ |
114 | static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, |
115 | size_t cmd_len) |
116 | { |
117 | if (copy_to_user(to: ucmd->ubuffer, from: ucmd->cmd, |
118 | min_t(size_t, ucmd->user_size, cmd_len))) |
119 | return -EFAULT; |
120 | return 0; |
121 | } |
122 | |
/* Type tag stored in iommufd_object::type. */
enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	/* Alias with the same value as IOMMUFD_OBJ_NONE */
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_DEVICE,
	IOMMUFD_OBJ_HWPT_PAGING,
	IOMMUFD_OBJ_HWPT_NESTED,
	IOMMUFD_OBJ_IOAS,
	IOMMUFD_OBJ_ACCESS,
	/* Only present when the selftest support is built in */
#ifdef CONFIG_IOMMUFD_TEST
	IOMMUFD_OBJ_SELFTEST,
#endif
	/* Last enumerator: one greater than the highest type value above */
	IOMMUFD_OBJ_MAX,
};
136 | |
/*
 * Base struct for all objects with a userspace ID handle.
 *
 * iommufd_lock_obj() takes one reference on each of the two counts and
 * iommufd_put_object() drops one from each.
 */
struct iommufd_object {
	refcount_t shortterm_users;
	refcount_t users;
	enum iommufd_object_type type;
	unsigned int id;
};
144 | |
145 | static inline bool iommufd_lock_obj(struct iommufd_object *obj) |
146 | { |
147 | if (!refcount_inc_not_zero(r: &obj->users)) |
148 | return false; |
149 | if (!refcount_inc_not_zero(r: &obj->shortterm_users)) { |
150 | /* |
151 | * If the caller doesn't already have a ref on obj this must be |
152 | * called under the xa_lock. Otherwise the caller is holding a |
153 | * ref on users. Thus it cannot be one before this decrement. |
154 | */ |
155 | refcount_dec(r: &obj->users); |
156 | return false; |
157 | } |
158 | return true; |
159 | } |
160 | |
/*
 * Look up an object by @id and expected @type in the given context. Defined
 * outside this header.
 * NOTE(review): presumably returns an ERR_PTR() on failure and a referenced
 * object on success - confirm against the definition.
 */
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
163 | static inline void iommufd_put_object(struct iommufd_ctx *ictx, |
164 | struct iommufd_object *obj) |
165 | { |
166 | /* |
167 | * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees |
168 | * a spurious !0 users with a 0 shortterm_users. |
169 | */ |
170 | refcount_dec(r: &obj->users); |
171 | if (refcount_dec_and_test(r: &obj->shortterm_users)) |
172 | wake_up_interruptible_all(&ictx->destroy_wait); |
173 | } |
174 | |
/* Object abort/finalize entry points; defined outside this header. */
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);

/* Flag bits for the @flags argument of iommufd_object_remove() */
enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
/*
 * Removal primitive used by the inline destroy helpers in this header, which
 * pass both the object pointer and its id. Returns an int status that
 * iommufd_object_destroy_user() treats as failure when non-zero.
 */
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);
187 | |
188 | /* |
189 | * The caller holds a users refcount and wants to destroy the object. At this |
190 | * point the caller has no shortterm_users reference and at least the xarray |
191 | * will be holding one. |
192 | */ |
193 | static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, |
194 | struct iommufd_object *obj) |
195 | { |
196 | int ret; |
197 | |
198 | ret = iommufd_object_remove(ictx, to_destroy: obj, id: obj->id, flags: REMOVE_WAIT_SHORTTERM); |
199 | |
200 | /* |
201 | * If there is a bug and we couldn't destroy the object then we did put |
202 | * back the caller's users refcount and will eventually try to free it |
203 | * again during close. |
204 | */ |
205 | WARN_ON(ret); |
206 | } |
207 | |
208 | /* |
209 | * The HWPT allocated by autodomains is used in possibly many devices and |
210 | * is automatically destroyed when its refcount reaches zero. |
211 | * |
212 | * If userspace uses the HWPT manually, even for a short term, then it will |
213 | * disrupt this refcounting and the auto-free in the kernel will not work. |
214 | * Userspace that tries to use the automatically allocated HWPT must be careful |
215 | * to ensure that it is consistently destroyed, eg by not racing accesses |
216 | * and by not attaching an automatic HWPT to a device manually. |
217 | */ |
218 | static inline void |
219 | iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, |
220 | struct iommufd_object *obj) |
221 | { |
222 | iommufd_object_remove(ictx, to_destroy: obj, id: obj->id, flags: 0); |
223 | } |
224 | |
/*
 * Low-level allocator taking the total size of the containing structure and
 * the object type. Defined outside this header; normally reached through the
 * macros below.
 */
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type);

/*
 * Allocate a structure of type typeof(*(ptr)) that embeds a struct
 * iommufd_object as member @obj, and yield a pointer to the containing type.
 *
 * sizeof(*(ptr)) is passed as the allocation size. The BUILD_BUG_ON_ZERO()
 * term adds zero to that size but breaks the build unless @obj sits at
 * offset 0 of the containing type. container_of() then converts the returned
 * struct iommufd_object pointer to the containing type.
 */
#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

/* Same as above for containing types whose embedded object member is named "obj" */
#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)
240 | |
/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	/* First member, so container_of() on the object is an offset-0 conversion */
	struct iommufd_object obj;
	struct io_pagetable iopt;
	/*
	 * iommufd_hw_pagetable_put() asserts this is not held by its caller.
	 * NOTE(review): presumably protects hwpt_list - confirm.
	 */
	struct mutex mutex;
	/* List head; entries are linked through iommufd_hwpt_paging::hwpt_item */
	struct list_head hwpt_list;
};
261 | |
262 | static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx, |
263 | u32 id) |
264 | { |
265 | return container_of(iommufd_get_object(ictx, id, |
266 | IOMMUFD_OBJ_IOAS), |
267 | struct iommufd_ioas, obj); |
268 | } |
269 | |
/*
 * IOAS allocation, destruction and operations; defined outside this header.
 * NOTE(review): the functions taking a struct iommufd_ucmd are presumably the
 * per-command ioctl handlers - confirm against the dispatch table.
 */
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
/* Takes the io_pagetable and the user-supplied dirty bitmap request */
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
285 | |
/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have kernel
 * owned page tables will be associated with an iommufd_ioas that provides the
 * IOVA to PFN map.
 *
 * This is the common part embedded as the first member ("common") of both
 * struct iommufd_hwpt_paging and struct iommufd_hwpt_nested.
 */
struct iommufd_hw_pagetable {
	/* obj.type distinguishes the paging variant, see hwpt_is_paging() */
	struct iommufd_object obj;
	struct iommu_domain *domain;
};
296 | |
/* Paging variant of a HW pagetable: the common part plus its owning IOAS. */
struct iommufd_hwpt_paging {
	/* First member; to_hwpt_paging() converts from a pointer to it */
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	/*
	 * When set, iommufd_hw_pagetable_put() releases the HWPT through
	 * iommufd_object_put_and_try_destroy() instead of a plain users decrement.
	 */
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};
307 | |
/* Nested variant of a HW pagetable: the common part plus a paging parent. */
struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
};
312 | |
313 | static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt) |
314 | { |
315 | return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING; |
316 | } |
317 | |
318 | static inline struct iommufd_hwpt_paging * |
319 | to_hwpt_paging(struct iommufd_hw_pagetable *hwpt) |
320 | { |
321 | return container_of(hwpt, struct iommufd_hwpt_paging, common); |
322 | } |
323 | |
324 | static inline struct iommufd_hwpt_paging * |
325 | iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) |
326 | { |
327 | return container_of(iommufd_get_object(ucmd->ictx, id, |
328 | IOMMUFD_OBJ_HWPT_PAGING), |
329 | struct iommufd_hwpt_paging, common.obj); |
330 | } |
331 | |
332 | static inline struct iommufd_hw_pagetable * |
333 | iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id) |
334 | { |
335 | return container_of(iommufd_get_object(ucmd->ictx, id, |
336 | IOMMUFD_OBJ_HWPT_NESTED), |
337 | struct iommufd_hw_pagetable, obj); |
338 | } |
339 | |
/*
 * HW pagetable operations; defined outside this header.
 * NOTE(review): the functions taking only a struct iommufd_ucmd are presumably
 * ioctl handlers - confirm against the dispatch table.
 */
int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);

struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
/* Detach takes only the device and returns a HWPT pointer */
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
/* Per-type destroy/abort callbacks operating on the base iommufd_object */
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
358 | |
359 | static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, |
360 | struct iommufd_hw_pagetable *hwpt) |
361 | { |
362 | if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) { |
363 | struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt); |
364 | |
365 | lockdep_assert_not_held(&hwpt_paging->ioas->mutex); |
366 | |
367 | if (hwpt_paging->auto_domain) { |
368 | iommufd_object_put_and_try_destroy(ictx, obj: &hwpt->obj); |
369 | return; |
370 | } |
371 | } |
372 | refcount_dec(r: &hwpt->obj.users); |
373 | } |
374 | |
/*
 * Per-context wrapper around an iommu_group. It is reference counted with a
 * kref and records the owning context, one HWPT pointer and a device list.
 * NOTE(review): @lock presumably protects @hwpt and @device_list - confirm.
 */
struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	/*
	 * NOTE(review): presumably linked through iommufd_device::group_item
	 * - confirm.
	 */
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};
384 | |
/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	/* First member, so container_of() on the object is an offset-0 conversion */
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	/*
	 * NOTE(review): presumably the entry on iommufd_group::device_list
	 * - confirm.
	 */
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
};
399 | |
400 | static inline struct iommufd_device * |
401 | iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) |
402 | { |
403 | return container_of(iommufd_get_object(ucmd->ictx, id, |
404 | IOMMUFD_OBJ_DEVICE), |
405 | struct iommufd_device, obj); |
406 | } |
407 | |
/* Device object destroy callback and a ucmd-based handler; defined elsewhere. */
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
410 | |
/*
 * Object of type struct iommufd_access: it references an IOAS and carries a
 * caller-supplied ops table and opaque data pointer.
 * NOTE(review): presumably represents an in-kernel user of an IOAS that has
 * no iommu_domain of its own - confirm.
 */
struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	/* NOTE(review): presumably protects @ioas and @ioas_unpin - confirm */
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	/* Same name as the id argument taken by iopt_remove_access() */
	u32 iopt_access_list_id;
};
422 | |
/*
 * Add/remove an access on an io_pagetable, plus the access object's destroy
 * callback. Defined outside this header.
 */
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
428 | |
/*
 * Selftest hooks. With CONFIG_IOMMUFD_TEST the real functions are declared
 * here and defined elsewhere; without it, the hooks that other code calls
 * unconditionally are provided as no-op inline stubs.
 */
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
/* Stub: leaves *iova and *flags untouched */
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
/* Stub: always reports false */
static inline bool iommufd_should_fail(void)
{
	return false;
}
/* Stub: nothing to initialise, always returns 0 */
static inline int __init iommufd_test_init(void)
{
	return 0;
}
/* Stub: nothing to tear down */
static inline void iommufd_test_exit(void)
{
}
/* Stub: no device is a mock device when the selftest is not built */
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
461 | #endif |
462 | |