// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"

#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
        allow_unsafe_interrupts,
        "Allow IOMMUFD to bind to devices even if the platform cannot isolate "
        "the MSI interrupt window. Enabling this is a security weakness.");

static void iommufd_group_release(struct kref *kref)
{
        struct iommufd_group *igroup =
                container_of(kref, struct iommufd_group, ref);

        WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));

        xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
                   NULL, GFP_KERNEL);
        iommu_group_put(igroup->group);
        mutex_destroy(&igroup->lock);
        kfree(igroup);
}

static void iommufd_put_group(struct iommufd_group *group)
{
        kref_put(&group->ref, iommufd_group_release);
}

static bool iommufd_group_try_get(struct iommufd_group *igroup,
                                  struct iommu_group *group)
{
        if (!igroup)
                return false;
        /*
         * group ID's cannot be re-used until the group is put back which does
         * not happen if we could get an igroup pointer under the xa_lock.
         */
        if (WARN_ON(igroup->group != group))
                return false;
        return kref_get_unless_zero(&igroup->ref);
}

/*
 * iommufd needs to store some more data for each iommu_group, we keep a
 * parallel xarray indexed by iommu_group id to hold this instead of putting it
 * in the core structure. To keep things simple the iommufd_group memory is
 * unique within the iommufd_ctx. This makes it easy to check there are no
 * memory leaks.
 */
static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
                                               struct device *dev)
{
        struct iommufd_group *new_igroup;
        struct iommufd_group *cur_igroup;
        struct iommufd_group *igroup;
        struct iommu_group *group;
        unsigned int id;

        group = iommu_group_get(dev);
        if (!group)
                return ERR_PTR(-ENODEV);

        id = iommu_group_id(group);

        xa_lock(&ictx->groups);
        igroup = xa_load(&ictx->groups, id);
        if (iommufd_group_try_get(igroup, group)) {
                xa_unlock(&ictx->groups);
                iommu_group_put(group);
                return igroup;
        }
        xa_unlock(&ictx->groups);

        new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
        if (!new_igroup) {
                iommu_group_put(group);
                return ERR_PTR(-ENOMEM);
        }

        kref_init(&new_igroup->ref);
        mutex_init(&new_igroup->lock);
        INIT_LIST_HEAD(&new_igroup->device_list);
        new_igroup->sw_msi_start = PHYS_ADDR_MAX;
        /* group reference moves into new_igroup */
        new_igroup->group = group;

        /*
         * The ictx is not additionally refcounted here because all objects
         * using an igroup must put it before their destroy completes.
         */
        new_igroup->ictx = ictx;

        /*
         * We dropped the lock so igroup is invalid. NULL is a safe and likely
         * value to assume for the xa_cmpxchg algorithm.
         */
        cur_igroup = NULL;
        xa_lock(&ictx->groups);
        while (true) {
                igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
                                      GFP_KERNEL);
                if (xa_is_err(igroup)) {
                        xa_unlock(&ictx->groups);
                        iommufd_put_group(new_igroup);
                        return ERR_PTR(xa_err(igroup));
                }

                /* new_igroup was successfully installed */
                if (cur_igroup == igroup) {
                        xa_unlock(&ictx->groups);
                        return new_igroup;
                }

                /* Check again if the current group is any good */
                if (iommufd_group_try_get(igroup, group)) {
                        xa_unlock(&ictx->groups);
                        iommufd_put_group(new_igroup);
                        return igroup;
                }
                cur_igroup = igroup;
        }
}

void iommufd_device_destroy(struct iommufd_object *obj)
{
        struct iommufd_device *idev =
                container_of(obj, struct iommufd_device, obj);

        iommu_device_release_dma_owner(idev->dev);
        iommufd_put_group(idev->igroup);
        if (!iommufd_selftest_is_mock_dev(idev->dev))
                iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes an ownership over the device and returns
 * struct iommufd_device pointer, otherwise returns error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind()
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
                                           struct device *dev, u32 *id)
{
        struct iommufd_device *idev;
        struct iommufd_group *igroup;
        int rc;

        /*
         * iommufd always sets IOMMU_CACHE because we offer no way for
         * userspace to restore cache coherency.
         */
        if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
                return ERR_PTR(-EINVAL);

        igroup = iommufd_get_group(ictx, dev);
        if (IS_ERR(igroup))
                return ERR_CAST(igroup);

        /*
         * For historical compat with VFIO the insecure interrupt path is
         * allowed if the module parameter is set. Secure/Isolated means that a
         * MemWr operation from the device (eg a simple DMA) cannot trigger an
         * interrupt outside this iommufd context.
         */
        if (!iommufd_selftest_is_mock_dev(dev) &&
            !iommu_group_has_isolated_msi(igroup->group)) {
                if (!allow_unsafe_interrupts) {
                        rc = -EPERM;
                        goto out_group_put;
                }

                dev_warn(
                        dev,
                        "MSI interrupts are not secure, they cannot be isolated by the platform. "
                        "Check that platform features like interrupt remapping are enabled. "
                        "Use the \"allow_unsafe_interrupts\" module parameter to override\n");
        }

        rc = iommu_device_claim_dma_owner(dev, ictx);
        if (rc)
                goto out_group_put;

        idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
        if (IS_ERR(idev)) {
                rc = PTR_ERR(idev);
                goto out_release_owner;
        }
        idev->ictx = ictx;
        if (!iommufd_selftest_is_mock_dev(dev))
                iommufd_ctx_get(ictx);
        idev->dev = dev;
        idev->enforce_cache_coherency =
                device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
        /* The calling driver is a user until iommufd_device_unbind() */
        refcount_inc(&idev->obj.users);
        /* igroup refcount moves into iommufd_device */
        idev->igroup = igroup;

        /*
         * If the caller fails after this success it must call
         * iommufd_unbind_device() which is safe since we hold this refcount.
         * This also means the device is a leaf in the graph and no other object
         * can take a reference on it.
         */
        iommufd_object_finalize(ictx, &idev->obj);
        *id = idev->obj.id;
        return idev;

out_release_owner:
        iommu_device_release_dma_owner(dev);
out_group_put:
        iommufd_put_group(igroup);
        return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
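
/*
 * Illustrative sketch (not part of the upstream kerneldoc): a consuming driver
 * with driver_managed_dma set would typically bind in its probe path and
 * unbind in its remove path. my_dev_probe(), my_dev_remove() and struct
 * my_vdev are hypothetical names; only the iommufd_device_bind()/unbind()
 * calls come from this file.
 *
 *      static int my_dev_probe(struct iommufd_ctx *ictx, struct device *dev,
 *                              struct my_vdev *vdev)
 *      {
 *              struct iommufd_device *idev;
 *              u32 dev_id;
 *
 *              idev = iommufd_device_bind(ictx, dev, &dev_id);
 *              if (IS_ERR(idev))
 *                      return PTR_ERR(idev);
 *              vdev->idev = idev;
 *              vdev->dev_id = dev_id;  (ID reported back to userspace)
 *              return 0;
 *      }
 *
 *      static void my_dev_remove(struct my_vdev *vdev)
 *      {
 *              iommufd_device_unbind(vdev->idev);
 *      }
 */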

/**
 * iommufd_ctx_has_group - True if any device within the group is bound
 *                         to the ictx
 * @ictx: iommufd file descriptor
 * @group: Pointer to a physical iommu_group struct
 *
 * True if any device within the group has been bound to this ictx, ex. via
 * iommufd_device_bind(), therefore implying ictx ownership of the group.
 */
bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
{
        struct iommufd_object *obj;
        unsigned long index;

        if (!ictx || !group)
                return false;

        xa_lock(&ictx->objects);
        xa_for_each(&ictx->objects, index, obj) {
                if (obj->type == IOMMUFD_OBJ_DEVICE &&
                    container_of(obj, struct iommufd_device, obj)
                                    ->igroup->group == group) {
                        xa_unlock(&ictx->objects);
                        return true;
                }
        }
        xa_unlock(&ictx->objects);
        return false;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. The DMA ownership will return back
 * to unowned with DMA controlled by the DMA API. This invalidates the
 * iommufd_device pointer, other APIs that consume it must not be called
 * concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
        iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);

struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
        return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);

u32 iommufd_device_to_id(struct iommufd_device *idev)
{
        return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);

static int iommufd_group_setup_msi(struct iommufd_group *igroup,
                                   struct iommufd_hwpt_paging *hwpt_paging)
{
        phys_addr_t sw_msi_start = igroup->sw_msi_start;
        int rc;

        /*
         * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us
         * to call iommu_get_msi_cookie() on its behalf. This is necessary to
         * setup the MSI window so iommu_dma_prepare_msi() can install pages
         * into our domain after request_irq(). If it is not done interrupts
         * will not work on this domain.
         *
         * FIXME: This is conceptually broken for iommufd since we want to allow
         * userspace to change the domains, eg switch from an identity IOAS to a
         * DMA IOAS. There is currently no way to create a MSI window that
         * matches what the IRQ layer actually expects in a newly created
         * domain.
         */
        if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
                rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
                                          sw_msi_start);
                if (rc)
                        return rc;

                /*
                 * iommu_get_msi_cookie() can only be called once per domain,
                 * it returns -EBUSY on later calls.
                 */
                hwpt_paging->msi_cookie = true;
        }
        return 0;
}

static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
                                      struct iommufd_device *idev)
{
        int rc;

        lockdep_assert_held(&idev->igroup->lock);

        rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
                                                 idev->dev,
                                                 &idev->igroup->sw_msi_start);
        if (rc)
                return rc;

        if (list_empty(&idev->igroup->device_list)) {
                rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
                if (rc) {
                        iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
                                                  idev->dev);
                        return rc;
                }
        }
        return 0;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
                                struct iommufd_device *idev)
{
        int rc;

        mutex_lock(&idev->igroup->lock);

        if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
                rc = -EINVAL;
                goto err_unlock;
        }

        if (hwpt_is_paging(hwpt)) {
                rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev);
                if (rc)
                        goto err_unlock;
        }

        /*
         * Only attach to the group once for the first device that is in the
         * group. All the other devices will follow this attachment. The user
         * should attach every device individually to the hwpt as the per-device
         * reserved regions are only updated during individual device
         * attachment.
         */
        if (list_empty(&idev->igroup->device_list)) {
                rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
                if (rc)
                        goto err_unresv;
                idev->igroup->hwpt = hwpt;
        }
        refcount_inc(&hwpt->obj.users);
        list_add_tail(&idev->group_item, &idev->igroup->device_list);
        mutex_unlock(&idev->igroup->lock);
        return 0;
err_unresv:
        if (hwpt_is_paging(hwpt))
                iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
                                          idev->dev);
err_unlock:
        mutex_unlock(&idev->igroup->lock);
        return rc;
}

struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev)
{
        struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;

        mutex_lock(&idev->igroup->lock);
        list_del(&idev->group_item);
        if (list_empty(&idev->igroup->device_list)) {
                iommu_detach_group(hwpt->domain, idev->igroup->group);
                idev->igroup->hwpt = NULL;
        }
        if (hwpt_is_paging(hwpt))
                iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
                                          idev->dev);
        mutex_unlock(&idev->igroup->lock);

        /* Caller must destroy hwpt */
        return hwpt;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device *idev,
                         struct iommufd_hw_pagetable *hwpt)
{
        int rc;

        rc = iommufd_hw_pagetable_attach(hwpt, idev);
        if (rc)
                return ERR_PTR(rc);
        return NULL;
}

static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
                                   struct iommufd_hwpt_paging *hwpt_paging)
{
        struct iommufd_device *cur;

        lockdep_assert_held(&igroup->lock);

        list_for_each_entry(cur, &igroup->device_list, group_item)
                iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}

static int
iommufd_group_do_replace_paging(struct iommufd_group *igroup,
                                struct iommufd_hwpt_paging *hwpt_paging)
{
        struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
        struct iommufd_device *cur;
        int rc;

        lockdep_assert_held(&igroup->lock);

        if (!hwpt_is_paging(old_hwpt) ||
            hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
                list_for_each_entry(cur, &igroup->device_list, group_item) {
                        rc = iopt_table_enforce_dev_resv_regions(
                                &hwpt_paging->ioas->iopt, cur->dev, NULL);
                        if (rc)
                                goto err_unresv;
                }
        }

        rc = iommufd_group_setup_msi(igroup, hwpt_paging);
        if (rc)
                goto err_unresv;
        return 0;

err_unresv:
        iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
        return rc;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev,
                          struct iommufd_hw_pagetable *hwpt)
{
        struct iommufd_group *igroup = idev->igroup;
        struct iommufd_hw_pagetable *old_hwpt;
        unsigned int num_devices;
        int rc;

        mutex_lock(&idev->igroup->lock);

        if (igroup->hwpt == NULL) {
                rc = -EINVAL;
                goto err_unlock;
        }

        if (hwpt == igroup->hwpt) {
                mutex_unlock(&idev->igroup->lock);
                return NULL;
        }

        old_hwpt = igroup->hwpt;
        if (hwpt_is_paging(hwpt)) {
                rc = iommufd_group_do_replace_paging(igroup,
                                                     to_hwpt_paging(hwpt));
                if (rc)
                        goto err_unlock;
        }

        rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
        if (rc)
                goto err_unresv;

        if (hwpt_is_paging(old_hwpt) &&
            (!hwpt_is_paging(hwpt) ||
             to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
                iommufd_group_remove_reserved_iova(igroup,
                                                   to_hwpt_paging(old_hwpt));

        igroup->hwpt = hwpt;

        num_devices = list_count_nodes(&igroup->device_list);
        /*
         * Move the refcounts held by the device_list to the new hwpt. Retain a
         * refcount for this thread as the caller will free it.
         */
        refcount_add(num_devices, &hwpt->obj.users);
        if (num_devices > 1)
                WARN_ON(refcount_sub_and_test(num_devices - 1,
                                              &old_hwpt->obj.users));
        mutex_unlock(&idev->igroup->lock);

        /* Caller must destroy old_hwpt */
        return old_hwpt;
err_unresv:
        if (hwpt_is_paging(hwpt))
                iommufd_group_remove_reserved_iova(igroup,
                                                   to_hwpt_paging(old_hwpt));
err_unlock:
        mutex_unlock(&idev->igroup->lock);
        return ERR_PTR(rc);
}

typedef struct iommufd_hw_pagetable *(*attach_fn)(
        struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);

/*
 * When automatically managing the domains we search for a compatible domain in
 * the iopt and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device *idev,
                               struct iommufd_ioas *ioas, u32 *pt_id,
                               attach_fn do_attach)
{
        /*
         * iommufd_hw_pagetable_attach() is called by
         * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
         * iommufd_device_do_attach(). So if we are in this mode then we prefer
         * to use the immediate_attach path as it supports drivers that can't
         * directly allocate a domain.
         */
        bool immediate_attach = do_attach == iommufd_device_do_attach;
        struct iommufd_hw_pagetable *destroy_hwpt;
        struct iommufd_hwpt_paging *hwpt_paging;
        struct iommufd_hw_pagetable *hwpt;

        /*
         * There is no differentiation when domains are allocated, so any domain
         * that is willing to attach to the device is interchangeable with any
         * other.
         */
        mutex_lock(&ioas->mutex);
        list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
                if (!hwpt_paging->auto_domain)
                        continue;

                hwpt = &hwpt_paging->common;
                if (!iommufd_lock_obj(&hwpt->obj))
                        continue;
                destroy_hwpt = (*do_attach)(idev, hwpt);
                if (IS_ERR(destroy_hwpt)) {
                        iommufd_put_object(idev->ictx, &hwpt->obj);
                        /*
                         * -EINVAL means the domain is incompatible with the
                         * device. Other error codes should propagate to
                         * userspace as failure. Success means the domain is
                         * attached.
                         */
                        if (PTR_ERR(destroy_hwpt) == -EINVAL)
                                continue;
                        goto out_unlock;
                }
                *pt_id = hwpt->obj.id;
                iommufd_put_object(idev->ictx, &hwpt->obj);
                goto out_unlock;
        }

        hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
                                                immediate_attach, NULL);
        if (IS_ERR(hwpt_paging)) {
                destroy_hwpt = ERR_CAST(hwpt_paging);
                goto out_unlock;
        }
        hwpt = &hwpt_paging->common;

        if (!immediate_attach) {
                destroy_hwpt = (*do_attach)(idev, hwpt);
                if (IS_ERR(destroy_hwpt))
                        goto out_abort;
        } else {
                destroy_hwpt = NULL;
        }

        hwpt_paging->auto_domain = true;
        *pt_id = hwpt->obj.id;

        iommufd_object_finalize(idev->ictx, &hwpt->obj);
        mutex_unlock(&ioas->mutex);
        return destroy_hwpt;

out_abort:
        iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
        mutex_unlock(&ioas->mutex);
        return destroy_hwpt;
}

static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
                                    attach_fn do_attach)
{
        struct iommufd_hw_pagetable *destroy_hwpt;
        struct iommufd_object *pt_obj;

        pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
        if (IS_ERR(pt_obj))
                return PTR_ERR(pt_obj);

        switch (pt_obj->type) {
        case IOMMUFD_OBJ_HWPT_NESTED:
        case IOMMUFD_OBJ_HWPT_PAGING: {
                struct iommufd_hw_pagetable *hwpt =
                        container_of(pt_obj, struct iommufd_hw_pagetable, obj);

                destroy_hwpt = (*do_attach)(idev, hwpt);
                if (IS_ERR(destroy_hwpt))
                        goto out_put_pt_obj;
                break;
        }
        case IOMMUFD_OBJ_IOAS: {
                struct iommufd_ioas *ioas =
                        container_of(pt_obj, struct iommufd_ioas, obj);

                destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
                                                              do_attach);
                if (IS_ERR(destroy_hwpt))
                        goto out_put_pt_obj;
                break;
        }
        default:
                destroy_hwpt = ERR_PTR(-EINVAL);
                goto out_put_pt_obj;
        }
        iommufd_put_object(idev->ictx, pt_obj);

        /* This destruction has to be after we unlock everything */
        if (destroy_hwpt)
                iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
        return 0;

out_put_pt_obj:
        iommufd_put_object(idev->ictx, pt_obj);
        return PTR_ERR(destroy_hwpt);
}

/**
 * iommufd_device_attach - Connect a device to an iommu_domain
 * @idev: device to attach
 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This connects the device to an iommu_domain, either automatically or manually
 * selected. Once this completes the device could do DMA.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
{
        int rc;

        rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
        if (rc)
                return rc;

        /*
         * Pairs with iommufd_device_detach() - catches caller bugs attempting
         * to destroy a device with an attachment.
         */
        refcount_inc(&idev->obj.users);
        return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
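
/*
 * Illustrative sketch, not taken from a real driver: after a successful bind,
 * attaching to an IOAS ID supplied by userspace returns the auto-selected
 * HWPT_PAGING ID through the same pt_id variable. vdev and ioas_id are
 * hypothetical.
 *
 *      u32 pt_id = ioas_id;
 *      int rc;
 *
 *      rc = iommufd_device_attach(vdev->idev, &pt_id);
 *      if (rc)
 *              return rc;
 *      vdev->hwpt_id = pt_id;  (pt_id now holds the HWPT_PAGING object ID)
 *
 *      ...
 *      iommufd_device_detach(vdev->idev);      (undo, before unbind)
 */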

/**
 * iommufd_device_replace - Change the device's iommu_domain
 * @idev: device to change
 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This is the same as::
 *
 *   iommufd_device_detach();
 *   iommufd_device_attach();
 *
 * If it fails then no change is made to the attachment. The iommu driver may
 * implement this so there is no disruption in translation. This can only be
 * called if iommufd_device_attach() has already succeeded.
 */
int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
{
        return iommufd_device_change_pt(idev, pt_id,
                                        &iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
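
/*
 * Illustrative sketch: switching an already-attached device to another IOAS or
 * HWPT without passing through a blocked state. new_pt_id is a hypothetical
 * value supplied by userspace.
 *
 *      u32 pt_id = new_pt_id;
 *
 *      rc = iommufd_device_replace(vdev->idev, &pt_id);
 *      if (rc)
 *              return rc;      (the old attachment is untouched on failure)
 *      vdev->hwpt_id = pt_id;
 */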

/**
 * iommufd_device_detach - Disconnect a device from an iommu_domain
 * @idev: device to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns back to a blocked DMA translation.
 */
void iommufd_device_detach(struct iommufd_device *idev)
{
        struct iommufd_hw_pagetable *hwpt;

        hwpt = iommufd_hw_pagetable_detach(idev);
        iommufd_hw_pagetable_put(idev->ictx, hwpt);
        refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);

/*
 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
 */
static int iommufd_access_change_ioas(struct iommufd_access *access,
                                      struct iommufd_ioas *new_ioas)
{
        u32 iopt_access_list_id = access->iopt_access_list_id;
        struct iommufd_ioas *cur_ioas = access->ioas;
        int rc;

        lockdep_assert_held(&access->ioas_lock);

        /* We are racing with a concurrent detach, bail */
        if (cur_ioas != access->ioas_unpin)
                return -EBUSY;

        if (cur_ioas == new_ioas)
                return 0;

        /*
         * Set ioas to NULL to block any further iommufd_access_pin_pages().
         * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
         */
        access->ioas = NULL;

        if (new_ioas) {
                rc = iopt_add_access(&new_ioas->iopt, access);
                if (rc) {
                        access->ioas = cur_ioas;
                        return rc;
                }
                refcount_inc(&new_ioas->obj.users);
        }

        if (cur_ioas) {
                if (access->ops->unmap) {
                        mutex_unlock(&access->ioas_lock);
                        access->ops->unmap(access->data, 0, ULONG_MAX);
                        mutex_lock(&access->ioas_lock);
                }
                iopt_remove_access(&cur_ioas->iopt, access,
                                   iopt_access_list_id);
                refcount_dec(&cur_ioas->obj.users);
        }

        access->ioas = new_ioas;
        access->ioas_unpin = new_ioas;

        return 0;
}

static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
        struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
        int rc;

        if (IS_ERR(ioas))
                return PTR_ERR(ioas);
        rc = iommufd_access_change_ioas(access, ioas);
        iommufd_put_object(access->ictx, &ioas->obj);
        return rc;
}

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
        struct iommufd_access *access =
                container_of(obj, struct iommufd_access, obj);

        mutex_lock(&access->ioas_lock);
        if (access->ioas)
                WARN_ON(iommufd_access_change_ioas(access, NULL));
        mutex_unlock(&access->ioas_lock);
        iommufd_ctx_put(access->ictx);
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
                      const struct iommufd_access_ops *ops, void *data, u32 *id)
{
        struct iommufd_access *access;

        /*
         * There is no uAPI for the access object, but to keep things symmetric
         * use the object infrastructure anyhow.
         */
        access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
        if (IS_ERR(access))
                return access;

        access->data = data;
        access->ops = ops;

        if (ops->needs_pin_pages)
                access->iova_alignment = PAGE_SIZE;
        else
                access->iova_alignment = 1;

        /* The calling driver is a user until iommufd_access_destroy() */
        refcount_inc(&access->obj.users);
        access->ictx = ictx;
        iommufd_ctx_get(ictx);
        iommufd_object_finalize(ictx, &access->obj);
        *id = access->obj.id;
        mutex_init(&access->ioas_lock);
        return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
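
/*
 * Illustrative sketch of an emulated (no-DMA) driver creating an access and
 * connecting it to an IOAS. The ops structure, my_unmap() and mdev are
 * hypothetical; an unmap callback is required in order to use
 * iommufd_access_pin_pages() (see the sketch after
 * iommufd_access_notify_unmap() for what it must do).
 *
 *      static const struct iommufd_access_ops my_access_ops = {
 *              .needs_pin_pages = 1,
 *              .unmap = my_unmap,
 *      };
 *
 *      access = iommufd_access_create(ictx, &my_access_ops, mdev, &access_id);
 *      if (IS_ERR(access))
 *              return PTR_ERR(access);
 *      rc = iommufd_access_attach(access, ioas_id);
 *      if (rc) {
 *              iommufd_access_destroy(access);
 *              return rc;
 *      }
 */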

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
        iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);

void iommufd_access_detach(struct iommufd_access *access)
{
        mutex_lock(&access->ioas_lock);
        if (WARN_ON(!access->ioas)) {
                mutex_unlock(&access->ioas_lock);
                return;
        }
        WARN_ON(iommufd_access_change_ioas(access, NULL));
        mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
        int rc;

        mutex_lock(&access->ioas_lock);
        if (WARN_ON(access->ioas)) {
                mutex_unlock(&access->ioas_lock);
                return -EINVAL;
        }

        rc = iommufd_access_change_ioas_id(access, ioas_id);
        mutex_unlock(&access->ioas_lock);
        return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);

int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
        int rc;

        mutex_lock(&access->ioas_lock);
        if (!access->ioas) {
                mutex_unlock(&access->ioas_lock);
                return -ENOENT;
        }
        rc = iommufd_access_change_ioas_id(access, ioas_id);
        mutex_unlock(&access->ioas_lock);
        return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);

/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova,length. Anyone that has attached
 * a user through iopt_access_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap to complete while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
                                 unsigned long length)
{
        struct iommufd_ioas *ioas =
                container_of(iopt, struct iommufd_ioas, iopt);
        struct iommufd_access *access;
        unsigned long index;

        xa_lock(&ioas->iopt.access_list);
        xa_for_each(&ioas->iopt.access_list, index, access) {
                if (!iommufd_lock_obj(&access->obj))
                        continue;
                xa_unlock(&ioas->iopt.access_list);

                access->ops->unmap(access->data, iova, length);

                iommufd_put_object(access->ictx, &access->obj);
                xa_lock(&ioas->iopt.access_list);
        }
        xa_unlock(&ioas->iopt.access_list);
}
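
/*
 * Illustrative sketch of the unmap contract described above: a driver's unmap
 * callback must quiesce its own use of the affected range and drop every pin
 * that intersects it before returning, unpinning with the exact iova/length
 * it originally pinned. struct my_access_dev, my_pin and the helpers are
 * hypothetical.
 *
 *      static void my_unmap(void *data, unsigned long iova,
 *                           unsigned long length)
 *      {
 *              struct my_access_dev *adev = data;
 *              struct my_pin *pin;
 *
 *              list_for_each_entry(pin, &adev->pins, node) {
 *                      if (!my_pin_intersects(pin, iova, length))
 *                              continue;
 *                      my_stop_using(pin);
 *                      iommufd_access_unpin_pages(adev->access, pin->iova,
 *                                                 pin->length);
 *              }
 *      }
 */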

/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * Release the pinned struct pages. The caller must stop accessing them before
 * calling this. The iova/length must exactly match the one provided to
 * access_pages.
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
                                unsigned long iova, unsigned long length)
{
        struct iopt_area_contig_iter iter;
        struct io_pagetable *iopt;
        unsigned long last_iova;
        struct iopt_area *area;

        if (WARN_ON(!length) ||
            WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
                return;

        mutex_lock(&access->ioas_lock);
        /*
         * The driver must be doing something wrong if it calls this before an
         * iommufd_access_attach() or after an iommufd_access_detach().
         */
        if (WARN_ON(!access->ioas_unpin)) {
                mutex_unlock(&access->ioas_lock);
                return;
        }
        iopt = &access->ioas_unpin->iopt;

        down_read(&iopt->iova_rwsem);
        iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
                iopt_area_remove_access(
                        area, iopt_area_iova_to_index(area, iter.cur_iova),
                        iopt_area_iova_to_index(
                                area,
                                min(last_iova, iopt_area_last_iova(area))));
        WARN_ON(!iopt_area_contig_done(&iter));
        up_read(&iopt->iova_rwsem);
        mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
        if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
                return false;

        if (!iopt_area_contig_done(iter) &&
            (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
             PAGE_SIZE) != (PAGE_SIZE - 1))
                return false;
        return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
        if (flags & IOMMUFD_ACCESS_RW_WRITE)
                return area->iommu_prot & IOMMU_WRITE;
        return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page * pointers.
 * These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
                             unsigned long length, struct page **out_pages,
                             unsigned int flags)
{
        struct iopt_area_contig_iter iter;
        struct io_pagetable *iopt;
        unsigned long last_iova;
        struct iopt_area *area;
        int rc;

        /* Driver's ops don't support pin_pages */
        if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
            WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
                return -EINVAL;

        if (!length)
                return -EINVAL;
        if (check_add_overflow(iova, length - 1, &last_iova))
                return -EOVERFLOW;

        mutex_lock(&access->ioas_lock);
        if (!access->ioas) {
                mutex_unlock(&access->ioas_lock);
                return -ENOENT;
        }
        iopt = &access->ioas->iopt;

        down_read(&iopt->iova_rwsem);
        iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
                unsigned long last = min(last_iova, iopt_area_last_iova(area));
                unsigned long last_index = iopt_area_iova_to_index(area, last);
                unsigned long index =
                        iopt_area_iova_to_index(area, iter.cur_iova);

                if (area->prevent_access ||
                    !iopt_area_contig_is_aligned(&iter)) {
                        rc = -EINVAL;
                        goto err_remove;
                }

                if (!check_area_prot(area, flags)) {
                        rc = -EPERM;
                        goto err_remove;
                }

                rc = iopt_area_add_access(area, index, last_index, out_pages,
                                          flags);
                if (rc)
                        goto err_remove;
                out_pages += last_index - index + 1;
        }
        if (!iopt_area_contig_done(&iter)) {
                rc = -ENOENT;
                goto err_remove;
        }

        up_read(&iopt->iova_rwsem);
        mutex_unlock(&access->ioas_lock);
        return 0;

err_remove:
        if (iova < iter.cur_iova) {
                last_iova = iter.cur_iova - 1;
                iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
                        iopt_area_remove_access(
                                area,
                                iopt_area_iova_to_index(area, iter.cur_iova),
                                iopt_area_iova_to_index(
                                        area, min(last_iova,
                                                  iopt_area_last_iova(area))));
        }
        up_read(&iopt->iova_rwsem);
        mutex_unlock(&access->ioas_lock);
        return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
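
/*
 * Illustrative sketch of a balanced pin/use/unpin sequence. iova, length and
 * my_use_pages() are hypothetical; iova must be page aligned as described
 * above and the unpin must use exactly the same iova/length.
 *
 *      struct page **pages;
 *      unsigned long npages = DIV_ROUND_UP(length, PAGE_SIZE);
 *      int rc;
 *
 *      pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 *      if (!pages)
 *              return -ENOMEM;
 *
 *      rc = iommufd_access_pin_pages(access, iova, length, pages,
 *                                    IOMMUFD_ACCESS_RW_WRITE);
 *      if (rc)
 *              goto out_free;
 *
 *      my_use_pages(pages, npages);
 *
 *      iommufd_access_unpin_pages(access, iova, length);
 * out_free:
 *      kfree(pages);
 *      return rc;
 */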

/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy kernel to/from data into the range given by IOVA/length. If flags
 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
 * by changing it into copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
                      void *data, size_t length, unsigned int flags)
{
        struct iopt_area_contig_iter iter;
        struct io_pagetable *iopt;
        struct iopt_area *area;
        unsigned long last_iova;
        int rc;

        if (!length)
                return -EINVAL;
        if (check_add_overflow(iova, length - 1, &last_iova))
                return -EOVERFLOW;

        mutex_lock(&access->ioas_lock);
        if (!access->ioas) {
                mutex_unlock(&access->ioas_lock);
                return -ENOENT;
        }
        iopt = &access->ioas->iopt;

        down_read(&iopt->iova_rwsem);
        iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
                unsigned long last = min(last_iova, iopt_area_last_iova(area));
                unsigned long bytes = (last - iter.cur_iova) + 1;

                if (area->prevent_access) {
                        rc = -EINVAL;
                        goto err_out;
                }

                if (!check_area_prot(area, flags)) {
                        rc = -EPERM;
                        goto err_out;
                }

                rc = iopt_pages_rw_access(
                        area->pages, iopt_area_start_byte(area, iter.cur_iova),
                        data, bytes, flags);
                if (rc)
                        goto err_out;
                data += bytes;
        }
        if (!iopt_area_contig_done(&iter))
                rc = -ENOENT;
err_out:
        up_read(&iopt->iova_rwsem);
        mutex_unlock(&access->ioas_lock);
        return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
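
/*
 * Illustrative sketch: copying a guest-visible structure out of the IOAS
 * without pinning. req_iova and struct my_req are hypothetical.
 *
 *      struct my_req req;
 *      int rc;
 *
 *      rc = iommufd_access_rw(access, req_iova, &req, sizeof(req), 0);
 *      if (rc)
 *              return rc;
 *
 * Passing IOMMUFD_ACCESS_RW_WRITE writes the buffer into the IOAS instead,
 * and IOMMUFD_ACCESS_RW_KTHREAD may be added when calling from a kthread that
 * can use copy_to/from_user() on the backing memory.
 */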

int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
        struct iommu_hw_info *cmd = ucmd->cmd;
        void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
        const struct iommu_ops *ops;
        struct iommufd_device *idev;
        unsigned int data_len;
        unsigned int copy_len;
        void *data;
        int rc;

        if (cmd->flags || cmd->__reserved)
                return -EOPNOTSUPP;

        idev = iommufd_get_device(ucmd, cmd->dev_id);
        if (IS_ERR(idev))
                return PTR_ERR(idev);

        ops = dev_iommu_ops(idev->dev);
        if (ops->hw_info) {
                data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
                if (IS_ERR(data)) {
                        rc = PTR_ERR(data);
                        goto out_put;
                }

                /*
                 * Drivers that have a hw_info callback should have a unique
                 * iommu_hw_info_type.
                 */
                if (WARN_ON_ONCE(cmd->out_data_type ==
                                 IOMMU_HW_INFO_TYPE_NONE)) {
                        rc = -ENODEV;
                        goto out_free;
                }
        } else {
                cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
                data_len = 0;
                data = NULL;
        }

        copy_len = min(cmd->data_len, data_len);
        if (copy_to_user(user_ptr, data, copy_len)) {
                rc = -EFAULT;
                goto out_free;
        }

        /*
         * Zero the trailing bytes if the user buffer is bigger than the
         * data size the kernel actually has.
         */
        if (copy_len < cmd->data_len) {
                if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
                        rc = -EFAULT;
                        goto out_free;
                }
        }

        /*
         * We return the length the kernel supports so userspace may know what
         * the kernel capability is. It could be larger than the input buffer.
         */
        cmd->data_len = data_len;

        cmd->out_capabilities = 0;
        if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
                cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;

        rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
        kfree(data);
out_put:
        iommufd_put_object(ucmd->ictx, &idev->obj);
        return rc;
}
1249 | |