// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and xarray.
 */
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <uapi/linux/iommufd.h>

#include "io_pagetable.h"
#include "double_span.h"

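/*
 * Scratch list element used while building a mapping: each entry describes a
 * slice of an iopt_pages, starting at start_byte and running for length bytes,
 * that will be covered by one iopt_area once the map succeeds.
 */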
struct iopt_pages_list {
        struct iopt_pages *pages;
        struct iopt_area *area;
        struct list_head next;
        unsigned long start_byte;
        unsigned long length;
};

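/*
 * Begin iterating over the areas that cover the IOVA range [iova, last_iova].
 * Together with iopt_area_contig_next() this backs iopt_for_each_contig_area();
 * iteration stops early if the range is not contiguously covered by areas that
 * have pages attached.
 */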
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
                                        struct io_pagetable *iopt,
                                        unsigned long iova,
                                        unsigned long last_iova)
{
        lockdep_assert_held(&iopt->iova_rwsem);

        iter->cur_iova = iova;
        iter->last_iova = last_iova;
        iter->area = iopt_area_iter_first(iopt, iova, iova);
        if (!iter->area)
                return NULL;
        if (!iter->area->pages) {
                iter->area = NULL;
                return NULL;
        }
        return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
        unsigned long last_iova;

        if (!iter->area)
                return NULL;
        last_iova = iopt_area_last_iova(iter->area);
        if (iter->last_iova <= last_iova)
                return NULL;

        iter->cur_iova = last_iova + 1;
        iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
                                         iter->last_iova);
        if (!iter->area)
                return NULL;
        if (iter->cur_iova != iopt_area_iova(iter->area) ||
            !iter->area->pages) {
                iter->area = NULL;
                return NULL;
        }
        return iter->area;
}

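/*
 * The two helpers below test whether a candidate span can hold an allocation
 * of @length bytes once its start is rounded up to @iova_alignment and offset
 * by @page_offset, adjusting the span start in place when it can.
 */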
static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
                                    unsigned long length,
                                    unsigned long iova_alignment,
                                    unsigned long page_offset)
{
        if (span->is_used || span->last_hole - span->start_hole < length - 1)
                return false;

        span->start_hole = ALIGN(span->start_hole, iova_alignment) |
                           page_offset;
        if (span->start_hole > span->last_hole ||
            span->last_hole - span->start_hole < length - 1)
                return false;
        return true;
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
                                    unsigned long length,
                                    unsigned long iova_alignment,
                                    unsigned long page_offset)
{
        if (span->is_hole || span->last_used - span->start_used < length - 1)
                return false;

        span->start_used = ALIGN(span->start_used, iova_alignment) |
                           page_offset;
        if (span->start_used > span->last_used ||
            span->last_used - span->start_used < length - 1)
                return false;
        return true;
}

/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
                           unsigned long uptr, unsigned long length)
{
        unsigned long page_offset = uptr % PAGE_SIZE;
        struct interval_tree_double_span_iter used_span;
        struct interval_tree_span_iter allowed_span;
        unsigned long iova_alignment;

        lockdep_assert_held(&iopt->iova_rwsem);

        /* Protect roundup_pow_of_two() from overflow */
        if (length == 0 || length >= ULONG_MAX / 2)
                return -EOVERFLOW;

        /*
         * Keep alignment present in the uptr when building the IOVA, this
         * increases the chance we can map a THP.
         */
        if (!uptr)
                iova_alignment = roundup_pow_of_two(length);
        else
                iova_alignment = min_t(unsigned long,
                                       roundup_pow_of_two(length),
                                       1UL << __ffs64(uptr));

        if (iova_alignment < iopt->iova_alignment)
                return -EINVAL;

        interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
                                    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
                if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
                        allowed_span.start_used = PAGE_SIZE;
                        allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
                        allowed_span.is_hole = false;
                }

                if (!__alloc_iova_check_used(&allowed_span, length,
                                             iova_alignment, page_offset))
                        continue;

                interval_tree_for_each_double_span(
                        &used_span, &iopt->reserved_itree, &iopt->area_itree,
                        allowed_span.start_used, allowed_span.last_used) {
                        if (!__alloc_iova_check_hole(&used_span, length,
                                                     iova_alignment,
                                                     page_offset))
                                continue;

                        *iova = used_span.start_hole;
                        return 0;
                }
        }
        return -ENOSPC;
}

static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
                           unsigned long length)
{
        unsigned long last;

        lockdep_assert_held(&iopt->iova_rwsem);

        if ((iova & (iopt->iova_alignment - 1)))
                return -EINVAL;

        if (check_add_overflow(iova, length - 1, &last))
                return -EOVERFLOW;

        /* No reserved IOVA intersects the range */
        if (iopt_reserved_iter_first(iopt, iova, last))
                return -EINVAL;

        /* Check that there is not already a mapping in the range */
        if (iopt_area_iter_first(iopt, iova, last))
                return -EEXIST;
        return 0;
}

/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
                            struct iopt_pages *pages, unsigned long iova,
                            unsigned long start_byte, unsigned long length,
                            int iommu_prot)
{
        lockdep_assert_held_write(&iopt->iova_rwsem);

        if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
                return -EPERM;

        area->iommu_prot = iommu_prot;
        area->page_offset = start_byte % PAGE_SIZE;
        if (area->page_offset & (iopt->iova_alignment - 1))
                return -EINVAL;

        area->node.start = iova;
        if (check_add_overflow(iova, length - 1, &area->node.last))
                return -EOVERFLOW;

        area->pages_node.start = start_byte / PAGE_SIZE;
        if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
                return -EOVERFLOW;
        area->pages_node.last = area->pages_node.last / PAGE_SIZE;
        if (WARN_ON(area->pages_node.last >= pages->npages))
                return -EOVERFLOW;

        /*
         * The area is inserted with a NULL pages indicating it is not fully
         * initialized yet.
         */
        area->iopt = iopt;
        interval_tree_insert(&area->node, &iopt->area_itree);
        return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
        struct iopt_area *area;

        area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
        if (!area)
                return NULL;
        RB_CLEAR_NODE(&area->node.rb);
        RB_CLEAR_NODE(&area->pages_node.rb);
        return area;
}

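/*
 * Allocate an iopt_area for every list element, then, under the iova_rwsem,
 * either pick a free IOVA (IOPT_ALLOC_IOVA) or validate the caller's choice
 * and insert the areas back to back. The areas still have a NULL pages
 * pointer at this point; iopt_map_pages() fills them in later.
 */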
static int iopt_alloc_area_pages(struct io_pagetable *iopt,
                                 struct list_head *pages_list,
                                 unsigned long length, unsigned long *dst_iova,
                                 int iommu_prot, unsigned int flags)
{
        struct iopt_pages_list *elm;
        unsigned long iova;
        int rc = 0;

        list_for_each_entry(elm, pages_list, next) {
                elm->area = iopt_area_alloc();
                if (!elm->area)
                        return -ENOMEM;
        }

        down_write(&iopt->iova_rwsem);
        if ((length & (iopt->iova_alignment - 1)) || !length) {
                rc = -EINVAL;
                goto out_unlock;
        }

        if (flags & IOPT_ALLOC_IOVA) {
                /* Use the first entry to guess the ideal IOVA alignment */
                elm = list_first_entry(pages_list, struct iopt_pages_list,
                                       next);
                rc = iopt_alloc_iova(
                        iopt, dst_iova,
                        (uintptr_t)elm->pages->uptr + elm->start_byte, length);
                if (rc)
                        goto out_unlock;
                if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
                    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
                        rc = -EINVAL;
                        goto out_unlock;
                }
        } else {
                rc = iopt_check_iova(iopt, *dst_iova, length);
                if (rc)
                        goto out_unlock;
        }

        /*
         * Areas are created with a NULL pages so that the IOVA space is
         * reserved and we can unlock the iova_rwsem.
         */
        iova = *dst_iova;
        list_for_each_entry(elm, pages_list, next) {
                rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
                                      elm->start_byte, elm->length, iommu_prot);
                if (rc)
                        goto out_unlock;
                iova += elm->length;
        }

out_unlock:
        up_write(&iopt->iova_rwsem);
        return rc;
}

static void iopt_abort_area(struct iopt_area *area)
{
        if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
                WARN_ON(area->pages);
        if (area->iopt) {
                down_write(&area->iopt->iova_rwsem);
                interval_tree_remove(&area->node, &area->iopt->area_itree);
                up_write(&area->iopt->iova_rwsem);
        }
        kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
        struct iopt_pages_list *elm;

        while ((elm = list_first_entry_or_null(pages_list,
                                               struct iopt_pages_list, next))) {
                if (elm->area)
                        iopt_abort_area(elm->area);
                if (elm->pages)
                        iopt_put_pages(elm->pages);
                list_del(&elm->next);
                kfree(elm);
        }
}

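/*
 * Map every list element into all attached domains. On failure the domains
 * that were already filled are unwound so the iopt is left unchanged.
 */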
static int iopt_fill_domains_pages(struct list_head *pages_list)
{
        struct iopt_pages_list *undo_elm;
        struct iopt_pages_list *elm;
        int rc;

        list_for_each_entry(elm, pages_list, next) {
                rc = iopt_area_fill_domains(elm->area, elm->pages);
                if (rc)
                        goto err_undo;
        }
        return 0;

err_undo:
        list_for_each_entry(undo_elm, pages_list, next) {
                if (undo_elm == elm)
                        break;
                iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
        }
        return rc;
}

int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
                   unsigned long length, unsigned long *dst_iova,
                   int iommu_prot, unsigned int flags)
{
        struct iopt_pages_list *elm;
        int rc;

        rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
                                   iommu_prot, flags);
        if (rc)
                return rc;

        down_read(&iopt->domains_rwsem);
        rc = iopt_fill_domains_pages(pages_list);
        if (rc)
                goto out_unlock_domains;

        down_write(&iopt->iova_rwsem);
        list_for_each_entry(elm, pages_list, next) {
                /*
                 * area->pages must be set inside the domains_rwsem to ensure
                 * any newly added domains will get filled. Moves the reference
                 * in from the list.
                 */
                elm->area->pages = elm->pages;
                elm->pages = NULL;
                elm->area = NULL;
        }
        up_write(&iopt->iova_rwsem);
out_unlock_domains:
        up_read(&iopt->domains_rwsem);
        return rc;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise is the iova to map to on input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only setup a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
                        unsigned long *iova, void __user *uptr,
                        unsigned long length, int iommu_prot,
                        unsigned int flags)
{
        struct iopt_pages_list elm = {};
        LIST_HEAD(pages_list);
        int rc;

        elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
        if (IS_ERR(elm.pages))
                return PTR_ERR(elm.pages);
        if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
            elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
                elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
        elm.start_byte = uptr - elm.pages->uptr;
        elm.length = length;
        list_add(&elm.next, &pages_list);

        rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
        if (rc) {
                if (elm.area)
                        iopt_abort_area(elm.area);
                if (elm.pages)
                        iopt_put_pages(elm.pages);
                return rc;
        }
        return 0;
}
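
/*
 * A rough usage sketch for the function above (not lifted from a real caller;
 * the cmd field and variable names are illustrative). The in-tree user is the
 * IOMMU_IOAS_MAP ioctl path, which additionally handles fixed-IOVA requests:
 *
 *      unsigned long iova = cmd->iova;
 *
 *      rc = iopt_map_user_pages(ictx, iopt, &iova,
 *                               u64_to_user_ptr(cmd->user_va), cmd->length,
 *                               IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *      if (!rc)
 *              cmd->iova = iova;
 */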

struct iova_bitmap_fn_arg {
        unsigned long flags;
        struct io_pagetable *iopt;
        struct iommu_domain *domain;
        struct iommu_dirty_bitmap *dirty;
};

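/*
 * iova_bitmap_for_each() callback: walk the contiguous areas covering the
 * requested range and have the domain's dirty_ops record (and optionally
 * clear) the dirty bits into the shared bitmap.
 */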
static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
                                        unsigned long iova, size_t length,
                                        void *opaque)
{
        struct iopt_area *area;
        struct iopt_area_contig_iter iter;
        struct iova_bitmap_fn_arg *arg = opaque;
        struct iommu_domain *domain = arg->domain;
        struct iommu_dirty_bitmap *dirty = arg->dirty;
        const struct iommu_dirty_ops *ops = domain->dirty_ops;
        unsigned long last_iova = iova + length - 1;
        unsigned long flags = arg->flags;
        int ret;

        iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
                unsigned long last = min(last_iova, iopt_area_last_iova(area));

                ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
                                                last - iter.cur_iova + 1, flags,
                                                dirty);
                if (ret)
                        return ret;
        }

        if (!iopt_area_contig_done(&iter))
                return -EINVAL;
        return 0;
}

static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
                           struct io_pagetable *iopt, unsigned long flags,
                           struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
        const struct iommu_dirty_ops *ops = domain->dirty_ops;
        struct iommu_iotlb_gather gather;
        struct iommu_dirty_bitmap dirty;
        struct iova_bitmap_fn_arg arg;
        struct iova_bitmap *iter;
        int ret = 0;

        if (!ops || !ops->read_and_clear_dirty)
                return -EOPNOTSUPP;

        iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
                                 bitmap->page_size,
                                 u64_to_user_ptr(bitmap->data));
        if (IS_ERR(iter))
                return -ENOMEM;

        iommu_dirty_bitmap_init(&dirty, iter, &gather);

        arg.flags = flags;
        arg.iopt = iopt;
        arg.domain = domain;
        arg.dirty = &dirty;
        iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

        if (!(flags & IOMMU_DIRTY_NO_CLEAR))
                iommu_iotlb_sync(domain, &gather);

        iova_bitmap_free(iter);

        return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
                             struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
        size_t iommu_pgsize = iopt->iova_alignment;
        u64 last_iova;

        if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
                return -EOVERFLOW;

        if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
                return -EOVERFLOW;

        if ((bitmap->iova & (iommu_pgsize - 1)) ||
            ((last_iova + 1) & (iommu_pgsize - 1)))
                return -EINVAL;

        if (!bitmap->page_size)
                return -EINVAL;

        if ((bitmap->iova & (bitmap->page_size - 1)) ||
            ((last_iova + 1) & (bitmap->page_size - 1)))
                return -EINVAL;

        return 0;
}

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
                                   struct iommu_domain *domain,
                                   unsigned long flags,
                                   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
        int ret;

        ret = iommufd_check_iova_range(iopt, bitmap);
        if (ret)
                return ret;

        down_read(&iopt->iova_rwsem);
        ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
        up_read(&iopt->iova_rwsem);

        return ret;
}

static int iopt_clear_dirty_data(struct io_pagetable *iopt,
                                 struct iommu_domain *domain)
{
        const struct iommu_dirty_ops *ops = domain->dirty_ops;
        struct iommu_iotlb_gather gather;
        struct iommu_dirty_bitmap dirty;
        struct iopt_area *area;
        int ret = 0;

        lockdep_assert_held_read(&iopt->iova_rwsem);

        iommu_dirty_bitmap_init(&dirty, NULL, &gather);

        for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
             area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
                if (!area->pages)
                        continue;

                ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
                                                iopt_area_length(area), 0,
                                                &dirty);
                if (ret)
                        break;
        }

        iommu_iotlb_sync(domain, &gather);
        return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
                            struct iommu_domain *domain, bool enable)
{
        const struct iommu_dirty_ops *ops = domain->dirty_ops;
        int ret = 0;

        if (!ops)
                return -EOPNOTSUPP;

        down_read(&iopt->iova_rwsem);

        /* Clear dirty bits from PTEs to ensure a clean snapshot */
        if (enable) {
                ret = iopt_clear_dirty_data(iopt, domain);
                if (ret)
                        goto out_unlock;
        }

        ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
        up_read(&iopt->iova_rwsem);
        return ret;
}

int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
                   unsigned long length, struct list_head *pages_list)
{
        struct iopt_area_contig_iter iter;
        unsigned long last_iova;
        struct iopt_area *area;
        int rc;

        if (!length)
                return -EINVAL;
        if (check_add_overflow(iova, length - 1, &last_iova))
                return -EOVERFLOW;

        down_read(&iopt->iova_rwsem);
        iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
                struct iopt_pages_list *elm;
                unsigned long last = min(last_iova, iopt_area_last_iova(area));

                elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
                if (!elm) {
                        rc = -ENOMEM;
                        goto err_free;
                }
                elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
                elm->pages = area->pages;
                elm->length = (last - iter.cur_iova) + 1;
                kref_get(&elm->pages->kref);
                list_add_tail(&elm->next, pages_list);
        }
        if (!iopt_area_contig_done(&iter)) {
                rc = -ENOENT;
                goto err_free;
        }
        up_read(&iopt->iova_rwsem);
        return 0;
err_free:
        up_read(&iopt->iova_rwsem);
        iopt_free_pages_list(pages_list);
        return rc;
}

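/*
 * Unmap every area fully contained in [start, last]. If an in-kernel access
 * still has the area pinned, the area is marked prevent_access, the access is
 * notified to drop its pins, and the walk restarts from the same area until it
 * can be torn down (bounded by a retry limit).
 */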
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
                                 unsigned long last, unsigned long *unmapped)
{
        struct iopt_area *area;
        unsigned long unmapped_bytes = 0;
        unsigned int tries = 0;
        int rc = -ENOENT;

        /*
         * The domains_rwsem must be held in read mode any time any area->pages
         * is NULL. This prevents domain attach/detach from running
         * concurrently with cleaning up the area.
         */
again:
        down_read(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);
        while ((area = iopt_area_iter_first(iopt, start, last))) {
                unsigned long area_last = iopt_area_last_iova(area);
                unsigned long area_first = iopt_area_iova(area);
                struct iopt_pages *pages;

                /* Userspace should not race map/unmap's of the same area */
                if (!area->pages) {
                        rc = -EBUSY;
                        goto out_unlock_iova;
                }

                if (area_first < start || area_last > last) {
                        rc = -ENOENT;
                        goto out_unlock_iova;
                }

                if (area_first != start)
                        tries = 0;

                /*
                 * num_accesses writers must hold the iova_rwsem too, so we can
                 * safely read it under the write side of the iova_rwsem
                 * without the pages->mutex.
                 */
                if (area->num_accesses) {
                        size_t length = iopt_area_length(area);

                        start = area_first;
                        area->prevent_access = true;
                        up_write(&iopt->iova_rwsem);
                        up_read(&iopt->domains_rwsem);

                        iommufd_access_notify_unmap(iopt, area_first, length);
                        /* Something is not responding to unmap requests. */
                        tries++;
                        if (WARN_ON(tries > 100))
                                return -EDEADLOCK;
                        goto again;
                }

                pages = area->pages;
                area->pages = NULL;
                up_write(&iopt->iova_rwsem);

                iopt_area_unfill_domains(area, pages);
                iopt_abort_area(area);
                iopt_put_pages(pages);

                unmapped_bytes += area_last - area_first + 1;

                down_write(&iopt->iova_rwsem);
        }
        if (unmapped_bytes)
                rc = 0;

out_unlock_iova:
        up_write(&iopt->iova_rwsem);
        up_read(&iopt->domains_rwsem);
        if (unmapped)
                *unmapped = unmapped_bytes;
        return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of existing ranges.
 * Splitting/truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
                    unsigned long length, unsigned long *unmapped)
{
        unsigned long iova_last;

        if (!length)
                return -EINVAL;

        if (check_add_overflow(iova, length - 1, &iova_last))
                return -EOVERFLOW;

        return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
        int rc;

        rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
        /* If the IOVAs are empty then unmap all succeeds */
        if (rc == -ENOENT)
                return 0;
        return rc;
}

/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
                        struct rb_root_cached *allowed_iova)
{
        struct iopt_allowed *allowed;

        down_write(&iopt->iova_rwsem);
        swap(*allowed_iova, iopt->allowed_itree);

        for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
             allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
                if (iopt_reserved_iter_first(iopt, allowed->node.start,
                                             allowed->node.last)) {
                        swap(*allowed_iova, iopt->allowed_itree);
                        up_write(&iopt->iova_rwsem);
                        return -EADDRINUSE;
                }
        }
        up_write(&iopt->iova_rwsem);
        return 0;
}

int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
                      unsigned long last, void *owner)
{
        struct iopt_reserved *reserved;

        lockdep_assert_held_write(&iopt->iova_rwsem);

        if (iopt_area_iter_first(iopt, start, last) ||
            iopt_allowed_iter_first(iopt, start, last))
                return -EADDRINUSE;

        reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
        if (!reserved)
                return -ENOMEM;
        reserved->node.start = start;
        reserved->node.last = last;
        reserved->owner = owner;
        interval_tree_insert(&reserved->node, &iopt->reserved_itree);
        return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
        struct iopt_reserved *reserved, *next;

        lockdep_assert_held_write(&iopt->iova_rwsem);

        for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
             reserved = next) {
                next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

                if (reserved->owner == owner) {
                        interval_tree_remove(&reserved->node,
                                             &iopt->reserved_itree);
                        kfree(reserved);
                }
        }
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
        down_write(&iopt->iova_rwsem);
        __iopt_remove_reserved_iova(iopt, owner);
        up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
        init_rwsem(&iopt->iova_rwsem);
        init_rwsem(&iopt->domains_rwsem);
        iopt->area_itree = RB_ROOT_CACHED;
        iopt->allowed_itree = RB_ROOT_CACHED;
        iopt->reserved_itree = RB_ROOT_CACHED;
        xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
        xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

        /*
         * iopts start as SW tables that can use the entire size_t IOVA space
         * due to the use of size_t in the APIs. They have no alignment
         * restriction.
         */
        iopt->iova_alignment = 1;
}

void iopt_destroy_table(struct io_pagetable *iopt)
{
        struct interval_tree_node *node;

        if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
                iopt_remove_reserved_iova(iopt, NULL);

        while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
                                                ULONG_MAX))) {
                interval_tree_remove(node, &iopt->allowed_itree);
                kfree(container_of(node, struct iopt_allowed, node));
        }

        WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
        WARN_ON(!xa_empty(&iopt->domains));
        WARN_ON(!xa_empty(&iopt->access_list));
        WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from
 * the domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
                               struct iommu_domain *domain)
{
        struct iopt_area *area;

        lockdep_assert_held(&iopt->iova_rwsem);
        lockdep_assert_held_write(&iopt->domains_rwsem);

        /*
         * Some other domain is holding all the pfns still, rapidly unmap this
         * domain.
         */
        if (iopt->next_domain_id != 0) {
                /* Pick an arbitrary remaining domain to act as storage */
                struct iommu_domain *storage_domain =
                        xa_load(&iopt->domains, 0);

                for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
                     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
                        struct iopt_pages *pages = area->pages;

                        if (!pages)
                                continue;

                        mutex_lock(&pages->mutex);
                        if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
                                WARN_ON(!area->storage_domain);
                        if (area->storage_domain == domain)
                                area->storage_domain = storage_domain;
                        mutex_unlock(&pages->mutex);

                        iopt_area_unmap_domain(area, domain);
                }
                return;
        }

        for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
             area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
                struct iopt_pages *pages = area->pages;

                if (!pages)
                        continue;

                mutex_lock(&pages->mutex);
                interval_tree_remove(&area->pages_node, &pages->domains_itree);
                WARN_ON(area->storage_domain != domain);
                area->storage_domain = NULL;
                iopt_area_unfill_domain(area, pages, domain);
                mutex_unlock(&pages->mutex);
        }
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
                            struct iommu_domain *domain)
{
        struct iopt_area *end_area;
        struct iopt_area *area;
        int rc;

        lockdep_assert_held(&iopt->iova_rwsem);
        lockdep_assert_held_write(&iopt->domains_rwsem);

        for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
             area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
                struct iopt_pages *pages = area->pages;

                if (!pages)
                        continue;

                mutex_lock(&pages->mutex);
                rc = iopt_area_fill_domain(area, domain);
                if (rc) {
                        mutex_unlock(&pages->mutex);
                        goto out_unfill;
                }
                if (!area->storage_domain) {
                        WARN_ON(iopt->next_domain_id != 0);
                        area->storage_domain = domain;
                        interval_tree_insert(&area->pages_node,
                                             &pages->domains_itree);
                }
                mutex_unlock(&pages->mutex);
        }
        return 0;

out_unfill:
        end_area = area;
        for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
             area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
                struct iopt_pages *pages = area->pages;

                if (area == end_area)
                        break;
                if (!pages)
                        continue;
                mutex_lock(&pages->mutex);
                if (iopt->next_domain_id == 0) {
                        interval_tree_remove(&area->pages_node,
                                             &pages->domains_itree);
                        area->storage_domain = NULL;
                }
                iopt_area_unfill_domain(area, pages, domain);
                mutex_unlock(&pages->mutex);
        }
        return rc;
}

/* All existing areas must conform to the increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
                                     unsigned long new_iova_alignment)
{
        unsigned long align_mask = new_iova_alignment - 1;
        struct iopt_area *area;

        lockdep_assert_held(&iopt->iova_rwsem);
        lockdep_assert_held(&iopt->domains_rwsem);

        for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
             area = iopt_area_iter_next(area, 0, ULONG_MAX))
                if ((iopt_area_iova(area) & align_mask) ||
                    (iopt_area_length(area) & align_mask) ||
                    (area->page_offset & align_mask))
                        return -EADDRINUSE;

        if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
                struct iommufd_access *access;
                unsigned long index;

                xa_for_each(&iopt->access_list, index, access)
                        if (WARN_ON(access->iova_alignment >
                                    new_iova_alignment))
                                return -EADDRINUSE;
        }
        return 0;
}

int iopt_table_add_domain(struct io_pagetable *iopt,
                          struct iommu_domain *domain)
{
        const struct iommu_domain_geometry *geometry = &domain->geometry;
        struct iommu_domain *iter_domain;
        unsigned int new_iova_alignment;
        unsigned long index;
        int rc;

        down_write(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);

        xa_for_each(&iopt->domains, index, iter_domain) {
                if (WARN_ON(iter_domain == domain)) {
                        rc = -EEXIST;
                        goto out_unlock;
                }
        }

        /*
         * The io page size drives the iova_alignment. Internally the iopt_pages
         * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
         * objects into the iommu_domain.
         *
         * An iommu_domain must always be able to accept PAGE_SIZE to be
         * compatible as we can't guarantee higher contiguity.
         */
        new_iova_alignment = max_t(unsigned long,
                                   1UL << __ffs(domain->pgsize_bitmap),
                                   iopt->iova_alignment);
        if (new_iova_alignment > PAGE_SIZE) {
                rc = -EINVAL;
                goto out_unlock;
        }
        if (new_iova_alignment != iopt->iova_alignment) {
                rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
                if (rc)
                        goto out_unlock;
        }

        /* No area exists that is outside the allowed domain aperture */
        if (geometry->aperture_start != 0) {
                rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
                                       domain);
                if (rc)
                        goto out_reserved;
        }
        if (geometry->aperture_end != ULONG_MAX) {
                rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
                                       ULONG_MAX, domain);
                if (rc)
                        goto out_reserved;
        }

        rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
        if (rc)
                goto out_reserved;

        rc = iopt_fill_domain(iopt, domain);
        if (rc)
                goto out_release;

        iopt->iova_alignment = new_iova_alignment;
        xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
        iopt->next_domain_id++;
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
        return 0;
out_release:
        xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
        __iopt_remove_reserved_iova(iopt, domain);
out_unlock:
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
        return rc;
}

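/*
 * Recompute iova_alignment as the strictest requirement of all attached
 * domains and registered accesses. If the requirement would grow, verify that
 * every existing area still satisfies it before committing the new value.
 */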
static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
        unsigned long new_iova_alignment;
        struct iommufd_access *access;
        struct iommu_domain *domain;
        unsigned long index;

        lockdep_assert_held_write(&iopt->iova_rwsem);
        lockdep_assert_held(&iopt->domains_rwsem);

        /* See batch_iommu_map_small() */
        if (iopt->disable_large_pages)
                new_iova_alignment = PAGE_SIZE;
        else
                new_iova_alignment = 1;

        xa_for_each(&iopt->domains, index, domain)
                new_iova_alignment = max_t(unsigned long,
                                           1UL << __ffs(domain->pgsize_bitmap),
                                           new_iova_alignment);
        xa_for_each(&iopt->access_list, index, access)
                new_iova_alignment = max_t(unsigned long,
                                           access->iova_alignment,
                                           new_iova_alignment);

        if (new_iova_alignment > iopt->iova_alignment) {
                int rc;

                rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
                if (rc)
                        return rc;
        }
        iopt->iova_alignment = new_iova_alignment;
        return 0;
}

void iopt_table_remove_domain(struct io_pagetable *iopt,
                              struct iommu_domain *domain)
{
        struct iommu_domain *iter_domain = NULL;
        unsigned long index;

        down_write(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);

        xa_for_each(&iopt->domains, index, iter_domain)
                if (iter_domain == domain)
                        break;
        if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
                goto out_unlock;

        /*
         * Compress the xarray to keep it linear by swapping the entry to erase
         * with the tail entry and shrinking the tail.
         */
        iopt->next_domain_id--;
        iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
        if (index != iopt->next_domain_id)
                xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

        iopt_unfill_domain(iopt, domain);
        __iopt_remove_reserved_iova(iopt, domain);

        WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
}

/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
        unsigned long alignment = area->iopt->iova_alignment;
        unsigned long last_iova = iopt_area_last_iova(area);
        unsigned long start_iova = iopt_area_iova(area);
        unsigned long new_start = iova + 1;
        struct io_pagetable *iopt = area->iopt;
        struct iopt_pages *pages = area->pages;
        struct iopt_area *lhs;
        struct iopt_area *rhs;
        int rc;

        lockdep_assert_held_write(&iopt->iova_rwsem);

        if (iova == start_iova || iova == last_iova)
                return 0;

        if (!pages || area->prevent_access)
                return -EBUSY;

        if (new_start & (alignment - 1) ||
            iopt_area_start_byte(area, new_start) & (alignment - 1))
                return -EINVAL;

        lhs = iopt_area_alloc();
        if (!lhs)
                return -ENOMEM;

        rhs = iopt_area_alloc();
        if (!rhs) {
                rc = -ENOMEM;
                goto err_free_lhs;
        }

        mutex_lock(&pages->mutex);
        /*
         * Splitting is not permitted if an access exists, we don't track
         * enough information to split existing accesses.
         */
        if (area->num_accesses) {
                rc = -EINVAL;
                goto err_unlock;
        }

        /*
         * Splitting is not permitted if a domain could have been mapped with
         * huge pages.
         */
        if (area->storage_domain && !iopt->disable_large_pages) {
                rc = -EINVAL;
                goto err_unlock;
        }

        interval_tree_remove(&area->node, &iopt->area_itree);
        rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
                              iopt_area_start_byte(area, start_iova),
                              (new_start - 1) - start_iova + 1,
                              area->iommu_prot);
        if (WARN_ON(rc))
                goto err_insert;

        rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
                              iopt_area_start_byte(area, new_start),
                              last_iova - new_start + 1, area->iommu_prot);
        if (WARN_ON(rc))
                goto err_remove_lhs;

        /*
         * If the original area has filled a domain, domains_itree has to be
         * updated.
         */
        if (area->storage_domain) {
                interval_tree_remove(&area->pages_node, &pages->domains_itree);
                interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
                interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
        }

        lhs->storage_domain = area->storage_domain;
        lhs->pages = area->pages;
        rhs->storage_domain = area->storage_domain;
        rhs->pages = area->pages;
        kref_get(&rhs->pages->kref);
        kfree(area);
        mutex_unlock(&pages->mutex);

        /*
         * No change to domains or accesses because the pages have not been
         * changed.
         */
        return 0;

err_remove_lhs:
        interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
        interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
        mutex_unlock(&pages->mutex);
        kfree(rhs);
err_free_lhs:
        kfree(lhs);
        return rc;
}

int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
                  size_t num_iovas)
{
        int rc = 0;
        int i;

        down_write(&iopt->iova_rwsem);
        for (i = 0; i < num_iovas; i++) {
                struct iopt_area *area;

                area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
                if (!area)
                        continue;
                rc = iopt_area_split(area, iovas[i]);
                if (rc)
                        break;
        }
        up_write(&iopt->iova_rwsem);
        return rc;
}

void iopt_enable_large_pages(struct io_pagetable *iopt)
{
        int rc;

        down_write(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);
        WRITE_ONCE(iopt->disable_large_pages, false);
        rc = iopt_calculate_iova_alignment(iopt);
        WARN_ON(rc);
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
        int rc = 0;

        down_write(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);
        if (iopt->disable_large_pages)
                goto out_unlock;

        /* Won't do it if domains already have pages mapped in them */
        if (!xa_empty(&iopt->domains) &&
            !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
                rc = -EINVAL;
                goto out_unlock;
        }

        WRITE_ONCE(iopt->disable_large_pages, true);
        rc = iopt_calculate_iova_alignment(iopt);
        if (rc)
                WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
        return rc;
}

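/*
 * Register an in-kernel access with the iopt. This may raise iova_alignment,
 * so the registration is undone if the stricter alignment cannot be satisfied
 * by the existing areas.
 */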
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
        u32 new_id;
        int rc;

        down_write(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);
        rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
                      GFP_KERNEL_ACCOUNT);

        if (rc)
                goto out_unlock;

        rc = iopt_calculate_iova_alignment(iopt);
        if (rc) {
                xa_erase(&iopt->access_list, new_id);
                goto out_unlock;
        }
        access->iopt_access_list_id = new_id;

out_unlock:
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
        return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
                        struct iommufd_access *access,
                        u32 iopt_access_list_id)
{
        down_write(&iopt->domains_rwsem);
        down_write(&iopt->iova_rwsem);
        WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
        WARN_ON(iopt_calculate_iova_alignment(iopt));
        up_write(&iopt->iova_rwsem);
        up_write(&iopt->domains_rwsem);
}

/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
                                        struct device *dev,
                                        phys_addr_t *sw_msi_start)
{
        struct iommu_resv_region *resv;
        LIST_HEAD(resv_regions);
        unsigned int num_hw_msi = 0;
        unsigned int num_sw_msi = 0;
        int rc;

        if (iommufd_should_fail())
                return -EINVAL;

        down_write(&iopt->iova_rwsem);
        /* FIXME: drivers allocate memory but there is no failure propagated */
        iommu_get_resv_regions(dev, &resv_regions);

        list_for_each_entry(resv, &resv_regions, list) {
                if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
                        continue;

                if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
                        num_hw_msi++;
                if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
                        *sw_msi_start = resv->start;
                        num_sw_msi++;
                }

                rc = iopt_reserve_iova(iopt, resv->start,
                                       resv->length - 1 + resv->start, dev);
                if (rc)
                        goto out_reserved;
        }

        /* Drivers must offer sane combinations of regions */
        if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
                rc = -EINVAL;
                goto out_reserved;
        }

        rc = 0;
        goto out_free_resv;

out_reserved:
        __iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
        iommu_put_resv_regions(dev, &resv_regions);
        up_write(&iopt->iova_rwsem);
        return rc;
}
