// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

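/*
 * Add a mapping for [start, last] to the domain's IOTLB and pin a reference
 * to the backing file so it stays valid for the lifetime of the entry.
 */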
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
                                 u64 start, u64 last,
                                 u64 addr, unsigned int perm,
                                 struct file *file, u64 offset)
{
        struct vdpa_map_file *map_file;
        int ret;

        map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
        if (!map_file)
                return -ENOMEM;

        map_file->file = get_file(file);
        map_file->offset = offset;

        ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
                                        addr, perm, map_file);
        if (ret) {
                fput(map_file->file);
                kfree(map_file);
                return ret;
        }
        return 0;
}

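/*
 * Remove every IOTLB entry overlapping [start, last] and drop the file
 * references taken when the entries were added.
 */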
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
                                  u64 start, u64 last)
{
        struct vdpa_map_file *map_file;
        struct vhost_iotlb_map *map;

        while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
                map_file = (struct vdpa_map_file *)map->opaque;
                fput(map_file->file);
                kfree(map_file);
                vhost_iotlb_map_free(domain->iotlb, map);
        }
}

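/*
 * Replace the domain's IOTLB contents with the entries from @iotlb.
 * On failure, everything added so far is rolled back.
 */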
int vduse_domain_set_map(struct vduse_iova_domain *domain,
                         struct vhost_iotlb *iotlb)
{
        struct vdpa_map_file *map_file;
        struct vhost_iotlb_map *map;
        u64 start = 0ULL, last = ULLONG_MAX;
        int ret;

        spin_lock(&domain->iotlb_lock);
        vduse_iotlb_del_range(domain, start, last);

        for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
             map = vhost_iotlb_itree_next(map, start, last)) {
                map_file = (struct vdpa_map_file *)map->opaque;
                ret = vduse_iotlb_add_range(domain, map->start, map->last,
                                            map->addr, map->perm,
                                            map_file->file,
                                            map_file->offset);
                if (ret)
                        goto err;
        }
        spin_unlock(&domain->iotlb_lock);

        return 0;
err:
        vduse_iotlb_del_range(domain, start, last);
        spin_unlock(&domain->iotlb_lock);
        return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
                            struct vhost_iotlb *iotlb)
{
        struct vhost_iotlb_map *map;
        u64 start = 0ULL, last = ULLONG_MAX;

        spin_lock(&domain->iotlb_lock);
        for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
             map = vhost_iotlb_itree_next(map, start, last)) {
                vduse_iotlb_del_range(domain, map->start, map->last);
        }
        spin_unlock(&domain->iotlb_lock);
}

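/*
 * Record the original physical address for each bounce page covering
 * [iova, iova + size), allocating bounce pages on demand.
 */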
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
                                        u64 iova, u64 size, u64 paddr)
{
        struct vduse_bounce_map *map;
        u64 last = iova + size - 1;

        while (iova <= last) {
                map = &domain->bounce_maps[iova >> PAGE_SHIFT];
                if (!map->bounce_page) {
                        map->bounce_page = alloc_page(GFP_ATOMIC);
                        if (!map->bounce_page)
                                return -ENOMEM;
                }
                map->orig_phys = paddr;
                paddr += PAGE_SIZE;
                iova += PAGE_SIZE;
        }
        return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
                                           u64 iova, u64 size)
{
        struct vduse_bounce_map *map;
        u64 last = iova + size - 1;

        while (iova <= last) {
                map = &domain->bounce_maps[iova >> PAGE_SHIFT];
                map->orig_phys = INVALID_PHYS_ADDR;
                iova += PAGE_SIZE;
        }
}

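/*
 * Copy @size bytes between the original buffer at physical address @orig and
 * @addr, one page at a time: DMA_TO_DEVICE copies from the original pages
 * into @addr, any other direction copies from @addr back.
 */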
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
                      enum dma_data_direction dir)
{
        unsigned long pfn = PFN_DOWN(orig);
        unsigned int offset = offset_in_page(orig);
        struct page *page;
        unsigned int sz = 0;

        while (size) {
                sz = min_t(size_t, PAGE_SIZE - offset, size);

                page = pfn_to_page(pfn);
                if (dir == DMA_TO_DEVICE)
                        memcpy_from_page(addr, page, offset, sz);
                else
                        memcpy_to_page(page, offset, addr, sz);

                size -= sz;
                pfn++;
                addr += sz;
                offset = 0;
        }
}

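/*
 * Bounce the data in [iova, iova + size) between the bounce pages and the
 * original pages recorded by vduse_domain_map_bounce_page().
 */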
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
                                dma_addr_t iova, size_t size,
                                enum dma_data_direction dir)
{
        struct vduse_bounce_map *map;
        unsigned int offset;
        void *addr;
        size_t sz;

        if (iova >= domain->bounce_size)
                return;

        while (size) {
                map = &domain->bounce_maps[iova >> PAGE_SHIFT];
                offset = offset_in_page(iova);
                sz = min_t(size_t, PAGE_SIZE - offset, size);

                if (WARN_ON(!map->bounce_page ||
                            map->orig_phys == INVALID_PHYS_ADDR))
                        return;

                addr = kmap_local_page(map->bounce_page);
                do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
                kunmap_local(addr);
                size -= sz;
                iova += sz;
        }
}

static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
        u64 start = iova & PAGE_MASK;
        u64 last = start + PAGE_SIZE - 1;
        struct vhost_iotlb_map *map;
        struct page *page = NULL;

        spin_lock(&domain->iotlb_lock);
        map = vhost_iotlb_itree_first(domain->iotlb, start, last);
        if (!map)
                goto out;

        page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
        get_page(page);
out:
        spin_unlock(&domain->iotlb_lock);

        return page;
}

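/*
 * Look up the kernel bounce page backing @iova. Returns NULL if the bounce
 * buffer is currently backed by user pages or no page has been allocated yet.
 */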
static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
        struct vduse_bounce_map *map;
        struct page *page = NULL;

        read_lock(&domain->bounce_lock);
        map = &domain->bounce_maps[iova >> PAGE_SHIFT];
        if (domain->user_bounce_pages || !map->bounce_page)
                goto out;

        page = map->bounce_page;
        get_page(page);
out:
        read_unlock(&domain->bounce_lock);

        return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
        struct vduse_bounce_map *map;
        unsigned long pfn, bounce_pfns;

        bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

        for (pfn = 0; pfn < bounce_pfns; pfn++) {
                map = &domain->bounce_maps[pfn];
                if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
                        continue;

                if (!map->bounce_page)
                        continue;

                __free_page(map->bounce_page);
                map->bounce_page = NULL;
        }
}

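/*
 * Switch the bounce buffer to user-provided pages. Data held in kernel bounce
 * pages that are still in use is copied over before those pages are freed.
 */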
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
                                       struct page **pages, int count)
{
        struct vduse_bounce_map *map;
        int i, ret;

        /* Now we don't support partial mapping */
        if (count != (domain->bounce_size >> PAGE_SHIFT))
                return -EINVAL;

        write_lock(&domain->bounce_lock);
        ret = -EEXIST;
        if (domain->user_bounce_pages)
                goto out;

        for (i = 0; i < count; i++) {
                map = &domain->bounce_maps[i];
                if (map->bounce_page) {
                        /* Copy kernel page to user page if it's in use */
                        if (map->orig_phys != INVALID_PHYS_ADDR)
                                memcpy_to_page(pages[i], 0,
                                               page_address(map->bounce_page),
                                               PAGE_SIZE);
                        __free_page(map->bounce_page);
                }
                map->bounce_page = pages[i];
                get_page(pages[i]);
        }
        domain->user_bounce_pages = true;
        ret = 0;
out:
        write_unlock(&domain->bounce_lock);

        return ret;
}

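/*
 * Switch back from user-provided bounce pages to kernel pages, copying any
 * in-use data into freshly allocated kernel pages and dropping the user
 * page references.
 */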
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
        struct vduse_bounce_map *map;
        unsigned long i, count;

        write_lock(&domain->bounce_lock);
        if (!domain->user_bounce_pages)
                goto out;

        count = domain->bounce_size >> PAGE_SHIFT;
        for (i = 0; i < count; i++) {
                struct page *page = NULL;

                map = &domain->bounce_maps[i];
                if (WARN_ON(!map->bounce_page))
                        continue;

                /* Copy user page to kernel page if it's in use */
                if (map->orig_phys != INVALID_PHYS_ADDR) {
                        page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
                        memcpy_from_page(page_address(page),
                                         map->bounce_page, 0, PAGE_SIZE);
                }
                put_page(map->bounce_page);
                map->bounce_page = page;
        }
        domain->user_bounce_pages = false;
out:
        write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
        if (!domain->bounce_map)
                return;

        spin_lock(&domain->iotlb_lock);
        if (!domain->bounce_map)
                goto unlock;

        vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
        domain->bounce_map = 0;
unlock:
        spin_unlock(&domain->iotlb_lock);
}

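/*
 * Lazily install the IOTLB entry that exposes the whole bounce area, backed
 * by the domain's anonymous file, the first time it is needed.
 */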
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
        int ret = 0;

        if (domain->bounce_map)
                return 0;

        spin_lock(&domain->iotlb_lock);
        if (domain->bounce_map)
                goto unlock;

        ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
                                    0, VHOST_MAP_RW, domain->file, 0);
        if (ret)
                goto unlock;

        domain->bounce_map = 1;
unlock:
        spin_unlock(&domain->iotlb_lock);
        return ret;
}

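/*
 * Allocate an IOVA range of at least @size bytes below @limit from @iovad.
 * Returns 0 on failure.
 */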
static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
                        unsigned long size, unsigned long limit)
{
        unsigned long shift = iova_shift(iovad);
        unsigned long iova_len = iova_align(iovad, size) >> shift;
        unsigned long iova_pfn;

        iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

        return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
                                   dma_addr_t iova, size_t size)
{
        unsigned long shift = iova_shift(iovad);
        unsigned long iova_len = iova_align(iovad, size) >> shift;

        free_iova_fast(iovad, iova >> shift, iova_len);
}

void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain,
                                         dma_addr_t dma_addr, size_t size,
                                         enum dma_data_direction dir)
{
        read_lock(&domain->bounce_lock);
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE);
        read_unlock(&domain->bounce_lock);
}

void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain,
                                      dma_addr_t dma_addr, size_t size,
                                      enum dma_data_direction dir)
{
        read_lock(&domain->bounce_lock);
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
        read_unlock(&domain->bounce_lock);
}

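/*
 * Map a page for streaming DMA through the bounce buffer: allocate an IOVA
 * from the stream domain, record the original physical address and, unless
 * DMA_ATTR_SKIP_CPU_SYNC is set, bounce the data towards the device.
 */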
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
                                 struct page *page, unsigned long offset,
                                 size_t size, enum dma_data_direction dir,
                                 unsigned long attrs)
{
        struct iova_domain *iovad = &domain->stream_iovad;
        unsigned long limit = domain->bounce_size - 1;
        phys_addr_t pa = page_to_phys(page) + offset;
        dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

        if (!iova)
                return DMA_MAPPING_ERROR;

        if (vduse_domain_init_bounce_map(domain))
                goto err;

        read_lock(&domain->bounce_lock);
        if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
                goto err_unlock;

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
            (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
                vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

        read_unlock(&domain->bounce_lock);

        return iova;
err_unlock:
        read_unlock(&domain->bounce_lock);
err:
        vduse_domain_free_iova(iovad, iova, size);
        return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
                             dma_addr_t dma_addr, size_t size,
                             enum dma_data_direction dir, unsigned long attrs)
{
        struct iova_domain *iovad = &domain->stream_iovad;

        read_lock(&domain->bounce_lock);
        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
            (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
                vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

        vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
        read_unlock(&domain->bounce_lock);
        vduse_domain_free_iova(iovad, dma_addr, size);
}

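/*
 * Allocate a coherent buffer: back it with physically contiguous kernel
 * pages, reserve an IOVA from the consistent domain and publish the mapping
 * in the IOTLB so userspace can reach it through the domain file.
 */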
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
                                  size_t size, dma_addr_t *dma_addr,
                                  gfp_t flag, unsigned long attrs)
{
        struct iova_domain *iovad = &domain->consistent_iovad;
        unsigned long limit = domain->iova_limit;
        dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
        void *orig = alloc_pages_exact(size, flag);

        if (!iova || !orig)
                goto err;

        spin_lock(&domain->iotlb_lock);
        if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
                                  virt_to_phys(orig), VHOST_MAP_RW,
                                  domain->file, (u64)iova)) {
                spin_unlock(&domain->iotlb_lock);
                goto err;
        }
        spin_unlock(&domain->iotlb_lock);

        *dma_addr = iova;

        return orig;
err:
        *dma_addr = DMA_MAPPING_ERROR;
        if (orig)
                free_pages_exact(orig, size);
        if (iova)
                vduse_domain_free_iova(iovad, iova, size);

        return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
                                void *vaddr, dma_addr_t dma_addr,
                                unsigned long attrs)
{
        struct iova_domain *iovad = &domain->consistent_iovad;
        struct vhost_iotlb_map *map;
        struct vdpa_map_file *map_file;
        phys_addr_t pa;

        spin_lock(&domain->iotlb_lock);
        map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
                                      (u64)dma_addr + size - 1);
        if (WARN_ON(!map)) {
                spin_unlock(&domain->iotlb_lock);
                return;
        }
        map_file = (struct vdpa_map_file *)map->opaque;
        fput(map_file->file);
        kfree(map_file);
        pa = map->addr;
        vhost_iotlb_map_free(domain->iotlb, map);
        spin_unlock(&domain->iotlb_lock);

        vduse_domain_free_iova(iovad, dma_addr, size);
        free_pages_exact(phys_to_virt(pa), size);
}

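/*
 * Fault handler for userspace mappings of the domain file: faults below
 * bounce_size resolve to bounce pages, faults above are resolved through
 * the IOTLB to the pages backing coherent allocations.
 */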
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
        struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
        unsigned long iova = vmf->pgoff << PAGE_SHIFT;
        struct page *page;

        if (!domain)
                return VM_FAULT_SIGBUS;

        if (iova < domain->bounce_size)
                page = vduse_domain_get_bounce_page(domain, iova);
        else
                page = vduse_domain_get_coherent_page(domain, iova);

        if (!page)
                return VM_FAULT_SIGBUS;

        vmf->page = page;

        return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
        .fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct vduse_iova_domain *domain = file->private_data;

        vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
        vma->vm_private_data = domain;
        vma->vm_ops = &vduse_domain_mmap_ops;

        return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
        struct vduse_iova_domain *domain = file->private_data;

        spin_lock(&domain->iotlb_lock);
        vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
        vduse_domain_remove_user_bounce_pages(domain);
        vduse_domain_free_kernel_bounce_pages(domain);
        spin_unlock(&domain->iotlb_lock);
        put_iova_domain(&domain->stream_iovad);
        put_iova_domain(&domain->consistent_iovad);
        vhost_iotlb_free(domain->iotlb);
        vfree(domain->bounce_maps);
        kfree(domain);

        return 0;
}

static const struct file_operations vduse_domain_fops = {
        .owner = THIS_MODULE,
        .mmap = vduse_domain_mmap,
        .release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
        fput(domain->file);
}

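/*
 * Create an IOVA domain: [0, bounce_size) is the bounce area used for
 * streaming DMA, [bounce_size, iova_limit) serves coherent allocations.
 * The anonymous file is what userspace mmap()s to access both regions.
 */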
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
        struct vduse_iova_domain *domain;
        struct file *file;
        struct vduse_bounce_map *map;
        unsigned long pfn, bounce_pfns;
        int ret;

        bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
        if (iova_limit <= bounce_size)
                return NULL;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;

        domain->iotlb = vhost_iotlb_alloc(0, 0);
        if (!domain->iotlb)
                goto err_iotlb;

        domain->iova_limit = iova_limit;
        domain->bounce_size = PAGE_ALIGN(bounce_size);
        domain->bounce_maps = vzalloc(bounce_pfns *
                                      sizeof(struct vduse_bounce_map));
        if (!domain->bounce_maps)
                goto err_map;

        for (pfn = 0; pfn < bounce_pfns; pfn++) {
                map = &domain->bounce_maps[pfn];
                map->orig_phys = INVALID_PHYS_ADDR;
        }
        file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
                                  domain, O_RDWR);
        if (IS_ERR(file))
                goto err_file;

        domain->file = file;
        rwlock_init(&domain->bounce_lock);
        spin_lock_init(&domain->iotlb_lock);
        init_iova_domain(&domain->stream_iovad,
                         PAGE_SIZE, IOVA_START_PFN);
        ret = iova_domain_init_rcaches(&domain->stream_iovad);
        if (ret)
                goto err_iovad_stream;
        init_iova_domain(&domain->consistent_iovad,
                         PAGE_SIZE, bounce_pfns);
        ret = iova_domain_init_rcaches(&domain->consistent_iovad);
        if (ret)
                goto err_iovad_consistent;

        return domain;
err_iovad_consistent:
        put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
        fput(file);
err_file:
        vfree(domain->bounce_maps);
err_map:
        vhost_iotlb_free(domain->iotlb);
err_iotlb:
        kfree(domain);
        return NULL;
}

int vduse_domain_init(void)
{
        return iova_cache_get();
}

void vduse_domain_exit(void)
{
        iova_cache_put();
}


/* Source: linux/drivers/vdpa/vdpa_user/iova_domain.c */