// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for s390 PCI devices
 *
 * Copyright IBM Corp. 2015
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/iommu-helper.h>
#include <linux/sizes.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <asm/pci_dma.h>

#include "dma-iommu.h"

static const struct iommu_ops s390_iommu_ops;

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;

static u64 s390_iommu_aperture;
static u32 s390_iommu_aperture_factor = 1;

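/**
 * struct s390_domain - paging domain for s390 PCI devices
 * @domain:	generic IOMMU domain, embedded so to_s390_domain() can use
 *		container_of()
 * @devices:	RCU-protected list of attached zpci devices
 * @ctrs:	per-domain statistics counters (mapped/unmapped pages, RPCITs)
 * @dma_table:	root (region table) of the I/O address translation table
 * @list_lock:	serializes additions to and removals from @devices
 * @rcu:	used to defer freeing the domain past concurrent RCU readers
 */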
struct s390_domain {
	struct iommu_domain	domain;
	struct list_head	devices;
	struct zpci_iommu_ctrs	ctrs;
	unsigned long		*dma_table;
	spinlock_t		list_lock;
	struct rcu_head		rcu;
};

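/*
 * An IOVA is translated through a three-level table: a region table entry
 * selects a segment table, a segment table entry selects a page table, and
 * a page table entry holds the page frame absolute address. The helpers
 * below extract the region-table (rtx), segment-table (sx) and page-table
 * (px) indexes from an IOVA; the shift and mask values come from
 * <asm/pci_dma.h>.
 */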
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
}

static inline unsigned int calc_sx(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
}

static inline unsigned int calc_px(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
}

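/*
 * The helpers below manipulate a table entry through a local copy held by
 * the caller; the finished entry is only published to the shared table
 * afterwards (see the cmpxchg()/xchg() users further down), so concurrent
 * readers never observe a half-built entry.
 */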
static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
{
	*entry &= ZPCI_PTE_FLAG_MASK;
	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
}

static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
{
	*entry &= ZPCI_RTE_FLAG_MASK;
	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
	*entry |= ZPCI_TABLE_TYPE_RTX;
}

static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
{
	*entry &= ZPCI_STE_FLAG_MASK;
	*entry |= (pto & ZPCI_STE_ADDR_MASK);
	*entry |= ZPCI_TABLE_TYPE_SX;
}

static inline void validate_rt_entry(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_VALID_MASK;
	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
	*entry |= ZPCI_TABLE_VALID;
	*entry |= ZPCI_TABLE_LEN_RTX;
}

static inline void validate_st_entry(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_VALID_MASK;
	*entry |= ZPCI_TABLE_VALID;
}

static inline void invalidate_pt_entry(unsigned long *entry)
{
	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
	*entry &= ~ZPCI_PTE_VALID_MASK;
	*entry |= ZPCI_PTE_INVALID;
}

static inline void validate_pt_entry(unsigned long *entry)
{
	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
	*entry &= ~ZPCI_PTE_VALID_MASK;
	*entry |= ZPCI_PTE_VALID;
}

static inline void entry_set_protected(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_PROT_MASK;
	*entry |= ZPCI_TABLE_PROTECTED;
}

static inline void entry_clr_protected(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_PROT_MASK;
	*entry |= ZPCI_TABLE_UNPROTECTED;
}

static inline int reg_entry_isvalid(unsigned long entry)
{
	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
}

static inline int pt_entry_isvalid(unsigned long entry)
{
	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}

static inline unsigned long *get_rt_sto(unsigned long entry)
{
	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
	else
		return NULL;
}

static inline unsigned long *get_st_pto(unsigned long entry)
{
	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
	else
		return NULL;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
						   ZPCI_TABLE_SIZE,
						   ZPCI_TABLE_ALIGN,
						   0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
						 ZPCI_PT_SIZE,
						 ZPCI_PT_ALIGN,
						 0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, gfp);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

static void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

static unsigned long *dma_alloc_page_table(gfp_t gfp)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, gfp);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

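/*
 * Lower-level tables are populated locklessly: a new table is allocated,
 * a fully built entry pointing to it is installed with cmpxchg() against
 * the invalid value, and if another CPU won the race the local allocation
 * is freed and the winner's table is used instead.
 */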
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
{
	unsigned long old_rte, rte;
	unsigned long *sto;

	rte = READ_ONCE(*rtep);
	if (reg_entry_isvalid(rte)) {
		sto = get_rt_sto(rte);
	} else {
		sto = dma_alloc_cpu_table(gfp);
		if (!sto)
			return NULL;

		set_rt_sto(&rte, virt_to_phys(sto));
		validate_rt_entry(&rte);
		entry_clr_protected(&rte);

		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
		if (old_rte != ZPCI_TABLE_INVALID) {
			/* Someone else was faster, use theirs */
			dma_free_cpu_table(sto);
			sto = get_rt_sto(old_rte);
		}
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
{
	unsigned long old_ste, ste;
	unsigned long *pto;

	ste = READ_ONCE(*step);
	if (reg_entry_isvalid(ste)) {
		pto = get_st_pto(ste);
	} else {
		pto = dma_alloc_page_table(gfp);
		if (!pto)
			return NULL;
		set_st_pto(&ste, virt_to_phys(pto));
		validate_st_entry(&ste);
		entry_clr_protected(&ste);

		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
		if (old_ste != ZPCI_TABLE_INVALID) {
			/* Someone else was faster, use theirs */
			dma_free_page_table(pto);
			pto = get_st_pto(old_ste);
		}
	}
	return pto;
}

static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx], gfp);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

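/*
 * Build the new page table entry in a local copy first and publish it with
 * xchg(), so concurrent readers only ever see a completely valid or a
 * completely invalid entry.
 */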
static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
{
	unsigned long pte;

	pte = READ_ONCE(*ptep);
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(&pte);
	} else {
		set_pt_pfaa(&pte, page_addr);
		validate_pt_entry(&pte);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(&pte);
	else
		entry_clr_protected(&pte);

	xchg(ptep, pte);
}

static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct s390_domain, domain);
}

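/*
 * All zPCI DMA is cache coherent. Deferred IOTLB flushing is advertised
 * for all function types except ISM, which is presumed here to require
 * strictly synchronous invalidation.
 */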
static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);

	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		return true;
	case IOMMU_CAP_DEFERRED_FLUSH:
		return zdev->pft != PCI_FUNC_TYPE_ISM;
	default:
		return false;
	}
}

static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
{
	struct s390_domain *s390_domain;

	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
	if (!s390_domain)
		return NULL;

	s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
	if (!s390_domain->dma_table) {
		kfree(s390_domain);
		return NULL;
	}
	s390_domain->domain.geometry.force_aperture = true;
	s390_domain->domain.geometry.aperture_start = 0;
	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;

	spin_lock_init(&s390_domain->list_lock);
	INIT_LIST_HEAD_RCU(&s390_domain->devices);

	return &s390_domain->domain;
}

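/*
 * Freeing the domain is deferred via call_rcu() so that IOTLB flushers
 * still walking the domain's device list under rcu_read_lock() never see
 * freed translation tables.
 */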
static void s390_iommu_rcu_free_domain(struct rcu_head *head)
{
	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);

	dma_cleanup_tables(s390_domain->dma_table);
	kfree(s390_domain);
}

static void s390_domain_free(struct iommu_domain *domain)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);

	rcu_read_lock();
	WARN_ON(!list_empty(&s390_domain->devices));
	rcu_read_unlock();

	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}

static void s390_iommu_detach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev = to_zpci_dev(dev);
	unsigned long flags;

	spin_lock_irqsave(&s390_domain->list_lock, flags);
	list_del_rcu(&zdev->iommu_list);
	spin_unlock_irqrestore(&s390_domain->list_lock, flags);

	zpci_unregister_ioat(zdev, 0);
	zdev->s390_domain = NULL;
	zdev->dma_table = NULL;
}

static int s390_iommu_attach_device(struct iommu_domain *domain,
				    struct device *dev)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev = to_zpci_dev(dev);
	unsigned long flags;
	u8 status;
	int cc;

	if (!zdev)
		return -ENODEV;

	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
		    domain->geometry.aperture_end < zdev->start_dma))
		return -EINVAL;

	if (zdev->s390_domain)
		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);

	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				virt_to_phys(s390_domain->dma_table), &status);
	/*
	 * If the device is undergoing error recovery the reset code
	 * will re-establish the new domain.
	 */
	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
		return -EIO;

	zdev->dma_table = s390_domain->dma_table;
	zdev->s390_domain = s390_domain;

	spin_lock_irqsave(&s390_domain->list_lock, flags);
	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
	spin_unlock_irqrestore(&s390_domain->list_lock, flags);

	return 0;
}

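/*
 * IOVA space below zdev->start_dma and above zdev->end_dma lies outside
 * the range the hardware can translate for this function, so report both
 * gaps as reserved regions and keep the IOVA allocator away from them.
 */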
static void s390_iommu_get_resv_regions(struct device *dev,
					struct list_head *list)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);
	struct iommu_resv_region *region;

	if (zdev->start_dma) {
		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
						 IOMMU_RESV_RESERVED, GFP_KERNEL);
		if (!region)
			return;
		list_add_tail(&region->list, list);
	}

	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
		region = iommu_alloc_resv_region(zdev->end_dma + 1,
						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
		if (!region)
			return;
		list_add_tail(&region->list, list);
	}
}

static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
	struct zpci_dev *zdev;

	if (!dev_is_pci(dev))
		return ERR_PTR(-ENODEV);

	zdev = to_zpci_dev(dev);

	if (zdev->start_dma > zdev->end_dma ||
	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
		return ERR_PTR(-EINVAL);

	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;

	if (zdev->tlb_refresh)
		dev->iommu->shadow_on_flush = 1;

	return &zdev->iommu_dev;
}

static void s390_iommu_release_device(struct device *dev)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);

	/*
	 * release_device is expected to detach any domain currently attached
	 * to the device, but keep it attached to other devices in the group.
	 */
	if (zdev)
		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
}

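/*
 * Refresh the I/O TLB for the device's whole DMA aperture with a single
 * RPCIT; the function handle occupies the upper 32 bits of the first
 * operand.
 */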
static int zpci_refresh_all(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
				  zdev->end_dma - zdev->start_dma + 1);
}

static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev;

	rcu_read_lock();
	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
		atomic64_inc(&s390_domain->ctrs.global_rpcits);
		zpci_refresh_all(zdev);
	}
	rcu_read_unlock();
}

static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
				  struct iommu_iotlb_gather *gather)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	size_t size = gather->end - gather->start + 1;
	struct zpci_dev *zdev;

	/* If nothing was ever added to the gather there is nothing to flush */
	if (!gather->end)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
		atomic64_inc(&s390_domain->ctrs.sync_rpcits);
		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
				   size);
	}
	rcu_read_unlock();
}

static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
				     unsigned long iova, size_t size)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev;
	int ret = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
		if (!zdev->tlb_refresh)
			continue;
		atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
		ret = zpci_refresh_trans((u64)zdev->fh << 32,
					 iova, size);
		/*
		 * Let the hypervisor discover invalidated entries,
		 * allowing it to free IOVAs and unpin pages.
		 */
		if (ret == -ENOMEM) {
			ret = zpci_refresh_all(zdev);
			if (ret)
				break;
		}
	}
	rcu_read_unlock();

	return ret;
}

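/*
 * Validate (map) nr_pages consecutive 4K pages starting at pa/dma_addr.
 * On allocation failure the entries established so far are walked again
 * and invalidated, so a failed mapping leaves no partial translations
 * behind.
 */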
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
				     phys_addr_t pa, dma_addr_t dma_addr,
				     unsigned long nr_pages, int flags,
				     gfp_t gfp)
{
	phys_addr_t page_addr = pa & PAGE_MASK;
	unsigned long *entry;
	unsigned long i;
	int rc;

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
					   gfp);
		if (unlikely(!entry)) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

	return 0;

undo_cpu_trans:
	while (i-- > 0) {
		dma_addr -= PAGE_SIZE;
		entry = dma_walk_cpu_trans(s390_domain->dma_table,
					   dma_addr, gfp);
		if (!entry)
			break;
		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
	}

	return rc;
}

static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
				       dma_addr_t dma_addr, unsigned long nr_pages)
{
	unsigned long *entry;
	unsigned long i;
	int rc = 0;

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
					   GFP_ATOMIC);
		if (unlikely(!entry)) {
			rc = -EINVAL;
			break;
		}
		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
		dma_addr += PAGE_SIZE;
	}

	return rc;
}

static int s390_iommu_map_pages(struct iommu_domain *domain,
				unsigned long iova, phys_addr_t paddr,
				size_t pgsize, size_t pgcount,
				int prot, gfp_t gfp, size_t *mapped)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	size_t size = pgcount << __ffs(pgsize);
	int flags = ZPCI_PTE_VALID, rc = 0;

	if (pgsize != SZ_4K)
		return -EINVAL;

	if (iova < s390_domain->domain.geometry.aperture_start ||
	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
		return -EINVAL;

	if (!IS_ALIGNED(iova | paddr, pgsize))
		return -EINVAL;

	if (!(prot & IOMMU_WRITE))
		flags |= ZPCI_TABLE_PROTECTED;

	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
				       pgcount, flags, gfp);
	if (!rc) {
		*mapped = size;
		atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
	}

	return rc;
}

static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
					   dma_addr_t iova)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	unsigned long *rto, *sto, *pto;
	unsigned long ste, pte, rte;
	unsigned int rtx, sx, px;
	phys_addr_t phys = 0;

	if (iova < domain->geometry.aperture_start ||
	    iova > domain->geometry.aperture_end)
		return 0;

	rtx = calc_rtx(iova);
	sx = calc_sx(iova);
	px = calc_px(iova);
	rto = s390_domain->dma_table;

	rte = READ_ONCE(rto[rtx]);
	if (reg_entry_isvalid(rte)) {
		sto = get_rt_sto(rte);
		ste = READ_ONCE(sto[sx]);
		if (reg_entry_isvalid(ste)) {
			pto = get_st_pto(ste);
			pte = READ_ONCE(pto[px]);
			if (pt_entry_isvalid(pte))
				phys = pte & ZPCI_PTE_ADDR_MASK;
		}
	}

	return phys;
}

static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
				     unsigned long iova,
				     size_t pgsize, size_t pgcount,
				     struct iommu_iotlb_gather *gather)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	size_t size = pgcount << __ffs(pgsize);
	int rc;

	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
		    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
		return 0;

	rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
	if (rc)
		return 0;

	iommu_iotlb_gather_add_range(gather, iova, size);
	atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);

	return size;
}

static void s390_iommu_probe_finalize(struct device *dev)
{
	iommu_setup_dma_ops(dev, 0, U64_MAX);
}

struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
	if (!zdev || !zdev->s390_domain)
		return NULL;
	return &zdev->s390_domain->ctrs;
}

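/*
 * Register the device with the IOMMU core and clamp its DMA range: the
 * usable aperture is the smallest of the global s390_iommu_aperture limit,
 * the space addressable through the region table, and the device's own
 * start_dma..end_dma window.
 */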
int zpci_init_iommu(struct zpci_dev *zdev)
{
	u64 aperture_size;
	int rc = 0;

	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
				    "s390-iommu.%08x", zdev->fid);
	if (rc)
		goto out_err;

	rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
	if (rc)
		goto out_sysfs;

	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	aperture_size = min3(s390_iommu_aperture,
			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
			     zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + aperture_size - 1;

	return 0;

out_sysfs:
	iommu_device_sysfs_remove(&zdev->iommu_dev);

out_err:
	return rc;
}

void zpci_destroy_iommu(struct zpci_dev *zdev)
{
	iommu_device_unregister(&zdev->iommu_dev);
	iommu_device_sysfs_remove(&zdev->iommu_dev);
}

static int __init s390_iommu_setup(char *str)
{
	if (!strcmp(str, "strict")) {
		pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
		iommu_set_dma_strict();
	}
	return 1;
}

__setup("s390_iommu=", s390_iommu_setup);

static int __init s390_iommu_aperture_setup(char *str)
{
	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
		s390_iommu_aperture_factor = 1;
	return 1;
}

__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);

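/*
 * The aperture defaults to the size of usable memory (high_memory) scaled
 * by s390_iommu_aperture_factor; a factor of 0 removes the limit entirely.
 */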
static int __init s390_iommu_init(void)
{
	int rc;

	iommu_dma_forcedac = true;
	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
	if (!s390_iommu_aperture_factor)
		s390_iommu_aperture = ULONG_MAX;
	else
		s390_iommu_aperture *= s390_iommu_aperture_factor;

	rc = dma_alloc_cpu_table_caches();
	if (rc)
		return rc;

	return 0;
}
subsys_initcall(s390_iommu_init);

static const struct iommu_ops s390_iommu_ops = {
	.capable = s390_iommu_capable,
	.domain_alloc_paging = s390_domain_alloc_paging,
	.probe_device = s390_iommu_probe_device,
	.probe_finalize = s390_iommu_probe_finalize,
	.release_device = s390_iommu_release_device,
	.device_group = generic_device_group,
	.pgsize_bitmap = SZ_4K,
	.get_resv_regions = s390_iommu_get_resv_regions,
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev = s390_iommu_attach_device,
		.map_pages = s390_iommu_map_pages,
		.unmap_pages = s390_iommu_unmap_pages,
		.flush_iotlb_all = s390_iommu_flush_iotlb_all,
		.iotlb_sync = s390_iommu_iotlb_sync,
		.iotlb_sync_map = s390_iommu_iotlb_sync_map,
		.iova_to_phys = s390_iommu_iova_to_phys,
		.free = s390_domain_free,
	}
};
804 | |