1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation |
4 | * |
5 | * Rewrite, cleanup: |
6 | * |
7 | * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation |
8 | * Copyright (C) 2006 Olof Johansson <olof@lixom.net> |
9 | * |
10 | * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. |
11 | */ |
12 | |
13 | #include <linux/init.h> |
14 | #include <linux/types.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/mm.h> |
17 | #include <linux/memblock.h> |
18 | #include <linux/spinlock.h> |
19 | #include <linux/string.h> |
20 | #include <linux/pci.h> |
21 | #include <linux/dma-mapping.h> |
22 | #include <linux/crash_dump.h> |
23 | #include <linux/memory.h> |
24 | #include <linux/of.h> |
25 | #include <linux/of_address.h> |
26 | #include <linux/iommu.h> |
27 | #include <linux/rculist.h> |
28 | #include <asm/io.h> |
29 | #include <asm/prom.h> |
30 | #include <asm/rtas.h> |
31 | #include <asm/iommu.h> |
32 | #include <asm/pci-bridge.h> |
33 | #include <asm/machdep.h> |
34 | #include <asm/firmware.h> |
35 | #include <asm/tce.h> |
36 | #include <asm/ppc-pci.h> |
37 | #include <asm/udbg.h> |
38 | #include <asm/mmzone.h> |
39 | #include <asm/plpar_wrappers.h> |
40 | |
41 | #include "pseries.h" |
42 | |
/*
 * Positions of the values read from the "ibm,ddw-applicable" device-tree
 * property (see remove_ddw()). The entry at DDW_REMOVE_PE_DMA_WIN is used
 * as the token handed to rtas_call(); the other two entries are consumed
 * outside this part of the file.
 */
enum {
	DDW_QUERY_PE_DMA_WIN,		/* index 0 */
	DDW_CREATE_PE_DMA_WIN,		/* index 1 */
	DDW_REMOVE_PE_DMA_WIN,		/* index 2 */

	DDW_APPLICABLE_SIZE		/* number of entries; must stay last */
};
50 | |
/*
 * Indices of the DDW extension values. The code that reads them is not
 * part of this section of the file.
 */
enum {
	DDW_EXT_SIZE,			/* index 0 */
	DDW_EXT_RESET_DMA_WIN,		/* index 1 */
	DDW_EXT_QUERY_OUT_SIZE		/* index 2 */
};
56 | |
57 | static struct iommu_table *iommu_pseries_alloc_table(int node) |
58 | { |
59 | struct iommu_table *tbl; |
60 | |
61 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); |
62 | if (!tbl) |
63 | return NULL; |
64 | |
65 | INIT_LIST_HEAD_RCU(list: &tbl->it_group_list); |
66 | kref_init(kref: &tbl->it_kref); |
67 | return tbl; |
68 | } |
69 | |
70 | static struct iommu_table_group *iommu_pseries_alloc_group(int node) |
71 | { |
72 | struct iommu_table_group *table_group; |
73 | |
74 | table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); |
75 | if (!table_group) |
76 | return NULL; |
77 | |
78 | #ifdef CONFIG_IOMMU_API |
79 | table_group->ops = &spapr_tce_table_group_ops; |
80 | table_group->pgsizes = SZ_4K; |
81 | #endif |
82 | |
83 | table_group->tables[0] = iommu_pseries_alloc_table(node); |
84 | if (table_group->tables[0]) |
85 | return table_group; |
86 | |
87 | kfree(objp: table_group); |
88 | return NULL; |
89 | } |
90 | |
91 | static void iommu_pseries_free_group(struct iommu_table_group *table_group, |
92 | const char *node_name) |
93 | { |
94 | if (!table_group) |
95 | return; |
96 | |
97 | #ifdef CONFIG_IOMMU_API |
98 | if (table_group->group) { |
99 | iommu_group_put(group: table_group->group); |
100 | BUG_ON(table_group->group); |
101 | } |
102 | #endif |
103 | |
104 | /* Default DMA window table is at index 0, while DDW at 1. SR-IOV |
105 | * adapters only have table on index 1. |
106 | */ |
107 | if (table_group->tables[0]) |
108 | iommu_tce_table_put(table_group->tables[0]); |
109 | |
110 | if (table_group->tables[1]) |
111 | iommu_tce_table_put(table_group->tables[1]); |
112 | |
113 | kfree(objp: table_group); |
114 | } |
115 | |
116 | static int tce_build_pSeries(struct iommu_table *tbl, long index, |
117 | long npages, unsigned long uaddr, |
118 | enum dma_data_direction direction, |
119 | unsigned long attrs) |
120 | { |
121 | u64 proto_tce; |
122 | __be64 *tcep; |
123 | u64 rpn; |
124 | const unsigned long tceshift = tbl->it_page_shift; |
125 | const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl); |
126 | |
127 | proto_tce = TCE_PCI_READ; // Read allowed |
128 | |
129 | if (direction != DMA_TO_DEVICE) |
130 | proto_tce |= TCE_PCI_WRITE; |
131 | |
132 | tcep = ((__be64 *)tbl->it_base) + index; |
133 | |
134 | while (npages--) { |
135 | /* can't move this out since we might cross MEMBLOCK boundary */ |
136 | rpn = __pa(uaddr) >> tceshift; |
137 | *tcep = cpu_to_be64(proto_tce | rpn << tceshift); |
138 | |
139 | uaddr += pagesize; |
140 | tcep++; |
141 | } |
142 | return 0; |
143 | } |
144 | |
145 | |
146 | static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) |
147 | { |
148 | __be64 *tcep; |
149 | |
150 | tcep = ((__be64 *)tbl->it_base) + index; |
151 | |
152 | while (npages--) |
153 | *(tcep++) = 0; |
154 | } |
155 | |
156 | static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) |
157 | { |
158 | __be64 *tcep; |
159 | |
160 | tcep = ((__be64 *)tbl->it_base) + index; |
161 | |
162 | return be64_to_cpu(*tcep); |
163 | } |
164 | |
/* Needed by the build paths below to undo partially created mappings. */
static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
			       long npages);
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
				    long npages);
167 | |
168 | static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
169 | long npages, unsigned long uaddr, |
170 | enum dma_data_direction direction, |
171 | unsigned long attrs) |
172 | { |
173 | u64 rc = 0; |
174 | u64 proto_tce, tce; |
175 | u64 rpn; |
176 | int ret = 0; |
177 | long tcenum_start = tcenum, npages_start = npages; |
178 | |
179 | rpn = __pa(uaddr) >> tceshift; |
180 | proto_tce = TCE_PCI_READ; |
181 | if (direction != DMA_TO_DEVICE) |
182 | proto_tce |= TCE_PCI_WRITE; |
183 | |
184 | while (npages--) { |
185 | tce = proto_tce | rpn << tceshift; |
186 | rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); |
187 | |
188 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
189 | ret = (int)rc; |
190 | tce_free_pSeriesLP(liobn, tcenum_start, tceshift, |
191 | (npages_start - (npages + 1))); |
192 | break; |
193 | } |
194 | |
195 | if (rc && printk_ratelimit()) { |
196 | printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n" , rc); |
197 | printk("\tindex = 0x%llx\n" , (u64)liobn); |
198 | printk("\ttcenum = 0x%llx\n" , (u64)tcenum); |
199 | printk("\ttce val = 0x%llx\n" , tce ); |
200 | dump_stack(); |
201 | } |
202 | |
203 | tcenum++; |
204 | rpn++; |
205 | } |
206 | return ret; |
207 | } |
208 | |
209 | static DEFINE_PER_CPU(__be64 *, tce_page); |
210 | |
211 | static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
212 | long npages, unsigned long uaddr, |
213 | enum dma_data_direction direction, |
214 | unsigned long attrs) |
215 | { |
216 | u64 rc = 0; |
217 | u64 proto_tce; |
218 | __be64 *tcep; |
219 | u64 rpn; |
220 | long l, limit; |
221 | long tcenum_start = tcenum, npages_start = npages; |
222 | int ret = 0; |
223 | unsigned long flags; |
224 | const unsigned long tceshift = tbl->it_page_shift; |
225 | |
226 | if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { |
227 | return tce_build_pSeriesLP(liobn: tbl->it_index, tcenum, |
228 | tceshift, npages, uaddr, |
229 | direction, attrs); |
230 | } |
231 | |
232 | local_irq_save(flags); /* to protect tcep and the page behind it */ |
233 | |
234 | tcep = __this_cpu_read(tce_page); |
235 | |
236 | /* This is safe to do since interrupts are off when we're called |
237 | * from iommu_alloc{,_sg}() |
238 | */ |
239 | if (!tcep) { |
240 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
241 | /* If allocation fails, fall back to the loop implementation */ |
242 | if (!tcep) { |
243 | local_irq_restore(flags); |
244 | return tce_build_pSeriesLP(liobn: tbl->it_index, tcenum, |
245 | tceshift, |
246 | npages, uaddr, direction, attrs); |
247 | } |
248 | __this_cpu_write(tce_page, tcep); |
249 | } |
250 | |
251 | rpn = __pa(uaddr) >> tceshift; |
252 | proto_tce = TCE_PCI_READ; |
253 | if (direction != DMA_TO_DEVICE) |
254 | proto_tce |= TCE_PCI_WRITE; |
255 | |
256 | /* We can map max one pageful of TCEs at a time */ |
257 | do { |
258 | /* |
259 | * Set up the page with TCE data, looping through and setting |
260 | * the values. |
261 | */ |
262 | limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE); |
263 | |
264 | for (l = 0; l < limit; l++) { |
265 | tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); |
266 | rpn++; |
267 | } |
268 | |
269 | rc = plpar_tce_put_indirect((u64)tbl->it_index, |
270 | (u64)tcenum << tceshift, |
271 | (u64)__pa(tcep), |
272 | limit); |
273 | |
274 | npages -= limit; |
275 | tcenum += limit; |
276 | } while (npages > 0 && !rc); |
277 | |
278 | local_irq_restore(flags); |
279 | |
280 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
281 | ret = (int)rc; |
282 | tce_freemulti_pSeriesLP(tbl, tcenum_start, |
283 | (npages_start - (npages + limit))); |
284 | return ret; |
285 | } |
286 | |
287 | if (rc && printk_ratelimit()) { |
288 | printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n" , rc); |
289 | printk("\tindex = 0x%llx\n" , (u64)tbl->it_index); |
290 | printk("\tnpages = 0x%llx\n" , (u64)npages); |
291 | printk("\ttce[0] val = 0x%llx\n" , tcep[0]); |
292 | dump_stack(); |
293 | } |
294 | return ret; |
295 | } |
296 | |
297 | static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
298 | long npages) |
299 | { |
300 | u64 rc; |
301 | |
302 | while (npages--) { |
303 | rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0); |
304 | |
305 | if (rc && printk_ratelimit()) { |
306 | printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n" , rc); |
307 | printk("\tindex = 0x%llx\n" , (u64)liobn); |
308 | printk("\ttcenum = 0x%llx\n" , (u64)tcenum); |
309 | dump_stack(); |
310 | } |
311 | |
312 | tcenum++; |
313 | } |
314 | } |
315 | |
316 | |
317 | static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) |
318 | { |
319 | u64 rc; |
320 | long rpages = npages; |
321 | unsigned long limit; |
322 | |
323 | if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) |
324 | return tce_free_pSeriesLP(liobn: tbl->it_index, tcenum, |
325 | tceshift: tbl->it_page_shift, npages); |
326 | |
327 | do { |
328 | limit = min_t(unsigned long, rpages, 512); |
329 | |
330 | rc = plpar_tce_stuff((u64)tbl->it_index, |
331 | (u64)tcenum << tbl->it_page_shift, 0, limit); |
332 | |
333 | rpages -= limit; |
334 | tcenum += limit; |
335 | } while (rpages > 0 && !rc); |
336 | |
337 | if (rc && printk_ratelimit()) { |
338 | printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n" ); |
339 | printk("\trc = %lld\n" , rc); |
340 | printk("\tindex = 0x%llx\n" , (u64)tbl->it_index); |
341 | printk("\tnpages = 0x%llx\n" , (u64)npages); |
342 | dump_stack(); |
343 | } |
344 | } |
345 | |
346 | static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) |
347 | { |
348 | u64 rc; |
349 | unsigned long tce_ret; |
350 | |
351 | rc = plpar_tce_get((u64)tbl->it_index, |
352 | (u64)tcenum << tbl->it_page_shift, &tce_ret); |
353 | |
354 | if (rc && printk_ratelimit()) { |
355 | printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n" , rc); |
356 | printk("\tindex = 0x%llx\n" , (u64)tbl->it_index); |
357 | printk("\ttcenum = 0x%llx\n" , (u64)tcenum); |
358 | dump_stack(); |
359 | } |
360 | |
361 | return tce_ret; |
362 | } |
363 | |
364 | /* this is compatible with cells for the device tree property */ |
365 | struct dynamic_dma_window_prop { |
366 | __be32 liobn; /* tce table number */ |
367 | __be64 dma_base; /* address hi,lo */ |
368 | __be32 tce_shift; /* ilog2(tce_page_size) */ |
369 | __be32 window_shift; /* ilog2(tce_window_size) */ |
370 | }; |
371 | |
372 | struct dma_win { |
373 | struct device_node *device; |
374 | const struct dynamic_dma_window_prop *prop; |
375 | bool direct; |
376 | struct list_head list; |
377 | }; |
378 | |
379 | /* Dynamic DMA Window support */ |
380 | struct ddw_query_response { |
381 | u32 windows_available; |
382 | u64 largest_available_block; |
383 | u32 page_size; |
384 | u32 migration_capable; |
385 | }; |
386 | |
387 | struct ddw_create_response { |
388 | u32 liobn; |
389 | u32 addr_hi; |
390 | u32 addr_lo; |
391 | }; |
392 | |
/* All known DMA windows (struct dma_win entries). */
static LIST_HEAD(dma_win_list);

/* Guards dma_win_list against memory on/offline racing with window creation. */
static DEFINE_SPINLOCK(dma_win_list_lock);

/* Keeps a window from being initialized twice for the same device. */
static DEFINE_MUTEX(dma_win_init_mutex);
398 | |
399 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, |
400 | unsigned long num_pfn, const void *arg) |
401 | { |
402 | const struct dynamic_dma_window_prop *maprange = arg; |
403 | int rc; |
404 | u64 tce_size, num_tce, dma_offset, next; |
405 | u32 tce_shift; |
406 | long limit; |
407 | |
408 | tce_shift = be32_to_cpu(maprange->tce_shift); |
409 | tce_size = 1ULL << tce_shift; |
410 | next = start_pfn << PAGE_SHIFT; |
411 | num_tce = num_pfn << PAGE_SHIFT; |
412 | |
413 | /* round back to the beginning of the tce page size */ |
414 | num_tce += next & (tce_size - 1); |
415 | next &= ~(tce_size - 1); |
416 | |
417 | /* covert to number of tces */ |
418 | num_tce |= tce_size - 1; |
419 | num_tce >>= tce_shift; |
420 | |
421 | do { |
422 | /* |
423 | * Set up the page with TCE data, looping through and setting |
424 | * the values. |
425 | */ |
426 | limit = min_t(long, num_tce, 512); |
427 | dma_offset = next + be64_to_cpu(maprange->dma_base); |
428 | |
429 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), |
430 | dma_offset, |
431 | 0, limit); |
432 | next += limit * tce_size; |
433 | num_tce -= limit; |
434 | } while (num_tce > 0 && !rc); |
435 | |
436 | return rc; |
437 | } |
438 | |
439 | static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, |
440 | unsigned long num_pfn, const void *arg) |
441 | { |
442 | const struct dynamic_dma_window_prop *maprange = arg; |
443 | u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; |
444 | __be64 *tcep; |
445 | u32 tce_shift; |
446 | u64 rc = 0; |
447 | long l, limit; |
448 | |
449 | if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { |
450 | unsigned long tceshift = be32_to_cpu(maprange->tce_shift); |
451 | unsigned long dmastart = (start_pfn << PAGE_SHIFT) + |
452 | be64_to_cpu(maprange->dma_base); |
453 | unsigned long tcenum = dmastart >> tceshift; |
454 | unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; |
455 | void *uaddr = __va(start_pfn << PAGE_SHIFT); |
456 | |
457 | return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), |
458 | tcenum, tceshift, npages, uaddr: (unsigned long) uaddr, |
459 | direction: DMA_BIDIRECTIONAL, attrs: 0); |
460 | } |
461 | |
462 | local_irq_disable(); /* to protect tcep and the page behind it */ |
463 | tcep = __this_cpu_read(tce_page); |
464 | |
465 | if (!tcep) { |
466 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
467 | if (!tcep) { |
468 | local_irq_enable(); |
469 | return -ENOMEM; |
470 | } |
471 | __this_cpu_write(tce_page, tcep); |
472 | } |
473 | |
474 | proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; |
475 | |
476 | liobn = (u64)be32_to_cpu(maprange->liobn); |
477 | tce_shift = be32_to_cpu(maprange->tce_shift); |
478 | tce_size = 1ULL << tce_shift; |
479 | next = start_pfn << PAGE_SHIFT; |
480 | num_tce = num_pfn << PAGE_SHIFT; |
481 | |
482 | /* round back to the beginning of the tce page size */ |
483 | num_tce += next & (tce_size - 1); |
484 | next &= ~(tce_size - 1); |
485 | |
486 | /* covert to number of tces */ |
487 | num_tce |= tce_size - 1; |
488 | num_tce >>= tce_shift; |
489 | |
490 | /* We can map max one pageful of TCEs at a time */ |
491 | do { |
492 | /* |
493 | * Set up the page with TCE data, looping through and setting |
494 | * the values. |
495 | */ |
496 | limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE); |
497 | dma_offset = next + be64_to_cpu(maprange->dma_base); |
498 | |
499 | for (l = 0; l < limit; l++) { |
500 | tcep[l] = cpu_to_be64(proto_tce | next); |
501 | next += tce_size; |
502 | } |
503 | |
504 | rc = plpar_tce_put_indirect(liobn, |
505 | dma_offset, |
506 | (u64)__pa(tcep), |
507 | limit); |
508 | |
509 | num_tce -= limit; |
510 | } while (num_tce > 0 && !rc); |
511 | |
512 | /* error cleanup: caller will clear whole range */ |
513 | |
514 | local_irq_enable(); |
515 | return rc; |
516 | } |
517 | |
/*
 * Same as tce_setrange_multi_pSeriesLP(), but taking a non-const @arg.
 * Presumably this exists to match a range-walk callback signature; the
 * caller is outside this part of the file.
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	const void *maprange = arg;

	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, maprange);
}
523 | |
524 | static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, |
525 | unsigned long liobn, unsigned long win_addr, |
526 | unsigned long window_size, unsigned long page_shift, |
527 | void *base, struct iommu_table_ops *table_ops) |
528 | { |
529 | tbl->it_busno = busno; |
530 | tbl->it_index = liobn; |
531 | tbl->it_offset = win_addr >> page_shift; |
532 | tbl->it_size = window_size >> page_shift; |
533 | tbl->it_page_shift = page_shift; |
534 | tbl->it_base = (unsigned long)base; |
535 | tbl->it_blocksize = 16; |
536 | tbl->it_type = TCE_PCI; |
537 | tbl->it_ops = table_ops; |
538 | } |
539 | |
540 | struct iommu_table_ops iommu_table_pseries_ops; |
541 | |
542 | static void iommu_table_setparms(struct pci_controller *phb, |
543 | struct device_node *dn, |
544 | struct iommu_table *tbl) |
545 | { |
546 | struct device_node *node; |
547 | const unsigned long *basep; |
548 | const u32 *sizep; |
549 | |
550 | /* Test if we are going over 2GB of DMA space */ |
551 | if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) { |
552 | udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n" ); |
553 | panic(fmt: "PCI_DMA: Unexpected number of IOAs under this PHB.\n" ); |
554 | } |
555 | |
556 | node = phb->dn; |
557 | basep = of_get_property(node, name: "linux,tce-base" , NULL); |
558 | sizep = of_get_property(node, name: "linux,tce-size" , NULL); |
559 | if (basep == NULL || sizep == NULL) { |
560 | printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has " |
561 | "missing tce entries !\n" , dn); |
562 | return; |
563 | } |
564 | |
565 | iommu_table_setparms_common(tbl, busno: phb->bus->number, liobn: 0, win_addr: phb->dma_window_base_cur, |
566 | window_size: phb->dma_window_size, page_shift: IOMMU_PAGE_SHIFT_4K, |
567 | __va(*basep), table_ops: &iommu_table_pseries_ops); |
568 | |
569 | if (!is_kdump_kernel()) |
570 | memset((void *)tbl->it_base, 0, *sizep); |
571 | |
572 | phb->dma_window_base_cur += phb->dma_window_size; |
573 | } |
574 | |
575 | struct iommu_table_ops iommu_table_lpar_multi_ops; |
576 | |
577 | struct iommu_table_ops iommu_table_pseries_ops = { |
578 | .set = tce_build_pSeries, |
579 | .clear = tce_free_pSeries, |
580 | .get = tce_get_pseries |
581 | }; |
582 | |
583 | static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) |
584 | { |
585 | struct device_node *dn; |
586 | struct iommu_table *tbl; |
587 | struct device_node *isa_dn, *isa_dn_orig; |
588 | struct device_node *tmp; |
589 | struct pci_dn *pci; |
590 | int children; |
591 | |
592 | dn = pci_bus_to_OF_node(bus); |
593 | |
594 | pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n" , dn); |
595 | |
596 | if (bus->self) { |
597 | /* This is not a root bus, any setup will be done for the |
598 | * device-side of the bridge in iommu_dev_setup_pSeries(). |
599 | */ |
600 | return; |
601 | } |
602 | pci = PCI_DN(dn); |
603 | |
604 | /* Check if the ISA bus on the system is under |
605 | * this PHB. |
606 | */ |
607 | isa_dn = isa_dn_orig = of_find_node_by_type(NULL, type: "isa" ); |
608 | |
609 | while (isa_dn && isa_dn != dn) |
610 | isa_dn = isa_dn->parent; |
611 | |
612 | of_node_put(node: isa_dn_orig); |
613 | |
614 | /* Count number of direct PCI children of the PHB. */ |
615 | for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) |
616 | children++; |
617 | |
618 | pr_debug("Children: %d\n" , children); |
619 | |
620 | /* Calculate amount of DMA window per slot. Each window must be |
621 | * a power of two (due to pci_alloc_consistent requirements). |
622 | * |
623 | * Keep 256MB aside for PHBs with ISA. |
624 | */ |
625 | |
626 | if (!isa_dn) { |
627 | /* No ISA/IDE - just set window size and return */ |
628 | pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ |
629 | |
630 | while (pci->phb->dma_window_size * children > 0x80000000ul) |
631 | pci->phb->dma_window_size >>= 1; |
632 | pr_debug("No ISA/IDE, window size is 0x%llx\n" , |
633 | pci->phb->dma_window_size); |
634 | pci->phb->dma_window_base_cur = 0; |
635 | |
636 | return; |
637 | } |
638 | |
639 | /* If we have ISA, then we probably have an IDE |
640 | * controller too. Allocate a 128MB table but |
641 | * skip the first 128MB to avoid stepping on ISA |
642 | * space. |
643 | */ |
644 | pci->phb->dma_window_size = 0x8000000ul; |
645 | pci->phb->dma_window_base_cur = 0x8000000ul; |
646 | |
647 | pci->table_group = iommu_pseries_alloc_group(node: pci->phb->node); |
648 | tbl = pci->table_group->tables[0]; |
649 | |
650 | iommu_table_setparms(phb: pci->phb, dn, tbl); |
651 | |
652 | if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) |
653 | panic(fmt: "Failed to initialize iommu table" ); |
654 | |
655 | /* Divide the rest (1.75GB) among the children */ |
656 | pci->phb->dma_window_size = 0x80000000ul; |
657 | while (pci->phb->dma_window_size * children > 0x70000000ul) |
658 | pci->phb->dma_window_size >>= 1; |
659 | |
660 | pr_debug("ISA/IDE, window size is 0x%llx\n" , pci->phb->dma_window_size); |
661 | } |
662 | |
#ifdef CONFIG_IOMMU_API
/*
 * Replace TCE number @index of @tbl and hand the previous entry back.
 *
 * On entry *@tce holds the new address and *@direction the new DMA
 * direction. The old entry is read with plpar_tce_get() and the new one
 * written with plpar_tce_put(), both under tbl->large_pool.lock with
 * interrupts saved. On success *@direction receives the old entry's
 * direction and *@tce its value with the read/write permission bits
 * removed; on failure both are left unchanged.
 *
 * Returns 0 on success or the result of the failing call.
 */
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction)
{
	const unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	const u64 perm = iommu_direction_to_tce_perm(*direction);
	const unsigned long new_entry = *tce | perm;
	unsigned long old_entry = 0;
	unsigned long irqflags;
	long rc;

	spin_lock_irqsave(&tbl->large_pool.lock, irqflags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &old_entry);
	if (rc)
		goto unlock;

	rc = plpar_tce_put((u64)tbl->it_index, ioba, new_entry);
	if (rc)
		goto unlock;

	*direction = iommu_tce_direction(old_entry);
	*tce = old_entry & ~(TCE_PCI_READ | TCE_PCI_WRITE);

unlock:
	spin_unlock_irqrestore(&tbl->large_pool.lock, irqflags);

	return rc;
}
#endif
689 | |
690 | struct iommu_table_ops iommu_table_lpar_multi_ops = { |
691 | .set = tce_buildmulti_pSeriesLP, |
692 | #ifdef CONFIG_IOMMU_API |
693 | .xchg_no_kill = tce_exchange_pseries, |
694 | #endif |
695 | .clear = tce_freemulti_pSeriesLP, |
696 | .get = tce_get_pSeriesLP |
697 | }; |
698 | |
699 | /* |
700 | * Find nearest ibm,dma-window (default DMA window) or direct DMA window or |
701 | * dynamic 64bit DMA window, walking up the device tree. |
702 | */ |
703 | static struct device_node *pci_dma_find(struct device_node *dn, |
704 | struct dynamic_dma_window_prop *prop) |
705 | { |
706 | const __be32 *default_prop = NULL; |
707 | const __be32 *ddw_prop = NULL; |
708 | struct device_node *rdn = NULL; |
709 | bool default_win = false, ddw_win = false; |
710 | |
711 | for ( ; dn && PCI_DN(dn); dn = dn->parent) { |
712 | default_prop = of_get_property(node: dn, name: "ibm,dma-window" , NULL); |
713 | if (default_prop) { |
714 | rdn = dn; |
715 | default_win = true; |
716 | } |
717 | ddw_prop = of_get_property(node: dn, name: DIRECT64_PROPNAME, NULL); |
718 | if (ddw_prop) { |
719 | rdn = dn; |
720 | ddw_win = true; |
721 | break; |
722 | } |
723 | ddw_prop = of_get_property(node: dn, name: DMA64_PROPNAME, NULL); |
724 | if (ddw_prop) { |
725 | rdn = dn; |
726 | ddw_win = true; |
727 | break; |
728 | } |
729 | |
730 | /* At least found default window, which is the case for normal boot */ |
731 | if (default_win) |
732 | break; |
733 | } |
734 | |
735 | /* For PCI devices there will always be a DMA window, either on the device |
736 | * or parent bus |
737 | */ |
738 | WARN_ON(!(default_win | ddw_win)); |
739 | |
740 | /* caller doesn't want to get DMA window property */ |
741 | if (!prop) |
742 | return rdn; |
743 | |
744 | /* parse DMA window property. During normal system boot, only default |
745 | * DMA window is passed in OF. But, for kdump, a dedicated adapter might |
746 | * have both default and DDW in FDT. In this scenario, DDW takes precedence |
747 | * over default window. |
748 | */ |
749 | if (ddw_win) { |
750 | struct dynamic_dma_window_prop *p; |
751 | |
752 | p = (struct dynamic_dma_window_prop *)ddw_prop; |
753 | prop->liobn = p->liobn; |
754 | prop->dma_base = p->dma_base; |
755 | prop->tce_shift = p->tce_shift; |
756 | prop->window_shift = p->window_shift; |
757 | } else if (default_win) { |
758 | unsigned long offset, size, liobn; |
759 | |
760 | of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); |
761 | |
762 | prop->liobn = cpu_to_be32((u32)liobn); |
763 | prop->dma_base = cpu_to_be64(offset); |
764 | prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); |
765 | prop->window_shift = cpu_to_be32(order_base_2(size)); |
766 | } |
767 | |
768 | return rdn; |
769 | } |
770 | |
771 | static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) |
772 | { |
773 | struct iommu_table *tbl; |
774 | struct device_node *dn, *pdn; |
775 | struct pci_dn *ppci; |
776 | struct dynamic_dma_window_prop prop; |
777 | |
778 | dn = pci_bus_to_OF_node(bus); |
779 | |
780 | pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n" , |
781 | dn); |
782 | |
783 | pdn = pci_dma_find(dn, prop: &prop); |
784 | |
785 | /* In PPC architecture, there will always be DMA window on bus or one of the |
786 | * parent bus. During reboot, there will be ibm,dma-window property to |
787 | * define DMA window. For kdump, there will at least be default window or DDW |
788 | * or both. |
789 | */ |
790 | |
791 | ppci = PCI_DN(pdn); |
792 | |
793 | pr_debug(" parent is %pOF, iommu_table: 0x%p\n" , |
794 | pdn, ppci->table_group); |
795 | |
796 | if (!ppci->table_group) { |
797 | ppci->table_group = iommu_pseries_alloc_group(node: ppci->phb->node); |
798 | tbl = ppci->table_group->tables[0]; |
799 | |
800 | iommu_table_setparms_common(tbl, busno: ppci->phb->bus->number, |
801 | be32_to_cpu(prop.liobn), |
802 | be64_to_cpu(prop.dma_base), |
803 | window_size: 1ULL << be32_to_cpu(prop.window_shift), |
804 | be32_to_cpu(prop.tce_shift), NULL, |
805 | table_ops: &iommu_table_lpar_multi_ops); |
806 | |
807 | /* Only for normal boot with default window. Doesn't matter even |
808 | * if we set these with DDW which is 64bit during kdump, since |
809 | * these will not be used during kdump. |
810 | */ |
811 | ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); |
812 | ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); |
813 | |
814 | if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) |
815 | panic(fmt: "Failed to initialize iommu table" ); |
816 | |
817 | iommu_register_group(ppci->table_group, |
818 | pci_domain_nr(bus), 0); |
819 | pr_debug(" created table: %p\n" , ppci->table_group); |
820 | } |
821 | } |
822 | |
823 | |
824 | static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) |
825 | { |
826 | struct device_node *dn; |
827 | struct iommu_table *tbl; |
828 | |
829 | pr_debug("pci_dma_dev_setup_pSeries: %s\n" , pci_name(dev)); |
830 | |
831 | dn = dev->dev.of_node; |
832 | |
833 | /* If we're the direct child of a root bus, then we need to allocate |
834 | * an iommu table ourselves. The bus setup code should have setup |
835 | * the window sizes already. |
836 | */ |
837 | if (!dev->bus->self) { |
838 | struct pci_controller *phb = PCI_DN(dn)->phb; |
839 | |
840 | pr_debug(" --> first child, no bridge. Allocating iommu table.\n" ); |
841 | PCI_DN(dn)->table_group = iommu_pseries_alloc_group(node: phb->node); |
842 | tbl = PCI_DN(dn)->table_group->tables[0]; |
843 | iommu_table_setparms(phb, dn, tbl); |
844 | |
845 | if (!iommu_init_table(tbl, phb->node, 0, 0)) |
846 | panic(fmt: "Failed to initialize iommu table" ); |
847 | |
848 | set_iommu_table_base(&dev->dev, tbl); |
849 | return; |
850 | } |
851 | |
852 | /* If this device is further down the bus tree, search upwards until |
853 | * an already allocated iommu table is found and use that. |
854 | */ |
855 | |
856 | while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) |
857 | dn = dn->parent; |
858 | |
859 | if (dn && PCI_DN(dn)) |
860 | set_iommu_table_base(&dev->dev, |
861 | PCI_DN(dn)->table_group->tables[0]); |
862 | else |
863 | printk(KERN_WARNING "iommu: Device %s has no iommu table\n" , |
864 | pci_name(dev)); |
865 | } |
866 | |
867 | static int __read_mostly disable_ddw; |
868 | |
869 | static int __init disable_ddw_setup(char *str) |
870 | { |
871 | disable_ddw = 1; |
872 | printk(KERN_INFO "ppc iommu: disabling ddw.\n" ); |
873 | |
874 | return 0; |
875 | } |
876 | |
877 | early_param("disable_ddw" , disable_ddw_setup); |
878 | |
879 | static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) |
880 | { |
881 | int ret; |
882 | |
883 | ret = tce_clearrange_multi_pSeriesLP(start_pfn: 0, |
884 | num_pfn: 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), arg: dwp); |
885 | if (ret) |
886 | pr_warn("%pOF failed to clear tces in window.\n" , |
887 | np); |
888 | else |
889 | pr_debug("%pOF successfully cleared tces in window.\n" , |
890 | np); |
891 | } |
892 | |
893 | /* |
894 | * Call only if DMA window is clean. |
895 | */ |
896 | static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn) |
897 | { |
898 | int ret; |
899 | |
900 | ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); |
901 | if (ret) |
902 | pr_warn("%pOF: failed to remove DMA window: rtas returned " |
903 | "%d to ibm,remove-pe-dma-window(%x) %llx\n" , |
904 | np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); |
905 | else |
906 | pr_debug("%pOF: successfully removed DMA window: rtas returned " |
907 | "%d to ibm,remove-pe-dma-window(%x) %llx\n" , |
908 | np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); |
909 | } |
910 | |
911 | static void remove_dma_window(struct device_node *np, u32 *ddw_avail, |
912 | struct property *win) |
913 | { |
914 | struct dynamic_dma_window_prop *dwp; |
915 | u64 liobn; |
916 | |
917 | dwp = win->value; |
918 | liobn = (u64)be32_to_cpu(dwp->liobn); |
919 | |
920 | clean_dma_window(np, dwp); |
921 | __remove_dma_window(np, ddw_avail, liobn); |
922 | } |
923 | |
924 | static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) |
925 | { |
926 | struct property *win; |
927 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; |
928 | int ret = 0; |
929 | |
930 | win = of_find_property(np, name: win_name, NULL); |
931 | if (!win) |
932 | return -EINVAL; |
933 | |
934 | ret = of_property_read_u32_array(np, propname: "ibm,ddw-applicable" , |
935 | out_values: &ddw_avail[0], sz: DDW_APPLICABLE_SIZE); |
936 | if (ret) |
937 | return 0; |
938 | |
939 | |
940 | if (win->length >= sizeof(struct dynamic_dma_window_prop)) |
941 | remove_dma_window(np, ddw_avail, win); |
942 | |
943 | if (!remove_prop) |
944 | return 0; |
945 | |
946 | ret = of_remove_property(np, prop: win); |
947 | if (ret) |
948 | pr_warn("%pOF: failed to remove DMA window property: %d\n" , |
949 | np, ret); |
950 | return 0; |
951 | } |
952 | |
953 | static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift, |
954 | bool *direct_mapping) |
955 | { |
956 | struct dma_win *window; |
957 | const struct dynamic_dma_window_prop *dma64; |
958 | bool found = false; |
959 | |
960 | spin_lock(lock: &dma_win_list_lock); |
961 | /* check if we already created a window and dupe that config if so */ |
962 | list_for_each_entry(window, &dma_win_list, list) { |
963 | if (window->device == pdn) { |
964 | dma64 = window->prop; |
965 | *dma_addr = be64_to_cpu(dma64->dma_base); |
966 | *window_shift = be32_to_cpu(dma64->window_shift); |
967 | *direct_mapping = window->direct; |
968 | found = true; |
969 | break; |
970 | } |
971 | } |
972 | spin_unlock(lock: &dma_win_list_lock); |
973 | |
974 | return found; |
975 | } |
976 | |
977 | static struct dma_win *ddw_list_new_entry(struct device_node *pdn, |
978 | const struct dynamic_dma_window_prop *dma64) |
979 | { |
980 | struct dma_win *window; |
981 | |
982 | window = kzalloc(size: sizeof(*window), GFP_KERNEL); |
983 | if (!window) |
984 | return NULL; |
985 | |
986 | window->device = pdn; |
987 | window->prop = dma64; |
988 | window->direct = false; |
989 | |
990 | return window; |
991 | } |
992 | |
993 | static void find_existing_ddw_windows_named(const char *name) |
994 | { |
995 | int len; |
996 | struct device_node *pdn; |
997 | struct dma_win *window; |
998 | const struct dynamic_dma_window_prop *dma64; |
999 | |
1000 | for_each_node_with_property(pdn, name) { |
1001 | dma64 = of_get_property(node: pdn, name, lenp: &len); |
1002 | if (!dma64 || len < sizeof(*dma64)) { |
1003 | remove_ddw(np: pdn, remove_prop: true, win_name: name); |
1004 | continue; |
1005 | } |
1006 | |
1007 | /* If at the time of system initialization, there are DDWs in OF, |
1008 | * it means this is during kexec. DDW could be direct or dynamic. |
1009 | * We will just mark DDWs as "dynamic" since this is kdump path, |
1010 | * no need to worry about perforance. ddw_list_new_entry() will |
1011 | * set window->direct = false. |
1012 | */ |
1013 | window = ddw_list_new_entry(pdn, dma64); |
1014 | if (!window) { |
1015 | of_node_put(node: pdn); |
1016 | break; |
1017 | } |
1018 | |
1019 | spin_lock(lock: &dma_win_list_lock); |
1020 | list_add(new: &window->list, head: &dma_win_list); |
1021 | spin_unlock(lock: &dma_win_list_lock); |
1022 | } |
1023 | } |
1024 | |
1025 | static int find_existing_ddw_windows(void) |
1026 | { |
1027 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
1028 | return 0; |
1029 | |
1030 | find_existing_ddw_windows_named(name: DIRECT64_PROPNAME); |
1031 | find_existing_ddw_windows_named(name: DMA64_PROPNAME); |
1032 | |
1033 | return 0; |
1034 | } |
1035 | machine_arch_initcall(pseries, find_existing_ddw_windows); |
1036 | |
1037 | /** |
1038 | * ddw_read_ext - Get the value of an DDW extension |
1039 | * @np: device node from which the extension value is to be read. |
1040 | * @extnum: index number of the extension. |
1041 | * @value: pointer to return value, modified when extension is available. |
1042 | * |
1043 | * Checks if "ibm,ddw-extensions" exists for this node, and get the value |
1044 | * on index 'extnum'. |
1045 | * It can be used only to check if a property exists, passing value == NULL. |
1046 | * |
1047 | * Returns: |
1048 | * 0 if extension successfully read |
1049 | * -EINVAL if the "ibm,ddw-extensions" does not exist, |
1050 | * -ENODATA if "ibm,ddw-extensions" does not have a value, and |
1051 | * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. |
1052 | */ |
1053 | static inline int ddw_read_ext(const struct device_node *np, int extnum, |
1054 | u32 *value) |
1055 | { |
1056 | static const char propname[] = "ibm,ddw-extensions" ; |
1057 | u32 count; |
1058 | int ret; |
1059 | |
1060 | ret = of_property_read_u32_index(np, propname, index: DDW_EXT_SIZE, out_value: &count); |
1061 | if (ret) |
1062 | return ret; |
1063 | |
1064 | if (count < extnum) |
1065 | return -EOVERFLOW; |
1066 | |
1067 | if (!value) |
1068 | value = &count; |
1069 | |
1070 | return of_property_read_u32_index(np, propname, index: extnum, out_value: value); |
1071 | } |
1072 | |
1073 | static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
1074 | struct ddw_query_response *query, |
1075 | struct device_node *parent) |
1076 | { |
1077 | struct device_node *dn; |
1078 | struct pci_dn *pdn; |
1079 | u32 cfg_addr, ext_query, query_out[5]; |
1080 | u64 buid; |
1081 | int ret, out_sz; |
1082 | |
1083 | /* |
1084 | * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many |
1085 | * output parameters ibm,query-pe-dma-windows will have, ranging from |
1086 | * 5 to 6. |
1087 | */ |
1088 | ret = ddw_read_ext(np: parent, extnum: DDW_EXT_QUERY_OUT_SIZE, value: &ext_query); |
1089 | if (!ret && ext_query == 1) |
1090 | out_sz = 6; |
1091 | else |
1092 | out_sz = 5; |
1093 | |
1094 | /* |
1095 | * Get the config address and phb buid of the PE window. |
1096 | * Rely on eeh to retrieve this for us. |
1097 | * Retrieve them from the pci device, not the node with the |
1098 | * dma-window property |
1099 | */ |
1100 | dn = pci_device_to_OF_node(pdev: dev); |
1101 | pdn = PCI_DN(dn); |
1102 | buid = pdn->phb->buid; |
1103 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
1104 | |
1105 | ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, |
1106 | cfg_addr, BUID_HI(buid), BUID_LO(buid)); |
1107 | |
1108 | switch (out_sz) { |
1109 | case 5: |
1110 | query->windows_available = query_out[0]; |
1111 | query->largest_available_block = query_out[1]; |
1112 | query->page_size = query_out[2]; |
1113 | query->migration_capable = query_out[3]; |
1114 | break; |
1115 | case 6: |
1116 | query->windows_available = query_out[0]; |
1117 | query->largest_available_block = ((u64)query_out[1] << 32) | |
1118 | query_out[2]; |
1119 | query->page_size = query_out[3]; |
1120 | query->migration_capable = query_out[4]; |
1121 | break; |
1122 | } |
1123 | |
1124 | dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n" , |
1125 | ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), |
1126 | BUID_LO(buid), ret, query->largest_available_block, |
1127 | query->page_size, query->windows_available); |
1128 | |
1129 | return ret; |
1130 | } |
1131 | |
1132 | static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
1133 | struct ddw_create_response *create, int page_shift, |
1134 | int window_shift) |
1135 | { |
1136 | struct device_node *dn; |
1137 | struct pci_dn *pdn; |
1138 | u32 cfg_addr; |
1139 | u64 buid; |
1140 | int ret; |
1141 | |
1142 | /* |
1143 | * Get the config address and phb buid of the PE window. |
1144 | * Rely on eeh to retrieve this for us. |
1145 | * Retrieve them from the pci device, not the node with the |
1146 | * dma-window property |
1147 | */ |
1148 | dn = pci_device_to_OF_node(pdev: dev); |
1149 | pdn = PCI_DN(dn); |
1150 | buid = pdn->phb->buid; |
1151 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
1152 | |
1153 | do { |
1154 | /* extra outputs are LIOBN and dma-addr (hi, lo) */ |
1155 | ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, |
1156 | (u32 *)create, cfg_addr, BUID_HI(buid), |
1157 | BUID_LO(buid), page_shift, window_shift); |
1158 | } while (rtas_busy_delay(ret)); |
1159 | dev_info(&dev->dev, |
1160 | "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " |
1161 | "(liobn = 0x%x starting addr = %x %x)\n" , |
1162 | ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), |
1163 | BUID_LO(buid), page_shift, window_shift, ret, create->liobn, |
1164 | create->addr_hi, create->addr_lo); |
1165 | |
1166 | return ret; |
1167 | } |
1168 | |
1169 | struct failed_ddw_pdn { |
1170 | struct device_node *pdn; |
1171 | struct list_head list; |
1172 | }; |
1173 | |
1174 | static LIST_HEAD(failed_ddw_pdn_list); |
1175 | |
1176 | static phys_addr_t ddw_memory_hotplug_max(void) |
1177 | { |
1178 | resource_size_t max_addr = memory_hotplug_max(); |
1179 | struct device_node *memory; |
1180 | |
1181 | for_each_node_by_type(memory, "memory" ) { |
1182 | struct resource res; |
1183 | |
1184 | if (of_address_to_resource(dev: memory, index: 0, r: &res)) |
1185 | continue; |
1186 | |
1187 | max_addr = max_t(resource_size_t, max_addr, res.end + 1); |
1188 | } |
1189 | |
1190 | return max_addr; |
1191 | } |
1192 | |
1193 | /* |
1194 | * Platforms supporting the DDW option starting with LoPAR level 2.7 implement |
1195 | * ibm,ddw-extensions, which carries the rtas token for |
1196 | * ibm,reset-pe-dma-windows. |
1197 | * That rtas-call can be used to restore the default DMA window for the device. |
1198 | */ |
1199 | static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) |
1200 | { |
1201 | int ret; |
1202 | u32 cfg_addr, reset_dma_win; |
1203 | u64 buid; |
1204 | struct device_node *dn; |
1205 | struct pci_dn *pdn; |
1206 | |
1207 | ret = ddw_read_ext(np: par_dn, extnum: DDW_EXT_RESET_DMA_WIN, value: &reset_dma_win); |
1208 | if (ret) |
1209 | return; |
1210 | |
1211 | dn = pci_device_to_OF_node(pdev: dev); |
1212 | pdn = PCI_DN(dn); |
1213 | buid = pdn->phb->buid; |
1214 | cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); |
1215 | |
1216 | ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), |
1217 | BUID_LO(buid)); |
1218 | if (ret) |
1219 | dev_info(&dev->dev, |
1220 | "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d " , |
1221 | reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), |
1222 | ret); |
1223 | } |
1224 | |
1225 | /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ |
1226 | static int iommu_get_page_shift(u32 query_page_size) |
1227 | { |
1228 | /* Supported IO page-sizes according to LoPAR, note that 2M is out of order */ |
1229 | const int shift[] = { |
1230 | __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), |
1231 | __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), |
1232 | __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M) |
1233 | }; |
1234 | |
1235 | int i = ARRAY_SIZE(shift) - 1; |
1236 | int ret = 0; |
1237 | |
1238 | /* |
1239 | * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: |
1240 | * - bit 31 means 4k pages are supported, |
1241 | * - bit 30 means 64k pages are supported, and so on. |
1242 | * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. |
1243 | */ |
1244 | for (; i >= 0 ; i--) { |
1245 | if (query_page_size & (1 << i)) |
1246 | ret = max(ret, shift[i]); |
1247 | } |
1248 | |
1249 | return ret; |
1250 | } |
1251 | |
1252 | static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, |
1253 | u32 page_shift, u32 window_shift) |
1254 | { |
1255 | struct dynamic_dma_window_prop *ddwprop; |
1256 | struct property *win64; |
1257 | |
1258 | win64 = kzalloc(size: sizeof(*win64), GFP_KERNEL); |
1259 | if (!win64) |
1260 | return NULL; |
1261 | |
1262 | win64->name = kstrdup(s: propname, GFP_KERNEL); |
1263 | ddwprop = kzalloc(size: sizeof(*ddwprop), GFP_KERNEL); |
1264 | win64->value = ddwprop; |
1265 | win64->length = sizeof(*ddwprop); |
1266 | if (!win64->name || !win64->value) { |
1267 | kfree(objp: win64->name); |
1268 | kfree(objp: win64->value); |
1269 | kfree(objp: win64); |
1270 | return NULL; |
1271 | } |
1272 | |
1273 | ddwprop->liobn = cpu_to_be32(liobn); |
1274 | ddwprop->dma_base = cpu_to_be64(dma_addr); |
1275 | ddwprop->tce_shift = cpu_to_be32(page_shift); |
1276 | ddwprop->window_shift = cpu_to_be32(window_shift); |
1277 | |
1278 | return win64; |
1279 | } |
1280 | |
1281 | /* |
1282 | * If the PE supports dynamic dma windows, and there is space for a table |
1283 | * that can map all pages in a linear offset, then setup such a table, |
1284 | * and record the dma-offset in the struct device. |
1285 | * |
1286 | * dev: the pci device we are checking |
1287 | * pdn: the parent pe node with the ibm,dma_window property |
1288 | * Future: also check if we can remap the base window for our base page size |
1289 | * |
1290 | * returns true if can map all pages (direct mapping), false otherwise.. |
1291 | */ |
1292 | static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) |
1293 | { |
1294 | int len = 0, ret; |
1295 | int max_ram_len = order_base_2(ddw_memory_hotplug_max()); |
1296 | struct ddw_query_response query; |
1297 | struct ddw_create_response create; |
1298 | int page_shift; |
1299 | u64 win_addr; |
1300 | const char *win_name; |
1301 | struct device_node *dn; |
1302 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; |
1303 | struct dma_win *window; |
1304 | struct property *win64; |
1305 | struct failed_ddw_pdn *fpdn; |
1306 | bool default_win_removed = false, direct_mapping = false; |
1307 | bool pmem_present; |
1308 | struct pci_dn *pci = PCI_DN(pdn); |
1309 | struct property *default_win = NULL; |
1310 | |
1311 | dn = of_find_node_by_type(NULL, type: "ibm,pmemory" ); |
1312 | pmem_present = dn != NULL; |
1313 | of_node_put(node: dn); |
1314 | |
1315 | mutex_lock(&dma_win_init_mutex); |
1316 | |
1317 | if (find_existing_ddw(pdn, dma_addr: &dev->dev.archdata.dma_offset, window_shift: &len, direct_mapping: &direct_mapping)) |
1318 | goto out_unlock; |
1319 | |
1320 | /* |
1321 | * If we already went through this for a previous function of |
1322 | * the same device and failed, we don't want to muck with the |
1323 | * DMA window again, as it will race with in-flight operations |
1324 | * and can lead to EEHs. The above mutex protects access to the |
1325 | * list. |
1326 | */ |
1327 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { |
1328 | if (fpdn->pdn == pdn) |
1329 | goto out_unlock; |
1330 | } |
1331 | |
1332 | /* |
1333 | * the ibm,ddw-applicable property holds the tokens for: |
1334 | * ibm,query-pe-dma-window |
1335 | * ibm,create-pe-dma-window |
1336 | * ibm,remove-pe-dma-window |
1337 | * for the given node in that order. |
1338 | * the property is actually in the parent, not the PE |
1339 | */ |
1340 | ret = of_property_read_u32_array(np: pdn, propname: "ibm,ddw-applicable" , |
1341 | out_values: &ddw_avail[0], sz: DDW_APPLICABLE_SIZE); |
1342 | if (ret) |
1343 | goto out_failed; |
1344 | |
1345 | /* |
1346 | * Query if there is a second window of size to map the |
1347 | * whole partition. Query returns number of windows, largest |
1348 | * block assigned to PE (partition endpoint), and two bitmasks |
1349 | * of page sizes: supported and supported for migrate-dma. |
1350 | */ |
1351 | dn = pci_device_to_OF_node(pdev: dev); |
1352 | ret = query_ddw(dev, ddw_avail, query: &query, parent: pdn); |
1353 | if (ret != 0) |
1354 | goto out_failed; |
1355 | |
1356 | /* |
1357 | * If there is no window available, remove the default DMA window, |
1358 | * if it's present. This will make all the resources available to the |
1359 | * new DDW window. |
1360 | * If anything fails after this, we need to restore it, so also check |
1361 | * for extensions presence. |
1362 | */ |
1363 | if (query.windows_available == 0) { |
1364 | int reset_win_ext; |
1365 | |
1366 | /* DDW + IOMMU on single window may fail if there is any allocation */ |
1367 | if (iommu_table_in_use(pci->table_group->tables[0])) { |
1368 | dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n" ); |
1369 | goto out_failed; |
1370 | } |
1371 | |
1372 | default_win = of_find_property(np: pdn, name: "ibm,dma-window" , NULL); |
1373 | if (!default_win) |
1374 | goto out_failed; |
1375 | |
1376 | reset_win_ext = ddw_read_ext(np: pdn, extnum: DDW_EXT_RESET_DMA_WIN, NULL); |
1377 | if (reset_win_ext) |
1378 | goto out_failed; |
1379 | |
1380 | remove_dma_window(np: pdn, ddw_avail, win: default_win); |
1381 | default_win_removed = true; |
1382 | |
1383 | /* Query again, to check if the window is available */ |
1384 | ret = query_ddw(dev, ddw_avail, query: &query, parent: pdn); |
1385 | if (ret != 0) |
1386 | goto out_failed; |
1387 | |
1388 | if (query.windows_available == 0) { |
1389 | /* no windows are available for this device. */ |
1390 | dev_dbg(&dev->dev, "no free dynamic windows" ); |
1391 | goto out_failed; |
1392 | } |
1393 | } |
1394 | |
1395 | page_shift = iommu_get_page_shift(query_page_size: query.page_size); |
1396 | if (!page_shift) { |
1397 | dev_dbg(&dev->dev, "no supported page size in mask %x" , |
1398 | query.page_size); |
1399 | goto out_failed; |
1400 | } |
1401 | |
1402 | |
1403 | /* |
1404 | * The "ibm,pmemory" can appear anywhere in the address space. |
1405 | * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS |
1406 | * for the upper limit and fallback to max RAM otherwise but this |
1407 | * disables device::dma_ops_bypass. |
1408 | */ |
1409 | len = max_ram_len; |
1410 | if (pmem_present) { |
1411 | if (query.largest_available_block >= |
1412 | (1ULL << (MAX_PHYSMEM_BITS - page_shift))) |
1413 | len = MAX_PHYSMEM_BITS; |
1414 | else |
1415 | dev_info(&dev->dev, "Skipping ibm,pmemory" ); |
1416 | } |
1417 | |
1418 | /* check if the available block * number of ptes will map everything */ |
1419 | if (query.largest_available_block < (1ULL << (len - page_shift))) { |
1420 | dev_dbg(&dev->dev, |
1421 | "can't map partition max 0x%llx with %llu %llu-sized pages\n" , |
1422 | 1ULL << len, |
1423 | query.largest_available_block, |
1424 | 1ULL << page_shift); |
1425 | |
1426 | len = order_base_2(query.largest_available_block << page_shift); |
1427 | win_name = DMA64_PROPNAME; |
1428 | } else { |
1429 | direct_mapping = !default_win_removed || |
1430 | (len == MAX_PHYSMEM_BITS) || |
1431 | (!pmem_present && (len == max_ram_len)); |
1432 | win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; |
1433 | } |
1434 | |
1435 | ret = create_ddw(dev, ddw_avail, create: &create, page_shift, window_shift: len); |
1436 | if (ret != 0) |
1437 | goto out_failed; |
1438 | |
1439 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n" , |
1440 | create.liobn, dn); |
1441 | |
1442 | win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; |
1443 | win64 = ddw_property_create(propname: win_name, liobn: create.liobn, dma_addr: win_addr, page_shift, window_shift: len); |
1444 | |
1445 | if (!win64) { |
1446 | dev_info(&dev->dev, |
1447 | "couldn't allocate property, property name, or value\n" ); |
1448 | goto out_remove_win; |
1449 | } |
1450 | |
1451 | ret = of_add_property(np: pdn, prop: win64); |
1452 | if (ret) { |
1453 | dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d" , |
1454 | pdn, ret); |
1455 | goto out_free_prop; |
1456 | } |
1457 | |
1458 | window = ddw_list_new_entry(pdn, dma64: win64->value); |
1459 | if (!window) |
1460 | goto out_del_prop; |
1461 | |
1462 | if (direct_mapping) { |
1463 | window->direct = true; |
1464 | |
1465 | /* DDW maps the whole partition, so enable direct DMA mapping */ |
1466 | ret = walk_system_ram_range(start_pfn: 0, nr_pages: memblock_end_of_DRAM() >> PAGE_SHIFT, |
1467 | arg: win64->value, func: tce_setrange_multi_pSeriesLP_walk); |
1468 | if (ret) { |
1469 | dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n" , |
1470 | dn, ret); |
1471 | |
1472 | /* Make sure to clean DDW if any TCE was set*/ |
1473 | clean_dma_window(np: pdn, dwp: win64->value); |
1474 | goto out_del_list; |
1475 | } |
1476 | } else { |
1477 | struct iommu_table *newtbl; |
1478 | int i; |
1479 | unsigned long start = 0, end = 0; |
1480 | |
1481 | window->direct = false; |
1482 | |
1483 | for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { |
1484 | const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; |
1485 | |
1486 | /* Look for MMIO32 */ |
1487 | if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { |
1488 | start = pci->phb->mem_resources[i].start; |
1489 | end = pci->phb->mem_resources[i].end; |
1490 | break; |
1491 | } |
1492 | } |
1493 | |
1494 | /* New table for using DDW instead of the default DMA window */ |
1495 | newtbl = iommu_pseries_alloc_table(node: pci->phb->node); |
1496 | if (!newtbl) { |
1497 | dev_dbg(&dev->dev, "couldn't create new IOMMU table\n" ); |
1498 | goto out_del_list; |
1499 | } |
1500 | |
1501 | iommu_table_setparms_common(tbl: newtbl, busno: pci->phb->bus->number, liobn: create.liobn, win_addr, |
1502 | window_size: 1UL << len, page_shift, NULL, table_ops: &iommu_table_lpar_multi_ops); |
1503 | iommu_init_table(newtbl, pci->phb->node, start, end); |
1504 | |
1505 | pci->table_group->tables[1] = newtbl; |
1506 | |
1507 | set_iommu_table_base(&dev->dev, newtbl); |
1508 | } |
1509 | |
1510 | if (default_win_removed) { |
1511 | iommu_tce_table_put(pci->table_group->tables[0]); |
1512 | pci->table_group->tables[0] = NULL; |
1513 | |
1514 | /* default_win is valid here because default_win_removed == true */ |
1515 | of_remove_property(np: pdn, prop: default_win); |
1516 | dev_info(&dev->dev, "Removed default DMA window for %pOF\n" , pdn); |
1517 | } |
1518 | |
1519 | spin_lock(lock: &dma_win_list_lock); |
1520 | list_add(new: &window->list, head: &dma_win_list); |
1521 | spin_unlock(lock: &dma_win_list_lock); |
1522 | |
1523 | dev->dev.archdata.dma_offset = win_addr; |
1524 | goto out_unlock; |
1525 | |
1526 | out_del_list: |
1527 | kfree(objp: window); |
1528 | |
1529 | out_del_prop: |
1530 | of_remove_property(np: pdn, prop: win64); |
1531 | |
1532 | out_free_prop: |
1533 | kfree(objp: win64->name); |
1534 | kfree(objp: win64->value); |
1535 | kfree(objp: win64); |
1536 | |
1537 | out_remove_win: |
1538 | /* DDW is clean, so it's ok to call this directly. */ |
1539 | __remove_dma_window(np: pdn, ddw_avail, liobn: create.liobn); |
1540 | |
1541 | out_failed: |
1542 | if (default_win_removed) |
1543 | reset_dma_window(dev, par_dn: pdn); |
1544 | |
1545 | fpdn = kzalloc(size: sizeof(*fpdn), GFP_KERNEL); |
1546 | if (!fpdn) |
1547 | goto out_unlock; |
1548 | fpdn->pdn = pdn; |
1549 | list_add(new: &fpdn->list, head: &failed_ddw_pdn_list); |
1550 | |
1551 | out_unlock: |
1552 | mutex_unlock(lock: &dma_win_init_mutex); |
1553 | |
1554 | /* |
1555 | * If we have persistent memory and the window size is only as big |
1556 | * as RAM, then we failed to create a window to cover persistent |
1557 | * memory and need to set the DMA limit. |
1558 | */ |
1559 | if (pmem_present && direct_mapping && len == max_ram_len) |
1560 | dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); |
1561 | |
1562 | return direct_mapping; |
1563 | } |
1564 | |
1565 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
1566 | { |
1567 | struct device_node *pdn, *dn; |
1568 | struct iommu_table *tbl; |
1569 | struct pci_dn *pci; |
1570 | struct dynamic_dma_window_prop prop; |
1571 | |
1572 | pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n" , pci_name(dev)); |
1573 | |
1574 | /* dev setup for LPAR is a little tricky, since the device tree might |
1575 | * contain the dma-window properties per-device and not necessarily |
1576 | * for the bus. So we need to search upwards in the tree until we |
1577 | * either hit a dma-window property, OR find a parent with a table |
1578 | * already allocated. |
1579 | */ |
1580 | dn = pci_device_to_OF_node(pdev: dev); |
1581 | pr_debug(" node is %pOF\n" , dn); |
1582 | |
1583 | pdn = pci_dma_find(dn, prop: &prop); |
1584 | if (!pdn || !PCI_DN(pdn)) { |
1585 | printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " |
1586 | "no DMA window found for pci dev=%s dn=%pOF\n" , |
1587 | pci_name(dev), dn); |
1588 | return; |
1589 | } |
1590 | pr_debug(" parent is %pOF\n" , pdn); |
1591 | |
1592 | pci = PCI_DN(pdn); |
1593 | if (!pci->table_group) { |
1594 | pci->table_group = iommu_pseries_alloc_group(node: pci->phb->node); |
1595 | tbl = pci->table_group->tables[0]; |
1596 | |
1597 | iommu_table_setparms_common(tbl, busno: pci->phb->bus->number, |
1598 | be32_to_cpu(prop.liobn), |
1599 | be64_to_cpu(prop.dma_base), |
1600 | window_size: 1ULL << be32_to_cpu(prop.window_shift), |
1601 | be32_to_cpu(prop.tce_shift), NULL, |
1602 | table_ops: &iommu_table_lpar_multi_ops); |
1603 | |
1604 | /* Only for normal boot with default window. Doesn't matter even |
1605 | * if we set these with DDW which is 64bit during kdump, since |
1606 | * these will not be used during kdump. |
1607 | */ |
1608 | pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); |
1609 | pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); |
1610 | |
1611 | iommu_init_table(tbl, pci->phb->node, 0, 0); |
1612 | iommu_register_group(pci->table_group, |
1613 | pci_domain_nr(bus: pci->phb->bus), 0); |
1614 | pr_debug(" created table: %p\n" , pci->table_group); |
1615 | } else { |
1616 | pr_debug(" found DMA window, table: %p\n" , pci->table_group); |
1617 | } |
1618 | |
1619 | set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); |
1620 | iommu_add_device(pci->table_group, &dev->dev); |
1621 | } |
1622 | |
1623 | static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) |
1624 | { |
1625 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; |
1626 | |
1627 | /* only attempt to use a new window if 64-bit DMA is requested */ |
1628 | if (dma_mask < DMA_BIT_MASK(64)) |
1629 | return false; |
1630 | |
1631 | dev_dbg(&pdev->dev, "node is %pOF\n" , dn); |
1632 | |
1633 | /* |
1634 | * the device tree might contain the dma-window properties |
1635 | * per-device and not necessarily for the bus. So we need to |
1636 | * search upwards in the tree until we either hit a dma-window |
1637 | * property, OR find a parent with a table already allocated. |
1638 | */ |
1639 | pdn = pci_dma_find(dn, NULL); |
1640 | if (pdn && PCI_DN(pdn)) |
1641 | return enable_ddw(dev: pdev, pdn); |
1642 | |
1643 | return false; |
1644 | } |
1645 | |
1646 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, |
1647 | void *data) |
1648 | { |
1649 | struct dma_win *window; |
1650 | struct memory_notify *arg = data; |
1651 | int ret = 0; |
1652 | |
1653 | switch (action) { |
1654 | case MEM_GOING_ONLINE: |
1655 | spin_lock(lock: &dma_win_list_lock); |
1656 | list_for_each_entry(window, &dma_win_list, list) { |
1657 | if (window->direct) { |
1658 | ret |= tce_setrange_multi_pSeriesLP(start_pfn: arg->start_pfn, |
1659 | num_pfn: arg->nr_pages, arg: window->prop); |
1660 | } |
1661 | /* XXX log error */ |
1662 | } |
1663 | spin_unlock(lock: &dma_win_list_lock); |
1664 | break; |
1665 | case MEM_CANCEL_ONLINE: |
1666 | case MEM_OFFLINE: |
1667 | spin_lock(lock: &dma_win_list_lock); |
1668 | list_for_each_entry(window, &dma_win_list, list) { |
1669 | if (window->direct) { |
1670 | ret |= tce_clearrange_multi_pSeriesLP(start_pfn: arg->start_pfn, |
1671 | num_pfn: arg->nr_pages, arg: window->prop); |
1672 | } |
1673 | /* XXX log error */ |
1674 | } |
1675 | spin_unlock(lock: &dma_win_list_lock); |
1676 | break; |
1677 | default: |
1678 | break; |
1679 | } |
1680 | if (ret && action != MEM_CANCEL_ONLINE) |
1681 | return NOTIFY_BAD; |
1682 | |
1683 | return NOTIFY_OK; |
1684 | } |
1685 | |
1686 | static struct notifier_block iommu_mem_nb = { |
1687 | .notifier_call = iommu_mem_notifier, |
1688 | }; |
1689 | |
1690 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) |
1691 | { |
1692 | int err = NOTIFY_OK; |
1693 | struct of_reconfig_data *rd = data; |
1694 | struct device_node *np = rd->dn; |
1695 | struct pci_dn *pci = PCI_DN(np); |
1696 | struct dma_win *window; |
1697 | |
1698 | switch (action) { |
1699 | case OF_RECONFIG_DETACH_NODE: |
1700 | /* |
1701 | * Removing the property will invoke the reconfig |
1702 | * notifier again, which causes dead-lock on the |
1703 | * read-write semaphore of the notifier chain. So |
1704 | * we have to remove the property when releasing |
1705 | * the device node. |
1706 | */ |
1707 | if (remove_ddw(np, false, DIRECT64_PROPNAME)) |
1708 | remove_ddw(np, false, DMA64_PROPNAME); |
1709 | |
1710 | if (pci && pci->table_group) |
1711 | iommu_pseries_free_group(table_group: pci->table_group, |
1712 | node_name: np->full_name); |
1713 | |
1714 | spin_lock(lock: &dma_win_list_lock); |
1715 | list_for_each_entry(window, &dma_win_list, list) { |
1716 | if (window->device == np) { |
1717 | list_del(entry: &window->list); |
1718 | kfree(objp: window); |
1719 | break; |
1720 | } |
1721 | } |
1722 | spin_unlock(lock: &dma_win_list_lock); |
1723 | break; |
1724 | default: |
1725 | err = NOTIFY_DONE; |
1726 | break; |
1727 | } |
1728 | return err; |
1729 | } |
1730 | |
1731 | static struct notifier_block iommu_reconfig_nb = { |
1732 | .notifier_call = iommu_reconfig_notifier, |
1733 | }; |
1734 | |
1735 | /* These are called very early. */ |
1736 | void __init iommu_init_early_pSeries(void) |
1737 | { |
1738 | if (of_chosen && of_get_property(node: of_chosen, name: "linux,iommu-off" , NULL)) |
1739 | return; |
1740 | |
1741 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
1742 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
1743 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; |
1744 | if (!disable_ddw) |
1745 | pseries_pci_controller_ops.iommu_bypass_supported = |
1746 | iommu_bypass_supported_pSeriesLP; |
1747 | } else { |
1748 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; |
1749 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; |
1750 | } |
1751 | |
1752 | |
1753 | of_reconfig_notifier_register(&iommu_reconfig_nb); |
1754 | register_memory_notifier(nb: &iommu_mem_nb); |
1755 | |
1756 | set_pci_dma_ops(&dma_iommu_ops); |
1757 | } |
1758 | |
1759 | static int __init disable_multitce(char *str) |
1760 | { |
1761 | if (strcmp(str, "off" ) == 0 && |
1762 | firmware_has_feature(FW_FEATURE_LPAR) && |
1763 | (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) || |
1764 | firmware_has_feature(FW_FEATURE_STUFF_TCE))) { |
1765 | printk(KERN_INFO "Disabling MULTITCE firmware feature\n" ); |
1766 | powerpc_firmware_features &= |
1767 | ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE); |
1768 | } |
1769 | return 1; |
1770 | } |
1771 | |
1772 | __setup("multitce=" , disable_multitce); |
1773 | |
#ifdef CONFIG_SPAPR_TCE_IOMMU
/*
 * Return a new reference to the IOMMU group of the node that owns the DMA
 * window for @pdev, or ERR_PTR(-ENODEV) when that node, its pci_dn, its
 * table group or the group itself is missing. @hose is not used here.
 */
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
					     struct pci_dev *pdev)
{
	struct device_node *pe_node;
	struct pci_dn *pci;

	pe_node = pci_dma_find(pdev->dev.of_node, NULL);
	if (!pe_node)
		return ERR_PTR(-ENODEV);

	pci = PCI_DN(pe_node);
	if (!pci || !pci->table_group || !pci->table_group->group)
		return ERR_PTR(-ENODEV);

	return iommu_group_ref_get(pci->table_group->group);
}
#endif
1797 | |