1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * IOMMU implementation for Cell Broadband Processor Architecture |
4 | * |
5 | * (C) Copyright IBM Corporation 2006-2008 |
6 | * |
7 | * Author: Jeremy Kerr <jk@ozlabs.org> |
8 | */ |
9 | |
10 | #undef DEBUG |
11 | |
12 | #include <linux/kernel.h> |
13 | #include <linux/init.h> |
14 | #include <linux/interrupt.h> |
15 | #include <linux/irqdomain.h> |
16 | #include <linux/notifier.h> |
17 | #include <linux/of.h> |
18 | #include <linux/of_address.h> |
19 | #include <linux/platform_device.h> |
20 | #include <linux/slab.h> |
21 | #include <linux/memblock.h> |
22 | |
23 | #include <asm/prom.h> |
24 | #include <asm/iommu.h> |
25 | #include <asm/machdep.h> |
26 | #include <asm/pci-bridge.h> |
27 | #include <asm/udbg.h> |
28 | #include <asm/firmware.h> |
29 | #include <asm/cell-regs.h> |
30 | |
31 | #include "cell.h" |
32 | #include "interrupt.h" |
33 | |
34 | /* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages |
35 | * instead of leaving them mapped to some dummy page. This can be |
36 | * enabled once the appropriate workarounds for spider bugs have |
37 | * been enabled |
38 | */ |
39 | #define CELL_IOMMU_REAL_UNMAP |
40 | |
41 | /* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of |
42 | * IO PTEs based on the transfer direction. That can be enabled |
43 | * once spider-net has been fixed to pass the correct direction |
44 | * to the DMA mapping functions |
45 | */ |
46 | #define CELL_IOMMU_STRICT_PROTECTION |
47 | |
48 | |
49 | #define NR_IOMMUS 2 |
50 | |
51 | /* IOC mmap registers */ |
52 | #define IOC_Reg_Size 0x2000 |
53 | |
54 | #define IOC_IOPT_CacheInvd 0x908 |
55 | #define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul |
56 | #define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul |
57 | #define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul |
58 | |
59 | #define IOC_IOST_Origin 0x918 |
60 | #define IOC_IOST_Origin_E 0x8000000000000000ul |
61 | #define IOC_IOST_Origin_HW 0x0000000000000800ul |
62 | #define IOC_IOST_Origin_HL 0x0000000000000400ul |
63 | |
64 | #define IOC_IO_ExcpStat 0x920 |
65 | #define IOC_IO_ExcpStat_V 0x8000000000000000ul |
66 | #define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul |
67 | #define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul |
68 | #define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul |
69 | #define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul |
70 | #define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul |
71 | #define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful |
72 | |
73 | #define IOC_IO_ExcpMask 0x928 |
74 | #define IOC_IO_ExcpMask_SFE 0x4000000000000000ul |
75 | #define IOC_IO_ExcpMask_PFE 0x2000000000000000ul |
76 | |
77 | #define IOC_IOCmd_Offset 0x1000 |
78 | |
79 | #define IOC_IOCmd_Cfg 0xc00 |
80 | #define IOC_IOCmd_Cfg_TE 0x0000800000000000ul |
81 | |
82 | |
83 | /* Segment table entries */ |
84 | #define IOSTE_V 0x8000000000000000ul /* valid */ |
85 | #define IOSTE_H 0x4000000000000000ul /* cache hint */ |
86 | #define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */ |
87 | #define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */ |
88 | #define IOSTE_PS_Mask 0x0000000000000007ul /* page size */ |
89 | #define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */ |
90 | #define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */ |
91 | #define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */ |
92 | #define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */ |
93 | |
94 | |
95 | /* IOMMU sizing */ |
96 | #define IO_SEGMENT_SHIFT 28 |
97 | #define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift)) |
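/* With IO_SEGMENT_SHIFT == 28 each I/O segment covers 256MB; for 4K
 * (shift 12) pages that means IO_PAGENO_BITS(12) == 16, i.e. 64K PTEs
 * per segment.
 */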
98 | |
99 | /* The high bit needs to be set on every DMA address */ |
100 | #define SPIDER_DMA_OFFSET 0x80000000ul |
101 | |
102 | struct iommu_window { |
103 | struct list_head list; |
104 | struct cbe_iommu *iommu; |
105 | unsigned long offset; |
106 | unsigned long size; |
107 | unsigned int ioid; |
108 | struct iommu_table table; |
109 | }; |
110 | |
111 | #define NAMESIZE 8 |
112 | struct cbe_iommu { |
113 | int nid; |
114 | char name[NAMESIZE]; |
115 | void __iomem *xlate_regs; |
116 | void __iomem *cmd_regs; |
117 | unsigned long *stab; |
118 | unsigned long *ptab; |
119 | void *pad_page; |
120 | struct list_head windows; |
121 | }; |
122 | |
123 | /* Static array of iommus, one per node |
124 | * each contains a list of windows, keyed from dma_window property |
125 | * - on bus setup, look for a matching window, or create one |
126 | * - on dev setup, assign iommu_table ptr |
127 | */ |
128 | static struct cbe_iommu iommus[NR_IOMMUS]; |
129 | static int cbe_nr_iommus; |
130 | |
131 | static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, |
132 | long n_ptes) |
133 | { |
134 | u64 __iomem *reg; |
135 | u64 val; |
136 | long n; |
137 | |
138 | reg = iommu->xlate_regs + IOC_IOPT_CacheInvd; |
139 | |
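	/* The invalidate register takes the entry count in its top bits
	 * (IOC_IOPT_CacheInvd_NE_Mask, hence the 1 << 11 batches below),
	 * the real address of the first IOPTE in the middle bits and a
	 * busy flag in bit 0 that the hardware clears when it is done.
	 */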
140 | while (n_ptes > 0) { |
141 | /* we can invalidate up to 1 << 11 PTEs at once */ |
142 | n = min(n_ptes, 1l << 11); |
143 | val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask) |
144 | | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask) |
145 | | IOC_IOPT_CacheInvd_Busy; |
146 | |
147 | out_be64(reg, val); |
148 | while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy) |
149 | ; |
150 | |
151 | n_ptes -= n; |
152 | pte += n; |
153 | } |
154 | } |
155 | |
156 | static int tce_build_cell(struct iommu_table *tbl, long index, long npages, |
157 | unsigned long uaddr, enum dma_data_direction direction, |
158 | unsigned long attrs) |
159 | { |
160 | int i; |
161 | unsigned long *io_pte, base_pte; |
162 | struct iommu_window *window = |
163 | container_of(tbl, struct iommu_window, table); |
164 | |
165 | /* implementing proper protection causes problems with the spidernet |
166 | * driver - check mapping directions later, but allow read & write by |
167 | * default for now.*/ |
168 | #ifdef CELL_IOMMU_STRICT_PROTECTION |
169 | /* to avoid referencing a global, we use a trick here to setup the |
170 | * protection bit. "prot" is setup to be 3 fields of 4 bits appended |
171 | * together for each of the 3 supported direction values. It is then |
172 | * shifted left so that the fields matching the desired direction |
173 | * lands on the appropriate bits, and other bits are masked out. |
174 | */ |
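	/* prot == 0xc48 decodes as: bits 11:8 cover DMA_BIDIRECTIONAL
	 * (read + write), bits 7:4 DMA_TO_DEVICE (read only) and bits 3:0
	 * DMA_FROM_DEVICE (write only).
	 */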
175 | const unsigned long prot = 0xc48; |
176 | base_pte = |
177 | ((prot << (52 + 4 * direction)) & |
178 | (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) | |
179 | CBE_IOPTE_M | CBE_IOPTE_SO_RW | |
180 | (window->ioid & CBE_IOPTE_IOID_Mask); |
181 | #else |
182 | base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | |
183 | CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask); |
184 | #endif |
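	/* DMA_ATTR_WEAK_ORDERING drops the strong-ordering (SO) bit set above */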
185 | if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING)) |
186 | base_pte &= ~CBE_IOPTE_SO_RW; |
187 | |
188 | io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); |
189 | |
190 | for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift)) |
191 | io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); |
192 | |
193 | mb(); |
194 | |
	invalidate_tce_cache(window->iommu, io_pte, npages);

	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
		 index, npages, direction, base_pte);
199 | return 0; |
200 | } |
201 | |
202 | static void tce_free_cell(struct iommu_table *tbl, long index, long npages) |
203 | { |
204 | |
205 | int i; |
206 | unsigned long *io_pte, pte; |
207 | struct iommu_window *window = |
208 | container_of(tbl, struct iommu_window, table); |
209 | |
	pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
211 | |
212 | #ifdef CELL_IOMMU_REAL_UNMAP |
213 | pte = 0; |
214 | #else |
215 | /* spider bridge does PCI reads after freeing - insert a mapping |
216 | * to a scratch page instead of an invalid entry */ |
217 | pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW | |
218 | __pa(window->iommu->pad_page) | |
219 | (window->ioid & CBE_IOPTE_IOID_Mask); |
220 | #endif |
221 | |
222 | io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); |
223 | |
224 | for (i = 0; i < npages; i++) |
225 | io_pte[i] = pte; |
226 | |
227 | mb(); |
228 | |
	invalidate_tce_cache(window->iommu, io_pte, npages);
230 | } |
231 | |
232 | static irqreturn_t ioc_interrupt(int irq, void *data) |
233 | { |
234 | unsigned long stat, spf; |
235 | struct cbe_iommu *iommu = data; |
236 | |
237 | stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); |
238 | spf = stat & IOC_IO_ExcpStat_SPF_Mask; |
239 | |
240 | /* Might want to rate limit it */ |
	printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
	printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
	       !!(stat & IOC_IO_ExcpStat_V),
	       (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ',
	       (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ',
	       (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
	       (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
	printk(KERN_ERR " page=0x%016lx\n",
	       stat & IOC_IO_ExcpStat_ADDR_Mask);
250 | |
251 | /* clear interrupt */ |
252 | stat &= ~IOC_IO_ExcpStat_V; |
253 | out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat); |
254 | |
255 | return IRQ_HANDLED; |
256 | } |
257 | |
258 | static int __init cell_iommu_find_ioc(int nid, unsigned long *base) |
259 | { |
260 | struct device_node *np; |
261 | struct resource r; |
262 | |
263 | *base = 0; |
264 | |
265 | /* First look for new style /be nodes */ |
	for_each_node_by_name(np, "ioc") {
		if (of_node_to_nid(np) != nid)
			continue;
		if (of_address_to_resource(np, 0, &r)) {
			printk(KERN_ERR "iommu: can't get address for %pOF\n",
			       np);
			continue;
		}
		*base = r.start;
		of_node_put(np);
276 | return 0; |
277 | } |
278 | |
279 | /* Ok, let's try the old way */ |
	for_each_node_by_type(np, "cpu") {
		const unsigned int *nidp;
		const unsigned long *tmp;

		nidp = of_get_property(np, "node-id", NULL);
		if (nidp && *nidp == nid) {
			tmp = of_get_property(np, "ioc-translation", NULL);
			if (tmp) {
				*base = *tmp;
				of_node_put(np);
290 | return 0; |
291 | } |
292 | } |
293 | } |
294 | |
295 | return -ENODEV; |
296 | } |
297 | |
298 | static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu, |
299 | unsigned long dbase, unsigned long dsize, |
300 | unsigned long fbase, unsigned long fsize) |
301 | { |
302 | struct page *page; |
303 | unsigned long segments, stab_size; |
304 | |
305 | segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT; |
306 | |
	pr_debug("%s: iommu[%d]: segments: %lu\n",
		 __func__, iommu->nid, segments);

	/* set up the segment table */
	stab_size = segments * sizeof(unsigned long);
	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size));
313 | BUG_ON(!page); |
314 | iommu->stab = page_address(page); |
315 | memset(iommu->stab, 0, stab_size); |
316 | } |
317 | |
318 | static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu, |
319 | unsigned long base, unsigned long size, unsigned long gap_base, |
320 | unsigned long gap_size, unsigned long page_shift) |
321 | { |
322 | struct page *page; |
323 | int i; |
324 | unsigned long reg, segments, pages_per_segment, ptab_size, |
325 | n_pte_pages, start_seg, *ptab; |
326 | |
327 | start_seg = base >> IO_SEGMENT_SHIFT; |
328 | segments = size >> IO_SEGMENT_SHIFT; |
329 | pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift); |
330 | /* PTEs for each segment must start on a 4K boundary */ |
331 | pages_per_segment = max(pages_per_segment, |
332 | (1 << 12) / sizeof(unsigned long)); |
333 | |
334 | ptab_size = segments * pages_per_segment * sizeof(unsigned long); |
	pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
		 iommu->nid, ptab_size, get_order(ptab_size));
	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
338 | BUG_ON(!page); |
339 | |
340 | ptab = page_address(page); |
341 | memset(ptab, 0, ptab_size); |
342 | |
343 | /* number of 4K pages needed for a page table */ |
344 | n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12; |
345 | |
	pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
347 | __func__, iommu->nid, iommu->stab, ptab, |
348 | n_pte_pages); |
349 | |
350 | /* initialise the STEs */ |
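	/* the NPPT field (IOSTE_NPPT_Mask, bits 5-11) holds the number of
	 * 4K pages in the IOPT minus one */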
351 | reg = IOSTE_V | ((n_pte_pages - 1) << 5); |
352 | |
353 | switch (page_shift) { |
354 | case 12: reg |= IOSTE_PS_4K; break; |
355 | case 16: reg |= IOSTE_PS_64K; break; |
356 | case 20: reg |= IOSTE_PS_1M; break; |
357 | case 24: reg |= IOSTE_PS_16M; break; |
358 | default: BUG(); |
359 | } |
360 | |
361 | gap_base = gap_base >> IO_SEGMENT_SHIFT; |
362 | gap_size = gap_size >> IO_SEGMENT_SHIFT; |
363 | |
	pr_debug("Setting up IOMMU stab:\n");
	for (i = start_seg; i < (start_seg + segments); i++) {
		if (i >= gap_base && i < (gap_base + gap_size)) {
			pr_debug("\toverlap at %d, skipping\n", i);
			continue;
		}
		iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) *
					(i - start_seg));
		pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
373 | } |
374 | |
375 | return ptab; |
376 | } |
377 | |
378 | static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu) |
379 | { |
380 | int ret; |
381 | unsigned long reg, xlate_base; |
382 | unsigned int virq; |
383 | |
	if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
		panic("%s: missing IOC register mappings for node %d\n",
		      __func__, iommu->nid);

	iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
389 | iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; |
390 | |
391 | /* ensure that the STEs have updated */ |
392 | mb(); |
393 | |
394 | /* setup interrupts for the iommu. */ |
395 | reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); |
396 | out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, |
397 | reg & ~IOC_IO_ExcpStat_V); |
398 | out_be64(iommu->xlate_regs + IOC_IO_ExcpMask, |
399 | IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE); |
400 | |
	virq = irq_create_mapping(NULL,
			IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
	BUG_ON(!virq);

	ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
406 | BUG_ON(ret); |
407 | |
408 | /* set the IOC segment table origin register (and turn on the iommu) */ |
409 | reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW; |
410 | out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg); |
411 | in_be64(iommu->xlate_regs + IOC_IOST_Origin); |
412 | |
413 | /* turn on IO translation */ |
414 | reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE; |
415 | out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); |
416 | } |
417 | |
418 | static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, |
419 | unsigned long base, unsigned long size) |
420 | { |
	cell_iommu_setup_stab(iommu, base, size, 0, 0);
	iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
					    IOMMU_PAGE_SHIFT_4K);
424 | cell_iommu_enable_hardware(iommu); |
425 | } |
426 | |
#if 0 /* Unused for now */
428 | static struct iommu_window *find_window(struct cbe_iommu *iommu, |
429 | unsigned long offset, unsigned long size) |
430 | { |
431 | struct iommu_window *window; |
432 | |
433 | /* todo: check for overlapping (but not equal) windows) */ |
434 | |
435 | list_for_each_entry(window, &(iommu->windows), list) { |
436 | if (window->offset == offset && window->size == size) |
437 | return window; |
438 | } |
439 | |
440 | return NULL; |
441 | } |
442 | #endif |
443 | |
444 | static inline u32 cell_iommu_get_ioid(struct device_node *np) |
445 | { |
446 | const u32 *ioid; |
447 | |
	ioid = of_get_property(np, "ioid", NULL);
	if (ioid == NULL) {
		printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n",
451 | np); |
452 | return 0; |
453 | } |
454 | |
455 | return *ioid; |
456 | } |
457 | |
458 | static struct iommu_table_ops cell_iommu_ops = { |
459 | .set = tce_build_cell, |
460 | .clear = tce_free_cell |
461 | }; |
462 | |
463 | static struct iommu_window * __init |
464 | cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, |
465 | unsigned long offset, unsigned long size, |
466 | unsigned long pte_offset) |
467 | { |
468 | struct iommu_window *window; |
469 | struct page *page; |
470 | u32 ioid; |
471 | |
472 | ioid = cell_iommu_get_ioid(np); |
473 | |
	window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
475 | BUG_ON(window == NULL); |
476 | |
477 | window->offset = offset; |
478 | window->size = size; |
479 | window->ioid = ioid; |
480 | window->iommu = iommu; |
481 | |
482 | window->table.it_blocksize = 16; |
483 | window->table.it_base = (unsigned long)iommu->ptab; |
484 | window->table.it_index = iommu->nid; |
485 | window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; |
486 | window->table.it_offset = |
487 | (offset >> window->table.it_page_shift) + pte_offset; |
488 | window->table.it_size = size >> window->table.it_page_shift; |
489 | window->table.it_ops = &cell_iommu_ops; |
490 | |
491 | if (!iommu_init_table(&window->table, iommu->nid, 0, 0)) |
		panic("Failed to initialize iommu table");

	pr_debug("\tioid %d\n", window->ioid);
	pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
	pr_debug("\tbase 0x%016lx\n", window->table.it_base);
	pr_debug("\toffset 0x%lx\n", window->table.it_offset);
	pr_debug("\tsize %ld\n", window->table.it_size);

	list_add(&window->list, &iommu->windows);
501 | |
502 | if (offset != 0) |
503 | return window; |
504 | |
505 | /* We need to map and reserve the first IOMMU page since it's used |
506 | * by the spider workaround. In theory, we only need to do that when |
507 | * running on spider but it doesn't really matter. |
508 | * |
509 | * This code also assumes that we have a window that starts at 0, |
510 | * which is the case on all spider based blades. |
511 | */ |
	page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
	BUG_ON(!page);
	iommu->pad_page = page_address(page);
	clear_page(iommu->pad_page);

	__set_bit(0, window->table.it_map);
	tce_build_cell(&window->table, window->table.it_offset, 1,
		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0);
520 | |
521 | return window; |
522 | } |
523 | |
524 | static struct cbe_iommu *cell_iommu_for_node(int nid) |
525 | { |
526 | int i; |
527 | |
528 | for (i = 0; i < cbe_nr_iommus; i++) |
529 | if (iommus[i].nid == nid) |
530 | return &iommus[i]; |
531 | return NULL; |
532 | } |
533 | |
534 | static unsigned long cell_dma_nommu_offset; |
535 | |
536 | static unsigned long dma_iommu_fixed_base; |
537 | static bool cell_iommu_enabled; |
538 | |
539 | /* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ |
540 | bool iommu_fixed_is_weak; |
541 | |
542 | static struct iommu_table *cell_get_iommu_table(struct device *dev) |
543 | { |
544 | struct iommu_window *window; |
545 | struct cbe_iommu *iommu; |
546 | |
547 | /* Current implementation uses the first window available in that |
548 | * node's iommu. We -might- do something smarter later though it may |
549 | * never be necessary |
550 | */ |
	iommu = cell_iommu_for_node(dev_to_node(dev));
	if (iommu == NULL || list_empty(&iommu->windows)) {
		dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n",
554 | dev->of_node, dev_to_node(dev)); |
555 | return NULL; |
556 | } |
557 | window = list_entry(iommu->windows.next, struct iommu_window, list); |
558 | |
559 | return &window->table; |
560 | } |
561 | |
562 | static u64 cell_iommu_get_fixed_address(struct device *dev); |
563 | |
564 | static void cell_dma_dev_setup(struct device *dev) |
565 | { |
566 | if (cell_iommu_enabled) { |
567 | u64 addr = cell_iommu_get_fixed_address(dev); |
568 | |
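		/* Devices that can use the fixed 1:1 window get its offset
		 * recorded for the direct (bypass) path; the dynamic iommu
		 * table below remains the fallback for everything else.
		 */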
569 | if (addr != OF_BAD_ADDR) |
570 | dev->archdata.dma_offset = addr + dma_iommu_fixed_base; |
571 | set_iommu_table_base(dev, cell_get_iommu_table(dev)); |
572 | } else { |
573 | dev->archdata.dma_offset = cell_dma_nommu_offset; |
574 | } |
575 | } |
576 | |
577 | static void cell_pci_dma_dev_setup(struct pci_dev *dev) |
578 | { |
	cell_dma_dev_setup(&dev->dev);
580 | } |
581 | |
582 | static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, |
583 | void *data) |
584 | { |
585 | struct device *dev = data; |
586 | |
587 | /* We are only interested in device addition */ |
588 | if (action != BUS_NOTIFY_ADD_DEVICE) |
589 | return 0; |
590 | |
591 | if (cell_iommu_enabled) |
592 | dev->dma_ops = &dma_iommu_ops; |
593 | cell_dma_dev_setup(dev); |
594 | return 0; |
595 | } |
596 | |
597 | static struct notifier_block cell_of_bus_notifier = { |
598 | .notifier_call = cell_of_bus_notify |
599 | }; |
600 | |
601 | static int __init cell_iommu_get_window(struct device_node *np, |
602 | unsigned long *base, |
603 | unsigned long *size) |
604 | { |
605 | const __be32 *dma_window; |
606 | unsigned long index; |
607 | |
608 | /* Use ibm,dma-window if available, else, hard code ! */ |
	dma_window = of_get_property(np, "ibm,dma-window", NULL);
610 | if (dma_window == NULL) { |
611 | *base = 0; |
612 | *size = 0x80000000u; |
613 | return -ENODEV; |
614 | } |
615 | |
616 | of_parse_dma_window(np, dma_window, &index, base, size); |
617 | return 0; |
618 | } |
619 | |
620 | static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np) |
621 | { |
622 | struct cbe_iommu *iommu; |
623 | int nid, i; |
624 | |
625 | /* Get node ID */ |
626 | nid = of_node_to_nid(np); |
627 | if (nid < 0) { |
		printk(KERN_ERR "iommu: failed to get node for %pOF\n",
629 | np); |
630 | return NULL; |
631 | } |
	pr_debug("iommu: setting up iommu for node %d (%pOF)\n",
633 | nid, np); |
634 | |
635 | /* XXX todo: If we can have multiple windows on the same IOMMU, which |
636 | * isn't the case today, we probably want here to check whether the |
637 | * iommu for that node is already setup. |
638 | * However, there might be issue with getting the size right so let's |
639 | * ignore that for now. We might want to completely get rid of the |
640 | * multiple window support since the cell iommu supports per-page ioids |
641 | */ |
642 | |
643 | if (cbe_nr_iommus >= NR_IOMMUS) { |
		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n",
645 | np); |
646 | return NULL; |
647 | } |
648 | |
649 | /* Init base fields */ |
650 | i = cbe_nr_iommus++; |
651 | iommu = &iommus[i]; |
652 | iommu->stab = NULL; |
653 | iommu->nid = nid; |
	snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i);
	INIT_LIST_HEAD(&iommu->windows);
656 | |
657 | return iommu; |
658 | } |
659 | |
660 | static void __init cell_iommu_init_one(struct device_node *np, |
661 | unsigned long offset) |
662 | { |
663 | struct cbe_iommu *iommu; |
664 | unsigned long base, size; |
665 | |
666 | iommu = cell_iommu_alloc(np); |
667 | if (!iommu) |
668 | return; |
669 | |
670 | /* Obtain a window for it */ |
	cell_iommu_get_window(np, &base, &size);

	pr_debug("\ttranslating window 0x%lx...0x%lx\n",
		 base, base + size - 1);

	/* Initialize the hardware */
	cell_iommu_setup_hardware(iommu, base, size);

	/* Setup the iommu_table */
	cell_iommu_setup_window(iommu, np, base, size,
				offset >> IOMMU_PAGE_SHIFT_4K);
682 | } |
683 | |
684 | static void __init cell_disable_iommus(void) |
685 | { |
686 | int node; |
687 | unsigned long base, val; |
688 | void __iomem *xregs, *cregs; |
689 | |
690 | /* Make sure IOC translation is disabled on all nodes */ |
691 | for_each_online_node(node) { |
		if (cell_iommu_find_ioc(node, &base))
			continue;
		xregs = ioremap(base, IOC_Reg_Size);
695 | if (xregs == NULL) |
696 | continue; |
697 | cregs = xregs + IOC_IOCmd_Offset; |
698 | |
		pr_debug("iommu: cleaning up iommu on node %d\n", node);
700 | |
701 | out_be64(xregs + IOC_IOST_Origin, 0); |
702 | (void)in_be64(xregs + IOC_IOST_Origin); |
703 | val = in_be64(cregs + IOC_IOCmd_Cfg); |
704 | val &= ~IOC_IOCmd_Cfg_TE; |
705 | out_be64(cregs + IOC_IOCmd_Cfg, val); |
706 | (void)in_be64(cregs + IOC_IOCmd_Cfg); |
707 | |
		iounmap(xregs);
709 | } |
710 | } |
711 | |
712 | static int __init cell_iommu_init_disabled(void) |
713 | { |
714 | struct device_node *np = NULL; |
715 | unsigned long base = 0, size; |
716 | |
717 | /* When no iommu is present, we use direct DMA ops */ |
718 | |
719 | /* First make sure all IOC translation is turned off */ |
720 | cell_disable_iommus(); |
721 | |
722 | /* If we have no Axon, we set up the spider DMA magic offset */ |
	np = of_find_node_by_name(NULL, "axon");
	if (!np)
		cell_dma_nommu_offset = SPIDER_DMA_OFFSET;
	of_node_put(np);
727 | |
728 | /* Now we need to check to see where the memory is mapped |
729 | * in PCI space. We assume that all busses use the same dma |
730 | * window which is always the case so far on Cell, thus we |
731 | * pick up the first pci-internal node we can find and check |
732 | * the DMA window from there. |
733 | */ |
	for_each_node_by_name(np, "axon") {
		if (np->parent == NULL || np->parent->parent != NULL)
			continue;
		if (cell_iommu_get_window(np, &base, &size) == 0)
			break;
	}
	if (np == NULL) {
		for_each_node_by_name(np, "pci-internal") {
			if (np->parent == NULL || np->parent->parent != NULL)
				continue;
			if (cell_iommu_get_window(np, &base, &size) == 0)
				break;
		}
	}
	of_node_put(np);
749 | |
750 | /* If we found a DMA window, we check if it's big enough to enclose |
751 | * all of physical memory. If not, we force enable IOMMU |
752 | */ |
753 | if (np && size < memblock_end_of_DRAM()) { |
		printk(KERN_WARNING "iommu: force-enabled, dma window"
		       " (%ldMB) smaller than total memory (%lldMB)\n",
756 | size >> 20, memblock_end_of_DRAM() >> 20); |
757 | return -ENODEV; |
758 | } |
759 | |
760 | cell_dma_nommu_offset += base; |
761 | |
762 | if (cell_dma_nommu_offset != 0) |
763 | cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; |
764 | |
	printk("iommu: disabled, direct DMA offset is 0x%lx\n",
766 | cell_dma_nommu_offset); |
767 | |
768 | return 0; |
769 | } |
770 | |
771 | /* |
772 | * Fixed IOMMU mapping support |
773 | * |
774 | * This code adds support for setting up a fixed IOMMU mapping on certain |
775 | * cell machines. For 64-bit devices this avoids the performance overhead of |
776 | * mapping and unmapping pages at runtime. 32-bit devices are unable to use |
777 | * the fixed mapping. |
778 | * |
779 | * The fixed mapping is established at boot, and maps all of physical memory |
780 | * 1:1 into device space at some offset. On machines with < 30 GB of memory |
781 | * we setup the fixed mapping immediately above the normal IOMMU window. |
782 | * |
783 | * For example a machine with 4GB of memory would end up with the normal |
784 | * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In |
785 | * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to |
786 | * 3GB, plus any offset required by firmware. The firmware offset is encoded |
787 | * in the "dma-ranges" property. |
788 | * |
789 | * On machines with 30GB or more of memory, we are unable to place the fixed |
790 | * mapping above the normal IOMMU window as we would run out of address space. |
791 | * Instead we move the normal IOMMU window to coincide with the hash page |
792 | * table, this region does not need to be part of the fixed mapping as no |
793 | * device should ever be DMA'ing to it. We then setup the fixed mapping |
794 | * from 0 to 32GB. |
795 | */ |
796 | |
797 | static u64 cell_iommu_get_fixed_address(struct device *dev) |
798 | { |
799 | u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; |
800 | struct device_node *np; |
801 | const u32 *ranges = NULL; |
802 | int i, len, best, naddr, nsize, pna, range_size; |
803 | |
804 | /* We can be called for platform devices that have no of_node */ |
	np = of_node_get(dev->of_node);
	if (!np)
		goto out;

	while (1) {
		naddr = of_n_addr_cells(np);
		nsize = of_n_size_cells(np);
		np = of_get_next_parent(np);
		if (!np)
			break;

		ranges = of_get_property(np, "dma-ranges", &len);
817 | |
818 | /* Ignore empty ranges, they imply no translation required */ |
819 | if (ranges && len > 0) |
820 | break; |
821 | } |
822 | |
823 | if (!ranges) { |
		dev_dbg(dev, "iommu: no dma-ranges found\n");
825 | goto out; |
826 | } |
827 | |
828 | len /= sizeof(u32); |
829 | |
830 | pna = of_n_addr_cells(np); |
831 | range_size = naddr + nsize + pna; |
832 | |
833 | /* dma-ranges format: |
834 | * child addr : naddr cells |
835 | * parent addr : pna cells |
836 | * size : nsize cells |
837 | */ |
838 | for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { |
		cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
		size = of_read_number(ranges + i + naddr + pna, nsize);
841 | |
842 | if (cpu_addr == 0 && size > best_size) { |
843 | best = i; |
844 | best_size = size; |
845 | } |
846 | } |
847 | |
848 | if (best >= 0) { |
		dev_addr = of_read_number(ranges + best, naddr);
	} else
		dev_dbg(dev, "iommu: no suitable range found!\n");
852 | |
853 | out: |
	of_node_put(np);
855 | |
856 | return dev_addr; |
857 | } |
858 | |
859 | static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask) |
860 | { |
861 | return mask == DMA_BIT_MASK(64) && |
		cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR;
863 | } |
864 | |
865 | static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab, |
866 | unsigned long base_pte) |
867 | { |
868 | unsigned long segment, offset; |
869 | |
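	/* 16MB pages give IO_PAGENO_BITS(24) == 4 page-number bits, i.e.
	 * 16 PTEs per 256MB segment, but each segment's PTEs still start
	 * on a 4K boundary, hence the 4K (512 entry) stride into ptab.
	 */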
870 | segment = addr >> IO_SEGMENT_SHIFT; |
871 | offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24)); |
872 | ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long)); |
873 | |
	pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
875 | addr, ptab, segment, offset); |
876 | |
877 | ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); |
878 | } |
879 | |
880 | static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, |
881 | struct device_node *np, unsigned long dbase, unsigned long dsize, |
882 | unsigned long fbase, unsigned long fsize) |
883 | { |
884 | unsigned long base_pte, uaddr, ioaddr, *ptab; |
885 | |
	ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);

	dma_iommu_fixed_base = fbase;

	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
891 | |
892 | base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | |
893 | (cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask); |
894 | |
895 | if (iommu_fixed_is_weak) |
		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
	else {
		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
899 | base_pte |= CBE_IOPTE_SO_RW; |
900 | } |
901 | |
902 | for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) { |
903 | /* Don't touch the dynamic region */ |
904 | ioaddr = uaddr + fbase; |
905 | if (ioaddr >= dbase && ioaddr < (dbase + dsize)) { |
			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
			continue;
		}

		insert_16M_pte(uaddr, ptab, base_pte);
911 | } |
912 | |
913 | mb(); |
914 | } |
915 | |
916 | static int __init cell_iommu_fixed_mapping_init(void) |
917 | { |
918 | unsigned long dbase, dsize, fbase, fsize, hbase, hend; |
919 | struct cbe_iommu *iommu; |
920 | struct device_node *np; |
921 | |
922 | /* The fixed mapping is only supported on axon machines */ |
	np = of_find_node_by_name(NULL, "axon");
	of_node_put(np);

	if (!np) {
		pr_debug("iommu: fixed mapping disabled, no axons found\n");
928 | return -1; |
929 | } |
930 | |
931 | /* We must have dma-ranges properties for fixed mapping to work */ |
	np = of_find_node_with_property(NULL, "dma-ranges");
	of_node_put(np);

	if (!np) {
		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
937 | return -1; |
938 | } |
939 | |
940 | /* The default setup is to have the fixed mapping sit after the |
941 | * dynamic region, so find the top of the largest IOMMU window |
942 | * on any axon, then add the size of RAM and that's our max value. |
 * If that is > 32GB we have to do other shenanigans.
944 | */ |
945 | fbase = 0; |
	for_each_node_by_name(np, "axon") {
		cell_iommu_get_window(np, &dbase, &dsize);
948 | fbase = max(fbase, dbase + dsize); |
949 | } |
950 | |
951 | fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT); |
952 | fsize = memblock_phys_mem_size(); |
953 | |
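	/* 0x800000000ul == 32GB, the limit referred to in the comment above */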
954 | if ((fbase + fsize) <= 0x800000000ul) |
955 | hbase = 0; /* use the device tree window */ |
956 | else { |
957 | /* If we're over 32 GB we need to cheat. We can't map all of |
958 | * RAM with the fixed mapping, and also fit the dynamic |
959 | * region. So try to place the dynamic region where the hash |
960 | * table sits, drivers never need to DMA to it, we don't |
961 | * need a fixed mapping for that area. |
962 | */ |
963 | if (!htab_address) { |
			pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
965 | return -1; |
966 | } |
967 | hbase = __pa(htab_address); |
968 | hend = hbase + htab_size_bytes; |
969 | |
970 | /* The window must start and end on a segment boundary */ |
971 | if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) || |
972 | (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) { |
			pr_debug("iommu: hash window not segment aligned\n");
974 | return -1; |
975 | } |
976 | |
977 | /* Check the hash window fits inside the real DMA window */ |
		for_each_node_by_name(np, "axon") {
			cell_iommu_get_window(np, &dbase, &dsize);

			if (hbase < dbase || (hend > (dbase + dsize))) {
				pr_debug("iommu: hash window doesn't fit in "
					 "real DMA window\n");
				of_node_put(np);
985 | return -1; |
986 | } |
987 | } |
988 | |
989 | fbase = 0; |
990 | } |
991 | |
992 | /* Setup the dynamic regions */ |
	for_each_node_by_name(np, "axon") {
		iommu = cell_iommu_alloc(np);
		BUG_ON(!iommu);

		if (hbase == 0)
			cell_iommu_get_window(np, &dbase, &dsize);
999 | else { |
1000 | dbase = hbase; |
1001 | dsize = htab_size_bytes; |
1002 | } |
1003 | |
		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
		       "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
		       dbase + dsize, fbase, fbase + fsize);

		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
						    IOMMU_PAGE_SHIFT_4K);
		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
					    fbase, fsize);
		cell_iommu_enable_hardware(iommu);
		cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
1015 | } |
1016 | |
1017 | cell_pci_controller_ops.iommu_bypass_supported = |
1018 | cell_pci_iommu_bypass_supported; |
1019 | return 0; |
1020 | } |
1021 | |
1022 | static int iommu_fixed_disabled; |
1023 | |
1024 | static int __init setup_iommu_fixed(char *str) |
1025 | { |
1026 | struct device_node *pciep; |
1027 | |
	if (strcmp(str, "off") == 0)
1029 | iommu_fixed_disabled = 1; |
1030 | |
1031 | /* If we can find a pcie-endpoint in the device tree assume that |
1032 | * we're on a triblade or a CAB so by default the fixed mapping |
1033 | * should be set to be weakly ordered; but only if the boot |
1034 | * option WASN'T set for strong ordering |
1035 | */ |
	pciep = of_find_node_by_type(NULL, "pcie-endpoint");

	if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
		iommu_fixed_is_weak = true;

	of_node_put(pciep);
1042 | |
1043 | return 1; |
1044 | } |
__setup("iommu_fixed=", setup_iommu_fixed);
1046 | |
1047 | static int __init cell_iommu_init(void) |
1048 | { |
1049 | struct device_node *np; |
1050 | |
1051 | /* If IOMMU is disabled or we have little enough RAM to not need |
1052 | * to enable it, we setup a direct mapping. |
1053 | * |
1054 | * Note: should we make sure we have the IOMMU actually disabled ? |
1055 | */ |
1056 | if (iommu_is_off || |
1057 | (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull)) |
1058 | if (cell_iommu_init_disabled() == 0) |
1059 | goto bail; |
1060 | |
1061 | /* Setup various callbacks */ |
1062 | cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; |
1063 | |
1064 | if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) |
1065 | goto done; |
1066 | |
1067 | /* Create an iommu for each /axon node. */ |
	for_each_node_by_name(np, "axon") {
		if (np->parent == NULL || np->parent->parent != NULL)
			continue;
		cell_iommu_init_one(np, 0);
1072 | } |
1073 | |
1074 | /* Create an iommu for each toplevel /pci-internal node for |
1075 | * old hardware/firmware |
1076 | */ |
	for_each_node_by_name(np, "pci-internal") {
1078 | if (np->parent == NULL || np->parent->parent != NULL) |
1079 | continue; |
1080 | cell_iommu_init_one(np, SPIDER_DMA_OFFSET); |
1081 | } |
1082 | done: |
1083 | /* Setup default PCI iommu ops */ |
1084 | set_pci_dma_ops(&dma_iommu_ops); |
1085 | cell_iommu_enabled = true; |
1086 | bail: |
1087 | /* Register callbacks on OF platform device addition/removal |
1088 | * to handle linking them to the right DMA operations |
1089 | */ |
	bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
1091 | |
1092 | return 0; |
1093 | } |
1094 | machine_arch_initcall(cell, cell_iommu_init); |
1095 | |