1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Apple DART page table allocator. |
4 | * |
5 | * Copyright (C) 2022 The Asahi Linux Contributors |
6 | * |
7 | * Based on io-pgtable-arm. |
8 | * |
9 | * Copyright (C) 2014 ARM Limited |
10 | * |
11 | * Author: Will Deacon <will.deacon@arm.com> |
12 | */ |
13 | |
14 | #define pr_fmt(fmt) "dart io-pgtable: " fmt |
15 | |
16 | #include <linux/atomic.h> |
17 | #include <linux/bitfield.h> |
18 | #include <linux/bitops.h> |
19 | #include <linux/io-pgtable.h> |
20 | #include <linux/kernel.h> |
21 | #include <linux/sizes.h> |
22 | #include <linux/slab.h> |
23 | #include <linux/types.h> |
24 | |
25 | #include <asm/barrier.h> |
26 | |
/* NOTE(review): no user of this constant is visible in this file. */
#define DART1_MAX_ADDR_BITS	36

/*
 * Capacity of dart_io_pgtable.pgd[]; dart_alloc_pgtable() refuses any
 * configuration that would need more top-level tables than this.
 */
#define DART_MAX_TABLES		4
/* Translation levels below the table selector (L1 and L2). */
#define DART_LEVELS		2
31 | |
/* Struct accessors */

/* Recover the dart_io_pgtable that embeds struct io_pgtable @x as 'iop'. */
#define io_pgtable_to_data(x)						\
	container_of((x), struct dart_io_pgtable, iop)

/* Same as above, but starting from a struct io_pgtable_ops pointer. */
#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * Size in bytes of one table: (1 << bits_per_level) entries of
 * sizeof(dart_iopte) bytes each.
 */
#define DART_GRANULE(d)						\
	(sizeof(dart_iopte) << (d)->bits_per_level)
/* Number of entries in one table, i.e. the granule divided by entry size. */
#define DART_PTES_PER_TABLE(d)					\
	(DART_GRANULE(d) >> ilog2(sizeof(dart_iopte)))
43 | |
/*
 * Subpage protection fields: PTE bits 63:52 (start) and 51:40 (end).
 * dart_init_pte() always writes start = 0 and end = 0xfff.
 */
#define APPLE_DART_PTE_SUBPAGE_START   GENMASK_ULL(63, 52)
#define APPLE_DART_PTE_SUBPAGE_END     GENMASK_ULL(51, 40)

/* DART1: PTE bits 35:12 hold the physical address unshifted. */
#define APPLE_DART1_PADDR_MASK	GENMASK_ULL(35, 12)
/*
 * DART2: PTE bits 37:10 hold the physical address shifted right by
 * APPLE_DART2_PADDR_SHIFT.
 */
#define APPLE_DART2_PADDR_MASK	GENMASK_ULL(37, 10)
#define APPLE_DART2_PADDR_SHIFT	(4)

/* Apple DART1 protection bits */
#define APPLE_DART1_PTE_PROT_NO_READ	BIT(8)
#define APPLE_DART1_PTE_PROT_NO_WRITE	BIT(7)
#define APPLE_DART1_PTE_PROT_SP_DIS	BIT(1)

/*
 * Apple DART2 protection bits
 *
 * NOTE(review): NO_CACHE is BIT(1), the same bit position as
 * APPLE_DART1_PTE_PROT_SP_DIS above.
 */
#define APPLE_DART2_PTE_PROT_NO_READ	BIT(3)
#define APPLE_DART2_PTE_PROT_NO_WRITE	BIT(2)
#define APPLE_DART2_PTE_PROT_NO_CACHE	BIT(1)

/* marks PTE as valid */
#define APPLE_DART_PTE_VALID		BIT(0)
63 | |
/* IOPTE accessors */

/*
 * Kernel virtual address of whatever @pte points to: the physical address
 * is decoded with iopte_to_paddr() and then converted with __va().
 */
#define iopte_deref(pte, d) __va(iopte_to_paddr(pte, d))
66 | |
/*
 * Per-instance state of one DART page table.
 *
 * The generic struct io_pgtable is embedded so that io_pgtable_to_data()
 * can recover this structure with container_of().
 */
struct dart_io_pgtable {
	struct io_pgtable	iop;

	/* iova bits selecting a top-level table; (1 << tbl_bits) tables in use */
	int			tbl_bits;
	/* iova bits resolved by each table level */
	int			bits_per_level;

	/* top-level (L1) tables, allocated by apple_dart_alloc_pgtable() */
	void			*pgd[DART_MAX_TABLES];
};

/* One page table entry; both L1 and L2 tables are arrays of these. */
typedef u64 dart_iopte;
77 | |
78 | |
79 | static dart_iopte paddr_to_iopte(phys_addr_t paddr, |
80 | struct dart_io_pgtable *data) |
81 | { |
82 | dart_iopte pte; |
83 | |
84 | if (data->iop.fmt == APPLE_DART) |
85 | return paddr & APPLE_DART1_PADDR_MASK; |
86 | |
87 | /* format is APPLE_DART2 */ |
88 | pte = paddr >> APPLE_DART2_PADDR_SHIFT; |
89 | pte &= APPLE_DART2_PADDR_MASK; |
90 | |
91 | return pte; |
92 | } |
93 | |
94 | static phys_addr_t iopte_to_paddr(dart_iopte pte, |
95 | struct dart_io_pgtable *data) |
96 | { |
97 | u64 paddr; |
98 | |
99 | if (data->iop.fmt == APPLE_DART) |
100 | return pte & APPLE_DART1_PADDR_MASK; |
101 | |
102 | /* format is APPLE_DART2 */ |
103 | paddr = pte & APPLE_DART2_PADDR_MASK; |
104 | paddr <<= APPLE_DART2_PADDR_SHIFT; |
105 | |
106 | return paddr; |
107 | } |
108 | |
109 | static void *__dart_alloc_pages(size_t size, gfp_t gfp, |
110 | struct io_pgtable_cfg *cfg) |
111 | { |
112 | int order = get_order(size); |
113 | struct page *p; |
114 | |
115 | VM_BUG_ON((gfp & __GFP_HIGHMEM)); |
116 | p = alloc_pages(gfp: gfp | __GFP_ZERO, order); |
117 | if (!p) |
118 | return NULL; |
119 | |
120 | return page_address(p); |
121 | } |
122 | |
123 | static int dart_init_pte(struct dart_io_pgtable *data, |
124 | unsigned long iova, phys_addr_t paddr, |
125 | dart_iopte prot, int num_entries, |
126 | dart_iopte *ptep) |
127 | { |
128 | int i; |
129 | dart_iopte pte = prot; |
130 | size_t sz = data->iop.cfg.pgsize_bitmap; |
131 | |
132 | for (i = 0; i < num_entries; i++) |
133 | if (ptep[i] & APPLE_DART_PTE_VALID) { |
134 | /* We require an unmap first */ |
135 | WARN_ON(ptep[i] & APPLE_DART_PTE_VALID); |
136 | return -EEXIST; |
137 | } |
138 | |
139 | /* subpage protection: always allow access to the entire page */ |
140 | pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_START, 0); |
141 | pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_END, 0xfff); |
142 | |
143 | pte |= APPLE_DART1_PTE_PROT_SP_DIS; |
144 | pte |= APPLE_DART_PTE_VALID; |
145 | |
146 | for (i = 0; i < num_entries; i++) |
147 | ptep[i] = pte | paddr_to_iopte(paddr: paddr + i * sz, data); |
148 | |
149 | return 0; |
150 | } |
151 | |
152 | static dart_iopte dart_install_table(dart_iopte *table, |
153 | dart_iopte *ptep, |
154 | dart_iopte curr, |
155 | struct dart_io_pgtable *data) |
156 | { |
157 | dart_iopte old, new; |
158 | |
159 | new = paddr_to_iopte(__pa(table), data) | APPLE_DART_PTE_VALID; |
160 | |
161 | /* |
162 | * Ensure the table itself is visible before its PTE can be. |
163 | * Whilst we could get away with cmpxchg64_release below, this |
164 | * doesn't have any ordering semantics when !CONFIG_SMP. |
165 | */ |
166 | dma_wmb(); |
167 | |
168 | old = cmpxchg64_relaxed(ptep, curr, new); |
169 | |
170 | return old; |
171 | } |
172 | |
173 | static int dart_get_table(struct dart_io_pgtable *data, unsigned long iova) |
174 | { |
175 | return (iova >> (3 * data->bits_per_level + ilog2(sizeof(dart_iopte)))) & |
176 | ((1 << data->tbl_bits) - 1); |
177 | } |
178 | |
179 | static int dart_get_l1_index(struct dart_io_pgtable *data, unsigned long iova) |
180 | { |
181 | |
182 | return (iova >> (2 * data->bits_per_level + ilog2(sizeof(dart_iopte)))) & |
183 | ((1 << data->bits_per_level) - 1); |
184 | } |
185 | |
186 | static int dart_get_l2_index(struct dart_io_pgtable *data, unsigned long iova) |
187 | { |
188 | |
189 | return (iova >> (data->bits_per_level + ilog2(sizeof(dart_iopte)))) & |
190 | ((1 << data->bits_per_level) - 1); |
191 | } |
192 | |
193 | static dart_iopte *dart_get_l2(struct dart_io_pgtable *data, unsigned long iova) |
194 | { |
195 | dart_iopte pte, *ptep; |
196 | int tbl = dart_get_table(data, iova); |
197 | |
198 | ptep = data->pgd[tbl]; |
199 | if (!ptep) |
200 | return NULL; |
201 | |
202 | ptep += dart_get_l1_index(data, iova); |
203 | pte = READ_ONCE(*ptep); |
204 | |
205 | /* Valid entry? */ |
206 | if (!pte) |
207 | return NULL; |
208 | |
209 | /* Deref to get level 2 table */ |
210 | return iopte_deref(pte, data); |
211 | } |
212 | |
213 | static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data, |
214 | int prot) |
215 | { |
216 | dart_iopte pte = 0; |
217 | |
218 | if (data->iop.fmt == APPLE_DART) { |
219 | if (!(prot & IOMMU_WRITE)) |
220 | pte |= APPLE_DART1_PTE_PROT_NO_WRITE; |
221 | if (!(prot & IOMMU_READ)) |
222 | pte |= APPLE_DART1_PTE_PROT_NO_READ; |
223 | } |
224 | if (data->iop.fmt == APPLE_DART2) { |
225 | if (!(prot & IOMMU_WRITE)) |
226 | pte |= APPLE_DART2_PTE_PROT_NO_WRITE; |
227 | if (!(prot & IOMMU_READ)) |
228 | pte |= APPLE_DART2_PTE_PROT_NO_READ; |
229 | if (!(prot & IOMMU_CACHE)) |
230 | pte |= APPLE_DART2_PTE_PROT_NO_CACHE; |
231 | } |
232 | |
233 | return pte; |
234 | } |
235 | |
236 | static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova, |
237 | phys_addr_t paddr, size_t pgsize, size_t pgcount, |
238 | int iommu_prot, gfp_t gfp, size_t *mapped) |
239 | { |
240 | struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops); |
241 | struct io_pgtable_cfg *cfg = &data->iop.cfg; |
242 | size_t tblsz = DART_GRANULE(data); |
243 | int ret = 0, tbl, num_entries, max_entries, map_idx_start; |
244 | dart_iopte pte, *cptep, *ptep; |
245 | dart_iopte prot; |
246 | |
247 | if (WARN_ON(pgsize != cfg->pgsize_bitmap)) |
248 | return -EINVAL; |
249 | |
250 | if (WARN_ON(paddr >> cfg->oas)) |
251 | return -ERANGE; |
252 | |
253 | /* If no access, then nothing to do */ |
254 | if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) |
255 | return 0; |
256 | |
257 | tbl = dart_get_table(data, iova); |
258 | |
259 | ptep = data->pgd[tbl]; |
260 | ptep += dart_get_l1_index(data, iova); |
261 | pte = READ_ONCE(*ptep); |
262 | |
263 | /* no L2 table present */ |
264 | if (!pte) { |
265 | cptep = __dart_alloc_pages(size: tblsz, gfp, cfg); |
266 | if (!cptep) |
267 | return -ENOMEM; |
268 | |
269 | pte = dart_install_table(table: cptep, ptep, curr: 0, data); |
270 | if (pte) |
271 | free_pages(addr: (unsigned long)cptep, order: get_order(size: tblsz)); |
272 | |
273 | /* L2 table is present (now) */ |
274 | pte = READ_ONCE(*ptep); |
275 | } |
276 | |
277 | ptep = iopte_deref(pte, data); |
278 | |
279 | /* install a leaf entries into L2 table */ |
280 | prot = dart_prot_to_pte(data, prot: iommu_prot); |
281 | map_idx_start = dart_get_l2_index(data, iova); |
282 | max_entries = DART_PTES_PER_TABLE(data) - map_idx_start; |
283 | num_entries = min_t(int, pgcount, max_entries); |
284 | ptep += map_idx_start; |
285 | ret = dart_init_pte(data, iova, paddr, prot, num_entries, ptep); |
286 | if (!ret && mapped) |
287 | *mapped += num_entries * pgsize; |
288 | |
289 | /* |
290 | * Synchronise all PTE updates for the new mapping before there's |
291 | * a chance for anything to kick off a table walk for the new iova. |
292 | */ |
293 | wmb(); |
294 | |
295 | return ret; |
296 | } |
297 | |
298 | static size_t dart_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova, |
299 | size_t pgsize, size_t pgcount, |
300 | struct iommu_iotlb_gather *gather) |
301 | { |
302 | struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops); |
303 | struct io_pgtable_cfg *cfg = &data->iop.cfg; |
304 | int i = 0, num_entries, max_entries, unmap_idx_start; |
305 | dart_iopte pte, *ptep; |
306 | |
307 | if (WARN_ON(pgsize != cfg->pgsize_bitmap || !pgcount)) |
308 | return 0; |
309 | |
310 | ptep = dart_get_l2(data, iova); |
311 | |
312 | /* Valid L2 IOPTE pointer? */ |
313 | if (WARN_ON(!ptep)) |
314 | return 0; |
315 | |
316 | unmap_idx_start = dart_get_l2_index(data, iova); |
317 | ptep += unmap_idx_start; |
318 | |
319 | max_entries = DART_PTES_PER_TABLE(data) - unmap_idx_start; |
320 | num_entries = min_t(int, pgcount, max_entries); |
321 | |
322 | while (i < num_entries) { |
323 | pte = READ_ONCE(*ptep); |
324 | if (WARN_ON(!pte)) |
325 | break; |
326 | |
327 | /* clear pte */ |
328 | *ptep = 0; |
329 | |
330 | if (!iommu_iotlb_gather_queued(gather)) |
331 | io_pgtable_tlb_add_page(iop: &data->iop, gather, |
332 | iova: iova + i * pgsize, granule: pgsize); |
333 | |
334 | ptep++; |
335 | i++; |
336 | } |
337 | |
338 | return i * pgsize; |
339 | } |
340 | |
341 | static phys_addr_t dart_iova_to_phys(struct io_pgtable_ops *ops, |
342 | unsigned long iova) |
343 | { |
344 | struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops); |
345 | dart_iopte pte, *ptep; |
346 | |
347 | ptep = dart_get_l2(data, iova); |
348 | |
349 | /* Valid L2 IOPTE pointer? */ |
350 | if (!ptep) |
351 | return 0; |
352 | |
353 | ptep += dart_get_l2_index(data, iova); |
354 | |
355 | pte = READ_ONCE(*ptep); |
356 | /* Found translation */ |
357 | if (pte) { |
358 | iova &= (data->iop.cfg.pgsize_bitmap - 1); |
359 | return iopte_to_paddr(pte, data) | iova; |
360 | } |
361 | |
362 | /* Ran out of page tables to walk */ |
363 | return 0; |
364 | } |
365 | |
366 | static struct dart_io_pgtable * |
367 | dart_alloc_pgtable(struct io_pgtable_cfg *cfg) |
368 | { |
369 | struct dart_io_pgtable *data; |
370 | int tbl_bits, bits_per_level, va_bits, pg_shift; |
371 | |
372 | pg_shift = __ffs(cfg->pgsize_bitmap); |
373 | bits_per_level = pg_shift - ilog2(sizeof(dart_iopte)); |
374 | |
375 | va_bits = cfg->ias - pg_shift; |
376 | |
377 | tbl_bits = max_t(int, 0, va_bits - (bits_per_level * DART_LEVELS)); |
378 | if ((1 << tbl_bits) > DART_MAX_TABLES) |
379 | return NULL; |
380 | |
381 | data = kzalloc(size: sizeof(*data), GFP_KERNEL); |
382 | if (!data) |
383 | return NULL; |
384 | |
385 | data->tbl_bits = tbl_bits; |
386 | data->bits_per_level = bits_per_level; |
387 | |
388 | data->iop.ops = (struct io_pgtable_ops) { |
389 | .map_pages = dart_map_pages, |
390 | .unmap_pages = dart_unmap_pages, |
391 | .iova_to_phys = dart_iova_to_phys, |
392 | }; |
393 | |
394 | return data; |
395 | } |
396 | |
397 | static struct io_pgtable * |
398 | apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) |
399 | { |
400 | struct dart_io_pgtable *data; |
401 | int i; |
402 | |
403 | if (!cfg->coherent_walk) |
404 | return NULL; |
405 | |
406 | if (cfg->oas != 36 && cfg->oas != 42) |
407 | return NULL; |
408 | |
409 | if (cfg->ias > cfg->oas) |
410 | return NULL; |
411 | |
412 | if (!(cfg->pgsize_bitmap == SZ_4K || cfg->pgsize_bitmap == SZ_16K)) |
413 | return NULL; |
414 | |
415 | data = dart_alloc_pgtable(cfg); |
416 | if (!data) |
417 | return NULL; |
418 | |
419 | cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits; |
420 | |
421 | for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) { |
422 | data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL, |
423 | cfg); |
424 | if (!data->pgd[i]) |
425 | goto out_free_data; |
426 | cfg->apple_dart_cfg.ttbr[i] = virt_to_phys(address: data->pgd[i]); |
427 | } |
428 | |
429 | return &data->iop; |
430 | |
431 | out_free_data: |
432 | while (--i >= 0) |
433 | free_pages(addr: (unsigned long)data->pgd[i], |
434 | order: get_order(DART_GRANULE(data))); |
435 | kfree(objp: data); |
436 | return NULL; |
437 | } |
438 | |
439 | static void apple_dart_free_pgtable(struct io_pgtable *iop) |
440 | { |
441 | struct dart_io_pgtable *data = io_pgtable_to_data(iop); |
442 | dart_iopte *ptep, *end; |
443 | int i; |
444 | |
445 | for (i = 0; i < (1 << data->tbl_bits) && data->pgd[i]; ++i) { |
446 | ptep = data->pgd[i]; |
447 | end = (void *)ptep + DART_GRANULE(data); |
448 | |
449 | while (ptep != end) { |
450 | dart_iopte pte = *ptep++; |
451 | |
452 | if (pte) { |
453 | unsigned long page = |
454 | (unsigned long)iopte_deref(pte, data); |
455 | |
456 | free_pages(addr: page, order: get_order(DART_GRANULE(data))); |
457 | } |
458 | } |
459 | free_pages(addr: (unsigned long)data->pgd[i], |
460 | order: get_order(DART_GRANULE(data))); |
461 | } |
462 | |
463 | kfree(objp: data); |
464 | } |
465 | |
/*
 * Allocation and free entry points for this page table implementation.
 * The object has external linkage; presumably it is referenced by the
 * io-pgtable core's format table - confirm against the core code.
 */
struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = {
	.alloc	= apple_dart_alloc_pgtable,
	.free	= apple_dart_free_pgtable,
};
470 | |