// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/workqueue.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_iova_rcaches(struct iova_domain *iovad);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
	return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
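
/*
 * Illustrative setup sequence (a sketch, not taken from this file): a user
 * such as an IOMMU driver takes a reference on the iova slab caches, sets up
 * a domain whose granule matches its smallest IOMMU page size, and usually
 * enables the per-CPU caches as well. The granule and start_pfn values here
 * are arbitrary examples:
 *
 *	iova_cache_get();
 *	init_iova_domain(&iovad, SZ_4K, 1);
 *	if (iova_domain_init_rcaches(&iovad))
 *		// handle the error
 *	...
 *	put_iova_domain(&iovad);
 *	iova_cache_put();
 */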

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

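/*
 * The cached_node/cached32_node hints read above are kept up to date by the
 * two helpers below: a successful allocation moves the relevant hint to the
 * newly inserted range, so the next top-down walk can start from the previous
 * allocation rather than the anchor, while freeing a range at or above the
 * hint advances the hint to its next-higher neighbour so it never points at
 * a removed node.
 */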
static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = to_iova(iovad->cached32_node);
	if (free == cached_iova ||
	    (free->pfn_hi < iovad->dma_32bit_pfn &&
	     free->pfn_lo >= cached_iova->pfn_lo))
		iovad->cached32_node = rb_next(&free->node);

	if (free->pfn_lo < iovad->dma_32bit_pfn)
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;

	cached_iova = to_iova(iovad->cached_node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = to_iova(*new);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn, retry_pfn;
	unsigned long align_mask = ~0UL;
	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

	if (size_aligned)
		align_mask <<= fls_long(size - 1);
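	/*
	 * Worked example (illustrative): for size == 10, fls_long(9) is 4, so
	 * align_mask becomes ~0UL << 4 and the pfn_lo chosen below ends up
	 * aligned to 16 pages, i.e. the size rounded up to a power of two, as
	 * the alloc_iova() kerneldoc notes.
	 */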

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
			size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = to_iova(curr);
	retry_pfn = curr_iova->pfn_hi;

retry:
	do {
		high_pfn = min(high_pfn, curr_iova->pfn_lo);
		new_pfn = (high_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = to_iova(curr);
	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

	if (high_pfn < size || new_pfn < low_pfn) {
		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
			high_pfn = limit_pfn;
			low_pfn = retry_pfn + 1;
			curr = iova_find_limit(iovad, limit_pfn);
			curr_iova = to_iova(curr);
			goto retry;
		}
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
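
/*
 * Illustrative pairing (a sketch, not taken from this file): a caller that
 * needs a size-aligned range might do
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(&iovad, nr_pages, limit_pfn, true);
 *	if (!iova)
 *		return -ENOMEM;
 *	...
 *	__free_iova(&iovad, iova);
 *
 * where "iovad" and "nr_pages" come from the caller and "limit_pfn" is the
 * highest page frame the device can address, e.g. something like
 * DMA_BIT_MASK(32) >> iova_shift(&iovad) for a 32-bit DMA mask.
 */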

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = to_iova(node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain.
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	if (!iova) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return;
	}
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		size = roundup_pow_of_two(size);
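	/*
	 * e.g. a 5-page request is padded to 8 pages here, while a 40-page
	 * request (order_base_2(40) == 6 >= IOVA_RANGE_CACHE_MAX_SIZE) is
	 * left untouched and simply falls through the rcache paths below.
	 */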

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		free_global_cached_iovas(iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
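
/*
 * Illustrative fast-path pairing (a sketch, not taken from this file): DMA
 * mapping code typically works in page-frame units, e.g.
 *
 *	unsigned long pfn, nr = iova_align(&iovad, size) >> iova_shift(&iovad);
 *
 *	pfn = alloc_iova_fast(&iovad, nr, limit_pfn, true);
 *	if (!pfn)
 *		return 0;
 *	...
 *	free_iova_fast(&iovad, pfn, nr);
 *
 * where "iovad", "size" and "limit_pfn" are assumed to come from the caller;
 * both calls must pass the same "nr" so the rounding above matches the order
 * used when the range is returned to the cache.
 */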

static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	if (iovad->rcaches)
		iova_domain_free_rcaches(iovad);

	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = to_iova(node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so that
 * it is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = to_iova(node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
			    (pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
			break;
	}

	/*
	 * We are here either because this is the first reserved range or
	 * because the remaining, non-overlapping part of the range still
	 * needs to be inserted.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
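
/*
 * Illustrative use (not from this file): callers commonly reserve windows
 * that must never be handed out, such as an MSI doorbell or a host bridge
 * hole, converting bus addresses to pfns first:
 *
 *	reserve_iova(&iovad, iova_pfn(&iovad, base),
 *		     iova_pfn(&iovad, base + size - 1));
 *
 * where "base" and "size" describe the window to exclude.
 */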

/*
 * Magazine caches for IOVA ranges. For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

/*
 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 * will be wasted. Since only full magazines are inserted into the depot,
 * we don't need to waste PFN capacity on a separate list head either.
 */
#define IOVA_MAG_SIZE 127

#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)

struct iova_magazine {
	union {
		unsigned long size;
		struct iova_magazine *next;
	};
	unsigned long pfns[IOVA_MAG_SIZE];
};
static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
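
/*
 * On a 64-bit build this works out to (1 + IOVA_MAG_SIZE) * sizeof(unsigned
 * long) == 1024 bytes, which is the power-of-two size the static_assert
 * above verifies. Because only full magazines reach the depot, the "size"
 * slot can double as the depot "next" pointer via the union, and
 * iova_depot_pop() below simply restores size to IOVA_MAG_SIZE.
 */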

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

struct iova_rcache {
	spinlock_t lock;
	unsigned int depot_size;
	struct iova_magazine *depot;
	struct iova_cpu_rcache __percpu *cpu_rcaches;
	struct iova_domain *iovad;
	struct delayed_work work;
};

static struct kmem_cache *iova_magazine_cache;

unsigned long iova_rcache_range(void)
{
	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
}
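
/*
 * With a 4 KiB PAGE_SIZE this evaluates to 4 KiB << 5 == 128 KiB: ranges up
 * to that size can be satisfied from the caches, while anything larger always
 * goes straight to the rbtree.
 */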

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	struct iova_magazine *mag;

	mag = kmem_cache_alloc(iova_magazine_cache, flags);
	if (mag)
		mag->size = 0;

	return mag;
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kmem_cache_free(iova_magazine_cache, mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		remove_iova(iovad, iova);
		free_iova_mem(iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return mag->size == IOVA_MAG_SIZE;
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return mag->size == 0;
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	mag->pfns[mag->size++] = pfn;
}

static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
{
	struct iova_magazine *mag = rcache->depot;

	rcache->depot = mag->next;
	mag->size = IOVA_MAG_SIZE;
	rcache->depot_size--;
	return mag;
}

static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
{
	mag->next = rcache->depot;
	rcache->depot = mag;
	rcache->depot_size++;
}

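/*
 * The delayed work below trims an over-full depot gradually: each run frees
 * at most one magazine, and only while the depot holds more than one full
 * magazine per online CPU, rescheduling itself until that target is reached.
 */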
static void iova_depot_work_func(struct work_struct *work)
{
	struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
	struct iova_magazine *mag = NULL;
	unsigned long flags;

	spin_lock_irqsave(&rcache->lock, flags);
	if (rcache->depot_size > num_online_cpus())
		mag = iova_depot_pop(rcache);
	spin_unlock_irqrestore(&rcache->lock, flags);

	if (mag) {
		iova_magazine_free_pfns(mag, rcache->iovad);
		iova_magazine_free(mag);
		schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
	}
}

int iova_domain_init_rcaches(struct iova_domain *iovad)
{
	unsigned int cpu;
	int i, ret;

	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
				 sizeof(struct iova_rcache),
				 GFP_KERNEL);
	if (!iovad->rcaches)
		return -ENOMEM;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		struct iova_cpu_rcache *cpu_rcache;
		struct iova_rcache *rcache;

		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->iovad = iovad;
		INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (!rcache->cpu_rcaches) {
			ret = -ENOMEM;
			goto out_err;
		}
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);

			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
				ret = -ENOMEM;
				goto out_err;
			}
		}
	}

	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					       &iovad->cpuhp_dead);
	if (ret)
		goto out_err;
	return 0;

out_err:
	free_iova_rcaches(iovad);
	return ret;
}
EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success. Can fail if rcache is full and we can't free
 * space, in which case free_iova_fast() (our only caller) returns the
 * IOVA range to the rbtree via free_iova() instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			iova_depot_push(rcache, cpu_rcache->loaded);
			spin_unlock(&rcache->lock);
			schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = iova_depot_pop(rcache);
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache. Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;

	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		if (!rcache->cpu_rcaches)
			break;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		cancel_delayed_work_sync(&rcache->work);
		while (rcache->depot)
			iova_magazine_free(iova_depot_pop(rcache));
	}

	kfree(iovad->rcaches);
	iovad->rcaches = NULL;
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;

	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_irqsave(&rcache->lock, flags);
		while (rcache->depot) {
			struct iova_magazine *mag = iova_depot_pop(rcache);

			iova_magazine_free_pfns(mag, iovad);
			iova_magazine_free(mag);
		}
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}

static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct iova_domain *iovad;

	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

	free_cpu_cached_iovas(cpu, iovad);
	return 0;
}

int iova_cache_get(void)
{
	int err = -ENOMEM;

	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0,
					       SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache)
			goto out_err;

		iova_magazine_cache = kmem_cache_create("iommu_iova_magazine",
							sizeof(struct iova_magazine),
							0, SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_magazine_cache)
			goto out_err;

		err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead",
					      NULL, iova_cpuhp_dead);
		if (err) {
			pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err));
			goto out_err;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;

out_err:
	kmem_cache_destroy(iova_cache);
	kmem_cache_destroy(iova_magazine_cache);
	mutex_unlock(&iova_cache_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users) {
		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
		kmem_cache_destroy(iova_cache);
		kmem_cache_destroy(iova_magazine_cache);
	}
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);
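
/*
 * Illustrative lifecycle (a sketch): users bracket their iova domains with
 * the refcounted cache helpers above, e.g.
 *
 *	ret = iova_cache_get();
 *	if (ret)
 *		return ret;
 *	init_iova_domain(&iovad, granule, start_pfn);
 *	...
 *	put_iova_domain(&iovad);
 *	iova_cache_put();
 *
 * The slab caches and the CPU-hotplug callback only exist while at least one
 * such user is registered.
 */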

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");