// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IOMMU helpers in MMU context.
 *
 * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>

static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)
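/*
 * hpas[] entries are host physical addresses and therefore at least 4K
 * aligned, so the low bits are free to carry flags: if bit 0
 * (MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) is set in an entry, the corresponding
 * page is marked dirty when it is unpinned, and
 * MM_IOMMU_TABLE_GROUP_PAGE_MASK strips the flag bits when an entry is
 * converted back to a plain physical address.
 */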

struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * During pinning in mm_iommu_do_alloc() this union temporarily
	 * holds struct page pointers (hpages); once the pages are pinned,
	 * the entries are converted in place to host physical addresses
	 * (hpas).
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

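/*
 * Pre-register @entries pages of userspace memory starting at @ua: account
 * the pages against the locked_vm limit, long-term pin them, record their
 * physical addresses and the largest usable IOMMU page shift, and publish
 * the region on the mm's list. If @dev_hpa is a valid address, the region
 * describes device memory that is not backed by struct pages, so nothing
 * is pinned or accounted.
 */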
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
			      unsigned long entries, unsigned long dev_hpa,
			      struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = account_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * As a starting point for the maximum IOMMU page size calculation,
	 * use the natural alignment of @ua and of the region size
	 * (@entries << PAGE_SHIFT); this allows IOMMU pages smaller than
	 * huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
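	/*
	 * For example (assuming 64K base pages): ua = 0x10000000 and
	 * entries = 0x400 describe a 64MB region, so
	 * __ffs(0x10000000 | 0x4000000) = 26, i.e. IOMMU pages of up to
	 * 64MB may be considered; the per-page loop further below can
	 * only shrink this.
	 */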
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	mmap_read_lock(mm);
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
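	/*
	 * Pin in bounded chunks so that a single pin_user_pages() call
	 * never has to cover the entire (possibly very large) region at
	 * once.
	 */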
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
				     FOLL_WRITE | FOLL_LONGTERM,
				     mem->hpages + entry);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	mmap_read_unlock(mm);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
				lockdep_is_held(&mem_list_mutex)) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		/*
		 * Allow IOMMU pages larger than 64K, but only when the
		 * memory is backed by hugetlb. Device memory is skipped
		 * as it is not backed by struct pages.
		 */
		pageshift = PAGE_SHIFT;
		for (i = 0; i < entries; ++i) {
			struct page *page = mem->hpages[i];

			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
				pageshift = page_shift(compound_head(page));
			mem->pageshift = min(mem->pageshift, pageshift);
			/*
			 * We don't need the struct page reference any more,
			 * switch to the physical address.
			 */
			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* free the references taken */
	unpin_user_pages(mem->hpages, pinned);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	account_locked_vm(mm, locked_entries, false);

	return ret;
}

long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		  struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
				 pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		     unsigned long entries, unsigned long dev_hpa,
		     struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		unpin_user_page(page);

		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

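/*
 * Unlink the region from the mm's list and defer freeing until after an RCU
 * grace period, so that lockless readers walking the list (e.g.
 * mm_iommu_lookup()) never see freed memory.
 */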
static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	account_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next,
				lockdep_is_held(&mem_list_mutex)) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

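/*
 * Translate a userspace address within a pre-registered region to a host
 * physical address. @pageshift is the IOMMU page shift the caller intends
 * to use and must not exceed the largest shift the region supports.
 */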
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);

bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	rcu_read_lock();
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

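/*
 * @mapped is initialised to 1 and stays at least 1 for as long as the region
 * may be mapped; every active mapping adds one on top of that.
 * mm_iommu_put() drops the base reference (1 -> 0) only when no mappings
 * remain, after which atomic64_inc_not_zero() here fails and new mappings
 * are refused.
 */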
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}