/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
 *
 * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
 */

#define pr_fmt(fmt) "[TTM] " fmt

#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_tt.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>

static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
				       struct vm_fault *vmf)
{
	long err = 0;

	/*
	 * Quick non-stalling check for idle.
	 */
	if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL))
		return 0;

	/*
	 * If possible, avoid waiting for GPU with mmap_lock
	 * held. We only do this if the fault allows retry and this
	 * is the first attempt.
	 */
	if (fault_flag_allow_retry_first(vmf->flags)) {
		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
			return VM_FAULT_RETRY;

		ttm_bo_get(bo);
		mmap_read_unlock(vmf->vma->vm_mm);
		(void)dma_resv_wait_timeout(bo->base.resv,
					    DMA_RESV_USAGE_KERNEL, true,
					    MAX_SCHEDULE_TIMEOUT);
		dma_resv_unlock(bo->base.resv);
		ttm_bo_put(bo);
		return VM_FAULT_RETRY;
	}

	/*
	 * Ordinary wait.
	 */
	err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true,
				    MAX_SCHEDULE_TIMEOUT);
	if (unlikely(err < 0)) {
		return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
			VM_FAULT_NOPAGE;
	}

	return 0;
}

static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
				       unsigned long page_offset)
{
	struct ttm_device *bdev = bo->bdev;

	if (bdev->funcs->io_mem_pfn)
		return bdev->funcs->io_mem_pfn(bo, page_offset);

	return (bo->resource->bus.offset >> PAGE_SHIFT) + page_offset;
}

/**
 * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
 * @bo: The buffer object
 * @vmf: The fault structure handed to the callback
 *
 * vm callbacks like fault() and *_mkwrite() allow for the mmap_lock to be
 * dropped during long waits, after which the callback is restarted. This
 * allows other threads using the same virtual memory space to concurrently
 * map() and unmap() completely unrelated buffer objects. TTM buffer
 * object reservations sometimes wait for GPU and should therefore be
 * considered long waits. This function reserves the buffer object
 * interruptibly, taking this into account. Starvation is avoided by the vm
 * system not allowing too many repeated restarts.
 * This function is intended to be used in customized fault() and _mkwrite()
 * handlers.
 *
 * Return:
 *    0 on success and the bo was reserved.
 *    VM_FAULT_RETRY if a blocking wait was performed.
 *    VM_FAULT_NOPAGE if a blocking wait was needed but retrying was not
 *    allowed.
 */
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
			     struct vm_fault *vmf)
{
	/*
	 * Work around locking order reversal in fault / nopfn
	 * between mmap_lock and bo_reserve: Perform a trylock operation
	 * for reserve, and if it fails, retry the fault after waiting
	 * for the buffer to become unreserved.
	 */
	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
		/*
		 * If the fault allows retry and this is the first
		 * fault attempt, we try to release the mmap_lock
		 * before waiting.
		 */
		if (fault_flag_allow_retry_first(vmf->flags)) {
			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				ttm_bo_get(bo);
				mmap_read_unlock(vmf->vma->vm_mm);
				if (!dma_resv_lock_interruptible(bo->base.resv,
								 NULL))
					dma_resv_unlock(bo->base.resv);
				ttm_bo_put(bo);
			}

			return VM_FAULT_RETRY;
		}

		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
			return VM_FAULT_NOPAGE;
	}

	/*
	 * Refuse to fault imported pages. This should be handled
	 * (if at all) by redirecting mmap to the exporter.
	 */
	if (bo->ttm && (bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) {
		if (!(bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) {
			dma_resv_unlock(bo->base.resv);
			return VM_FAULT_SIGBUS;
		}
	}

	return 0;
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
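
/*
 * A minimal sketch of a customized fault() handler built on
 * ttm_bo_vm_reserve(). The handler name and the locked helper
 * "mydrv_fault_locked()" are hypothetical; only the reserve/unlock
 * pattern is what this helper prescribes:
 *
 *	static vm_fault_t mydrv_fault(struct vm_fault *vmf)
 *	{
 *		struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
 *		vm_fault_t ret;
 *
 *		ret = ttm_bo_vm_reserve(bo, vmf);
 *		if (ret)
 *			return ret;	// bo was NOT reserved
 *
 *		ret = mydrv_fault_locked(vmf);	// bo is reserved here
 *
 *		dma_resv_unlock(bo->base.resv);
 *		return ret;
 *	}
 */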

/**
 * ttm_bo_vm_fault_reserved - TTM fault helper
 * @vmf: The struct vm_fault given as argument to the fault callback
 * @prot: The page protection to be used for this memory area.
 * @num_prefault: Maximum number of prefault pages. The caller may want to
 * specify this based on madvise settings and the size of the GPU object
 * backed by the memory.
 *
 * This function inserts one or more page table entries pointing to the
 * memory backing the buffer object, and then returns a fault code
 * instructing the caller to retry the page access.
 *
 * Return:
 *   VM_FAULT_NOPAGE on success or pending signal
 *   VM_FAULT_SIGBUS on unspecified error
 *   VM_FAULT_OOM on out-of-memory
 *   VM_FAULT_RETRY if retryable wait
 */
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
				    pgprot_t prot,
				    pgoff_t num_prefault)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	struct ttm_device *bdev = bo->bdev;
	unsigned long page_offset;
	unsigned long page_last;
	unsigned long pfn;
	struct ttm_tt *ttm = NULL;
	struct page *page;
	int err;
	pgoff_t i;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	unsigned long address = vmf->address;

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */
	ret = ttm_bo_vm_fault_idle(bo, vmf);
	if (unlikely(ret != 0))
		return ret;

	err = ttm_mem_io_reserve(bdev, bo->resource);
	if (unlikely(err != 0))
		return VM_FAULT_SIGBUS;

	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
		vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node);
	page_last = vma_pages(vma) + vma->vm_pgoff -
		drm_vma_node_start(&bo->base.vma_node);

	if (unlikely(page_offset >= PFN_UP(bo->base.size)))
		return VM_FAULT_SIGBUS;

	prot = ttm_io_prot(bo, bo->resource, prot);
	if (!bo->resource->bus.is_iomem) {
		struct ttm_operation_ctx ctx = {
			.interruptible = true,
			.no_wait_gpu = false,
			.force_alloc = true
		};

		ttm = bo->ttm;
		err = ttm_tt_populate(bdev, bo->ttm, &ctx);
		if (err) {
			if (err == -EINTR || err == -ERESTARTSYS ||
			    err == -EAGAIN)
				return VM_FAULT_NOPAGE;

			pr_debug("TTM fault hit %pe.\n", ERR_PTR(err));
			return VM_FAULT_SIGBUS;
		}
	} else {
		/* Iomem should not be marked encrypted */
		prot = pgprot_decrypted(prot);
	}

	/*
	 * Speculatively prefault a number of pages. Only error on
	 * first page.
	 */
	for (i = 0; i < num_prefault; ++i) {
		if (bo->resource->bus.is_iomem) {
			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
		} else {
			page = ttm->pages[page_offset];
			if (unlikely(!page && i == 0)) {
				return VM_FAULT_OOM;
			} else if (unlikely(!page)) {
				break;
			}
			pfn = page_to_pfn(page);
		}

		/*
		 * Note that the value of @prot at this point may differ from
		 * the value of @vma->vm_page_prot in the caching- and
		 * encryption bits. This is because the exact location of the
		 * data may not be known at mmap() time and may also change
		 * at arbitrary times while the data is mmap'ed.
		 * See vmf_insert_pfn_prot() for a discussion.
		 */
		ret = vmf_insert_pfn_prot(vma, address, pfn, prot);

		/* Never error on prefaulted PTEs */
		if (unlikely((ret & VM_FAULT_ERROR))) {
			if (i == 0)
				return VM_FAULT_NOPAGE;
			else
				break;
		}

		address += PAGE_SIZE;
		if (unlikely(++page_offset >= page_last))
			break;
	}
	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
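
/*
 * A sketch of how a driver may combine ttm_bo_vm_reserve() and
 * ttm_bo_vm_fault_reserved() in its own fault() handler, mirroring
 * ttm_bo_vm_fault() below. MYDRV_NUM_PREFAULT is a hypothetical
 * driver-chosen prefault count:
 *
 *	static vm_fault_t mydrv_fault(struct vm_fault *vmf)
 *	{
 *		struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
 *		vm_fault_t ret;
 *
 *		ret = ttm_bo_vm_reserve(bo, vmf);
 *		if (ret)
 *			return ret;
 *
 *		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
 *					       MYDRV_NUM_PREFAULT);
 *
 *		// On VM_FAULT_RETRY without FAULT_FLAG_RETRY_NOWAIT, the
 *		// reservation has already been dropped on our behalf.
 *		if (ret == VM_FAULT_RETRY &&
 *		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 *			return ret;
 *
 *		dma_resv_unlock(bo->base.resv);
 *		return ret;
 *	}
 */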

static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
{
	struct page *dummy_page = (struct page *)res;

	__free_page(dummy_page);
}

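/**
 * ttm_bo_vm_dummy_page - Map a dummy page into a faulting VMA
 * @vmf: The struct vm_fault given as argument to the fault callback
 * @prot: The page protection to be used for this memory area.
 *
 * Maps the whole VMA to a single zero-filled dummy page, so that faults
 * can still be satisfied after the underlying device has gone away. The
 * page is freed through a drmm release action when the drm_device is
 * released.
 *
 * Return: VM_FAULT_NOPAGE on success, VM_FAULT_OOM on allocation failure.
 */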
vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	struct drm_device *ddev = bo->base.dev;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	unsigned long address;
	unsigned long pfn;
	struct page *page;

	/* Allocate a new dummy page to map all the VA range in this VMA to it */
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return VM_FAULT_OOM;

	/* Set the page to be freed using a drmm release action */
	if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
		return VM_FAULT_OOM;

	pfn = page_to_pfn(page);

	/* Prefault the entire VMA range right away to avoid further faults */
	for (address = vma->vm_start; address < vma->vm_end;
	     address += PAGE_SIZE)
		ret = vmf_insert_pfn_prot(vma, address, pfn, prot);

	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_dummy_page);

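/**
 * ttm_bo_vm_fault - Default TTM fault() callback
 * @vmf: The struct vm_fault given as argument to the fault callback
 *
 * Reserves the bo, inserts real backing pages through
 * ttm_bo_vm_fault_reserved() while the device is alive, and falls back
 * to the dummy page once drm_dev_enter() fails (device unplugged).
 *
 * Return: A vm_fault_t code as documented for ttm_bo_vm_fault_reserved().
 */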
vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgprot_t prot;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	struct drm_device *ddev = bo->base.dev;
	vm_fault_t ret;
	int idx;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	prot = vma->vm_page_prot;
	if (drm_dev_enter(ddev, &idx)) {
		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
		drm_dev_exit(idx);
	} else {
		ret = ttm_bo_vm_dummy_page(vmf, prot);
	}
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

	dma_resv_unlock(bo->base.resv);

	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault);

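/*
 * Default open() and close() callbacks: keep the bo reference owned by
 * vma->vm_private_data balanced when VMAs are duplicated (fork, split)
 * and torn down.
 */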
void ttm_bo_vm_open(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;

	WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);

	ttm_bo_get(bo);
}
EXPORT_SYMBOL(ttm_bo_vm_open);

void ttm_bo_vm_close(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;

	ttm_bo_put(bo);
	vma->vm_private_data = NULL;
}
EXPORT_SYMBOL(ttm_bo_vm_close);

static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
				 unsigned long offset,
				 uint8_t *buf, int len, int write)
{
	unsigned long page = offset >> PAGE_SHIFT;
	unsigned long bytes_left = len;
	int ret;

	/* Copy a page at a time, that way no extra virtual address
	 * mapping is needed
	 */
	offset -= page << PAGE_SHIFT;
	do {
		unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
		struct ttm_bo_kmap_obj map;
		void *ptr;
		bool is_iomem;

		ret = ttm_bo_kmap(bo, page, 1, &map);
		if (ret)
			return ret;

		ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
		WARN_ON_ONCE(is_iomem);
		if (write)
			memcpy(ptr, buf, bytes);
		else
			memcpy(buf, ptr, bytes);
		ttm_bo_kunmap(&map);

		page++;
		buf += bytes;
		bytes_left -= bytes;
		offset = 0;
	} while (bytes_left);

	return len;
}

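/**
 * ttm_bo_vm_access - Default access() callback, e.g. for ptrace
 * @vma: The vma.
 * @addr: The start of the accessed virtual address range.
 * @buf: Kernel-space buffer to copy to (read) or from (write).
 * @len: Number of bytes to access.
 * @write: Whether the access is a write.
 *
 * System-memory placements are kmapped and copied a page at a time; for
 * all other placements the access is delegated to the driver's
 * access_memory() callback, if present.
 *
 * Return: Number of bytes accessed, or a negative error code on failure.
 */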
int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
		     void *buf, int len, int write)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;
	unsigned long offset = (addr) - vma->vm_start +
		((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
		 << PAGE_SHIFT);
	int ret;

	if (len < 1 || (offset + len) > bo->base.size)
		return -EIO;

	ret = ttm_bo_reserve(bo, true, false, NULL);
	if (ret)
		return ret;

	switch (bo->resource->mem_type) {
	case TTM_PL_SYSTEM:
		fallthrough;
	case TTM_PL_TT:
		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
		break;
	default:
		if (bo->bdev->funcs->access_memory)
			ret = bo->bdev->funcs->access_memory(
				bo, offset, buf, len, write);
		else
			ret = -EIO;
	}

	ttm_bo_unreserve(bo);

	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_access);

static const struct vm_operations_struct ttm_bo_vm_ops = {
	.fault = ttm_bo_vm_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = ttm_bo_vm_access,
};

/**
 * ttm_bo_mmap_obj - mmap memory backed by a ttm buffer object.
 *
 * @vma: vma as input from the fbdev mmap method.
 * @bo: The bo backing the address space.
 *
 * Maps a buffer object.
 */
int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
{
	/* Enforce no COW since it would result in really strange behavior. */
	if (is_cow_mapping(vma->vm_flags))
		return -EINVAL;

	ttm_bo_get(bo);

	/*
	 * Drivers may want to override the vm_ops field. Otherwise we
	 * use TTM's default callbacks.
	 */
	if (!vma->vm_ops)
		vma->vm_ops = &ttm_bo_vm_ops;

	/*
	 * Note: We're transferring the bo reference to
	 * vma->vm_private_data here.
	 */

	vma->vm_private_data = bo;

	vm_flags_set(vma, VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
	return 0;
}
EXPORT_SYMBOL(ttm_bo_mmap_obj);
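
/*
 * A sketch of a driver mmap callback using ttm_bo_mmap_obj(). Looking up
 * the bo from the fake offset ("mydrv_bo_lookup()") is hypothetical and
 * driver-specific; GEM-based drivers typically get this via
 * drm_gem_ttm_mmap() instead:
 *
 *	static int mydrv_mmap(struct file *filp, struct vm_area_struct *vma)
 *	{
 *		struct ttm_buffer_object *bo;
 *
 *		bo = mydrv_bo_lookup(filp->private_data, vma->vm_pgoff);
 *		if (!bo)
 *			return -EINVAL;
 *
 *		// ttm_bo_mmap_obj() takes its own bo reference, which is
 *		// released by ttm_bo_vm_close() when the VMA goes away.
 *		return ttm_bo_mmap_obj(vma, bo);
 *	}
 */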