/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * based on nouveau_prime.c
 *
 * Authors: Alex Deucher
 */

/**
 * DOC: PRIME Buffer Sharing
 *
 * The following callback implementations are used for :ref:`sharing GEM buffer
 * objects between different devices via PRIME <prime_buffer_sharing>`.
 */
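
/*
 * For orientation, a rough userspace-side sketch of the round trip that ends
 * up in these callbacks, assuming libdrm's drmPrimeHandleToFD() and
 * drmPrimeFDToHandle() (illustration only, not part of this file):
 *
 *	#include <xf86drm.h>
 *
 *	uint32_t handle = ...;	// GEM handle on the exporting device fd
 *	int prime_fd, r;
 *
 *	// export: DRM_IOCTL_PRIME_HANDLE_TO_FD -> amdgpu_gem_prime_export()
 *	r = drmPrimeHandleToFD(exporter_fd, handle, DRM_CLOEXEC | DRM_RDWR,
 *			       &prime_fd);
 *
 *	// import: DRM_IOCTL_PRIME_FD_TO_HANDLE -> amdgpu_gem_prime_import()
 *	uint32_t imported_handle;
 *	r = drmPrimeFDToHandle(importer_fd, prime_fd, &imported_handle);
 *
 * Mapping and CPU access by the importer then go through the dma_buf_ops
 * implemented below.
 */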

#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
#include "amdgpu_trace.h"

/**
 * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
 *
 * @dmabuf: DMA-buf where we attach to
 * @attach: attachment to add
 *
 * Add the attachment as a user to the exported DMA-buf.
 */
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
				 struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = dmabuf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r;

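	/*
	 * Without a PCIe P2P DMA path between the exporter and the importing
	 * device we cannot hand out VRAM addresses, so fall back to plain
	 * system-memory (GTT) sharing by clearing peer2peer.
	 */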
	if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
		attach->peer2peer = false;

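	/*
	 * Keep the device powered up for as long as the attachment exists;
	 * the reference is dropped again in amdgpu_dma_buf_detach().
	 */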
	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
	trace_amdgpu_runpm_reference_dumps(1, __func__);
	if (r < 0)
		goto out;

	return 0;

out:
	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
	trace_amdgpu_runpm_reference_dumps(0, __func__);
	return r;
}

/**
 * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
 *
 * @dmabuf: DMA-buf where we remove the attachment from
 * @attach: the attachment to remove
 *
 * Called when an attachment is removed from the DMA-buf.
 */
static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
				  struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = dmabuf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
	trace_amdgpu_runpm_reference_dumps(0, __func__);
}

/**
 * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
 *
 * @attach: attachment to pin down
 *
 * Pin the BO which is backing the DMA-buf so that it can't move any more.
 */
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->dmabuf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

	/* pin buffer into GTT */
	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
}

/**
 * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
 *
 * @attach: attachment to unpin
 *
 * Unpin a previously pinned BO to make it movable again.
 */
static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->dmabuf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

	amdgpu_bo_unpin(bo);
}

/**
 * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
 * @attach: DMA-buf attachment
 * @dir: DMA direction
 *
 * Makes sure that the shared DMA buffer can be accessed by the target device.
 * For now, simply pins it to the GTT domain, where it should be accessible by
 * all DMA devices.
 *
 * Returns:
 * sg_table filled with the DMA addresses to use or ERR_PTR with a negative
 * error code.
 */
static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
					   enum dma_data_direction dir)
{
	struct dma_buf *dma_buf = attach->dmabuf;
	struct drm_gem_object *obj = dma_buf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct sg_table *sgt;
	long r;

	if (!bo->tbo.pin_count) {
		/* move buffer into GTT or VRAM */
		struct ttm_operation_ctx ctx = { false, false };
		unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;

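		/*
		 * VRAM is only a valid placement when the importer can reach
		 * it through PCIe P2P; otherwise the buffer must end up in
		 * GTT so the importer can DMA from system memory.
		 */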
		if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
		    attach->peer2peer) {
			bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
			domains |= AMDGPU_GEM_DOMAIN_VRAM;
		}
		amdgpu_bo_placement_from_domain(bo, domains);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		if (r)
			return ERR_PTR(r);

	} else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
		     AMDGPU_GEM_DOMAIN_GTT)) {
		return ERR_PTR(-EBUSY);
	}

	switch (bo->tbo.resource->mem_type) {
	case TTM_PL_TT:
		sgt = drm_prime_pages_to_sg(obj->dev,
					    bo->tbo.ttm->pages,
					    bo->tbo.ttm->num_pages);
		if (IS_ERR(sgt))
			return sgt;

		if (dma_map_sgtable(attach->dev, sgt, dir,
				    DMA_ATTR_SKIP_CPU_SYNC))
			goto error_free;
		break;

	case TTM_PL_VRAM:
		r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
					      bo->tbo.base.size, attach->dev,
					      dir, &sgt);
		if (r)
			return ERR_PTR(r);
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	return sgt;

error_free:
	sg_free_table(sgt);
	kfree(sgt);
	return ERR_PTR(-EBUSY);
}

/**
 * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
 * @attach: DMA-buf attachment
 * @sgt: sg_table to unmap
 * @dir: DMA direction
 *
 * This is called when a shared DMA buffer no longer needs to be accessible by
 * another device. For now, simply unpins the buffer from GTT.
 */
static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
				 struct sg_table *sgt,
				 enum dma_data_direction dir)
{
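	/*
	 * GTT mappings created by drm_prime_pages_to_sg() are page backed,
	 * while sg_tables from amdgpu_vram_mgr_alloc_sgt() carry no struct
	 * pages and must be torn down by the VRAM manager instead.
	 */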
	if (sgt->sgl->page_link) {
		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
		sg_free_table(sgt);
		kfree(sgt);
	} else {
		amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
	}
}

/**
 * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
 * @dma_buf: Shared DMA buffer
 * @direction: Direction of DMA transfer
 *
 * This is called before CPU access to the shared DMA buffer's memory. If it's
 * a read access, the buffer is moved to the GTT domain if possible, for optimal
 * CPU read performance.
 *
 * Returns:
 * 0 on success or a negative error code on failure.
 */
static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
					   enum dma_data_direction direction)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { true, false };
	u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
	int ret;
	bool reads = (direction == DMA_BIDIRECTIONAL ||
		      direction == DMA_FROM_DEVICE);

	if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
		return 0;

	/* move to gtt */
	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret != 0))
		return ret;

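	/* Only migrate when the BO isn't pinned and GTT is actually allowed */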
	if (!bo->tbo.pin_count &&
	    (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	}

	amdgpu_bo_unreserve(bo);
	return ret;
}

const struct dma_buf_ops amdgpu_dmabuf_ops = {
	.attach = amdgpu_dma_buf_attach,
	.detach = amdgpu_dma_buf_detach,
	.pin = amdgpu_dma_buf_pin,
	.unpin = amdgpu_dma_buf_unpin,
	.map_dma_buf = amdgpu_dma_buf_map,
	.unmap_dma_buf = amdgpu_dma_buf_unmap,
	.release = drm_gem_dmabuf_release,
	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
	.mmap = drm_gem_dmabuf_mmap,
	.vmap = drm_gem_dmabuf_vmap,
	.vunmap = drm_gem_dmabuf_vunmap,
};

/**
 * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
 * @gobj: GEM BO
 * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
 *
 * The main work is done by the &drm_gem_prime_export helper.
 *
 * Returns:
 * Shared DMA buffer representing the GEM BO from the given device.
 */
struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
					int flags)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
	struct dma_buf *buf;

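	/*
	 * Userptr BOs (backed by user pages) and per-VM BOs (always valid in
	 * exactly one VM) must not leave the process that created them, so
	 * refuse to export them.
	 */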
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
		return ERR_PTR(-EPERM);

	buf = drm_gem_prime_export(gobj, flags);
	if (!IS_ERR(buf))
		buf->ops = &amdgpu_dmabuf_ops;

	return buf;
}

/**
 * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
 *
 * @dev: DRM device
 * @dma_buf: DMA-buf
 *
 * Creates an empty SG BO for DMA-buf import.
 *
 * Returns:
 * A new GEM BO of the given DRM device, representing the memory
 * described by the given DMA-buf attachment and scatter/gather table.
 */
static struct drm_gem_object *
amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
	struct dma_resv *resv = dma_buf->resv;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct drm_gem_object *gobj;
	struct amdgpu_bo *bo;
	uint64_t flags = 0;
	int ret;

	dma_resv_lock(resv, NULL);

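	/*
	 * If the exporter is another amdgpu device, carry over its caching
	 * related creation flags so both sides treat the memory consistently.
	 */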
	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
		struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);

		flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
					 AMDGPU_GEM_CREATE_COHERENT |
					 AMDGPU_GEM_CREATE_EXT_COHERENT |
					 AMDGPU_GEM_CREATE_UNCACHED);
	}

	ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_CPU, flags,
				       ttm_bo_type_sg, resv, &gobj, 0);
	if (ret)
		goto error;

	bo = gem_to_amdgpu_bo(gobj);
	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;

	dma_resv_unlock(resv);
	return gobj;

error:
	dma_resv_unlock(resv);
	return ERR_PTR(ret);
}

/**
 * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
 *
 * @attach: the DMA-buf attachment
 *
 * Invalidate the DMA-buf attachment, making sure that we re-create the
 * mapping before the next use.
 */
static void
amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->importer_priv;
	struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { false, false };
	struct ttm_placement placement = {};
	struct amdgpu_vm_bo_base *bo_base;
	int r;

	/* FIXME: This should be after the "if", but needs a fix to make sure
	 * DMABuf imports are initialized in the right VM list.
	 */
	amdgpu_vm_bo_invalidate(adev, bo, false);
	if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
		return;

	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
	if (r) {
		DRM_ERROR("Failed to invalidate DMA-buf import (%d)\n", r);
		return;
	}

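	/*
	 * Update the page tables of every VM this BO is mapped into so the
	 * now stale mapping is not used again.
	 */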
	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
		struct amdgpu_vm *vm = bo_base->vm;
		struct dma_resv *resv = vm->root.bo->tbo.base.resv;

		if (ticket) {
			/* When we get an error here it means that somebody
			 * else is holding the VM lock and updating page tables.
			 * So we can just continue here.
			 */
			r = dma_resv_lock(resv, ticket);
			if (r)
				continue;

		} else {
			/* TODO: This is more problematic and we actually need
			 * to allow page table updates without holding the
			 * lock.
			 */
			if (!dma_resv_trylock(resv))
				continue;
		}

		/* Reserve fences for two SDMA page table updates */
		r = dma_resv_reserve_fences(resv, 2);
		if (!r)
			r = amdgpu_vm_clear_freed(adev, vm, NULL);
		if (!r)
			r = amdgpu_vm_handle_moved(adev, vm, ticket);

		if (r && r != -EBUSY)
			DRM_ERROR("Failed to invalidate VM page tables (%d)\n",
				  r);

		dma_resv_unlock(resv);
	}
}

static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = amdgpu_dma_buf_move_notify
};

/**
 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
 * @dev: DRM device
 * @dma_buf: Shared DMA buffer
 *
 * Import a dma_buf into the driver and potentially create a new GEM object.
 *
 * Returns:
 * GEM BO representing the shared DMA buffer for the given device.
 */
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
					       struct dma_buf *dma_buf)
{
	struct dma_buf_attachment *attach;
	struct drm_gem_object *obj;

	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
		obj = dma_buf->priv;
		if (obj->dev == dev) {
			/*
			 * Importing a dmabuf exported from our own gem increases
			 * the refcount on the gem itself instead of the f_count
			 * of the dmabuf.
			 */
			drm_gem_object_get(obj);
			return obj;
		}
	}

	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
	if (IS_ERR(obj))
		return obj;

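	/*
	 * Use a dynamic attachment so the exporter may move the buffer and
	 * tell us about it through amdgpu_dma_buf_move_notify().
	 */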
	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
					&amdgpu_dma_buf_attach_ops, obj);
	if (IS_ERR(attach)) {
		drm_gem_object_put(obj);
		return ERR_CAST(attach);
	}

	get_dma_buf(dma_buf);
	obj->import_attach = attach;
	return obj;
}

/**
 * amdgpu_dmabuf_is_xgmi_accessible - Check if XGMI is available for P2P transfer
 *
 * @adev: amdgpu_device pointer of the importer
 * @bo: amdgpu buffer object
 *
 * Returns:
 * True if dmabuf accessible over xgmi, false otherwise.
 */
bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
				      struct amdgpu_bo *bo)
{
	struct drm_gem_object *obj = &bo->tbo.base;
	struct drm_gem_object *gobj;

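	/*
	 * For imported BOs look at the exporter's BO instead; XGMI only helps
	 * when the exporter is another amdgpu device in the same hive.
	 */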
	if (obj->import_attach) {
		struct dma_buf *dma_buf = obj->import_attach->dmabuf;

		if (dma_buf->ops != &amdgpu_dmabuf_ops)
			/* No XGMI with non-AMD GPUs */
			return false;

		gobj = dma_buf->priv;
		bo = gem_to_amdgpu_bo(gobj);
	}

	if (amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
	    (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
		return true;

	return false;
}