/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>
#include <linux/module.h>

#include <drm/drm_drv.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
#include <drm/ttm/ttm_tt.h>

#include <drm/amdgpu_drm.h>

#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
#include "amdgpu_hmm.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"

MODULE_IMPORT_NS(DMA_BUF);

#define AMDGPU_TTM_VRAM_MAX_DW_READ	((size_t)128)

static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem);
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm);

static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
				   unsigned int type,
				   uint64_t size_in_page)
{
	return ttm_range_man_init(&adev->mman.bdev, type,
				  false, size_in_page);
}

/**
 * amdgpu_evict_flags - Compute placement flags
 *
 * @bo: The buffer object to evict
 * @placement: Possible destination(s) for evicted BO
 *
 * Fill in placement data when ttm_bo_evict() is called
 */
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
			       struct ttm_placement *placement)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	static const struct ttm_place placements = {
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = TTM_PL_SYSTEM,
		.flags = 0
	};

	/* Don't handle scatter gather BOs */
	if (bo->type == ttm_bo_type_sg) {
		placement->num_placement = 0;
		return;
	}

	/* Object isn't an AMDGPU object so ignore */
	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
		placement->placement = &placements;
		placement->num_placement = 1;
		return;
	}

	abo = ttm_to_amdgpu_bo(bo);
	if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
		placement->num_placement = 0;
		return;
	}

	switch (bo->resource->mem_type) {
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
	case AMDGPU_PL_DOORBELL:
		placement->num_placement = 0;
		return;

	case TTM_PL_VRAM:
		if (!adev->mman.buffer_funcs_enabled) {
			/* Move to system memory */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);

		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			   amdgpu_res_cpu_visible(adev, bo->resource)) {

			/* Try evicting to the CPU inaccessible part of VRAM
			 * first, but only set GTT as busy placement, so this
			 * BO will be evicted to GTT rather than causing other
			 * BOs to be evicted from VRAM
			 */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
							AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
			abo->placements[0].lpfn = 0;
			abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
		} else {
			/* Move to GTT memory */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
		}
		break;
	case TTM_PL_TT:
	case AMDGPU_PL_PREEMPT:
	default:
		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
		break;
	}
	*placement = abo->placement;
}

/**
 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 * @bo: buffer object to map
 * @mem: memory object to map
 * @mm_cur: range to map
 * @window: which GART window to use
 * @ring: DMA ring to use for the copy
 * @tmz: if we should set up a TMZ-enabled mapping
 * @size: in number of bytes to map, out number of bytes mapped
 * @addr: resulting address inside the MC address space
 *
 * Set up one of the GART windows to access a specific piece of memory or
 * return the physical address for local memory.
 */
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
				 struct ttm_resource *mem,
				 struct amdgpu_res_cursor *mm_cur,
				 unsigned int window, struct amdgpu_ring *ring,
				 bool tmz, uint64_t *size, uint64_t *addr)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offset, num_pages, num_dw, num_bytes;
	uint64_t src_addr, dst_addr;
	struct amdgpu_job *job;
	void *cpu_addr;
	uint64_t flags;
	unsigned int i;
	int r;

	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);

	if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
		return -EINVAL;

	/* Map only what can't be accessed directly */
	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
		return 0;
	}

	/*
	 * If start begins at an offset inside the page, then adjust the size
	 * and addr accordingly
	 */
	offset = mm_cur->start & ~PAGE_MASK;

	num_pages = PFN_UP(*size + offset);
	num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);

	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);

	*addr = adev->gmc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;
	*addr += offset;

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;

	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
	if (tmz)
		flags |= AMDGPU_PTE_TMZ;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	if (mem->mem_type == TTM_PL_TT) {
		dma_addr_t *dma_addr;

		dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
		amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
	} else {
		dma_addr_t dma_address;

		dma_address = mm_cur->start;
		dma_address += adev->vm_manager.vram_base_offset;

		for (i = 0; i < num_pages; ++i) {
			amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
					flags, cpu_addr);
			dma_address += PAGE_SIZE;
		}
	}

	dma_fence_put(amdgpu_job_submit(job));
	return 0;
}
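
/*
 * A worked example of the window addressing above (a sketch, assuming
 * AMDGPU_GTT_MAX_TRANSFER_SIZE == 512 and AMDGPU_GPU_PAGE_SIZE == 4096;
 * see amdgpu.h for the authoritative values): window 1 then starts at
 * gart_start + 1 * 512 * 4096 = gart_start + 2 MiB. For
 * mm_cur->start == 0x11234 the intra-page offset is 0x234, so *addr
 * becomes gart_start + 2 MiB + 0x234 and at most 512 * 4 KiB - 0x234
 * bytes are mapped in one pass.
 */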

/**
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @tmz: if a secure copy should be used
 * @resv: resv object to sync to
 * @f: Returns the last fence if multiple jobs are submitted.
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be the same BO for a
 * move and different for a BO to BO copy.
 *
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
			       const struct amdgpu_copy_mem *src,
			       const struct amdgpu_copy_mem *dst,
			       uint64_t size, bool tmz,
			       struct dma_resv *resv,
			       struct dma_fence **f)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_res_cursor src_mm, dst_mm;
	struct dma_fence *fence = NULL;
	int r = 0;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);

	mutex_lock(&adev->mman.gtt_window_lock);
	while (src_mm.remaining) {
		uint64_t from, to, cur_size;
		struct dma_fence *next;

		/* Never copy more than 256MiB at once to avoid a timeout */
		cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);

		/* Map src to window 0 and dst to window 1. */
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
					  0, ring, tmz, &cur_size, &from);
		if (r)
			goto error;

		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
					  1, ring, tmz, &cur_size, &to);
		if (r)
			goto error;

		r = amdgpu_copy_buffer(ring, from, to, cur_size,
				       resv, &next, false, true, tmz);
		if (r)
			goto error;

		dma_fence_put(fence);
		fence = next;

		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
	}
error:
	mutex_unlock(&adev->mman.gtt_window_lock);
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}
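
/*
 * Note on the loop above: each iteration submits one copy job and drops
 * the reference to the previous fence, so only the final fence is handed
 * back through @f. All jobs are queued to the same ring/entity, so they
 * should complete in submission order and waiting on the last fence is
 * enough to know the whole copy has finished.
 */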

/*
 * amdgpu_move_blit - Copy an entire buffer to another buffer
 *
 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
 * help move buffers to and from VRAM.
 */
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
			    bool evict,
			    struct ttm_resource *new_mem,
			    struct ttm_resource *old_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = bo;
	dst.bo = bo;
	src.mem = old_mem;
	dst.mem = new_mem;
	src.offset = 0;
	dst.offset = 0;

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->size,
				       amdgpu_bo_encrypted(abo),
				       bo->base.resv, &fence);
	if (r)
		goto error;

	/* clear the space being freed */
	if (old_mem->mem_type == TTM_PL_VRAM &&
	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
		struct dma_fence *wipe_fence = NULL;

		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
				       false);
		if (r) {
			goto error;
		} else if (wipe_fence) {
			dma_fence_put(fence);
			fence = wipe_fence;
		}
	}

	/* Always block for VM page tables before committing the new location */
	if (bo->type == ttm_bo_type_kernel)
		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
	else
		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
	dma_fence_put(fence);
	return r;

error:
	if (fence)
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}

/**
 * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
 * @adev: amdgpu device
 * @res: the resource to check
 *
 * Returns: true if the full resource is CPU visible, false otherwise.
 */
bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
			    struct ttm_resource *res)
{
	struct amdgpu_res_cursor cursor;

	if (!res)
		return false;

	if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
	    res->mem_type == AMDGPU_PL_PREEMPT)
		return true;

	if (res->mem_type != TTM_PL_VRAM)
		return false;

	amdgpu_res_first(res, 0, res->size, &cursor);
	while (cursor.remaining) {
		if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
			return false;
		amdgpu_res_next(&cursor, cursor.size);
	}

	return true;
}
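
/*
 * For example, with 256 MiB of CPU-visible VRAM, a VRAM resource with a
 * chunk spanning [252 MiB, 260 MiB) fails the loop above on that chunk,
 * so the whole resource is reported as not CPU visible.
 */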

/*
 * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
 *
 * Called by amdgpu_bo_move()
 */
static bool amdgpu_res_copyable(struct amdgpu_device *adev,
				struct ttm_resource *mem)
{
	if (!amdgpu_res_cpu_visible(adev, mem))
		return false;

	/* ttm_resource_ioremap only supports contiguous memory */
	if (mem->mem_type == TTM_PL_VRAM &&
	    !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
		return false;

	return true;
}

/*
 * amdgpu_bo_move - Move a buffer object to a new memory location
 *
 * Called by ttm_bo_handle_move_mem()
 */
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
			  struct ttm_resource *new_mem,
			  struct ttm_place *hop)
{
	struct amdgpu_device *adev;
	struct amdgpu_bo *abo;
	struct ttm_resource *old_mem = bo->resource;
	int r;

	if (new_mem->mem_type == TTM_PL_TT ||
	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
		if (r)
			return r;
	}

	abo = ttm_to_amdgpu_bo(bo);
	adev = amdgpu_ttm_adev(bo->bdev);

	if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
			 bo->ttm == NULL)) {
		ttm_bo_move_null(bo, new_mem);
		goto out;
	}
	if (old_mem->mem_type == TTM_PL_SYSTEM &&
	    (new_mem->mem_type == TTM_PL_TT ||
	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
		ttm_bo_move_null(bo, new_mem);
		goto out;
	}
	if ((old_mem->mem_type == TTM_PL_TT ||
	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
	    new_mem->mem_type == TTM_PL_SYSTEM) {
		r = ttm_bo_wait_ctx(bo, ctx);
		if (r)
			return r;

		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
		ttm_resource_free(bo, &bo->resource);
		ttm_bo_assign_mem(bo, new_mem);
		goto out;
	}

	if (old_mem->mem_type == AMDGPU_PL_GDS ||
	    old_mem->mem_type == AMDGPU_PL_GWS ||
	    old_mem->mem_type == AMDGPU_PL_OA ||
	    old_mem->mem_type == AMDGPU_PL_DOORBELL ||
	    new_mem->mem_type == AMDGPU_PL_GDS ||
	    new_mem->mem_type == AMDGPU_PL_GWS ||
	    new_mem->mem_type == AMDGPU_PL_OA ||
	    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
		/* Nothing to save here */
		ttm_bo_move_null(bo, new_mem);
		goto out;
	}

	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

	if (adev->mman.buffer_funcs_enabled) {
		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
		      new_mem->mem_type == TTM_PL_VRAM) ||
		     (old_mem->mem_type == TTM_PL_VRAM &&
		      new_mem->mem_type == TTM_PL_SYSTEM))) {
			hop->fpfn = 0;
			hop->lpfn = 0;
			hop->mem_type = TTM_PL_TT;
			hop->flags = TTM_PL_FLAG_TEMPORARY;
			return -EMULTIHOP;
		}

		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
	} else {
		r = -ENODEV;
	}

	if (r) {
		/* Check that all memory is CPU accessible */
		if (!amdgpu_res_copyable(adev, old_mem) ||
		    !amdgpu_res_copyable(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
			return r;
		}

		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
			return r;
	}

	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
out:
	/* update statistics */
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
	amdgpu_bo_move_notify(bo, evict);
	return 0;
}
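
/*
 * A sketch of the -EMULTIHOP handshake above: for a direct SYSTEM <-> VRAM
 * move, amdgpu_bo_move() fills @hop with a temporary TTM_PL_TT placement
 * and bails out. TTM then bounces the buffer through GTT, e.g.
 * SYSTEM -> TT (populate and bind to GART) followed by a second call into
 * this function that blits TT -> VRAM, so the copy engine only ever sees
 * GART-addressable pages.
 */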

/*
 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 *
 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 */
static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
				     struct ttm_resource *mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);

	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
	case AMDGPU_PL_PREEMPT:
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;

		if (adev->mman.aper_base_kaddr &&
		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
					mem->bus.offset;

		mem->bus.offset += adev->gmc.aper_base;
		mem->bus.is_iomem = true;
		break;
	case AMDGPU_PL_DOORBELL:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		mem->bus.offset += adev->doorbell.base;
		mem->bus.is_iomem = true;
		mem->bus.caching = ttm_uncached;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
			 &cursor);

	if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
		return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;

	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}

/**
 * amdgpu_ttm_domain_start - Returns GPU start address
 * @adev: amdgpu device object
 * @type: type of the memory
 *
 * Returns:
 * GPU start address of a memory domain
 */

uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
{
	switch (type) {
	case TTM_PL_TT:
		return adev->gmc.gart_start;
	case TTM_PL_VRAM:
		return adev->gmc.vram_start;
	}

	return 0;
}

/*
 * TTM backend functions.
 */
struct amdgpu_ttm_tt {
	struct ttm_tt ttm;
	struct drm_gem_object *gobj;
	u64 offset;
	uint64_t userptr;
	struct task_struct *usertask;
	uint32_t userflags;
	bool bound;
	int32_t pool_id;
};

#define ttm_to_amdgpu_ttm_tt(ptr)	container_of(ptr, struct amdgpu_ttm_tt, ttm)

#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
 * amdgpu_ttm_tt_get_user_pages - get device-accessible pages that back user
 * memory and start HMM tracking of CPU page table updates
 *
 * The calling function must call amdgpu_ttm_tt_userptr_range_done() once and
 * only once afterwards to stop HMM tracking
 */
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
				 struct hmm_range **range)
{
	struct ttm_tt *ttm = bo->tbo.ttm;
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	unsigned long start = gtt->userptr;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	bool readonly;
	int r = 0;

	/* Make sure get_user_pages_done() can clean up gracefully */
	*range = NULL;

	mm = bo->notifier.mm;
	if (unlikely(!mm)) {
		DRM_DEBUG_DRIVER("BO is not registered?\n");
		return -EFAULT;
	}

	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
		return -ESRCH;

	mmap_read_lock(mm);
	vma = vma_lookup(mm, start);
	if (unlikely(!vma)) {
		r = -EFAULT;
		goto out_unlock;
	}
	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
		     vma->vm_file)) {
		r = -EPERM;
		goto out_unlock;
	}

	readonly = amdgpu_ttm_tt_is_readonly(ttm);
	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
				       readonly, NULL, pages, range);
out_unlock:
	mmap_read_unlock(mm);
	if (r)
		pr_debug("failed %d to get user pages 0x%lx\n", r, start);

	mmput(mm);

	return r;
}
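
/*
 * A minimal sketch of the required pairing (the caller shown is
 * illustrative, not a real call site):
 *
 *	r = amdgpu_ttm_tt_get_user_pages(bo, pages, &range);
 *	if (r)
 *		return r;
 *	// ... validate and use the pages ...
 *	if (!amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range))
 *		// CPU page tables changed underneath us, retry
 *
 * The done() call both checks validity and stops the HMM tracking started
 * here, which is why it must run exactly once per successful get.
 */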

/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
 */
void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
				      struct hmm_range *range)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt && gtt->userptr && range)
		amdgpu_hmm_range_get_pages_done(range);
}

/*
 * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table
 * changes and check if the pages backing this ttm range have been invalidated
 *
 * Returns: true if pages are still valid
 */
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
				       struct hmm_range *range)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (!gtt || !gtt->userptr || !range)
		return false;

	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
			 gtt->userptr, ttm->num_pages);

	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");

	return !amdgpu_hmm_range_get_pages_done(range);
}
#endif

/*
 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
 *
 * Called by amdgpu_cs_list_validate(). This creates the page list
 * that backs user memory and will ultimately be mapped into the device
 * address space.
 */
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
	unsigned long i;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i] = pages ? pages[i] : NULL;
}

/*
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
 *
 * Called by amdgpu_ttm_backend_bind()
 **/
static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
				     struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
	int r;

	/* Allocate an SG array and squash pages into it */
	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      (u64)ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
	if (r)
		goto release_sg;

	/* Map SG to device */
	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
	if (r)
		goto release_sg;

	/* convert SG to linear array of pages and dma addresses */
	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
				       ttm->num_pages);

	return 0;

release_sg:
	kfree(ttm->sg);
	ttm->sg = NULL;
	return r;
}

/*
 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 */
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
					struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
	if (!ttm->sg || !ttm->sg->sgl)
		return;

	/* unmap the pages mapped to the device */
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
	sg_free_table(ttm->sg);
}

/*
 * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
 * MQDn+CtrlStackn where n is the number of XCCs per partition.
 * pages_per_xcc is the size of one MQD+CtrlStack. The first page is the MQD
 * and uses the default memory type (UC). The remaining pages_per_xcc - 1
 * pages are the control stack, whose memory type is changed to NC.
 */
static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
					  struct ttm_tt *ttm, uint64_t flags)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	uint64_t total_pages = ttm->num_pages;
	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
	uint64_t page_idx, pages_per_xcc;
	int i;
	uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
			AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);

	pages_per_xcc = total_pages;
	do_div(pages_per_xcc, num_xcc);

	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
		/* MQD page: use default flags */
		amdgpu_gart_bind(adev,
				 gtt->offset + (page_idx << PAGE_SHIFT),
				 1, &gtt->ttm.dma_address[page_idx], flags);
		/*
		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
		 * the second page of the BO onward.
		 */
		amdgpu_gart_bind(adev,
				 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
				 pages_per_xcc - 1,
				 &gtt->ttm.dma_address[page_idx + 1],
				 ctrl_flags);
	}
}
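
/*
 * A worked example of the layout above (illustrative numbers): with
 * num_xcc == 2 and total_pages == 8, pages_per_xcc == 4 and the loop
 * produces
 *
 *	page 0: MQD0 (default type, UC)   pages 1-3: CtrlStack0 (NC)
 *	page 4: MQD1 (default type, UC)   pages 5-7: CtrlStack1 (NC)
 */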

static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
				 struct ttm_buffer_object *tbo,
				 uint64_t flags)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
	struct ttm_tt *ttm = tbo->ttm;
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (amdgpu_bo_encrypted(abo))
		flags |= AMDGPU_PTE_TMZ;

	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
	} else {
		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
				 gtt->ttm.dma_address, flags);
	}
	gtt->bound = true;
}

/*
 * amdgpu_ttm_backend_bind - Bind GTT memory
 *
 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 * This handles binding GTT memory to the device address space.
 */
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	uint64_t flags;
	int r;

	if (!bo_mem)
		return -EINVAL;

	if (gtt->bound)
		return 0;

	if (gtt->userptr) {
		r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
	} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
		if (!ttm->sg) {
			struct dma_buf_attachment *attach;
			struct sg_table *sgt;

			attach = gtt->gobj->import_attach;
			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);

			ttm->sg = sgt;
		}

		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
					       ttm->num_pages);
	}

	if (!ttm->num_pages) {
		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
		     ttm->num_pages, bo_mem, ttm);
	}

	if (bo_mem->mem_type != TTM_PL_TT ||
	    !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
		return 0;
	}

	/* compute PTE flags relevant to this BO memory */
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);

	/* bind pages into GART page tables */
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
	amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
			 gtt->ttm.dma_address, flags);
	gtt->bound = true;
	return 0;
}

/*
 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
 * through AGP or GART aperture.
 *
 * If bo is accessible through AGP aperture, then use AGP aperture
 * to access bo; otherwise allocate logical space in GART aperture
 * and map bo to GART aperture.
 */
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
	struct ttm_placement placement;
	struct ttm_place placements;
	struct ttm_resource *tmp;
	uint64_t addr, flags;
	int r;

	if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
		return 0;

	addr = amdgpu_gmc_agp_addr(bo);
	if (addr != AMDGPU_BO_INVALID_OFFSET)
		return 0;

	/* allocate GART space */
	placement.num_placement = 1;
	placement.placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
	placements.mem_type = TTM_PL_TT;
	placements.flags = bo->resource->placement;

	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
	if (unlikely(r))
		return r;

	/* compute PTE flags for this buffer object */
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);

	/* Bind pages */
	gtt->offset = (u64)tmp->start << PAGE_SHIFT;
	amdgpu_ttm_gart_bind(adev, bo, flags);
	amdgpu_gart_invalidate_tlb(adev);
	ttm_resource_free(bo, &bo->resource);
	ttm_bo_assign_mem(bo, tmp);

	return 0;
}
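
/*
 * In short, amdgpu_ttm_alloc_gart() is a three-way decision: the resource
 * already has a valid offset (nothing to do), the BO is reachable through
 * the AGP aperture (directly addressable, nothing to bind), or GART space
 * is allocated and the pages are bound so the GPU can reach the BO.
 */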

/*
 * amdgpu_ttm_recover_gart - Rebind GTT pages
 *
 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
 * rebind GTT pages during a GPU reset.
 */
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	uint64_t flags;

	if (!tbo->ttm)
		return;

	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
	amdgpu_ttm_gart_bind(adev, tbo, flags);
}

/*
 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
 *
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	/* if the pages have userptr pinning then clear that first */
	if (gtt->userptr) {
		amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
	} else if (ttm->sg && gtt->gobj->import_attach) {
		struct dma_buf_attachment *attach;

		attach = gtt->gobj->import_attach;
		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
		ttm->sg = NULL;
	}

	if (!gtt->bound)
		return;

	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
		return;

	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
	amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
	gtt->bound = false;
}

static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
				       struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt->usertask)
		put_task_struct(gtt->usertask);

	ttm_tt_fini(&gtt->ttm);
	kfree(gtt);
}

/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
 * @bo: The buffer object to create a GTT ttm_tt object around
 * @page_flags: Page flags to be added to the ttm_tt object
 *
 * Called by ttm_tt_create().
 */
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
					   uint32_t page_flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_ttm_tt *gtt;
	enum ttm_caching caching;

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (!gtt)
		return NULL;

	gtt->gobj = &bo->base;
	if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
		gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
	else
		gtt->pool_id = abo->xcp_id;

	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		caching = ttm_write_combined;
	else
		caching = ttm_cached;

	/* allocate space for the uninitialized page entries */
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
		kfree(gtt);
		return NULL;
	}
	return &gtt->ttm;
}

/*
 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
 *
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
				  struct ttm_tt *ttm,
				  struct ttm_operation_ctx *ctx)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	struct ttm_pool *pool;
	pgoff_t i;
	int ret;

	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
	if (gtt->userptr) {
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;
		return 0;
	}

	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
		return 0;

	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
		pool = &adev->mman.ttm_pools[gtt->pool_id];
	else
		pool = &adev->mman.bdev.pool;
	ret = ttm_pool_alloc(pool, ttm, ctx);
	if (ret)
		return ret;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = bdev->dev_mapping;

	return 0;
}

/*
 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
 *
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
				     struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	struct amdgpu_device *adev;
	struct ttm_pool *pool;
	pgoff_t i;

	amdgpu_ttm_backend_unbind(bdev, ttm);

	if (gtt->userptr) {
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
		kfree(ttm->sg);
		ttm->sg = NULL;
		return;
	}

	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
		return;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = NULL;

	adev = amdgpu_ttm_adev(bdev);

	if (adev->mman.ttm_pools && gtt->pool_id >= 0)
		pool = &adev->mman.ttm_pools[gtt->pool_id];
	else
		pool = &adev->mman.bdev.pool;

	return ttm_pool_free(pool, ttm);
}

/**
 * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
 * task
 *
 * @tbo: The ttm_buffer_object that contains the userptr
 * @user_addr: The returned userptr address
 */
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
			      uint64_t *user_addr)
{
	struct amdgpu_ttm_tt *gtt;

	if (!tbo->ttm)
		return -EINVAL;

	gtt = (void *)tbo->ttm;
	*user_addr = gtt->userptr;
	return 0;
}

/**
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
 *
 * @bo: The ttm_buffer_object to bind this userptr to
 * @addr: The address in the current tasks VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
 * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
 * initialize GPU VM for a KFD process.
 */
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
			      uint64_t addr, uint32_t flags)
{
	struct amdgpu_ttm_tt *gtt;

	if (!bo->ttm) {
		/* TODO: We want a separate TTM object type for userptrs */
		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
		if (bo->ttm == NULL)
			return -ENOMEM;
	}

	/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
	bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;

	gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
	gtt->userptr = addr;
	gtt->userflags = flags;

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

	return 0;
}

/*
 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
 */
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt == NULL)
		return NULL;

	if (gtt->usertask == NULL)
		return NULL;

	return gtt->usertask->mm;
}

/*
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
 * address range for the current task.
 *
 */
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end, unsigned long *userptr)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
	unsigned long size;

	if (gtt == NULL || !gtt->userptr)
		return false;

	/* Return false if no part of the ttm_tt object lies within
	 * the range
	 */
	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

	if (userptr)
		*userptr = gtt->userptr;
	return true;
}

/*
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
 */
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt == NULL || !gtt->userptr)
		return false;

	return true;
}

/*
 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
 */
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

/**
 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PDE (Page Directory Entry).
 */
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
{
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

	if (mem && (mem->mem_type == TTM_PL_TT ||
		    mem->mem_type == AMDGPU_PL_DOORBELL ||
		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
		flags |= AMDGPU_PTE_SYSTEM;

		if (ttm->caching == ttm_cached)
			flags |= AMDGPU_PTE_SNOOPED;
	}

	if (mem && mem->mem_type == TTM_PL_VRAM &&
	    mem->bus.caching == ttm_cached)
		flags |= AMDGPU_PTE_SNOOPED;

	return flags;
}
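
/*
 * For example, from the checks above: a cached GTT page gets
 * AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED, a
 * write-combined GTT page drops SNOOPED, and VRAM with an uncached bus
 * mapping is just AMDGPU_PTE_VALID.
 */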

/**
 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
 *
 * @adev: amdgpu_device pointer
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PTE (Page Table Entry).
 */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
				 struct ttm_resource *mem)
{
	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

	flags |= adev->gart.gart_pte_flags;
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

/*
 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
 * object.
 *
 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
 * it can find space for a new object and by ttm_bo_force_list_clean() which is
 * used to clean out a memory space.
 */
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
	struct dma_resv_iter resv_cursor;
	struct dma_fence *f;

	if (!amdgpu_bo_is_amdgpu_bo(bo))
		return ttm_bo_eviction_valuable(bo, place);

	/* Swapout? */
	if (bo->resource->mem_type == TTM_PL_SYSTEM)
		return true;

	if (bo->type == ttm_bo_type_kernel &&
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
		return false;

	/* If bo is a KFD BO, check if the bo belongs to the current process.
	 * If true, then return false as any KFD process needs all its BOs to
	 * be resident to run successfully
	 */
	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
				DMA_RESV_USAGE_BOOKKEEP, f) {
		if (amdkfd_fence_check_mm(f, current->mm))
			return false;
	}

	/* Preemptible BOs don't own system resources managed by the
	 * driver (pages, VRAM, GART space). They point to resources
	 * owned by someone else (e.g. pageable memory in user mode
	 * or a DMABuf). They are used in a preemptible context so we
	 * can guarantee no deadlocks and good QoS in case of MMU
	 * notifiers or DMABuf move notifiers from the resource owner.
	 */
	if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
		return false;

	if (bo->resource->mem_type == TTM_PL_TT &&
	    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
		return false;

	return ttm_bo_eviction_valuable(bo, place);
}

static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
				      void *buf, size_t size, bool write)
{
	while (size) {
		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
		uint64_t bytes = 4 - (pos & 0x3);
		uint32_t shift = (pos & 0x3) * 8;
		uint32_t mask = 0xffffffff << shift;
		uint32_t value = 0;

		if (size < bytes) {
			mask &= 0xffffffff >> (bytes - size) * 8;
			bytes = size;
		}

		if (mask != 0xffffffff) {
			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
			if (write) {
				value &= ~mask;
				value |= (*(uint32_t *)buf << shift) & mask;
				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
			} else {
				value = (value & mask) >> shift;
				memcpy(buf, &value, bytes);
			}
		} else {
			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
		}

		pos += bytes;
		buf += bytes;
		size -= bytes;
	}
}
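
/*
 * A worked read of 3 bytes at pos == 0x1002: iteration one uses
 * aligned_pos == 0x1000, bytes == 2, shift == 16, mask == 0xffff0000, so
 * it reads the whole dword and extracts the top two bytes; iteration two
 * (pos == 0x1004, size == 1) shrinks the mask to 0xff and extracts the
 * low byte. Writes take the same split but read-modify-write the
 * untouched byte lanes instead.
 */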
1449 | |
1450 | static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, |
1451 | unsigned long offset, void *buf, |
1452 | int len, int write) |
1453 | { |
1454 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo: bo); |
1455 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: abo->tbo.bdev); |
1456 | struct amdgpu_res_cursor src_mm; |
1457 | struct amdgpu_job *job; |
1458 | struct dma_fence *fence; |
1459 | uint64_t src_addr, dst_addr; |
1460 | unsigned int num_dw; |
1461 | int r, idx; |
1462 | |
1463 | if (len != PAGE_SIZE) |
1464 | return -EINVAL; |
1465 | |
1466 | if (!adev->mman.sdma_access_ptr) |
1467 | return -EACCES; |
1468 | |
1469 | if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) |
1470 | return -ENODEV; |
1471 | |
1472 | if (write) |
1473 | memcpy(adev->mman.sdma_access_ptr, buf, len); |
1474 | |
1475 | num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); |
1476 | r = amdgpu_job_alloc_with_ib(adev, entity: &adev->mman.high_pr, |
1477 | AMDGPU_FENCE_OWNER_UNDEFINED, |
1478 | size: num_dw * 4, pool_type: AMDGPU_IB_POOL_DELAYED, |
1479 | job: &job); |
1480 | if (r) |
1481 | goto out; |
1482 | |
1483 | amdgpu_res_first(res: abo->tbo.resource, start: offset, size: len, cur: &src_mm); |
1484 | src_addr = amdgpu_ttm_domain_start(adev, type: bo->resource->mem_type) + |
1485 | src_mm.start; |
1486 | dst_addr = amdgpu_bo_gpu_offset(bo: adev->mman.sdma_access_bo); |
1487 | if (write) |
1488 | swap(src_addr, dst_addr); |
1489 | |
1490 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, |
1491 | PAGE_SIZE, false); |
1492 | |
1493 | amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); |
1494 | WARN_ON(job->ibs[0].length_dw > num_dw); |
1495 | |
1496 | fence = amdgpu_job_submit(job); |
1497 | |
1498 | if (!dma_fence_wait_timeout(fence, intr: false, timeout: adev->sdma_timeout)) |
1499 | r = -ETIMEDOUT; |
1500 | dma_fence_put(fence); |
1501 | |
1502 | if (!(r || write)) |
1503 | memcpy(buf, adev->mman.sdma_access_ptr, len); |
1504 | out: |
1505 | drm_dev_exit(idx); |
1506 | return r; |
1507 | } |
1508 | |
1509 | /** |
1510 | * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. |
1511 | * |
1512 | * @bo: The buffer object to read/write |
1513 | * @offset: Offset into buffer object |
1514 | * @buf: Secondary buffer to write/read from |
1515 | * @len: Length in bytes of access |
1516 | * @write: true if writing |
1517 | * |
1518 | * This is used to access VRAM that backs a buffer object via MMIO |
1519 | * access for debugging purposes. |
1520 | */ |
1521 | static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, |
1522 | unsigned long offset, void *buf, int len, |
1523 | int write) |
1524 | { |
1525 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo: bo); |
1526 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: abo->tbo.bdev); |
1527 | struct amdgpu_res_cursor cursor; |
1528 | int ret = 0; |
1529 | |
1530 | if (bo->resource->mem_type != TTM_PL_VRAM) |
1531 | return -EIO; |
1532 | |
1533 | if (amdgpu_device_has_timeouts_enabled(adev) && |
1534 | !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write)) |
1535 | return len; |
1536 | |
1537 | amdgpu_res_first(res: bo->resource, start: offset, size: len, cur: &cursor); |
1538 | while (cursor.remaining) { |
1539 | size_t count, size = cursor.size; |
1540 | loff_t pos = cursor.start; |
1541 | |
1542 | count = amdgpu_device_aper_access(adev, pos, buf, size, write); |
1543 | size -= count; |
1544 | if (size) { |
1545 | /* using MM to access rest vram and handle un-aligned address */ |
1546 | pos += count; |
1547 | buf += count; |
1548 | amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write); |
1549 | } |
1550 | |
1551 | ret += cursor.size; |
1552 | buf += cursor.size; |
1553 | amdgpu_res_next(cur: &cursor, size: cursor.size); |
1554 | } |
1555 | |
1556 | return ret; |
1557 | } |
1558 | |
1559 | static void |
1560 | amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) |
1561 | { |
1562 | amdgpu_bo_move_notify(bo, evict: false); |
1563 | } |
1564 | |
1565 | static struct ttm_device_funcs amdgpu_bo_driver = { |
1566 | .ttm_tt_create = &amdgpu_ttm_tt_create, |
1567 | .ttm_tt_populate = &amdgpu_ttm_tt_populate, |
1568 | .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, |
1569 | .ttm_tt_destroy = &amdgpu_ttm_backend_destroy, |
1570 | .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, |
1571 | .evict_flags = &amdgpu_evict_flags, |
1572 | .move = &amdgpu_bo_move, |
1573 | .delete_mem_notify = &amdgpu_bo_delete_mem_notify, |
1574 | .release_notify = &amdgpu_bo_release_notify, |
1575 | .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, |
1576 | .io_mem_pfn = amdgpu_ttm_io_mem_pfn, |
1577 | .access_memory = &amdgpu_ttm_access_memory, |
1578 | }; |
1579 | |
1580 | /* |
1581 | * Firmware Reservation functions |
1582 | */ |
1583 | /** |
1584 | * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram |
1585 | * |
1586 | * @adev: amdgpu_device pointer |
1587 | * |
1588 | * free fw reserved vram if it has been reserved. |
1589 | */ |
1590 | static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) |
1591 | { |
1592 | amdgpu_bo_free_kernel(bo: &adev->mman.fw_vram_usage_reserved_bo, |
1593 | NULL, cpu_addr: &adev->mman.fw_vram_usage_va); |
1594 | } |
1595 | |
1596 | /* |
1597 | * Driver Reservation functions |
1598 | */ |
1599 | /** |
1600 | * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram |
1601 | * |
1602 | * @adev: amdgpu_device pointer |
1603 | * |
1604 | * free drv reserved vram if it has been reserved. |
1605 | */ |
1606 | static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) |
1607 | { |
1608 | amdgpu_bo_free_kernel(bo: &adev->mman.drv_vram_usage_reserved_bo, |
1609 | NULL, |
1610 | cpu_addr: &adev->mman.drv_vram_usage_va); |
1611 | } |
1612 | |
1613 | /** |
1614 | * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw |
1615 | * |
1616 | * @adev: amdgpu_device pointer |
1617 | * |
1618 | * create bo vram reservation from fw. |
1619 | */ |
1620 | static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) |
1621 | { |
1622 | uint64_t vram_size = adev->gmc.visible_vram_size; |
1623 | |
1624 | adev->mman.fw_vram_usage_va = NULL; |
1625 | adev->mman.fw_vram_usage_reserved_bo = NULL; |
1626 | |
1627 | if (adev->mman.fw_vram_usage_size == 0 || |
1628 | adev->mman.fw_vram_usage_size > vram_size) |
1629 | return 0; |
1630 | |
1631 | return amdgpu_bo_create_kernel_at(adev, |
1632 | offset: adev->mman.fw_vram_usage_start_offset, |
1633 | size: adev->mman.fw_vram_usage_size, |
1634 | bo_ptr: &adev->mman.fw_vram_usage_reserved_bo, |
1635 | cpu_addr: &adev->mman.fw_vram_usage_va); |
1636 | } |
1637 | |
1638 | /** |
1639 | * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver |
1640 | * |
1641 | * @adev: amdgpu_device pointer |
1642 | * |
1643 | * create bo vram reservation from drv. |
1644 | */ |
1645 | static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) |
1646 | { |
1647 | u64 vram_size = adev->gmc.visible_vram_size; |
1648 | |
1649 | adev->mman.drv_vram_usage_va = NULL; |
1650 | adev->mman.drv_vram_usage_reserved_bo = NULL; |
1651 | |
1652 | if (adev->mman.drv_vram_usage_size == 0 || |
1653 | adev->mman.drv_vram_usage_size > vram_size) |
1654 | return 0; |
1655 | |
	return amdgpu_bo_create_kernel_at(adev,
					  adev->mman.drv_vram_usage_start_offset,
					  adev->mman.drv_vram_usage_size,
					  &adev->mman.drv_vram_usage_reserved_bo,
					  &adev->mman.drv_vram_usage_va);
1661 | } |
1662 | |
1663 | /* |
 * Memory training reservation functions
1665 | */ |
1666 | |
1667 | /** |
1668 | * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram |
1669 | * |
1670 | * @adev: amdgpu_device pointer |
1671 | * |
 * Free the memory-training reserved VRAM if it has been reserved.
1673 | */ |
1674 | static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) |
1675 | { |
1676 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1677 | |
1678 | ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; |
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1680 | ctx->c2p_bo = NULL; |
1681 | |
1682 | return 0; |
1683 | } |
1684 | |
1685 | static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, |
1686 | uint32_t reserve_size) |
1687 | { |
1688 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1689 | |
1690 | memset(ctx, 0, sizeof(*ctx)); |
1691 | |
1692 | ctx->c2p_train_data_offset = |
1693 | ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); |
1694 | ctx->p2c_train_data_offset = |
1695 | (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); |
1696 | ctx->train_data_size = |
1697 | GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; |
1698 | |
1699 | DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n" , |
1700 | ctx->train_data_size, |
1701 | ctx->p2c_train_data_offset, |
1702 | ctx->c2p_train_data_offset); |
1703 | } |
1704 | |
1705 | /* |
1706 | * reserve TMR memory at the top of VRAM which holds |
1707 | * IP Discovery data and is protected by PSP. |
1708 | */ |
1709 | static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) |
1710 | { |
1711 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1712 | bool mem_train_support = false; |
1713 | uint32_t reserve_size = 0; |
1714 | int ret; |
1715 | |
1716 | if (adev->bios && !amdgpu_sriov_vf(adev)) { |
1717 | if (amdgpu_atomfirmware_mem_training_supported(adev)) |
1718 | mem_train_support = true; |
1719 | else |
1720 | DRM_DEBUG("memory training does not support!\n" ); |
1721 | } |
1722 | |
1723 | /* |
1724 | * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all |
1725 | * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc) |
1726 | * |
1727 | * Otherwise, fallback to legacy approach to check and reserve tmr block for ip |
1728 | * discovery data and G6 memory training data respectively |
1729 | */ |
1730 | if (adev->bios) |
1731 | reserve_size = |
1732 | amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); |
1733 | |
1734 | if (!adev->bios && |
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
1736 | reserve_size = max(reserve_size, (uint32_t)280 << 20); |
1737 | else if (!reserve_size) |
1738 | reserve_size = DISCOVERY_TMR_OFFSET; |
1739 | |
1740 | if (mem_train_support) { |
1741 | /* reserve vram for mem train according to TMR location */ |
1742 | amdgpu_ttm_training_data_block_init(adev, reserve_size); |
		ret = amdgpu_bo_create_kernel_at(adev,
						 ctx->c2p_train_data_offset,
						 ctx->train_data_size,
						 &ctx->c2p_bo,
						 NULL);
		if (ret) {
			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1750 | amdgpu_ttm_training_reserve_vram_fini(adev); |
1751 | return ret; |
1752 | } |
1753 | ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; |
1754 | } |
1755 | |
1756 | if (!adev->gmc.is_app_apu) { |
		ret = amdgpu_bo_create_kernel_at(
			adev, adev->gmc.real_vram_size - reserve_size,
			reserve_size, &adev->mman.fw_reserved_memory, NULL);
		if (ret) {
			DRM_ERROR("alloc tmr failed(%d)!\n", ret);
			amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
					      NULL, NULL);
1764 | return ret; |
1765 | } |
1766 | } else { |
1767 | DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n" ); |
1768 | } |
1769 | |
1770 | return 0; |
1771 | } |
1772 | |
1773 | static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) |
1774 | { |
1775 | int i; |
1776 | |
1777 | if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions) |
1778 | return 0; |
1779 | |
	adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
				       sizeof(*adev->mman.ttm_pools),
				       GFP_KERNEL);
1783 | if (!adev->mman.ttm_pools) |
1784 | return -ENOMEM; |
1785 | |
1786 | for (i = 0; i < adev->gmc.num_mem_partitions; i++) { |
		ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
			      adev->gmc.mem_partitions[i].numa.node,
			      false, false);
1790 | } |
1791 | return 0; |
1792 | } |
1793 | |
1794 | static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) |
1795 | { |
1796 | int i; |
1797 | |
1798 | if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools) |
1799 | return; |
1800 | |
1801 | for (i = 0; i < adev->gmc.num_mem_partitions; i++) |
		ttm_pool_fini(&adev->mman.ttm_pools[i]);
1803 | |
	kfree(adev->mman.ttm_pools);
1805 | adev->mman.ttm_pools = NULL; |
1806 | } |
1807 | |
1808 | /* |
1809 | * amdgpu_ttm_init - Init the memory management (ttm) as well as various |
1810 | * gtt/vram related fields. |
1811 | * |
1812 | * This initializes all of the memory space pools that the TTM layer |
1813 | * will need such as the GTT space (system memory mapped to the device), |
1814 | * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which |
1815 | * can be mapped per VMID. |
1816 | */ |
1817 | int amdgpu_ttm_init(struct amdgpu_device *adev) |
1818 | { |
1819 | uint64_t gtt_size; |
1820 | int r; |
1821 | |
1822 | mutex_init(&adev->mman.gtt_window_lock); |
1823 | |
	/* No other users of the address space, so set it to 0 */
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
			    adev_to_drm(adev)->anon_inode->i_mapping,
			    adev_to_drm(adev)->vma_offset_manager,
			    adev->need_swiotlb,
			    dma_addressing_limited(adev->dev));
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1832 | return r; |
1833 | } |
1834 | |
1835 | r = amdgpu_ttm_pools_init(adev); |
1836 | if (r) { |
1837 | DRM_ERROR("failed to init ttm pools(%d).\n" , r); |
1838 | return r; |
1839 | } |
1840 | adev->mman.initialized = true; |
1841 | |
1842 | /* Initialize VRAM pool with all of VRAM divided into pages */ |
1843 | r = amdgpu_vram_mgr_init(adev); |
1844 | if (r) { |
1845 | DRM_ERROR("Failed initializing VRAM heap.\n" ); |
1846 | return r; |
1847 | } |
1848 | |
1849 | /* Change the size here instead of the init above so only lpfn is affected */ |
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
#ifdef CONFIG_64BIT
#ifdef CONFIG_X86
	if (adev->gmc.xgmi.connected_to_cpu)
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
	else if (adev->gmc.is_app_apu)
		DRM_DEBUG_DRIVER(
			"No need to ioremap when real vram size is 0\n");
	else
#endif
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1864 | #endif |
1865 | |
1866 | /* |
1867 | *The reserved vram for firmware must be pinned to the specified |
1868 | *place on the VRAM, so reserve it early. |
1869 | */ |
1870 | r = amdgpu_ttm_fw_reserve_vram_init(adev); |
1871 | if (r) |
1872 | return r; |
1873 | |
1874 | /* |
1875 | *The reserved vram for driver must be pinned to the specified |
1876 | *place on the VRAM, so reserve it early. |
1877 | */ |
1878 | r = amdgpu_ttm_drv_reserve_vram_init(adev); |
1879 | if (r) |
1880 | return r; |
1881 | |
1882 | /* |
1883 | * only NAVI10 and onwards ASIC support for IP discovery. |
1884 | * If IP discovery enabled, a block of memory should be |
1885 | * reserved for IP discovey. |
1886 | */ |
1887 | if (adev->mman.discovery_bin) { |
1888 | r = amdgpu_ttm_reserve_tmr(adev); |
1889 | if (r) |
1890 | return r; |
1891 | } |
1892 | |
1893 | /* allocate memory as required for VGA |
1894 | * This is used for VGA emulation and pre-OS scanout buffers to |
1895 | * avoid display artifacts while transitioning between pre-OS |
1896 | * and driver. |
1897 | */ |
1898 | if (!adev->gmc.is_app_apu) { |
		r = amdgpu_bo_create_kernel_at(adev, 0,
					       adev->mman.stolen_vga_size,
					       &adev->mman.stolen_vga_memory,
					       NULL);
1903 | if (r) |
1904 | return r; |
1905 | |
		r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
					       adev->mman.stolen_extended_size,
					       &adev->mman.stolen_extended_memory,
					       NULL);
1910 | |
1911 | if (r) |
1912 | return r; |
1913 | |
		r = amdgpu_bo_create_kernel_at(adev,
					       adev->mman.stolen_reserved_offset,
					       adev->mman.stolen_reserved_size,
					       &adev->mman.stolen_reserved_memory,
					       NULL);
1919 | if (r) |
1920 | return r; |
1921 | } else { |
1922 | DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n" ); |
1923 | } |
1924 | |
1925 | DRM_INFO("amdgpu: %uM of VRAM memory ready\n" , |
1926 | (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); |
1927 | |
1928 | /* Compute GTT size, either based on TTM limit |
1929 | * or whatever the user passed on module init. |
1930 | */ |
1931 | if (amdgpu_gtt_size == -1) |
1932 | gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; |
1933 | else |
1934 | gtt_size = (uint64_t)amdgpu_gtt_size << 20; |
1935 | |
1936 | /* Initialize GTT memory pool */ |
1937 | r = amdgpu_gtt_mgr_init(adev, gtt_size); |
1938 | if (r) { |
1939 | DRM_ERROR("Failed initializing GTT heap.\n" ); |
1940 | return r; |
1941 | } |
1942 | DRM_INFO("amdgpu: %uM of GTT memory ready.\n" , |
1943 | (unsigned int)(gtt_size / (1024 * 1024))); |
1944 | |
1945 | /* Initiailize doorbell pool on PCI BAR */ |
1946 | r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, size_in_page: adev->doorbell.size / PAGE_SIZE); |
1947 | if (r) { |
1948 | DRM_ERROR("Failed initializing doorbell heap.\n" ); |
1949 | return r; |
1950 | } |
1951 | |
1952 | /* Create a boorbell page for kernel usages */ |
1953 | r = amdgpu_doorbell_create_kernel_doorbells(adev); |
1954 | if (r) { |
1955 | DRM_ERROR("Failed to initialize kernel doorbells.\n" ); |
1956 | return r; |
1957 | } |
1958 | |
1959 | /* Initialize preemptible memory pool */ |
1960 | r = amdgpu_preempt_mgr_init(adev); |
1961 | if (r) { |
1962 | DRM_ERROR("Failed initializing PREEMPT heap.\n" ); |
1963 | return r; |
1964 | } |
1965 | |
1966 | /* Initialize various on-chip memory pools */ |
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
		return r;
	}

	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->mman.sdma_access_bo, NULL,
				    &adev->mman.sdma_access_ptr))
		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
1989 | |
1990 | return 0; |
1991 | } |
1992 | |
1993 | /* |
1994 | * amdgpu_ttm_fini - De-initialize the TTM memory pools |
1995 | */ |
1996 | void amdgpu_ttm_fini(struct amdgpu_device *adev) |
1997 | { |
1998 | int idx; |
1999 | |
2000 | if (!adev->mman.initialized) |
2001 | return; |
2002 | |
2003 | amdgpu_ttm_pools_fini(adev); |
2004 | |
2005 | amdgpu_ttm_training_reserve_vram_fini(adev); |
2006 | /* return the stolen vga memory back to VRAM */ |
2007 | if (!adev->gmc.is_app_apu) { |
		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
		amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
		/* return the FW reserved memory back to VRAM */
		amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
				      NULL);
		if (adev->mman.stolen_reserved_size)
			amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
					      NULL, NULL);
	}
	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
			      &adev->mman.sdma_access_ptr);
2019 | amdgpu_ttm_fw_reserve_vram_fini(adev); |
2020 | amdgpu_ttm_drv_reserve_vram_fini(adev); |
2021 | |
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		if (adev->mman.aper_base_kaddr)
			iounmap(adev->mman.aper_base_kaddr);
2026 | adev->mman.aper_base_kaddr = NULL; |
2027 | |
2028 | drm_dev_exit(idx); |
2029 | } |
2030 | |
2031 | amdgpu_vram_mgr_fini(adev); |
2032 | amdgpu_gtt_mgr_fini(adev); |
2033 | amdgpu_preempt_mgr_fini(adev); |
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_device_fini(&adev->mman.bdev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
2040 | } |
2041 | |
2042 | /** |
2043 | * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions |
2044 | * |
2045 | * @adev: amdgpu_device pointer |
2046 | * @enable: true when we can use buffer functions. |
2047 | * |
2048 | * Enable/disable use of buffer functions during suspend/resume. This should |
2049 | * only be called at bootup or when userspace isn't running. |
2050 | */ |
2051 | void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) |
2052 | { |
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
2054 | uint64_t size; |
2055 | int r; |
2056 | |
2057 | if (!adev->mman.initialized || amdgpu_in_reset(adev) || |
2058 | adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) |
2059 | return; |
2060 | |
2061 | if (enable) { |
2062 | struct amdgpu_ring *ring; |
2063 | struct drm_gpu_scheduler *sched; |
2064 | |
2065 | ring = adev->mman.buffer_funcs_ring; |
2066 | sched = &ring->sched; |
		r = drm_sched_entity_init(&adev->mman.high_pr,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
2073 | return; |
2074 | } |
2075 | |
		r = drm_sched_entity_init(&adev->mman.low_pr,
					  DRM_SCHED_PRIORITY_NORMAL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
2082 | goto error_free_entity; |
2083 | } |
2084 | } else { |
		drm_sched_entity_destroy(&adev->mman.high_pr);
		drm_sched_entity_destroy(&adev->mman.low_pr);
		dma_fence_put(man->move);
		man->move = NULL;
2089 | } |
2090 | |
2091 | /* this just adjusts TTM size idea, which sets lpfn to the correct value */ |
2092 | if (enable) |
2093 | size = adev->gmc.real_vram_size; |
2094 | else |
2095 | size = adev->gmc.visible_vram_size; |
2096 | man->size = size; |
2097 | adev->mman.buffer_funcs_enabled = enable; |
2098 | |
2099 | return; |
2100 | |
2101 | error_free_entity: |
	drm_sched_entity_destroy(&adev->mman.high_pr);
2103 | } |
2104 | |
2105 | static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, |
2106 | bool direct_submit, |
2107 | unsigned int num_dw, |
2108 | struct dma_resv *resv, |
2109 | bool vm_needs_flush, |
2110 | struct amdgpu_job **job, |
2111 | bool delayed) |
2112 | { |
2113 | enum amdgpu_ib_pool_type pool = direct_submit ? |
2114 | AMDGPU_IB_POOL_DIRECT : |
2115 | AMDGPU_IB_POOL_DELAYED; |
2116 | int r; |
2117 | struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr : |
2118 | &adev->mman.high_pr; |
	r = amdgpu_job_alloc_with_ib(adev, entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, pool, job);
2122 | if (r) |
2123 | return r; |
2124 | |
2125 | if (vm_needs_flush) { |
		(*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
							adev->gmc.pdb0_bo :
							adev->gart.bo);
2129 | (*job)->vm_needs_flush = true; |
2130 | } |
2131 | if (!resv) |
2132 | return 0; |
2133 | |
	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
						   DMA_RESV_USAGE_BOOKKEEP);
2136 | } |
2137 | |
2138 | int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, |
2139 | uint64_t dst_offset, uint32_t byte_count, |
2140 | struct dma_resv *resv, |
2141 | struct dma_fence **fence, bool direct_submit, |
2142 | bool vm_needs_flush, bool tmz) |
2143 | { |
2144 | struct amdgpu_device *adev = ring->adev; |
2145 | unsigned int num_loops, num_dw; |
2146 | struct amdgpu_job *job; |
2147 | uint32_t max_bytes; |
2148 | unsigned int i; |
2149 | int r; |
2150 | |
2151 | if (!direct_submit && !ring->sched.ready) { |
2152 | DRM_ERROR("Trying to move memory with ring turned off.\n" ); |
2153 | return -EINVAL; |
2154 | } |
2155 | |
2156 | max_bytes = adev->mman.buffer_funcs->copy_max_bytes; |
2157 | num_loops = DIV_ROUND_UP(byte_count, max_bytes); |
2158 | num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); |
	r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
				   resv, vm_needs_flush, &job, false);
2161 | if (r) |
2162 | return r; |
2163 | |
2164 | for (i = 0; i < num_loops; i++) { |
2165 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
2166 | |
2167 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, |
2168 | dst_offset, cur_size_in_bytes, tmz); |
2169 | |
2170 | src_offset += cur_size_in_bytes; |
2171 | dst_offset += cur_size_in_bytes; |
2172 | byte_count -= cur_size_in_bytes; |
2173 | } |
2174 | |
2175 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
2176 | WARN_ON(job->ibs[0].length_dw > num_dw); |
2177 | if (direct_submit) |
2178 | r = amdgpu_job_submit_direct(job, ring, fence); |
2179 | else |
2180 | *fence = amdgpu_job_submit(job); |
2181 | if (r) |
2182 | goto error_free; |
2183 | |
2184 | return r; |
2185 | |
2186 | error_free: |
2187 | amdgpu_job_free(job); |
2188 | DRM_ERROR("Error scheduling IBs (%d)\n" , r); |
2189 | return r; |
2190 | } |
2191 | |
2192 | static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, |
2193 | uint64_t dst_addr, uint32_t byte_count, |
2194 | struct dma_resv *resv, |
2195 | struct dma_fence **fence, |
2196 | bool vm_needs_flush, bool delayed) |
2197 | { |
2198 | struct amdgpu_device *adev = ring->adev; |
2199 | unsigned int num_loops, num_dw; |
2200 | struct amdgpu_job *job; |
2201 | uint32_t max_bytes; |
2202 | unsigned int i; |
2203 | int r; |
2204 | |
2205 | max_bytes = adev->mman.buffer_funcs->fill_max_bytes; |
2206 | num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); |
2207 | num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); |
	r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
				   &job, delayed);
2210 | if (r) |
2211 | return r; |
2212 | |
2213 | for (i = 0; i < num_loops; i++) { |
2214 | uint32_t cur_size = min(byte_count, max_bytes); |
2215 | |
2216 | amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, |
2217 | cur_size); |
2218 | |
2219 | dst_addr += cur_size; |
2220 | byte_count -= cur_size; |
2221 | } |
2222 | |
2223 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
2224 | WARN_ON(job->ibs[0].length_dw > num_dw); |
2225 | *fence = amdgpu_job_submit(job); |
2226 | return 0; |
2227 | } |
2228 | |
2229 | int amdgpu_fill_buffer(struct amdgpu_bo *bo, |
2230 | uint32_t src_data, |
2231 | struct dma_resv *resv, |
2232 | struct dma_fence **f, |
2233 | bool delayed) |
2234 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2236 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
2237 | struct dma_fence *fence = NULL; |
2238 | struct amdgpu_res_cursor dst; |
2239 | int r; |
2240 | |
2241 | if (!adev->mman.buffer_funcs_enabled) { |
2242 | DRM_ERROR("Trying to clear memory with ring turned off.\n" ); |
2243 | return -EINVAL; |
2244 | } |
2245 | |
	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
2247 | |
2248 | mutex_lock(&adev->mman.gtt_window_lock); |
2249 | while (dst.remaining) { |
2250 | struct dma_fence *next; |
2251 | uint64_t cur_size, to; |
2252 | |
2253 | /* Never fill more than 256MiB at once to avoid timeouts */ |
2254 | cur_size = min(dst.size, 256ULL << 20); |
2255 | |
		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
					  1, ring, false, &cur_size, &to);
2258 | if (r) |
2259 | goto error; |
2260 | |
		r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
					&next, true, delayed);
2263 | if (r) |
2264 | goto error; |
2265 | |
2266 | dma_fence_put(fence); |
2267 | fence = next; |
2268 | |
		amdgpu_res_next(&dst, cur_size);
2270 | } |
2271 | error: |
	mutex_unlock(&adev->mman.gtt_window_lock);
2273 | if (f) |
2274 | *f = dma_fence_get(fence); |
2275 | dma_fence_put(fence); |
2276 | return r; |
2277 | } |
2278 | |
2279 | /** |
2280 | * amdgpu_ttm_evict_resources - evict memory buffers |
2281 | * @adev: amdgpu device object |
2282 | * @mem_type: evicted BO's memory type |
2283 | * |
2284 | * Evicts all @mem_type buffers on the lru list of the memory type. |
2285 | * |
2286 | * Returns: |
2287 | * 0 for success or a negative error code on failure. |
2288 | */ |
2289 | int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) |
2290 | { |
2291 | struct ttm_resource_manager *man; |
2292 | |
2293 | switch (mem_type) { |
2294 | case TTM_PL_VRAM: |
2295 | case TTM_PL_TT: |
2296 | case AMDGPU_PL_GWS: |
2297 | case AMDGPU_PL_GDS: |
2298 | case AMDGPU_PL_OA: |
		man = ttm_manager_type(&adev->mman.bdev, mem_type);
2300 | break; |
2301 | default: |
2302 | DRM_ERROR("Trying to evict invalid memory type\n" ); |
2303 | return -EINVAL; |
2304 | } |
2305 | |
	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
2307 | } |
2308 | |
2309 | #if defined(CONFIG_DEBUG_FS) |
2310 | |
2311 | static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) |
2312 | { |
2313 | struct amdgpu_device *adev = m->private; |
2314 | |
	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
2316 | } |
2317 | |
2318 | DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); |
2319 | |
2320 | /* |
2321 | * amdgpu_ttm_vram_read - Linear read access to VRAM |
2322 | * |
2323 | * Accesses VRAM via MMIO for debugging purposes. |
2324 | */ |
2325 | static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, |
2326 | size_t size, loff_t *pos) |
2327 | { |
2328 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2329 | ssize_t result = 0; |
2330 | |
2331 | if (size & 0x3 || *pos & 0x3) |
2332 | return -EINVAL; |
2333 | |
2334 | if (*pos >= adev->gmc.mc_vram_size) |
2335 | return -ENXIO; |
2336 | |
2337 | size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos)); |
2338 | while (size) { |
2339 | size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4); |
2340 | uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ]; |
2341 | |
		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
		if (copy_to_user(buf, value, bytes))
2344 | return -EFAULT; |
2345 | |
2346 | result += bytes; |
2347 | buf += bytes; |
2348 | *pos += bytes; |
2349 | size -= bytes; |
2350 | } |
2351 | |
2352 | return result; |
2353 | } |
2354 | |
2355 | /* |
2356 | * amdgpu_ttm_vram_write - Linear write access to VRAM |
2357 | * |
2358 | * Accesses VRAM via MMIO for debugging purposes. |
2359 | */ |
2360 | static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, |
2361 | size_t size, loff_t *pos) |
2362 | { |
2363 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2364 | ssize_t result = 0; |
2365 | int r; |
2366 | |
2367 | if (size & 0x3 || *pos & 0x3) |
2368 | return -EINVAL; |
2369 | |
2370 | if (*pos >= adev->gmc.mc_vram_size) |
2371 | return -ENXIO; |
2372 | |
2373 | while (size) { |
2374 | uint32_t value; |
2375 | |
2376 | if (*pos >= adev->gmc.mc_vram_size) |
2377 | return result; |
2378 | |
2379 | r = get_user(value, (uint32_t *)buf); |
2380 | if (r) |
2381 | return r; |
2382 | |
		amdgpu_device_mm_access(adev, *pos, &value, 4, true);
2384 | |
2385 | result += 4; |
2386 | buf += 4; |
2387 | *pos += 4; |
2388 | size -= 4; |
2389 | } |
2390 | |
2391 | return result; |
2392 | } |
2393 | |
2394 | static const struct file_operations amdgpu_ttm_vram_fops = { |
2395 | .owner = THIS_MODULE, |
2396 | .read = amdgpu_ttm_vram_read, |
2397 | .write = amdgpu_ttm_vram_write, |
2398 | .llseek = default_llseek, |
2399 | }; |
2400 | |
2401 | /* |
2402 | * amdgpu_iomem_read - Virtual read access to GPU mapped memory |
2403 | * |
2404 | * This function is used to read memory that has been mapped to the |
2405 | * GPU and the known addresses are not physical addresses but instead |
2406 | * bus addresses (e.g., what you'd put in an IB or ring buffer). |
2407 | */ |
2408 | static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, |
2409 | size_t size, loff_t *pos) |
2410 | { |
2411 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2412 | struct iommu_domain *dom; |
2413 | ssize_t result = 0; |
2414 | int r; |
2415 | |
2416 | /* retrieve the IOMMU domain if any for this device */ |
	dom = iommu_get_domain_for_dev(adev->dev);
2418 | |
2419 | while (size) { |
2420 | phys_addr_t addr = *pos & PAGE_MASK; |
2421 | loff_t off = *pos & ~PAGE_MASK; |
2422 | size_t bytes = PAGE_SIZE - off; |
2423 | unsigned long pfn; |
2424 | struct page *p; |
2425 | void *ptr; |
2426 | |
2427 | bytes = min(bytes, size); |
2428 | |
2429 | /* Translate the bus address to a physical address. If |
2430 | * the domain is NULL it means there is no IOMMU active |
2431 | * and the address translation is the identity |
2432 | */ |
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2434 | |
2435 | pfn = addr >> PAGE_SHIFT; |
2436 | if (!pfn_valid(pfn)) |
2437 | return -EPERM; |
2438 | |
2439 | p = pfn_to_page(pfn); |
2440 | if (p->mapping != adev->mman.bdev.dev_mapping) |
2441 | return -EPERM; |
2442 | |
		ptr = kmap_local_page(p);
		r = copy_to_user(buf, ptr + off, bytes);
2445 | kunmap_local(ptr); |
2446 | if (r) |
2447 | return -EFAULT; |
2448 | |
2449 | size -= bytes; |
2450 | *pos += bytes; |
2451 | result += bytes; |
2452 | } |
2453 | |
2454 | return result; |
2455 | } |
2456 | |
2457 | /* |
2458 | * amdgpu_iomem_write - Virtual write access to GPU mapped memory |
2459 | * |
2460 | * This function is used to write memory that has been mapped to the |
2461 | * GPU and the known addresses are not physical addresses but instead |
2462 | * bus addresses (e.g., what you'd put in an IB or ring buffer). |
2463 | */ |
2464 | static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, |
2465 | size_t size, loff_t *pos) |
2466 | { |
2467 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2468 | struct iommu_domain *dom; |
2469 | ssize_t result = 0; |
2470 | int r; |
2471 | |
	dom = iommu_get_domain_for_dev(adev->dev);
2473 | |
2474 | while (size) { |
2475 | phys_addr_t addr = *pos & PAGE_MASK; |
2476 | loff_t off = *pos & ~PAGE_MASK; |
2477 | size_t bytes = PAGE_SIZE - off; |
2478 | unsigned long pfn; |
2479 | struct page *p; |
2480 | void *ptr; |
2481 | |
2482 | bytes = min(bytes, size); |
2483 | |
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2485 | |
2486 | pfn = addr >> PAGE_SHIFT; |
2487 | if (!pfn_valid(pfn)) |
2488 | return -EPERM; |
2489 | |
2490 | p = pfn_to_page(pfn); |
2491 | if (p->mapping != adev->mman.bdev.dev_mapping) |
2492 | return -EPERM; |
2493 | |
		ptr = kmap_local_page(p);
		r = copy_from_user(ptr + off, buf, bytes);
2496 | kunmap_local(ptr); |
2497 | if (r) |
2498 | return -EFAULT; |
2499 | |
2500 | size -= bytes; |
2501 | *pos += bytes; |
2502 | result += bytes; |
2503 | } |
2504 | |
2505 | return result; |
2506 | } |
2507 | |
2508 | static const struct file_operations amdgpu_ttm_iomem_fops = { |
2509 | .owner = THIS_MODULE, |
2510 | .read = amdgpu_iomem_read, |
2511 | .write = amdgpu_iomem_write, |
2512 | .llseek = default_llseek |
2513 | }; |
2514 | |
2515 | #endif |
2516 | |
2517 | void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) |
2518 | { |
2519 | #if defined(CONFIG_DEBUG_FS) |
2520 | struct drm_minor *minor = adev_to_drm(adev)->primary; |
2521 | struct dentry *root = minor->debugfs_root; |
2522 | |
	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     TTM_PL_VRAM),
					    root, "amdgpu_vram_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     TTM_PL_TT),
					    root, "amdgpu_gtt_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_GDS),
					    root, "amdgpu_gds_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_GWS),
					    root, "amdgpu_gws_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_OA),
					    root, "amdgpu_oa_mm");
2544 | |
2545 | #endif |
2546 | } |
2547 | |