// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_exec.h>

#include "amdgpu.h"
#include "amdgpu_umsch_mm.h"
#include "umsch_mm_v4_0.h"
struct umsch_mm_test_ctx_data {
	uint8_t process_csa[PAGE_SIZE];
	uint8_t vpe_ctx_csa[PAGE_SIZE];
	uint8_t vcn_ctx_csa[PAGE_SIZE];
};

struct umsch_mm_test_mqd_data {
	uint8_t vpe_mqd[PAGE_SIZE];
	uint8_t vcn_mqd[PAGE_SIZE];
};

struct umsch_mm_test_ring_data {
	uint8_t vpe_ring[PAGE_SIZE];
	uint8_t vpe_ib[PAGE_SIZE];
	uint8_t vcn_ring[PAGE_SIZE];
	uint8_t vcn_ib[PAGE_SIZE];
};

struct umsch_mm_test_queue_info {
	uint64_t mqd_addr;
	uint64_t csa_addr;
	uint32_t doorbell_offset_0;
	uint32_t doorbell_offset_1;
	enum UMSCH_SWIP_ENGINE_TYPE engine;
};

struct umsch_mm_test {
	struct amdgpu_bo *ctx_data_obj;
	uint64_t ctx_data_gpu_addr;
	uint32_t *ctx_data_cpu_addr;

	struct amdgpu_bo *mqd_data_obj;
	uint64_t mqd_data_gpu_addr;
	uint32_t *mqd_data_cpu_addr;

	struct amdgpu_bo *ring_data_obj;
	uint64_t ring_data_gpu_addr;
	uint32_t *ring_data_cpu_addr;

	struct amdgpu_vm *vm;
	struct amdgpu_bo_va *bo_va;
	uint32_t pasid;
	uint32_t vm_cntx_cntl;
	uint32_t num_queues;
};

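/*
 * Map @bo into the test VM at @addr: lock the BO and the VM page directory
 * via drm_exec, create a bo_va with R/W/X PTE flags, then update the page
 * tables and wait for the updates to complete before returning.
 */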
static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			 struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
			 uint64_t addr, uint32_t size)
{
	struct amdgpu_sync sync;
	struct drm_exec exec;
	int r;

	amdgpu_sync_create(&sync);

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_lock_obj(&exec, &bo->tbo.base);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto error_fini_exec;

		r = amdgpu_vm_lock_pd(vm, &exec, 0);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto error_fini_exec;
	}

	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
	if (!*bo_va) {
		r = -ENOMEM;
		goto error_fini_exec;
	}

	r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size,
			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
			     AMDGPU_PTE_EXECUTABLE);
	if (r)
		goto error_del_bo_va;

	r = amdgpu_vm_bo_update(adev, *bo_va, false);
	if (r)
		goto error_del_bo_va;

	amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update);

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		goto error_del_bo_va;

	amdgpu_sync_fence(&sync, vm->last_update);

	amdgpu_sync_wait(&sync, false);
	drm_exec_fini(&exec);

	amdgpu_sync_free(&sync);

	return 0;

error_del_bo_va:
	amdgpu_vm_bo_del(adev, *bo_va);
	amdgpu_sync_free(&sync);

error_fini_exec:
	drm_exec_fini(&exec);
	amdgpu_sync_free(&sync);
	return r;
}

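/*
 * Undo map_ring_data(): unmap @addr and drop the bo_va under the same
 * BO/page-directory locking scheme.
 */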
static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			   struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
			   uint64_t addr)
{
	struct drm_exec exec;
	long r;

	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_lock_obj(&exec, &bo->tbo.base);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto out_unlock;

		r = amdgpu_vm_lock_pd(vm, &exec, 0);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto out_unlock;
	}

	r = amdgpu_vm_bo_unmap(adev, bo_va, addr);
	if (r)
		goto out_unlock;

	amdgpu_vm_bo_del(adev, bo_va);

out_unlock:
	drm_exec_fini(&exec);

	return r;
}

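/*
 * Populate a minimal VPE MQD for the test queue: ring buffer base and size,
 * zeroed read/write pointers, and the unmapped flag set. In collaborate
 * mode the MQD is duplicated directly behind the first one for the second
 * VPE instance.
 */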
static void setup_vpe_queue(struct amdgpu_device *adev,
			    struct umsch_mm_test *test,
			    struct umsch_mm_test_queue_info *qinfo)
{
	struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr;
	uint64_t ring_gpu_addr = test->ring_data_gpu_addr;

	mqd->rb_base_lo = (ring_gpu_addr >> 8);
	mqd->rb_base_hi = (ring_gpu_addr >> 40);
	mqd->rb_size = PAGE_SIZE / 4;
	mqd->wptr_val = 0;
	mqd->rptr_val = 0;
	mqd->unmapped = 1;

	if (adev->vpe.collaborate_mode)
		memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO));

	qinfo->mqd_addr = test->mqd_data_gpu_addr;
	qinfo->csa_addr = test->ctx_data_gpu_addr +
		offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa);
	qinfo->doorbell_offset_0 = 0;
	qinfo->doorbell_offset_1 = 0;
}

static void setup_vcn_queue(struct amdgpu_device *adev,
			    struct umsch_mm_test *test,
			    struct umsch_mm_test_queue_info *qinfo)
{
	/* Intentionally empty: the self test only exercises VPE queues. */
}

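/*
 * Build an ADD_QUEUE input from the test context (PASID, page table base,
 * VA range, CSA addresses, scheduling quanta and priorities) and pass it
 * to the UMSCH firmware under the scheduler lock.
 */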
static int add_test_queue(struct amdgpu_device *adev,
			  struct umsch_mm_test *test,
			  struct umsch_mm_test_queue_info *qinfo)
{
	struct umsch_mm_add_queue_input queue_input = {};
	int r;

	queue_input.process_id = test->pasid;
	queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(test->vm->root.bo);

	queue_input.process_va_start = 0;
	queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;

	queue_input.process_quantum = 100000; /* 10ms */
	queue_input.process_csa_addr = test->ctx_data_gpu_addr +
				       offsetof(struct umsch_mm_test_ctx_data, process_csa);

	queue_input.context_quantum = 10000; /* 1ms */
	queue_input.context_csa_addr = qinfo->csa_addr;

	queue_input.inprocess_context_priority = CONTEXT_PRIORITY_LEVEL_NORMAL;
	queue_input.context_global_priority_level = CONTEXT_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0;
	queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1;

	queue_input.engine_type = qinfo->engine;
	queue_input.mqd_addr = qinfo->mqd_addr;
	queue_input.vm_context_cntl = test->vm_cntx_cntl;

	amdgpu_umsch_mm_lock(&adev->umsch_mm);
	r = adev->umsch_mm.funcs->add_queue(&adev->umsch_mm, &queue_input);
	amdgpu_umsch_mm_unlock(&adev->umsch_mm);
	if (r)
		return r;

	return 0;
}

static int remove_test_queue(struct amdgpu_device *adev,
			     struct umsch_mm_test *test,
			     struct umsch_mm_test_queue_info *qinfo)
{
	struct umsch_mm_remove_queue_input queue_input = {};
	int r;

	queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0;
	queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1;
	queue_input.context_csa_addr = qinfo->csa_addr;

	amdgpu_umsch_mm_lock(&adev->umsch_mm);
	r = adev->umsch_mm.funcs->remove_queue(&adev->umsch_mm, &queue_input);
	amdgpu_umsch_mm_unlock(&adev->umsch_mm);
	if (r)
		return r;

	return 0;
}

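/*
 * Submit a trivial job through the scheduled VPE queue: the ring holds an
 * indirect-buffer command pointing at an IB that contains a single fence
 * command. The fence writes 0xdeadbeef to a known location, which is then
 * polled until it shows up or adev->usec_timeout microseconds elapse.
 */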
static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *test)
{
	struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr;
	uint32_t *ring = test->ring_data_cpu_addr +
		offsetof(struct umsch_mm_test_ring_data, vpe_ring) / 4;
	uint32_t *ib = test->ring_data_cpu_addr +
		offsetof(struct umsch_mm_test_ring_data, vpe_ib) / 4;
	uint64_t ib_gpu_addr = test->ring_data_gpu_addr +
		offsetof(struct umsch_mm_test_ring_data, vpe_ib);
	uint32_t *fence = ib + 2048 / 4;
	uint64_t fence_gpu_addr = ib_gpu_addr + 2048;
	const uint32_t test_pattern = 0xdeadbeef;
	int i;

	ib[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
	ib[1] = lower_32_bits(fence_gpu_addr);
	ib[2] = upper_32_bits(fence_gpu_addr);
	ib[3] = test_pattern;

	ring[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0);
	ring[1] = (ib_gpu_addr & 0xffffffe0);
	ring[2] = upper_32_bits(ib_gpu_addr);
	ring[3] = 4;
	ring[4] = 0;
	ring[5] = 0;

	mqd->wptr_val = (6 << 2);
	if (adev->vpe.collaborate_mode)
		(++mqd)->wptr_val = (6 << 2);

	WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (*fence == test_pattern)
			return 0;
		udelay(1);
	}

	dev_err(adev->dev, "vpe queue submission timeout\n");

	return -ETIMEDOUT;
}

static int submit_vcn_queue(struct amdgpu_device *adev, struct umsch_mm_test *test)
{
	return 0;
}

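/*
 * Allocate everything the self test needs: a scratch VM with its own PASID,
 * GTT buffers for the process/context CSAs, the MQDs and the ring/IB data,
 * and a GPU VA mapping of the ring data at the bottom of the reserved VA
 * range.
 */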
static int setup_umsch_mm_test(struct amdgpu_device *adev,
			       struct umsch_mm_test *test)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
	int r;

	test->vm_cntx_cntl = hub->vm_cntx_cntl;

	test->vm = kzalloc(sizeof(*test->vm), GFP_KERNEL);
	if (!test->vm) {
		r = -ENOMEM;
		return r;
	}

	r = amdgpu_vm_init(adev, test->vm, -1);
	if (r)
		goto error_free_vm;

	r = amdgpu_pasid_alloc(16);
	if (r < 0)
		goto error_fini_vm;
	test->pasid = r;

	r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ctx_data),
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				    &test->ctx_data_obj,
				    &test->ctx_data_gpu_addr,
				    (void **)&test->ctx_data_cpu_addr);
	if (r)
		goto error_free_pasid;

	memset(test->ctx_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ctx_data));

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				    &test->mqd_data_obj,
				    &test->mqd_data_gpu_addr,
				    (void **)&test->mqd_data_cpu_addr);
	if (r)
		goto error_free_ctx_data_obj;

	memset(test->mqd_data_cpu_addr, 0, PAGE_SIZE);

	r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ring_data),
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				    &test->ring_data_obj,
				    NULL,
				    (void **)&test->ring_data_cpu_addr);
	if (r)
		goto error_free_mqd_data_obj;

	memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data));

	test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM;
	r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va,
			  test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data));
	if (r)
		goto error_free_ring_data_obj;

	return 0;

error_free_ring_data_obj:
	amdgpu_bo_free_kernel(&test->ring_data_obj, NULL,
			      (void **)&test->ring_data_cpu_addr);
error_free_mqd_data_obj:
	amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr,
			      (void **)&test->mqd_data_cpu_addr);
error_free_ctx_data_obj:
	amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr,
			      (void **)&test->ctx_data_cpu_addr);
error_free_pasid:
	amdgpu_pasid_free(test->pasid);
error_fini_vm:
	amdgpu_vm_fini(adev, test->vm);
error_free_vm:
	kfree(test->vm);

	return r;
}

static void cleanup_umsch_mm_test(struct amdgpu_device *adev,
				  struct umsch_mm_test *test)
{
	unmap_ring_data(adev, test->vm, test->ring_data_obj,
			test->bo_va, test->ring_data_gpu_addr);
	amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr,
			      (void **)&test->mqd_data_cpu_addr);
	amdgpu_bo_free_kernel(&test->ring_data_obj, NULL,
			      (void **)&test->ring_data_cpu_addr);
	amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr,
			      (void **)&test->ctx_data_cpu_addr);
	amdgpu_pasid_free(test->pasid);
	amdgpu_vm_fini(adev, test->vm);
	kfree(test->vm);
}

static int setup_test_queues(struct amdgpu_device *adev,
			     struct umsch_mm_test *test,
			     struct umsch_mm_test_queue_info *qinfo)
{
	int i, r;

	for (i = 0; i < test->num_queues; i++) {
		if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE)
			setup_vpe_queue(adev, test, &qinfo[i]);
		else
			setup_vcn_queue(adev, test, &qinfo[i]);

		r = add_test_queue(adev, test, &qinfo[i]);
		if (r)
			return r;
	}

	return 0;
}

static int submit_test_queues(struct amdgpu_device *adev,
			      struct umsch_mm_test *test,
			      struct umsch_mm_test_queue_info *qinfo)
{
	int i, r;

	for (i = 0; i < test->num_queues; i++) {
		if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE)
			r = submit_vpe_queue(adev, test);
		else
			r = submit_vcn_queue(adev, test);
		if (r)
			return r;
	}

	return 0;
}

static void cleanup_test_queues(struct amdgpu_device *adev,
				struct umsch_mm_test *test,
				struct umsch_mm_test_queue_info *qinfo)
{
	int i;

	for (i = 0; i < test->num_queues; i++)
		remove_test_queue(adev, test, &qinfo[i]);
}

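/*
 * UMSCH self test, run from late_init: set up the test VM and buffers,
 * schedule a single VPE queue, submit one fence write through it, and tear
 * everything down again.
 */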
static int umsch_mm_test(struct amdgpu_device *adev)
{
	struct umsch_mm_test_queue_info qinfo[] = {
		{ .engine = UMSCH_SWIP_ENGINE_TYPE_VPE },
	};
	struct umsch_mm_test test = { .num_queues = ARRAY_SIZE(qinfo) };
	int r;

	r = setup_umsch_mm_test(adev, &test);
	if (r)
		return r;

	r = setup_test_queues(adev, &test, qinfo);
	if (r)
		goto cleanup;

	r = submit_test_queues(adev, &test, qinfo);
	if (r)
		goto cleanup;

	cleanup_test_queues(adev, &test, qinfo);
	cleanup_umsch_mm_test(adev, &test);

	return 0;

cleanup:
	cleanup_test_queues(adev, &test, qinfo);
	cleanup_umsch_mm_test(adev, &test);
	return r;
}

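/* Copy a packet of @ndws dwords into the UMSCH ring and commit it. */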
int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
{
	struct amdgpu_ring *ring = &umsch->ring;

	if (amdgpu_ring_alloc(ring, ndws))
		return -ENOMEM;

	amdgpu_ring_write_multiple(ring, pkt, ndws);
	amdgpu_ring_commit(ring);

	return 0;
}

int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch)
{
	struct amdgpu_ring *ring = &umsch->ring;
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, adev->usec_timeout);
	if (r < 1) {
		dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n",
			ring->fence_drv.sync_seq);
		return -ETIMEDOUT;
	}

	return 0;
}

static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
	else
		WREG32(umsch->rb_wptr, ring->wptr << 2);
}

static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
	struct amdgpu_device *adev = ring->adev;

	return RREG32(umsch->rb_rptr);
}

static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
	struct amdgpu_device *adev = ring->adev;

	return RREG32(umsch->rb_wptr);
}

static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_UMSCH_MM,
	.align_mask = 0,
	.nop = 0,
	.support_64bit_ptrs = false,
	.get_rptr = umsch_mm_ring_get_rptr,
	.get_wptr = umsch_mm_ring_get_wptr,
	.set_wptr = umsch_mm_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};

int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch)
{
	struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm);
	struct amdgpu_ring *ring = &umsch->ring;

	ring->vm_hub = AMDGPU_MMHUB0(0);
	ring->use_doorbell = true;
	ring->no_scheduler = true;
	ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6;

	snprintf(ring->name, sizeof(ring->name), "umsch");

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
}

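/*
 * Request the UMSCH firmware matching the VCN IP version and cache the
 * ucode/data sizes and start addresses from its header. For PSP front-door
 * loading, the ucode and data sections are also registered in the global
 * firmware list.
 */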
int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
{
	const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
	struct amdgpu_device *adev = umsch->ring.adev;
	const char *fw_name = NULL;
	int r;

	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
	case IP_VERSION(4, 0, 5):
	case IP_VERSION(4, 0, 6):
		fw_name = "amdgpu/umsch_mm_4_0_0.bin";
		break;
	default:
		break;
	}

	r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name);
	if (r) {
		release_firmware(adev->umsch_mm.fw);
		adev->umsch_mm.fw = NULL;
		return r;
	}

	umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;

	adev->umsch_mm.ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
	adev->umsch_mm.data_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);

	adev->umsch_mm.irq_start_addr =
		le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_lo) |
		((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_hi)) << 32);
	adev->umsch_mm.uc_start_addr =
		le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_hi)) << 32);
	adev->umsch_mm.data_start_addr =
		le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		struct amdgpu_firmware_info *info;

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
		info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
		info->fw = adev->umsch_mm.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
		info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
		info->fw = adev->umsch_mm.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
	}

	return 0;
}

int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch)
{
	const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
	struct amdgpu_device *adev = umsch->ring.adev;
	const __le32 *fw_data;
	uint32_t fw_size;
	int r;

	umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
		       adev->umsch_mm.fw->data;

	fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
		  le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_offset_bytes));
	fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      4 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->umsch_mm.ucode_fw_obj,
				      &adev->umsch_mm.ucode_fw_gpu_addr,
				      (void **)&adev->umsch_mm.ucode_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r);
		return r;
	}

	memcpy(adev->umsch_mm.ucode_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->umsch_mm.ucode_fw_obj);
	amdgpu_bo_unreserve(adev->umsch_mm.ucode_fw_obj);
	return 0;
}

int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
{
	const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
	struct amdgpu_device *adev = umsch->ring.adev;
	const __le32 *fw_data;
	uint32_t fw_size;
	int r;

	umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
		       adev->umsch_mm.fw->data;

	fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
		  le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->umsch_mm.data_fw_obj,
				      &adev->umsch_mm.data_fw_gpu_addr,
				      (void **)&adev->umsch_mm.data_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r);
		return r;
	}

	memcpy(adev->umsch_mm.data_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->umsch_mm.data_fw_obj);
	amdgpu_bo_unreserve(adev->umsch_mm.data_fw_obj);
	return 0;
}

int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch)
{
	struct amdgpu_device *adev = umsch->ring.adev;
	struct amdgpu_firmware_info ucode = {
		.ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
		.mc_addr = adev->umsch_mm.cmd_buf_gpu_addr,
		.ucode_size = ((uintptr_t)adev->umsch_mm.cmd_buf_curr_ptr -
			       (uintptr_t)adev->umsch_mm.cmd_buf_ptr),
	};

	return psp_execute_ip_fw_load(&adev->psp, &ucode);
}

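/*
 * Pick aggregated doorbell indices for UMSCH, one per context priority
 * level, in a block placed past the statically assigned doorbell range
 * (rounded up to a 1024-entry boundary).
 */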
static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
{
	uint32_t umsch_mm_agdb_start;
	int i;

	umsch_mm_agdb_start = adev->doorbell_index.max_assignment + 1;
	umsch_mm_agdb_start = roundup(umsch_mm_agdb_start, 1024);
	umsch_mm_agdb_start += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);

	for (i = 0; i < CONTEXT_PRIORITY_NUM_LEVELS; i++)
		adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start + i;
}

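/*
 * One-time software init: fixed VMID/engine/HQD masks for VPE, a writeback
 * slot used as the scheduler context address, and a GTT command buffer for
 * the PSP command path.
 */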
static int umsch_mm_init(struct amdgpu_device *adev)
{
	int r;

	adev->umsch_mm.vmid_mask_mm_vpe = 0xf00;
	adev->umsch_mm.engine_mask = (1 << UMSCH_SWIP_ENGINE_TYPE_VPE);
	adev->umsch_mm.vpe_hqd_mask = 0xfe;

	r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index);
	if (r) {
		dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r);
		return r;
	}

	adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
					  (adev->umsch_mm.wb_index * 4);

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->umsch_mm.cmd_buf_obj,
				    &adev->umsch_mm.cmd_buf_gpu_addr,
				    (void **)&adev->umsch_mm.cmd_buf_ptr);
	if (r) {
		dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
		amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
		return r;
	}

	mutex_init(&adev->umsch_mm.mutex_hidden);

	umsch_mm_agdb_index_init(adev);

	return 0;
}

static int umsch_mm_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
	case IP_VERSION(4, 0, 5):
	case IP_VERSION(4, 0, 6):
		umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
		break;
	default:
		return -EINVAL;
	}

	adev->umsch_mm.ring.funcs = &umsch_v4_0_ring_funcs;
	umsch_mm_set_regs(&adev->umsch_mm);

	return 0;
}

static int umsch_mm_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return umsch_mm_test(adev);
}

static int umsch_mm_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = umsch_mm_init(adev);
	if (r)
		return r;

	r = umsch_mm_ring_init(&adev->umsch_mm);
	if (r)
		return r;

	r = umsch_mm_init_microcode(&adev->umsch_mm);
	if (r)
		return r;

	return 0;
}

static int umsch_mm_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	release_firmware(adev->umsch_mm.fw);
	adev->umsch_mm.fw = NULL;

	amdgpu_ring_fini(&adev->umsch_mm.ring);

	mutex_destroy(&adev->umsch_mm.mutex_hidden);

	amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
			      &adev->umsch_mm.cmd_buf_gpu_addr,
			      (void **)&adev->umsch_mm.cmd_buf_ptr);

	amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);

	return 0;
}

static int umsch_mm_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = umsch_mm_load_microcode(&adev->umsch_mm);
	if (r)
		return r;

	umsch_mm_ring_start(&adev->umsch_mm);

	r = umsch_mm_set_hw_resources(&adev->umsch_mm);
	if (r)
		return r;

	return 0;
}

static int umsch_mm_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	umsch_mm_ring_stop(&adev->umsch_mm);

	amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
			      &adev->umsch_mm.data_fw_gpu_addr,
			      (void **)&adev->umsch_mm.data_fw_ptr);

	amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
			      &adev->umsch_mm.ucode_fw_gpu_addr,
			      (void **)&adev->umsch_mm.ucode_fw_ptr);
	return 0;
}

static int umsch_mm_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return umsch_mm_hw_fini(adev);
}

static int umsch_mm_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return umsch_mm_hw_init(adev);
}

static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
	.name = "umsch_mm_v4_0",
	.early_init = umsch_mm_early_init,
	.late_init = umsch_mm_late_init,
	.sw_init = umsch_mm_sw_init,
	.sw_fini = umsch_mm_sw_fini,
	.hw_init = umsch_mm_hw_init,
	.hw_fini = umsch_mm_hw_fini,
	.suspend = umsch_mm_suspend,
	.resume = umsch_mm_resume,
};

const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_UMSCH_MM,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &umsch_mm_v4_0_ip_funcs,
};