/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>

#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */
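
/*
 * Illustrative sketch of the usual host-side submission flow built on the
 * helpers below (amdgpu_ring_alloc()/amdgpu_ring_write()/amdgpu_ring_commit());
 * the packet words here are hypothetical, only the call sequence matters:
 *
 *	if (amdgpu_ring_alloc(ring, 16))	// reserve 16 dwords
 *		return -ENOMEM;
 *	amdgpu_ring_write(ring, packet_header);	// host fills the ring ...
 *	amdgpu_ring_write(ring, packet_body);
 *	amdgpu_ring_commit(ring);		// ... then bumps wptr so the GPU fetches
 */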

/**
 * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
 *
 * @type: ring type for which to return the limit.
 */
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
{
	switch (type) {
	case AMDGPU_RING_TYPE_GFX:
		/* Need to keep at least 192 on GFX7+ for old radv. */
		return 192;
	case AMDGPU_RING_TYPE_COMPUTE:
		return 125;
	case AMDGPU_RING_TYPE_VCN_JPEG:
		return 16;
	default:
		return 49;
	}
}

/**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

	/* Make sure we aren't trying to allocate more space
	 * than the maximum for one submission
	 */
	if (WARN_ON_ONCE(ndw > ring->max_dw))
		return -ENOMEM;

	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;

	if (ring->funcs->begin_use)
		ring->funcs->begin_use(ring);

	return 0;
}

/**
 * amdgpu_ring_insert_nop - insert NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @count: the number of NOP packets to insert
 *
 * This is the generic insert_nop function for rings except SDMA
 */
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * amdgpu_ring_generic_pad_ib - pad IB with NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ib: IB to add NOP packets to
 *
 * This is the generic pad_ib function for rings except SDMA
 */
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & ring->funcs->align_mask)
		ib->ptr[ib->length_dw++] = ring->funcs->nop;
}

/**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
	uint32_t count;

	/* We pad to match fetch size */
	count = ring->funcs->align_mask + 1 -
		(ring->wptr & ring->funcs->align_mask);
	count %= ring->funcs->align_mask + 1;
	ring->funcs->insert_nop(ring, count);

	mb();
	amdgpu_ring_set_wptr(ring);

	if (ring->funcs->end_use)
		ring->funcs->end_use(ring);
}
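
/*
 * Worked example of the padding above, with hypothetical numbers: for
 * align_mask = 7 (8-dword fetch granularity) and wptr = 13,
 * count = (7 + 1 - (13 & 7)) % 8 = 3, so three NOPs advance wptr to 16,
 * the next fetch boundary; a wptr already on a boundary gets no padding.
 */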

/**
 * amdgpu_ring_undo - reset the wptr
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void amdgpu_ring_undo(struct amdgpu_ring *ring)
{
	ring->wptr = ring->wptr_old;

	if (ring->funcs->end_use)
		ring->funcs->end_use(ring);
}

#define amdgpu_ring_get_gpu_addr(ring, offset)				\
	(ring->is_mes_queue ?						\
	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
	 (ring->adev->wb.gpu_addr + offset * 4))

#define amdgpu_ring_get_cpu_addr(ring, offset)				\
	(ring->is_mes_queue ?						\
	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) :	\
	 (&ring->adev->wb.wb[offset]))
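
/*
 * Note on units (descriptive only): for regular rings the offset is a dword
 * index into the writeback area, hence the "* 4" when forming the byte-based
 * GPU address and the direct wb.wb[offset] indexing for the CPU address; for
 * MES queues the offset is a byte offset into the queue's metadata buffer.
 */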

/**
 * amdgpu_ring_init - init driver ring struct.
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring structure holding ring information
 * @max_dw: maximum number of dw for ring alloc
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 * @hw_prio: ring priority (NORMAL/HIGH)
 * @sched_score: optional score atomic shared with other schedulers
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
		     unsigned int irq_type, unsigned int hw_prio,
		     atomic_t *sched_score)
{
	int r;
	int sched_hw_submission = amdgpu_sched_hw_submission;
	u32 *num_sched;
	u32 hw_ip;
	unsigned int max_ibs_dw;

	/* Set the hw submission limit higher for KIQ because
	 * it's used for a number of gfx/compute tasks by both
	 * KFD and KGD which may have outstanding fences and
	 * it doesn't really use the gpu scheduler anyway;
	 * KIQ tasks get submitted directly to the ring.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		sched_hw_submission = max(sched_hw_submission, 256);
	else if (ring == &adev->sdma.instance[0].page)
		sched_hw_submission = 256;

	if (ring->adev == NULL) {
		if (adev->num_rings >= AMDGPU_MAX_RINGS)
			return -EINVAL;

		ring->adev = adev;
		ring->num_hw_submission = sched_hw_submission;
		ring->sched_score = sched_score;
		ring->vmid_wait = dma_fence_get_stub();

		if (!ring->is_mes_queue) {
			ring->idx = adev->num_rings++;
			adev->rings[ring->idx] = ring;
		}

		r = amdgpu_fence_driver_init_ring(ring);
		if (r)
			return r;
	}

	if (ring->is_mes_queue) {
		ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
							  AMDGPU_MES_CTX_RPTR_OFFS);
		ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
							  AMDGPU_MES_CTX_WPTR_OFFS);
		ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
							   AMDGPU_MES_CTX_FENCE_OFFS);
		ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
								 AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
		ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
							      AMDGPU_MES_CTX_COND_EXE_OFFS);
	} else {
		r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->fence_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
			return r;
		}
	}

	ring->fence_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
	ring->fence_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);

	ring->rptr_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
	ring->rptr_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);

	ring->wptr_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
	ring->wptr_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);

	ring->trail_fence_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
	ring->trail_fence_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);

	ring->cond_exe_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
	ring->cond_exe_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);

	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
	if (r) {
		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}

	max_ibs_dw = ring->funcs->emit_frame_size +
		     amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
	max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

	if (WARN_ON(max_ibs_dw > max_dw))
		max_dw = max_ibs_dw;

	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);

	ring->buf_mask = (ring->ring_size / 4) - 1;
	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
		0xffffffffffffffff : ring->buf_mask;

	/* Allocate ring buffer */
	if (ring->is_mes_queue) {
		int offset = 0;

		BUG_ON(ring->ring_size > PAGE_SIZE * 4);

		offset = amdgpu_mes_ctx_get_offs(ring,
						 AMDGPU_MES_CTX_RING_OFFS);
		ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
		amdgpu_ring_clear_ring(ring);

	} else if (ring->ring_obj == NULL) {
		r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ring->ring_obj,
					    &ring->gpu_addr,
					    (void **)&ring->ring);
		if (r) {
			dev_err(adev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		amdgpu_ring_clear_ring(ring);
	}

	ring->max_dw = max_dw;
	ring->hw_prio = hw_prio;

	if (!ring->no_scheduler) {
		hw_ip = ring->funcs->type;
		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
			&ring->sched;
	}

	return 0;
}

/**
 * amdgpu_ring_fini - tear down the driver ring struct.
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{

	/* Don't tear down a ring that was never initialized */
	if (!(ring->adev) ||
	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
		return;

	ring->sched.ready = false;

	if (!ring->is_mes_queue) {
		amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
		amdgpu_device_wb_free(ring->adev, ring->wptr_offs);

		amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
		amdgpu_device_wb_free(ring->adev, ring->fence_offs);

		amdgpu_bo_free_kernel(&ring->ring_obj,
				      &ring->gpu_addr,
				      (void **)&ring->ring);
	} else {
		kfree(ring->fence_drv.fences);
	}

	dma_fence_put(ring->vmid_wait);
	ring->vmid_wait = NULL;
	ring->me = 0;

	if (!ring->is_mes_queue)
		ring->adev->rings[ring->idx] = NULL;
}

/**
 * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
 *
 * @ring: ring to write to
 * @reg0: register to write
 * @reg1: register to wait on
 * @ref: reference value to write/wait on
 * @mask: mask to wait on
 *
 * Helper for rings that don't support write and wait in a
 * single oneshot packet.
 */
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
						uint32_t reg0, uint32_t reg1,
						uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}

/**
 * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
 *
 * @ring: ring to try the recovery on
 * @vmid: VMID we try to get going again
 * @fence: timedout fence
 *
 * Tries to get a ring proceeding again when it is stuck.
 */
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
			       struct dma_fence *fence)
{
	unsigned long flags;
	ktime_t deadline;

	if (unlikely(ring->adev->debug_disable_soft_recovery))
		return false;

	deadline = ktime_add_us(ktime_get(), 10000);

	if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
		return false;

	spin_lock_irqsave(fence->lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		dma_fence_set_error(fence, -ENODATA);
	spin_unlock_irqrestore(fence->lock, flags);

	atomic_inc(&ring->adev->gpu_reset_counter);
	while (!dma_fence_is_signaled(fence) &&
	       ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
		ring->funcs->soft_recovery(ring, vmid);

	return dma_fence_is_signaled(fence);
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

/* Layout of file is 12 bytes consisting of
 * - rptr
 * - wptr
 * - driver's copy of wptr
 *
 * followed by n-words of ring data
 */
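
/*
 * A minimal userspace sketch of reading this file (the debugfs path and ring
 * name are hypothetical, and error handling/includes are omitted); the first
 * three dwords are rptr, wptr and the driver's copy of wptr, the rest is raw
 * ring contents:
 *
 *	uint32_t hdr[3];
 *	int fd = open("/sys/kernel/debug/dri/0/amdgpu_ring_gfx", O_RDONLY);
 *
 *	if (fd >= 0 && read(fd, hdr, sizeof(hdr)) == sizeof(hdr))
 *		printf("rptr %u wptr %u driver wptr %u\n", hdr[0], hdr[1], hdr[2]);
 */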
static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	int r, i;
	uint32_t value, result, early[3];

	if (*pos & 3 || size & 3)
		return -EINVAL;

	result = 0;

	if (*pos < 12) {
		early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
		early[2] = ring->wptr & ring->buf_mask;
		for (i = *pos / 4; i < 3 && size; i++) {
			r = put_user(early[i], (uint32_t *)buf);
			if (r)
				return r;
			buf += 4;
			result += 4;
			size -= 4;
			*pos += 4;
		}
	}

	while (size) {
		if (*pos >= (ring->ring_size + 12))
			return result;

		value = ring->ring[(*pos - 12) / 4];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;
		buf += 4;
		result += 4;
		size -= 4;
		*pos += 4;
	}

	return result;
}

static const struct file_operations amdgpu_debugfs_ring_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_ring_read,
	.llseek = default_llseek
};

static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
				       size_t size, loff_t *pos)
{
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	volatile u32 *mqd;
	u32 *kbuf;
	int r, i;
	uint32_t value, result;

	if (*pos & 3 || size & 3)
		return -EINVAL;

	kbuf = kmalloc(ring->mqd_size, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto err_free;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
	if (r)
		goto err_unreserve;

	/*
	 * Copy to local buffer to avoid put_user(), which might fault
	 * and acquire mmap_sem, under reservation_ww_class_mutex.
	 */
	for (i = 0; i < ring->mqd_size / sizeof(u32); i++)
		kbuf[i] = mqd[i];

	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);

	result = 0;
	while (size) {
		if (*pos >= ring->mqd_size)
			break;

		value = kbuf[*pos / 4];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			goto err_free;
		buf += 4;
		result += 4;
		size -= 4;
		*pos += 4;
	}

	kfree(kbuf);
	return result;

err_unreserve:
	amdgpu_bo_unreserve(ring->mqd_obj);
err_free:
	kfree(kbuf);
	return r;
}

static const struct file_operations amdgpu_debugfs_mqd_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_mqd_read,
	.llseek = default_llseek
};

static int amdgpu_debugfs_ring_error(void *data, u64 val)
{
	struct amdgpu_ring *ring = data;

	amdgpu_fence_driver_set_error(ring, val);
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
				amdgpu_debugfs_ring_error, "%lld\n");

#endif

void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	sprintf(name, "amdgpu_ring_%s", ring->name);
	debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
				 &amdgpu_debugfs_ring_fops,
				 ring->ring_size + 12);

	if (ring->mqd_obj) {
		sprintf(name, "amdgpu_mqd_%s", ring->name);
		debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
					 &amdgpu_debugfs_mqd_fops,
					 ring->mqd_size);
	}

	sprintf(name, "amdgpu_error_%s", ring->name);
	debugfs_create_file(name, 0200, root, ring,
			    &amdgpu_debugfs_error_fops);

#endif
}

/**
 * amdgpu_ring_test_helper - test the ring and set scheduler readiness status
 *
 * @ring: ring to test
 *
 * Tests the ring and sets the scheduler readiness status accordingly.
 *
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);
	else
		DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
			      ring->name);

	ring->sched.ready = !r;

	return r;
}

static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
				    struct amdgpu_mqd_prop *prop)
{
	struct amdgpu_device *adev = ring->adev;
	bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
				    amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
	bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
				amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);

	memset(prop, 0, sizeof(*prop));

	prop->mqd_gpu_addr = ring->mqd_gpu_addr;
	prop->hqd_base_gpu_addr = ring->gpu_addr;
	prop->rptr_gpu_addr = ring->rptr_gpu_addr;
	prop->wptr_gpu_addr = ring->wptr_gpu_addr;
	prop->queue_size = ring->ring_size;
	prop->eop_gpu_addr = ring->eop_gpu_addr;
	prop->use_doorbell = ring->use_doorbell;
	prop->doorbell_index = ring->doorbell_index;

	/* The map_queues packet doesn't need to activate the queue,
	 * so only KIQ needs to set this field.
	 */
	prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;

	prop->allow_tunneling = is_high_prio_compute;
	if (is_high_prio_compute || is_high_prio_gfx) {
		prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
		prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
	}
}

int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_mqd *mqd_mgr;
	struct amdgpu_mqd_prop prop;

	amdgpu_ring_to_mqd_prop(ring, &prop);

	ring->wptr = 0;

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
	else
		mqd_mgr = &adev->mqds[ring->funcs->type];

	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
}

void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_begin(ring);
}

void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_end(ring);
}

void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
}

void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
}

void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
}

bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
{
	if (!ring)
		return false;

	if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
		return false;

	return true;
}