/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"

/*
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization. When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.
 */
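/*
 * Illustrative sketch (not part of the driver): from the CPU's point of
 * view, the typical flow is to emit a fence behind a batch of ring commands
 * and then sleep on the returned dma_fence.  Error handling is abbreviated.
 *
 *	struct dma_fence *fence;
 *	int r;
 *
 *	r = amdgpu_fence_emit(ring, &fence, NULL, 0);
 *	if (!r) {
 *		r = dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */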

struct amdgpu_fence {
	struct dma_fence base;

	/* RB, DMA, etc. */
	struct amdgpu_ring *ring;
	ktime_t start_timestamp;
};

static struct kmem_cache *amdgpu_fence_slab;

int amdgpu_fence_slab_init(void)
{
	amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN);
	if (!amdgpu_fence_slab)
		return -ENOMEM;
	return 0;
}

void amdgpu_fence_slab_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(amdgpu_fence_slab);
}
/*
 * Cast helper
 */
static const struct dma_fence_ops amdgpu_fence_ops;
static const struct dma_fence_ops amdgpu_job_fence_ops;
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);

	if (__f->base.ops == &amdgpu_fence_ops ||
	    __f->base.ops == &amdgpu_job_fence_ops)
		return __f;

	return NULL;
}
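/*
 * Note on the cast helper above: container_of() steps from the embedded
 * dma_fence back to its amdgpu_fence wrapper, and the ops comparison is
 * what makes the downcast safe.  A dma_fence created by another driver
 * uses different ops, so to_amdgpu_fence() returns NULL for it instead of
 * a bogus pointer.
 */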

/**
 * amdgpu_fence_write - write a fence value
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number to write
 *
 * Writes a fence value to memory (all asics).
 */
static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;

	if (drv->cpu_addr)
		*drv->cpu_addr = cpu_to_le32(seq);
}

/**
 * amdgpu_fence_read - read a fence value
 *
 * @ring: ring the fence is associated with
 *
 * Reads a fence value from memory (all asics).
 * Returns the value of the fence read from memory.
 */
static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	u32 seq = 0;

	if (drv->cpu_addr)
		seq = le32_to_cpu(*drv->cpu_addr);
	else
		seq = atomic_read(&drv->last_seq);

	return seq;
}

/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @f: resulting fence object
 * @job: job the fence is embedded in
 * @flags: flags to pass into the subordinate .emit_fence() call
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
		      unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence;
	struct amdgpu_fence *am_fence;
	struct dma_fence __rcu **ptr;
	uint32_t seq;
	int r;

	if (job == NULL) {
		/* create a separate hw fence */
		am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
		if (am_fence == NULL)
			return -ENOMEM;
		fence = &am_fence->base;
		am_fence->ring = ring;
	} else {
		/* make use of the job-embedded fence */
		fence = &job->hw_fence;
	}

	seq = ++ring->fence_drv.sync_seq;
	if (job && job->job_run_counter) {
		/* reinit seq for resubmitted jobs */
		fence->seqno = seq;
		/* To be in line with external fence creation and other drivers */
		dma_fence_get(fence);
	} else {
		if (job) {
			dma_fence_init(fence, &amdgpu_job_fence_ops,
				       &ring->fence_drv.lock,
				       adev->fence_context + ring->idx, seq);
			/* Against remove in amdgpu_job_{free, free_cb} */
			dma_fence_get(fence);
		} else {
			dma_fence_init(fence, &amdgpu_fence_ops,
				       &ring->fence_drv.lock,
				       adev->fence_context + ring->idx, seq);
		}
	}

	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, flags | AMDGPU_FENCE_FLAG_INT);
	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
	trace_amdgpu_runpm_reference_dumps(1, __func__);
	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
		struct dma_fence *old;

		rcu_read_lock();
		old = dma_fence_get_rcu_safe(ptr);
		rcu_read_unlock();

		if (old) {
			r = dma_fence_wait(old, false);
			dma_fence_put(old);
			if (r)
				return r;
		}
	}

	to_amdgpu_fence(fence)->start_timestamp = ktime_get();

	/* This function can't be called concurrently anyway, otherwise
	 * emitting the fence would mess up the hardware ring buffer.
	 */
	rcu_assign_pointer(*ptr, dma_fence_get(fence));

	*f = fence;

	return 0;
}
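/*
 * Worked example of the fence-slot bookkeeping above: fence_drv.fences holds
 * num_hw_submission * 2 slots, so with 128 hardware submissions the mask is
 * 255 and sequence number 300 lands in slot 300 & 255 = 44.  If that slot
 * still holds an unsignaled fence from an older sequence number, the emit
 * path waits for it before overwriting the slot, which naturally throttles
 * the CPU to at most 2 * num_hw_submission outstanding fences.
 */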

/**
 * amdgpu_fence_emit_polling - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @s: resulting sequence number
 * @timeout: the timeout for waiting in usecs
 *
 * Emits a fence command on the requested ring (all asics).
 * Used for polling fences.
 * Returns 0 on success, -EINVAL or -ETIMEDOUT on failure.
 */
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
			      uint32_t timeout)
{
	uint32_t seq;
	signed long r;

	if (!s)
		return -EINVAL;

	seq = ++ring->fence_drv.sync_seq;
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		return -ETIMEDOUT;

	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, 0);

	*s = seq;

	return 0;
}
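/*
 * Illustrative sketch (not part of the driver): a caller that cannot sleep,
 * e.g. an IB test path, emits a polling fence and later busy-waits on the
 * raw sequence number.  The timeout value is a hypothetical budget for this
 * sketch; error handling is abbreviated.
 *
 *	uint32_t seq;
 *	uint32_t timeout_us = 1000;
 *
 *	if (!amdgpu_fence_emit_polling(ring, &seq, timeout_us)) {
 *		amdgpu_ring_commit(ring);
 *		if (amdgpu_fence_wait_polling(ring, seq, timeout_us) < 1)
 *			;	// timed out, caller decides how to recover
 *	}
 */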

/**
 * amdgpu_fence_schedule_fallback - schedule fallback check
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Start a timer as fallback to our interrupts.
 */
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
	mod_timer(&ring->fence_drv.fallback_timer,
		  jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}

/**
 * amdgpu_fence_process - check for fence activity
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Checks the current fence value and calculates the last
 * signalled fence value. Wakes the fence queue if the
 * sequence number has increased.
 *
 * Returns true if fence was processed
 */
bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct amdgpu_device *adev = ring->adev;
	uint32_t seq, last_seq;

	do {
		last_seq = atomic_read(&ring->fence_drv.last_seq);
		seq = amdgpu_fence_read(ring);

	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);

	if (del_timer(&ring->fence_drv.fallback_timer) &&
	    seq != ring->fence_drv.sync_seq)
		amdgpu_fence_schedule_fallback(ring);

	if (unlikely(seq == last_seq))
		return false;

	last_seq &= drv->num_fences_mask;
	seq &= drv->num_fences_mask;

	do {
		struct dma_fence *fence, **ptr;

		++last_seq;
		last_seq &= drv->num_fences_mask;
		ptr = &drv->fences[last_seq];

		/* There is always exactly one thread signaling this fence slot */
		fence = rcu_dereference_protected(*ptr, 1);
		RCU_INIT_POINTER(*ptr, NULL);

		if (!fence)
			continue;

		dma_fence_signal(fence);
		dma_fence_put(fence);
		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
		trace_amdgpu_runpm_reference_dumps(0, __func__);
	} while (last_seq != seq);

	return true;
}
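/*
 * Worked example for the loop above: if last_seq was 5 and the fence
 * location now reads 8, the slots for sequence numbers 6, 7 and 8 (each
 * masked with num_fences_mask) are drained in order, their fences signaled
 * and their references dropped.  Slots already emptied by a previous pass
 * read back NULL and are simply skipped.
 */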

/**
 * amdgpu_fence_fallback - fallback for hardware interrupts
 *
 * @t: timer context used to obtain the pointer to ring structure
 *
 * Checks for fence activity.
 */
static void amdgpu_fence_fallback(struct timer_list *t)
{
	struct amdgpu_ring *ring = from_timer(ring, t,
					      fence_drv.fallback_timer);

	if (amdgpu_fence_process(ring))
		DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
}

/**
 * amdgpu_fence_wait_empty - wait for all fences to signal
 *
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 */
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
	uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq);
	struct dma_fence *fence, **ptr;
	int r;

	if (!seq)
		return 0;

	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	rcu_read_lock();
	fence = rcu_dereference(*ptr);
	if (!fence || !dma_fence_get_rcu(fence)) {
		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

	r = dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}

/**
 * amdgpu_fence_wait_polling - busy wait for a given sequence number
 *
 * @ring: ring index the fence is associated with
 * @wait_seq: sequence number to wait for
 * @timeout: the timeout for waiting in usecs
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns the remaining time if the sequence number was reached before the
 * timeout, 0 otherwise.
 */
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
				      uint32_t wait_seq,
				      signed long timeout)
{

	while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
		udelay(2);
		timeout -= 2;
	}
	return timeout > 0 ? timeout : 0;
}
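/*
 * Worked example for the loop condition above: the subtraction is done in
 * unsigned 32-bit arithmetic and then interpreted as signed, so it stays
 * correct across sequence-number wraparound.  With wait_seq = 2 and a read
 * value of 0xfffffffe, (int32_t)(2 - 0xfffffffe) = 4 > 0, so the fence is
 * still pending; once the read value reaches or passes 2, the difference
 * drops to <= 0 and the busy wait stops.
 */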
/**
 * amdgpu_fence_count_emitted - get the count of emitted fences
 *
 * @ring: ring the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring. Used by the
 * dynpm code to track ring activity.
 */
unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	emitted = 0x100000000ull;
	emitted -= atomic_read(&ring->fence_drv.last_seq);
	emitted += READ_ONCE(ring->fence_drv.sync_seq);
	return lower_32_bits(emitted);
}
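/*
 * Worked arithmetic for the helper above: emitted = 2^32 - last_seq +
 * sync_seq, truncated to 32 bits.  With last_seq = 100 and sync_seq = 105
 * that yields 5 outstanding fences; with wraparound, e.g. last_seq =
 * 0xfffffffe and sync_seq = 3, it still yields 5.  Doing the sum in 64 bits
 * first avoids any intermediate underflow.
 */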

/**
 * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
 * @ring: ring the fence is associated with
 *
 * Find the earliest fence that has not yet signaled and calculate the time
 * delta between when it was emitted and now.
 */
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct dma_fence *fence;
	uint32_t last_seq, sync_seq;

	last_seq = atomic_read(&ring->fence_drv.last_seq);
	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
	if (last_seq == sync_seq)
		return 0;

	++last_seq;
	last_seq &= drv->num_fences_mask;
	fence = drv->fences[last_seq];
	if (!fence)
		return 0;

	return ktime_us_delta(ktime_get(),
			      to_amdgpu_fence(fence)->start_timestamp);
}

/**
 * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
 * @ring: ring the fence is associated with
 * @seq: the fence seq number to update.
 * @timestamp: the start timestamp to update.
 *
 * This function is called when the fence and its related ib are about to be
 * resubmitted to the GPU in an MCBP scenario. Thus we do not need to consider
 * a race with amdgpu_fence_process modifying the same fence.
 */
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct dma_fence *fence;

	seq &= drv->num_fences_mask;
	fence = drv->fences[seq];
	if (!fence)
		return;

	to_amdgpu_fence(fence)->start_timestamp = timestamp;
}

/**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @ring: ring to start the fence driver on
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned int irq_type)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t index;

	if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
		ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
		ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
	} else {
		/* put fence directly behind firmware */
		index = ALIGN(adev->uvd.fw->size, 8);
		ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
	}
	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));

	ring->fence_drv.irq_src = irq_src;
	ring->fence_drv.irq_type = irq_type;
	ring->fence_drv.initialized = true;

	DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n",
		      ring->name, ring->fence_drv.gpu_addr);
	return 0;
}

/**
 * amdgpu_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @ring: ring to init the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for amdgpu_fence_driver_init().
 */
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (!adev)
		return -EINVAL;

	if (!is_power_of_2(ring->num_hw_submission))
		return -EINVAL;

	ring->fence_drv.cpu_addr = NULL;
	ring->fence_drv.gpu_addr = 0;
	ring->fence_drv.sync_seq = 0;
	atomic_set(&ring->fence_drv.last_seq, 0);
	ring->fence_drv.initialized = false;

	timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);

	ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
	spin_lock_init(&ring->fence_drv.lock);
	ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
					 GFP_KERNEL);

	if (!ring->fence_drv.fences)
		return -ENOMEM;

	return 0;
}
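/*
 * Worked example for the init above: num_hw_submission must be a power of
 * two so that num_fences_mask = num_hw_submission * 2 - 1 is an all-ones
 * bitmask.  With 256 hardware submissions the driver allocates 512 fence
 * slots and the mask is 0x1ff, letting "seq & num_fences_mask" replace a
 * modulo when mapping sequence numbers to slots.
 */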

/**
 * amdgpu_fence_driver_sw_init - init the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * amdgpu_fence_driver_start_ring().
 * Returns 0 for success.
 */
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
{
	return 0;
}

/**
 * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
 * fence driver interrupts need to be restored.
 *
 * @ring: ring to be checked
 *
 * Interrupts for rings that belong to GFX IP don't need to be restored
 * when the target power state is s0ix.
 *
 * Return true if interrupts need to be restored, false otherwise.
 */
static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool is_gfx_power_domain = false;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_SDMA:
		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
		if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
		    IP_VERSION(5, 0, 0))
			is_gfx_power_domain = true;
		break;
	case AMDGPU_RING_TYPE_GFX:
	case AMDGPU_RING_TYPE_COMPUTE:
	case AMDGPU_RING_TYPE_KIQ:
	case AMDGPU_RING_TYPE_MES:
		is_gfx_power_domain = true;
		break;
	default:
		break;
	}

	return !(adev->in_s0ix && is_gfx_power_domain);
}

/**
 * amdgpu_fence_driver_hw_fini - tear down the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		/* You can't wait for HW to signal if it's gone */
		if (!drm_dev_is_unplugged(adev_to_drm(adev)))
			r = amdgpu_fence_wait_empty(ring);
		else
			r = -ENODEV;
		/* no need to trigger GPU reset as we are unloading */
		if (r)
			amdgpu_fence_driver_force_completion(ring);

		if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
		    ring->fence_drv.irq_src &&
		    amdgpu_fence_need_ring_interrupt_restore(ring))
			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);

		del_timer_sync(&ring->fence_drv.fallback_timer);
	}
}

/* Will either stop and flush handlers for amdgpu interrupt or re-enable it */
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
			continue;

		if (stop)
			disable_irq(adev->irq.irq);
		else
			enable_irq(adev->irq.irq);
	}
}

void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
	unsigned int i, j;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		/*
		 * Notice we check for sched.ops since there's some
		 * override on the meaning of sched.ready by amdgpu.
		 * The natural check would be sched.ready, which is
		 * set as drm_sched_init() finishes...
		 */
		if (ring->sched.ops)
			drm_sched_fini(&ring->sched);

		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
			dma_fence_put(ring->fence_drv.fences[j]);
		kfree(ring->fence_drv.fences);
		ring->fence_drv.fences = NULL;
		ring->fence_drv.initialized = false;
	}
}

/**
 * amdgpu_fence_driver_hw_init - enable the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Enable the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * amdgpu_fence_driver_start_ring().
 */
void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		/* enable the interrupt */
		if (ring->fence_drv.irq_src &&
		    amdgpu_fence_need_ring_interrupt_restore(ring))
			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
	}
}

/**
 * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
 *
 * @ring: ring whose job-embedded fences should be cleared
 *
 */
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
{
	int i;
	struct dma_fence *old, **ptr;

	for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
		ptr = &ring->fence_drv.fences[i];
		old = rcu_dereference_protected(*ptr, 1);
		if (old && old->ops == &amdgpu_job_fence_ops) {
			struct amdgpu_job *job;

			/* For non-scheduler bad jobs, i.e. failed ib tests, we need to signal
			 * the fence right here or we won't be able to track them in fence_drv
			 * and they will remain unsignaled during sa_bo free.
			 */
			job = container_of(old, struct amdgpu_job, hw_fence);
			if (!job->base.s_fence && !dma_fence_is_signaled(old))
				dma_fence_signal(old);
			RCU_INIT_POINTER(*ptr, NULL);
			dma_fence_put(old);
		}
	}
}

/**
 * amdgpu_fence_driver_set_error - set error code on fences
 * @ring: the ring which contains the fences
 * @error: the error code to set
 *
 * Set an error code to all the fences pending on the ring.
 */
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	unsigned long flags;

	spin_lock_irqsave(&drv->lock, flags);
	for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
		struct dma_fence *fence;

		fence = rcu_dereference_protected(drv->fences[i],
						  lockdep_is_held(&drv->lock));
		if (fence && !dma_fence_is_signaled_locked(fence))
			dma_fence_set_error(fence, error);
	}
	spin_unlock_irqrestore(&drv->lock, flags);
}

/**
 * amdgpu_fence_driver_force_completion - force signal latest fence of ring
 *
 * @ring: ring whose latest fence should be force-signaled
 *
 */
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
	amdgpu_fence_driver_set_error(ring, -ECANCELED);
	amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
	amdgpu_fence_process(ring);
}

/*
 * Common fence implementation
 */

static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
{
	return "amdgpu";
}

static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
	return (const char *)to_amdgpu_fence(f)->ring->name;
}

static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

	return (const char *)to_amdgpu_ring(job->base.sched)->name;
}

/**
 * amdgpu_fence_enable_signaling - enable signaling on fence
 * @f: fence
 *
 * This function is called with fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
	if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);

	return true;
}

/**
 * amdgpu_job_fence_enable_signaling - enable signaling on job fence
 * @f: fence
 *
 * This is similar to amdgpu_fence_enable_signaling above; it
 * only handles the job embedded fence.
 */
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
{
	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

	if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));

	return true;
}

/**
 * amdgpu_fence_free - free up the fence memory
 *
 * @rcu: RCU callback head
 *
 * Free up the fence memory after the RCU grace period.
 */
static void amdgpu_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

	/* free the slab entry, used for separate (non-job) fences */
	kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
}

/**
 * amdgpu_job_fence_free - free up the job with embedded fence
 *
 * @rcu: RCU callback head
 *
 * Free up the job with embedded fence after the RCU grace period.
 */
static void amdgpu_job_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

	/* free job if fence has a parent job */
	kfree(container_of(f, struct amdgpu_job, hw_fence));
}

/**
 * amdgpu_fence_release - callback that fence can be freed
 *
 * @f: fence
 *
 * This function is called when the reference count becomes zero.
 * It just RCU schedules freeing up the fence.
 */
static void amdgpu_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_fence_free);
}

/**
 * amdgpu_job_fence_release - callback that job embedded fence can be freed
 *
 * @f: fence
 *
 * This is similar to amdgpu_fence_release above; it
 * only handles the job embedded fence.
 */
static void amdgpu_job_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_job_fence_free);
}

static const struct dma_fence_ops amdgpu_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_fence_get_timeline_name,
	.enable_signaling = amdgpu_fence_enable_signaling,
	.release = amdgpu_fence_release,
};

static const struct dma_fence_ops amdgpu_job_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_job_fence_get_timeline_name,
	.enable_signaling = amdgpu_job_fence_enable_signaling,
	.release = amdgpu_job_fence_release,
};

/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = m->private;
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_process(ring);

		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
		seq_printf(m, "Last signaled fence 0x%08x\n",
			   atomic_read(&ring->fence_drv.last_seq));
		seq_printf(m, "Last emitted 0x%08x\n",
			   ring->fence_drv.sync_seq);

		if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
		    ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
			seq_printf(m, "Last signaled trailing fence 0x%08x\n",
				   le32_to_cpu(*ring->trail_fence_cpu_addr));
			seq_printf(m, "Last emitted 0x%08x\n",
				   ring->trail_seq);
		}

		if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
			continue;

		/* set in CP_VMID_PREEMPT and preemption occurred */
		seq_printf(m, "Last preempted 0x%08x\n",
			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
		/* set in CP_VMID_RESET and reset occurred */
		seq_printf(m, "Last reset 0x%08x\n",
			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
		/* Both preemption and reset occurred */
		seq_printf(m, "Last both 0x%08x\n",
			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
	}
	return 0;
}

/*
 * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int gpu_recover_get(void *data, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	struct drm_device *dev = adev_to_drm(adev);
	int r;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0) {
		pm_runtime_put_autosuspend(dev->dev);
		return 0;
	}

	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
		flush_work(&adev->reset_work);

	*val = atomic_read(&adev->reset_domain->reset_res);

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
			 "%lld\n");

static void amdgpu_debugfs_reset_work(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
						  reset_work);

	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}

#endif

void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
			    &amdgpu_debugfs_fence_info_fops);

	if (!amdgpu_sriov_vf(adev)) {

		INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
		debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
				    &amdgpu_debugfs_gpu_recover_fops);
	}
#endif
}