amdgpu_ring_mux.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c]

1	/*
2	* Copyright 2022 Advanced Micro Devices, Inc.
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20	* OTHER DEALINGS IN THE SOFTWARE.
21	*
22	*/
23	#include <linux/slab.h>
24	#include <drm/drm_print.h>
25
26	#include "amdgpu_ring_mux.h"
27	#include "amdgpu_ring.h"
28	#include "amdgpu.h"
29
30	#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
31	#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
32
33	static const struct ring_info {
34	unsigned int hw_pio;
35	const char *ring_name;
36	} sw_ring_info[] = {
37	{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
38	{ AMDGPU_RING_PRIO_2, "gfx_high"},
39	};
40
41	static struct kmem_cache *amdgpu_mux_chunk_slab;
42
43	static inline struct amdgpu_mux_entry amdgpu_ring_mux_sw_entry(struct* amdgpu_ring_mux *mux,
44	struct amdgpu_ring *ring)
45	{
46	return ring->entry_index < mux->ring_entry_size ?
47	&mux->ring_entry[ring->entry_index] : NULL;
48	}
49
50	/ copy packages on sw ring range[begin, end) /
51	static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
52	struct amdgpu_ring *ring,
53	u64 s_start, u64 s_end)
54	{
55	u64 start, end;
56	struct amdgpu_ring *real_ring = mux->real_ring;
57
58	start = s_start & ring->buf_mask;
59	end = s_end & ring->buf_mask;
60
61	if (start == end) {
62	DRM_ERROR("no more data copied from sw ring\n");
63	return;
64	}
65	if (start > end) {
66	amdgpu_ring_alloc(ring: real_ring, ndw: (ring->ring_size >> `2`) + end - start);
67	amdgpu_ring_write_multiple(ring: real_ring, src: (void *)&ring->ring[start],
68	count_dw: (ring->ring_size >> `2`) - start);
69	amdgpu_ring_write_multiple(ring: real_ring, src: (void *)&ring->ring[`0`], count_dw: end);
70	} else {
71	amdgpu_ring_alloc(ring: real_ring, ndw: end - start);
72	amdgpu_ring_write_multiple(ring: real_ring, src: (void *)&ring->ring[start], count_dw: end - start);
73	}
74	}
75
76	static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
77	{
78	struct amdgpu_mux_entry *e = NULL;
79	struct amdgpu_mux_chunk *chunk;
80	uint32_t seq, last_seq;
81	int i;
82
83	/find low priority entries:/
84	if (!mux->s_resubmit)
85	return;
86
87	for (i = `0`; i < mux->num_ring_entries; i++) {
88	if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
89	e = &mux->ring_entry[i];
90	break;
91	}
92	}
93
94	if (!e) {
95	DRM_ERROR("%s no low priority ring found\n", __func__);
96	return;
97	}
98
99	last_seq = atomic_read(v: &e->ring->fence_drv.last_seq);
100	seq = mux->seqno_to_resubmit;
101	if (last_seq < seq) {
102	/resubmit all the fences between (last_seq, seq]/
103	list_for_each_entry(chunk, &e->list, entry) {
104	if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
105	amdgpu_fence_update_start_timestamp(ring: e->ring,
106	seq: chunk->sync_seq,
107	timestamp: ktime_get());
108	if (chunk->sync_seq ==
109	le32_to_cpu(*(e->ring->fence_drv.cpu_addr + `2`))) {
110	if (chunk->cntl_offset <= e->ring->buf_mask)
111	amdgpu_ring_patch_cntl(e->ring,
112	chunk->cntl_offset);
113	if (chunk->ce_offset <= e->ring->buf_mask)
114	amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
115	if (chunk->de_offset <= e->ring->buf_mask)
116	amdgpu_ring_patch_de(e->ring, chunk->de_offset);
117	}
118	amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring: e->ring,
119	s_start: chunk->start,
120	s_end: chunk->end);
121	mux->wptr_resubmit = chunk->end;
122	amdgpu_ring_commit(ring: mux->real_ring);
123	}
124	}
125	}
126
127	del_timer(timer: &mux->resubmit_timer);
128	mux->s_resubmit = false;
129	}
130
131	static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
132	{
133	mod_timer(timer: &mux->resubmit_timer, expires: jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
134	}
135
136	static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
137	{
138	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
139
140	if (!spin_trylock(lock: &mux->lock)) {
141	amdgpu_ring_mux_schedule_resubmit(mux);
142	DRM_ERROR("reschedule resubmit\n");
143	return;
144	}
145	amdgpu_mux_resubmit_chunks(mux);
146	spin_unlock(lock: &mux->lock);
147	}
148
149	int amdgpu_ring_mux_init(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring,
150	unsigned int entry_size)
151	{
152	mux->real_ring = ring;
153	mux->num_ring_entries = `0`;
154
155	mux->ring_entry = kcalloc(n: entry_size, size: sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
156	if (!mux->ring_entry)
157	return -ENOMEM;
158
159	mux->ring_entry_size = entry_size;
160	mux->s_resubmit = false;
161
162	amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN);
163	if (!amdgpu_mux_chunk_slab) {
164	DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
165	return -ENOMEM;
166	}
167
168	spin_lock_init(&mux->lock);
169	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, `0`);
170
171	return `0`;
172	}
173
174	void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
175	{
176	struct amdgpu_mux_entry *e;
177	struct amdgpu_mux_chunk chunk, chunk2;
178	int i;
179
180	for (i = `0`; i < mux->num_ring_entries; i++) {
181	e = &mux->ring_entry[i];
182	list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
183	list_del(entry: &chunk->entry);
184	kmem_cache_free(s: amdgpu_mux_chunk_slab, objp: chunk);
185	}
186	}
187	kmem_cache_destroy(s: amdgpu_mux_chunk_slab);
188	kfree(objp: mux->ring_entry);
189	mux->ring_entry = NULL;
190	mux->num_ring_entries = `0`;
191	mux->ring_entry_size = `0`;
192	}
193
194	int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring)
195	{
196	struct amdgpu_mux_entry *e;
197
198	if (mux->num_ring_entries >= mux->ring_entry_size) {
199	DRM_ERROR("add sw ring exceeding max entry size\n");
200	return -ENOENT;
201	}
202
203	e = &mux->ring_entry[mux->num_ring_entries];
204	ring->entry_index = mux->num_ring_entries;
205	e->ring = ring;
206
207	INIT_LIST_HEAD(list: &e->list);
208	mux->num_ring_entries += `1`;
209	return `0`;
210	}
211
212	void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring, u64 wptr)
213	{
214	struct amdgpu_mux_entry *e;
215
216	spin_lock(lock: &mux->lock);
217
218	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
219	amdgpu_mux_resubmit_chunks(mux);
220
221	e = amdgpu_ring_mux_sw_entry(mux, ring);
222	if (!e) {
223	DRM_ERROR("cannot find entry for sw ring\n");
224	spin_unlock(lock: &mux->lock);
225	return;
226	}
227
228	/ We could skip this set wptr as preemption in process. /
229	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
230	spin_unlock(lock: &mux->lock);
231	return;
232	}
233
234	e->sw_cptr = e->sw_wptr;
235	/ Update cptr if the package already copied in resubmit functions /
236	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
237	e->sw_cptr = mux->wptr_resubmit;
238	e->sw_wptr = wptr;
239	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
240
241	/ Skip copying for the packages already resubmitted./
242	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT \|\| mux->wptr_resubmit < wptr) {
243	amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, s_start: e->sw_cptr, s_end: wptr);
244	e->end_ptr_in_hw_ring = mux->real_ring->wptr;
245	amdgpu_ring_commit(ring: mux->real_ring);
246	} else {
247	e->end_ptr_in_hw_ring = mux->real_ring->wptr;
248	}
249	spin_unlock(lock: &mux->lock);
250	}
251
252	u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring)
253	{
254	struct amdgpu_mux_entry *e;
255
256	e = amdgpu_ring_mux_sw_entry(mux, ring);
257	if (!e) {
258	DRM_ERROR("cannot find entry for sw ring\n");
259	return `0`;
260	}
261
262	return e->sw_wptr;
263	}
264
265	/**
266	* amdgpu_ring_mux_get_rptr - get the readptr of the software ring
267	* @mux: the multiplexer the software rings attach to
268	* @ring: the software ring of which we calculate the readptr
269	*
270	* The return value of the readptr is not precise while the other rings could
271	* write data onto the real ring buffer.After overwriting on the real ring, we
272	* can not decide if our packages have been excuted or not read yet. However,
273	* this function is only called by the tools such as umr to collect the latest
274	* packages for the hang analysis. We assume the hang happens near our latest
275	* submit. Thus we could use the following logic to give the clue:
276	* If the readptr is between start and end, then we return the copy pointer
277	* plus the distance from start to readptr. If the readptr is before start, we
278	* return the copy pointer. Lastly, if the readptr is past end, we return the
279	* write pointer.
280	*/
281	u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring)
282	{
283	struct amdgpu_mux_entry *e;
284	u64 readp, offset, start, end;
285
286	e = amdgpu_ring_mux_sw_entry(mux, ring);
287	if (!e) {
288	DRM_ERROR("no sw entry found!\n");
289	return `0`;
290	}
291
292	readp = amdgpu_ring_get_rptr(mux->real_ring);
293
294	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
295	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
296	if (start > end) {
297	if (readp <= end)
298	readp += mux->real_ring->ring_size >> `2`;
299	end += mux->real_ring->ring_size >> `2`;
300	}
301
302	if (start <= readp && readp <= end) {
303	offset = readp - start;
304	e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
305	} else if (readp < start) {
306	e->sw_rptr = e->sw_cptr;
307	} else {
308	/ end < readptr /
309	e->sw_rptr = e->sw_wptr;
310	}
311
312	return e->sw_rptr;
313	}
314
315	u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
316	{
317	struct amdgpu_device *adev = ring->adev;
318	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
319
320	WARN_ON(!ring->is_sw_ring);
321	return amdgpu_ring_mux_get_rptr(mux, ring);
322	}
323
324	u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
325	{
326	struct amdgpu_device *adev = ring->adev;
327	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
328
329	WARN_ON(!ring->is_sw_ring);
330	return amdgpu_ring_mux_get_wptr(mux, ring);
331	}
332
333	void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
334	{
335	struct amdgpu_device *adev = ring->adev;
336	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
337
338	WARN_ON(!ring->is_sw_ring);
339	amdgpu_ring_mux_set_wptr(mux, ring, wptr: ring->wptr);
340	}
341
342	/ Override insert_nop to prevent emitting nops to the software rings /
343	void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
344	{
345	WARN_ON(!ring->is_sw_ring);
346	}
347
348	const char amdgpu_sw_ring_name(int* idx)
349	{
350	return idx < ARRAY_SIZE(sw_ring_info) ?
351	sw_ring_info[idx].ring_name : NULL;
352	}
353
354	unsigned int amdgpu_sw_ring_priority(int idx)
355	{
356	return idx < ARRAY_SIZE(sw_ring_info) ?
357	sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
358	}
359
360	/Scan on low prio rings to have unsignaled fence and high ring has no fence./
361	static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
362	{
363	struct amdgpu_ring *ring;
364	int i, need_preempt;
365
366	need_preempt = `0`;
367	for (i = `0`; i < mux->num_ring_entries; i++) {
368	ring = mux->ring_entry[i].ring;
369	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
370	amdgpu_fence_count_emitted(ring) > `0`)
371	return `0`;
372	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
373	amdgpu_fence_last_unsignaled_time_us(ring) >
374	AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
375	need_preempt = `1`;
376	}
377	return need_preempt && !mux->s_resubmit;
378	}
379
380	/ Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. /
381	static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
382	{
383	int r;
384
385	spin_lock(lock: &mux->lock);
386	mux->pending_trailing_fence_signaled = true;
387	r = amdgpu_ring_preempt_ib(mux->real_ring);
388	spin_unlock(lock: &mux->lock);
389	return r;
390	}
391
392	void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
393	{
394	struct amdgpu_device *adev = ring->adev;
395	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
396
397	WARN_ON(!ring->is_sw_ring);
398	if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
399	if (amdgpu_mcbp_scan(mux) > `0`)
400	amdgpu_mcbp_trigger_preempt(mux);
401	return;
402	}
403
404	amdgpu_ring_mux_start_ib(mux, ring);
405	}
406
407	void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
408	{
409	struct amdgpu_device *adev = ring->adev;
410	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
411
412	WARN_ON(!ring->is_sw_ring);
413	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
414	return;
415	amdgpu_ring_mux_end_ib(mux, ring);
416	}
417
418	void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring ring, enum* amdgpu_ring_mux_offset_type type)
419	{
420	struct amdgpu_device *adev = ring->adev;
421	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
422	unsigned offset;
423
424	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
425	return;
426
427	offset = ring->wptr & ring->buf_mask;
428
429	amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
430	}
431
432	void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring)
433	{
434	struct amdgpu_mux_entry *e;
435	struct amdgpu_mux_chunk *chunk;
436
437	spin_lock(lock: &mux->lock);
438	amdgpu_mux_resubmit_chunks(mux);
439	spin_unlock(lock: &mux->lock);
440
441	e = amdgpu_ring_mux_sw_entry(mux, ring);
442	if (!e) {
443	DRM_ERROR("cannot find entry!\n");
444	return;
445	}
446
447	chunk = kmem_cache_alloc(cachep: amdgpu_mux_chunk_slab, GFP_KERNEL);
448	if (!chunk) {
449	DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
450	return;
451	}
452
453	chunk->start = ring->wptr;
454	/ the initialized value used to check if they are set by the ib submission/
455	chunk->cntl_offset = ring->buf_mask + `1`;
456	chunk->de_offset = ring->buf_mask + `1`;
457	chunk->ce_offset = ring->buf_mask + `1`;
458	list_add_tail(new: &chunk->entry, head: &e->list);
459	}
460
461	static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring)
462	{
463	uint32_t last_seq = `0`;
464	struct amdgpu_mux_entry *e;
465	struct amdgpu_mux_chunk chunk, tmp;
466
467	e = amdgpu_ring_mux_sw_entry(mux, ring);
468	if (!e) {
469	DRM_ERROR("cannot find entry!\n");
470	return;
471	}
472
473	last_seq = atomic_read(v: &ring->fence_drv.last_seq);
474
475	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
476	if (chunk->sync_seq <= last_seq) {
477	list_del(entry: &chunk->entry);
478	kmem_cache_free(s: amdgpu_mux_chunk_slab, objp: chunk);
479	}
480	}
481	}
482
483	void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
484	struct amdgpu_ring *ring, u64 offset,
485	enum amdgpu_ring_mux_offset_type type)
486	{
487	struct amdgpu_mux_entry *e;
488	struct amdgpu_mux_chunk *chunk;
489
490	e = amdgpu_ring_mux_sw_entry(mux, ring);
491	if (!e) {
492	DRM_ERROR("cannot find entry!\n");
493	return;
494	}
495
496	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
497	if (!chunk) {
498	DRM_ERROR("cannot find chunk!\n");
499	return;
500	}
501
502	switch (type) {
503	case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
504	chunk->cntl_offset = offset;
505	break;
506	case AMDGPU_MUX_OFFSET_TYPE_DE:
507	chunk->de_offset = offset;
508	break;
509	case AMDGPU_MUX_OFFSET_TYPE_CE:
510	chunk->ce_offset = offset;
511	break;
512	default:
513	DRM_ERROR("invalid type (%d)\n", type);
514	break;
515	}
516	}
517
518	void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux mux, struct* amdgpu_ring *ring)
519	{
520	struct amdgpu_mux_entry *e;
521	struct amdgpu_mux_chunk *chunk;
522
523	e = amdgpu_ring_mux_sw_entry(mux, ring);
524	if (!e) {
525	DRM_ERROR("cannot find entry!\n");
526	return;
527	}
528
529	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
530	if (!chunk) {
531	DRM_ERROR("cannot find chunk!\n");
532	return;
533	}
534
535	chunk->end = ring->wptr;
536	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
537
538	scan_and_remove_signaled_chunk(mux, ring);
539	}
540
541	bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
542	{
543	struct amdgpu_mux_entry *e;
544	struct amdgpu_ring *ring = NULL;
545	int i;
546
547	if (!mux->pending_trailing_fence_signaled)
548	return false;
549
550	if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
551	return false;
552
553	for (i = `0`; i < mux->num_ring_entries; i++) {
554	e = &mux->ring_entry[i];
555	if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
556	ring = e->ring;
557	break;
558	}
559	}
560
561	if (!ring) {
562	DRM_ERROR("cannot find low priority ring\n");
563	return false;
564	}
565
566	amdgpu_fence_process(ring);
567	if (amdgpu_fence_count_emitted(ring) > `0`) {
568	mux->s_resubmit = true;
569	mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
570	amdgpu_ring_mux_schedule_resubmit(mux);
571	}
572
573	mux->pending_trailing_fence_signaled = false;
574	return true;
575	}
576

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c