amdgpu_ih.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c]

1	/*
2	* Copyright 2014 Advanced Micro Devices, Inc.
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20	* OTHER DEALINGS IN THE SOFTWARE.
21	*
22	*/
23
24	#include <linux/dma-mapping.h>
25
26	#include "amdgpu.h"
27	#include "amdgpu_ih.h"
28	#include "amdgpu_reset.h"
29
30	/**
31	* amdgpu_ih_ring_init - initialize the IH state
32	*
33	* @adev: amdgpu_device pointer
34	* @ih: ih ring to initialize
35	* @ring_size: ring size to allocate
36	* @use_bus_addr: true when we can use dma_alloc_coherent
37	*
38	* Initializes the IH state and allocates a buffer
39	* for the IH ring buffer.
40	* Returns 0 for success, errors for failure.
41	*/
42	int amdgpu_ih_ring_init(struct amdgpu_device adev, struct* amdgpu_ih_ring *ih,
43	unsigned ring_size, bool use_bus_addr)
44	{
45	u32 rb_bufsz;
46	int r;
47
48	/ Align ring size /
49	rb_bufsz = order_base_2(ring_size / `4`);
50	ring_size = (`1` << rb_bufsz) * `4`;
51	ih->ring_size = ring_size;
52	ih->ptr_mask = ih->ring_size - `1`;
53	ih->rptr = `0`;
54	ih->use_bus_addr = use_bus_addr;
55
56	if (use_bus_addr) {
57	dma_addr_t dma_addr;
58
59	if (ih->ring)
60	return `0`;
61
62	/ add 8 bytes for the rptr/wptr shadows and*
63	* add them to the end of the ring allocation.
64	*/
65	ih->ring = dma_alloc_coherent(dev: adev->dev, size: ih->ring_size + `8`,
66	dma_handle: &dma_addr, GFP_KERNEL);
67	if (ih->ring == NULL)
68	return -ENOMEM;
69
70	ih->gpu_addr = dma_addr;
71	ih->wptr_addr = dma_addr + ih->ring_size;
72	ih->wptr_cpu = &ih->ring[ih->ring_size / `4`];
73	ih->rptr_addr = dma_addr + ih->ring_size + `4`;
74	ih->rptr_cpu = &ih->ring[(ih->ring_size / `4`) + `1`];
75	} else {
76	unsigned wptr_offs, rptr_offs;
77
78	r = amdgpu_device_wb_get(adev, wb: &wptr_offs);
79	if (r)
80	return r;
81
82	r = amdgpu_device_wb_get(adev, wb: &rptr_offs);
83	if (r) {
84	amdgpu_device_wb_free(adev, wb: wptr_offs);
85	return r;
86	}
87
88	r = amdgpu_bo_create_kernel(adev, size: ih->ring_size, PAGE_SIZE,
89	AMDGPU_GEM_DOMAIN_GTT,
90	bo_ptr: &ih->ring_obj, gpu_addr: &ih->gpu_addr,
91	cpu_addr: (void **)&ih->ring);
92	if (r) {
93	amdgpu_device_wb_free(adev, wb: rptr_offs);
94	amdgpu_device_wb_free(adev, wb: wptr_offs);
95	return r;
96	}
97
98	ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * `4`;
99	ih->wptr_cpu = &adev->wb.wb[wptr_offs];
100	ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * `4`;
101	ih->rptr_cpu = &adev->wb.wb[rptr_offs];
102	}
103
104	init_waitqueue_head(&ih->wait_process);
105	return `0`;
106	}
107
108	/**
109	* amdgpu_ih_ring_fini - tear down the IH state
110	*
111	* @adev: amdgpu_device pointer
112	* @ih: ih ring to tear down
113	*
114	* Tears down the IH state and frees buffer
115	* used for the IH ring buffer.
116	*/
117	void amdgpu_ih_ring_fini(struct amdgpu_device adev, struct* amdgpu_ih_ring *ih)
118	{
119
120	if (!ih->ring)
121	return;
122
123	if (ih->use_bus_addr) {
124
125	/ add 8 bytes for the rptr/wptr shadows and*
126	* add them to the end of the ring allocation.
127	*/
128	dma_free_coherent(dev: adev->dev, size: ih->ring_size + `8`,
129	cpu_addr: (void *)ih->ring, dma_handle: ih->gpu_addr);
130	ih->ring = NULL;
131	} else {
132	amdgpu_bo_free_kernel(bo: &ih->ring_obj, gpu_addr: &ih->gpu_addr,
133	cpu_addr: (void **)&ih->ring);
134	amdgpu_device_wb_free(adev, wb: (ih->wptr_addr - ih->gpu_addr) / `4`);
135	amdgpu_device_wb_free(adev, wb: (ih->rptr_addr - ih->gpu_addr) / `4`);
136	}
137	}
138
139	/**
140	* amdgpu_ih_ring_write - write IV to the ring buffer
141	*
142	* @adev: amdgpu_device pointer
143	* @ih: ih ring to write to
144	* @iv: the iv to write
145	* @num_dw: size of the iv in dw
146	*
147	* Writes an IV to the ring buffer using the CPU and increment the wptr.
148	* Used for testing and delegating IVs to a software ring.
149	*/
150	void amdgpu_ih_ring_write(struct amdgpu_device adev, struct* amdgpu_ih_ring *ih,
151	const uint32_t iv, unsigned* int num_dw)
152	{
153	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> `2`;
154	unsigned int i;
155
156	for (i = `0`; i < num_dw; ++i)
157	ih->ring[wptr++] = cpu_to_le32(iv[i]);
158
159	wptr <<= `2`;
160	wptr &= ih->ptr_mask;
161
162	/ Only commit the new wptr if we don't overflow /
163	if (wptr != READ_ONCE(ih->rptr)) {
164	wmb();
165	WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
166	} else if (adev->irq.retry_cam_enabled) {
167	dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
168	wptr, ih->rptr);
169	}
170	}
171
172	/**
173	* amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
174	*
175	* @adev: amdgpu_device pointer
176	* @ih: ih ring to process
177	*
178	* Used to ensure ring has processed IVs up to the checkpoint write pointer.
179	*/
180	int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
181	struct amdgpu_ih_ring *ih)
182	{
183	uint32_t checkpoint_wptr;
184	uint64_t checkpoint_ts;
185	long timeout = HZ;
186
187	if (!ih->enabled \|\| adev->shutdown)
188	return -ENODEV;
189
190	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
191	/ Order wptr with ring data. /
192	rmb();
193	checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -`1`);
194
195	return wait_event_interruptible_timeout(ih->wait_process,
196	amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) \|\|
197	ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
198	}
199
200	/**
201	* amdgpu_ih_process - interrupt handler
202	*
203	* @adev: amdgpu_device pointer
204	* @ih: ih ring to process
205	*
206	* Interrupt hander (VI), walk the IH ring.
207	* Returns irq process return code.
208	*/
209	int amdgpu_ih_process(struct amdgpu_device adev, struct* amdgpu_ih_ring *ih)
210	{
211	unsigned int count;
212	u32 wptr;
213
214	if (!ih->enabled \|\| adev->shutdown)
215	return IRQ_NONE;
216
217	wptr = amdgpu_ih_get_wptr(adev, ih);
218
219	restart_ih:
220	count = AMDGPU_IH_MAX_NUM_IVS;
221	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
222
223	/ Order reading of wptr vs. reading of IH ring data /
224	rmb();
225
226	while (ih->rptr != wptr && --count) {
227	amdgpu_irq_dispatch(adev, ih);
228	ih->rptr &= ih->ptr_mask;
229	}
230
231	if (!ih->overflow)
232	amdgpu_ih_set_rptr(adev, ih);
233
234	wake_up_all(&ih->wait_process);
235
236	/ make sure wptr hasn't changed while processing /
237	wptr = amdgpu_ih_get_wptr(adev, ih);
238	if (wptr != ih->rptr)
239	if (!ih->overflow)
240	goto restart_ih;
241
242	if (ih->overflow)
243	if (amdgpu_sriov_runtime(adev))
244	WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
245	&adev->virt.flr_work),
246	"Failed to queue work! at %s",
247	__func__);
248
249	return IRQ_HANDLED;
250	}
251
252	/**
253	* amdgpu_ih_decode_iv_helper - decode an interrupt vector
254	*
255	* @adev: amdgpu_device pointer
256	* @ih: ih ring to process
257	* @entry: IV entry
258	*
259	* Decodes the interrupt vector at the current rptr
260	* position and also advance the position for Vega10
261	* and later GPUs.
262	*/
263	void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
264	struct amdgpu_ih_ring *ih,
265	struct amdgpu_iv_entry *entry)
266	{
267	/ wptr/rptr are in bytes! /
268	u32 ring_index = ih->rptr >> `2`;
269	uint32_t dw[`8`];
270
271	dw[`0`] = le32_to_cpu(ih->ring[ring_index + `0`]);
272	dw[`1`] = le32_to_cpu(ih->ring[ring_index + `1`]);
273	dw[`2`] = le32_to_cpu(ih->ring[ring_index + `2`]);
274	dw[`3`] = le32_to_cpu(ih->ring[ring_index + `3`]);
275	dw[`4`] = le32_to_cpu(ih->ring[ring_index + `4`]);
276	dw[`5`] = le32_to_cpu(ih->ring[ring_index + `5`]);
277	dw[`6`] = le32_to_cpu(ih->ring[ring_index + `6`]);
278	dw[`7`] = le32_to_cpu(ih->ring[ring_index + `7`]);
279
280	entry->client_id = dw[`0`] & `0xff`;
281	entry->src_id = (dw[`0`] >> `8`) & `0xff`;
282	entry->ring_id = (dw[`0`] >> `16`) & `0xff`;
283	entry->vmid = (dw[`0`] >> `24`) & `0xf`;
284	entry->vmid_src = (dw[`0`] >> `31`);
285	entry->timestamp = dw[`1`] \| ((u64)(dw[`2`] & `0xffff`) << `32`);
286	entry->timestamp_src = dw[`2`] >> `31`;
287	entry->pasid = dw[`3`] & `0xffff`;
288	entry->node_id = (dw[`3`] >> `16`) & `0xff`;
289	entry->src_data[`0`] = dw[`4`];
290	entry->src_data[`1`] = dw[`5`];
291	entry->src_data[`2`] = dw[`6`];
292	entry->src_data[`3`] = dw[`7`];
293
294	/ wptr/rptr are in bytes! /
295	ih->rptr += `32`;
296	}
297
298	uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
299	signed int offset)
300	{
301	uint32_t iv_size = `32`;
302	uint32_t ring_index;
303	uint32_t dw1, dw2;
304
305	rptr += iv_size * offset;
306	ring_index = (rptr & ih->ptr_mask) >> `2`;
307
308	dw1 = le32_to_cpu(ih->ring[ring_index + `1`]);
309	dw2 = le32_to_cpu(ih->ring[ring_index + `2`]);
310	return dw1 \| ((u64)(dw2 & `0xffff`) << `32`);
311	}
312
313	const char amdgpu_ih_ring_name(struct* amdgpu_device adev, struct* amdgpu_ih_ring *ih)
314	{
315	return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
316	ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
317	}
318

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c