amdgpu_vce.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c]

1	/*
2	* Copyright 2013 Advanced Micro Devices, Inc.
3	* All Rights Reserved.
4	*
5	* Permission is hereby granted, free of charge, to any person obtaining a
6	* copy of this software and associated documentation files (the
7	* "Software"), to deal in the Software without restriction, including
8	* without limitation the rights to use, copy, modify, merge, publish,
9	* distribute, sub license, and/or sell copies of the Software, and to
10	* permit persons to whom the Software is furnished to do so, subject to
11	* the following conditions:
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16	* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17	* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18	* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19	* USE OR OTHER DEALINGS IN THE SOFTWARE.
20	*
21	* The above copyright notice and this permission notice (including the
22	* next paragraph) shall be included in all copies or substantial portions
23	* of the Software.
24	*
25	* Authors: Christian König <christian.koenig@amd.com>
26	*/
27
28	#include <linux/firmware.h>
29	#include <linux/module.h>
30
31	#include <drm/drm.h>
32	#include <drm/drm_drv.h>
33
34	#include "amdgpu.h"
35	#include "amdgpu_pm.h"
36	#include "amdgpu_vce.h"
37	#include "amdgpu_cs.h"
38	#include "cikd.h"
39
40	/ 1 second timeout /
41	#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)
42
43	/ Firmware Names /
44	#ifdef CONFIG_DRM_AMDGPU_CIK
45	#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
46	#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
47	#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
48	#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
49	#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin"
50	#endif
51	#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
52	#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
53	#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
54	#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
55	#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
56	#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
57	#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
58	#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
59
60	#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
61	#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
62	#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
63
64	#ifdef CONFIG_DRM_AMDGPU_CIK
65	MODULE_FIRMWARE(FIRMWARE_BONAIRE);
66	MODULE_FIRMWARE(FIRMWARE_KABINI);
67	MODULE_FIRMWARE(FIRMWARE_KAVERI);
68	MODULE_FIRMWARE(FIRMWARE_HAWAII);
69	MODULE_FIRMWARE(FIRMWARE_MULLINS);
70	#endif
71	MODULE_FIRMWARE(FIRMWARE_TONGA);
72	MODULE_FIRMWARE(FIRMWARE_CARRIZO);
73	MODULE_FIRMWARE(FIRMWARE_FIJI);
74	MODULE_FIRMWARE(FIRMWARE_STONEY);
75	MODULE_FIRMWARE(FIRMWARE_POLARIS10);
76	MODULE_FIRMWARE(FIRMWARE_POLARIS11);
77	MODULE_FIRMWARE(FIRMWARE_POLARIS12);
78	MODULE_FIRMWARE(FIRMWARE_VEGAM);
79
80	MODULE_FIRMWARE(FIRMWARE_VEGA10);
81	MODULE_FIRMWARE(FIRMWARE_VEGA12);
82	MODULE_FIRMWARE(FIRMWARE_VEGA20);
83
84	static void amdgpu_vce_idle_work_handler(struct work_struct *work);
85	static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
86	struct dma_fence **fence);
87	static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
88	bool direct, struct dma_fence **fence);
89
90	/**
91	* amdgpu_vce_sw_init - allocate memory, load vce firmware
92	*
93	* @adev: amdgpu_device pointer
94	* @size: size for the new BO
95	*
96	* First step to get VCE online, allocate memory and load the firmware
97	*/
98	int amdgpu_vce_sw_init(struct amdgpu_device adev, unsigned* long size)
99	{
100	const char *fw_name;
101	const struct common_firmware_header *hdr;
102	unsigned int ucode_version, version_major, version_minor, binary_id;
103	int i, r;
104
105	switch (adev->asic_type) {
106	#ifdef CONFIG_DRM_AMDGPU_CIK
107	case CHIP_BONAIRE:
108	fw_name = FIRMWARE_BONAIRE;
109	break;
110	case CHIP_KAVERI:
111	fw_name = FIRMWARE_KAVERI;
112	break;
113	case CHIP_KABINI:
114	fw_name = FIRMWARE_KABINI;
115	break;
116	case CHIP_HAWAII:
117	fw_name = FIRMWARE_HAWAII;
118	break;
119	case CHIP_MULLINS:
120	fw_name = FIRMWARE_MULLINS;
121	break;
122	#endif
123	case CHIP_TONGA:
124	fw_name = FIRMWARE_TONGA;
125	break;
126	case CHIP_CARRIZO:
127	fw_name = FIRMWARE_CARRIZO;
128	break;
129	case CHIP_FIJI:
130	fw_name = FIRMWARE_FIJI;
131	break;
132	case CHIP_STONEY:
133	fw_name = FIRMWARE_STONEY;
134	break;
135	case CHIP_POLARIS10:
136	fw_name = FIRMWARE_POLARIS10;
137	break;
138	case CHIP_POLARIS11:
139	fw_name = FIRMWARE_POLARIS11;
140	break;
141	case CHIP_POLARIS12:
142	fw_name = FIRMWARE_POLARIS12;
143	break;
144	case CHIP_VEGAM:
145	fw_name = FIRMWARE_VEGAM;
146	break;
147	case CHIP_VEGA10:
148	fw_name = FIRMWARE_VEGA10;
149	break;
150	case CHIP_VEGA12:
151	fw_name = FIRMWARE_VEGA12;
152	break;
153	case CHIP_VEGA20:
154	fw_name = FIRMWARE_VEGA20;
155	break;
156
157	default:
158	return -EINVAL;
159	}
160
161	r = amdgpu_ucode_request(adev, fw: &adev->vce.fw, fw_name);
162	if (r) {
163	dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
164	fw_name);
165	amdgpu_ucode_release(fw: &adev->vce.fw);
166	return r;
167	}
168
169	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
170
171	ucode_version = le32_to_cpu(hdr->ucode_version);
172	version_major = (ucode_version >> `20`) & `0xfff`;
173	version_minor = (ucode_version >> `8`) & `0xfff`;
174	binary_id = ucode_version & `0xff`;
175	DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
176	version_major, version_minor, binary_id);
177	adev->vce.fw_version = ((version_major << `24`) \| (version_minor << `16`) \|
178	(binary_id << `8`));
179
180	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
181	AMDGPU_GEM_DOMAIN_VRAM \|
182	AMDGPU_GEM_DOMAIN_GTT,
183	bo_ptr: &adev->vce.vcpu_bo,
184	gpu_addr: &adev->vce.gpu_addr, cpu_addr: &adev->vce.cpu_addr);
185	if (r) {
186	dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
187	return r;
188	}
189
190	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
191	atomic_set(v: &adev->vce.handles[i], i: `0`);
192	adev->vce.filp[i] = NULL;
193	}
194
195	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
196	mutex_init(&adev->vce.idle_mutex);
197
198	return `0`;
199	}
200
201	/**
202	* amdgpu_vce_sw_fini - free memory
203	*
204	* @adev: amdgpu_device pointer
205	*
206	* Last step on VCE teardown, free firmware memory
207	*/
208	int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
209	{
210	unsigned int i;
211
212	if (adev->vce.vcpu_bo == NULL)
213	return `0`;
214
215	drm_sched_entity_destroy(entity: &adev->vce.entity);
216
217	amdgpu_bo_free_kernel(bo: &adev->vce.vcpu_bo, gpu_addr: &adev->vce.gpu_addr,
218	cpu_addr: (void **)&adev->vce.cpu_addr);
219
220	for (i = `0`; i < adev->vce.num_rings; i++)
221	amdgpu_ring_fini(ring: &adev->vce.ring[i]);
222
223	amdgpu_ucode_release(fw: &adev->vce.fw);
224	mutex_destroy(lock: &adev->vce.idle_mutex);
225
226	return `0`;
227	}
228
229	/**
230	* amdgpu_vce_entity_init - init entity
231	*
232	* @adev: amdgpu_device pointer
233	* @ring: amdgpu_ring pointer to check
234	*
235	* Initialize the entity used for handle management in the kernel driver.
236	*/
237	int amdgpu_vce_entity_init(struct amdgpu_device adev, struct* amdgpu_ring *ring)
238	{
239	if (ring == &adev->vce.ring[`0`]) {
240	struct drm_gpu_scheduler *sched = &ring->sched;
241	int r;
242
243	r = drm_sched_entity_init(entity: &adev->vce.entity, priority: DRM_SCHED_PRIORITY_NORMAL,
244	sched_list: &sched, num_sched_list: `1`, NULL);
245	if (r != `0`) {
246	DRM_ERROR("Failed setting up VCE run queue.\n");
247	return r;
248	}
249	}
250
251	return `0`;
252	}
253
254	/**
255	* amdgpu_vce_suspend - unpin VCE fw memory
256	*
257	* @adev: amdgpu_device pointer
258	*
259	*/
260	int amdgpu_vce_suspend(struct amdgpu_device *adev)
261	{
262	int i;
263
264	cancel_delayed_work_sync(dwork: &adev->vce.idle_work);
265
266	if (adev->vce.vcpu_bo == NULL)
267	return `0`;
268
269	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i)
270	if (atomic_read(v: &adev->vce.handles[i]))
271	break;
272
273	if (i == AMDGPU_MAX_VCE_HANDLES)
274	return `0`;
275
276	/ TODO: suspending running encoding sessions isn't supported /
277	return -EINVAL;
278	}
279
280	/**
281	* amdgpu_vce_resume - pin VCE fw memory
282	*
283	* @adev: amdgpu_device pointer
284	*
285	*/
286	int amdgpu_vce_resume(struct amdgpu_device *adev)
287	{
288	void *cpu_addr;
289	const struct common_firmware_header *hdr;
290	unsigned int offset;
291	int r, idx;
292
293	if (adev->vce.vcpu_bo == NULL)
294	return -EINVAL;
295
296	r = amdgpu_bo_reserve(bo: adev->vce.vcpu_bo, no_intr: false);
297	if (r) {
298	dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
299	return r;
300	}
301
302	r = amdgpu_bo_kmap(bo: adev->vce.vcpu_bo, ptr: &cpu_addr);
303	if (r) {
304	amdgpu_bo_unreserve(bo: adev->vce.vcpu_bo);
305	dev_err(adev->dev, "(%d) VCE map failed\n", r);
306	return r;
307	}
308
309	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
310	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
311
312	if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) {
313	memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
314	adev->vce.fw->size - offset);
315	drm_dev_exit(idx);
316	}
317
318	amdgpu_bo_kunmap(bo: adev->vce.vcpu_bo);
319
320	amdgpu_bo_unreserve(bo: adev->vce.vcpu_bo);
321
322	return `0`;
323	}
324
325	/**
326	* amdgpu_vce_idle_work_handler - power off VCE
327	*
328	* @work: pointer to work structure
329	*
330	* power of VCE when it's not used any more
331	*/
332	static void amdgpu_vce_idle_work_handler(struct work_struct *work)
333	{
334	struct amdgpu_device *adev =
335	container_of(work, struct amdgpu_device, vce.idle_work.work);
336	unsigned int i, count = `0`;
337
338	for (i = `0`; i < adev->vce.num_rings; i++)
339	count += amdgpu_fence_count_emitted(ring: &adev->vce.ring[i]);
340
341	if (count == `0`) {
342	if (adev->pm.dpm_enabled) {
343	amdgpu_dpm_enable_vce(adev, enable: false);
344	} else {
345	amdgpu_asic_set_vce_clocks(adev, `0`, `0`);
346	amdgpu_device_ip_set_powergating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_VCE,
347	state: AMD_PG_STATE_GATE);
348	amdgpu_device_ip_set_clockgating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_VCE,
349	state: AMD_CG_STATE_GATE);
350	}
351	} else {
352	schedule_delayed_work(dwork: &adev->vce.idle_work, VCE_IDLE_TIMEOUT);
353	}
354	}
355
356	/**
357	* amdgpu_vce_ring_begin_use - power up VCE
358	*
359	* @ring: amdgpu ring
360	*
361	* Make sure VCE is powerd up when we want to use it
362	*/
363	void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
364	{
365	struct amdgpu_device *adev = ring->adev;
366	bool set_clocks;
367
368	if (amdgpu_sriov_vf(adev))
369	return;
370
371	mutex_lock(&adev->vce.idle_mutex);
372	set_clocks = !cancel_delayed_work_sync(dwork: &adev->vce.idle_work);
373	if (set_clocks) {
374	if (adev->pm.dpm_enabled) {
375	amdgpu_dpm_enable_vce(adev, enable: true);
376	} else {
377	amdgpu_asic_set_vce_clocks(adev, `53300`, `40000`);
378	amdgpu_device_ip_set_clockgating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_VCE,
379	state: AMD_CG_STATE_UNGATE);
380	amdgpu_device_ip_set_powergating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_VCE,
381	state: AMD_PG_STATE_UNGATE);
382
383	}
384	}
385	mutex_unlock(lock: &adev->vce.idle_mutex);
386	}
387
388	/**
389	* amdgpu_vce_ring_end_use - power VCE down
390	*
391	* @ring: amdgpu ring
392	*
393	* Schedule work to power VCE down again
394	*/
395	void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
396	{
397	if (!amdgpu_sriov_vf(ring->adev))
398	schedule_delayed_work(dwork: &ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
399	}
400
401	/**
402	* amdgpu_vce_free_handles - free still open VCE handles
403	*
404	* @adev: amdgpu_device pointer
405	* @filp: drm file pointer
406	*
407	* Close all VCE handles still open by this file pointer
408	*/
409	void amdgpu_vce_free_handles(struct amdgpu_device adev, struct* drm_file *filp)
410	{
411	struct amdgpu_ring *ring = &adev->vce.ring[`0`];
412	int i, r;
413
414	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
415	uint32_t handle = atomic_read(v: &adev->vce.handles[i]);
416
417	if (!handle \|\| adev->vce.filp[i] != filp)
418	continue;
419
420	r = amdgpu_vce_get_destroy_msg(ring, handle, direct: false, NULL);
421	if (r)
422	DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
423
424	adev->vce.filp[i] = NULL;
425	atomic_set(v: &adev->vce.handles[i], i: `0`);
426	}
427	}
428
429	/**
430	* amdgpu_vce_get_create_msg - generate a VCE create msg
431	*
432	* @ring: ring we should submit the msg to
433	* @handle: VCE session handle to use
434	* @fence: optional fence to return
435	*
436	* Open up a stream for HW test
437	*/
438	static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
439	struct dma_fence **fence)
440	{
441	const unsigned int ib_size_dw = `1024`;
442	struct amdgpu_job *job;
443	struct amdgpu_ib *ib;
444	struct amdgpu_ib ib_msg;
445	struct dma_fence *f = NULL;
446	uint64_t addr;
447	int i, r;
448
449	r = amdgpu_job_alloc_with_ib(adev: ring->adev, entity: &ring->adev->vce.entity,
450	AMDGPU_FENCE_OWNER_UNDEFINED,
451	size: ib_size_dw * `4`, pool_type: AMDGPU_IB_POOL_DIRECT,
452	job: &job);
453	if (r)
454	return r;
455
456	memset(&ib_msg, `0`, sizeof(ib_msg));
457	/ only one gpu page is needed, alloc +1 page to make addr aligned. /
458	r = amdgpu_ib_get(adev: ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * `2`,
459	pool: AMDGPU_IB_POOL_DIRECT,
460	ib: &ib_msg);
461	if (r)
462	goto err;
463
464	ib = &job->ibs[`0`];
465	/ let addr point to page boundary /
466	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
467
468	/ stitch together an VCE create msg /
469	ib->length_dw = `0`;
470	ib->ptr[ib->length_dw++] = `0x0000000c`; / len /
471	ib->ptr[ib->length_dw++] = `0x00000001`; / session cmd /
472	ib->ptr[ib->length_dw++] = handle;
473
474	if ((ring->adev->vce.fw_version >> `24`) >= `52`)
475	ib->ptr[ib->length_dw++] = `0x00000040`; / len /
476	else
477	ib->ptr[ib->length_dw++] = `0x00000030`; / len /
478	ib->ptr[ib->length_dw++] = `0x01000001`; / create cmd /
479	ib->ptr[ib->length_dw++] = `0x00000000`;
480	ib->ptr[ib->length_dw++] = `0x00000042`;
481	ib->ptr[ib->length_dw++] = `0x0000000a`;
482	ib->ptr[ib->length_dw++] = `0x00000001`;
483	ib->ptr[ib->length_dw++] = `0x00000080`;
484	ib->ptr[ib->length_dw++] = `0x00000060`;
485	ib->ptr[ib->length_dw++] = `0x00000100`;
486	ib->ptr[ib->length_dw++] = `0x00000100`;
487	ib->ptr[ib->length_dw++] = `0x0000000c`;
488	ib->ptr[ib->length_dw++] = `0x00000000`;
489	if ((ring->adev->vce.fw_version >> `24`) >= `52`) {
490	ib->ptr[ib->length_dw++] = `0x00000000`;
491	ib->ptr[ib->length_dw++] = `0x00000000`;
492	ib->ptr[ib->length_dw++] = `0x00000000`;
493	ib->ptr[ib->length_dw++] = `0x00000000`;
494	}
495
496	ib->ptr[ib->length_dw++] = `0x00000014`; / len /
497	ib->ptr[ib->length_dw++] = `0x05000005`; / feedback buffer /
498	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
499	ib->ptr[ib->length_dw++] = addr;
500	ib->ptr[ib->length_dw++] = `0x00000001`;
501
502	for (i = ib->length_dw; i < ib_size_dw; ++i)
503	ib->ptr[i] = `0x0`;
504
505	r = amdgpu_job_submit_direct(job, ring, fence: &f);
506	amdgpu_ib_free(adev: ring->adev, ib: &ib_msg, f);
507	if (r)
508	goto err;
509
510	if (fence)
511	*fence = dma_fence_get(fence: f);
512	dma_fence_put(fence: f);
513	return `0`;
514
515	err:
516	amdgpu_job_free(job);
517	return r;
518	}
519
520	/**
521	* amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
522	*
523	* @ring: ring we should submit the msg to
524	* @handle: VCE session handle to use
525	* @direct: direct or delayed pool
526	* @fence: optional fence to return
527	*
528	* Close up a stream for HW test or if userspace failed to do so
529	*/
530	static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
531	bool direct, struct dma_fence **fence)
532	{
533	const unsigned int ib_size_dw = `1024`;
534	struct amdgpu_job *job;
535	struct amdgpu_ib *ib;
536	struct dma_fence *f = NULL;
537	int i, r;
538
539	r = amdgpu_job_alloc_with_ib(adev: ring->adev, entity: &ring->adev->vce.entity,
540	AMDGPU_FENCE_OWNER_UNDEFINED,
541	size: ib_size_dw * `4`,
542	pool_type: direct ? AMDGPU_IB_POOL_DIRECT :
543	AMDGPU_IB_POOL_DELAYED, job: &job);
544	if (r)
545	return r;
546
547	ib = &job->ibs[`0`];
548
549	/ stitch together an VCE destroy msg /
550	ib->length_dw = `0`;
551	ib->ptr[ib->length_dw++] = `0x0000000c`; / len /
552	ib->ptr[ib->length_dw++] = `0x00000001`; / session cmd /
553	ib->ptr[ib->length_dw++] = handle;
554
555	ib->ptr[ib->length_dw++] = `0x00000020`; / len /
556	ib->ptr[ib->length_dw++] = `0x00000002`; / task info /
557	ib->ptr[ib->length_dw++] = `0xffffffff`; / next task info, set to 0xffffffff if no /
558	ib->ptr[ib->length_dw++] = `0x00000001`; / destroy session /
559	ib->ptr[ib->length_dw++] = `0x00000000`;
560	ib->ptr[ib->length_dw++] = `0x00000000`;
561	ib->ptr[ib->length_dw++] = `0xffffffff`; / feedback is not needed, set to 0xffffffff and firmware will not output feedback /
562	ib->ptr[ib->length_dw++] = `0x00000000`;
563
564	ib->ptr[ib->length_dw++] = `0x00000008`; / len /
565	ib->ptr[ib->length_dw++] = `0x02000001`; / destroy cmd /
566
567	for (i = ib->length_dw; i < ib_size_dw; ++i)
568	ib->ptr[i] = `0x0`;
569
570	if (direct)
571	r = amdgpu_job_submit_direct(job, ring, fence: &f);
572	else
573	f = amdgpu_job_submit(job);
574	if (r)
575	goto err;
576
577	if (fence)
578	*fence = dma_fence_get(fence: f);
579	dma_fence_put(fence: f);
580	return `0`;
581
582	err:
583	amdgpu_job_free(job);
584	return r;
585	}
586
587	/**
588	* amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
589	*
590	* @p: cs parser
591	* @ib: indirect buffer to use
592	* @lo: address of lower dword
593	* @hi: address of higher dword
594	* @size: minimum size
595	* @index: bs/fb index
596	*
597	* Make sure that no BO cross a 4GB boundary.
598	*/
599	static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
600	struct amdgpu_ib ib, int* lo, int hi,
601	unsigned int size, int32_t index)
602	{
603	int64_t offset = ((uint64_t)size) * ((int64_t)index);
604	struct ttm_operation_ctx ctx = { false, false };
605	struct amdgpu_bo_va_mapping *mapping;
606	unsigned int i, fpfn, lpfn;
607	struct amdgpu_bo *bo;
608	uint64_t addr;
609	int r;
610
611	addr = ((uint64_t)amdgpu_ib_get_value(ib, idx: lo)) \|
612	((uint64_t)amdgpu_ib_get_value(ib, idx: hi)) << `32`;
613	if (index >= `0`) {
614	addr += offset;
615	fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
616	lpfn = `0x100000000ULL` >> PAGE_SHIFT;
617	} else {
618	fpfn = `0`;
619	lpfn = (`0x100000000ULL` - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
620	}
621
622	r = amdgpu_cs_find_mapping(parser: p, addr, bo: &bo, mapping: &mapping);
623	if (r) {
624	DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
625	addr, lo, hi, size, index);
626	return r;
627	}
628
629	for (i = `0`; i < bo->placement.num_placement; ++i) {
630	bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
631	bo->placements[i].lpfn = bo->placements[i].lpfn ?
632	min(bo->placements[i].lpfn, lpfn) : lpfn;
633	}
634	return ttm_bo_validate(bo: &bo->tbo, placement: &bo->placement, ctx: &ctx);
635	}
636
637
638	/**
639	* amdgpu_vce_cs_reloc - command submission relocation
640	*
641	* @p: parser context
642	* @ib: indirect buffer to use
643	* @lo: address of lower dword
644	* @hi: address of higher dword
645	* @size: minimum size
646	* @index: bs/fb index
647	*
648	* Patch relocation inside command stream with real buffer address
649	*/
650	static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser p, struct* amdgpu_ib *ib,
651	int lo, int hi, unsigned int size, uint32_t index)
652	{
653	struct amdgpu_bo_va_mapping *mapping;
654	struct amdgpu_bo *bo;
655	uint64_t addr;
656	int r;
657
658	if (index == `0xffffffff`)
659	index = `0`;
660
661	addr = ((uint64_t)amdgpu_ib_get_value(ib, idx: lo)) \|
662	((uint64_t)amdgpu_ib_get_value(ib, idx: hi)) << `32`;
663	addr += ((uint64_t)size) * ((uint64_t)index);
664
665	r = amdgpu_cs_find_mapping(parser: p, addr, bo: &bo, mapping: &mapping);
666	if (r) {
667	DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
668	addr, lo, hi, size, index);
669	return r;
670	}
671
672	if ((addr + (uint64_t)size) >
673	(mapping->last + `1`) * AMDGPU_GPU_PAGE_SIZE) {
674	DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
675	addr, lo, hi);
676	return -EINVAL;
677	}
678
679	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
680	addr += amdgpu_bo_gpu_offset(bo);
681	addr -= ((uint64_t)size) * ((uint64_t)index);
682
683	amdgpu_ib_set_value(ib, idx: lo, lower_32_bits(addr));
684	amdgpu_ib_set_value(ib, idx: hi, upper_32_bits(addr));
685
686	return `0`;
687	}
688
689	/**
690	* amdgpu_vce_validate_handle - validate stream handle
691	*
692	* @p: parser context
693	* @handle: handle to validate
694	* @allocated: allocated a new handle?
695	*
696	* Validates the handle and return the found session index or -EINVAL
697	* we don't have another free session index.
698	*/
699	static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
700	uint32_t handle, uint32_t *allocated)
701	{
702	unsigned int i;
703
704	/ validate the handle /
705	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
706	if (atomic_read(v: &p->adev->vce.handles[i]) == handle) {
707	if (p->adev->vce.filp[i] != p->filp) {
708	DRM_ERROR("VCE handle collision detected!\n");
709	return -EINVAL;
710	}
711	return i;
712	}
713	}
714
715	/ handle not found try to alloc a new one /
716	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
717	if (!atomic_cmpxchg(v: &p->adev->vce.handles[i], old: `0`, new: handle)) {
718	p->adev->vce.filp[i] = p->filp;
719	p->adev->vce.img_size[i] = `0`;
720	*allocated \|= `1` << i;
721	return i;
722	}
723	}
724
725	DRM_ERROR("No more free VCE handles!\n");
726	return -EINVAL;
727	}
728
729	/**
730	* amdgpu_vce_ring_parse_cs - parse and validate the command stream
731	*
732	* @p: parser context
733	* @job: the job to parse
734	* @ib: the IB to patch
735	*/
736	int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
737	struct amdgpu_job *job,
738	struct amdgpu_ib *ib)
739	{
740	unsigned int fb_idx = `0`, bs_idx = `0`;
741	int session_idx = -`1`;
742	uint32_t destroyed = `0`;
743	uint32_t created = `0`;
744	uint32_t allocated = `0`;
745	uint32_t tmp, handle = `0`;
746	uint32_t *size = &tmp;
747	unsigned int idx;
748	int i, r = `0`;
749
750	job->vm = NULL;
751	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(sa_bo: ib->sa_bo);
752
753	for (idx = `0`; idx < ib->length_dw;) {
754	uint32_t len = amdgpu_ib_get_value(ib, idx);
755	uint32_t cmd = amdgpu_ib_get_value(ib, idx: idx + `1`);
756
757	if ((len < `8`) \|\| (len & `3`)) {
758	DRM_ERROR("invalid VCE command length (%d)!\n", len);
759	r = -EINVAL;
760	goto out;
761	}
762
763	switch (cmd) {
764	case `0x00000002`: / task info /
765	fb_idx = amdgpu_ib_get_value(ib, idx: idx + `6`);
766	bs_idx = amdgpu_ib_get_value(ib, idx: idx + `7`);
767	break;
768
769	case `0x03000001`: / encode /
770	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `10`, hi: idx + `9`,
771	size: `0`, index: `0`);
772	if (r)
773	goto out;
774
775	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `12`, hi: idx + `11`,
776	size: `0`, index: `0`);
777	if (r)
778	goto out;
779	break;
780
781	case `0x05000001`: / context buffer /
782	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `3`, hi: idx + `2`,
783	size: `0`, index: `0`);
784	if (r)
785	goto out;
786	break;
787
788	case `0x05000004`: / video bitstream buffer /
789	tmp = amdgpu_ib_get_value(ib, idx: idx + `4`);
790	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `3`, hi: idx + `2`,
791	size: tmp, index: bs_idx);
792	if (r)
793	goto out;
794	break;
795
796	case `0x05000005`: / feedback buffer /
797	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `3`, hi: idx + `2`,
798	size: `4096`, index: fb_idx);
799	if (r)
800	goto out;
801	break;
802
803	case `0x0500000d`: / MV buffer /
804	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `3`, hi: idx + `2`,
805	size: `0`, index: `0`);
806	if (r)
807	goto out;
808
809	r = amdgpu_vce_validate_bo(p, ib, lo: idx + `8`, hi: idx + `7`,
810	size: `0`, index: `0`);
811	if (r)
812	goto out;
813	break;
814	}
815
816	idx += len / `4`;
817	}
818
819	for (idx = `0`; idx < ib->length_dw;) {
820	uint32_t len = amdgpu_ib_get_value(ib, idx);
821	uint32_t cmd = amdgpu_ib_get_value(ib, idx: idx + `1`);
822
823	switch (cmd) {
824	case `0x00000001`: / session /
825	handle = amdgpu_ib_get_value(ib, idx: idx + `2`);
826	session_idx = amdgpu_vce_validate_handle(p, handle,
827	allocated: &allocated);
828	if (session_idx < `0`) {
829	r = session_idx;
830	goto out;
831	}
832	size = &p->adev->vce.img_size[session_idx];
833	break;
834
835	case `0x00000002`: / task info /
836	fb_idx = amdgpu_ib_get_value(ib, idx: idx + `6`);
837	bs_idx = amdgpu_ib_get_value(ib, idx: idx + `7`);
838	break;
839
840	case `0x01000001`: / create /
841	created \|= `1` << session_idx;
842	if (destroyed & (`1` << session_idx)) {
843	destroyed &= ~(`1` << session_idx);
844	allocated \|= `1` << session_idx;
845
846	} else if (!(allocated & (`1` << session_idx))) {
847	DRM_ERROR("Handle already in use!\n");
848	r = -EINVAL;
849	goto out;
850	}
851
852	size = amdgpu_ib_get_value(ib, idx: idx + `8`)
853	amdgpu_ib_get_value(ib, idx: idx + `10`) *
854	`8` * `3` / `2`;
855	break;
856
857	case `0x04000001`: / config extension /
858	case `0x04000002`: / pic control /
859	case `0x04000005`: / rate control /
860	case `0x04000007`: / motion estimation /
861	case `0x04000008`: / rdo /
862	case `0x04000009`: / vui /
863	case `0x05000002`: / auxiliary buffer /
864	case `0x05000009`: / clock table /
865	break;
866
867	case `0x0500000c`: / hw config /
868	switch (p->adev->asic_type) {
869	#ifdef CONFIG_DRM_AMDGPU_CIK
870	case CHIP_KAVERI:
871	case CHIP_MULLINS:
872	#endif
873	case CHIP_CARRIZO:
874	break;
875	default:
876	r = -EINVAL;
877	goto out;
878	}
879	break;
880
881	case `0x03000001`: / encode /
882	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `10`, hi: idx + `9`,
883	size: *size, index: `0`);
884	if (r)
885	goto out;
886
887	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `12`, hi: idx + `11`,
888	size: *size / `3`, index: `0`);
889	if (r)
890	goto out;
891	break;
892
893	case `0x02000001`: / destroy /
894	destroyed \|= `1` << session_idx;
895	break;
896
897	case `0x05000001`: / context buffer /
898	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `3`, hi: idx + `2`,
899	size: size `2`, index: `0`);
900	if (r)
901	goto out;
902	break;
903
904	case `0x05000004`: / video bitstream buffer /
905	tmp = amdgpu_ib_get_value(ib, idx: idx + `4`);
906	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `3`, hi: idx + `2`,
907	size: tmp, index: bs_idx);
908	if (r)
909	goto out;
910	break;
911
912	case `0x05000005`: / feedback buffer /
913	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `3`, hi: idx + `2`,
914	size: `4096`, index: fb_idx);
915	if (r)
916	goto out;
917	break;
918
919	case `0x0500000d`: / MV buffer /
920	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `3`,
921	hi: idx + `2`, size: *size, index: `0`);
922	if (r)
923	goto out;
924
925	r = amdgpu_vce_cs_reloc(p, ib, lo: idx + `8`,
926	hi: idx + `7`, size: *size / `12`, index: `0`);
927	if (r)
928	goto out;
929	break;
930
931	default:
932	DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
933	r = -EINVAL;
934	goto out;
935	}
936
937	if (session_idx == -`1`) {
938	DRM_ERROR("no session command at start of IB\n");
939	r = -EINVAL;
940	goto out;
941	}
942
943	idx += len / `4`;
944	}
945
946	if (allocated & ~created) {
947	DRM_ERROR("New session without create command!\n");
948	r = -ENOENT;
949	}
950
951	out:
952	if (!r) {
953	/ No error, free all destroyed handle slots /
954	tmp = destroyed;
955	} else {
956	/ Error during parsing, free all allocated handle slots /
957	tmp = allocated;
958	}
959
960	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i)
961	if (tmp & (`1` << i))
962	atomic_set(v: &p->adev->vce.handles[i], i: `0`);
963
964	return r;
965	}
966
967	/**
968	* amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
969	*
970	* @p: parser context
971	* @job: the job to parse
972	* @ib: the IB to patch
973	*/
974	int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
975	struct amdgpu_job *job,
976	struct amdgpu_ib *ib)
977	{
978	int session_idx = -`1`;
979	uint32_t destroyed = `0`;
980	uint32_t created = `0`;
981	uint32_t allocated = `0`;
982	uint32_t tmp, handle = `0`;
983	int i, r = `0`, idx = `0`;
984
985	while (idx < ib->length_dw) {
986	uint32_t len = amdgpu_ib_get_value(ib, idx);
987	uint32_t cmd = amdgpu_ib_get_value(ib, idx: idx + `1`);
988
989	if ((len < `8`) \|\| (len & `3`)) {
990	DRM_ERROR("invalid VCE command length (%d)!\n", len);
991	r = -EINVAL;
992	goto out;
993	}
994
995	switch (cmd) {
996	case `0x00000001`: / session /
997	handle = amdgpu_ib_get_value(ib, idx: idx + `2`);
998	session_idx = amdgpu_vce_validate_handle(p, handle,
999	allocated: &allocated);
1000	if (session_idx < `0`) {
1001	r = session_idx;
1002	goto out;
1003	}
1004	break;
1005
1006	case `0x01000001`: / create /
1007	created \|= `1` << session_idx;
1008	if (destroyed & (`1` << session_idx)) {
1009	destroyed &= ~(`1` << session_idx);
1010	allocated \|= `1` << session_idx;
1011
1012	} else if (!(allocated & (`1` << session_idx))) {
1013	DRM_ERROR("Handle already in use!\n");
1014	r = -EINVAL;
1015	goto out;
1016	}
1017
1018	break;
1019
1020	case `0x02000001`: / destroy /
1021	destroyed \|= `1` << session_idx;
1022	break;
1023
1024	default:
1025	break;
1026	}
1027
1028	if (session_idx == -`1`) {
1029	DRM_ERROR("no session command at start of IB\n");
1030	r = -EINVAL;
1031	goto out;
1032	}
1033
1034	idx += len / `4`;
1035	}
1036
1037	if (allocated & ~created) {
1038	DRM_ERROR("New session without create command!\n");
1039	r = -ENOENT;
1040	}
1041
1042	out:
1043	if (!r) {
1044	/ No error, free all destroyed handle slots /
1045	tmp = destroyed;
1046	amdgpu_ib_free(adev: p->adev, ib, NULL);
1047	} else {
1048	/ Error during parsing, free all allocated handle slots /
1049	tmp = allocated;
1050	}
1051
1052	for (i = `0`; i < AMDGPU_MAX_VCE_HANDLES; ++i)
1053	if (tmp & (`1` << i))
1054	atomic_set(v: &p->adev->vce.handles[i], i: `0`);
1055
1056	return r;
1057	}
1058
1059	/**
1060	* amdgpu_vce_ring_emit_ib - execute indirect buffer
1061	*
1062	* @ring: engine to use
1063	* @job: job to retrieve vmid from
1064	* @ib: the IB to execute
1065	* @flags: unused
1066	*
1067	*/
1068	void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
1069	struct amdgpu_job *job,
1070	struct amdgpu_ib *ib,
1071	uint32_t flags)
1072	{
1073	amdgpu_ring_write(ring, VCE_CMD_IB);
1074	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1075	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1076	amdgpu_ring_write(ring, v: ib->length_dw);
1077	}
1078
1079	/**
1080	* amdgpu_vce_ring_emit_fence - add a fence command to the ring
1081	*
1082	* @ring: engine to use
1083	* @addr: address
1084	* @seq: sequence number
1085	* @flags: fence related flags
1086	*
1087	*/
1088	void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
1089	unsigned int flags)
1090	{
1091	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1092
1093	amdgpu_ring_write(ring, VCE_CMD_FENCE);
1094	amdgpu_ring_write(ring, v: addr);
1095	amdgpu_ring_write(ring, upper_32_bits(addr));
1096	amdgpu_ring_write(ring, v: seq);
1097	amdgpu_ring_write(ring, VCE_CMD_TRAP);
1098	amdgpu_ring_write(ring, VCE_CMD_END);
1099	}
1100
1101	/**
1102	* amdgpu_vce_ring_test_ring - test if VCE ring is working
1103	*
1104	* @ring: the engine to test on
1105	*
1106	*/
1107	int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
1108	{
1109	struct amdgpu_device *adev = ring->adev;
1110	uint32_t rptr;
1111	unsigned int i;
1112	int r, timeout = adev->usec_timeout;
1113
1114	/ skip ring test for sriov/
1115	if (amdgpu_sriov_vf(adev))
1116	return `0`;
1117
1118	r = amdgpu_ring_alloc(ring, ndw: `16`);
1119	if (r)
1120	return r;
1121
1122	rptr = amdgpu_ring_get_rptr(ring);
1123
1124	amdgpu_ring_write(ring, VCE_CMD_END);
1125	amdgpu_ring_commit(ring);
1126
1127	for (i = `0`; i < timeout; i++) {
1128	if (amdgpu_ring_get_rptr(ring) != rptr)
1129	break;
1130	udelay(`1`);
1131	}
1132
1133	if (i >= timeout)
1134	r = -ETIMEDOUT;
1135
1136	return r;
1137	}
1138
1139	/**
1140	* amdgpu_vce_ring_test_ib - test if VCE IBs are working
1141	*
1142	* @ring: the engine to test on
1143	* @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
1144	*
1145	*/
1146	int amdgpu_vce_ring_test_ib(struct amdgpu_ring ring, long* timeout)
1147	{
1148	struct dma_fence *fence = NULL;
1149	long r;
1150
1151	/ skip vce ring1/2 ib test for now, since it's not reliable /
1152	if (ring != &ring->adev->vce.ring[`0`])
1153	return `0`;
1154
1155	r = amdgpu_vce_get_create_msg(ring, handle: `1`, NULL);
1156	if (r)
1157	goto error;
1158
1159	r = amdgpu_vce_get_destroy_msg(ring, handle: `1`, direct: true, fence: &fence);
1160	if (r)
1161	goto error;
1162
1163	r = dma_fence_wait_timeout(fence, intr: false, timeout);
1164	if (r == `0`)
1165	r = -ETIMEDOUT;
1166	else if (r > `0`)
1167	r = `0`;
1168
1169	error:
1170	dma_fence_put(fence);
1171	return r;
1172	}
1173
1174	enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
1175	{
1176	switch (ring) {
1177	case `0`:
1178	return AMDGPU_RING_PRIO_0;
1179	case `1`:
1180	return AMDGPU_RING_PRIO_1;
1181	case `2`:
1182	return AMDGPU_RING_PRIO_2;
1183	default:
1184	return AMDGPU_RING_PRIO_0;
1185	}
1186	}
1187

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c