radeon_uvd.c source code [linux/drivers/gpu/drm/radeon/radeon_uvd.c]

1	/*
2	* Copyright 2011 Advanced Micro Devices, Inc.
3	* All Rights Reserved.
4	*
5	* Permission is hereby granted, free of charge, to any person obtaining a
6	* copy of this software and associated documentation files (the
7	* "Software"), to deal in the Software without restriction, including
8	* without limitation the rights to use, copy, modify, merge, publish,
9	* distribute, sub license, and/or sell copies of the Software, and to
10	* permit persons to whom the Software is furnished to do so, subject to
11	* the following conditions:
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16	* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17	* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18	* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19	* USE OR OTHER DEALINGS IN THE SOFTWARE.
20	*
21	* The above copyright notice and this permission notice (including the
22	* next paragraph) shall be included in all copies or substantial portions
23	* of the Software.
24	*
25	*/
26	/*
27	* Authors:
28	* Christian König <deathsimple@vodafone.de>
29	*/
30
31	#include <linux/firmware.h>
32	#include <linux/module.h>
33
34	#include <drm/drm.h>
35
36	#include "radeon.h"
37	#include "radeon_ucode.h"
38	#include "r600d.h"
39
40	/ 1 second timeout /
41	#define UVD_IDLE_TIMEOUT_MS 1000
42
43	/ Firmware Names /
44	#define FIRMWARE_R600 "radeon/R600_uvd.bin"
45	#define FIRMWARE_RS780 "radeon/RS780_uvd.bin"
46	#define FIRMWARE_RV770 "radeon/RV770_uvd.bin"
47	#define FIRMWARE_RV710 "radeon/RV710_uvd.bin"
48	#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin"
49	#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin"
50	#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin"
51	#define FIRMWARE_BONAIRE_LEGACY "radeon/BONAIRE_uvd.bin"
52	#define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin"
53
54	MODULE_FIRMWARE(FIRMWARE_R600);
55	MODULE_FIRMWARE(FIRMWARE_RS780);
56	MODULE_FIRMWARE(FIRMWARE_RV770);
57	MODULE_FIRMWARE(FIRMWARE_RV710);
58	MODULE_FIRMWARE(FIRMWARE_CYPRESS);
59	MODULE_FIRMWARE(FIRMWARE_SUMO);
60	MODULE_FIRMWARE(FIRMWARE_TAHITI);
61	MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
62	MODULE_FIRMWARE(FIRMWARE_BONAIRE);
63
64	static void radeon_uvd_idle_work_handler(struct work_struct *work);
65
66	int radeon_uvd_init(struct radeon_device *rdev)
67	{
68	unsigned long bo_size;
69	const char fw_name = NULL, legacy_fw_name = NULL;
70	int i, r;
71
72	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);
73
74	switch (rdev->family) {
75	case CHIP_RV610:
76	case CHIP_RV630:
77	case CHIP_RV670:
78	case CHIP_RV620:
79	case CHIP_RV635:
80	legacy_fw_name = FIRMWARE_R600;
81	break;
82
83	case CHIP_RS780:
84	case CHIP_RS880:
85	legacy_fw_name = FIRMWARE_RS780;
86	break;
87
88	case CHIP_RV770:
89	legacy_fw_name = FIRMWARE_RV770;
90	break;
91
92	case CHIP_RV710:
93	case CHIP_RV730:
94	case CHIP_RV740:
95	legacy_fw_name = FIRMWARE_RV710;
96	break;
97
98	case CHIP_CYPRESS:
99	case CHIP_HEMLOCK:
100	case CHIP_JUNIPER:
101	case CHIP_REDWOOD:
102	case CHIP_CEDAR:
103	legacy_fw_name = FIRMWARE_CYPRESS;
104	break;
105
106	case CHIP_SUMO:
107	case CHIP_SUMO2:
108	case CHIP_PALM:
109	case CHIP_CAYMAN:
110	case CHIP_BARTS:
111	case CHIP_TURKS:
112	case CHIP_CAICOS:
113	legacy_fw_name = FIRMWARE_SUMO;
114	break;
115
116	case CHIP_TAHITI:
117	case CHIP_VERDE:
118	case CHIP_PITCAIRN:
119	case CHIP_ARUBA:
120	case CHIP_OLAND:
121	legacy_fw_name = FIRMWARE_TAHITI;
122	break;
123
124	case CHIP_BONAIRE:
125	case CHIP_KABINI:
126	case CHIP_KAVERI:
127	case CHIP_HAWAII:
128	case CHIP_MULLINS:
129	legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
130	fw_name = FIRMWARE_BONAIRE;
131	break;
132
133	default:
134	return -EINVAL;
135	}
136
137	rdev->uvd.fw_header_present = false;
138	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
139	if (fw_name) {
140	/ Let's try to load the newer firmware first /
141	r = request_firmware(fw: &rdev->uvd_fw, name: fw_name, device: rdev->dev);
142	if (r) {
143	dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
144	fw_name);
145	} else {
146	struct common_firmware_header hdr = (void* *)rdev->uvd_fw->data;
147	unsigned version_major, version_minor, family_id;
148
149	r = radeon_ucode_validate(fw: rdev->uvd_fw);
150	if (r)
151	return r;
152
153	rdev->uvd.fw_header_present = true;
154
155	family_id = (__force u32)(hdr->ucode_version) & `0xff`;
156	version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version))
157	>> `24`) & `0xff`;
158	version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version))
159	>> `8`) & `0xff`;
160	DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
161	version_major, version_minor, family_id);
162
163	/*
164	* Limit the number of UVD handles depending on
165	* microcode major and minor versions.
166	*/
167	if ((version_major >= `0x01`) && (version_minor >= `0x37`))
168	rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
169	}
170	}
171
172	/*
173	* In case there is only legacy firmware, or we encounter an error
174	* while loading the new firmware, we fall back to loading the legacy
175	* firmware now.
176	*/
177	if (!fw_name \|\| r) {
178	r = request_firmware(fw: &rdev->uvd_fw, name: legacy_fw_name, device: rdev->dev);
179	if (r) {
180	dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
181	legacy_fw_name);
182	return r;
183	}
184	}
185
186	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + `8`) +
187	RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
188	RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
189	r = radeon_bo_create(rdev, size: bo_size, PAGE_SIZE, kernel: true,
190	RADEON_GEM_DOMAIN_VRAM, flags: `0`, NULL,
191	NULL, bo_ptr: &rdev->uvd.vcpu_bo);
192	if (r) {
193	dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
194	return r;
195	}
196
197	r = radeon_bo_reserve(bo: rdev->uvd.vcpu_bo, no_intr: false);
198	if (r) {
199	radeon_bo_unref(bo: &rdev->uvd.vcpu_bo);
200	dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
201	return r;
202	}
203
204	r = radeon_bo_pin(bo: rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
205	gpu_addr: &rdev->uvd.gpu_addr);
206	if (r) {
207	radeon_bo_unreserve(bo: rdev->uvd.vcpu_bo);
208	radeon_bo_unref(bo: &rdev->uvd.vcpu_bo);
209	dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
210	return r;
211	}
212
213	r = radeon_bo_kmap(bo: rdev->uvd.vcpu_bo, ptr: &rdev->uvd.cpu_addr);
214	if (r) {
215	dev_err(rdev->dev, "(%d) UVD map failed\n", r);
216	return r;
217	}
218
219	radeon_bo_unreserve(bo: rdev->uvd.vcpu_bo);
220
221	for (i = `0`; i < rdev->uvd.max_handles; ++i) {
222	atomic_set(v: &rdev->uvd.handles[i], i: `0`);
223	rdev->uvd.filp[i] = NULL;
224	rdev->uvd.img_size[i] = `0`;
225	}
226
227	return `0`;
228	}
229
230	void radeon_uvd_fini(struct radeon_device *rdev)
231	{
232	int r;
233
234	if (rdev->uvd.vcpu_bo == NULL)
235	return;
236
237	r = radeon_bo_reserve(bo: rdev->uvd.vcpu_bo, no_intr: false);
238	if (!r) {
239	radeon_bo_kunmap(bo: rdev->uvd.vcpu_bo);
240	radeon_bo_unpin(bo: rdev->uvd.vcpu_bo);
241	radeon_bo_unreserve(bo: rdev->uvd.vcpu_bo);
242	}
243
244	radeon_bo_unref(bo: &rdev->uvd.vcpu_bo);
245
246	radeon_ring_fini(rdev, cp: &rdev->ring[R600_RING_TYPE_UVD_INDEX]);
247
248	release_firmware(fw: rdev->uvd_fw);
249	}
250
251	int radeon_uvd_suspend(struct radeon_device *rdev)
252	{
253	int i, r;
254
255	if (rdev->uvd.vcpu_bo == NULL)
256	return `0`;
257
258	for (i = `0`; i < rdev->uvd.max_handles; ++i) {
259	uint32_t handle = atomic_read(v: &rdev->uvd.handles[i]);
260	if (handle != `0`) {
261	struct radeon_fence *fence;
262
263	radeon_uvd_note_usage(rdev);
264
265	r = radeon_uvd_get_destroy_msg(rdev,
266	R600_RING_TYPE_UVD_INDEX, handle, fence: &fence);
267	if (r) {
268	DRM_ERROR("Error destroying UVD (%d)!\n", r);
269	continue;
270	}
271
272	radeon_fence_wait(fence, interruptible: false);
273	radeon_fence_unref(fence: &fence);
274
275	rdev->uvd.filp[i] = NULL;
276	atomic_set(v: &rdev->uvd.handles[i], i: `0`);
277	}
278	}
279
280	return `0`;
281	}
282
283	int radeon_uvd_resume(struct radeon_device *rdev)
284	{
285	unsigned size;
286	void *ptr;
287
288	if (rdev->uvd.vcpu_bo == NULL)
289	return -EINVAL;
290
291	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
292
293	size = radeon_bo_size(bo: rdev->uvd.vcpu_bo);
294	size -= rdev->uvd_fw->size;
295
296	ptr = rdev->uvd.cpu_addr;
297	ptr += rdev->uvd_fw->size;
298
299	memset_io((void __iomem *)ptr, `0`, size);
300
301	return `0`;
302	}
303
304	void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
305	uint32_t allowed_domains)
306	{
307	int i;
308
309	for (i = `0`; i < rbo->placement.num_placement; ++i) {
310	rbo->placements[i].fpfn = `0` >> PAGE_SHIFT;
311	rbo->placements[i].lpfn = (`256` * `1024` * `1024`) >> PAGE_SHIFT;
312	}
313
314	/ If it must be in VRAM it must be in the first segment as well /
315	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
316	return;
317
318	/ abort if we already have more than one placement /
319	if (rbo->placement.num_placement > `1`)
320	return;
321
322	/ add another 256MB segment /
323	rbo->placements[`1`] = rbo->placements[`0`];
324	rbo->placements[`1`].fpfn += (`256` * `1024` * `1024`) >> PAGE_SHIFT;
325	rbo->placements[`1`].lpfn += (`256` * `1024` * `1024`) >> PAGE_SHIFT;
326	rbo->placement.num_placement++;
327	}
328
329	void radeon_uvd_free_handles(struct radeon_device rdev, struct* drm_file *filp)
330	{
331	int i, r;
332	for (i = `0`; i < rdev->uvd.max_handles; ++i) {
333	uint32_t handle = atomic_read(v: &rdev->uvd.handles[i]);
334	if (handle != `0` && rdev->uvd.filp[i] == filp) {
335	struct radeon_fence *fence;
336
337	radeon_uvd_note_usage(rdev);
338
339	r = radeon_uvd_get_destroy_msg(rdev,
340	R600_RING_TYPE_UVD_INDEX, handle, fence: &fence);
341	if (r) {
342	DRM_ERROR("Error destroying UVD (%d)!\n", r);
343	continue;
344	}
345
346	radeon_fence_wait(fence, interruptible: false);
347	radeon_fence_unref(fence: &fence);
348
349	rdev->uvd.filp[i] = NULL;
350	atomic_set(v: &rdev->uvd.handles[i], i: `0`);
351	}
352	}
353	}
354
355	static int radeon_uvd_cs_msg_decode(uint32_t msg, unsigned* buf_sizes[])
356	{
357	unsigned stream_type = msg[`4`];
358	unsigned width = msg[`6`];
359	unsigned height = msg[`7`];
360	unsigned dpb_size = msg[`9`];
361	unsigned pitch = msg[`28`];
362
363	unsigned width_in_mb = width / `16`;
364	unsigned height_in_mb = ALIGN(height / `16`, `2`);
365
366	unsigned image_size, tmp, min_dpb_size;
367
368	image_size = width * height;
369	image_size += image_size / `2`;
370	image_size = ALIGN(image_size, `1024`);
371
372	switch (stream_type) {
373	case `0`: / H264 /
374
375	/ reference picture buffer /
376	min_dpb_size = image_size * `17`;
377
378	/ macroblock context buffer /
379	min_dpb_size += width_in_mb * height_in_mb * `17` * `192`;
380
381	/ IT surface buffer /
382	min_dpb_size += width_in_mb * height_in_mb * `32`;
383	break;
384
385	case `1`: / VC1 /
386
387	/ reference picture buffer /
388	min_dpb_size = image_size * `3`;
389
390	/ CONTEXT_BUFFER /
391	min_dpb_size += width_in_mb * height_in_mb * `128`;
392
393	/ IT surface buffer /
394	min_dpb_size += width_in_mb * `64`;
395
396	/ DB surface buffer /
397	min_dpb_size += width_in_mb * `128`;
398
399	/ BP /
400	tmp = max(width_in_mb, height_in_mb);
401	min_dpb_size += ALIGN(tmp * `7` * `16`, `64`);
402	break;
403
404	case `3`: / MPEG2 /
405
406	/ reference picture buffer /
407	min_dpb_size = image_size * `3`;
408	break;
409
410	case `4`: / MPEG4 /
411
412	/ reference picture buffer /
413	min_dpb_size = image_size * `3`;
414
415	/ CM /
416	min_dpb_size += width_in_mb * height_in_mb * `64`;
417
418	/ IT surface buffer /
419	min_dpb_size += ALIGN(width_in_mb * height_in_mb * `32`, `64`);
420	break;
421
422	default:
423	DRM_ERROR("UVD codec not handled %d!\n", stream_type);
424	return -EINVAL;
425	}
426
427	if (width > pitch) {
428	DRM_ERROR("Invalid UVD decoding target pitch!\n");
429	return -EINVAL;
430	}
431
432	if (dpb_size < min_dpb_size) {
433	DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
434	dpb_size, min_dpb_size);
435	return -EINVAL;
436	}
437
438	buf_sizes[`0x1`] = dpb_size;
439	buf_sizes[`0x2`] = image_size;
440	return `0`;
441	}
442
443	static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
444	unsigned stream_type)
445	{
446	switch (stream_type) {
447	case `0`: / H264 /
448	case `1`: / VC1 /
449	/ always supported /
450	return `0`;
451
452	case `3`: / MPEG2 /
453	case `4`: / MPEG4 /
454	/ only since UVD 3 /
455	if (p->rdev->family >= CHIP_PALM)
456	return `0`;
457
458	fallthrough;
459	default:
460	DRM_ERROR("UVD codec not supported by hardware %d!\n",
461	stream_type);
462	return -EINVAL;
463	}
464	}
465
466	static int radeon_uvd_cs_msg(struct radeon_cs_parser p, struct* radeon_bo *bo,
467	unsigned offset, unsigned buf_sizes[])
468	{
469	int32_t *msg, msg_type, handle;
470	unsigned img_size = `0`;
471	void *ptr;
472	int i, r;
473
474	if (offset & `0x3F`) {
475	DRM_ERROR("UVD messages must be 64 byte aligned!\n");
476	return -EINVAL;
477	}
478
479	r = radeon_bo_kmap(bo, ptr: &ptr);
480	if (r) {
481	DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
482	return r;
483	}
484
485	msg = ptr + offset;
486
487	msg_type = msg[`1`];
488	handle = msg[`2`];
489
490	if (handle == `0`) {
491	radeon_bo_kunmap(bo);
492	DRM_ERROR("Invalid UVD handle!\n");
493	return -EINVAL;
494	}
495
496	switch (msg_type) {
497	case `0`:
498	/ it's a create msg, calc image size (width * height) /
499	img_size = msg[`7`] * msg[`8`];
500
501	r = radeon_uvd_validate_codec(p, stream_type: msg[`4`]);
502	radeon_bo_kunmap(bo);
503	if (r)
504	return r;
505
506	/ try to alloc a new handle /
507	for (i = `0`; i < p->rdev->uvd.max_handles; ++i) {
508	if (atomic_read(v: &p->rdev->uvd.handles[i]) == handle) {
509	DRM_ERROR("Handle 0x%x already in use!\n", handle);
510	return -EINVAL;
511	}
512
513	if (!atomic_cmpxchg(v: &p->rdev->uvd.handles[i], old: `0`, new: handle)) {
514	p->rdev->uvd.filp[i] = p->filp;
515	p->rdev->uvd.img_size[i] = img_size;
516	return `0`;
517	}
518	}
519
520	DRM_ERROR("No more free UVD handles!\n");
521	return -EINVAL;
522
523	case `1`:
524	/ it's a decode msg, validate codec and calc buffer sizes /
525	r = radeon_uvd_validate_codec(p, stream_type: msg[`4`]);
526	if (!r)
527	r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
528	radeon_bo_kunmap(bo);
529	if (r)
530	return r;
531
532	/ validate the handle /
533	for (i = `0`; i < p->rdev->uvd.max_handles; ++i) {
534	if (atomic_read(v: &p->rdev->uvd.handles[i]) == handle) {
535	if (p->rdev->uvd.filp[i] != p->filp) {
536	DRM_ERROR("UVD handle collision detected!\n");
537	return -EINVAL;
538	}
539	return `0`;
540	}
541	}
542
543	DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
544	return -ENOENT;
545
546	case `2`:
547	/ it's a destroy msg, free the handle /
548	for (i = `0`; i < p->rdev->uvd.max_handles; ++i)
549	atomic_cmpxchg(v: &p->rdev->uvd.handles[i], old: handle, new: `0`);
550	radeon_bo_kunmap(bo);
551	return `0`;
552
553	default:
554	DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
555	}
556
557	radeon_bo_kunmap(bo);
558	return -EINVAL;
559	}
560
561	static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
562	int data0, int data1,
563	unsigned buf_sizes[], bool *has_msg_cmd)
564	{
565	struct radeon_cs_chunk *relocs_chunk;
566	struct radeon_bo_list *reloc;
567	unsigned idx, cmd, offset;
568	uint64_t start, end;
569	int r;
570
571	relocs_chunk = p->chunk_relocs;
572	offset = radeon_get_ib_value(p, idx: data0);
573	idx = radeon_get_ib_value(p, idx: data1);
574	if (idx >= relocs_chunk->length_dw) {
575	DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
576	idx, relocs_chunk->length_dw);
577	return -EINVAL;
578	}
579
580	reloc = &p->relocs[(idx / `4`)];
581	start = reloc->gpu_offset;
582	end = start + radeon_bo_size(bo: reloc->robj);
583	start += offset;
584
585	p->ib.ptr[data0] = start & `0xFFFFFFFF`;
586	p->ib.ptr[data1] = start >> `32`;
587
588	cmd = radeon_get_ib_value(p, idx: p->idx) >> `1`;
589
590	if (cmd < `0x4`) {
591	if (end <= start) {
592	DRM_ERROR("invalid reloc offset %X!\n", offset);
593	return -EINVAL;
594	}
595	if ((end - start) < buf_sizes[cmd]) {
596	DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
597	(unsigned)(end - start), buf_sizes[cmd]);
598	return -EINVAL;
599	}
600
601	} else if (cmd != `0x100`) {
602	DRM_ERROR("invalid UVD command %X!\n", cmd);
603	return -EINVAL;
604	}
605
606	if ((start >> `28`) != ((end - `1`) >> `28`)) {
607	DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
608	start, end);
609	return -EINVAL;
610	}
611
612	/ TODO: is this still necessary on NI+ ? /
613	if ((cmd == `0` \|\| cmd == `0x3`) &&
614	(start >> `28`) != (p->rdev->uvd.gpu_addr >> `28`)) {
615	DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
616	start, end);
617	return -EINVAL;
618	}
619
620	if (cmd == `0`) {
621	if (*has_msg_cmd) {
622	DRM_ERROR("More than one message in a UVD-IB!\n");
623	return -EINVAL;
624	}
625	*has_msg_cmd = true;
626	r = radeon_uvd_cs_msg(p, bo: reloc->robj, offset, buf_sizes);
627	if (r)
628	return r;
629	} else if (!*has_msg_cmd) {
630	DRM_ERROR("Message needed before other commands are send!\n");
631	return -EINVAL;
632	}
633
634	return `0`;
635	}
636
637	static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
638	struct radeon_cs_packet *pkt,
639	int data0, int* *data1,
640	unsigned buf_sizes[],
641	bool *has_msg_cmd)
642	{
643	int i, r;
644
645	p->idx++;
646	for (i = `0`; i <= pkt->count; ++i) {
647	switch (pkt->reg + i*`4`) {
648	case UVD_GPCOM_VCPU_DATA0:
649	*data0 = p->idx;
650	break;
651	case UVD_GPCOM_VCPU_DATA1:
652	*data1 = p->idx;
653	break;
654	case UVD_GPCOM_VCPU_CMD:
655	r = radeon_uvd_cs_reloc(p, data0: data0, data1: data1,
656	buf_sizes, has_msg_cmd);
657	if (r)
658	return r;
659	break;
660	case UVD_ENGINE_CNTL:
661	case UVD_NO_OP:
662	break;
663	default:
664	DRM_ERROR("Invalid reg 0x%X!\n",
665	pkt->reg + i*`4`);
666	return -EINVAL;
667	}
668	p->idx++;
669	}
670	return `0`;
671	}
672
673	int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
674	{
675	struct radeon_cs_packet pkt;
676	int r, data0 = `0`, data1 = `0`;
677
678	/ does the IB has a msg command /
679	bool has_msg_cmd = false;
680
681	/ minimum buffer sizes /
682	unsigned buf_sizes[] = {
683	[`0x00000000`] = `2048`,
684	[`0x00000001`] = `32` * `1024` * `1024`,
685	[`0x00000002`] = `2048` * `1152` * `3`,
686	[`0x00000003`] = `2048`,
687	};
688
689	if (p->chunk_ib->length_dw % `16`) {
690	DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
691	p->chunk_ib->length_dw);
692	return -EINVAL;
693	}
694
695	if (p->chunk_relocs == NULL) {
696	DRM_ERROR("No relocation chunk !\n");
697	return -EINVAL;
698	}
699
700
701	do {
702	r = radeon_cs_packet_parse(p, pkt: &pkt, idx: p->idx);
703	if (r)
704	return r;
705	switch (pkt.type) {
706	case RADEON_PACKET_TYPE0:
707	r = radeon_uvd_cs_reg(p, pkt: &pkt, data0: &data0, data1: &data1,
708	buf_sizes, has_msg_cmd: &has_msg_cmd);
709	if (r)
710	return r;
711	break;
712	case RADEON_PACKET_TYPE2:
713	p->idx += pkt.count + `2`;
714	break;
715	default:
716	DRM_ERROR("Unknown packet type %d !\n", pkt.type);
717	return -EINVAL;
718	}
719	} while (p->idx < p->chunk_ib->length_dw);
720
721	if (!has_msg_cmd) {
722	DRM_ERROR("UVD-IBs need a msg command!\n");
723	return -EINVAL;
724	}
725
726	return `0`;
727	}
728
729	static int radeon_uvd_send_msg(struct radeon_device *rdev,
730	int ring, uint64_t addr,
731	struct radeon_fence **fence)
732	{
733	struct radeon_ib ib;
734	int i, r;
735
736	r = radeon_ib_get(rdev, ring, ib: &ib, NULL, size: `64`);
737	if (r)
738	return r;
739
740	ib.ptr[`0`] = PACKET0(UVD_GPCOM_VCPU_DATA0, `0`);
741	ib.ptr[`1`] = addr;
742	ib.ptr[`2`] = PACKET0(UVD_GPCOM_VCPU_DATA1, `0`);
743	ib.ptr[`3`] = addr >> `32`;
744	ib.ptr[`4`] = PACKET0(UVD_GPCOM_VCPU_CMD, `0`);
745	ib.ptr[`5`] = `0`;
746	for (i = `6`; i < `16`; i += `2`) {
747	ib.ptr[i] = PACKET0(UVD_NO_OP, `0`);
748	ib.ptr[i+`1`] = `0`;
749	}
750	ib.length_dw = `16`;
751
752	r = radeon_ib_schedule(rdev, ib: &ib, NULL, hdp_flush: false);
753
754	if (fence)
755	*fence = radeon_fence_ref(fence: ib.fence);
756
757	radeon_ib_free(rdev, ib: &ib);
758	return r;
759	}
760
761	/*
762	* multiple fence commands without any stream commands in between can
763	* crash the vcpu so just try to emmit a dummy create/destroy msg to
764	* avoid this
765	*/
766	int radeon_uvd_get_create_msg(struct radeon_device rdev, int* ring,
767	uint32_t handle, struct radeon_fence **fence)
768	{
769	/ we use the last page of the vcpu bo for the UVD message /
770	uint64_t offs = radeon_bo_size(bo: rdev->uvd.vcpu_bo) -
771	RADEON_GPU_PAGE_SIZE;
772
773	uint32_t __iomem msg = (void* __iomem *)(rdev->uvd.cpu_addr + offs);
774	uint64_t addr = rdev->uvd.gpu_addr + offs;
775
776	int r, i;
777
778	r = radeon_bo_reserve(bo: rdev->uvd.vcpu_bo, no_intr: true);
779	if (r)
780	return r;
781
782	/ stitch together an UVD create msg /
783	writel(val: (__force u32)cpu_to_le32(`0x00000de4`), addr: &msg[`0`]);
784	writel(val: `0x0`, addr: (void __iomem *)&msg[`1`]);
785	writel(val: (__force u32)cpu_to_le32(handle), addr: &msg[`2`]);
786	writel(val: `0x0`, addr: &msg[`3`]);
787	writel(val: `0x0`, addr: &msg[`4`]);
788	writel(val: `0x0`, addr: &msg[`5`]);
789	writel(val: `0x0`, addr: &msg[`6`]);
790	writel(val: (__force u32)cpu_to_le32(`0x00000780`), addr: &msg[`7`]);
791	writel(val: (__force u32)cpu_to_le32(`0x00000440`), addr: &msg[`8`]);
792	writel(val: `0x0`, addr: &msg[`9`]);
793	writel(val: (__force u32)cpu_to_le32(`0x01b37000`), addr: &msg[`10`]);
794	for (i = `11`; i < `1024`; ++i)
795	writel(val: `0x0`, addr: &msg[i]);
796
797	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
798	radeon_bo_unreserve(bo: rdev->uvd.vcpu_bo);
799	return r;
800	}
801
802	int radeon_uvd_get_destroy_msg(struct radeon_device rdev, int* ring,
803	uint32_t handle, struct radeon_fence **fence)
804	{
805	/ we use the last page of the vcpu bo for the UVD message /
806	uint64_t offs = radeon_bo_size(bo: rdev->uvd.vcpu_bo) -
807	RADEON_GPU_PAGE_SIZE;
808
809	uint32_t __iomem msg = (void* __iomem *)(rdev->uvd.cpu_addr + offs);
810	uint64_t addr = rdev->uvd.gpu_addr + offs;
811
812	int r, i;
813
814	r = radeon_bo_reserve(bo: rdev->uvd.vcpu_bo, no_intr: true);
815	if (r)
816	return r;
817
818	/ stitch together an UVD destroy msg /
819	writel(val: (__force u32)cpu_to_le32(`0x00000de4`), addr: &msg[`0`]);
820	writel(val: (__force u32)cpu_to_le32(`0x00000002`), addr: &msg[`1`]);
821	writel(val: (__force u32)cpu_to_le32(handle), addr: &msg[`2`]);
822	writel(val: `0x0`, addr: &msg[`3`]);
823	for (i = `4`; i < `1024`; ++i)
824	writel(val: `0x0`, addr: &msg[i]);
825
826	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
827	radeon_bo_unreserve(bo: rdev->uvd.vcpu_bo);
828	return r;
829	}
830
831	/**
832	* radeon_uvd_count_handles - count number of open streams
833	*
834	* @rdev: radeon_device pointer
835	* @sd: number of SD streams
836	* @hd: number of HD streams
837	*
838	* Count the number of open SD/HD streams as a hint for power mangement
839	*/
840	static void radeon_uvd_count_handles(struct radeon_device *rdev,
841	unsigned sd, unsigned* *hd)
842	{
843	unsigned i;
844
845	*sd = `0`;
846	*hd = `0`;
847
848	for (i = `0`; i < rdev->uvd.max_handles; ++i) {
849	if (!atomic_read(v: &rdev->uvd.handles[i]))
850	continue;
851
852	if (rdev->uvd.img_size[i] >= `720`*`576`)
853	++(*hd);
854	else
855	++(*sd);
856	}
857	}
858
859	static void radeon_uvd_idle_work_handler(struct work_struct *work)
860	{
861	struct radeon_device *rdev =
862	container_of(work, struct radeon_device, uvd.idle_work.work);
863
864	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == `0`) {
865	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
866	radeon_uvd_count_handles(rdev, sd: &rdev->pm.dpm.sd,
867	hd: &rdev->pm.dpm.hd);
868	radeon_dpm_enable_uvd(rdev, enable: false);
869	} else {
870	radeon_set_uvd_clocks(rdev, `0`, `0`);
871	}
872	} else {
873	schedule_delayed_work(dwork: &rdev->uvd.idle_work,
874	delay: msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
875	}
876	}
877
878	void radeon_uvd_note_usage(struct radeon_device *rdev)
879	{
880	bool streams_changed = false;
881	bool set_clocks = !cancel_delayed_work_sync(dwork: &rdev->uvd.idle_work);
882	set_clocks &= schedule_delayed_work(dwork: &rdev->uvd.idle_work,
883	delay: msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
884
885	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
886	unsigned hd = `0`, sd = `0`;
887	radeon_uvd_count_handles(rdev, sd: &sd, hd: &hd);
888	if ((rdev->pm.dpm.sd != sd) \|\|
889	(rdev->pm.dpm.hd != hd)) {
890	rdev->pm.dpm.sd = sd;
891	rdev->pm.dpm.hd = hd;
892	/ disable this for now /
893	/streams_changed = true;/
894	}
895	}
896
897	if (set_clocks \|\| streams_changed) {
898	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
899	radeon_dpm_enable_uvd(rdev, enable: true);
900	} else {
901	radeon_set_uvd_clocks(rdev, `53300`, `40000`);
902	}
903	}
904	}
905
/**
 * radeon_uvd_calc_upll_post_div - pick a post divider for one UPLL output
 *
 * @vco_freq: VCO frequency to divide
 * @target_freq: wanted output frequency, must not be zero
 * @pd_min: post divider minimum
 * @pd_even: post divider must be even above this value
 *
 * Returns the smallest acceptable divider that keeps vco_freq / divider at
 * or below @target_freq: at least @pd_min (and never 0), and rounded up to
 * the next even value when it exceeds @pd_even.
 */
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/*
	 * vco_freq < target_freq combined with pd_min == 0 would leave the
	 * divider at 0 and make the check below divide by zero.
	 */
	if (post_div == 0)
		post_div = 1;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}
927
928	/**
929	* radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
930	*
931	* @rdev: radeon_device pointer
932	* @vclk: wanted VCLK
933	* @dclk: wanted DCLK
934	* @vco_min: minimum VCO frequency
935	* @vco_max: maximum VCO frequency
936	* @fb_factor: factor to multiply vco freq with
937	* @fb_mask: limit and bitmask for feedback divider
938	* @pd_min: post divider minimum
939	* @pd_max: post divider maximum
940	* @pd_even: post divider must be even above this value
941	* @optimal_fb_div: resulting feedback divider
942	* @optimal_vclk_div: resulting vclk post divider
943	* @optimal_dclk_div: resulting dclk post divider
944	*
945	* Calculate dividers for UVDs UPLL (R6xx-SI, except APUs).
946	* Returns zero on success -EINVAL on error.
947	*/
948	int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
949	unsigned vclk, unsigned dclk,
950	unsigned vco_min, unsigned vco_max,
951	unsigned fb_factor, unsigned fb_mask,
952	unsigned pd_min, unsigned pd_max,
953	unsigned pd_even,
954	unsigned *optimal_fb_div,
955	unsigned *optimal_vclk_div,
956	unsigned *optimal_dclk_div)
957	{
958	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;
959
960	/ start off with something large /
961	unsigned optimal_score = ~`0`;
962
963	/ loop through vco from low to high /
964	vco_min = max3(vco_min, vclk, dclk);
965	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += `100`) {
966
967	uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
968	unsigned vclk_div, dclk_div, score;
969
970	do_div(fb_div, ref_freq);
971
972	/ fb div out of range ? /
973	if (fb_div > fb_mask)
974	break; / it can oly get worse /
975
976	fb_div &= fb_mask;
977
978	/ calc vclk divider with current vco freq /
979	vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, target_freq: vclk,
980	pd_min, pd_even);
981	if (vclk_div > pd_max)
982	break; / vco is too big, it has to stop /
983
984	/ calc dclk divider with current vco freq /
985	dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, target_freq: dclk,
986	pd_min, pd_even);
987	if (dclk_div > pd_max)
988	break; / vco is too big, it has to stop /
989
990	/ calc score with current vco freq /
991	score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
992
993	/ determine if this vco setting is better than current optimal settings /
994	if (score < optimal_score) {
995	*optimal_fb_div = fb_div;
996	*optimal_vclk_div = vclk_div;
997	*optimal_dclk_div = dclk_div;
998	optimal_score = score;
999	if (optimal_score == `0`)
1000	break; / it can't get better than this /
1001	}
1002	}
1003
1004	/ did we found a valid setup ? /
1005	if (optimal_score == ~`0`)
1006	return -EINVAL;
1007
1008	return `0`;
1009	}
1010
1011	int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
1012	unsigned cg_upll_func_cntl)
1013	{
1014	unsigned i;
1015
1016	/ make sure UPLL_CTLREQ is deasserted /
1017	WREG32_P(cg_upll_func_cntl, `0`, ~UPLL_CTLREQ_MASK);
1018
1019	mdelay(`10`);
1020
1021	/ assert UPLL_CTLREQ /
1022	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
1023
1024	/ wait for CTLACK and CTLACK2 to get asserted /
1025	for (i = `0`; i < `100`; ++i) {
1026	uint32_t mask = UPLL_CTLACK_MASK \| UPLL_CTLACK2_MASK;
1027	if ((RREG32(cg_upll_func_cntl) & mask) == mask)
1028	break;
1029	mdelay(`10`);
1030	}
1031
1032	/ deassert UPLL_CTLREQ /
1033	WREG32_P(cg_upll_func_cntl, `0`, ~UPLL_CTLREQ_MASK);
1034
1035	if (i == `100`) {
1036	DRM_ERROR("Timeout setting UVD clocks!\n");
1037	return -ETIMEDOUT;
1038	}
1039
1040	return `0`;
1041	}
1042

source code of linux/drivers/gpu/drm/radeon/radeon_uvd.c