1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2014 Intel Corporation |
4 | */ |
5 | |
6 | #include "gem/i915_gem_lmem.h" |
7 | |
8 | #include "gen8_engine_cs.h" |
9 | #include "i915_drv.h" |
10 | #include "i915_perf.h" |
11 | #include "i915_reg.h" |
12 | #include "intel_context.h" |
13 | #include "intel_engine.h" |
14 | #include "intel_engine_regs.h" |
15 | #include "intel_gpu_commands.h" |
16 | #include "intel_gt.h" |
17 | #include "intel_gt_regs.h" |
18 | #include "intel_lrc.h" |
19 | #include "intel_lrc_reg.h" |
20 | #include "intel_ring.h" |
21 | #include "shmem_utils.h" |
22 | |
23 | /* |
24 | * The per-platform tables are u8-encoded in @data. Decode @data and set the |
25 | * addresses' offset and commands in @regs. The following encoding is used |
26 | * for each byte. There are 2 steps: decoding commands and decoding addresses. |
27 | * |
28 | * Commands: |
29 | * [7]: create NOPs - number of NOPs are set in lower bits |
30 | * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set |
31 | * MI_LRI_FORCE_POSTED |
32 | * [5:0]: Number of NOPs or registers to set values to in case of |
33 | * MI_LOAD_REGISTER_IMM |
34 | * |
35 | * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" |
36 | * number of registers. They are set by using the REG/REG16 macros: the former |
37 | * is used for offsets smaller than 0x200 while the latter is for values bigger |
38 | * than that. Those macros already set all the bits documented below correctly: |
39 | * |
40 | * [7]: When a register offset needs more than 6 bits, use additional bytes, to |
41 | * follow, for the lower bits |
42 | * [6:0]: Register offset, without considering the engine base. |
43 | * |
44 | * This function only tweaks the commands and register offsets. Values are not |
45 | * filled out. |
46 | */ |
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	/* A zero byte (END) terminates the table. */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			/* NOP(n): leave n dwords of the image untouched */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI command byte: [5:0] register count, [6] POSTED flag */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			/*
			 * Variable-length offset: each byte supplies 7 bits,
			 * BIT(7) marks that another (lower) byte follows.
			 */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Write the register address; skip its value slot. */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}
105 | |
/* Context-image register layout for gen8 non-render (xCS) engines. */
static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};
140 | |
/* Context-image register layout for gen9 non-render (xCS) engines. */
static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};
224 | |
/* Context-image register layout for gen12 non-render (xCS) engines. */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};
256 | |
/* Context-image register layout for DG2 non-render (xCS) engines. */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};
290 | |
/* Context-image register layout for the gen8 render (RCS) engine. */
static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};
327 | |
/* Context-image register layout for the gen9 render (RCS) engine. */
static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};
411 | |
/* Context-image register layout for the gen11 render (RCS) engine. */
static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};
452 | |
/* Context-image register layout for the gen12 render (RCS) engine. */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};
548 | |
/* Context-image register layout for the XeHP render (RCS) engine. */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
589 | |
/* Context-image register layout for the DG2 render (RCS) engine. */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
632 | |
/* Context-image register layout for the MTL render (RCS) engine. */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
675 | |
676 | #undef END |
677 | #undef REG16 |
678 | #undef REG |
679 | #undef LRI |
680 | #undef NOP |
681 | |
682 | static const u8 *reg_offsets(const struct intel_engine_cs *engine) |
683 | { |
684 | /* |
685 | * The gen12+ lists only have the registers we program in the basic |
686 | * default state. We rely on the context image using relative |
687 | * addressing to automatic fixup the register state between the |
688 | * physical engines for virtual engine. |
689 | */ |
690 | GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 && |
691 | !intel_engine_has_relative_mmio(engine)); |
692 | |
693 | if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { |
694 | if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) |
695 | return mtl_rcs_offsets; |
696 | else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) |
697 | return dg2_rcs_offsets; |
698 | else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) |
699 | return xehp_rcs_offsets; |
700 | else if (GRAPHICS_VER(engine->i915) >= 12) |
701 | return gen12_rcs_offsets; |
702 | else if (GRAPHICS_VER(engine->i915) >= 11) |
703 | return gen11_rcs_offsets; |
704 | else if (GRAPHICS_VER(engine->i915) >= 9) |
705 | return gen9_rcs_offsets; |
706 | else |
707 | return gen8_rcs_offsets; |
708 | } else { |
709 | if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) |
710 | return dg2_xcs_offsets; |
711 | else if (GRAPHICS_VER(engine->i915) >= 12) |
712 | return gen12_xcs_offsets; |
713 | else if (GRAPHICS_VER(engine->i915) >= 9) |
714 | return gen9_xcs_offsets; |
715 | else |
716 | return gen8_xcs_offsets; |
717 | } |
718 | } |
719 | |
720 | static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) |
721 | { |
722 | if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) |
723 | return 0x70; |
724 | else if (GRAPHICS_VER(engine->i915) >= 12) |
725 | return 0x60; |
726 | else if (GRAPHICS_VER(engine->i915) >= 9) |
727 | return 0x54; |
728 | else if (engine->class == RENDER_CLASS) |
729 | return 0x58; |
730 | else |
731 | return -1; |
732 | } |
733 | |
734 | static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) |
735 | { |
736 | if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) |
737 | return 0x80; |
738 | else if (GRAPHICS_VER(engine->i915) >= 12) |
739 | return 0x70; |
740 | else if (GRAPHICS_VER(engine->i915) >= 9) |
741 | return 0x64; |
742 | else if (GRAPHICS_VER(engine->i915) >= 8 && |
743 | engine->class == RENDER_CLASS) |
744 | return 0xc4; |
745 | else |
746 | return -1; |
747 | } |
748 | |
749 | static int lrc_ring_gpr0(const struct intel_engine_cs *engine) |
750 | { |
751 | if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) |
752 | return 0x84; |
753 | else if (GRAPHICS_VER(engine->i915) >= 12) |
754 | return 0x74; |
755 | else if (GRAPHICS_VER(engine->i915) >= 9) |
756 | return 0x68; |
757 | else if (engine->class == RENDER_CLASS) |
758 | return 0xd8; |
759 | else |
760 | return -1; |
761 | } |
762 | |
763 | static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) |
764 | { |
765 | if (GRAPHICS_VER(engine->i915) >= 12) |
766 | return 0x12; |
767 | else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS) |
768 | return 0x18; |
769 | else |
770 | return -1; |
771 | } |
772 | |
/*
 * Dword index of the INDIRECT_CTX pointer: it sits one (reg, value) pair
 * after the per-context workaround batch-buffer entry. Propagates -1 when
 * that entry is absent.
 */
static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int base = lrc_ring_wa_bb_per_ctx(engine);

	return base < 0 ? base : base + 2;
}
783 | |
/*
 * Dword index of the INDIRECT_CTX_OFFSET entry: one (reg, value) pair
 * after the INDIRECT_CTX pointer. Propagates -1 when that is absent.
 */
static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int base = lrc_ring_indirect_ptr(engine);

	return base < 0 ? base : base + 2;
}
794 | |
795 | static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) |
796 | { |
797 | |
798 | if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) |
799 | /* |
800 | * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL |
801 | * simply to match the RCS context image layout. |
802 | */ |
803 | return 0xc6; |
804 | else if (engine->class != RENDER_CLASS) |
805 | return -1; |
806 | else if (GRAPHICS_VER(engine->i915) >= 12) |
807 | return 0xb6; |
808 | else if (GRAPHICS_VER(engine->i915) >= 11) |
809 | return 0xaa; |
810 | else |
811 | return -1; |
812 | } |
813 | |
814 | static u32 |
815 | lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) |
816 | { |
817 | if (GRAPHICS_VER(engine->i915) >= 12) |
818 | return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; |
819 | else if (GRAPHICS_VER(engine->i915) >= 11) |
820 | return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; |
821 | else if (GRAPHICS_VER(engine->i915) >= 9) |
822 | return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; |
823 | else if (GRAPHICS_VER(engine->i915) >= 8) |
824 | return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; |
825 | |
826 | GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8); |
827 | |
828 | return 0; |
829 | } |
830 | |
831 | static void |
832 | lrc_setup_bb_per_ctx(u32 *regs, |
833 | const struct intel_engine_cs *engine, |
834 | u32 ctx_bb_ggtt_addr) |
835 | { |
836 | GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); |
837 | regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = |
838 | ctx_bb_ggtt_addr | |
839 | PER_CTX_BB_FORCE | |
840 | PER_CTX_BB_VALID; |
841 | } |
842 | |
843 | static void |
844 | lrc_setup_indirect_ctx(u32 *regs, |
845 | const struct intel_engine_cs *engine, |
846 | u32 ctx_bb_ggtt_addr, |
847 | u32 size) |
848 | { |
849 | GEM_BUG_ON(!size); |
850 | GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); |
851 | GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); |
852 | regs[lrc_ring_indirect_ptr(engine) + 1] = |
853 | ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); |
854 | |
855 | GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); |
856 | regs[lrc_ring_indirect_offset(engine) + 1] = |
857 | lrc_ring_indirect_offset_default(engine) << 6; |
858 | } |
859 | |
860 | static bool ctx_needs_runalone(const struct intel_context *ce) |
861 | { |
862 | struct i915_gem_context *gem_ctx; |
863 | bool ctx_is_protected = false; |
864 | |
865 | /* |
866 | * On MTL and newer platforms, protected contexts require setting |
867 | * the LRC run-alone bit or else the encryption will not happen. |
868 | */ |
869 | if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) && |
870 | (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) { |
871 | rcu_read_lock(); |
872 | gem_ctx = rcu_dereference(ce->gem_context); |
873 | if (gem_ctx) |
874 | ctx_is_protected = gem_ctx->uses_protected_content; |
875 | rcu_read_unlock(); |
876 | } |
877 | |
878 | return ctx_is_protected; |
879 | } |
880 | |
/*
 * Fill in the engine-independent registers of the context image:
 * CTX_CONTEXT_CONTROL, the saved context timestamp and (where present)
 * the batch-buffer offset slot.
 */
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;
	int loc;

	/* Masked-write register: high half selects which bits take effect. */
	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	if (ctx_needs_runalone(ce))
		ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	/* Carry the accumulated runtime over from the previous image. */
	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

	loc = lrc_ring_bb_offset(engine);
	if (loc != -1)
		regs[loc + 1] = 0;
}
906 | |
907 | static void init_wa_bb_regs(u32 * const regs, |
908 | const struct intel_engine_cs *engine) |
909 | { |
910 | const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; |
911 | |
912 | if (wa_ctx->per_ctx.size) { |
913 | const u32 ggtt_offset = i915_ggtt_offset(vma: wa_ctx->vma); |
914 | |
915 | GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); |
916 | regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = |
917 | (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; |
918 | } |
919 | |
920 | if (wa_ctx->indirect_ctx.size) { |
921 | lrc_setup_indirect_ctx(regs, engine, |
922 | ctx_bb_ggtt_addr: i915_ggtt_offset(vma: wa_ctx->vma) + |
923 | wa_ctx->indirect_ctx.offset, |
924 | size: wa_ctx->indirect_ctx.size); |
925 | } |
926 | } |
927 | |
928 | static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt) |
929 | { |
930 | if (i915_vm_is_4lvl(vm: &ppgtt->vm)) { |
931 | /* 64b PPGTT (48bit canonical) |
932 | * PDP0_DESCRIPTOR contains the base address to PML4 and |
933 | * other PDP Descriptors are ignored. |
934 | */ |
935 | ASSIGN_CTX_PML4(ppgtt, regs); |
936 | } else { |
937 | ASSIGN_CTX_PDP(ppgtt, regs, 3); |
938 | ASSIGN_CTX_PDP(ppgtt, regs, 2); |
939 | ASSIGN_CTX_PDP(ppgtt, regs, 1); |
940 | ASSIGN_CTX_PDP(ppgtt, regs, 0); |
941 | } |
942 | } |
943 | |
944 | static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) |
945 | { |
946 | if (i915_is_ggtt(vm)) |
947 | return i915_vm_to_ggtt(vm)->alias; |
948 | else |
949 | return i915_vm_to_ppgtt(vm); |
950 | } |
951 | |
952 | static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) |
953 | { |
954 | int x; |
955 | |
956 | x = lrc_ring_mi_mode(engine); |
957 | if (x != -1) { |
958 | regs[x + 1] &= ~STOP_RING; |
959 | regs[x + 1] |= STOP_RING << 16; |
960 | } |
961 | } |
962 | |
963 | static void __lrc_init_regs(u32 *regs, |
964 | const struct intel_context *ce, |
965 | const struct intel_engine_cs *engine, |
966 | bool inhibit) |
967 | { |
968 | /* |
969 | * A context is actually a big batch buffer with several |
970 | * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The |
971 | * values we are setting here are only for the first context restore: |
972 | * on a subsequent save, the GPU will recreate this batchbuffer with new |
973 | * values (including all the missing MI_LOAD_REGISTER_IMM commands that |
974 | * we are not initializing here). |
975 | * |
976 | * Must keep consistent with virtual_update_register_offsets(). |
977 | */ |
978 | |
979 | if (inhibit) |
980 | memset(regs, 0, PAGE_SIZE); |
981 | |
982 | set_offsets(regs, data: reg_offsets(engine), engine, close: inhibit); |
983 | |
984 | init_common_regs(regs, ce, engine, inhibit); |
985 | init_ppgtt_regs(regs, ppgtt: vm_alias(vm: ce->vm)); |
986 | |
987 | init_wa_bb_regs(regs, engine); |
988 | |
989 | __reset_stop_ring(regs, engine); |
990 | } |
991 | |
992 | void lrc_init_regs(const struct intel_context *ce, |
993 | const struct intel_engine_cs *engine, |
994 | bool inhibit) |
995 | { |
996 | __lrc_init_regs(regs: ce->lrc_reg_state, ce, engine, inhibit); |
997 | } |
998 | |
999 | void lrc_reset_regs(const struct intel_context *ce, |
1000 | const struct intel_engine_cs *engine) |
1001 | { |
1002 | __reset_stop_ring(regs: ce->lrc_reg_state, engine); |
1003 | } |
1004 | |
1005 | static void |
1006 | set_redzone(void *vaddr, const struct intel_engine_cs *engine) |
1007 | { |
1008 | if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
1009 | return; |
1010 | |
1011 | vaddr += engine->context_size; |
1012 | |
1013 | memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); |
1014 | } |
1015 | |
1016 | static void |
1017 | check_redzone(const void *vaddr, const struct intel_engine_cs *engine) |
1018 | { |
1019 | if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
1020 | return; |
1021 | |
1022 | vaddr += engine->context_size; |
1023 | |
1024 | if (memchr_inv(p: vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) |
1025 | drm_err_once(&engine->i915->drm, |
1026 | "%s context redzone overwritten!\n" , |
1027 | engine->name); |
1028 | } |
1029 | |
1030 | static u32 context_wa_bb_offset(const struct intel_context *ce) |
1031 | { |
1032 | return PAGE_SIZE * ce->wa_bb_page; |
1033 | } |
1034 | |
1035 | /* |
1036 | * per_ctx below determines which WABB section is used. |
1037 | * When true, the function returns the location of the |
1038 | * PER_CTX_BB. When false, the function returns the |
1039 | * location of the INDIRECT_CTX. |
1040 | */ |
1041 | static u32 *context_wabb(const struct intel_context *ce, bool per_ctx) |
1042 | { |
1043 | void *ptr; |
1044 | |
1045 | GEM_BUG_ON(!ce->wa_bb_page); |
1046 | |
1047 | ptr = ce->lrc_reg_state; |
1048 | ptr -= LRC_STATE_OFFSET; /* back to start of context image */ |
1049 | ptr += context_wa_bb_offset(ce); |
1050 | ptr += per_ctx ? PAGE_SIZE : 0; |
1051 | |
1052 | return ptr; |
1053 | } |
1054 | |
1055 | void lrc_init_state(struct intel_context *ce, |
1056 | struct intel_engine_cs *engine, |
1057 | void *state) |
1058 | { |
1059 | bool inhibit = true; |
1060 | |
1061 | set_redzone(vaddr: state, engine); |
1062 | |
1063 | if (engine->default_state) { |
1064 | shmem_read(file: engine->default_state, off: 0, |
1065 | dst: state, len: engine->context_size); |
1066 | __set_bit(CONTEXT_VALID_BIT, &ce->flags); |
1067 | inhibit = false; |
1068 | } |
1069 | |
1070 | /* Clear the ppHWSP (inc. per-context counters) */ |
1071 | memset(state, 0, PAGE_SIZE); |
1072 | |
1073 | /* Clear the indirect wa and storage */ |
1074 | if (ce->wa_bb_page) |
1075 | memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE); |
1076 | |
1077 | /* |
1078 | * The second page of the context object contains some registers which |
1079 | * must be set up prior to the first execution. |
1080 | */ |
1081 | __lrc_init_regs(regs: state + LRC_STATE_OFFSET, ce, engine, inhibit); |
1082 | } |
1083 | |
1084 | u32 lrc_indirect_bb(const struct intel_context *ce) |
1085 | { |
1086 | return i915_ggtt_offset(vma: ce->state) + context_wa_bb_offset(ce); |
1087 | } |
1088 | |
/*
 * Emit the predicate-disable workaround batch into @cs: clear the
 * predication scratch slot, terminate early if predication was active,
 * then disable predication and re-arm the slot for the next batch.
 * Returns the advanced command-stream pointer.
 */
static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled), we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}
1112 | |
1113 | static struct i915_vma * |
1114 | __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) |
1115 | { |
1116 | struct drm_i915_gem_object *obj; |
1117 | struct i915_vma *vma; |
1118 | u32 context_size; |
1119 | |
1120 | context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); |
1121 | |
1122 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
1123 | context_size += I915_GTT_PAGE_SIZE; /* for redzone */ |
1124 | |
1125 | if (GRAPHICS_VER(engine->i915) >= 12) { |
1126 | ce->wa_bb_page = context_size / PAGE_SIZE; |
1127 | /* INDIRECT_CTX and PER_CTX_BB need separate pages. */ |
1128 | context_size += PAGE_SIZE * 2; |
1129 | } |
1130 | |
1131 | if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { |
1132 | ce->parallel.guc.parent_page = context_size / PAGE_SIZE; |
1133 | context_size += PARENT_SCRATCH_SIZE; |
1134 | } |
1135 | |
1136 | obj = i915_gem_object_create_lmem(i915: engine->i915, size: context_size, |
1137 | I915_BO_ALLOC_PM_VOLATILE); |
1138 | if (IS_ERR(ptr: obj)) { |
1139 | obj = i915_gem_object_create_shmem(i915: engine->i915, size: context_size); |
1140 | if (IS_ERR(ptr: obj)) |
1141 | return ERR_CAST(ptr: obj); |
1142 | |
1143 | /* |
1144 | * Wa_22016122933: For Media version 13.0, all Media GT shared |
1145 | * memory needs to be mapped as WC on CPU side and UC (PAT |
1146 | * index 2) on GPU side. |
1147 | */ |
1148 | if (intel_gt_needs_wa_22016122933(gt: engine->gt)) |
1149 | i915_gem_object_set_cache_coherency(obj, cache_level: I915_CACHE_NONE); |
1150 | } |
1151 | |
1152 | vma = i915_vma_instance(obj, vm: &engine->gt->ggtt->vm, NULL); |
1153 | if (IS_ERR(ptr: vma)) { |
1154 | i915_gem_object_put(obj); |
1155 | return vma; |
1156 | } |
1157 | |
1158 | return vma; |
1159 | } |
1160 | |
1161 | static struct intel_timeline * |
1162 | pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine) |
1163 | { |
1164 | struct intel_timeline *tl = fetch_and_zero(&ce->timeline); |
1165 | |
1166 | return intel_timeline_create_from_engine(engine, page_unmask_bits(tl)); |
1167 | } |
1168 | |
1169 | int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) |
1170 | { |
1171 | struct intel_ring *ring; |
1172 | struct i915_vma *vma; |
1173 | int err; |
1174 | |
1175 | GEM_BUG_ON(ce->state); |
1176 | |
1177 | vma = __lrc_alloc_state(ce, engine); |
1178 | if (IS_ERR(ptr: vma)) |
1179 | return PTR_ERR(ptr: vma); |
1180 | |
1181 | ring = intel_engine_create_ring(engine, size: ce->ring_size); |
1182 | if (IS_ERR(ptr: ring)) { |
1183 | err = PTR_ERR(ptr: ring); |
1184 | goto err_vma; |
1185 | } |
1186 | |
1187 | if (!page_mask_bits(ce->timeline)) { |
1188 | struct intel_timeline *tl; |
1189 | |
1190 | /* |
1191 | * Use the static global HWSP for the kernel context, and |
1192 | * a dynamically allocated cacheline for everyone else. |
1193 | */ |
1194 | if (unlikely(ce->timeline)) |
1195 | tl = pinned_timeline(ce, engine); |
1196 | else |
1197 | tl = intel_timeline_create(gt: engine->gt); |
1198 | if (IS_ERR(ptr: tl)) { |
1199 | err = PTR_ERR(ptr: tl); |
1200 | goto err_ring; |
1201 | } |
1202 | |
1203 | ce->timeline = tl; |
1204 | } |
1205 | |
1206 | ce->ring = ring; |
1207 | ce->state = vma; |
1208 | |
1209 | return 0; |
1210 | |
1211 | err_ring: |
1212 | intel_ring_put(ring); |
1213 | err_vma: |
1214 | i915_vma_put(vma); |
1215 | return err; |
1216 | } |
1217 | |
/*
 * lrc_reset - reinitialise the register state of a pinned context
 * @ce: the context to reset
 *
 * Rewinds the ring to the last emitted position, then rewrites the context
 * image registers from scratch (with state inhibit set) and refreshes the
 * cached LRCA descriptor.
 */
void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ring: ce->ring, tail: ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, engine: ce->engine, inhibit: true);
	ce->lrc.lrca = lrc_update_regs(ce, engine: ce->engine, head: ce->ring->tail);
}
1228 | |
1229 | int |
1230 | lrc_pre_pin(struct intel_context *ce, |
1231 | struct intel_engine_cs *engine, |
1232 | struct i915_gem_ww_ctx *ww, |
1233 | void **vaddr) |
1234 | { |
1235 | GEM_BUG_ON(!ce->state); |
1236 | GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); |
1237 | |
1238 | *vaddr = i915_gem_object_pin_map(obj: ce->state->obj, |
1239 | type: intel_gt_coherent_map_type(gt: ce->engine->gt, |
1240 | obj: ce->state->obj, |
1241 | always_coherent: false) | |
1242 | I915_MAP_OVERRIDE); |
1243 | |
1244 | return PTR_ERR_OR_ZERO(ptr: *vaddr); |
1245 | } |
1246 | |
/*
 * lrc_pin - finish pinning a context once its state object is mapped
 * @ce: context being pinned
 * @engine: engine the context is pinned for
 * @vaddr: CPU mapping of the context state object (from lrc_pre_pin)
 *
 * Records the register-state pointer, initialises the context image on the
 * very first pin (CONTEXT_INIT_BIT guards the one-time setup), and refreshes
 * the cached LRCA descriptor. Always returns 0.
 */
int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	/* One-time initialisation of the context image on first pin. */
	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, state: vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, head: ce->ring->tail);
	return 0;
}
1260 | |
/*
 * lrc_unpin - release per-pin resources of a context
 * @ce: the context being unpinned
 *
 * Drops the reference on the last parallel-submission request, if any, and
 * verifies the red zone preceding the register state has not been scribbled
 * over while the context was pinned.
 */
void lrc_unpin(struct intel_context *ce)
{
	if (unlikely(ce->parallel.last_rq)) {
		i915_request_put(rq: ce->parallel.last_rq);
		ce->parallel.last_rq = NULL;
	}
	check_redzone(vaddr: (void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      engine: ce->engine);
}
1270 | |
/* Undo the kernel mapping created by lrc_pre_pin(). */
void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(obj: ce->state->obj);
}
1275 | |
1276 | void lrc_fini(struct intel_context *ce) |
1277 | { |
1278 | if (!ce->state) |
1279 | return; |
1280 | |
1281 | intel_ring_put(fetch_and_zero(&ce->ring)); |
1282 | i915_vma_put(fetch_and_zero(&ce->state)); |
1283 | } |
1284 | |
/*
 * lrc_destroy - final release of a context, called when its refcount drops
 * @kref: embedded reference count of the intel_context
 *
 * Presumably invoked via kref_put() — TODO confirm against callers. The
 * context must be idle and unpinned at this point.
 */
void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}
1297 | |
/*
 * Emit the gen12 CTX_TIMESTAMP workaround: load the saved timestamp from the
 * context image into CS GPR0, then copy GPR0 back into the ring's
 * CTX_TIMESTAMP register. Returns the advanced command-stream pointer.
 */
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	/* Load the CTX_TIMESTAMP dword from the context image into GPR0. */
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(vma: ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	/* Copy GPR0 into RING_CTX_TIMESTAMP. */
	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	/*
	 * The same register-to-register copy is emitted a second time,
	 * back to back. This appears intentional (part of the workaround);
	 * NOTE(review): confirm the double write against the bspec.
	 */
	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}
1323 | |
1324 | static u32 * |
1325 | gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) |
1326 | { |
1327 | GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1); |
1328 | |
1329 | *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | |
1330 | MI_SRM_LRM_GLOBAL_GTT | |
1331 | MI_LRI_LRM_CS_MMIO; |
1332 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); |
1333 | *cs++ = i915_ggtt_offset(vma: ce->state) + LRC_STATE_OFFSET + |
1334 | (lrc_ring_gpr0(engine: ce->engine) + 1) * sizeof(u32); |
1335 | *cs++ = 0; |
1336 | |
1337 | return cs; |
1338 | } |
1339 | |
/*
 * Emit the gen12 RING_CMD_BUF_CCTL workaround: load the saved CMD_BUF_CCTL
 * value from the context image into CS GPR0, then copy GPR0 into the ring's
 * CMD_BUF_CCTL register. Returns the advanced command-stream pointer.
 */
static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	/* Load the saved CMD_BUF_CCTL dword from the context image into GPR0. */
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(vma: ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(engine: ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	/* Copy GPR0 into RING_CMD_BUF_CCTL. */
	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}
1361 | |
1362 | /* |
1363 | * The bspec's tuning guide asks us to program a vertical watermark value of |
1364 | * 0x3FF. However this register is not saved/restored properly by the |
1365 | * hardware, so we're required to apply the desired value via INDIRECT_CTX |
1366 | * batch buffer to ensure the value takes effect properly. All other bits |
1367 | * in this register should remain at 0 (the hardware default). |
1368 | */ |
1369 | static u32 * |
1370 | dg2_emit_draw_watermark_setting(u32 *cs) |
1371 | { |
1372 | *cs++ = MI_LOAD_REGISTER_IMM(1); |
1373 | *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK); |
1374 | *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF); |
1375 | |
1376 | return cs; |
1377 | } |
1378 | |
/*
 * Emit an LRI that sets the (masked) instruction-state-cache invalidate bit
 * in CS_DEBUG_MODE2. Used by the caller for Wa_18022495364. Returns the
 * advanced command-stream pointer.
 */
static u32 *
gen12_invalidate_state_cache(u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
	*cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
	return cs;
}
1387 | |
/*
 * Build the INDIRECT_CTX workaround batch for render-class engines:
 * timestamp and CMD_BUF_CCTL fixups, scratch restore, then a set of
 * platform-conditional workarounds. The emission order is deliberate;
 * do not reorder. Returns the advanced command-stream pointer.
 */
static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(batch: cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, offset: 0);

	cs = gen12_emit_aux_table_inv(engine: ce->engine, cs);

	/* Wa_18022495364 */
	if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
		cs = gen12_invalidate_state_cache(cs);

	/* Wa_16014892111 */
	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(ce->engine->i915))
		cs = dg2_emit_draw_watermark_setting(cs);

	return cs;
}
1413 | |
1414 | static u32 * |
1415 | gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) |
1416 | { |
1417 | cs = gen12_emit_timestamp_wa(ce, cs); |
1418 | cs = gen12_emit_restore_scratch(ce, cs); |
1419 | |
1420 | /* Wa_16013000631:dg2 */ |
1421 | if (IS_DG2_G11(ce->engine->i915)) |
1422 | if (ce->engine->class == COMPUTE_CLASS) |
1423 | cs = gen8_emit_pipe_control(batch: cs, |
1424 | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, |
1425 | offset: 0); |
1426 | |
1427 | return gen12_emit_aux_table_inv(engine: ce->engine, cs); |
1428 | } |
1429 | |
static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
{
	struct intel_gt *gt = ce->engine->gt;
	int mocs = gt->mocs.uc_index << 1;

	/*
	 * Wa_16018031267 / Wa_16018063123 requires that SW forces the
	 * main copy engine arbitration into round robin mode. We
	 * additionally need to submit the following WABB blt command
	 * to produce 4 subblits with each subblit generating 0 byte
	 * write requests as WABB:
	 *
	 * XY_FAST_COLOR_BLT
	 *  BG0    -> 5100000E
	 *  BG1    -> 0000003F (Dest pitch)
	 *  BG2    -> 00000000 (X1, Y1) = (0, 0)
	 *  BG3    -> 00040001 (X2, Y2) = (1, 4)
	 *  BG4    -> scratch
	 *  BG5    -> scratch
	 *  BG6-12 -> 00000000
	 *  BG13   -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
	 *  BG14   -> 00000010 (Qpitch = 4)
	 *  BG15   -> 00000000
	 */
	*cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
	*cs++ = 0;
	*cs++ = 4 << 16 | 1;
	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0x20004004;
	*cs++ = 0x10;
	*cs++ = 0;

	return cs;
}
1473 | |
1474 | static u32 * |
1475 | xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs) |
1476 | { |
1477 | /* Wa_16018031267, Wa_16018063123 */ |
1478 | if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine)) |
1479 | cs = xehp_emit_fastcolor_blt_wabb(ce, cs); |
1480 | |
1481 | return cs; |
1482 | } |
1483 | |
1484 | static void |
1485 | setup_per_ctx_bb(const struct intel_context *ce, |
1486 | const struct intel_engine_cs *engine, |
1487 | u32 *(*emit)(const struct intel_context *, u32 *)) |
1488 | { |
1489 | /* Place PER_CTX_BB on next page after INDIRECT_CTX */ |
1490 | u32 * const start = context_wabb(ce, per_ctx: true); |
1491 | u32 *cs; |
1492 | |
1493 | cs = emit(ce, start); |
1494 | |
1495 | /* PER_CTX_BB must manually terminate */ |
1496 | *cs++ = MI_BATCH_BUFFER_END; |
1497 | |
1498 | GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); |
1499 | lrc_setup_bb_per_ctx(regs: ce->lrc_reg_state, engine, |
1500 | ctx_bb_ggtt_addr: lrc_indirect_bb(ce) + PAGE_SIZE); |
1501 | } |
1502 | |
/*
 * Write the INDIRECT_CTX workaround batch into the context's batch storage
 * and program the context registers with its location and size.
 *
 * The batch is padded with NOOPs to a cacheline boundary because the
 * hardware consumes its length in cachelines. The predicate-disable
 * workaround batch lives at a fixed offset (DG2_PREDICATE_RESULT_BB)
 * in the same page, so the emitted batch must not reach that far.
 */
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_wabb(ce, per_ctx: false);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	/* Pad to a cacheline; batch length is programmed in cachelines. */
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	/* The emitted batch must not overlap the predicate-disable WA BB. */
	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
	setup_predicate_disable_wa(ce, cs: start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

	lrc_setup_indirect_ctx(regs: ce->lrc_reg_state, engine,
			       ctx_bb_ggtt_addr: lrc_indirect_bb(ce),
			       size: (cs - start) * sizeof(*cs));
}
1523 | |
1524 | /* |
1525 | * The context descriptor encodes various attributes of a context, |
1526 | * including its GTT address and some flags. Because it's fairly |
1527 | * expensive to calculate, we'll just do it once and cache the result, |
1528 | * which remains valid until the context is unpinned. |
1529 | * |
1530 | * This is what a descriptor looks like, from LSB to MSB:: |
1531 | * |
1532 | * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) |
1533 | * bits 12-31: LRCA, GTT address of (the HWSP of) this context |
1534 | * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) |
1535 | * bits 53-54: mbz, reserved for use by hardware |
1536 | * bits 55-63: group ID, currently unused and set to 0 |
1537 | * |
1538 | * Starting from Gen11, the upper dword of the descriptor has a new format: |
1539 | * |
1540 | * bits 32-36: reserved |
1541 | * bits 37-47: SW context ID |
1542 | * bits 48:53: engine instance |
1543 | * bit 54: mbz, reserved for use by hardware |
1544 | * bits 55-60: SW counter |
1545 | * bits 61-63: engine class |
1546 | * |
1547 | * On Xe_HP, the upper dword of the descriptor has a new format: |
1548 | * |
1549 | * bits 32-37: virtual function number |
1550 | * bit 38: mbz, reserved for use by hardware |
1551 | * bits 39-54: SW context ID |
1552 | * bits 55-57: reserved |
1553 | * bits 58-63: SW counter |
1554 | * |
1555 | * engine info, SW context ID and SW counter need to form a unique number |
1556 | * (Context ID) per lrc. |
1557 | */ |
1558 | static u32 lrc_descriptor(const struct intel_context *ce) |
1559 | { |
1560 | u32 desc; |
1561 | |
1562 | desc = INTEL_LEGACY_32B_CONTEXT; |
1563 | if (i915_vm_is_4lvl(vm: ce->vm)) |
1564 | desc = INTEL_LEGACY_64B_CONTEXT; |
1565 | desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; |
1566 | |
1567 | desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; |
1568 | if (GRAPHICS_VER(ce->vm->i915) == 8) |
1569 | desc |= GEN8_CTX_L3LLC_COHERENT; |
1570 | |
1571 | return i915_ggtt_offset(vma: ce->state) | desc; |
1572 | } |
1573 | |
/*
 * lrc_update_regs - refresh the ring registers in the context image
 * @ce: the context whose image is updated
 * @engine: engine the context will run on
 * @head: ring head offset to program
 *
 * Rewrites RING_START/HEAD/TAIL/CTL in the register state, refreshes the
 * render power-clock state and OA state for render engines, and (re)builds
 * the per-context workaround batch buffers when the context carries one.
 *
 * Returns the context descriptor with CTX_DESC_FORCE_RESTORE set.
 */
u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(vma: ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(gt: engine->gt, req_sseu: &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		/* Render engines get the larger RCS workaround batch. */
		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt to global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, emit: fn);
		setup_per_ctx_bb(ce, engine, emit: xehp_emit_per_ctx_bb);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}
1612 | |
/*
 * Rewrite the per-platform register offset commands in the context image
 * (see set_offsets()), without closing the command sequence.
 */
void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(regs: ce->lrc_reg_state, data: reg_offsets(engine), engine, close: false);
}
1618 | |
1619 | void lrc_check_regs(const struct intel_context *ce, |
1620 | const struct intel_engine_cs *engine, |
1621 | const char *when) |
1622 | { |
1623 | const struct intel_ring *ring = ce->ring; |
1624 | u32 *regs = ce->lrc_reg_state; |
1625 | bool valid = true; |
1626 | int x; |
1627 | |
1628 | if (regs[CTX_RING_START] != i915_ggtt_offset(vma: ring->vma)) { |
1629 | pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n" , |
1630 | engine->name, |
1631 | regs[CTX_RING_START], |
1632 | i915_ggtt_offset(ring->vma)); |
1633 | regs[CTX_RING_START] = i915_ggtt_offset(vma: ring->vma); |
1634 | valid = false; |
1635 | } |
1636 | |
1637 | if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != |
1638 | (RING_CTL_SIZE(ring->size) | RING_VALID)) { |
1639 | pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n" , |
1640 | engine->name, |
1641 | regs[CTX_RING_CTL], |
1642 | (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); |
1643 | regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; |
1644 | valid = false; |
1645 | } |
1646 | |
1647 | x = lrc_ring_mi_mode(engine); |
1648 | if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { |
1649 | pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n" , |
1650 | engine->name, regs[x + 1]); |
1651 | regs[x + 1] &= ~STOP_RING; |
1652 | regs[x + 1] |= STOP_RING << 16; |
1653 | valid = false; |
1654 | } |
1655 | |
1656 | WARN_ONCE(!valid, "Invalid lrc state found %s submission\n" , when); |
1657 | } |
1658 | |
1659 | /* |
1660 | * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after |
1661 | * PIPE_CONTROL instruction. This is required for the flush to happen correctly |
1662 | * but there is a slight complication as this is applied in WA batch where the |
1663 | * values are only initialized once so we cannot take register value at the |
1664 | * beginning and reuse it further; hence we save its value to memory, upload a |
1665 | * constant value with bit21 set and then we restore it back with the saved value. |
1666 | * To simplify the WA, a constant value is formed by using the default value |
1667 | * of this register. This shouldn't be a problem because we are only modifying |
1668 | * it for a short period and this batch in non-premptible. We can ofcourse |
1669 | * use additional instructions that read the actual value of the register |
1670 | * at that time and set our bit of interest but it makes the WA complicated. |
1671 | * |
1672 | * This WA is also required for Gen9 so extracting as a function avoids |
1673 | * code duplication. |
1674 | */ |
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	/* Save the current L3SQCREG4 value to the scratch page. */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(gt: engine->gt,
					   field: INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	/* Load a constant (default-derived) value with the flush bit set. */
	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       offset: 0);

	/* Restore the saved L3SQCREG4 value from the scratch page. */
	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(gt: engine->gt,
					   field: INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}
1702 | |
1703 | /* |
1704 | * Typically we only have one indirect_ctx and per_ctx batch buffer which are |
1705 | * initialized at the beginning and shared across all contexts but this field |
1706 | * helps us to have multiple batches at different offsets and select them based |
1707 | * on a criteria. At the moment this batch always start at the beginning of the page |
1708 | * and at this point we don't have multiple wa_ctx batch buffers. |
1709 | * |
1710 | * The number of WA applied are not known at the beginning; we use this field |
1711 | * to return the no of DWORDS written. |
1712 | * |
1713 | * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END |
1714 | * so it adds NOOPs as padding to make it cacheline aligned. |
1715 | * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together |
1716 | * makes a complete batch buffer. |
1717 | */ |
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	/* Re-enable arbitration before handing back to the context. */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}
1750 | |
/* A single register/value pair consumed by emit_lri(). */
struct lri {
	i915_reg_t reg;
	u32 value;
};
1755 | |
1756 | static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) |
1757 | { |
1758 | GEM_BUG_ON(!count || count > 63); |
1759 | |
1760 | *batch++ = MI_LOAD_REGISTER_IMM(count); |
1761 | do { |
1762 | *batch++ = i915_mmio_reg_offset(lri->reg); |
1763 | *batch++ = lri->value; |
1764 | } while (lri++, --count); |
1765 | *batch++ = MI_NOOP; |
1766 | |
1767 | return batch; |
1768 | } |
1769 | |
/*
 * Build the gen9 INDIRECT_CTX workaround batch: flush/LRI workarounds
 * bracketed by arbitration off/on, padded with NOOPs to a cacheline
 * (execution length is programmed in cachelines, so no BB_END needed).
 */
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is setup along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}
1841 | |
1842 | #define CTX_WA_BB_SIZE (PAGE_SIZE) |
1843 | |
1844 | static int lrc_create_wa_ctx(struct intel_engine_cs *engine) |
1845 | { |
1846 | struct drm_i915_gem_object *obj; |
1847 | struct i915_vma *vma; |
1848 | int err; |
1849 | |
1850 | obj = i915_gem_object_create_shmem(i915: engine->i915, CTX_WA_BB_SIZE); |
1851 | if (IS_ERR(ptr: obj)) |
1852 | return PTR_ERR(ptr: obj); |
1853 | |
1854 | vma = i915_vma_instance(obj, vm: &engine->gt->ggtt->vm, NULL); |
1855 | if (IS_ERR(ptr: vma)) { |
1856 | err = PTR_ERR(ptr: vma); |
1857 | goto err; |
1858 | } |
1859 | |
1860 | engine->wa_ctx.vma = vma; |
1861 | return 0; |
1862 | |
1863 | err: |
1864 | i915_gem_object_put(obj); |
1865 | return err; |
1866 | } |
1867 | |
/* Unpin and release the engine's workaround context vma, if any. */
void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(p_vma: &engine->wa_ctx.vma, flags: 0);
}
1872 | |
1873 | typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); |
1874 | |
/*
 * lrc_init_wa_ctx - build the per-engine workaround context batch buffers
 * @engine: engine to set up (render-class register state only)
 *
 * Only used on graphics versions 8 and 9; newer platforms return early.
 * Allocates the backing object, pins it with a ww retry loop, emits the
 * indirect-ctx and per-ctx batches, and records their offsets and sizes.
 * Failure is non-fatal: the wa_ctx is simply cleared and the GPU runs
 * without the workaround batches.
 *
 * NOTE(review): wa_bb_fn[] is only assigned for GRAPHICS_VER 8 and 9; any
 * other version below 11 reaching this point would use it uninitialized.
 * Presumably unreachable with currently supported hardware — confirm.
 */
void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (GRAPHICS_VER(engine->i915) >= 11 ||
	    !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
		return;

	if (GRAPHICS_VER(engine->i915) == 9) {
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
	} else if (GRAPHICS_VER(engine->i915) == 8) {
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize WA batch
		 * because we only expect rare glitches but nothing
		 * critical to prevent us from using GPU
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n" ,
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	/* Pin the wa_ctx object, retrying on ww contention. */
	i915_gem_ww_ctx_init(ctx: &ww, intr: true);
retry:
	err = i915_gem_object_lock(obj: wa_ctx->vma->obj, ww: &ww);
	if (!err)
		err = i915_ggtt_pin(vma: wa_ctx->vma, ww: &ww, align: 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(obj: wa_ctx->vma->obj, type: I915_MAP_WB);
	if (IS_ERR(ptr: batch)) {
		err = PTR_ERR(ptr: batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(obj: wa_ctx->vma->obj, offset: 0, size: batch_ptr - batch);
	__i915_gem_object_release_map(obj: wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(vma: wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(ctx: &ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(ctx: &ww);

	if (err) {
		i915_vma_put(vma: engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}
1973 | |
/*
 * Record a runtime-counter underflow in the selftest statistics; compiles
 * to an empty stub when selftests are disabled.
 */
static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	stats->runtime.num_underflow++;
	stats->runtime.max_underflow =
		max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}
1982 | |
/* Read the current CTX_TIMESTAMP dword from the context image. */
static u32 lrc_get_runtime(const struct intel_context *ce)
{
	/*
	 * We can use either ppHWSP[16] which is recorded before the context
	 * switch (and so excludes the cost of context switches) or use the
	 * value from the context image itself, which is saved/restored earlier
	 * and so includes the cost of the save.
	 *
	 * READ_ONCE because the image may be updated concurrently by the HW.
	 */
	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}
1993 | |
1994 | void lrc_update_runtime(struct intel_context *ce) |
1995 | { |
1996 | struct intel_context_stats *stats = &ce->stats; |
1997 | u32 old; |
1998 | s32 dt; |
1999 | |
2000 | old = stats->runtime.last; |
2001 | stats->runtime.last = lrc_get_runtime(ce); |
2002 | dt = stats->runtime.last - old; |
2003 | if (!dt) |
2004 | return; |
2005 | |
2006 | if (unlikely(dt < 0)) { |
2007 | CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n" , |
2008 | old, stats->runtime.last, dt); |
2009 | st_runtime_underflow(stats, dt); |
2010 | return; |
2011 | } |
2012 | |
2013 | ewma_runtime_add(e: &stats->runtime.avg, val: dt); |
2014 | stats->runtime.total += dt; |
2015 | } |
2016 | |
2017 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
2018 | #include "selftest_lrc.c" |
2019 | #endif |
2020 | |