// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating a MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set using the REG/REG16 macros: the former is
 * used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, additional bytes
 *      follow for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}
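
/*
 * Illustrative decode of the encoding above (not part of the driver; the
 * mmio_base value is only an assumption for the example): with
 * engine->mmio_base == 0x2000, the table fragment
 *
 *	NOP(1), LRI(2, POSTED), REG16(0x244), REG(0x034), END
 *
 * is stored as the bytes { 0x81, 0x42, 0x81, 0x11, 0x0d, 0x00 } and decodes to
 *
 *	regs[0]				skipped by NOP(1)
 *	regs[1] = MI_LOAD_REGISTER_IMM(2) | MI_LRI_FORCE_POSTED
 *					  (plus MI_LRI_LRM_CS_MMIO on Gen11+)
 *	regs[2] = 0x2000 + 0x244	value slot regs[3] left for the caller
 *	regs[4] = 0x2000 + 0x034	value slot regs[5] left for the caller
 */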

static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines backing a virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
			return mtl_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x80;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x64;
	else if (GRAPHICS_VER(engine->i915) >= 8 &&
		 engine->class == RENDER_CLASS)
		return 0xc4;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}
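
/*
 * As an aid only (derived from the helpers above, using Gen12 as the
 * example): lrc_ring_wa_bb_per_ctx() returns 0x12 there, so the (reg, value)
 * pairs in the context image line up as
 *
 *	regs[0x12]/regs[0x13]	RING_BB_PER_CTX_PTR
 *	regs[0x14]/regs[0x15]	RING_INDIRECT_CTX
 *	regs[0x16]/regs[0x17]	RING_INDIRECT_CTX_OFFSET
 *
 * which is why indirect_ptr and indirect_offset are simply "+ 2" steps from
 * the per-ctx slot.
 */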

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 8)
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;

	GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);

	return 0;
}

static void
lrc_setup_bb_per_ctx(u32 *regs,
		     const struct intel_engine_cs *engine,
		     u32 ctx_bb_ggtt_addr)
{
	GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
	regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
		ctx_bb_ggtt_addr |
		PER_CTX_BB_FORCE |
		PER_CTX_BB_VALID;
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}
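
/*
 * A rough sketch of the resulting register contents, assuming an INDIRECT_CTX
 * batch of 192 bytes at GGTT address 0x10000 (example values only):
 *
 *	regs[indirect_ptr + 1]    = 0x10000 | (192 / CACHELINE_BYTES) = 0x10003
 *	regs[indirect_offset + 1] = default offset << 6
 *
 * i.e. the low bits of the pointer carry the batch length in cachelines,
 * which is why the size must be a multiple of CACHELINE_BYTES.
 */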

static bool ctx_needs_runalone(const struct intel_context *ce)
{
	struct i915_gem_context *gem_ctx;
	bool ctx_is_protected = false;

	/*
	 * On MTL and newer platforms, protected contexts require setting
	 * the LRC run-alone bit or else the encryption will not happen.
	 */
	if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
	    (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
		rcu_read_lock();
		gem_ctx = rcu_dereference(ce->gem_context);
		if (gem_ctx)
			ctx_is_protected = gem_ctx->uses_protected_content;
		rcu_read_unlock();
	}

	return ctx_is_protected;
}

static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;
	int loc;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	if (ctx_needs_runalone(ce))
		ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

	loc = lrc_ring_bb_offset(engine);
	if (loc != -1)
		regs[loc + 1] = 0;
}
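
/*
 * CTX_CONTEXT_CONTROL is a masked register: each write carries the bits to
 * change in the low word and the corresponding mask in the high word, so
 * untouched bits keep their value. As an illustration only, for a control
 * bit at position n:
 *
 *	_MASKED_BIT_ENABLE(BIT(n))  == (BIT(n) << 16) | BIT(n)
 *	_MASKED_BIT_DISABLE(BIT(n)) ==  BIT(n) << 16
 *
 * which is why "ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT" above only needs
 * to add the value bit: the mask half was already set by the preceding
 * _MASKED_BIT_DISABLE() default.
 */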

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

/*
 * per_ctx below determines which WABB section is used.
 * When true, the function returns the location of the
 * PER_CTX_BB. When false, the function returns the
 * location of the INDIRECT_CTX.
 */
static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);
	ptr += per_ctx ? PAGE_SIZE : 0;

	return ptr;
}
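
/*
 * Sketch of the context object layout implied by context_wa_bb_offset() and
 * __lrc_alloc_state() (page numbers are computed there, not fixed constants):
 *
 *	page 0				ppHWSP (scratch, per-context counters)
 *	pages 1..			engine context image; ce->lrc_reg_state
 *					points LRC_STATE_OFFSET into the object
 *	page ce->wa_bb_page		INDIRECT_CTX workaround batch
 *	page ce->wa_bb_page + 1		PER_CTX_BB workaround batch
 *
 * With CONFIG_DRM_I915_DEBUG_GEM an extra redzone page is also carved out
 * directly after the engine context (see set_redzone()/check_redzone()).
 */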

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/* Clear the indirect wa and storage */
	if (ce->wa_bb_page)
		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

u32 lrc_indirect_bb(const struct intel_context *ce)
{
	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}

static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled), we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) >= 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		/* INDIRECT_CTX and PER_CTX_BB need separate pages. */
		context_size += PAGE_SIZE * 2;
	}

	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
		context_size += PARENT_SCRATCH_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size,
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj)) {
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		/*
		 * Wa_22016122933: For Media version 13.0, all Media GT shared
		 * memory needs to be mapped as WC on CPU side and UC (PAT
		 * index 2) on GPU side.
		 */
		if (intel_gt_needs_wa_22016122933(engine->gt))
			i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
	}

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 intel_gt_coherent_map_type(ce->engine->gt,
								    ce->state->obj,
								    false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	if (unlikely(ce->parallel.last_rq)) {
		i915_request_put(ce->parallel.last_rq);
		ce->parallel.last_rq = NULL;
	}
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

/*
 * The bspec's tuning guide asks us to program a vertical watermark value of
 * 0x3FF. However this register is not saved/restored properly by the
 * hardware, so we're required to apply the desired value via INDIRECT_CTX
 * batch buffer to ensure the value takes effect properly. All other bits
 * in this register should remain at 0 (the hardware default).
 */
static u32 *
dg2_emit_draw_watermark_setting(u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
	*cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);

	return cs;
}

static u32 *
gen12_invalidate_state_cache(u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
	*cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

	cs = gen12_emit_aux_table_inv(ce->engine, cs);

	/* Wa_18022495364 */
	if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
		cs = gen12_invalidate_state_cache(cs);

	/* Wa_16014892111 */
	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(ce->engine->i915))
		cs = dg2_emit_draw_watermark_setting(cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		if (ce->engine->class == COMPUTE_CLASS)
			cs = gen8_emit_pipe_control(cs,
						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
						    0);

	return gen12_emit_aux_table_inv(ce->engine, cs);
}

static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
{
	struct intel_gt *gt = ce->engine->gt;
	int mocs = gt->mocs.uc_index << 1;

	/*
	 * Wa_16018031267 / Wa_16018063123 requires that SW forces the
	 * main copy engine arbitration into round robin mode. We
	 * additionally need to submit the following WABB blt command
	 * to produce 4 subblits with each subblit generating 0 byte
	 * write requests as WABB:
	 *
	 * XY_FASTCOLOR_BLT
	 *  BG0    -> 5100000E
	 *  BG1    -> 0000003F (Dest pitch)
	 *  BG2    -> 00000000 (X1, Y1) = (0, 0)
	 *  BG3    -> 00040001 (X2, Y2) = (1, 4)
	 *  BG4    -> scratch
	 *  BG5    -> scratch
	 *  BG6-12 -> 00000000
	 *  BG13   -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
	 *  BG14   -> 00000010 (Qpitch = 4)
	 *  BG15   -> 00000000
	 */
	*cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
	*cs++ = 0;
	*cs++ = 4 << 16 | 1;
	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0x20004004;
	*cs++ = 0x10;
	*cs++ = 0;

	return cs;
}

static u32 *
xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
{
	/* Wa_16018031267, Wa_16018063123 */
	if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
		cs = xehp_emit_fastcolor_blt_wabb(ce, cs);

	return cs;
}

static void
setup_per_ctx_bb(const struct intel_context *ce,
		 const struct intel_engine_cs *engine,
		 u32 *(*emit)(const struct intel_context *, u32 *))
{
	/* Place PER_CTX_BB on next page after INDIRECT_CTX */
	u32 * const start = context_wabb(ce, true);
	u32 *cs;

	cs = emit(ce, start);

	/* PER_CTX_BB must manually terminate */
	*cs++ = MI_BATCH_BUFFER_END;

	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
			     lrc_indirect_bb(ce) + PAGE_SIZE);
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_wabb(ce, false);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       lrc_indirect_bb(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48:53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}
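
/*
 * Purely as an illustration of the composition above (the GGTT offset is an
 * example value; bit positions follow the GEN8_CTX_* definitions and are not
 * repeated here): for a 4-level ppgtt context whose state object sits at
 * GGTT offset 0x00345000, the cached low dword is
 *
 *	0x00345000					(LRCA, page aligned)
 *	| GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE		(flags)
 *	| INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT
 *
 * The upper dword (SW context ID, engine info) described in the comment
 * above is managed by the submission backend, not here.
 */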

u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt to global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
		setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication: the WA batch is only
 * initialized once, so we cannot read the register value at the beginning
 * and reuse it later. Hence we save its value to memory, upload a constant
 * value with bit 21 set, and then restore it from the saved value.
 * To simplify the WA, the constant is formed from the default value of this
 * register. This shouldn't be a problem because we only modify it for a
 * short period and this batch is non-preemptible. We could of course use
 * additional instructions that read back the actual register value at that
 * time and set only the bit of interest, but that would make the WA more
 * complicated.
 *
 * This WA is also required for Gen9, so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and one per_ctx batch buffer which
 * are initialized at the beginning and shared across all contexts, but this
 * field allows us to have multiple batches at different offsets and to select
 * them based on some criteria. At the moment this batch always starts at the
 * beginning of the page and we do not yet use multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known up front; the number of DWORDs
 * written is returned by advancing the batch pointer.
 *
 * Note that this batch does not contain MI_BATCH_BUFFER_END, so NOOPs are
 * added as padding to make it cacheline aligned. MI_BATCH_BUFFER_END is
 * added to the per-ctx batch, and the two together make a complete batch
 * buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is setup along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (GRAPHICS_VER(engine->i915) >= 11 ||
	    !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
		return;

	if (GRAPHICS_VER(engine->i915) == 9) {
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
	} else if (GRAPHICS_VER(engine->i915) == 8) {
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize WA batch
		 * because we only expect rare glitches but nothing
		 * critical to prevent us from using GPU
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	stats->runtime.num_underflow++;
	stats->runtime.max_underflow =
		max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}

static u32 lrc_get_runtime(const struct intel_context *ce)
{
	/*
	 * We can use either ppHWSP[16] which is recorded before the context
	 * switch (and so excludes the cost of context switches) or use the
	 * value from the context image itself, which is saved/restored earlier
	 * and so includes the cost of the save.
	 */
	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}

void lrc_update_runtime(struct intel_context *ce)
{
	struct intel_context_stats *stats = &ce->stats;
	u32 old;
	s32 dt;

	old = stats->runtime.last;
	stats->runtime.last = lrc_get_runtime(ce);
	dt = stats->runtime.last - old;
	if (!dt)
		return;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, stats->runtime.last, dt);
		st_runtime_underflow(stats, dt);
		return;
	}

	ewma_runtime_add(&stats->runtime.avg, dt);
	stats->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif