1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2016 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/string_helpers.h> |
7 | |
8 | #include <drm/drm_print.h> |
9 | |
10 | #include "gem/i915_gem_context.h" |
11 | #include "gem/i915_gem_internal.h" |
12 | #include "gt/intel_gt_print.h" |
13 | #include "gt/intel_gt_regs.h" |
14 | |
15 | #include "i915_cmd_parser.h" |
16 | #include "i915_drv.h" |
17 | #include "i915_irq.h" |
18 | #include "i915_reg.h" |
19 | #include "intel_breadcrumbs.h" |
20 | #include "intel_context.h" |
21 | #include "intel_engine.h" |
22 | #include "intel_engine_pm.h" |
23 | #include "intel_engine_regs.h" |
24 | #include "intel_engine_user.h" |
25 | #include "intel_execlists_submission.h" |
26 | #include "intel_gt.h" |
27 | #include "intel_gt_mcr.h" |
28 | #include "intel_gt_pm.h" |
29 | #include "intel_gt_requests.h" |
30 | #include "intel_lrc.h" |
31 | #include "intel_lrc_reg.h" |
32 | #include "intel_reset.h" |
33 | #include "intel_ring.h" |
34 | #include "uc/intel_guc_submission.h" |
35 | |
/* Haswell does have the CXT_SIZE register, however it does not appear to be
* valid. Now, docs explain in dwords what is in the context object. The full
* size is 70720 bytes, however, the power context and execlist context will
* never be saved (power context is stored elsewhere, and execlists don't work
* on HSW) - so the final size, including the extra state required for the
* Resource Streamer, is 66944 bytes, which rounds up to 17 pages.
42 | */ |
43 | #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) |
44 | |
45 | #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) |
46 | #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) |
47 | #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) |
48 | #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) |
49 | |
50 | #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) |
51 | |
52 | #define MAX_MMIO_BASES 3 |
53 | struct engine_info { |
54 | u8 class; |
55 | u8 instance; |
56 | /* mmio bases table *must* be sorted in reverse graphics_ver order */ |
57 | struct engine_mmio_base { |
58 | u32 graphics_ver : 8; |
59 | u32 base : 24; |
60 | } mmio_bases[MAX_MMIO_BASES]; |
61 | }; |
62 | |
63 | static const struct engine_info intel_engines[] = { |
64 | [RCS0] = { |
65 | .class = RENDER_CLASS, |
66 | .instance = 0, |
67 | .mmio_bases = { |
68 | { .graphics_ver = 1, .base = RENDER_RING_BASE } |
69 | }, |
70 | }, |
71 | [BCS0] = { |
72 | .class = COPY_ENGINE_CLASS, |
73 | .instance = 0, |
74 | .mmio_bases = { |
75 | { .graphics_ver = 6, .base = BLT_RING_BASE } |
76 | }, |
77 | }, |
78 | [BCS1] = { |
79 | .class = COPY_ENGINE_CLASS, |
80 | .instance = 1, |
81 | .mmio_bases = { |
82 | { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE } |
83 | }, |
84 | }, |
85 | [BCS2] = { |
86 | .class = COPY_ENGINE_CLASS, |
87 | .instance = 2, |
88 | .mmio_bases = { |
89 | { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE } |
90 | }, |
91 | }, |
92 | [BCS3] = { |
93 | .class = COPY_ENGINE_CLASS, |
94 | .instance = 3, |
95 | .mmio_bases = { |
96 | { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE } |
97 | }, |
98 | }, |
99 | [BCS4] = { |
100 | .class = COPY_ENGINE_CLASS, |
101 | .instance = 4, |
102 | .mmio_bases = { |
103 | { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE } |
104 | }, |
105 | }, |
106 | [BCS5] = { |
107 | .class = COPY_ENGINE_CLASS, |
108 | .instance = 5, |
109 | .mmio_bases = { |
110 | { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE } |
111 | }, |
112 | }, |
113 | [BCS6] = { |
114 | .class = COPY_ENGINE_CLASS, |
115 | .instance = 6, |
116 | .mmio_bases = { |
117 | { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE } |
118 | }, |
119 | }, |
120 | [BCS7] = { |
121 | .class = COPY_ENGINE_CLASS, |
122 | .instance = 7, |
123 | .mmio_bases = { |
124 | { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE } |
125 | }, |
126 | }, |
127 | [BCS8] = { |
128 | .class = COPY_ENGINE_CLASS, |
129 | .instance = 8, |
130 | .mmio_bases = { |
131 | { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE } |
132 | }, |
133 | }, |
134 | [VCS0] = { |
135 | .class = VIDEO_DECODE_CLASS, |
136 | .instance = 0, |
137 | .mmio_bases = { |
138 | { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE }, |
139 | { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE }, |
140 | { .graphics_ver = 4, .base = BSD_RING_BASE } |
141 | }, |
142 | }, |
143 | [VCS1] = { |
144 | .class = VIDEO_DECODE_CLASS, |
145 | .instance = 1, |
146 | .mmio_bases = { |
147 | { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE }, |
148 | { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE } |
149 | }, |
150 | }, |
151 | [VCS2] = { |
152 | .class = VIDEO_DECODE_CLASS, |
153 | .instance = 2, |
154 | .mmio_bases = { |
155 | { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE } |
156 | }, |
157 | }, |
158 | [VCS3] = { |
159 | .class = VIDEO_DECODE_CLASS, |
160 | .instance = 3, |
161 | .mmio_bases = { |
162 | { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } |
163 | }, |
164 | }, |
165 | [VCS4] = { |
166 | .class = VIDEO_DECODE_CLASS, |
167 | .instance = 4, |
168 | .mmio_bases = { |
169 | { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } |
170 | }, |
171 | }, |
172 | [VCS5] = { |
173 | .class = VIDEO_DECODE_CLASS, |
174 | .instance = 5, |
175 | .mmio_bases = { |
176 | { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } |
177 | }, |
178 | }, |
179 | [VCS6] = { |
180 | .class = VIDEO_DECODE_CLASS, |
181 | .instance = 6, |
182 | .mmio_bases = { |
183 | { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } |
184 | }, |
185 | }, |
186 | [VCS7] = { |
187 | .class = VIDEO_DECODE_CLASS, |
188 | .instance = 7, |
189 | .mmio_bases = { |
190 | { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } |
191 | }, |
192 | }, |
193 | [VECS0] = { |
194 | .class = VIDEO_ENHANCEMENT_CLASS, |
195 | .instance = 0, |
196 | .mmio_bases = { |
197 | { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE }, |
198 | { .graphics_ver = 7, .base = VEBOX_RING_BASE } |
199 | }, |
200 | }, |
201 | [VECS1] = { |
202 | .class = VIDEO_ENHANCEMENT_CLASS, |
203 | .instance = 1, |
204 | .mmio_bases = { |
205 | { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } |
206 | }, |
207 | }, |
208 | [VECS2] = { |
209 | .class = VIDEO_ENHANCEMENT_CLASS, |
210 | .instance = 2, |
211 | .mmio_bases = { |
212 | { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } |
213 | }, |
214 | }, |
215 | [VECS3] = { |
216 | .class = VIDEO_ENHANCEMENT_CLASS, |
217 | .instance = 3, |
218 | .mmio_bases = { |
219 | { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } |
220 | }, |
221 | }, |
222 | [CCS0] = { |
223 | .class = COMPUTE_CLASS, |
224 | .instance = 0, |
225 | .mmio_bases = { |
226 | { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE } |
227 | } |
228 | }, |
229 | [CCS1] = { |
230 | .class = COMPUTE_CLASS, |
231 | .instance = 1, |
232 | .mmio_bases = { |
233 | { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE } |
234 | } |
235 | }, |
236 | [CCS2] = { |
237 | .class = COMPUTE_CLASS, |
238 | .instance = 2, |
239 | .mmio_bases = { |
240 | { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE } |
241 | } |
242 | }, |
243 | [CCS3] = { |
244 | .class = COMPUTE_CLASS, |
245 | .instance = 3, |
246 | .mmio_bases = { |
247 | { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE } |
248 | } |
249 | }, |
250 | [GSC0] = { |
251 | .class = OTHER_CLASS, |
252 | .instance = OTHER_GSC_INSTANCE, |
253 | .mmio_bases = { |
254 | { .graphics_ver = 12, .base = MTL_GSC_RING_BASE } |
255 | } |
256 | }, |
257 | }; |
258 | |
259 | /** |
260 | * intel_engine_context_size() - return the size of the context for an engine |
261 | * @gt: the gt |
262 | * @class: engine class |
263 | * |
264 | * Each engine class may require a different amount of space for a context |
265 | * image. |
266 | * |
267 | * Return: size (in bytes) of an engine class specific context image |
268 | * |
269 | * Note: this size includes the HWSP, which is part of the context image |
270 | * in LRC mode, but does not include the "shared data page" used with |
271 | * GuC submission. The caller should account for this if using the GuC. |
272 | */ |
273 | u32 intel_engine_context_size(struct intel_gt *gt, u8 class) |
274 | { |
275 | struct intel_uncore *uncore = gt->uncore; |
276 | u32 cxt_size; |
277 | |
278 | BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); |
279 | |
280 | switch (class) { |
281 | case COMPUTE_CLASS: |
282 | fallthrough; |
283 | case RENDER_CLASS: |
284 | switch (GRAPHICS_VER(gt->i915)) { |
285 | default: |
286 | MISSING_CASE(GRAPHICS_VER(gt->i915)); |
287 | return DEFAULT_LR_CONTEXT_RENDER_SIZE; |
288 | case 12: |
289 | case 11: |
290 | return GEN11_LR_CONTEXT_RENDER_SIZE; |
291 | case 9: |
292 | return GEN9_LR_CONTEXT_RENDER_SIZE; |
293 | case 8: |
294 | return GEN8_LR_CONTEXT_RENDER_SIZE; |
295 | case 7: |
296 | if (IS_HASWELL(gt->i915)) |
297 | return HSW_CXT_TOTAL_SIZE; |
298 | |
299 | cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE); |
300 | return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, |
301 | PAGE_SIZE); |
302 | case 6: |
303 | cxt_size = intel_uncore_read(uncore, CXT_SIZE); |
304 | return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, |
305 | PAGE_SIZE); |
306 | case 5: |
307 | case 4: |
308 | /* |
309 | * There is a discrepancy here between the size reported |
310 | * by the register and the size of the context layout |
* in the docs. Both are described as authoritative!
312 | * |
313 | * The discrepancy is on the order of a few cachelines, |
314 | * but the total is under one page (4k), which is our |
315 | * minimum allocation anyway so it should all come |
316 | * out in the wash. |
317 | */ |
318 | cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; |
gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
320 | GRAPHICS_VER(gt->i915), cxt_size * 64, |
321 | cxt_size - 1); |
322 | return round_up(cxt_size * 64, PAGE_SIZE); |
323 | case 3: |
324 | case 2: |
325 | /* For the special day when i810 gets merged. */ |
326 | case 1: |
327 | return 0; |
328 | } |
329 | break; |
330 | default: |
331 | MISSING_CASE(class); |
332 | fallthrough; |
333 | case VIDEO_DECODE_CLASS: |
334 | case VIDEO_ENHANCEMENT_CLASS: |
335 | case COPY_ENGINE_CLASS: |
336 | case OTHER_CLASS: |
337 | if (GRAPHICS_VER(gt->i915) < 8) |
338 | return 0; |
339 | return GEN8_LR_CONTEXT_OTHER_SIZE; |
340 | } |
341 | } |
342 | |
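/*
* Resolve the MMIO base for the running platform: the per-engine table is
* sorted in reverse graphics_ver order, so the first entry whose
* graphics_ver is not newer than the device wins. Illustrative sketch: on a
* graphics version 9 part, VCS0 skips the ver-11 entry and resolves to
* GEN6_BSD_RING_BASE via the ver-6 entry.
*/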
343 | static u32 __engine_mmio_base(struct drm_i915_private *i915, |
344 | const struct engine_mmio_base *bases) |
345 | { |
346 | int i; |
347 | |
348 | for (i = 0; i < MAX_MMIO_BASES; i++) |
349 | if (GRAPHICS_VER(i915) >= bases[i].graphics_ver) |
350 | break; |
351 | |
352 | GEM_BUG_ON(i == MAX_MMIO_BASES); |
353 | GEM_BUG_ON(!bases[i].base); |
354 | |
355 | return bases[i].base; |
356 | } |
357 | |
358 | static void __sprint_engine_name(struct intel_engine_cs *engine) |
359 | { |
360 | /* |
361 | * Before we know what the uABI name for this engine will be, |
362 | * we still would like to keep track of this engine in the debug logs. |
363 | * We throw in a ' here as a reminder that this isn't its final name. |
364 | */ |
GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
366 | intel_engine_class_repr(engine->class), |
367 | engine->instance) >= sizeof(engine->name)); |
368 | } |
369 | |
370 | void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask) |
371 | { |
372 | /* |
373 | * Though they added more rings on g4x/ilk, they did not add |
374 | * per-engine HWSTAM until gen6. |
375 | */ |
376 | if (GRAPHICS_VER(engine->i915) < 6 && engine->class != RENDER_CLASS) |
377 | return; |
378 | |
379 | if (GRAPHICS_VER(engine->i915) >= 3) |
380 | ENGINE_WRITE(engine, RING_HWSTAM, mask); |
381 | else |
382 | ENGINE_WRITE16(engine, RING_HWSTAM, mask); |
383 | } |
384 | |
385 | static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) |
386 | { |
387 | /* Mask off all writes into the unknown HWSP */ |
intel_engine_set_hwsp_writemask(engine, ~0u);
389 | } |
390 | |
391 | static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) |
392 | { |
393 | GEM_DEBUG_WARN_ON(iir); |
394 | } |
395 | |
396 | static u32 get_reset_domain(u8 ver, enum intel_engine_id id) |
397 | { |
398 | u32 reset_domain; |
399 | |
400 | if (ver >= 11) { |
401 | static const u32 engine_reset_domains[] = { |
402 | [RCS0] = GEN11_GRDOM_RENDER, |
403 | [BCS0] = GEN11_GRDOM_BLT, |
404 | [BCS1] = XEHPC_GRDOM_BLT1, |
405 | [BCS2] = XEHPC_GRDOM_BLT2, |
406 | [BCS3] = XEHPC_GRDOM_BLT3, |
407 | [BCS4] = XEHPC_GRDOM_BLT4, |
408 | [BCS5] = XEHPC_GRDOM_BLT5, |
409 | [BCS6] = XEHPC_GRDOM_BLT6, |
410 | [BCS7] = XEHPC_GRDOM_BLT7, |
411 | [BCS8] = XEHPC_GRDOM_BLT8, |
412 | [VCS0] = GEN11_GRDOM_MEDIA, |
413 | [VCS1] = GEN11_GRDOM_MEDIA2, |
414 | [VCS2] = GEN11_GRDOM_MEDIA3, |
415 | [VCS3] = GEN11_GRDOM_MEDIA4, |
416 | [VCS4] = GEN11_GRDOM_MEDIA5, |
417 | [VCS5] = GEN11_GRDOM_MEDIA6, |
418 | [VCS6] = GEN11_GRDOM_MEDIA7, |
419 | [VCS7] = GEN11_GRDOM_MEDIA8, |
420 | [VECS0] = GEN11_GRDOM_VECS, |
421 | [VECS1] = GEN11_GRDOM_VECS2, |
422 | [VECS2] = GEN11_GRDOM_VECS3, |
423 | [VECS3] = GEN11_GRDOM_VECS4, |
424 | [CCS0] = GEN11_GRDOM_RENDER, |
425 | [CCS1] = GEN11_GRDOM_RENDER, |
426 | [CCS2] = GEN11_GRDOM_RENDER, |
427 | [CCS3] = GEN11_GRDOM_RENDER, |
428 | [GSC0] = GEN12_GRDOM_GSC, |
429 | }; |
430 | GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || |
431 | !engine_reset_domains[id]); |
432 | reset_domain = engine_reset_domains[id]; |
433 | } else { |
434 | static const u32 engine_reset_domains[] = { |
435 | [RCS0] = GEN6_GRDOM_RENDER, |
436 | [BCS0] = GEN6_GRDOM_BLT, |
437 | [VCS0] = GEN6_GRDOM_MEDIA, |
438 | [VCS1] = GEN8_GRDOM_MEDIA2, |
439 | [VECS0] = GEN6_GRDOM_VECS, |
440 | }; |
441 | GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || |
442 | !engine_reset_domains[id]); |
443 | reset_domain = engine_reset_domains[id]; |
444 | } |
445 | |
446 | return reset_domain; |
447 | } |
448 | |
449 | static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, |
450 | u8 logical_instance) |
451 | { |
452 | const struct engine_info *info = &intel_engines[id]; |
453 | struct drm_i915_private *i915 = gt->i915; |
454 | struct intel_engine_cs *engine; |
455 | u8 guc_class; |
456 | |
457 | BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); |
458 | BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); |
459 | BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1)); |
460 | BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1)); |
461 | |
462 | if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) |
463 | return -EINVAL; |
464 | |
465 | if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) |
466 | return -EINVAL; |
467 | |
468 | if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) |
469 | return -EINVAL; |
470 | |
471 | if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance])) |
472 | return -EINVAL; |
473 | |
engine = kzalloc(sizeof(*engine), GFP_KERNEL);
475 | if (!engine) |
476 | return -ENOMEM; |
477 | |
478 | BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); |
479 | |
INIT_LIST_HEAD(&engine->pinned_contexts_list);
481 | engine->id = id; |
482 | engine->legacy_idx = INVALID_ENGINE; |
483 | engine->mask = BIT(id); |
484 | engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915), |
485 | id); |
486 | engine->i915 = i915; |
487 | engine->gt = gt; |
488 | engine->uncore = gt->uncore; |
guc_class = engine_class_to_guc_class(info->class);
engine->guc_id = MAKE_GUC_ID(guc_class, info->instance);
engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
492 | |
493 | engine->irq_handler = nop_irq_handler; |
494 | |
495 | engine->class = info->class; |
496 | engine->instance = info->instance; |
497 | engine->logical_mask = BIT(logical_instance); |
498 | __sprint_engine_name(engine); |
499 | |
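/*
* The render engine - or, on parts without a render engine, the
* lowest-numbered compute engine - acts as the first engine of the
* render/compute group (a reading of the condition below, not a spec
* statement).
*/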
500 | if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && |
501 | __ffs(CCS_MASK(engine->gt)) == engine->instance) || |
502 | engine->class == RENDER_CLASS) |
503 | engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; |
504 | |
505 | /* features common between engines sharing EUs */ |
506 | if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { |
507 | engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; |
508 | engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; |
509 | } |
510 | |
511 | engine->props.heartbeat_interval_ms = |
512 | CONFIG_DRM_I915_HEARTBEAT_INTERVAL; |
513 | engine->props.max_busywait_duration_ns = |
514 | CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT; |
515 | engine->props.preempt_timeout_ms = |
516 | CONFIG_DRM_I915_PREEMPT_TIMEOUT; |
517 | engine->props.stop_timeout_ms = |
518 | CONFIG_DRM_I915_STOP_TIMEOUT; |
519 | engine->props.timeslice_duration_ms = |
520 | CONFIG_DRM_I915_TIMESLICE_DURATION; |
521 | |
522 | /* |
523 | * Mid-thread pre-emption is not available in Gen12. Unfortunately, |
524 | * some compute workloads run quite long threads. That means they get |
525 | * reset due to not pre-empting in a timely manner. So, bump the |
526 | * pre-emption timeout value to be much higher for compute engines. |
527 | */ |
528 | if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) |
529 | engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE; |
530 | |
531 | /* Cap properties according to any system limits */ |
532 | #define CLAMP_PROP(field) \ |
533 | do { \ |
534 | u64 clamp = intel_clamp_##field(engine, engine->props.field); \ |
535 | if (clamp != engine->props.field) { \ |
536 | drm_notice(&engine->i915->drm, \ |
537 | "Warning, clamping %s to %lld to prevent overflow\n", \ |
538 | #field, clamp); \ |
539 | engine->props.field = clamp; \ |
540 | } \ |
541 | } while (0) |
542 | |
543 | CLAMP_PROP(heartbeat_interval_ms); |
544 | CLAMP_PROP(max_busywait_duration_ns); |
545 | CLAMP_PROP(preempt_timeout_ms); |
546 | CLAMP_PROP(stop_timeout_ms); |
547 | CLAMP_PROP(timeslice_duration_ms); |
548 | |
549 | #undef CLAMP_PROP |
550 | |
551 | engine->defaults = engine->props; /* never to change again */ |
552 | |
engine->context_size = intel_engine_context_size(gt, engine->class);
554 | if (WARN_ON(engine->context_size > BIT(20))) |
555 | engine->context_size = 0; |
556 | if (engine->context_size) |
557 | DRIVER_CAPS(i915)->has_logical_contexts = true; |
558 | |
ewma__engine_latency_init(&engine->latency);
560 | |
561 | ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); |
562 | |
563 | /* Scrub mmio state on takeover */ |
564 | intel_engine_sanitize_mmio(engine); |
565 | |
566 | gt->engine_class[info->class][info->instance] = engine; |
567 | gt->engine[id] = engine; |
568 | |
569 | return 0; |
570 | } |
571 | |
572 | u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value) |
573 | { |
574 | value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); |
575 | |
576 | return value; |
577 | } |
578 | |
579 | u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value) |
580 | { |
581 | value = min(value, jiffies_to_nsecs(2)); |
582 | |
583 | return value; |
584 | } |
585 | |
586 | u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value) |
587 | { |
588 | /* |
589 | * NB: The GuC API only supports 32bit values. However, the limit is further |
590 | * reduced due to internal calculations which would otherwise overflow. |
591 | */ |
if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
593 | value = min_t(u64, value, guc_policy_max_preempt_timeout_ms()); |
594 | |
595 | value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); |
596 | |
597 | return value; |
598 | } |
599 | |
600 | u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value) |
601 | { |
602 | value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); |
603 | |
604 | return value; |
605 | } |
606 | |
607 | u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value) |
608 | { |
609 | /* |
610 | * NB: The GuC API only supports 32bit values. However, the limit is further |
611 | * reduced due to internal calculations which would otherwise overflow. |
612 | */ |
if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
614 | value = min_t(u64, value, guc_policy_max_exec_quantum_ms()); |
615 | |
616 | value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); |
617 | |
618 | return value; |
619 | } |
620 | |
621 | static void __setup_engine_capabilities(struct intel_engine_cs *engine) |
622 | { |
623 | struct drm_i915_private *i915 = engine->i915; |
624 | |
625 | if (engine->class == VIDEO_DECODE_CLASS) { |
626 | /* |
627 | * HEVC support is present on first engine instance |
628 | * before Gen11 and on all instances afterwards. |
629 | */ |
630 | if (GRAPHICS_VER(i915) >= 11 || |
631 | (GRAPHICS_VER(i915) >= 9 && engine->instance == 0)) |
632 | engine->uabi_capabilities |= |
633 | I915_VIDEO_CLASS_CAPABILITY_HEVC; |
634 | |
635 | /* |
636 | * SFC block is present only on even logical engine |
637 | * instances. |
638 | */ |
639 | if ((GRAPHICS_VER(i915) >= 11 && |
640 | (engine->gt->info.vdbox_sfc_access & |
641 | BIT(engine->instance))) || |
642 | (GRAPHICS_VER(i915) >= 9 && engine->instance == 0)) |
643 | engine->uabi_capabilities |= |
644 | I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; |
645 | } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) { |
646 | if (GRAPHICS_VER(i915) >= 9 && |
647 | engine->gt->info.sfc_mask & BIT(engine->instance)) |
648 | engine->uabi_capabilities |= |
649 | I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; |
650 | } |
651 | } |
652 | |
653 | static void intel_setup_engine_capabilities(struct intel_gt *gt) |
654 | { |
655 | struct intel_engine_cs *engine; |
656 | enum intel_engine_id id; |
657 | |
658 | for_each_engine(engine, gt, id) |
659 | __setup_engine_capabilities(engine); |
660 | } |
661 | |
662 | /** |
663 | * intel_engines_release() - free the resources allocated for Command Streamers |
664 | * @gt: pointer to struct intel_gt |
665 | */ |
666 | void intel_engines_release(struct intel_gt *gt) |
667 | { |
668 | struct intel_engine_cs *engine; |
669 | enum intel_engine_id id; |
670 | |
671 | /* |
672 | * Before we release the resources held by engine, we must be certain |
673 | * that the HW is no longer accessing them -- having the GPU scribble |
674 | * to or read from a page being used for something else causes no end |
675 | * of fun. |
676 | * |
677 | * The GPU should be reset by this point, but assume the worst just |
678 | * in case we aborted before completely initialising the engines. |
679 | */ |
680 | GEM_BUG_ON(intel_gt_pm_is_awake(gt)); |
681 | if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) |
682 | __intel_gt_reset(gt, ALL_ENGINES); |
683 | |
684 | /* Decouple the backend; but keep the layout for late GPU resets */ |
685 | for_each_engine(engine, gt, id) { |
686 | if (!engine->release) |
687 | continue; |
688 | |
intel_wakeref_wait_for_idle(&engine->wakeref);
690 | GEM_BUG_ON(intel_engine_pm_is_awake(engine)); |
691 | |
692 | engine->release(engine); |
693 | engine->release = NULL; |
694 | |
695 | memset(&engine->reset, 0, sizeof(engine->reset)); |
696 | } |
697 | } |
698 | |
699 | void intel_engine_free_request_pool(struct intel_engine_cs *engine) |
700 | { |
701 | if (!engine->request_pool) |
702 | return; |
703 | |
kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
705 | } |
706 | |
707 | void intel_engines_free(struct intel_gt *gt) |
708 | { |
709 | struct intel_engine_cs *engine; |
710 | enum intel_engine_id id; |
711 | |
712 | /* Free the requests! dma-resv keeps fences around for an eternity */ |
713 | rcu_barrier(); |
714 | |
715 | for_each_engine(engine, gt, id) { |
716 | intel_engine_free_request_pool(engine); |
kfree(engine);
718 | gt->engine[id] = NULL; |
719 | } |
720 | } |
721 | |
722 | static |
723 | bool gen11_vdbox_has_sfc(struct intel_gt *gt, |
724 | unsigned int physical_vdbox, |
725 | unsigned int logical_vdbox, u16 vdbox_mask) |
726 | { |
727 | struct drm_i915_private *i915 = gt->i915; |
728 | |
729 | /* |
730 | * In Gen11, only even numbered logical VDBOXes are hooked |
731 | * up to an SFC (Scaler & Format Converter) unit. |
* In Gen12, even numbered physical instances are always connected
* to an SFC. Odd numbered physical instances have SFC only if the
* previous even instance is fused off.
735 | * |
736 | * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field |
737 | * in the fuse register that tells us whether a specific SFC is present. |
738 | */ |
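/*
* Worked example (a sketch, assuming the SFC fuse for the pair is set): on
* a media version 12 part with physical vdbox0 fused off, the odd-numbered
* vdbox1 still reports SFC access because its even-numbered sibling is
* absent; on media version 11 only even-numbered logical instances qualify.
*/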
739 | if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0) |
740 | return false; |
741 | else if (MEDIA_VER(i915) >= 12) |
742 | return (physical_vdbox % 2 == 0) || |
743 | !(BIT(physical_vdbox - 1) & vdbox_mask); |
744 | else if (MEDIA_VER(i915) == 11) |
745 | return logical_vdbox % 2 == 0; |
746 | |
747 | return false; |
748 | } |
749 | |
750 | static void engine_mask_apply_media_fuses(struct intel_gt *gt) |
751 | { |
752 | struct drm_i915_private *i915 = gt->i915; |
753 | unsigned int logical_vdbox = 0; |
754 | unsigned int i; |
755 | u32 media_fuse, fuse1; |
756 | u16 vdbox_mask; |
757 | u16 vebox_mask; |
758 | |
759 | if (MEDIA_VER(gt->i915) < 11) |
760 | return; |
761 | |
762 | /* |
763 | * On newer platforms the fusing register is called 'enable' and has |
764 | * enable semantics, while on older platforms it is called 'disable' |
* and bits have disable semantics.
766 | */ |
media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
768 | if (MEDIA_VER_FULL(i915) < IP_VER(12, 50)) |
769 | media_fuse = ~media_fuse; |
770 | |
771 | vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; |
772 | vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> |
773 | GEN11_GT_VEBOX_DISABLE_SHIFT; |
774 | |
775 | if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) { |
fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
777 | gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); |
778 | } else { |
779 | gt->info.sfc_mask = ~0; |
780 | } |
781 | |
782 | for (i = 0; i < I915_MAX_VCS; i++) { |
783 | if (!HAS_ENGINE(gt, _VCS(i))) { |
784 | vdbox_mask &= ~BIT(i); |
785 | continue; |
786 | } |
787 | |
788 | if (!(BIT(i) & vdbox_mask)) { |
789 | gt->info.engine_mask &= ~BIT(_VCS(i)); |
gt_dbg(gt, "vcs%u fused off\n", i);
791 | continue; |
792 | } |
793 | |
if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
795 | gt->info.vdbox_sfc_access |= BIT(i); |
796 | logical_vdbox++; |
797 | } |
gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt));
799 | GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt)); |
800 | |
801 | for (i = 0; i < I915_MAX_VECS; i++) { |
802 | if (!HAS_ENGINE(gt, _VECS(i))) { |
803 | vebox_mask &= ~BIT(i); |
804 | continue; |
805 | } |
806 | |
807 | if (!(BIT(i) & vebox_mask)) { |
808 | gt->info.engine_mask &= ~BIT(_VECS(i)); |
gt_dbg(gt, "vecs%u fused off\n", i);
810 | } |
811 | } |
gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt));
813 | GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); |
814 | } |
815 | |
816 | static void engine_mask_apply_compute_fuses(struct intel_gt *gt) |
817 | { |
818 | struct drm_i915_private *i915 = gt->i915; |
struct intel_gt_info *info = &gt->info;
820 | int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS; |
821 | unsigned long ccs_mask; |
822 | unsigned int i; |
823 | |
824 | if (GRAPHICS_VER(i915) < 11) |
825 | return; |
826 | |
827 | if (hweight32(CCS_MASK(gt)) <= 1) |
828 | return; |
829 | |
ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
ss_per_ccs);
832 | /* |
833 | * If all DSS in a quadrant are fused off, the corresponding CCS |
834 | * engine is not available for use. |
835 | */ |
836 | for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { |
837 | info->engine_mask &= ~BIT(_CCS(i)); |
gt_dbg(gt, "ccs%u fused off\n", i);
839 | } |
840 | } |
841 | |
842 | static void engine_mask_apply_copy_fuses(struct intel_gt *gt) |
843 | { |
844 | struct drm_i915_private *i915 = gt->i915; |
struct intel_gt_info *info = &gt->info;
846 | unsigned long meml3_mask; |
847 | unsigned long quad; |
848 | |
849 | if (!(GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60) && |
850 | GRAPHICS_VER_FULL(i915) < IP_VER(12, 70))) |
851 | return; |
852 | |
meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
854 | meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask); |
855 | |
856 | /* |
857 | * Link Copy engines may be fused off according to meml3_mask. Each |
* bit is a quad that houses two Link Copy and two Sub Copy engines.
859 | */ |
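/*
* Illustrative sketch: a cleared bit for quad 1 maps to instance
* 1 * 2 + 1 = 3, so the GENMASK below covers BCS3 and BCS4 and both are
* removed from the engine mask.
*/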
860 | for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) { |
861 | unsigned int instance = quad * 2 + 1; |
862 | intel_engine_mask_t mask = GENMASK(_BCS(instance + 1), |
863 | _BCS(instance)); |
864 | |
865 | if (mask & info->engine_mask) { |
gt_dbg(gt, "bcs%u fused off\n", instance);
gt_dbg(gt, "bcs%u fused off\n", instance + 1);
868 | |
869 | info->engine_mask &= ~mask; |
870 | } |
871 | } |
872 | } |
873 | |
874 | /* |
875 | * Determine which engines are fused off in our particular hardware. |
876 | * Note that we have a catch-22 situation where we need to be able to access |
877 | * the blitter forcewake domain to read the engine fuses, but at the same time |
878 | * we need to know which engines are available on the system to know which |
* forcewake domains are present. We solve this by initializing the forcewake
880 | * domains based on the full engine mask in the platform capabilities before |
881 | * calling this function and pruning the domains for fused-off engines |
882 | * afterwards. |
883 | */ |
884 | static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) |
885 | { |
struct intel_gt_info *info = &gt->info;
887 | |
888 | GEM_BUG_ON(!info->engine_mask); |
889 | |
890 | engine_mask_apply_media_fuses(gt); |
891 | engine_mask_apply_compute_fuses(gt); |
892 | engine_mask_apply_copy_fuses(gt); |
893 | |
894 | /* |
895 | * The only use of the GSC CS is to load and communicate with the GSC |
896 | * FW, so we have no use for it if we don't have the FW. |
897 | * |
898 | * IMPORTANT: in cases where we don't have the GSC FW, we have a |
899 | * catch-22 situation that breaks media C6 due to 2 requirements: |
900 | * 1) once turned on, the GSC power well will not go to sleep unless the |
901 | * GSC FW is loaded. |
902 | * 2) to enable idling (which is required for media C6) we need to |
903 | * initialize the IDLE_MSG register for the GSC CS and do at least 1 |
904 | * submission, which will wake up the GSC power well. |
905 | */ |
if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(&gt->uc)) {
gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n");
908 | info->engine_mask &= ~BIT(GSC0); |
909 | } |
910 | |
911 | /* |
912 | * Do not create the command streamer for CCS slices beyond the first. |
* All the workloads submitted to the first engine will be shared among
* all the slices.
*
* Once the user is allowed to customize the CCS mode, this check will
* need to be removed.
918 | */ |
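/*
* Example (sketch): with CCS_MASK(gt) == 0b0110, __ffs() picks CCS1 as the
* first present compute engine, so CCS0-CCS3 are masked out below and only
* CCS1 is put back.
*/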
919 | if (IS_DG2(gt->i915)) { |
920 | u8 first_ccs = __ffs(CCS_MASK(gt)); |
921 | |
/* Mask off all the CCS engines */
923 | info->engine_mask &= ~GENMASK(CCS3, CCS0); |
924 | /* Put back in the first CCS engine */ |
925 | info->engine_mask |= BIT(_CCS(first_ccs)); |
926 | } |
927 | |
928 | return info->engine_mask; |
929 | } |
930 | |
931 | static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids, |
932 | u8 class, const u8 *map, u8 num_instances) |
933 | { |
934 | int i, j; |
935 | u8 current_logical_id = 0; |
936 | |
937 | for (j = 0; j < num_instances; ++j) { |
938 | for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { |
939 | if (!HAS_ENGINE(gt, i) || |
940 | intel_engines[i].class != class) |
941 | continue; |
942 | |
943 | if (intel_engines[i].instance == map[j]) { |
944 | logical_ids[intel_engines[i].instance] = |
945 | current_logical_id++; |
946 | break; |
947 | } |
948 | } |
949 | } |
950 | } |
951 | |
952 | static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class) |
953 | { |
954 | /* |
* Logical to physical mapping is needed for proper support
* of the split-frame feature.
957 | */ |
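/*
* Illustrative sketch (assuming all eight instances are present): with the
* map below, physical VCS instances 0/2/4/6 receive logical ids 0-3 and
* instances 1/3/5/7 receive logical ids 4-7.
*/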
958 | if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) { |
959 | const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 }; |
960 | |
961 | populate_logical_ids(gt, logical_ids, class, |
962 | map, ARRAY_SIZE(map)); |
963 | } else { |
964 | int i; |
965 | u8 map[MAX_ENGINE_INSTANCE + 1]; |
966 | |
967 | for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) |
968 | map[i] = i; |
969 | populate_logical_ids(gt, logical_ids, class, |
970 | map, ARRAY_SIZE(map)); |
971 | } |
972 | } |
973 | |
974 | /** |
975 | * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers |
976 | * @gt: pointer to struct intel_gt |
977 | * |
978 | * Return: non-zero if the initialization failed. |
979 | */ |
980 | int intel_engines_init_mmio(struct intel_gt *gt) |
981 | { |
982 | struct drm_i915_private *i915 = gt->i915; |
983 | const unsigned int engine_mask = init_engine_mask(gt); |
984 | unsigned int mask = 0; |
985 | unsigned int i, class; |
986 | u8 logical_ids[MAX_ENGINE_INSTANCE + 1]; |
987 | int err; |
988 | |
989 | drm_WARN_ON(&i915->drm, engine_mask == 0); |
990 | drm_WARN_ON(&i915->drm, engine_mask & |
991 | GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); |
992 | |
993 | if (i915_inject_probe_failure(i915)) |
994 | return -ENODEV; |
995 | |
996 | for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { |
997 | setup_logical_ids(gt, logical_ids, class); |
998 | |
999 | for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { |
1000 | u8 instance = intel_engines[i].instance; |
1001 | |
1002 | if (intel_engines[i].class != class || |
1003 | !HAS_ENGINE(gt, i)) |
1004 | continue; |
1005 | |
err = intel_engine_setup(gt, i,
logical_ids[instance]);
1008 | if (err) |
1009 | goto cleanup; |
1010 | |
1011 | mask |= BIT(i); |
1012 | } |
1013 | } |
1014 | |
1015 | /* |
1016 | * Catch failures to update intel_engines table when the new engines |
1017 | * are added to the driver by a warning and disabling the forgotten |
1018 | * engines. |
1019 | */ |
1020 | if (drm_WARN_ON(&i915->drm, mask != engine_mask)) |
1021 | gt->info.engine_mask = mask; |
1022 | |
1023 | gt->info.num_engines = hweight32(mask); |
1024 | |
1025 | intel_gt_check_and_clear_faults(gt); |
1026 | |
1027 | intel_setup_engine_capabilities(gt); |
1028 | |
intel_uncore_prune_engine_fw_domains(gt->uncore, gt);
1030 | |
1031 | return 0; |
1032 | |
1033 | cleanup: |
1034 | intel_engines_free(gt); |
1035 | return err; |
1036 | } |
1037 | |
1038 | void intel_engine_init_execlists(struct intel_engine_cs *engine) |
1039 | { |
1040 | struct intel_engine_execlists * const execlists = &engine->execlists; |
1041 | |
1042 | execlists->port_mask = 1; |
1043 | GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); |
1044 | GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); |
1045 | |
1046 | memset(execlists->pending, 0, sizeof(execlists->pending)); |
1047 | execlists->active = |
1048 | memset(execlists->inflight, 0, sizeof(execlists->inflight)); |
1049 | } |
1050 | |
1051 | static void cleanup_status_page(struct intel_engine_cs *engine) |
1052 | { |
1053 | struct i915_vma *vma; |
1054 | |
1055 | /* Prevent writes into HWSP after returning the page to the system */ |
intel_engine_set_hwsp_writemask(engine, ~0u);
1057 | |
1058 | vma = fetch_and_zero(&engine->status_page.vma); |
1059 | if (!vma) |
1060 | return; |
1061 | |
1062 | if (!HWS_NEEDS_PHYSICAL(engine->i915)) |
1063 | i915_vma_unpin(vma); |
1064 | |
i915_gem_object_unpin_map(vma->obj);
i915_gem_object_put(vma->obj);
1067 | } |
1068 | |
1069 | static int pin_ggtt_status_page(struct intel_engine_cs *engine, |
1070 | struct i915_gem_ww_ctx *ww, |
1071 | struct i915_vma *vma) |
1072 | { |
1073 | unsigned int flags; |
1074 | |
if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
1076 | /* |
1077 | * On g33, we cannot place HWS above 256MiB, so |
1078 | * restrict its pinning to the low mappable arena. |
1079 | * Though this restriction is not documented for |
1080 | * gen4, gen5, or byt, they also behave similarly |
1081 | * and hang if the HWS is placed at the top of the |
1082 | * GTT. To generalise, it appears that all !llc |
1083 | * platforms have issues with us placing the HWS |
1084 | * above the mappable region (even though we never |
1085 | * actually map it). |
1086 | */ |
1087 | flags = PIN_MAPPABLE; |
1088 | else |
1089 | flags = PIN_HIGH; |
1090 | |
return i915_ggtt_pin(vma, ww, 0, flags);
1092 | } |
1093 | |
1094 | static int init_status_page(struct intel_engine_cs *engine) |
1095 | { |
1096 | struct drm_i915_gem_object *obj; |
1097 | struct i915_gem_ww_ctx ww; |
1098 | struct i915_vma *vma; |
1099 | void *vaddr; |
1100 | int ret; |
1101 | |
INIT_LIST_HEAD(&engine->status_page.timelines);
1103 | |
1104 | /* |
1105 | * Though the HWS register does support 36bit addresses, historically |
1106 | * we have had hangs and corruption reported due to wild writes if |
1107 | * the HWS is placed above 4G. We only allow objects to be allocated |
1108 | * in GFP_DMA32 for i965, and no earlier physical address users had |
1109 | * access to more than 4G. |
1110 | */ |
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
gt_err(engine->gt, "Failed to allocate status page\n");
return PTR_ERR(obj);
1115 | } |
1116 | |
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
1122 | goto err_put; |
1123 | } |
1124 | |
i915_gem_ww_ctx_init(&ww, true);
retry:
ret = i915_gem_object_lock(obj, &ww);
if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915))
ret = pin_ggtt_status_page(engine, &ww, vma);
if (ret)
goto err;

vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
1136 | goto err_unpin; |
1137 | } |
1138 | |
1139 | engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); |
1140 | engine->status_page.vma = vma; |
1141 | |
1142 | err_unpin: |
1143 | if (ret) |
1144 | i915_vma_unpin(vma); |
1145 | err: |
1146 | if (ret == -EDEADLK) { |
ret = i915_gem_ww_ctx_backoff(&ww);
1148 | if (!ret) |
1149 | goto retry; |
1150 | } |
i915_gem_ww_ctx_fini(&ww);
1152 | err_put: |
1153 | if (ret) |
1154 | i915_gem_object_put(obj); |
1155 | return ret; |
1156 | } |
1157 | |
1158 | static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) |
1159 | { |
1160 | static const union intel_engine_tlb_inv_reg gen8_regs[] = { |
1161 | [RENDER_CLASS].reg = GEN8_RTCR, |
1162 | [VIDEO_DECODE_CLASS].reg = GEN8_M1TCR, /* , GEN8_M2TCR */ |
1163 | [VIDEO_ENHANCEMENT_CLASS].reg = GEN8_VTCR, |
1164 | [COPY_ENGINE_CLASS].reg = GEN8_BTCR, |
1165 | }; |
1166 | static const union intel_engine_tlb_inv_reg gen12_regs[] = { |
1167 | [RENDER_CLASS].reg = GEN12_GFX_TLB_INV_CR, |
1168 | [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR, |
1169 | [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR, |
1170 | [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR, |
1171 | [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR, |
1172 | }; |
1173 | static const union intel_engine_tlb_inv_reg xehp_regs[] = { |
1174 | [RENDER_CLASS].mcr_reg = XEHP_GFX_TLB_INV_CR, |
1175 | [VIDEO_DECODE_CLASS].mcr_reg = XEHP_VD_TLB_INV_CR, |
1176 | [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR, |
1177 | [COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR, |
1178 | [COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR, |
1179 | }; |
1180 | static const union intel_engine_tlb_inv_reg xelpmp_regs[] = { |
1181 | [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR, |
1182 | [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR, |
1183 | [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR, |
1184 | }; |
1185 | struct drm_i915_private *i915 = engine->i915; |
1186 | const unsigned int instance = engine->instance; |
1187 | const unsigned int class = engine->class; |
1188 | const union intel_engine_tlb_inv_reg *regs; |
1189 | union intel_engine_tlb_inv_reg reg; |
1190 | unsigned int num = 0; |
1191 | u32 val; |
1192 | |
1193 | /* |
1194 | * New platforms should not be added with catch-all-newer (>=) |
1195 | * condition so that any later platform added triggers the below warning |
1196 | * and in turn mandates a human cross-check of whether the invalidation |
1197 | * flows have compatible semantics. |
1198 | * |
1199 | * For instance with the 11.00 -> 12.00 transition three out of five |
1200 | * respective engine registers were moved to masked type. Then after the |
* 12.00 -> 12.50 transition multicast handling is required too.
1202 | */ |
1203 | |
1204 | if (engine->gt->type == GT_MEDIA) { |
1205 | if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) { |
1206 | regs = xelpmp_regs; |
1207 | num = ARRAY_SIZE(xelpmp_regs); |
1208 | } |
1209 | } else { |
1210 | if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) || |
1211 | GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) || |
1212 | GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) || |
1213 | GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) || |
1214 | GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) { |
1215 | regs = xehp_regs; |
1216 | num = ARRAY_SIZE(xehp_regs); |
1217 | } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) || |
1218 | GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) { |
1219 | regs = gen12_regs; |
1220 | num = ARRAY_SIZE(gen12_regs); |
1221 | } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { |
1222 | regs = gen8_regs; |
1223 | num = ARRAY_SIZE(gen8_regs); |
1224 | } else if (GRAPHICS_VER(i915) < 8) { |
1225 | return 0; |
1226 | } |
1227 | } |
1228 | |
1229 | if (gt_WARN_ONCE(engine->gt, !num, |
"Platform does not implement TLB invalidation!"))
1231 | return -ENODEV; |
1232 | |
1233 | if (gt_WARN_ON_ONCE(engine->gt, |
1234 | class >= num || |
1235 | (!regs[class].reg.reg && |
1236 | !regs[class].mcr_reg.reg))) |
1237 | return -ERANGE; |
1238 | |
1239 | reg = regs[class]; |
1240 | |
1241 | if (regs == xelpmp_regs && class == OTHER_CLASS) { |
1242 | /* |
1243 | * There's only a single GSC instance, but it uses register bit |
1244 | * 1 instead of either 0 or OTHER_GSC_INSTANCE. |
1245 | */ |
1246 | GEM_WARN_ON(instance != OTHER_GSC_INSTANCE); |
1247 | val = 1; |
1248 | } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) { |
1249 | reg.reg = GEN8_M2TCR; |
1250 | val = 0; |
1251 | } else { |
1252 | val = instance; |
1253 | } |
1254 | |
1255 | val = BIT(val); |
1256 | |
1257 | engine->tlb_inv.mcr = regs == xehp_regs; |
1258 | engine->tlb_inv.reg = reg; |
1259 | engine->tlb_inv.done = val; |
1260 | |
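/*
* For these classes the invalidation registers became masked registers on
* graphics version 12 (see the note on the 11.00 -> 12.00 transition
* above), so the request value must also set the corresponding mask bit;
* completion is still polled on the plain bit recorded in 'done'.
*/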
1261 | if (GRAPHICS_VER(i915) >= 12 && |
1262 | (engine->class == VIDEO_DECODE_CLASS || |
1263 | engine->class == VIDEO_ENHANCEMENT_CLASS || |
1264 | engine->class == COMPUTE_CLASS || |
1265 | engine->class == OTHER_CLASS)) |
1266 | engine->tlb_inv.request = _MASKED_BIT_ENABLE(val); |
1267 | else |
1268 | engine->tlb_inv.request = val; |
1269 | |
1270 | return 0; |
1271 | } |
1272 | |
1273 | static int engine_setup_common(struct intel_engine_cs *engine) |
1274 | { |
1275 | int err; |
1276 | |
init_llist_head(&engine->barrier_tasks);
1278 | |
1279 | err = intel_engine_init_tlb_invalidation(engine); |
1280 | if (err) |
1281 | return err; |
1282 | |
1283 | err = init_status_page(engine); |
1284 | if (err) |
1285 | return err; |
1286 | |
engine->breadcrumbs = intel_breadcrumbs_create(engine);
1288 | if (!engine->breadcrumbs) { |
1289 | err = -ENOMEM; |
1290 | goto err_status; |
1291 | } |
1292 | |
1293 | engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL); |
1294 | if (!engine->sched_engine) { |
1295 | err = -ENOMEM; |
1296 | goto err_sched_engine; |
1297 | } |
1298 | engine->sched_engine->private_data = engine; |
1299 | |
1300 | err = intel_engine_init_cmd_parser(engine); |
1301 | if (err) |
1302 | goto err_cmd_parser; |
1303 | |
1304 | intel_engine_init_execlists(engine); |
1305 | intel_engine_init__pm(engine); |
1306 | intel_engine_init_retire(engine); |
1307 | |
1308 | /* Use the whole device by default */ |
1309 | engine->sseu = |
intel_sseu_from_device_info(&engine->gt->info.sseu);
1311 | |
1312 | intel_engine_init_workarounds(engine); |
1313 | intel_engine_init_whitelist(engine); |
1314 | intel_engine_init_ctx_wa(engine); |
1315 | |
1316 | if (GRAPHICS_VER(engine->i915) >= 12) |
1317 | engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; |
1318 | |
1319 | return 0; |
1320 | |
1321 | err_cmd_parser: |
i915_sched_engine_put(engine->sched_engine);
err_sched_engine:
intel_breadcrumbs_put(engine->breadcrumbs);
1325 | err_status: |
1326 | cleanup_status_page(engine); |
1327 | return err; |
1328 | } |
1329 | |
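/*
* Throwaway request/ring pair used only to measure how many dwords the
* engine's emit_fini_breadcrumb() callback writes: the breadcrumb is
* emitted into the cs[] buffer below and just the length is kept.
*/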
1330 | struct measure_breadcrumb { |
1331 | struct i915_request rq; |
1332 | struct intel_ring ring; |
1333 | u32 cs[2048]; |
1334 | }; |
1335 | |
1336 | static int measure_breadcrumb_dw(struct intel_context *ce) |
1337 | { |
1338 | struct intel_engine_cs *engine = ce->engine; |
1339 | struct measure_breadcrumb *frame; |
1340 | int dw; |
1341 | |
1342 | GEM_BUG_ON(!engine->gt->scratch); |
1343 | |
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1345 | if (!frame) |
1346 | return -ENOMEM; |
1347 | |
1348 | frame->rq.i915 = engine->i915; |
1349 | frame->rq.engine = engine; |
1350 | frame->rq.context = ce; |
1351 | rcu_assign_pointer(frame->rq.timeline, ce->timeline); |
1352 | frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno; |
1353 | |
1354 | frame->ring.vaddr = frame->cs; |
1355 | frame->ring.size = sizeof(frame->cs); |
1356 | frame->ring.wrap = |
1357 | BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size); |
1358 | frame->ring.effective_size = frame->ring.size; |
intel_ring_update_space(&frame->ring);
1360 | frame->rq.ring = &frame->ring; |
1361 | |
1362 | mutex_lock(&ce->timeline->mutex); |
spin_lock_irq(&engine->sched_engine->lock);
1364 | |
1365 | dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; |
1366 | |
spin_unlock_irq(&engine->sched_engine->lock);
mutex_unlock(&ce->timeline->mutex);
1369 | |
1370 | GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ |
1371 | |
kfree(frame);
1373 | return dw; |
1374 | } |
1375 | |
1376 | struct intel_context * |
1377 | intel_engine_create_pinned_context(struct intel_engine_cs *engine, |
1378 | struct i915_address_space *vm, |
1379 | unsigned int ring_size, |
1380 | unsigned int hwsp, |
1381 | struct lock_class_key *key, |
1382 | const char *name) |
1383 | { |
1384 | struct intel_context *ce; |
1385 | int err; |
1386 | |
1387 | ce = intel_context_create(engine); |
if (IS_ERR(ce))
1389 | return ce; |
1390 | |
1391 | __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); |
1392 | ce->timeline = page_pack_bits(NULL, hwsp); |
1393 | ce->ring = NULL; |
1394 | ce->ring_size = ring_size; |
1395 | |
i915_vm_put(ce->vm);
1397 | ce->vm = i915_vm_get(vm); |
1398 | |
1399 | err = intel_context_pin(ce); /* perma-pin so it is always available */ |
1400 | if (err) { |
1401 | intel_context_put(ce); |
return ERR_PTR(err);
1403 | } |
1404 | |
list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
1406 | |
1407 | /* |
1408 | * Give our perma-pinned kernel timelines a separate lockdep class, |
1409 | * so that we can use them from within the normal user timelines |
1410 | * should we need to inject GPU operations during their request |
1411 | * construction. |
1412 | */ |
1413 | lockdep_set_class_and_name(&ce->timeline->mutex, key, name); |
1414 | |
1415 | return ce; |
1416 | } |
1417 | |
1418 | void intel_engine_destroy_pinned_context(struct intel_context *ce) |
1419 | { |
1420 | struct intel_engine_cs *engine = ce->engine; |
1421 | struct i915_vma *hwsp = engine->status_page.vma; |
1422 | |
1423 | GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp); |
1424 | |
1425 | mutex_lock(&hwsp->vm->mutex); |
list_del(&ce->timeline->engine_link);
mutex_unlock(&hwsp->vm->mutex);
1428 | |
list_del(&ce->pinned_contexts_link);
1430 | intel_context_unpin(ce); |
1431 | intel_context_put(ce); |
1432 | } |
1433 | |
1434 | static struct intel_context * |
1435 | create_ggtt_bind_context(struct intel_engine_cs *engine) |
1436 | { |
1437 | static struct lock_class_key kernel; |
1438 | |
1439 | /* |
1440 | * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple |
* bind requests at a time, so get a bigger ring.
1442 | */ |
return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K,
I915_GEM_HWS_GGTT_BIND_ADDR,
&kernel, "ggtt_bind_context");
1446 | } |
1447 | |
1448 | static struct intel_context * |
1449 | create_kernel_context(struct intel_engine_cs *engine) |
1450 | { |
1451 | static struct lock_class_key kernel; |
1452 | |
return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
I915_GEM_HWS_SEQNO_ADDR,
&kernel, "kernel_context");
1456 | } |
1457 | |
1458 | /* |
1459 | * engine_init_common - initialize engine state which might require hw access |
1460 | * @engine: Engine to initialize. |
1461 | * |
1462 | * Initializes @engine@ structure members shared between legacy and execlists |
1463 | * submission modes which do require hardware access. |
1464 | * |
* Typically done at later stages of submission mode specific engine setup.
1466 | * |
1467 | * Returns zero on success or an error code on failure. |
1468 | */ |
1469 | static int engine_init_common(struct intel_engine_cs *engine) |
1470 | { |
1471 | struct intel_context *ce, *bce = NULL; |
1472 | int ret; |
1473 | |
1474 | engine->set_default_submission(engine); |
1475 | |
1476 | /* |
1477 | * We may need to do things with the shrinker which |
1478 | * require us to immediately switch back to the default |
1479 | * context. This can cause a problem as pinning the |
1480 | * default context also requires GTT space which may not |
1481 | * be available. To avoid this we always pin the default |
1482 | * context. |
1483 | */ |
1484 | ce = create_kernel_context(engine); |
if (IS_ERR(ce))
return PTR_ERR(ce);
1487 | /* |
* Create a separate pinned context for GGTT updates with the blitter
* engine if a platform requires such a service. MI_UPDATE_GTT works on
* other engines as well, but BCS should be a less busy engine, so pick
* that for GGTT updates.
*/
if (i915_ggtt_require_binder(engine->i915) && engine->id == BCS0) {
1494 | bce = create_ggtt_bind_context(engine); |
if (IS_ERR(bce)) {
ret = PTR_ERR(bce);
1497 | goto err_ce_context; |
1498 | } |
1499 | } |
1500 | |
1501 | ret = measure_breadcrumb_dw(ce); |
1502 | if (ret < 0) |
1503 | goto err_bce_context; |
1504 | |
1505 | engine->emit_fini_breadcrumb_dw = ret; |
1506 | engine->kernel_context = ce; |
1507 | engine->bind_context = bce; |
1508 | |
1509 | return 0; |
1510 | |
1511 | err_bce_context: |
1512 | if (bce) |
intel_engine_destroy_pinned_context(bce);
1514 | err_ce_context: |
1515 | intel_engine_destroy_pinned_context(ce); |
1516 | return ret; |
1517 | } |
1518 | |
1519 | int intel_engines_init(struct intel_gt *gt) |
1520 | { |
1521 | int (*setup)(struct intel_engine_cs *engine); |
1522 | struct intel_engine_cs *engine; |
1523 | enum intel_engine_id id; |
1524 | int err; |
1525 | |
if (intel_uc_uses_guc_submission(&gt->uc)) {
1527 | gt->submission_method = INTEL_SUBMISSION_GUC; |
1528 | setup = intel_guc_submission_setup; |
1529 | } else if (HAS_EXECLISTS(gt->i915)) { |
1530 | gt->submission_method = INTEL_SUBMISSION_ELSP; |
1531 | setup = intel_execlists_submission_setup; |
1532 | } else { |
1533 | gt->submission_method = INTEL_SUBMISSION_RING; |
1534 | setup = intel_ring_submission_setup; |
1535 | } |
1536 | |
1537 | for_each_engine(engine, gt, id) { |
1538 | err = engine_setup_common(engine); |
1539 | if (err) |
1540 | return err; |
1541 | |
1542 | err = setup(engine); |
1543 | if (err) { |
1544 | intel_engine_cleanup_common(engine); |
1545 | return err; |
1546 | } |
1547 | |
1548 | /* The backend should now be responsible for cleanup */ |
1549 | GEM_BUG_ON(engine->release == NULL); |
1550 | |
1551 | err = engine_init_common(engine); |
1552 | if (err) |
1553 | return err; |
1554 | |
1555 | intel_engine_add_user(engine); |
1556 | } |
1557 | |
1558 | return 0; |
1559 | } |
1560 | |
1561 | /** |
1562 | * intel_engine_cleanup_common - cleans up the engine state created by |
* the common initializers.
1564 | * @engine: Engine to cleanup. |
1565 | * |
1566 | * This cleans up everything created by the common helpers. |
1567 | */ |
1568 | void intel_engine_cleanup_common(struct intel_engine_cs *engine) |
1569 | { |
1570 | GEM_BUG_ON(!list_empty(&engine->sched_engine->requests)); |
1571 | |
i915_sched_engine_put(engine->sched_engine);
intel_breadcrumbs_put(engine->breadcrumbs);
1574 | |
1575 | intel_engine_fini_retire(engine); |
1576 | intel_engine_cleanup_cmd_parser(engine); |
1577 | |
1578 | if (engine->default_state) |
1579 | fput(engine->default_state); |
1580 | |
1581 | if (engine->kernel_context) |
intel_engine_destroy_pinned_context(engine->kernel_context);
1583 | |
1584 | if (engine->bind_context) |
intel_engine_destroy_pinned_context(engine->bind_context);

1588 | GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); |
1589 | cleanup_status_page(engine); |
1590 | |
intel_wa_list_free(&engine->ctx_wa_list);
intel_wa_list_free(&engine->wa_list);
intel_wa_list_free(&engine->whitelist);
1594 | } |
1595 | |
1596 | /** |
1597 | * intel_engine_resume - re-initializes the HW state of the engine |
1598 | * @engine: Engine to resume. |
1599 | * |
1600 | * Returns zero on success or an error code on failure. |
1601 | */ |
1602 | int intel_engine_resume(struct intel_engine_cs *engine) |
1603 | { |
1604 | intel_engine_apply_workarounds(engine); |
1605 | intel_engine_apply_whitelist(engine); |
1606 | |
1607 | return engine->resume(engine); |
1608 | } |
1609 | |
1610 | u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) |
1611 | { |
1612 | struct drm_i915_private *i915 = engine->i915; |
1613 | |
1614 | u64 acthd; |
1615 | |
1616 | if (GRAPHICS_VER(i915) >= 8) |
1617 | acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW); |
1618 | else if (GRAPHICS_VER(i915) >= 4) |
1619 | acthd = ENGINE_READ(engine, RING_ACTHD); |
1620 | else |
1621 | acthd = ENGINE_READ(engine, ACTHD); |
1622 | |
1623 | return acthd; |
1624 | } |
1625 | |
1626 | u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) |
1627 | { |
1628 | u64 bbaddr; |
1629 | |
1630 | if (GRAPHICS_VER(engine->i915) >= 8) |
1631 | bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW); |
1632 | else |
1633 | bbaddr = ENGINE_READ(engine, RING_BBADDR); |
1634 | |
1635 | return bbaddr; |
1636 | } |
1637 | |
1638 | static unsigned long stop_timeout(const struct intel_engine_cs *engine) |
1639 | { |
1640 | if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ |
1641 | return 0; |
1642 | |
1643 | /* |
1644 | * If we are doing a normal GPU reset, we can take our time and allow |
1645 | * the engine to quiesce. We've stopped submission to the engine, and |
1646 | * if we wait long enough an innocent context should complete and |
1647 | * leave the engine idle. So they should not be caught unaware by |
1648 | * the forthcoming GPU reset (which usually follows the stop_cs)! |
1649 | */ |
1650 | return READ_ONCE(engine->props.stop_timeout_ms); |
1651 | } |
1652 | |
1653 | static int __intel_engine_stop_cs(struct intel_engine_cs *engine, |
1654 | int fast_timeout_us, |
1655 | int slow_timeout_ms) |
1656 | { |
1657 | struct intel_uncore *uncore = engine->uncore; |
1658 | const i915_reg_t mode = RING_MI_MODE(engine->mmio_base); |
1659 | int err; |
1660 | |
1661 | intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); |
1662 | |
1663 | /* |
1664 | * Wa_22011802037: Prior to doing a reset, ensure CS is |
1665 | * stopped, set ring stop bit and prefetch disable bit to halt CS |
1666 | */ |
1667 | if (intel_engine_reset_needs_wa_22011802037(engine->gt)) |
1668 | intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), |
1669 | _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); |
1670 | |
1671 | err = __intel_wait_for_register_fw(engine->uncore, mode, |
1672 | MODE_IDLE, MODE_IDLE, |
1673 | fast_timeout_us, |
1674 | slow_timeout_ms, |
1675 | NULL); |
1676 | |
1677 | /* A final mmio read to let GPU writes be hopefully flushed to memory */ |
1678 | intel_uncore_posting_read_fw(uncore, mode); |
1679 | return err; |
1680 | } |
1681 | |
1682 | int intel_engine_stop_cs(struct intel_engine_cs *engine) |
1683 | { |
1684 | int err = 0; |
1685 | |
1686 | if (GRAPHICS_VER(engine->i915) < 3) |
1687 | return -ENODEV; |
1688 | |
1689 | ENGINE_TRACE(engine, "\n"); |
1690 | /* |
1691 | * TODO: Find out why occasionally stopping the CS times out. Seen |
1692 | * especially with gem_eio tests. |
1693 | * |
1694 | * Occasionally trying to stop the cs times out, but does not adversely |
1695 | * affect functionality. The timeout is set as a config parameter that |
1696 | * defaults to 100ms. In most cases the follow up operation is to wait |
1697 | * for pending MI_FORCE_WAKES. The assumption is that this timeout is |
1698 | * sufficient for any pending MI_FORCEWAKEs to complete. Once root |
1699 | * caused, the caller must check and handle the return from this |
1700 | * function. |
1701 | */ |
1702 | if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { |
1703 | ENGINE_TRACE(engine, |
1704 | "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n", |
1705 | ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR, |
1706 | ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR); |
1707 | |
1708 | /* |
1709 | * Sometimes we observe that the idle flag is not |
1710 | * set even though the ring is empty. So double |
1711 | * check before giving up. |
1712 | */ |
1713 | if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) != |
1714 | (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR)) |
1715 | err = -ETIMEDOUT; |
1716 | } |
1717 | |
1718 | return err; |
1719 | } |
1720 | |
1721 | void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) |
1722 | { |
1723 | ENGINE_TRACE(engine, "\n"); |
1724 | |
1725 | ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); |
1726 | } |
1727 | |
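/*
 * Read this engine's MSG_IDLE register and return the pending-forcewake
 * status bits (bits[13:9], qualified by the valid mask in bits[29:25]),
 * i.e. the reset domains that still have an MI_FORCE_WAKE outstanding.
 */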
1728 | static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) |
1729 | { |
1730 | static const i915_reg_t _reg[I915_NUM_ENGINES] = { |
1731 | [RCS0] = MSG_IDLE_CS, |
1732 | [BCS0] = MSG_IDLE_BCS, |
1733 | [VCS0] = MSG_IDLE_VCS0, |
1734 | [VCS1] = MSG_IDLE_VCS1, |
1735 | [VCS2] = MSG_IDLE_VCS2, |
1736 | [VCS3] = MSG_IDLE_VCS3, |
1737 | [VCS4] = MSG_IDLE_VCS4, |
1738 | [VCS5] = MSG_IDLE_VCS5, |
1739 | [VCS6] = MSG_IDLE_VCS6, |
1740 | [VCS7] = MSG_IDLE_VCS7, |
1741 | [VECS0] = MSG_IDLE_VECS0, |
1742 | [VECS1] = MSG_IDLE_VECS1, |
1743 | [VECS2] = MSG_IDLE_VECS2, |
1744 | [VECS3] = MSG_IDLE_VECS3, |
1745 | [CCS0] = MSG_IDLE_CS, |
1746 | [CCS1] = MSG_IDLE_CS, |
1747 | [CCS2] = MSG_IDLE_CS, |
1748 | [CCS3] = MSG_IDLE_CS, |
1749 | }; |
1750 | u32 val; |
1751 | |
1752 | if (!_reg[engine->id].reg) |
1753 | return 0; |
1754 | |
1755 | val = intel_uncore_read(engine->uncore, _reg[engine->id]); |
1756 | |
1757 | /* bits[29:25] & bits[13:9] >> shift */ |
1758 | return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; |
1759 | } |
1760 | |
1761 | static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) |
1762 | { |
1763 | int ret; |
1764 | |
1765 | /* Ensure GPM receives fw up/down after CS is stopped */ |
1766 | udelay(1); |
1767 | |
1768 | /* Wait for forcewake request to complete in GPM */ |
1769 | ret = __intel_wait_for_register_fw(gt->uncore, |
1770 | GEN9_PWRGT_DOMAIN_STATUS, |
1771 | fw_mask, fw_mask, 5000, 0, NULL); |
1772 | |
1773 | /* Ensure CS receives fw ack from GPM */ |
1774 | udelay(1); |
1775 | |
1776 | if (ret) |
1777 | GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); |
1778 | } |
1779 | |
1780 | /* |
1781 | * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any |
1782 | * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The |
1783 | * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the |
1784 | * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we |
1785 | * are concerned only with the gt reset here, we use a logical OR of pending |
1786 | * forcewakeups from all reset domains and then wait for them to complete by |
1787 | * querying PWRGT_DOMAIN_STATUS. |
1788 | */ |
1789 | void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine) |
1790 | { |
1791 | u32 fw_pending = __cs_pending_mi_force_wakes(engine); |
1792 | |
1793 | if (fw_pending) |
1794 | __gpm_wait_for_fw_complete(engine->gt, fw_pending); |
1795 | } |
1796 | |
1797 | /* NB: please notice the memset */ |
1798 | void intel_engine_get_instdone(const struct intel_engine_cs *engine, |
1799 | struct intel_instdone *instdone) |
1800 | { |
1801 | struct drm_i915_private *i915 = engine->i915; |
1802 | struct intel_uncore *uncore = engine->uncore; |
1803 | u32 mmio_base = engine->mmio_base; |
1804 | int slice; |
1805 | int subslice; |
1806 | int iter; |
1807 | |
1808 | memset(instdone, 0, sizeof(*instdone)); |
1809 | |
1810 | if (GRAPHICS_VER(i915) >= 8) { |
1811 | instdone->instdone = |
1812 | intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); |
1813 | |
1814 | if (engine->id != RCS0) |
1815 | return; |
1816 | |
1817 | instdone->slice_common = |
1818 | intel_uncore_read(uncore, GEN7_SC_INSTDONE); |
1819 | if (GRAPHICS_VER(i915) >= 12) { |
1820 | instdone->slice_common_extra[0] = |
1821 | intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA); |
1822 | instdone->slice_common_extra[1] = |
1823 | intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); |
1824 | } |
1825 | |
1826 | for_each_ss_steering(iter, engine->gt, slice, subslice) { |
1827 | instdone->sampler[slice][subslice] = |
1828 | intel_gt_mcr_read(engine->gt, |
1829 | GEN8_SAMPLER_INSTDONE, |
1830 | slice, subslice); |
1831 | instdone->row[slice][subslice] = |
1832 | intel_gt_mcr_read(engine->gt, |
1833 | GEN8_ROW_INSTDONE, |
1834 | slice, subslice); |
1835 | } |
1836 | |
1837 | if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { |
1838 | for_each_ss_steering(iter, engine->gt, slice, subslice) |
1839 | instdone->geom_svg[slice][subslice] = |
1840 | intel_gt_mcr_read(engine->gt, |
1841 | XEHPG_INSTDONE_GEOM_SVG, |
1842 | slice, subslice); |
1843 | } |
1844 | } else if (GRAPHICS_VER(i915) >= 7) { |
1845 | instdone->instdone = |
1846 | intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); |
1847 | |
1848 | if (engine->id != RCS0) |
1849 | return; |
1850 | |
1851 | instdone->slice_common = |
1852 | intel_uncore_read(uncore, GEN7_SC_INSTDONE); |
1853 | instdone->sampler[0][0] = |
1854 | intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); |
1855 | instdone->row[0][0] = |
1856 | intel_uncore_read(uncore, GEN7_ROW_INSTDONE); |
1857 | } else if (GRAPHICS_VER(i915) >= 4) { |
1858 | instdone->instdone = |
1859 | intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); |
1860 | if (engine->id == RCS0) |
1861 | /* HACK: Using the wrong struct member */ |
1862 | instdone->slice_common = |
1863 | intel_uncore_read(uncore, GEN4_INSTDONE1); |
1864 | } else { |
1865 | instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); |
1866 | } |
1867 | } |
1868 | |
1869 | static bool ring_is_idle(struct intel_engine_cs *engine) |
1870 | { |
1871 | bool idle = true; |
1872 | |
1873 | if (I915_SELFTEST_ONLY(!engine->mmio_base)) |
1874 | return true; |
1875 | |
1876 | if (!intel_engine_pm_get_if_awake(engine)) |
1877 | return true; |
1878 | |
1879 | /* First check that no commands are left in the ring */ |
1880 | if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) != |
1881 | (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR)) |
1882 | idle = false; |
1883 | |
1884 | /* No bit for gen2, so assume the CS parser is idle */ |
1885 | if (GRAPHICS_VER(engine->i915) > 2 && |
1886 | !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE)) |
1887 | idle = false; |
1888 | |
1889 | intel_engine_pm_put(engine); |
1890 | |
1891 | return idle; |
1892 | } |
1893 | |
1894 | void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) |
1895 | { |
1896 | struct tasklet_struct *t = &engine->sched_engine->tasklet; |
1897 | |
1898 | if (!t->callback) |
1899 | return; |
1900 | |
1901 | local_bh_disable(); |
1902 | if (tasklet_trylock(t)) { |
1903 | /* Must wait for any GPU reset in progress. */ |
1904 | if (__tasklet_is_enabled(t)) |
1905 | t->callback(t); |
1906 | tasklet_unlock(t); |
1907 | } |
1908 | local_bh_enable(); |
1909 | |
1910 | /* Synchronise and wait for the tasklet on another CPU */ |
1911 | if (sync) |
1912 | tasklet_unlock_wait(t); |
1913 | } |
1914 | |
1915 | /** |
1916 | * intel_engine_is_idle() - Report if the engine has finished processing all work |
1917 | * @engine: the intel_engine_cs |
1918 | * |
1919 | * Return true if there are no requests pending, nothing is left to be |
1920 | * submitted to hardware, and the engine is idle. |
1921 | */ |
1922 | bool intel_engine_is_idle(struct intel_engine_cs *engine) |
1923 | { |
1924 | /* More white lies, if wedged, hw state is inconsistent */ |
1925 | if (intel_gt_is_wedged(engine->gt)) |
1926 | return true; |
1927 | |
1928 | if (!intel_engine_pm_is_awake(engine)) |
1929 | return true; |
1930 | |
1931 | /* Waiting to drain ELSP? */ |
1932 | intel_synchronize_hardirq(engine->i915); |
1933 | intel_engine_flush_submission(engine); |
1934 | |
1935 | /* ELSP is empty, but there are ready requests? E.g. after reset */ |
1936 | if (!i915_sched_engine_is_empty(engine->sched_engine)) |
1937 | return false; |
1938 | |
1939 | /* Ring stopped? */ |
1940 | return ring_is_idle(engine); |
1941 | } |
1942 | |
1943 | bool intel_engines_are_idle(struct intel_gt *gt) |
1944 | { |
1945 | struct intel_engine_cs *engine; |
1946 | enum intel_engine_id id; |
1947 | |
1948 | /* |
1949 | * If the driver is wedged, HW state may be very inconsistent and |
1950 | * report that it is still busy, even though we have stopped using it. |
1951 | */ |
1952 | if (intel_gt_is_wedged(gt)) |
1953 | return true; |
1954 | |
1955 | /* Already parked (and passed an idleness test); must still be idle */ |
1956 | if (!READ_ONCE(gt->awake)) |
1957 | return true; |
1958 | |
1959 | for_each_engine(engine, gt, id) { |
1960 | if (!intel_engine_is_idle(engine)) |
1961 | return false; |
1962 | } |
1963 | |
1964 | return true; |
1965 | } |
1966 | |
1967 | bool intel_engine_irq_enable(struct intel_engine_cs *engine) |
1968 | { |
1969 | if (!engine->irq_enable) |
1970 | return false; |
1971 | |
1972 | /* Caller disables interrupts */ |
1973 | spin_lock(engine->gt->irq_lock); |
1974 | engine->irq_enable(engine); |
1975 | spin_unlock(engine->gt->irq_lock); |
1976 | |
1977 | return true; |
1978 | } |
1979 | |
1980 | void intel_engine_irq_disable(struct intel_engine_cs *engine) |
1981 | { |
1982 | if (!engine->irq_disable) |
1983 | return; |
1984 | |
1985 | /* Caller disables interrupts */ |
1986 | spin_lock(engine->gt->irq_lock); |
1987 | engine->irq_disable(engine); |
1988 | spin_unlock(engine->gt->irq_lock); |
1989 | } |
1990 | |
1991 | void intel_engines_reset_default_submission(struct intel_gt *gt) |
1992 | { |
1993 | struct intel_engine_cs *engine; |
1994 | enum intel_engine_id id; |
1995 | |
1996 | for_each_engine(engine, gt, id) { |
1997 | if (engine->sanitize) |
1998 | engine->sanitize(engine); |
1999 | |
2000 | engine->set_default_submission(engine); |
2001 | } |
2002 | } |
2003 | |
2004 | bool intel_engine_can_store_dword(struct intel_engine_cs *engine) |
2005 | { |
2006 | switch (GRAPHICS_VER(engine->i915)) { |
2007 | case 2: |
2008 | return false; /* uses physical not virtual addresses */ |
2009 | case 3: |
2010 | /* maybe only uses physical not virtual addresses */ |
2011 | return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); |
2012 | case 4: |
2013 | return !IS_I965G(engine->i915); /* who knows! */ |
2014 | case 6: |
2015 | return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ |
2016 | default: |
2017 | return true; |
2018 | } |
2019 | } |
2020 | |
2021 | static struct intel_timeline *get_timeline(struct i915_request *rq) |
2022 | { |
2023 | struct intel_timeline *tl; |
2024 | |
2025 | /* |
2026 | * Even though we are holding the engine->sched_engine->lock here, there |
2027 | * is no control over the submission queue per-se and we are |
2028 | * inspecting the active state at a random point in time, with an |
2029 | * unknown queue. Play safe and make sure the timeline remains valid. |
2030 | * (Only being used for pretty printing, one extra kref shouldn't |
2031 | * cause a camel stampede!) |
2032 | */ |
2033 | rcu_read_lock(); |
2034 | tl = rcu_dereference(rq->timeline); |
2035 | if (!kref_get_unless_zero(&tl->kref)) |
2036 | tl = NULL; |
2037 | rcu_read_unlock(); |
2038 | |
2039 | return tl; |
2040 | } |
2041 | |
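/*
 * Format a one-line summary of a request's ring (start offset, hwsp,
 * breadcrumb seqno and accumulated context runtime in ms) for the
 * execlists port dump; nothing is printed once the request has signaled.
 */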
2042 | static int print_ring(char *buf, int sz, struct i915_request *rq) |
2043 | { |
2044 | int len = 0; |
2045 | |
2046 | if (!i915_request_signaled(rq)) { |
2047 | struct intel_timeline *tl = get_timeline(rq); |
2048 | |
2049 | len = scnprintf(buf, sz, |
2050 | "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", |
2051 | i915_ggtt_offset(rq->ring->vma), |
2052 | tl ? tl->hwsp_offset : 0, |
2053 | hwsp_seqno(rq), |
2054 | DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), |
2055 | 1000 * 1000)); |
2056 | |
2057 | if (tl) |
2058 | intel_timeline_put(tl); |
2059 | } |
2060 | |
2061 | return len; |
2062 | } |
2063 | |
2064 | static void hexdump(struct drm_printer *m, const void *buf, size_t len) |
2065 | { |
2066 | const size_t rowsize = 8 * sizeof(u32); |
2067 | const void *prev = NULL; |
2068 | bool skip = false; |
2069 | size_t pos; |
2070 | |
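/* Collapse runs of identical rows into a single '*' line, like hexdump(1). */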
2071 | for (pos = 0; pos < len; pos += rowsize) { |
2072 | char line[128]; |
2073 | |
2074 | if (prev && !memcmp(prev, buf + pos, rowsize)) { |
2075 | if (!skip) { |
2076 | drm_printf(m, "*\n"); |
2077 | skip = true; |
2078 | } |
2079 | continue; |
2080 | } |
2081 | |
2082 | WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, |
2083 | rowsize, sizeof(u32), |
2084 | line, sizeof(line), |
2085 | false) >= sizeof(line)); |
2086 | drm_printf(m, "[%04zx] %s\n", pos, line); |
2087 | |
2088 | prev = buf + pos; |
2089 | skip = false; |
2090 | } |
2091 | } |
2092 | |
2093 | static const char *repr_timer(const struct timer_list *t) |
2094 | { |
2095 | if (!READ_ONCE(t->expires)) |
2096 | return "inactive"; |
2097 | |
2098 | if (timer_pending(t)) |
2099 | return "active"; |
2100 | |
2101 | return "expired"; |
2102 | } |
2103 | |
2104 | static void intel_engine_print_registers(struct intel_engine_cs *engine, |
2105 | struct drm_printer *m) |
2106 | { |
2107 | struct drm_i915_private *i915 = engine->i915; |
2108 | struct intel_engine_execlists * const execlists = &engine->execlists; |
2109 | u64 addr; |
2110 | |
2111 | if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(i915, 4, 7)) |
2112 | drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); |
2113 | if (HAS_EXECLISTS(i915)) { |
2114 | drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", |
2115 | ENGINE_READ(engine, RING_EXECLIST_STATUS_HI)); |
2116 | drm_printf(m, "\tEL_STAT_LO: 0x%08x\n", |
2117 | ENGINE_READ(engine, RING_EXECLIST_STATUS_LO)); |
2118 | } |
2119 | drm_printf(m, "\tRING_START: 0x%08x\n", |
2120 | ENGINE_READ(engine, RING_START)); |
2121 | drm_printf(m, "\tRING_HEAD: 0x%08x\n", |
2122 | ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR); |
2123 | drm_printf(m, "\tRING_TAIL: 0x%08x\n", |
2124 | ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR); |
2125 | drm_printf(m, "\tRING_CTL: 0x%08x%s\n", |
2126 | ENGINE_READ(engine, RING_CTL), |
2127 | ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); |
2128 | if (GRAPHICS_VER(engine->i915) > 2) { |
2129 | drm_printf(m, "\tRING_MODE: 0x%08x%s\n", |
2130 | ENGINE_READ(engine, RING_MI_MODE), |
2131 | ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : ""); |
2132 | } |
2133 | |
2134 | if (GRAPHICS_VER(i915) >= 6) { |
2135 | drm_printf(m, "\tRING_IMR: 0x%08x\n", |
2136 | ENGINE_READ(engine, RING_IMR)); |
2137 | drm_printf(m, "\tRING_ESR: 0x%08x\n", |
2138 | ENGINE_READ(engine, RING_ESR)); |
2139 | drm_printf(m, "\tRING_EMR: 0x%08x\n", |
2140 | ENGINE_READ(engine, RING_EMR)); |
2141 | drm_printf(m, "\tRING_EIR: 0x%08x\n", |
2142 | ENGINE_READ(engine, RING_EIR)); |
2143 | } |
2144 | |
2145 | addr = intel_engine_get_active_head(engine); |
2146 | drm_printf(m, "\tACTHD: 0x%08x_%08x\n", |
2147 | upper_32_bits(addr), lower_32_bits(addr)); |
2148 | addr = intel_engine_get_last_batch_head(engine); |
2149 | drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", |
2150 | upper_32_bits(addr), lower_32_bits(addr)); |
2151 | if (GRAPHICS_VER(i915) >= 8) |
2152 | addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW); |
2153 | else if (GRAPHICS_VER(i915) >= 4) |
2154 | addr = ENGINE_READ(engine, RING_DMA_FADD); |
2155 | else |
2156 | addr = ENGINE_READ(engine, DMA_FADD_I8XX); |
2157 | drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", |
2158 | upper_32_bits(addr), lower_32_bits(addr)); |
2159 | if (GRAPHICS_VER(i915) >= 4) { |
2160 | drm_printf(m, "\tIPEIR: 0x%08x\n", |
2161 | ENGINE_READ(engine, RING_IPEIR)); |
2162 | drm_printf(m, "\tIPEHR: 0x%08x\n", |
2163 | ENGINE_READ(engine, RING_IPEHR)); |
2164 | } else { |
2165 | drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR)); |
2166 | drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); |
2167 | } |
2168 | |
2169 | if (HAS_EXECLISTS(i915) && !intel_engine_uses_guc(engine)) { |
2170 | struct i915_request * const *port, *rq; |
2171 | const u32 *hws = |
2172 | &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; |
2173 | const u8 num_entries = execlists->csb_size; |
2174 | unsigned int idx; |
2175 | u8 read, write; |
2176 | |
2177 | drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", |
2178 | str_yes_no(test_bit(TASKLET_STATE_SCHED, &engine->sched_engine->tasklet.state)), |
2179 | str_enabled_disabled(!atomic_read(&engine->sched_engine->tasklet.count)), |
2180 | repr_timer(&engine->execlists.preempt), |
2181 | repr_timer(&engine->execlists.timer)); |
2182 | |
2183 | read = execlists->csb_head; |
2184 | write = READ_ONCE(*execlists->csb_write); |
2185 | |
2186 | drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n", |
2187 | ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), |
2188 | ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), |
2189 | read, write, num_entries); |
2190 | |
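/*
 * Normalise the CSB read/write pointers, then walk and print the entries
 * between them, wrapping modulo the number of CSB entries.
 */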
2191 | if (read >= num_entries) |
2192 | read = 0; |
2193 | if (write >= num_entries) |
2194 | write = 0; |
2195 | if (read > write) |
2196 | write += num_entries; |
2197 | while (read < write) { |
2198 | idx = ++read % num_entries; |
2199 | drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", |
2200 | idx, hws[idx * 2], hws[idx * 2 + 1]); |
2201 | } |
2202 | |
2203 | i915_sched_engine_active_lock_bh(engine->sched_engine); |
2204 | rcu_read_lock(); |
2205 | for (port = execlists->active; (rq = *port); port++) { |
2206 | char hdr[160]; |
2207 | int len; |
2208 | |
2209 | len = scnprintf(hdr, sizeof(hdr), |
2210 | "\t\tActive[%d]: ccid:%08x%s%s, ", |
2211 | (int)(port - execlists->active), |
2212 | rq->context->lrc.ccid, |
2213 | intel_context_is_closed(rq->context) ? "!" : "", |
2214 | intel_context_is_banned(rq->context) ? "*" : ""); |
2215 | len += print_ring(hdr + len, sizeof(hdr) - len, rq); |
2216 | scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); |
2217 | i915_request_show(m, rq, hdr, 0); |
2218 | } |
2219 | for (port = execlists->pending; (rq = *port); port++) { |
2220 | char hdr[160]; |
2221 | int len; |
2222 | |
2223 | len = scnprintf(hdr, sizeof(hdr), |
2224 | "\t\tPending[%d]: ccid:%08x%s%s, ", |
2225 | (int)(port - execlists->pending), |
2226 | rq->context->lrc.ccid, |
2227 | intel_context_is_closed(rq->context) ? "!" : "", |
2228 | intel_context_is_banned(rq->context) ? "*" : ""); |
2229 | len += print_ring(hdr + len, sizeof(hdr) - len, rq); |
2230 | scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); |
2231 | i915_request_show(m, rq, hdr, 0); |
2232 | } |
2233 | rcu_read_unlock(); |
2234 | i915_sched_engine_active_unlock_bh(engine->sched_engine); |
2235 | } else if (GRAPHICS_VER(i915) > 6) { |
2236 | drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", |
2237 | ENGINE_READ(engine, RING_PP_DIR_BASE)); |
2238 | drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", |
2239 | ENGINE_READ(engine, RING_PP_DIR_BASE_READ)); |
2240 | drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", |
2241 | ENGINE_READ(engine, RING_PP_DIR_DCLV)); |
2242 | } |
2243 | } |
2244 | |
2245 | static void print_request_ring(struct drm_printer *m, struct i915_request *rq) |
2246 | { |
2247 | struct i915_vma_resource *vma_res = rq->batch_res; |
2248 | void *ring; |
2249 | int size; |
2250 | |
2251 | drm_printf(m, |
2252 | "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", |
2253 | rq->head, rq->postfix, rq->tail, |
2254 | vma_res ? upper_32_bits(vma_res->start) : ~0u, |
2255 | vma_res ? lower_32_bits(vma_res->start) : ~0u); |
2256 | |
2257 | size = rq->tail - rq->head; |
2258 | if (rq->tail < rq->head) |
2259 | size += rq->ring->size; |
2260 | |
2261 | ring = kmalloc(size, GFP_ATOMIC); |
2262 | if (ring) { |
2263 | const void *vaddr = rq->ring->vaddr; |
2264 | unsigned int head = rq->head; |
2265 | unsigned int len = 0; |
2266 | |
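/*
 * The request may wrap past the end of the ring: copy the chunk from
 * head to the end of the ring first, then the remainder from offset 0.
 */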
2267 | if (rq->tail < head) { |
2268 | len = rq->ring->size - head; |
2269 | memcpy(ring, vaddr + head, len); |
2270 | head = 0; |
2271 | } |
2272 | memcpy(ring + len, vaddr + head, size - len); |
2273 | |
2274 | hexdump(m, ring, size); |
2275 | kfree(ring); |
2276 | } |
2277 | } |
2278 | |
2279 | static unsigned long read_ul(void *p, size_t x) |
2280 | { |
2281 | return *(unsigned long *)(p + x); |
2282 | } |
2283 | |
2284 | static void print_properties(struct intel_engine_cs *engine, |
2285 | struct drm_printer *m) |
2286 | { |
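/*
 * Offset table of the tunable engine->props fields so that the current
 * and default value of each can be printed generically via read_ul().
 */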
2287 | static const struct pmap { |
2288 | size_t offset; |
2289 | const char *name; |
2290 | } props[] = { |
2291 | #define P(x) { \ |
2292 | .offset = offsetof(typeof(engine->props), x), \ |
2293 | .name = #x \ |
2294 | } |
2295 | P(heartbeat_interval_ms), |
2296 | P(max_busywait_duration_ns), |
2297 | P(preempt_timeout_ms), |
2298 | P(stop_timeout_ms), |
2299 | P(timeslice_duration_ms), |
2300 | |
2301 | {}, |
2302 | #undef P |
2303 | }; |
2304 | const struct pmap *p; |
2305 | |
2306 | drm_printf(m, "\tProperties:\n"); |
2307 | for (p = props; p->name; p++) |
2308 | drm_printf(m, "\t\t%s: %lu [default %lu]\n", |
2309 | p->name, |
2310 | read_ul(&engine->props, p->offset), |
2311 | read_ul(&engine->defaults, p->offset)); |
2312 | } |
2313 | |
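/*
 * Print one request together with its ring geometry and, when a register
 * state image is available, a hexdump of its logical ring context.
 */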
2314 | static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg) |
2315 | { |
2316 | struct intel_timeline *tl = get_timeline(rq); |
2317 | |
2318 | i915_request_show(m, rq, msg, 0); |
2319 | |
2320 | drm_printf(m, "\t\tring->start: 0x%08x\n", |
2321 | i915_ggtt_offset(rq->ring->vma)); |
2322 | drm_printf(m, "\t\tring->head: 0x%08x\n", |
2323 | rq->ring->head); |
2324 | drm_printf(m, "\t\tring->tail: 0x%08x\n", |
2325 | rq->ring->tail); |
2326 | drm_printf(m, "\t\tring->emit: 0x%08x\n", |
2327 | rq->ring->emit); |
2328 | drm_printf(m, "\t\tring->space: 0x%08x\n", |
2329 | rq->ring->space); |
2330 | |
2331 | if (tl) { |
2332 | drm_printf(m, "\t\tring->hwsp: 0x%08x\n", |
2333 | tl->hwsp_offset); |
2334 | intel_timeline_put(tl); |
2335 | } |
2336 | |
2337 | print_request_ring(m, rq); |
2338 | |
2339 | if (rq->context->lrc_reg_state) { |
2340 | drm_printf(m, "Logical Ring Context:\n"); |
2341 | hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); |
2342 | } |
2343 | } |
2344 | |
2345 | void intel_engine_dump_active_requests(struct list_head *requests, |
2346 | struct i915_request *hung_rq, |
2347 | struct drm_printer *m) |
2348 | { |
2349 | struct i915_request *rq; |
2350 | const char *msg; |
2351 | enum i915_request_state state; |
2352 | |
2353 | list_for_each_entry(rq, requests, sched.link) { |
2354 | if (rq == hung_rq) |
2355 | continue; |
2356 | |
2357 | state = i915_test_request_state(rq); |
2358 | if (state < I915_REQUEST_QUEUED) |
2359 | continue; |
2360 | |
2361 | if (state == I915_REQUEST_ACTIVE) |
2362 | msg = "\t\tactive on engine"; |
2363 | else |
2364 | msg = "\t\tactive in queue"; |
2365 | |
2366 | engine_dump_request(rq, m, msg); |
2367 | } |
2368 | } |
2369 | |
2370 | static void engine_dump_active_requests(struct intel_engine_cs *engine, |
2371 | struct drm_printer *m) |
2372 | { |
2373 | struct intel_context *hung_ce = NULL; |
2374 | struct i915_request *hung_rq = NULL; |
2375 | |
2376 | /* |
2377 | * No need for an engine->irq_seqno_barrier() before the seqno reads. |
2378 | * The GPU is still running so requests are still executing and any |
2379 | * hardware reads will be out of date by the time they are reported. |
2380 | * But the intention here is just to report an instantaneous snapshot |
2381 | * so that's fine. |
2382 | */ |
2383 | intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq); |
2384 | |
2385 | drm_printf(m, "\tRequests:\n"); |
2386 | |
2387 | if (hung_rq) |
2388 | engine_dump_request(hung_rq, m, "\t\thung"); |
2389 | else if (hung_ce) |
2390 | drm_printf(m, "\t\tGot hung ce but no hung rq!\n"); |
2391 | |
2392 | if (intel_uc_uses_guc_submission(&engine->gt->uc)) |
2393 | intel_guc_dump_active_requests(engine, hung_rq, m); |
2394 | else |
2395 | intel_execlists_dump_active_requests(engine, hung_rq, m); |
2396 | |
2397 | if (hung_rq) |
2398 | i915_request_put(hung_rq); |
2399 | } |
2400 | |
2401 | void intel_engine_dump(struct intel_engine_cs *engine, |
2402 | struct drm_printer *m, |
2403 | const char *header, ...) |
2404 | { |
2405 | struct i915_gpu_error * const error = &engine->i915->gpu_error; |
2406 | struct i915_request *rq; |
2407 | intel_wakeref_t wakeref; |
2408 | ktime_t dummy; |
2409 | |
2410 | if (header) { |
2411 | va_list ap; |
2412 | |
2413 | va_start(ap, header); |
2414 | drm_vprintf(m, header, &ap); |
2415 | va_end(ap); |
2416 | } |
2417 | |
2418 | if (intel_gt_is_wedged(engine->gt)) |
2419 | drm_printf(m, "*** WEDGED ***\n"); |
2420 | |
2421 | drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); |
2422 | drm_printf(m, "\tBarriers?: %s\n", |
2423 | str_yes_no(!llist_empty(&engine->barrier_tasks))); |
2424 | drm_printf(m, "\tLatency: %luus\n", |
2425 | ewma__engine_latency_read(&engine->latency)); |
2426 | if (intel_engine_supports_stats(engine)) |
2427 | drm_printf(m, "\tRuntime: %llums\n", |
2428 | ktime_to_ms(intel_engine_get_busy_time(engine, |
2429 | &dummy))); |
2430 | drm_printf(m, "\tForcewake: %x domains, %d active\n", |
2431 | engine->fw_domain, READ_ONCE(engine->fw_active)); |
2432 | |
2433 | rcu_read_lock(); |
2434 | rq = READ_ONCE(engine->heartbeat.systole); |
2435 | if (rq) |
2436 | drm_printf(m, "\tHeartbeat: %d ms ago\n", |
2437 | jiffies_to_msecs(jiffies - rq->emitted_jiffies)); |
2438 | rcu_read_unlock(); |
2439 | drm_printf(m, "\tReset count: %d (global %d)\n", |
2440 | i915_reset_engine_count(error, engine), |
2441 | i915_reset_count(error)); |
2442 | print_properties(engine, m); |
2443 | |
2444 | engine_dump_active_requests(engine, m); |
2445 | |
2446 | drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); |
2447 | wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); |
2448 | if (wakeref) { |
2449 | intel_engine_print_registers(engine, m); |
2450 | intel_runtime_pm_put(engine->uncore->rpm, wakeref); |
2451 | } else { |
2452 | drm_printf(m, "\tDevice is asleep; skipping register dump\n"); |
2453 | } |
2454 | |
2455 | intel_execlists_show_requests(engine, m, i915_request_show, 8); |
2456 | |
2457 | drm_printf(m, "HWSP:\n"); |
2458 | hexdump(m, engine->status_page.addr, PAGE_SIZE); |
2459 | |
2460 | drm_printf(m, "Idle? %s\n", str_yes_no(intel_engine_is_idle(engine))); |
2461 | |
2462 | intel_engine_print_breadcrumbs(engine, m); |
2463 | } |
2464 | |
2465 | /** |
2466 | * intel_engine_get_busy_time() - Return current accumulated engine busyness |
2467 | * @engine: engine to report on |
2468 | * @now: monotonic timestamp of sampling |
2469 | * |
2470 | * Returns accumulated time @engine was busy since engine stats were enabled. |
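 *
 * A minimal usage sketch (local variable names are illustrative only):
 *
 *	ktime_t now;
 *	ktime_t busy = intel_engine_get_busy_time(engine, &now);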
2471 | */ |
2472 | ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) |
2473 | { |
2474 | return engine->busyness(engine, now); |
2475 | } |
2476 | |
2477 | struct intel_context * |
2478 | intel_engine_create_virtual(struct intel_engine_cs **siblings, |
2479 | unsigned int count, unsigned long flags) |
2480 | { |
2481 | if (count == 0) |
2482 | return ERR_PTR(-EINVAL); |
2483 | |
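/*
 * A single sibling without FORCE_VIRTUAL does not need a virtual engine;
 * just create an ordinary context on that engine.
 */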
2484 | if (count == 1 && !(flags & FORCE_VIRTUAL)) |
2485 | return intel_context_create(siblings[0]); |
2486 | |
2487 | GEM_BUG_ON(!siblings[0]->cops->create_virtual); |
2488 | return siblings[0]->cops->create_virtual(siblings, count, flags); |
2489 | } |
2490 | |
2491 | static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine) |
2492 | { |
2493 | struct i915_request *request, *active = NULL; |
2494 | |
2495 | /* |
2496 | * This search does not work in GuC submission mode. However, the GuC |
2497 | * will report the hanging context directly to the driver itself. So |
2498 | * the driver should never get here when in GuC mode. |
2499 | */ |
2500 | GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc)); |
2501 | |
2502 | /* |
2503 | * We are called by the error capture, reset and to dump engine |
2504 | * state at random points in time. In particular, note that neither is |
2505 | * crucially ordered with an interrupt. After a hang, the GPU is dead |
2506 | * and we assume that no more writes can happen (we waited long enough |
2507 | * for all writes that were in transaction to be flushed) - adding an |
2508 | * extra delay for a recent interrupt is pointless. Hence, we do |
2509 | * not need an engine->irq_seqno_barrier() before the seqno reads. |
2510 | * At all other times, we must assume the GPU is still running, but |
2511 | * we only care about the snapshot of this moment. |
2512 | */ |
2513 | lockdep_assert_held(&engine->sched_engine->lock); |
2514 | |
2515 | rcu_read_lock(); |
2516 | request = execlists_active(&engine->execlists); |
2517 | if (request) { |
2518 | struct intel_timeline *tl = request->context->timeline; |
2519 | |
2520 | list_for_each_entry_from_reverse(request, &tl->requests, link) { |
2521 | if (__i915_request_is_complete(request)) |
2522 | break; |
2523 | |
2524 | active = request; |
2525 | } |
2526 | } |
2527 | rcu_read_unlock(); |
2528 | if (active) |
2529 | return active; |
2530 | |
2531 | list_for_each_entry(request, &engine->sched_engine->requests, |
2532 | sched.link) { |
2533 | if (i915_test_request_state(request) != I915_REQUEST_ACTIVE) |
2534 | continue; |
2535 | |
2536 | active = request; |
2537 | break; |
2538 | } |
2539 | |
2540 | return active; |
2541 | } |
2542 | |
2543 | void intel_engine_get_hung_entity(struct intel_engine_cs *engine, |
2544 | struct intel_context **ce, struct i915_request **rq) |
2545 | { |
2546 | unsigned long flags; |
2547 | |
2548 | *ce = intel_engine_get_hung_context(engine); |
2549 | if (*ce) { |
2550 | intel_engine_clear_hung_context(engine); |
2551 | |
2552 | *rq = intel_context_get_active_request(*ce); |
2553 | return; |
2554 | } |
2555 | |
2556 | /* |
2557 | * Getting here with GuC enabled means it is a forced error capture |
2558 | * with no actual hang. So, no need to attempt the execlist search. |
2559 | */ |
2560 | if (intel_uc_uses_guc_submission(&engine->gt->uc)) |
2561 | return; |
2562 | |
2563 | spin_lock_irqsave(&engine->sched_engine->lock, flags); |
2564 | *rq = engine_execlist_find_hung_request(engine); |
2565 | if (*rq) |
2566 | *rq = i915_request_get_rcu(*rq); |
2567 | spin_unlock_irqrestore(&engine->sched_engine->lock, flags); |
2568 | } |
2569 | |
2570 | void xehp_enable_ccs_engines(struct intel_engine_cs *engine) |
2571 | { |
2572 | /* |
2573 | * If there are any non-fused-off CCS engines, we need to enable CCS |
2574 | * support in the RCU_MODE register. This only needs to be done once, |
2575 | * so for simplicity we'll take care of this in the RCS engine's |
2576 | * resume handler; since the RCS and all CCS engines belong to the |
2577 | * same reset domain and are reset together, this will also take care |
2578 | * of re-applying the setting after i915-triggered resets. |
2579 | */ |
2580 | if (!CCS_MASK(engine->gt)) |
2581 | return; |
2582 | |
2583 | intel_uncore_write(engine->uncore, GEN12_RCU_MODE, |
2584 | _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); |
2585 | } |
2586 | |
2587 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
2588 | #include "mock_engine.c" |
2589 | #include "selftest_engine.c" |
2590 | #include "selftest_engine_cs.c" |
2591 | #endif |
2592 | |