// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_managed.h>
#include <drm/intel-gtt.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
#include "intel_gt_buffer_pool.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gt_requests.h"
#include "intel_migrate.h"
#include "intel_mocs.h"
#include "intel_pci_config.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_sa_media.h"
#include "intel_gt_sysfs.h"
#include "intel_tlb.h"
#include "intel_uncore.h"
#include "shmem_utils.h"

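/*
 * Early initialisation of the per-GT state shared by the root GT and any
 * additional (e.g. media) GTs: locks, buffer pool, reset/request/timeline
 * tracking, TLB invalidation, power management and WOPCM/uC/RPS bookkeeping.
 */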
void intel_gt_common_init_early(struct intel_gt *gt)
{
	spin_lock_init(gt->irq_lock);

	INIT_LIST_HEAD(&gt->closed_vma);
	spin_lock_init(&gt->closed_lock);

	init_llist_head(&gt->watchdog.list);
	INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);

	intel_gt_init_buffer_pool(gt);
	intel_gt_init_reset(gt);
	intel_gt_init_requests(gt);
	intel_gt_init_timelines(gt);
	intel_gt_init_tlb(gt);
	intel_gt_pm_init_early(gt);

	intel_wopcm_init_early(&gt->wopcm);
	intel_uc_init_early(&gt->uc);
	intel_rps_init_early(&gt->rps);
}

/* Preliminary initialization of Tile 0 */
int intel_root_gt_init_early(struct drm_i915_private *i915)
{
	struct intel_gt *gt;

	gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
	if (!gt)
		return -ENOMEM;

	i915->gt[0] = gt;

	gt->i915 = i915;
	gt->uncore = &i915->uncore;
	gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL);
	if (!gt->irq_lock)
		return -ENOMEM;

	intel_gt_common_init_early(gt);

	return 0;
}

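/*
 * Probe the local memory (LMEM) region backing this GT, if any, and register
 * it with the device's memory region table. -ENODEV from the setup call is
 * treated as "no LMEM present" rather than as a failure.
 */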
static int intel_gt_probe_lmem(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	unsigned int instance = gt->info.id;
	int id = INTEL_REGION_LMEM_0 + instance;
	struct intel_memory_region *mem;
	int err;

	mem = intel_gt_setup_lmem(gt);
	if (IS_ERR(mem)) {
		err = PTR_ERR(mem);
		if (err == -ENODEV)
			return 0;

		gt_err(gt, "Failed to setup region(%d) type=%d\n",
		       err, INTEL_MEMORY_LOCAL);
		return err;
	}

	mem->id = id;
	mem->instance = instance;

	intel_memory_region_set_name(mem, "local%u", mem->instance);

	GEM_BUG_ON(!HAS_REGION(i915, id));
	GEM_BUG_ON(i915->mm.regions[id]);
	i915->mm.regions[id] = mem;

	return 0;
}

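/*
 * Assign a GGTT to this GT: the media GT reuses the primary GT's GGTT while
 * other GTs create their own, and the GT is then linked onto the GGTT's list
 * of users.
 */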
int intel_gt_assign_ggtt(struct intel_gt *gt)
{
	/* Media GT shares primary GT's GGTT */
	if (gt->type == GT_MEDIA) {
		gt->ggtt = to_gt(gt->i915)->ggtt;
	} else {
		gt->ggtt = i915_ggtt_create(gt->i915);
		if (IS_ERR(gt->ggtt))
			return PTR_ERR(gt->ggtt);
	}

	list_add_tail(&gt->ggtt_link, &gt->ggtt->gt_list);

	return 0;
}

int intel_gt_init_mmio(struct intel_gt *gt)
{
	intel_gt_init_clock_frequency(gt);

	intel_uc_init_mmio(&gt->uc);
	intel_sseu_info_init(gt);
	intel_gt_mcr_init(gt);

	return intel_engines_init_mmio(gt);
}

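/*
 * Force a ring that is unused on this platform into a known idle state by
 * zeroing its control, head, tail and start registers.
 */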
static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (GRAPHICS_VER(i915) == 2) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (GRAPHICS_VER(i915) == 3) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}

int intel_gt_init_hw(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   HSW_MI_PREDICATE_RESULT_2,
				   IS_HASWELL_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		gt_err(gt, "Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&gt->uc);
	if (ret) {
		gt_probe_error(gt, "Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init(gt);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
{
	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
	GEN6_RING_FAULT_REG_POSTING_READ(engine);
}

i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
{
	/* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
	if (GRAPHICS_VER(gt->i915) < 11)
		return INVALID_MMIO_REG;

	return gt->type == GT_MEDIA ?
		MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
}

void
intel_gt_clear_error_registers(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	u32 eir;

	if (GRAPHICS_VER(i915) != 2)
		intel_uncore_write(uncore, PGTBL_ER, 0);

	if (GRAPHICS_VER(i915) < 4)
		intel_uncore_write(uncore, IPEIR(RENDER_RING_BASE), 0);
	else
		intel_uncore_write(uncore, IPEIR_I965, 0);

	intel_uncore_write(uncore, EIR, 0);
	eir = intel_uncore_read(uncore, EIR);
	if (eir) {
		/*
		 * some errors might have become stuck,
		 * mask them.
		 */
		gt_dbg(gt, "EIR stuck: 0x%08x, masking\n", eir);
		intel_uncore_rmw(uncore, EMR, 0, eir);
		intel_uncore_write(uncore, GEN2_IIR,
				   I915_MASTER_ERROR_INTERRUPT);
	}

	/*
	 * For the media GT, this ring fault register is not replicated,
	 * so don't do multicast/replicated register read/write operation on it.
	 */
	if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
		intel_uncore_rmw(uncore, XELPMP_RING_FAULT_REG,
				 RING_FAULT_VALID, 0);
		intel_uncore_posting_read(uncore,
					  XELPMP_RING_FAULT_REG);

	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
					   RING_FAULT_VALID, 0);
		intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);

	} else if (GRAPHICS_VER(i915) >= 12) {
		intel_uncore_rmw(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID, 0);
		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 8) {
		intel_uncore_rmw(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID, 0);
		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 6) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine_masked(engine, gt, engine_mask, id)
			gen6_clear_engine_error_register(engine);
	}
}

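/*
 * Report (via gt_dbg) any fault still latched in the per-engine gen6 fault
 * registers; clearing is handled separately by
 * intel_gt_clear_error_registers().
 */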
static void gen6_check_faults(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 fault;

	for_each_engine(engine, gt, id) {
		fault = GEN6_RING_FAULT_REG_READ(engine);
		if (fault & RING_FAULT_VALID) {
			gt_dbg(gt, "Unexpected fault\n"
			       "\tAddr: 0x%08lx\n"
			       "\tAddress space: %s\n"
			       "\tSource ID: %d\n"
			       "\tType: %d\n",
			       fault & PAGE_MASK,
			       fault & RING_FAULT_GTTSEL_MASK ?
			       "GGTT" : "PPGTT",
			       RING_FAULT_SRCID(fault),
			       RING_FAULT_FAULT_TYPE(fault));
		}
	}
}

static void xehp_check_faults(struct intel_gt *gt)
{
	u32 fault;

	/*
	 * Although the fault register now lives in an MCR register range,
	 * the GAM registers are special and we only truly need to read
	 * the "primary" GAM instance rather than handling each instance
	 * individually. intel_gt_mcr_read_any() will automatically steer
	 * toward the primary instance.
	 */
	fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
		fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		gt_dbg(gt, "Unexpected fault\n"
		       "\tAddr: 0x%08x_%08x\n"
		       "\tAddress space: %s\n"
		       "\tEngine ID: %d\n"
		       "\tSource ID: %d\n"
		       "\tType: %d\n",
		       upper_32_bits(fault_addr), lower_32_bits(fault_addr),
		       fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
		       GEN8_RING_FAULT_ENGINE_ID(fault),
		       RING_FAULT_SRCID(fault),
		       RING_FAULT_FAULT_TYPE(fault));
	}
}

static void gen8_check_faults(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
	u32 fault;

	if (GRAPHICS_VER(gt->i915) >= 12) {
		fault_reg = GEN12_RING_FAULT_REG;
		fault_data0_reg = GEN12_FAULT_TLB_DATA0;
		fault_data1_reg = GEN12_FAULT_TLB_DATA1;
	} else {
		fault_reg = GEN8_RING_FAULT_REG;
		fault_data0_reg = GEN8_FAULT_TLB_DATA0;
		fault_data1_reg = GEN8_FAULT_TLB_DATA1;
	}

	fault = intel_uncore_read(uncore, fault_reg);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
		fault_data1 = intel_uncore_read(uncore, fault_data1_reg);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		gt_dbg(gt, "Unexpected fault\n"
		       "\tAddr: 0x%08x_%08x\n"
		       "\tAddress space: %s\n"
		       "\tEngine ID: %d\n"
		       "\tSource ID: %d\n"
		       "\tType: %d\n",
		       upper_32_bits(fault_addr), lower_32_bits(fault_addr),
		       fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
		       GEN8_RING_FAULT_ENGINE_ID(fault),
		       RING_FAULT_SRCID(fault),
		       RING_FAULT_FAULT_TYPE(fault));
	}
}

void intel_gt_check_and_clear_faults(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_check_faults(gt);
	else if (GRAPHICS_VER(i915) >= 8)
		gen8_check_faults(gt);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_check_faults(gt);
	else
		return;

	intel_gt_clear_error_registers(gt, ALL_ENGINES);
}

void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
		return;

	intel_gt_chipset_flush(gt);

	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
		unsigned long flags;

		spin_lock_irqsave(&uncore->lock, flags);
		intel_uncore_posting_read_fw(uncore,
					     RING_TAIL(RENDER_RING_BASE));
		spin_unlock_irqrestore(&uncore->lock, flags);
	}
}

void intel_gt_chipset_flush(struct intel_gt *gt)
{
	wmb();
	if (GRAPHICS_VER(gt->i915) < 6)
		intel_ggtt_gmch_flush();
}

void intel_gt_driver_register(struct intel_gt *gt)
{
	intel_gsc_init(&gt->gsc, gt->i915);

	intel_rps_driver_register(&gt->rps);

	intel_gt_debugfs_register(gt);
	intel_gt_sysfs_register(gt);
}

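/*
 * Allocate and pin the GGTT-mapped scratch buffer for this GT, preferring
 * device-local memory and falling back to stolen (skipped on Meteor Lake,
 * see Wa_22018444074) and then internal memory.
 */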
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_lmem(i915, size,
					  I915_BO_ALLOC_VOLATILE |
					  I915_BO_ALLOC_GPU_ONLY);
	if (IS_ERR(obj) && !IS_METEORLAKE(i915)) /* Wa_22018444074 */
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		gt_err(gt, "Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (ret)
		goto err_unref;

	gt->scratch = i915_vma_make_unshrinkable(vma);

	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_gt_fini_scratch(struct intel_gt *gt)
{
	i915_vma_unpin_and_release(&gt->scratch, 0);
}

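/*
 * Select the address space used as gt->vm: a full PPGTT where the hardware
 * supports more than aliasing PPGTT, otherwise a reference to the global GTT.
 */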
static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
		return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
	else
		return i915_vm_get(&gt->ggtt->vm);
}

static int __engines_record_defaults(struct intel_gt *gt)
{
	struct i915_request *requests[I915_NUM_ENGINES] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_renderstate so;
		struct intel_context *ce;
		struct i915_request *rq;

		/* We must be able to switch to something! */
		GEM_BUG_ON(!engine->kernel_context);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_renderstate_init(&so, ce);
		if (err)
			goto err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_fini;
		}

		err = intel_engine_emit_ctx_wa(rq);
		if (err)
			goto err_rq;

		err = intel_renderstate_emit(&so, rq);
		if (err)
			goto err_rq;

err_rq:
		requests[id] = i915_request_get(rq);
		i915_request_add(rq);
err_fini:
		intel_renderstate_fini(&so, ce);
err:
		if (err) {
			intel_context_put(ce);
			goto out;
		}
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
		err = -EIO;
		goto out;
	}

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct i915_request *rq;
		struct file *state;

		rq = requests[id];
		if (!rq)
			continue;

		if (rq->fence.error) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
		if (!rq->context->state)
			continue;

		/* Keep a copy of the state's backing pages; free the obj */
		state = shmem_create_from_object(rq->context->state->obj);
		if (IS_ERR(state)) {
			err = PTR_ERR(state);
			goto out;
		}
		rq->engine->default_state = state;
	}

out:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	if (err)
		intel_gt_set_wedged(gt);

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct intel_context *ce;
		struct i915_request *rq;

		rq = requests[id];
		if (!rq)
			continue;

		ce = rq->context;
		i915_request_put(rq);
		intel_context_put(ce);
	}
	return err;
}

static int __engines_verify_workarounds(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, gt, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	/* Flush and restore the kernel context for safety */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
		err = -EIO;

	return err;
}

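/*
 * Quiesce the GT on the teardown path: wedge it on fini and run the suspend
 * sequence so that it is guaranteed to no longer be awake afterwards.
 */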
static void __intel_gt_disable(struct intel_gt *gt)
{
	intel_gt_set_wedged_on_fini(gt);

	intel_gt_suspend_prepare(gt);
	intel_gt_suspend_late(gt);

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
}

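/*
 * Retire outstanding requests and then wait for the uC to go idle, bounded
 * by @timeout. Returns 0 once idle (or if the GT is already asleep), or a
 * negative error code such as -EINTR when interrupted by a signal.
 */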
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
{
	long remaining_timeout;

	/* If the device is asleep, we have no requests outstanding */
	if (!intel_gt_pm_is_awake(gt))
		return 0;

	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
							   &remaining_timeout)) > 0) {
		cond_resched();
		if (signal_pending(current))
			return -EINTR;
	}

	if (timeout)
		return timeout;

	if (remaining_timeout < 0)
		remaining_timeout = 0;

	return intel_uc_wait_for_idle(&gt->uc, remaining_timeout);
}

int intel_gt_init(struct intel_gt *gt)
{
	int err;

	err = i915_inject_probe_error(gt->i915, -ENODEV);
	if (err)
		return err;

	intel_gt_init_workarounds(gt);

	/*
	 * This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	err = intel_gt_init_scratch(gt,
				    GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
	if (err)
		goto out_fw;

	intel_gt_pm_init(gt);

	gt->vm = kernel_vm(gt);
	if (!gt->vm) {
		err = -ENOMEM;
		goto err_pm;
	}

	intel_set_mocs_index(gt);

	err = intel_engines_init(gt);
	if (err)
		goto err_engines;

	err = intel_uc_init(&gt->uc);
	if (err)
		goto err_engines;

	err = intel_gt_resume(gt);
	if (err)
		goto err_uc_init;

	err = intel_gt_init_hwconfig(gt);
	if (err)
		gt_err(gt, "Failed to retrieve hwconfig table: %pe\n", ERR_PTR(err));

	err = __engines_record_defaults(gt);
	if (err)
		goto err_gt;

	err = __engines_verify_workarounds(gt);
	if (err)
		goto err_gt;

	err = i915_inject_probe_error(gt->i915, -EIO);
	if (err)
		goto err_gt;

	intel_uc_init_late(&gt->uc);

	intel_migrate_init(&gt->migrate, gt);

	goto out_fw;
err_gt:
	__intel_gt_disable(gt);
	intel_uc_fini_hw(&gt->uc);
err_uc_init:
	intel_uc_fini(&gt->uc);
err_engines:
	intel_engines_release(gt);
	i915_vm_put(fetch_and_zero(&gt->vm));
err_pm:
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
out_fw:
	if (err)
		intel_gt_set_wedged_on_init(gt);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	return err;
}

void intel_gt_driver_remove(struct intel_gt *gt)
{
	__intel_gt_disable(gt);

	intel_migrate_fini(&gt->migrate);
	intel_uc_driver_remove(&gt->uc);

	intel_engines_release(gt);

	intel_gt_flush_buffer_pool(gt);
}

void intel_gt_driver_unregister(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	intel_gt_sysfs_unregister(gt);
	intel_rps_driver_unregister(&gt->rps);
	intel_gsc_fini(&gt->gsc);

	/*
	 * If we unload the driver and wedge before the GSC worker is complete,
	 * the worker will hit an error on its submission to the GSC engine and
	 * then exit. This is hard to hit for a user, but it is reproducible
	 * when skipping selftests. The error is handled gracefully by the
	 * worker, so there are no functional issues, but we still end up with
	 * an error message in dmesg, which is something we want to avoid as
	 * this is a supported scenario. We could modify the worker to better
	 * handle a wedging occurring during its execution, but that gets
	 * complicated for a couple of reasons:
	 * - We do want the error on runtime wedging, because there are
	 *   implications for subsystems outside of GT (i.e., PXP, HDCP), it's
	 *   only the error on driver unload that we want to silence.
	 * - The worker is responsible for multiple submissions (GSC FW load,
	 *   HuC auth, SW proxy), so all of those will have to be adapted to
	 *   handle the wedged_on_fini scenario.
	 * Therefore, it's much simpler to just wait for the worker to be done
	 * before wedging on driver removal, also considering that the worker
	 * will likely already be idle in the great majority of non-selftest
	 * scenarios.
	 */
	intel_gsc_uc_flush_work(&gt->uc.gsc);

	/*
	 * Upon unregistering the device to prevent any new users, cancel
	 * all in-flight requests so that we can quickly unbind the active
	 * resources.
	 */
	intel_gt_set_wedged_on_fini(gt);

	/* Scrub all HW state upon release */
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_reset(gt, ALL_ENGINES);
}

void intel_gt_driver_release(struct intel_gt *gt)
{
	struct i915_address_space *vm;

	vm = fetch_and_zero(&gt->vm);
	if (vm) /* FIXME being called twice on error paths :( */
		i915_vm_put(vm);

	intel_wa_list_free(&gt->wa_list);
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
	intel_gt_fini_buffer_pool(gt);
	intel_gt_fini_hwconfig(gt);
}

void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;

	/* We need to wait for inflight RCU frees to release their grip */
	rcu_barrier();

	for_each_gt(gt, i915, id) {
		intel_uc_driver_late_release(&gt->uc);
		intel_gt_fini_requests(gt);
		intel_gt_fini_reset(gt);
		intel_gt_fini_timelines(gt);
		intel_gt_fini_tlb(gt);
		intel_engines_free(gt);
	}
}

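/*
 * Per-tile MMIO setup. Non-root GTs allocate their own uncore and irq_lock
 * and run the common early init here before their MMIO range is mapped at
 * phys_addr.
 */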
static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
{
	int ret;

	if (!gt_is_root(gt)) {
		struct intel_uncore *uncore;
		spinlock_t *irq_lock;

		uncore = drmm_kzalloc(&gt->i915->drm, sizeof(*uncore), GFP_KERNEL);
		if (!uncore)
			return -ENOMEM;

		irq_lock = drmm_kzalloc(&gt->i915->drm, sizeof(*irq_lock), GFP_KERNEL);
		if (!irq_lock)
			return -ENOMEM;

		gt->uncore = uncore;
		gt->irq_lock = irq_lock;

		intel_gt_common_init_early(gt);
	}

	intel_uncore_init_early(gt->uncore, gt);

	ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
	if (ret)
		return ret;

	gt->phys_addr = phys_addr;

	return 0;
}

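/*
 * Discover all GTs on the device. The primary GT always exists and was
 * partially initialised during early probe; any extra tiles or standalone
 * media GTs come from the platform's extra_gt_list and are set up here.
 */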
int intel_gt_probe_all(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	struct intel_gt *gt = to_gt(i915);
	const struct intel_gt_definition *gtdef;
	phys_addr_t phys_addr;
	unsigned int mmio_bar;
	unsigned int i;
	int ret;

	mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
	phys_addr = pci_resource_start(pdev, mmio_bar);

	/*
	 * We always have at least one primary GT on any device
	 * and it has been already initialized early during probe
	 * in i915_driver_probe()
	 */
	gt->i915 = i915;
	gt->name = "Primary GT";
	gt->info.engine_mask = INTEL_INFO(i915)->platform_engine_mask;

	gt_dbg(gt, "Setting up %s\n", gt->name);
	ret = intel_gt_tile_setup(gt, phys_addr);
	if (ret)
		return ret;

	if (!HAS_EXTRA_GT_LIST(i915))
		return 0;

	for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
	     gtdef->name != NULL;
	     i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
		gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
		if (!gt) {
			ret = -ENOMEM;
			goto err;
		}

		gt->i915 = i915;
		gt->name = gtdef->name;
		gt->type = gtdef->type;
		gt->info.engine_mask = gtdef->engine_mask;
		gt->info.id = i;

		gt_dbg(gt, "Setting up %s\n", gt->name);
		if (GEM_WARN_ON(range_overflows_t(resource_size_t,
						  gtdef->mapping_base,
						  SZ_16M,
						  pci_resource_len(pdev, mmio_bar)))) {
			ret = -ENODEV;
			goto err;
		}

		switch (gtdef->type) {
		case GT_TILE:
			ret = intel_gt_tile_setup(gt, phys_addr + gtdef->mapping_base);
			break;

		case GT_MEDIA:
			ret = intel_sa_mediagt_setup(gt, phys_addr + gtdef->mapping_base,
						     gtdef->gsi_offset);
			break;

		case GT_PRIMARY:
			/* Primary GT should not appear in extra GT list */
		default:
			MISSING_CASE(gtdef->type);
			ret = -ENODEV;
		}

		if (ret)
			goto err;

		i915->gt[i] = gt;
	}

	return 0;

err:
	i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
	return ret;
}

int intel_gt_tiles_init(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;
	int ret;

	for_each_gt(gt, i915, id) {
		ret = intel_gt_probe_lmem(gt);
		if (ret)
			return ret;
	}

	return 0;
}

void intel_gt_info_print(const struct intel_gt_info *info,
			 struct drm_printer *p)
{
	drm_printf(p, "available engines: %x\n", info->engine_mask);

	intel_sseu_dump(&info->sseu, p);
}

enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
					      struct drm_i915_gem_object *obj,
					      bool always_coherent)
{
	/*
	 * Wa_22016122933: always return I915_MAP_WC for Media
	 * version 13.0 when the object is on the Media GT
	 */
	if (i915_gem_object_is_lmem(obj) || intel_gt_needs_wa_22016122933(gt))
		return I915_MAP_WC;
	if (HAS_LLC(gt->i915) || always_coherent)
		return I915_MAP_WB;
	else
		return I915_MAP_WC;
}

bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
{
	/* Wa_16018031267, Wa_16018063123 */
	return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
}

bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
	return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
}

static void __intel_gt_bind_context_set_ready(struct intel_gt *gt, bool ready)
{
	struct intel_engine_cs *engine = gt->engine[BCS0];

	if (engine && engine->bind_context)
		engine->bind_context_ready = ready;
}

/**
 * intel_gt_bind_context_set_ready - Set the context binding as ready
 *
 * @gt: GT structure
 *
 * This function marks the binder context as ready.
 */
void intel_gt_bind_context_set_ready(struct intel_gt *gt)
{
	__intel_gt_bind_context_set_ready(gt, true);
}

/**
 * intel_gt_bind_context_set_unready - Set the context binding as not ready
 * @gt: GT structure
 *
 * This function marks the binder context as not ready.
 */
void intel_gt_bind_context_set_unready(struct intel_gt *gt)
{
	__intel_gt_bind_context_set_ready(gt, false);
}

/**
 * intel_gt_is_bind_context_ready - Check if context binding is ready
 *
 * @gt: GT structure
 *
 * This function returns binder context's ready status.
 */
bool intel_gt_is_bind_context_ready(struct intel_gt *gt)
{
	struct intel_engine_cs *engine = gt->engine[BCS0];

	if (engine)
		return engine->bind_context_ready;

	return false;
}