// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"
#include "uc/intel_guc.h"
/*
 * The HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
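
/*
 * TLB_INVAL_TIMEOUT_US bounds the initial busy-wait and
 * TLB_INVAL_TIMEOUT_MS the overall sleeping wait; they map onto the
 * fast/slow timeout pair taken by intel_gt_mcr_wait_for_reg() and
 * __intel_wait_for_register_fw() in wait_for_invalidate() below.
 */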
/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers. Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

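/*
 * Issue a TLB invalidation request to every awake engine via MMIO, then
 * wait for each engine to report completion. The request writes are
 * batched under uncore->lock (and the MCR lock) so they cannot interleave
 * with a GT reset, while the waits happen after dropping the locks so the
 * engines invalidate in parallel.
 */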
static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

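/*
 * The GT-wide seqno is kept even by write_seqcount_invalidate(); callers
 * tag a pending request by setting the low bit (see
 * intel_gt_next_invalidate_tlb_full() in intel_tlb.h). ALIGN(seqno, 2)
 * rounds a pending request up to the barrier that completes it, and doing
 * the comparison in s32 space keeps it safe across u32 wraparound.
 */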
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

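/**
 * intel_gt_invalidate_tlb_full - issue a GT-wide TLB invalidation
 * @gt: the GT to invalidate
 * @seqno: seqno of the invalidation request being flushed
 *
 * Invalidate the TLBs on all engines of @gt, unless a full invalidation
 * barrier has already passed @seqno, the GT is wedged, or the GT is not
 * awake. Where the platform supports it the invalidation is routed through
 * the GuC, otherwise it falls back to direct MMIO writes.
 *
 * A sketch of the expected call pattern (the real caller lives in the VMA
 * unbind path):
 *
 *	u32 seqno = intel_gt_next_invalidate_tlb_full(gt);
 *
 *	... zap the PTEs ...
 *
 *	intel_gt_invalidate_tlb_full(gt, seqno);
 */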
void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		struct intel_guc *guc = &gt->uc.guc;

		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
			/*
			 * Only perform GuC TLB invalidation if GuC is ready.
			 * The only time GuC could not be ready is on GT reset,
			 * which would clobber all the TLBs anyway, making
			 * any TLB invalidation path here unnecessary.
			 */
			if (intel_guc_is_ready(guc))
				intel_guc_invalidate_tlb_engines(guc);
		} else {
			mmio_invalidate_full(gt);
		}

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

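/**
 * intel_gt_init_tlb - initialise TLB invalidation tracking for a GT
 * @gt: the GT being initialised
 *
 * Set up the mutex that serialises full TLB invalidations and the
 * seqcount, guarded by that mutex, used to elide redundant ones.
 */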
void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

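/**
 * intel_gt_fini_tlb - release TLB invalidation tracking for a GT
 * @gt: the GT being torn down
 */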
void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif