1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2023 Intel Corporation
4 */
5
6#include "i915_drv.h"
7#include "i915_perf_oa_regs.h"
8#include "intel_engine_pm.h"
9#include "intel_gt.h"
10#include "intel_gt_mcr.h"
11#include "intel_gt_pm.h"
12#include "intel_gt_print.h"
13#include "intel_gt_regs.h"
14#include "intel_tlb.h"
15#include "uc/intel_guc.h"
16
17/*
18 * HW architecture suggest typical invalidation time at 40us,
19 * with pessimistic cases up to 100us and a recommendation to
20 * cap at 1ms. We go a bit higher just in case.
21 */
22#define TLB_INVAL_TIMEOUT_US 100
23#define TLB_INVAL_TIMEOUT_MS 4
24
25/*
26 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
27 * but are now considered MCR registers. Since they exist within a GAM range,
28 * the primary instance of the register rolls up the status from each unit.
29 */
30static int wait_for_invalidate(struct intel_engine_cs *engine)
31{
32 if (engine->tlb_inv.mcr)
33 return intel_gt_mcr_wait_for_reg(gt: engine->gt,
34 reg: engine->tlb_inv.reg.mcr_reg,
35 mask: engine->tlb_inv.done,
36 value: 0,
37 TLB_INVAL_TIMEOUT_US,
38 TLB_INVAL_TIMEOUT_MS);
39 else
40 return __intel_wait_for_register_fw(uncore: engine->gt->uncore,
41 reg: engine->tlb_inv.reg.reg,
42 mask: engine->tlb_inv.done,
43 value: 0,
44 TLB_INVAL_TIMEOUT_US,
45 TLB_INVAL_TIMEOUT_MS,
46 NULL);
47}
48
49static void mmio_invalidate_full(struct intel_gt *gt)
50{
51 struct drm_i915_private *i915 = gt->i915;
52 struct intel_uncore *uncore = gt->uncore;
53 struct intel_engine_cs *engine;
54 intel_engine_mask_t awake, tmp;
55 enum intel_engine_id id;
56 unsigned long flags;
57
58 if (GRAPHICS_VER(i915) < 8)
59 return;
60
61 intel_uncore_forcewake_get(uncore, domains: FORCEWAKE_ALL);
62
63 intel_gt_mcr_lock(gt, flags: &flags);
64 spin_lock(lock: &uncore->lock); /* serialise invalidate with GT reset */
65
66 awake = 0;
67 for_each_engine(engine, gt, id) {
68 if (!intel_engine_pm_is_awake(engine))
69 continue;
70
71 if (engine->tlb_inv.mcr)
72 intel_gt_mcr_multicast_write_fw(gt,
73 reg: engine->tlb_inv.reg.mcr_reg,
74 value: engine->tlb_inv.request);
75 else
76 intel_uncore_write_fw(uncore,
77 engine->tlb_inv.reg.reg,
78 engine->tlb_inv.request);
79
80 awake |= engine->mask;
81 }
82
83 GT_TRACE(gt, "invalidated engines %08x\n", awake);
84
85 /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
86 if (awake &&
87 (IS_TIGERLAKE(i915) ||
88 IS_DG1(i915) ||
89 IS_ROCKETLAKE(i915) ||
90 IS_ALDERLAKE_S(i915) ||
91 IS_ALDERLAKE_P(i915)))
92 intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
93
94 spin_unlock(lock: &uncore->lock);
95 intel_gt_mcr_unlock(gt, flags);
96
97 for_each_engine_masked(engine, gt, awake, tmp) {
98 if (wait_for_invalidate(engine))
99 gt_err_ratelimited(gt,
100 "%s TLB invalidation did not complete in %ums!\n",
101 engine->name, TLB_INVAL_TIMEOUT_MS);
102 }
103
104 /*
105 * Use delayed put since a) we mostly expect a flurry of TLB
106 * invalidations so it is good to avoid paying the forcewake cost and
107 * b) it works around a bug in Icelake which cannot cope with too rapid
108 * transitions.
109 */
110 intel_uncore_forcewake_put_delayed(uncore, domains: FORCEWAKE_ALL);
111}
112
113static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
114{
115 u32 cur = intel_gt_tlb_seqno(gt);
116
117 /* Only skip if a *full* TLB invalidate barrier has passed */
118 return (s32)(cur - ALIGN(seqno, 2)) > 0;
119}
120
121void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
122{
123 intel_wakeref_t wakeref;
124
125 if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
126 return;
127
128 if (intel_gt_is_wedged(gt))
129 return;
130
131 if (tlb_seqno_passed(gt, seqno))
132 return;
133
134 with_intel_gt_pm_if_awake(gt, wakeref) {
135 struct intel_guc *guc = &gt->uc.guc;
136
137 mutex_lock(&gt->tlb.invalidate_lock);
138 if (tlb_seqno_passed(gt, seqno))
139 goto unlock;
140
141 if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
142 /*
143 * Only perform GuC TLB invalidation if GuC is ready.
144 * The only time GuC could not be ready is on GT reset,
145 * which would clobber all the TLBs anyways, making
146 * any TLB invalidation path here unnecessary.
147 */
148 if (intel_guc_is_ready(guc))
149 intel_guc_invalidate_tlb_engines(guc);
150 } else {
151 mmio_invalidate_full(gt);
152 }
153
154 write_seqcount_invalidate(&gt->tlb.seqno);
155unlock:
156 mutex_unlock(lock: &gt->tlb.invalidate_lock);
157 }
158}
159
/*
 * intel_gt_init_tlb - initialise the GT's TLB-invalidation tracking state
 * @gt: the GT to initialise
 *
 * The mutex must be initialised first: the seqcount is associated with it
 * (seqcount_mutex) so lockdep can verify writers hold invalidate_lock.
 */
void intel_gt_init_tlb(struct intel_gt *gt)
{
 mutex_init(&gt->tlb.invalidate_lock);
 seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}
165
166void intel_gt_fini_tlb(struct intel_gt *gt)
167{
168 mutex_destroy(lock: &gt->tlb.invalidate_lock);
169}
170
171#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
172#include "selftest_tlb.c"
173#endif
174

source code of linux/drivers/gpu/drm/i915/gt/intel_tlb.c