// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

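	/*
	 * Ring space: one dword for each of the two bracketing flush
	 * commands, plus four dwords per store/flush iteration below.
	 */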
	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

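	/*
	 * 2 dwords for the bracketing flush commands; the invalidate
	 * branch adds two 4-dword PIPE_CONTROLs and 12 MI_FLUSHes.
	 */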
	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that it is a delay following invalidation
	 * that is required to reset the caches, as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

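/*
 * Write the breadcrumb for a completed request: a MI_FLUSH, @flush dummy
 * seqno stores to the scratch slot (apparently to give the flush time to
 * land, cf. the store/flush loop in gen2_emit_flush()), @post stores of
 * the seqno to the status page, and finally a MI_USER_INTERRUPT.
 */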
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
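/*
 * Work around the i830 CS TLB invalidation bug (see the comments below):
 * first evict the stale PTE TLB entries with a dummy blit and, unless the
 * caller pinned the batch, execute a stable copy of the batch from the
 * scratch bo rather than the original.
 */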
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

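		/* 6 dwords for the copy blit, 2 for the trailing flush/noop */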
		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which now has all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

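	/* On these platforms the non-secure flag is carried in the low bit
	 * of the batch address, hence OR-ing it into @offset.
	 */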
	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

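	/* Gen2 exposes only a 16-bit interrupt mask register, hence write16 */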
	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

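/*
 * From Ironlake onwards the ring interrupts are routed through the GT
 * interrupt mask, so defer to the gen5 GT irq helpers.
 */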
void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}