1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2014 Intel Corporation |
4 | */ |
5 | |
6 | #include "gem/i915_gem_internal.h" |
7 | |
8 | #include "i915_drv.h" |
9 | #include "intel_renderstate.h" |
10 | #include "intel_context.h" |
11 | #include "intel_gpu_commands.h" |
12 | #include "intel_ring.h" |
13 | |
14 | static const struct intel_renderstate_rodata * |
15 | render_state_get_rodata(const struct intel_engine_cs *engine) |
16 | { |
17 | if (engine->class != RENDER_CLASS) |
18 | return NULL; |
19 | |
20 | switch (GRAPHICS_VER(engine->i915)) { |
21 | case 6: |
22 | return &gen6_null_state; |
23 | case 7: |
24 | return &gen7_null_state; |
25 | case 8: |
26 | return &gen8_null_state; |
27 | case 9: |
28 | return &gen9_null_state; |
29 | } |
30 | |
31 | return NULL; |
32 | } |
33 | |
/*
 * Macro to add commands to auxiliary batch.
 * This macro only checks for page overflow before inserting the commands,
 * this is sufficient as the null state generator makes the final batch
 * with two passes to build command and state separately. At this point
 * the size of both are known and it compacts them by relocating the state
 * right after the commands taking care of alignment so we should have
 * sufficient space below them for adding new commands.
 *
 * On overflow the macro jumps to a label named "out", so it may only be used
 * inside a function that defines that label. @i is evaluated more than once
 * and is post-incremented when the write succeeds, so it must be a plain
 * lvalue without side effects.
 */
#define OUT_BATCH(batch, i, val)				\
	do {							\
		if ((i) >= PAGE_SIZE / sizeof(u32))		\
			goto out;				\
		(batch)[(i)++] = (val);				\
	} while (0)
49 | |
50 | static int render_state_setup(struct intel_renderstate *so, |
51 | struct drm_i915_private *i915) |
52 | { |
53 | const struct intel_renderstate_rodata *rodata = so->rodata; |
54 | unsigned int i = 0, reloc_index = 0; |
55 | int ret = -EINVAL; |
56 | u32 *d; |
57 | |
58 | d = i915_gem_object_pin_map(obj: so->vma->obj, type: I915_MAP_WB); |
59 | if (IS_ERR(ptr: d)) |
60 | return PTR_ERR(ptr: d); |
61 | |
62 | while (i < rodata->batch_items) { |
63 | u32 s = rodata->batch[i]; |
64 | |
65 | if (i * 4 == rodata->reloc[reloc_index]) { |
66 | u64 r = s + i915_vma_offset(vma: so->vma); |
67 | |
68 | s = lower_32_bits(r); |
69 | if (HAS_64BIT_RELOC(i915)) { |
70 | if (i + 1 >= rodata->batch_items || |
71 | rodata->batch[i + 1] != 0) |
72 | goto out; |
73 | |
74 | d[i++] = s; |
75 | s = upper_32_bits(r); |
76 | } |
77 | |
78 | reloc_index++; |
79 | } |
80 | |
81 | d[i++] = s; |
82 | } |
83 | |
84 | if (rodata->reloc[reloc_index] != -1) { |
85 | drm_err(&i915->drm, "only %d relocs resolved\n" , reloc_index); |
86 | goto out; |
87 | } |
88 | |
89 | so->batch_offset = i915_ggtt_offset(vma: so->vma); |
90 | so->batch_size = rodata->batch_items * sizeof(u32); |
91 | |
92 | while (i % CACHELINE_DWORDS) |
93 | OUT_BATCH(d, i, MI_NOOP); |
94 | |
95 | so->aux_offset = i * sizeof(u32); |
96 | |
97 | if (HAS_POOLED_EU(i915)) { |
98 | /* |
99 | * We always program 3x6 pool config but depending upon which |
100 | * subslice is disabled HW drops down to appropriate config |
101 | * shown below. |
102 | * |
103 | * In the below table 2x6 config always refers to |
104 | * fused-down version, native 2x6 is not available and can |
105 | * be ignored |
106 | * |
107 | * SNo subslices config eu pool configuration |
108 | * ----------------------------------------------------------- |
109 | * 1 3 subslices enabled (3x6) - 0x00777000 (9+9) |
110 | * 2 ss0 disabled (2x6) - 0x00777000 (3+9) |
111 | * 3 ss1 disabled (2x6) - 0x00770000 (6+6) |
112 | * 4 ss2 disabled (2x6) - 0x00007000 (9+3) |
113 | */ |
114 | u32 eu_pool_config = 0x00777000; |
115 | |
116 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); |
117 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); |
118 | OUT_BATCH(d, i, eu_pool_config); |
119 | OUT_BATCH(d, i, 0); |
120 | OUT_BATCH(d, i, 0); |
121 | OUT_BATCH(d, i, 0); |
122 | } |
123 | |
124 | OUT_BATCH(d, i, MI_BATCH_BUFFER_END); |
125 | so->aux_size = i * sizeof(u32) - so->aux_offset; |
126 | so->aux_offset += so->batch_offset; |
127 | /* |
128 | * Since we are sending length, we need to strictly conform to |
129 | * all requirements. For Gen2 this must be a multiple of 8. |
130 | */ |
131 | so->aux_size = ALIGN(so->aux_size, 8); |
132 | |
133 | ret = 0; |
134 | out: |
135 | __i915_gem_object_flush_map(obj: so->vma->obj, offset: 0, size: i * sizeof(u32)); |
136 | __i915_gem_object_release_map(obj: so->vma->obj); |
137 | return ret; |
138 | } |
139 | |
140 | #undef OUT_BATCH |
141 | |
142 | int intel_renderstate_init(struct intel_renderstate *so, |
143 | struct intel_context *ce) |
144 | { |
145 | struct intel_engine_cs *engine = ce->engine; |
146 | struct drm_i915_gem_object *obj = NULL; |
147 | int err; |
148 | |
149 | memset(so, 0, sizeof(*so)); |
150 | |
151 | so->rodata = render_state_get_rodata(engine); |
152 | if (so->rodata) { |
153 | if (so->rodata->batch_items * 4 > PAGE_SIZE) |
154 | return -EINVAL; |
155 | |
156 | obj = i915_gem_object_create_internal(i915: engine->i915, PAGE_SIZE); |
157 | if (IS_ERR(ptr: obj)) |
158 | return PTR_ERR(ptr: obj); |
159 | |
160 | so->vma = i915_vma_instance(obj, vm: &engine->gt->ggtt->vm, NULL); |
161 | if (IS_ERR(ptr: so->vma)) { |
162 | err = PTR_ERR(ptr: so->vma); |
163 | goto err_obj; |
164 | } |
165 | } |
166 | |
167 | i915_gem_ww_ctx_init(ctx: &so->ww, intr: true); |
168 | retry: |
169 | err = intel_context_pin_ww(ce, ww: &so->ww); |
170 | if (err) |
171 | goto err_fini; |
172 | |
173 | /* return early if there's nothing to setup */ |
174 | if (!err && !so->rodata) |
175 | return 0; |
176 | |
177 | err = i915_gem_object_lock(obj: so->vma->obj, ww: &so->ww); |
178 | if (err) |
179 | goto err_context; |
180 | |
181 | err = i915_vma_pin_ww(vma: so->vma, ww: &so->ww, size: 0, alignment: 0, PIN_GLOBAL | PIN_HIGH); |
182 | if (err) |
183 | goto err_context; |
184 | |
185 | err = render_state_setup(so, i915: engine->i915); |
186 | if (err) |
187 | goto err_unpin; |
188 | |
189 | return 0; |
190 | |
191 | err_unpin: |
192 | i915_vma_unpin(vma: so->vma); |
193 | err_context: |
194 | intel_context_unpin(ce); |
195 | err_fini: |
196 | if (err == -EDEADLK) { |
197 | err = i915_gem_ww_ctx_backoff(ctx: &so->ww); |
198 | if (!err) |
199 | goto retry; |
200 | } |
201 | i915_gem_ww_ctx_fini(ctx: &so->ww); |
202 | err_obj: |
203 | if (obj) |
204 | i915_gem_object_put(obj); |
205 | so->vma = NULL; |
206 | return err; |
207 | } |
208 | |
209 | int intel_renderstate_emit(struct intel_renderstate *so, |
210 | struct i915_request *rq) |
211 | { |
212 | struct intel_engine_cs *engine = rq->engine; |
213 | int err; |
214 | |
215 | if (!so->vma) |
216 | return 0; |
217 | |
218 | err = i915_vma_move_to_active(vma: so->vma, rq, flags: 0); |
219 | if (err) |
220 | return err; |
221 | |
222 | err = engine->emit_bb_start(rq, |
223 | so->batch_offset, so->batch_size, |
224 | I915_DISPATCH_SECURE); |
225 | if (err) |
226 | return err; |
227 | |
228 | if (so->aux_size > 8) { |
229 | err = engine->emit_bb_start(rq, |
230 | so->aux_offset, so->aux_size, |
231 | I915_DISPATCH_SECURE); |
232 | if (err) |
233 | return err; |
234 | } |
235 | |
236 | return 0; |
237 | } |
238 | |
239 | void intel_renderstate_fini(struct intel_renderstate *so, |
240 | struct intel_context *ce) |
241 | { |
242 | if (so->vma) { |
243 | i915_vma_unpin(vma: so->vma); |
244 | i915_vma_close(vma: so->vma); |
245 | } |
246 | |
247 | intel_context_unpin(ce); |
248 | i915_gem_ww_ctx_fini(ctx: &so->ww); |
249 | |
250 | if (so->vma) |
251 | i915_gem_object_put(obj: so->vma->obj); |
252 | } |
253 | |