// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_object.h"

#include "i915_drv.h"
#include "i915_vma.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"
#include "intel_gt.h"
#include "intel_timeline.h"

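/*
 * Re-sample the space available in the ring: the gap between the last
 * position the hardware is known to have consumed (head) and the next
 * offset the CPU will emit to (emit), modulo the ring size.
 */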
unsigned int intel_ring_update_space(struct intel_ring *ring)
{
	unsigned int space;

	space = __intel_ring_space(ring->head, ring->emit, ring->size);

	ring->space = space;
	return space;
}

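/*
 * Take an extra pin reference on a ring that is already pinned; the
 * caller must hold a pin themselves, hence the BUG_ON for a zero
 * pin_count.
 */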
void __intel_ring_pin(struct intel_ring *ring)
{
	GEM_BUG_ON(!atomic_read(&ring->pin_count));
	atomic_inc(&ring->pin_count);
}

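/*
 * Pin the ring into the GGTT and map it for CPU access. Only the first
 * pin does the actual work; nested pins merely bump pin_count. Rings
 * backed by stolen memory must be placed in the mappable aperture, as
 * stolen memory is only accessible through the GTT; everything else is
 * pinned high to keep the aperture free.
 */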
int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
{
	struct i915_vma *vma = ring->vma;
	unsigned int flags;
	void *addr;
	int ret;

	if (atomic_fetch_inc(&ring->pin_count))
		return 0;

	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);

	if (i915_gem_object_is_stolen(vma->obj))
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	ret = i915_ggtt_pin(vma, ww, 0, flags);
	if (unlikely(ret))
		goto err_unpin;

	if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
		addr = (void __force *)i915_vma_pin_iomap(vma);
	} else {
		int type = intel_gt_coherent_map_type(vma->vm->gt, vma->obj,
						      false);

		addr = i915_gem_object_pin_map(vma->obj, type);
	}

	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto err_ring;
	}

	i915_vma_make_unshrinkable(vma);

	/* Discard any unused bytes beyond that submitted to hw. */
	intel_ring_reset(ring, ring->emit);

	ring->vaddr = addr;
	return 0;

err_ring:
	i915_vma_unpin(vma);
err_unpin:
	atomic_dec(&ring->pin_count);
	return ret;
}

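/*
 * Rewind the ring to a known position: head, tail and the software
 * emit offset are all set to @tail, making the ring appear empty from
 * that point, and the cached free space is recomputed to match.
 */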
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
	tail = intel_ring_wrap(ring, tail);
	ring->tail = tail;
	ring->head = tail;
	ring->emit = tail;
	intel_ring_update_space(ring);
}

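/*
 * Drop a pin reference; on the final unpin, tear down the CPU mapping,
 * mark the backing store as purgeable again and release the GGTT
 * binding taken in intel_ring_pin().
 */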
void intel_ring_unpin(struct intel_ring *ring)
{
	struct i915_vma *vma = ring->vma;

	if (!atomic_dec_and_test(&ring->pin_count))
		return;

	i915_vma_unset_ggtt_write(vma);
	if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
		i915_vma_unpin_iomap(vma);
	else
		i915_gem_object_unpin_map(vma->obj);

	i915_vma_make_purgeable(vma);
	i915_vma_unpin(vma);
}

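/*
 * Allocate the backing store for a ring and wrap it in a GGTT vma,
 * trying each memory type in turn: device-local memory first, then
 * stolen memory (only when there is an aperture to access it through
 * and no LLC), and finally internal (system) memory as the fallback.
 */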
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
{
	struct i915_address_space *vm = &ggtt->vm;
	struct drm_i915_private *i915 = vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
	 * if supported by the platform's GGTT.
	 */
	if (vm->has_read_only)
		i915_gem_object_set_readonly(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return vma;
}

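/*
 * Create a ring of @size bytes for @engine. The size must be a power
 * of two: the free-space arithmetic relies on it, and the hardware
 * ring length is programmed as a whole number of pages in RING_CTL.
 */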
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	kref_init(&ring->ref);
	ring->size = size;
	ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);

	/*
	 * Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(i915) || IS_I845G(i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;

	intel_ring_update_space(ring);

	vma = create_ring_vma(engine->gt->ggtt, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}

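/*
 * Release callback for the ring's kref, called once the last
 * reference is dropped.
 */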
void intel_ring_free(struct kref *ref)
{
	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);

	i915_vma_put(ring->vma);
	kfree(ring);
}

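/*
 * The ring is drained request by request: find the oldest request on
 * this ring whose retirement would release at least @bytes of space,
 * wait for it to complete, then retire everything up to and including
 * it so the space is actually reclaimed.
 */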
static noinline int
wait_for_space(struct intel_ring *ring,
	       struct intel_timeline *tl,
	       unsigned int bytes)
{
	struct i915_request *target;
	long timeout;

	if (intel_ring_update_space(ring) >= bytes)
		return 0;

	GEM_BUG_ON(list_empty(&tl->requests));
	list_for_each_entry(target, &tl->requests, link) {
		if (target->ring != ring)
			continue;

		/* Would completion of this request free enough space? */
		if (bytes <= __intel_ring_space(target->postfix,
						ring->emit, ring->size))
			break;
	}

	if (GEM_WARN_ON(&target->link == &tl->requests))
		return -ENOSPC;

	timeout = i915_request_wait(target,
				    I915_WAIT_INTERRUPTIBLE,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	i915_request_retire_upto(target);

	intel_ring_update_space(ring);
	GEM_BUG_ON(ring->space < bytes);
	return 0;
}

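/*
 * Reserve space for @num_dwords dwords of commands in @rq's ring,
 * waiting for older requests to retire and/or wrapping the ring as
 * needed, and return a pointer at which to write them. Typical usage:
 *
 *	cs = intel_ring_begin(rq, 4);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *	*cs++ = ...;	(exactly num_dwords writes)
 *	intel_ring_advance(rq, cs);
 */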
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
	struct intel_ring *ring = rq->ring;
	const unsigned int remain_usable = ring->effective_size - ring->emit;
	const unsigned int bytes = num_dwords * sizeof(u32);
	unsigned int need_wrap = 0;
	unsigned int total_bytes;
	u32 *cs;

	/* Packets must be qword aligned. */
	GEM_BUG_ON(num_dwords & 1);

	total_bytes = bytes + rq->reserved_space;
	GEM_BUG_ON(total_bytes > ring->effective_size);

	if (unlikely(total_bytes > remain_usable)) {
		const int remain_actual = ring->size - ring->emit;

		if (bytes > remain_usable) {
			/*
			 * Not enough space for the basic request. So need to
			 * flush out the remainder and then wait for
			 * base + reserved.
			 */
			total_bytes += remain_actual;
			need_wrap = remain_actual | 1;
		} else {
			/*
			 * The base request will fit but the reserved space
			 * falls off the end. So we don't need an immediate
			 * wrap and only need to effectively wait for the
			 * reserved size from the start of the ringbuffer.
			 */
			total_bytes = rq->reserved_space + remain_actual;
		}
	}

	if (unlikely(total_bytes > ring->space)) {
		int ret;

		/*
		 * Space is reserved in the ringbuffer for finalising the
		 * request, as that cannot be allowed to fail. During request
		 * finalisation, reserved_space is set to 0 to stop the
		 * overallocation and the assumption is that then we never need
		 * to wait (which has the risk of failing with EINTR).
		 *
		 * See also i915_request_alloc() and i915_request_add().
		 */
		GEM_BUG_ON(!rq->reserved_space);

		ret = wait_for_space(ring,
				     i915_request_timeline(rq),
				     total_bytes);
		if (unlikely(ret))
			return ERR_PTR(ret);
	}

	if (unlikely(need_wrap)) {
		need_wrap &= ~1;
		GEM_BUG_ON(need_wrap > ring->space);
		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));

		/* Fill the tail with MI_NOOP */
		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
		ring->space -= need_wrap;
		ring->emit = 0;
	}

	GEM_BUG_ON(ring->emit > ring->size - bytes);
	GEM_BUG_ON(ring->space < bytes);
	cs = ring->vaddr + ring->emit;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset32(cs, POISON_INUSE, bytes / sizeof(*cs));
	ring->emit += bytes;
	ring->space -= bytes;

	return cs;
}

/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct i915_request *rq)
{
	int num_dwords;
	void *cs;

	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_DWORDS - num_dwords;
	GEM_BUG_ON(num_dwords & 1);

	cs = intel_ring_begin(rq, num_dwords);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Pad with paired MI_NOOPs; cs is void *, so advance in bytes. */
	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
	intel_ring_advance(rq, cs + num_dwords * sizeof(u32));

	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
	return 0;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring.c"
#endif