// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

/**
 * DOC: display pinning helpers
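 *
 * These helpers pin framebuffer objects into the GGTT, or into the
 * display page table (DPT) on platforms that scan out through one, and
 * manage the fence and vma references that plane (un)pinning relies on.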
 */

#include "gem/i915_gem_domain.h"
#include "gem/i915_gem_object.h"

#include "i915_drv.h"
#include "intel_display_types.h"
#include "intel_dpt.h"
#include "intel_fb.h"
#include "intel_fb_pin.h"

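/*
 * Pin and bind @fb's backing object into the DPT address space @vm for
 * scanout: on lmem platforms the object is first migrated to lmem, its
 * cache level is set to none, and the vma is bound with 2 MiB alignment.
 * Returns a new vma reference on success; release it with
 * intel_unpin_fb_vma().
 */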
static struct i915_vma *
intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
		     const struct i915_gtt_view *view,
		     bool uses_fence,
		     unsigned long *out_flags,
		     struct i915_address_space *vm)
{
	struct drm_device *dev = fb->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma;
	u32 alignment;
	int ret;

	/*
	 * We are not syncing against the binding (and potential migrations)
	 * below, so this vm must never be async.
	 */
	if (drm_WARN_ON(&dev_priv->drm, vm->bind_async_flags))
		return ERR_PTR(-EINVAL);

	if (WARN_ON(!i915_gem_object_is_framebuffer(obj)))
		return ERR_PTR(-EINVAL);

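	/* 4096 * 512 = 2 MiB alignment for the DPT binding */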
	alignment = 4096 * 512;

	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);

	for_i915_gem_ww(&ww, ret, true) {
		ret = i915_gem_object_lock(obj, &ww);
		if (ret)
			continue;

		if (HAS_LMEM(dev_priv)) {
			unsigned int flags = obj->flags;

			/*
			 * For this type of buffer we need to be able to read from
			 * the CPU the clear color value found in the buffer, hence
			 * we need to ensure it is always in the mappable part of
			 * lmem, if this is a small-bar device.
			 */
			if (intel_fb_rc_ccs_cc_plane(fb) >= 0)
				flags &= ~I915_BO_ALLOC_GPU_ONLY;
			ret = __i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0,
							flags);
			if (ret)
				continue;
		}

		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
		if (ret)
			continue;

		vma = i915_vma_instance(obj, vm, view);
		if (IS_ERR(vma)) {
			ret = PTR_ERR(vma);
			continue;
		}

		if (i915_vma_misplaced(vma, 0, alignment, 0)) {
			ret = i915_vma_unbind(vma);
			if (ret)
				continue;
		}

		ret = i915_vma_pin_ww(vma, &ww, 0, alignment, PIN_GLOBAL);
		if (ret)
			continue;
	}
	if (ret) {
		vma = ERR_PTR(ret);
		goto err;
	}

	vma->display_alignment = max(vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	i915_vma_get(vma);
err:
	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);

	return vma;
}

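/*
 * Pin @fb's backing object into the global GTT for scanout, installing a
 * fence for tiled scanout when @uses_fence is set and the vma is mappable
 * and fenceable (PLANE_HAS_FENCE is then set in *@out_flags). The whole
 * pin/fence sequence runs under a runtime PM reference. Returns a new vma
 * reference on success; release it with intel_unpin_fb_vma().
 */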
struct i915_vma *
intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
			   bool phys_cursor,
			   const struct i915_gtt_view *view,
			   bool uses_fence,
			   unsigned long *out_flags)
{
	struct drm_device *dev = fb->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
	intel_wakeref_t wakeref;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma;
	unsigned int pinctl;
	u32 alignment;
	int ret;

	if (drm_WARN_ON(dev, !i915_gem_object_is_framebuffer(obj)))
		return ERR_PTR(-EINVAL);

	if (phys_cursor)
		alignment = intel_cursor_alignment(dev_priv);
	else
		alignment = intel_surf_alignment(fb, 0);
	if (drm_WARN_ON(dev, alignment && !is_power_of_2(alignment)))
		return ERR_PTR(-EINVAL);

	/*
	 * Note that the w/a also requires 64 PTE of padding following the
	 * bo. We currently fill all unused PTE with the shadow page and so
	 * we should always have valid PTE following the scanout, preventing
	 * the VT-d warning.
	 */
	if (intel_scanout_needs_vtd_wa(dev_priv) && alignment < 256 * 1024)
		alignment = 256 * 1024;

	/*
	 * Global gtt pte registers are special registers which actually forward
	 * writes to a chunk of system memory, which means that there is no risk
	 * that the register values disappear as soon as we call
	 * intel_runtime_pm_put(), so it is correct to wrap only the
	 * pin/unpin/fence and not more.
	 */
	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);

	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);

	/*
	 * Valleyview is definitely limited to scanning out the first
	 * 512MiB. Let's presume this behaviour was inherited from the
	 * g4x display engine and that all earlier gen are similarly
	 * limited. Testing suggests that it is a little more
	 * complicated than this. For example, Cherryview appears quite
	 * happy to scanout from anywhere within its global aperture.
	 */
	pinctl = 0;
	if (HAS_GMCH(dev_priv))
		pinctl |= PIN_MAPPABLE;

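	/*
	 * ww transaction: on -EDEADLK contention the err path below backs
	 * off and jumps back to retry to run the whole sequence again.
	 */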
	i915_gem_ww_ctx_init(&ww, true);
retry:
	ret = i915_gem_object_lock(obj, &ww);
	if (!ret && phys_cursor)
		ret = i915_gem_object_attach_phys(obj, alignment);
	else if (!ret && HAS_LMEM(dev_priv))
		ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
	if (!ret)
		ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
						   view, pinctl);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unpin;
	}

	if (uses_fence && i915_vma_is_map_and_fenceable(vma)) {
		/*
		 * Install a fence for tiled scan-out. Pre-i965 always needs a
		 * fence, whereas 965+ only requires a fence if using
		 * framebuffer compression. For simplicity, we always, when
		 * possible, install a fence as the cost is not that onerous.
		 *
		 * If we fail to fence the tiled scanout, then either the
		 * modeset will reject the change (which is highly unlikely as
		 * the affected systems, all but one, do not have unmappable
		 * space) or we will not be able to enable full powersaving
		 * techniques (also likely not to apply due to various limits
		 * FBC and the like impose on the size of the buffer, which
		 * presumably we violated anyway with this unmappable buffer).
		 * Anyway, it is presumably better to stumble onwards with
		 * something and try to run the system in a "less than optimal"
		 * mode that matches the user configuration.
		 */
		ret = i915_vma_pin_fence(vma);
		if (ret != 0 && DISPLAY_VER(dev_priv) < 4) {
			i915_vma_unpin(vma);
			goto err_unpin;
		}
		ret = 0;

		if (vma->fence)
			*out_flags |= PLANE_HAS_FENCE;
	}

	i915_vma_get(vma);

err_unpin:
	i915_gem_object_unpin_pages(obj);
err:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	if (ret)
		vma = ERR_PTR(ret);

	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
	return vma;
}

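/*
 * Counterpart to intel_pin_and_fence_fb_obj() and intel_pin_fb_obj_dpt():
 * drops the fence (when PLANE_HAS_FENCE is set in @flags), unpins the vma
 * and puts the reference taken at pin time.
 */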
void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
{
	if (flags & PLANE_HAS_FENCE)
		i915_vma_unpin_fence(vma);
	i915_vma_unpin(vma);
	i915_vma_put(vma);
}

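/*
 * Pin the framebuffer of @plane_state for scanout. For a framebuffer that
 * uses a DPT this pins the DPT itself into the GGTT and the fb object into
 * the DPT; otherwise the fb object is pinned (and possibly fenced)
 * directly into the GGTT.
 */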
int intel_plane_pin_fb(struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	struct drm_framebuffer *fb = plane_state->hw.fb;
	struct i915_vma *vma;
	bool phys_cursor =
		plane->id == PLANE_CURSOR &&
		DISPLAY_INFO(dev_priv)->cursor_needs_physical;

	if (!intel_fb_uses_dpt(fb)) {
		vma = intel_pin_and_fence_fb_obj(fb, phys_cursor,
						 &plane_state->view.gtt,
						 intel_plane_uses_fence(plane_state),
						 &plane_state->flags);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		plane_state->ggtt_vma = vma;

		/*
		 * Pre-populate the dma address before we enter the vblank
		 * evade critical section as i915_gem_object_get_dma_address()
		 * will trigger might_sleep() even if it won't actually sleep,
		 * which is the case when the fb has already been pinned.
		 */
		if (phys_cursor)
			plane_state->phys_dma_addr =
				i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
	} else {
		struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);

		vma = intel_dpt_pin(intel_fb->dpt_vm);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		plane_state->ggtt_vma = vma;

		vma = intel_pin_fb_obj_dpt(fb, &plane_state->view.gtt, false,
					   &plane_state->flags, intel_fb->dpt_vm);
		if (IS_ERR(vma)) {
			intel_dpt_unpin(intel_fb->dpt_vm);
			plane_state->ggtt_vma = NULL;
			return PTR_ERR(vma);
		}

		plane_state->dpt_vma = vma;

		WARN_ON(plane_state->ggtt_vma == plane_state->dpt_vma);
	}

	return 0;
}

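/*
 * Undo intel_plane_pin_fb(): release the vma references stored in
 * @old_plane_state, unpinning the fb and, for DPT framebuffers, the DPT
 * itself.
 */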
void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
{
	struct drm_framebuffer *fb = old_plane_state->hw.fb;
	struct i915_vma *vma;

	if (!intel_fb_uses_dpt(fb)) {
		vma = fetch_and_zero(&old_plane_state->ggtt_vma);
		if (vma)
			intel_unpin_fb_vma(vma, old_plane_state->flags);
	} else {
		struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);

		vma = fetch_and_zero(&old_plane_state->dpt_vma);
		if (vma)
			intel_unpin_fb_vma(vma, old_plane_state->flags);

		vma = fetch_and_zero(&old_plane_state->ggtt_vma);
		if (vma)
			intel_dpt_unpin(intel_fb->dpt_vm);
	}
}