// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"
#include "shmem_utils.h"
#include "intel_gt_regs.h"

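/*
 * On platforms with media IP version 13+, enable idle messaging for the
 * standalone GSC command streamer and program the recommended 5us idle
 * hysteresis (see the register writes below).
 */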
static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) {
		intel_uncore_write(engine->gt->uncore,
				   RC_PSMI_CTRL_GSCCS,
				   _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
		/* hysteresis 0xA=5us as recommended in spec */
		intel_uncore_write(engine->gt->uncore,
				   PWRCTX_MAXCNT_GSCCS,
				   0xA);
	}
}

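/*
 * Debug aid: when CONFIG_DRM_I915_DEBUG_GEM is enabled, fill the context
 * image with CONTEXT_REDZONE so that any stale state that is not rewritten
 * before reuse shows up as an obvious poison pattern.
 */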
static void dbg_poison_ce(struct intel_context *ce)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	if (ce->state) {
		struct drm_i915_gem_object *obj = ce->state->obj;
		int type = intel_gt_coherent_map_type(ce->engine->gt, obj, true);
		void *map;

		if (!i915_gem_object_trylock(obj, NULL))
			return;

		map = i915_gem_object_pin_map(obj, type);
		if (!IS_ERR(map)) {
			memset(map, CONTEXT_REDZONE, obj->base.size);
			i915_gem_object_flush_map(obj);
			i915_gem_object_unpin_map(obj);
		}
		i915_gem_object_unlock(obj);
	}
}

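/*
 * First engine-pm reference: take a GT wakeref, scrub the kernel context
 * image (we cannot trust its contents after losing control of the HW while
 * parked) and wake up the breadcrumbs and heartbeat machinery.
 */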
static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;

	ENGINE_TRACE(engine, "\n");

	engine->wakeref_track = intel_gt_pm_get(engine->gt);

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* Flush all pending HW writes before we touch the context */
		while (unlikely(intel_context_inflight(ce)))
			intel_engine_flush_submission(engine);

		/* First poison the image to verify we never fully trust it */
		dbg_poison_ce(ce);

		/* Scrub the context image after our loss of control */
		ce->ops->reset(ce);

		CE_TRACE(ce, "reset { seqno:%x, *hwsp:%x, ring:%x }\n",
			 ce->timeline->seqno,
			 READ_ONCE(*ce->timeline->hwsp_seqno),
			 ce->ring->emit);
		GEM_BUG_ON(ce->timeline->seqno !=
			   READ_ONCE(*ce->timeline->hwsp_seqno));
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_breadcrumbs_unpark(engine->breadcrumbs);
	intel_engine_unpark_heartbeat(engine);
	return 0;
}

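/*
 * dma-fence callback attached to the final kernel-context request: sample
 * the time from emission to completion and feed it into the engine's
 * latency estimate (EWMA).
 */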
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

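/*
 * Queue the final request and let go of the engine wakeref (deferring the
 * actual park) under the timelines lock, so that neither retirement nor a
 * new submitter can race with us; see the serialisation comments below.
 */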
static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "parking\n");

	/*
	 * This open-codes one half of intel_context_enter(). We cannot call
	 * it directly: part of it has to be omitted here (see the large
	 * comment below), and the other part must not be called because the
	 * request was constructed directly with __i915_request_create(),
	 * which already incremented the active count via
	 * intel_context_mark_active().
	 */
	GEM_BUG_ON(rq->context->active_count != 1);
	__intel_gt_pm_get(engine->gt);
	rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so to engine_retire() */
	__i915_request_queue_bh(rq);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

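/*
 * Execlists only: idle the engine from within the kernel context by
 * submitting one final barrier request to it. Returns false if such a
 * request was queued, deferring the park until the request is retired;
 * otherwise the park proceeds immediately.
 */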
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	bool result = true;

	/*
	 * This is execlist-specific behaviour intended to ensure the GPU is
	 * idle by switching to a known 'safe' context. With GuC submission, the
	 * same idle guarantee is achieved by other means (disabling
	 * scheduling). Further, switching to a 'safe' context has no effect
	 * with GuC submission as the scheduler can just switch back again.
	 *
	 * FIXME: Move this backend scheduler specific behaviour into the
	 * scheduler backend.
	 */
	if (intel_engine_uses_guc(engine))
		return true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));
	GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma);

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new GPU user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	set_bit(CONTEXT_IS_PARKING, &ce->flags);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement that runs every
		 * second or two).
		 */
		BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	clear_bit(CONTEXT_IS_PARKING, &ce->flags);
	return result;
}

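/*
 * Flush any remaining idle barrier tasks, signalling each callback with
 * -EAGAIN as no request is going to run them now (e.g. after wedging).
 */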
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct dma_fence_cb *cb =
			container_of((struct list_head *)node,
				     typeof(*cb), node);

		cb->func(ERR_PTR(-EAGAIN), cb);
	}
}

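/*
 * Last engine-pm reference dropped: switch back to the kernel context,
 * quiesce the heartbeat and breadcrumbs, and release our GT wakeref.
 */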
static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "parked\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_breadcrumbs_park(engine->breadcrumbs);

	if (engine->park)
		engine->park(engine);

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
	return 0;
}

static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
	intel_engine_init_heartbeat(engine);

	intel_gsc_idle_msg_enable(engine);
}

/**
 * intel_engine_reset_pinned_contexts - Reset the pinned contexts of
 * an engine.
 * @engine: The engine whose pinned contexts we want to reset.
 *
 * Typically the pinned context LMEM images lose their content, or have it
 * corrupted, across suspend. This function resets those images.
 */
void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	list_for_each_entry(ce, &engine->pinned_contexts_list,
			    pinned_contexts_link) {
		/* kernel context gets reset at __engine_unpark() */
		if (ce == engine->kernel_context)
			continue;

		dbg_poison_ce(ce);
		ce->ops->reset(ce);
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif