/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Anhua Xu
 *    Kevin Tian <kevin.tian@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

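/*
 * Check whether a vGPU still has workloads queued on any engine of its
 * GT; vGPUs with nothing pending are skipped when picking the next one.
 */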
static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
{
	enum intel_engine_id i;
	struct intel_engine_cs *engine;

	for_each_engine(engine, vgpu->gvt->gt, i) {
		if (!list_empty(workload_q_head(vgpu, engine)))
			return true;
	}

	return false;
}

/* Give a vGPU 2 seconds of higher priority when it starts scheduling */
#define GVT_SCHED_VGPU_PRI_TIME 2

struct vgpu_sched_data {
	struct list_head lru_list;
	struct intel_vgpu *vgpu;
	bool active;
	bool pri_sched;
	ktime_t pri_time;
	ktime_t sched_in_time;
	ktime_t sched_time;
	ktime_t left_ts;
	ktime_t allocated_ts;

	struct vgpu_sched_ctl sched_ctl;
};

struct gvt_sched_data {
	struct intel_gvt *gvt;
	struct hrtimer timer;
	unsigned long period;
	struct list_head lru_runq_head;
	ktime_t expire_time;
};

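/*
 * Charge the time elapsed since the vGPU was switched in: accumulate it
 * into sched_time and deduct it from the remaining timeslice (left_ts).
 * The idle vGPU is never charged.
 */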
static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
{
	ktime_t delta_ts;
	struct vgpu_sched_data *vgpu_data;

	if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
		return;

	vgpu_data = vgpu->sched_data;
	delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
	vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
	vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
	vgpu_data->sched_in_time = cur_time;
}

#define GVT_TS_BALANCE_PERIOD_MS 100
#define GVT_TS_BALANCE_STAGE_NUM 10

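/*
 * Redistribute the 100ms balance period among the vGPUs on the run queue
 * in proportion to their weights. At stage 0 (once every
 * GVT_TS_BALANCE_STAGE_NUM invocations) the budget is reset; in the other
 * stages it is topped up, so leftover time or debt carries forward. For
 * example, two vGPUs with weights 2 and 1 would be granted roughly
 * 100ms / 3 * 2 ~= 66.6ms and 100ms / 3 * 1 ~= 33.3ms respectively.
 */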
static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
{
	struct vgpu_sched_data *vgpu_data;
	struct list_head *pos;
	static u64 stage_check;
	int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;

	/* The timeslice accumulation is reset at stage 0: the budget is
	 * allocated afresh, without carrying over the debt of previous
	 * stages.
	 */
	if (stage == 0) {
		int total_weight = 0;
		ktime_t fair_timeslice;

		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
			total_weight += vgpu_data->sched_ctl.weight;
		}

		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
			fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
						     total_weight) * vgpu_data->sched_ctl.weight;

			vgpu_data->allocated_ts = fair_timeslice;
			vgpu_data->left_ts = vgpu_data->allocated_ts;
		}
	} else {
		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);

			/* The timeslice for the next 100ms should include the
			 * leftover (or debt) slice of previous stages.
			 */
			vgpu_data->left_ts += vgpu_data->allocated_ts;
		}
	}
}

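/*
 * Perform the actual vGPU switch: request a reschedule and bail out while
 * workloads are still in flight on any engine; once drained, charge the
 * outgoing vGPU's timeslice, promote next_vgpu to current_vgpu and wake
 * the workload dispatch threads.
 */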
static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	enum intel_engine_id i;
	struct intel_engine_cs *engine;
	struct vgpu_sched_data *vgpu_data;
	ktime_t cur_time;

	/* no need to schedule if next_vgpu is the same as current_vgpu;
	 * let the scheduler choose next_vgpu again by setting it to NULL.
	 */
	if (scheduler->next_vgpu == scheduler->current_vgpu) {
		scheduler->next_vgpu = NULL;
		return;
	}

	/*
	 * after the flag is set, the workload dispatch thread will
	 * stop dispatching workloads for the current vgpu
	 */
	scheduler->need_reschedule = true;

	/* still have uncompleted workloads? */
	for_each_engine(engine, gvt->gt, i) {
		if (scheduler->current_workload[engine->id])
			return;
	}

	cur_time = ktime_get();
	vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
	vgpu_data = scheduler->next_vgpu->sched_data;
	vgpu_data->sched_in_time = cur_time;

	/* switch current vgpu */
	scheduler->current_vgpu = scheduler->next_vgpu;
	scheduler->next_vgpu = NULL;

	scheduler->need_reschedule = false;

	/* wake up the workload dispatch thread */
	for_each_engine(engine, gvt->gt, i)
		wake_up(&scheduler->waitq[engine->id]);
}

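/*
 * Scan the run queue in LRU order for a vGPU with pending workloads.
 * A vGPU inside its start-up priority window is picked immediately;
 * otherwise a vGPU is only picked while it still has timeslice left.
 */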
static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
{
	struct vgpu_sched_data *vgpu_data;
	struct intel_vgpu *vgpu = NULL;
	struct list_head *head = &sched_data->lru_runq_head;
	struct list_head *pos;

	/* search for a vgpu with pending workload */
	list_for_each(pos, head) {

		vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
			continue;

		if (vgpu_data->pri_sched) {
			if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
				vgpu = vgpu_data->vgpu;
				break;
			} else
				vgpu_data->pri_sched = false;
		}

		/* Return the vGPU only if it has timeslice left */
		if (vgpu_data->left_ts > 0) {
			vgpu = vgpu_data->vgpu;
			break;
		}
	}

	return vgpu;
}

/* in nanoseconds */
#define GVT_DEFAULT_TIME_SLICE 1000000

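/*
 * Core of the time-based scheduler (TBS): pick the next busy vGPU, move
 * it to the tail of the LRU run queue (unless it is still in its priority
 * window), and fall back to the idle vGPU when nobody has pending work.
 */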
static void tbs_sched_func(struct gvt_sched_data *sched_data)
{
	struct intel_gvt *gvt = sched_data->gvt;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct vgpu_sched_data *vgpu_data;
	struct intel_vgpu *vgpu = NULL;

	/* no active vgpu, or a target has already been chosen */
	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
		goto out;

	vgpu = find_busy_vgpu(sched_data);
	if (vgpu) {
		scheduler->next_vgpu = vgpu;
		vgpu_data = vgpu->sched_data;
		if (!vgpu_data->pri_sched) {
			/* Move the last used vGPU to the tail of lru_list */
			list_del_init(&vgpu_data->lru_list);
			list_add_tail(&vgpu_data->lru_list,
				      &sched_data->lru_runq_head);
		}
	} else {
		scheduler->next_vgpu = gvt->idle_vgpu;
	}
out:
	if (scheduler->next_vgpu)
		try_to_schedule_next_vgpu(gvt);
}

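/*
 * Scheduler tick, presumably invoked from the GVT service thread on
 * INTEL_GVT_REQUEST_SCHED: rebalance timeslices when the 100ms balance
 * period expires, update the current vGPU's accounting, then run the
 * TBS policy.
 */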
void intel_gvt_schedule(struct intel_gvt *gvt)
{
	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
	ktime_t cur_time;

	mutex_lock(&gvt->sched_lock);
	cur_time = ktime_get();

	if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
				(void *)&gvt->service_request)) {
		if (cur_time >= sched_data->expire_time) {
			gvt_balance_timeslice(sched_data);
			sched_data->expire_time = ktime_add_ms(
				cur_time, GVT_TS_BALANCE_PERIOD_MS);
		}
	}
	clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);

	vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
	tbs_sched_func(sched_data);

	mutex_unlock(&gvt->sched_lock);
}

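/*
 * hrtimer callback: fires every data->period nanoseconds, raises
 * INTEL_GVT_REQUEST_SCHED so the scheduling work runs outside of the
 * timer context, then re-arms itself.
 */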
static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
{
	struct gvt_sched_data *data;

	data = container_of(timer_data, struct gvt_sched_data, timer);

	intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);

	hrtimer_add_expires_ns(&data->timer, data->period);

	return HRTIMER_RESTART;
}

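/*
 * Allocate the global TBS state: an empty LRU run queue and a 1ms
 * (GVT_DEFAULT_TIME_SLICE) periodic hrtimer. The timer is only started
 * once the first vGPU begins scheduling.
 */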
static int tbs_sched_init(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&gvt->scheduler;

	struct gvt_sched_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	INIT_LIST_HEAD(&data->lru_runq_head);
	hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	data->timer.function = tbs_timer_fn;
	data->period = GVT_DEFAULT_TIME_SLICE;
	data->gvt = gvt;

	scheduler->sched_data = data;

	return 0;
}

static void tbs_sched_clean(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&gvt->scheduler;
	struct gvt_sched_data *data = scheduler->sched_data;

	hrtimer_cancel(&data->timer);

	kfree(data);
	scheduler->sched_data = NULL;
}

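/*
 * Per-vGPU policy state: snapshot the configured scheduling weight and
 * set up an (initially unlinked) LRU list node.
 */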
static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	data->sched_ctl.weight = vgpu->sched_ctl.weight;
	data->vgpu = vgpu;
	INIT_LIST_HEAD(&data->lru_list);

	vgpu->sched_data = data;

	return 0;
}

static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;

	kfree(vgpu->sched_data);
	vgpu->sched_data = NULL;

	/* this vgpu id has been removed */
	if (idr_is_empty(&gvt->vgpu_idr))
		hrtimer_cancel(&sched_data->timer);
}

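/*
 * Put the vGPU on the run queue with a GVT_SCHED_VGPU_PRI_TIME (2s)
 * priority window, and kick off the scheduling timer if it is not
 * already running.
 */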
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
{
	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
	ktime_t now;

	if (!list_empty(&vgpu_data->lru_list))
		return;

	now = ktime_get();
	vgpu_data->pri_time = ktime_add(now,
					ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
	vgpu_data->pri_sched = true;

	list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);

	if (!hrtimer_active(&sched_data->timer))
		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
			      sched_data->period), HRTIMER_MODE_ABS);
	vgpu_data->active = true;
}

static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

	list_del_init(&vgpu_data->lru_list);
	vgpu_data->active = false;
}

static const struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
	.init = tbs_sched_init,
	.clean = tbs_sched_clean,
	.init_vgpu = tbs_sched_init_vgpu,
	.clean_vgpu = tbs_sched_clean_vgpu,
	.start_schedule = tbs_sched_start_schedule,
	.stop_schedule = tbs_sched_stop_schedule,
};

int intel_gvt_init_sched_policy(struct intel_gvt *gvt)
{
	int ret;

	mutex_lock(&gvt->sched_lock);
	gvt->scheduler.sched_ops = &tbs_schedule_ops;
	ret = gvt->scheduler.sched_ops->init(gvt);
	mutex_unlock(&gvt->sched_lock);

	return ret;
}

void intel_gvt_clean_sched_policy(struct intel_gvt *gvt)
{
	mutex_lock(&gvt->sched_lock);
	gvt->scheduler.sched_ops->clean(gvt);
	mutex_unlock(&gvt->sched_lock);
}

/* For the per-vgpu scheduler policy, there are 2 pieces of per-vgpu data:
 * sched_data and sched_ctl. We treat these as part of the global scheduler,
 * protected by gvt->sched_lock. Callers should decide for themselves
 * whether vgpu_lock needs to be held outside.
 */

int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu)
{
	int ret;

	mutex_lock(&vgpu->gvt->sched_lock);
	ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu);
	mutex_unlock(&vgpu->gvt->sched_lock);

	return ret;
}

void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu)
{
	mutex_lock(&vgpu->gvt->sched_lock);
	vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu);
	mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_vgpu_start_schedule(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

	mutex_lock(&vgpu->gvt->sched_lock);
	if (!vgpu_data->active) {
		gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id);
		vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu);
	}
	mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_gvt_kick_schedule(struct intel_gvt *gvt)
{
	mutex_lock(&gvt->sched_lock);
	intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
	mutex_unlock(&gvt->sched_lock);
}

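/*
 * Take the vGPU out of scheduling: drop it from the run queue, clear any
 * current/next references to it, and hand the MMIO context of engines it
 * still owns back to the host, under a runtime PM wakeref.
 */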
void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&vgpu->gvt->scheduler;
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (!vgpu_data->active)
		return;

	gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id);

	mutex_lock(&vgpu->gvt->sched_lock);
	scheduler->sched_ops->stop_schedule(vgpu);

	if (scheduler->next_vgpu == vgpu)
		scheduler->next_vgpu = NULL;

	if (scheduler->current_vgpu == vgpu) {
		/* stop workload dispatching */
		scheduler->need_reschedule = true;
		scheduler->current_vgpu = NULL;
	}

	intel_runtime_pm_get(&dev_priv->runtime_pm);
	spin_lock_bh(&scheduler->mmio_context_lock);
	for_each_engine(engine, vgpu->gvt->gt, id) {
		if (scheduler->engine_owner[engine->id] == vgpu) {
			intel_gvt_switch_mmio(vgpu, NULL, engine);
			scheduler->engine_owner[engine->id] = NULL;
		}
	}
	spin_unlock_bh(&scheduler->mmio_context_lock);
	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
	mutex_unlock(&vgpu->gvt->sched_lock);
}