1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2020 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/pm_qos.h> |
7 | #include <linux/sort.h> |
8 | |
9 | #include "gem/i915_gem_internal.h" |
10 | |
11 | #include "i915_reg.h" |
12 | #include "intel_engine_heartbeat.h" |
13 | #include "intel_engine_pm.h" |
14 | #include "intel_engine_regs.h" |
15 | #include "intel_gpu_commands.h" |
16 | #include "intel_gt_clock_utils.h" |
17 | #include "intel_gt_pm.h" |
18 | #include "intel_rc6.h" |
19 | #include "selftest_engine_heartbeat.h" |
20 | #include "selftest_rps.h" |
21 | #include "selftests/igt_flush_test.h" |
22 | #include "selftests/igt_spinner.h" |
23 | #include "selftests/librapl.h" |
24 | |
/* Try to isolate the impact of cstates from determining frequency response */
26 | #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ |
27 | |
/*
 * No-op work handler. The tests in this file install it as rps->work.func
 * (saving and later restoring the original handler) for their duration.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* Intentionally empty. */
}
31 | |
/*
 * sort() comparator for u64 elements, ascending.
 *
 * Returns -1, 0 or 1 when *A is less than, equal to, or greater than *B.
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 lhs = *(const u64 *)A;
	const u64 rhs = *(const u64 *)B;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
43 | |
/*
 * sort() comparator for u32 elements, ascending.
 *
 * Returns -1, 0 or 1 when *A is less than, equal to, or greater than *B.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 lhs = *(const u32 *)A;
	const u32 rhs = *(const u32 *)B;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
55 | |
56 | static struct i915_vma * |
57 | create_spin_counter(struct intel_engine_cs *engine, |
58 | struct i915_address_space *vm, |
59 | bool srm, |
60 | u32 **cancel, |
61 | u32 **counter) |
62 | { |
63 | enum { |
64 | COUNT, |
65 | INC, |
66 | __NGPR__, |
67 | }; |
68 | #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x) |
69 | struct drm_i915_gem_object *obj; |
70 | struct i915_vma *vma; |
71 | unsigned long end; |
72 | u32 *base, *cs; |
73 | int loop, i; |
74 | int err; |
75 | |
76 | obj = i915_gem_object_create_internal(i915: vm->i915, size: 64 << 10); |
77 | if (IS_ERR(ptr: obj)) |
78 | return ERR_CAST(ptr: obj); |
79 | |
80 | end = obj->base.size / sizeof(u32) - 1; |
81 | |
82 | vma = i915_vma_instance(obj, vm, NULL); |
83 | if (IS_ERR(ptr: vma)) { |
84 | err = PTR_ERR(ptr: vma); |
85 | goto err_put; |
86 | } |
87 | |
88 | err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER); |
89 | if (err) |
90 | goto err_unlock; |
91 | |
92 | i915_vma_lock(vma); |
93 | |
94 | base = i915_gem_object_pin_map(obj, type: I915_MAP_WC); |
95 | if (IS_ERR(ptr: base)) { |
96 | err = PTR_ERR(ptr: base); |
97 | goto err_unpin; |
98 | } |
99 | cs = base; |
100 | |
101 | *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2); |
102 | for (i = 0; i < __NGPR__; i++) { |
103 | *cs++ = i915_mmio_reg_offset(CS_GPR(i)); |
104 | *cs++ = 0; |
105 | *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4; |
106 | *cs++ = 0; |
107 | } |
108 | |
109 | *cs++ = MI_LOAD_REGISTER_IMM(1); |
110 | *cs++ = i915_mmio_reg_offset(CS_GPR(INC)); |
111 | *cs++ = 1; |
112 | |
113 | loop = cs - base; |
114 | |
115 | /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */ |
116 | for (i = 0; i < 1024; i++) { |
117 | *cs++ = MI_MATH(4); |
118 | *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT)); |
119 | *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC)); |
120 | *cs++ = MI_MATH_ADD; |
121 | *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU); |
122 | |
123 | if (srm) { |
124 | *cs++ = MI_STORE_REGISTER_MEM_GEN8; |
125 | *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT)); |
126 | *cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs)); |
127 | *cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs)); |
128 | } |
129 | } |
130 | |
131 | *cs++ = MI_BATCH_BUFFER_START_GEN8; |
132 | *cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs)); |
133 | *cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs)); |
134 | GEM_BUG_ON(cs - base > end); |
135 | |
136 | i915_gem_object_flush_map(obj); |
137 | |
138 | *cancel = base + loop; |
139 | *counter = srm ? memset32(s: base + end, v: 0, n: 1) : NULL; |
140 | return vma; |
141 | |
142 | err_unpin: |
143 | i915_vma_unpin(vma); |
144 | err_unlock: |
145 | i915_vma_unlock(vma); |
146 | err_put: |
147 | i915_gem_object_put(obj); |
148 | return ERR_PTR(error: err); |
149 | } |
150 | |
151 | static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms) |
152 | { |
153 | u8 history[64], i; |
154 | unsigned long end; |
155 | int sleep; |
156 | |
157 | i = 0; |
158 | memset(history, freq, sizeof(history)); |
159 | sleep = 20; |
160 | |
161 | /* The PCU does not change instantly, but drifts towards the goal? */ |
162 | end = jiffies + msecs_to_jiffies(m: timeout_ms); |
163 | do { |
164 | u8 act; |
165 | |
166 | act = read_cagf(rps); |
167 | if (time_after(jiffies, end)) |
168 | return act; |
169 | |
170 | /* Target acquired */ |
171 | if (act == freq) |
172 | return act; |
173 | |
174 | /* Any change within the last N samples? */ |
175 | if (!memchr_inv(p: history, c: act, size: sizeof(history))) |
176 | return act; |
177 | |
178 | history[i] = act; |
179 | i = (i + 1) % ARRAY_SIZE(history); |
180 | |
181 | usleep_range(min: sleep, max: 2 * sleep); |
182 | sleep *= 2; |
183 | if (sleep > timeout_ms * 20) |
184 | sleep = timeout_ms * 20; |
185 | } while (1); |
186 | } |
187 | |
188 | static u8 rps_set_check(struct intel_rps *rps, u8 freq) |
189 | { |
190 | mutex_lock(&rps->lock); |
191 | GEM_BUG_ON(!intel_rps_is_active(rps)); |
192 | if (wait_for(!intel_rps_set(rps, freq), 50)) { |
193 | mutex_unlock(lock: &rps->lock); |
194 | return 0; |
195 | } |
196 | GEM_BUG_ON(rps->last_freq != freq); |
197 | mutex_unlock(lock: &rps->lock); |
198 | |
199 | return wait_for_freq(rps, freq, timeout_ms: 50); |
200 | } |
201 | |
202 | static void show_pstate_limits(struct intel_rps *rps) |
203 | { |
204 | struct drm_i915_private *i915 = rps_to_i915(rps); |
205 | |
206 | if (IS_BROXTON(i915)) { |
207 | pr_info("P_STATE_CAP[%x]: 0x%08x\n" , |
208 | i915_mmio_reg_offset(BXT_RP_STATE_CAP), |
209 | intel_uncore_read(rps_to_uncore(rps), |
210 | BXT_RP_STATE_CAP)); |
211 | } else if (GRAPHICS_VER(i915) == 9) { |
212 | pr_info("P_STATE_LIMITS[%x]: 0x%08x\n" , |
213 | i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS), |
214 | intel_uncore_read(rps_to_uncore(rps), |
215 | GEN9_RP_STATE_LIMITS)); |
216 | } |
217 | } |
218 | |
219 | int live_rps_clock_interval(void *arg) |
220 | { |
221 | struct intel_gt *gt = arg; |
222 | struct intel_rps *rps = >->rps; |
223 | void (*saved_work)(struct work_struct *wrk); |
224 | struct intel_engine_cs *engine; |
225 | enum intel_engine_id id; |
226 | struct igt_spinner spin; |
227 | intel_wakeref_t wakeref; |
228 | int err = 0; |
229 | |
230 | if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) |
231 | return 0; |
232 | |
233 | if (igt_spinner_init(spin: &spin, gt)) |
234 | return -ENOMEM; |
235 | |
236 | intel_gt_pm_wait_for_idle(gt); |
237 | saved_work = rps->work.func; |
238 | rps->work.func = dummy_rps_work; |
239 | |
240 | wakeref = intel_gt_pm_get(gt); |
241 | intel_rps_disable(rps: >->rps); |
242 | |
243 | intel_gt_check_clock_frequency(gt); |
244 | |
245 | for_each_engine(engine, gt, id) { |
246 | struct i915_request *rq; |
247 | u32 cycles; |
248 | u64 dt; |
249 | |
250 | if (!intel_engine_can_store_dword(engine)) |
251 | continue; |
252 | |
253 | st_engine_heartbeat_disable(engine); |
254 | |
255 | rq = igt_spinner_create_request(spin: &spin, |
256 | ce: engine->kernel_context, |
257 | MI_NOOP); |
258 | if (IS_ERR(ptr: rq)) { |
259 | st_engine_heartbeat_enable(engine); |
260 | err = PTR_ERR(ptr: rq); |
261 | break; |
262 | } |
263 | |
264 | i915_request_add(rq); |
265 | |
266 | if (!igt_wait_for_spinner(spin: &spin, rq)) { |
267 | pr_err("%s: RPS spinner did not start\n" , |
268 | engine->name); |
269 | igt_spinner_end(spin: &spin); |
270 | st_engine_heartbeat_enable(engine); |
271 | intel_gt_set_wedged(gt: engine->gt); |
272 | err = -EIO; |
273 | break; |
274 | } |
275 | |
276 | intel_uncore_forcewake_get(uncore: gt->uncore, domains: FORCEWAKE_ALL); |
277 | |
278 | intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0); |
279 | |
280 | /* Set the evaluation interval to infinity! */ |
281 | intel_uncore_write_fw(gt->uncore, |
282 | GEN6_RP_UP_EI, 0xffffffff); |
283 | intel_uncore_write_fw(gt->uncore, |
284 | GEN6_RP_UP_THRESHOLD, 0xffffffff); |
285 | |
286 | intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, |
287 | GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG); |
288 | |
289 | if (wait_for(intel_uncore_read_fw(gt->uncore, |
290 | GEN6_RP_CUR_UP_EI), |
291 | 10)) { |
292 | /* Just skip the test; assume lack of HW support */ |
293 | pr_notice("%s: rps evaluation interval not ticking\n" , |
294 | engine->name); |
295 | err = -ENODEV; |
296 | } else { |
297 | ktime_t dt_[5]; |
298 | u32 cycles_[5]; |
299 | int i; |
300 | |
301 | for (i = 0; i < 5; i++) { |
302 | preempt_disable(); |
303 | |
304 | cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); |
305 | dt_[i] = ktime_get(); |
306 | |
307 | udelay(1000); |
308 | |
309 | cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); |
310 | dt_[i] = ktime_sub(ktime_get(), dt_[i]); |
311 | |
312 | preempt_enable(); |
313 | } |
314 | |
315 | /* Use the median of both cycle/dt; close enough */ |
316 | sort(base: cycles_, num: 5, size: sizeof(*cycles_), cmp_func: cmp_u32, NULL); |
317 | cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4; |
318 | sort(base: dt_, num: 5, size: sizeof(*dt_), cmp_func: cmp_u64, NULL); |
319 | dt = div_u64(dividend: dt_[1] + 2 * dt_[2] + dt_[3], divisor: 4); |
320 | } |
321 | |
322 | intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0); |
323 | intel_uncore_forcewake_put(uncore: gt->uncore, domains: FORCEWAKE_ALL); |
324 | |
325 | igt_spinner_end(spin: &spin); |
326 | st_engine_heartbeat_enable(engine); |
327 | |
328 | if (err == 0) { |
329 | u64 time = intel_gt_pm_interval_to_ns(gt, count: cycles); |
330 | u32 expected = |
331 | intel_gt_ns_to_pm_interval(gt, ns: dt); |
332 | |
333 | pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n" , |
334 | engine->name, cycles, time, dt, expected, |
335 | gt->clock_frequency / 1000); |
336 | |
337 | if (10 * time < 8 * dt || |
338 | 8 * time > 10 * dt) { |
339 | pr_err("%s: rps clock time does not match walltime!\n" , |
340 | engine->name); |
341 | err = -EINVAL; |
342 | } |
343 | |
344 | if (10 * expected < 8 * cycles || |
345 | 8 * expected > 10 * cycles) { |
346 | pr_err("%s: walltime does not match rps clock ticks!\n" , |
347 | engine->name); |
348 | err = -EINVAL; |
349 | } |
350 | } |
351 | |
352 | if (igt_flush_test(i915: gt->i915)) |
353 | err = -EIO; |
354 | |
355 | break; /* once is enough */ |
356 | } |
357 | |
358 | intel_rps_enable(rps: >->rps); |
359 | intel_gt_pm_put(gt, handle: wakeref); |
360 | |
361 | igt_spinner_fini(spin: &spin); |
362 | |
363 | intel_gt_pm_wait_for_idle(gt); |
364 | rps->work.func = saved_work; |
365 | |
366 | if (err == -ENODEV) /* skipped, don't report a fail */ |
367 | err = 0; |
368 | |
369 | return err; |
370 | } |
371 | |
372 | int live_rps_control(void *arg) |
373 | { |
374 | struct intel_gt *gt = arg; |
375 | struct intel_rps *rps = >->rps; |
376 | void (*saved_work)(struct work_struct *wrk); |
377 | struct intel_engine_cs *engine; |
378 | enum intel_engine_id id; |
379 | struct igt_spinner spin; |
380 | intel_wakeref_t wakeref; |
381 | int err = 0; |
382 | |
383 | /* |
384 | * Check that the actual frequency matches our requested frequency, |
385 | * to verify our control mechanism. We have to be careful that the |
386 | * PCU may throttle the GPU in which case the actual frequency used |
387 | * will be lowered than requested. |
388 | */ |
389 | |
390 | if (!intel_rps_is_enabled(rps)) |
391 | return 0; |
392 | |
393 | if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */ |
394 | return 0; |
395 | |
396 | if (igt_spinner_init(spin: &spin, gt)) |
397 | return -ENOMEM; |
398 | |
399 | intel_gt_pm_wait_for_idle(gt); |
400 | saved_work = rps->work.func; |
401 | rps->work.func = dummy_rps_work; |
402 | |
403 | wakeref = intel_gt_pm_get(gt); |
404 | for_each_engine(engine, gt, id) { |
405 | struct i915_request *rq; |
406 | ktime_t min_dt, max_dt; |
407 | int f, limit; |
408 | int min, max; |
409 | |
410 | if (!intel_engine_can_store_dword(engine)) |
411 | continue; |
412 | |
413 | st_engine_heartbeat_disable(engine); |
414 | |
415 | rq = igt_spinner_create_request(spin: &spin, |
416 | ce: engine->kernel_context, |
417 | MI_NOOP); |
418 | if (IS_ERR(ptr: rq)) { |
419 | err = PTR_ERR(ptr: rq); |
420 | break; |
421 | } |
422 | |
423 | i915_request_add(rq); |
424 | |
425 | if (!igt_wait_for_spinner(spin: &spin, rq)) { |
426 | pr_err("%s: RPS spinner did not start\n" , |
427 | engine->name); |
428 | igt_spinner_end(spin: &spin); |
429 | st_engine_heartbeat_enable(engine); |
430 | intel_gt_set_wedged(gt: engine->gt); |
431 | err = -EIO; |
432 | break; |
433 | } |
434 | |
435 | if (rps_set_check(rps, freq: rps->min_freq) != rps->min_freq) { |
436 | pr_err("%s: could not set minimum frequency [%x], only %x!\n" , |
437 | engine->name, rps->min_freq, read_cagf(rps)); |
438 | igt_spinner_end(spin: &spin); |
439 | st_engine_heartbeat_enable(engine); |
440 | show_pstate_limits(rps); |
441 | err = -EINVAL; |
442 | break; |
443 | } |
444 | |
445 | for (f = rps->min_freq + 1; f < rps->max_freq; f++) { |
446 | if (rps_set_check(rps, freq: f) < f) |
447 | break; |
448 | } |
449 | |
450 | limit = rps_set_check(rps, freq: f); |
451 | |
452 | if (rps_set_check(rps, freq: rps->min_freq) != rps->min_freq) { |
453 | pr_err("%s: could not restore minimum frequency [%x], only %x!\n" , |
454 | engine->name, rps->min_freq, read_cagf(rps)); |
455 | igt_spinner_end(spin: &spin); |
456 | st_engine_heartbeat_enable(engine); |
457 | show_pstate_limits(rps); |
458 | err = -EINVAL; |
459 | break; |
460 | } |
461 | |
462 | max_dt = ktime_get(); |
463 | max = rps_set_check(rps, freq: limit); |
464 | max_dt = ktime_sub(ktime_get(), max_dt); |
465 | |
466 | min_dt = ktime_get(); |
467 | min = rps_set_check(rps, freq: rps->min_freq); |
468 | min_dt = ktime_sub(ktime_get(), min_dt); |
469 | |
470 | igt_spinner_end(spin: &spin); |
471 | st_engine_heartbeat_enable(engine); |
472 | |
473 | pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n" , |
474 | engine->name, |
475 | rps->min_freq, intel_gpu_freq(rps, rps->min_freq), |
476 | rps->max_freq, intel_gpu_freq(rps, rps->max_freq), |
477 | limit, intel_gpu_freq(rps, limit), |
478 | min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); |
479 | |
480 | if (limit == rps->min_freq) { |
481 | pr_err("%s: GPU throttled to minimum!\n" , |
482 | engine->name); |
483 | show_pstate_limits(rps); |
484 | err = -ENODEV; |
485 | break; |
486 | } |
487 | |
488 | if (igt_flush_test(i915: gt->i915)) { |
489 | err = -EIO; |
490 | break; |
491 | } |
492 | } |
493 | intel_gt_pm_put(gt, handle: wakeref); |
494 | |
495 | igt_spinner_fini(spin: &spin); |
496 | |
497 | intel_gt_pm_wait_for_idle(gt); |
498 | rps->work.func = saved_work; |
499 | |
500 | return err; |
501 | } |
502 | |
503 | static void show_pcu_config(struct intel_rps *rps) |
504 | { |
505 | struct drm_i915_private *i915 = rps_to_i915(rps); |
506 | unsigned int max_gpu_freq, min_gpu_freq; |
507 | intel_wakeref_t wakeref; |
508 | int gpu_freq; |
509 | |
510 | if (!HAS_LLC(i915)) |
511 | return; |
512 | |
513 | min_gpu_freq = rps->min_freq; |
514 | max_gpu_freq = rps->max_freq; |
515 | if (GRAPHICS_VER(i915) >= 9) { |
516 | /* Convert GT frequency to 50 HZ units */ |
517 | min_gpu_freq /= GEN9_FREQ_SCALER; |
518 | max_gpu_freq /= GEN9_FREQ_SCALER; |
519 | } |
520 | |
521 | wakeref = intel_runtime_pm_get(rpm: rps_to_uncore(rps)->rpm); |
522 | |
523 | pr_info("%5s %5s %5s\n" , "GPU" , "eCPU" , "eRing" ); |
524 | for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { |
525 | int ia_freq = gpu_freq; |
526 | |
527 | snb_pcode_read(uncore: rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE, |
528 | val: &ia_freq, NULL); |
529 | |
530 | pr_info("%5d %5d %5d\n" , |
531 | gpu_freq * 50, |
532 | ((ia_freq >> 0) & 0xff) * 100, |
533 | ((ia_freq >> 8) & 0xff) * 100); |
534 | } |
535 | |
536 | intel_runtime_pm_put(rpm: rps_to_uncore(rps)->rpm, wref: wakeref); |
537 | } |
538 | |
539 | static u64 __measure_frequency(u32 *cntr, int duration_ms) |
540 | { |
541 | u64 dc, dt; |
542 | |
543 | dc = READ_ONCE(*cntr); |
544 | dt = ktime_get(); |
545 | usleep_range(min: 1000 * duration_ms, max: 2000 * duration_ms); |
546 | dc = READ_ONCE(*cntr) - dc; |
547 | dt = ktime_get() - dt; |
548 | |
549 | return div64_u64(dividend: 1000 * 1000 * dc, divisor: dt); |
550 | } |
551 | |
552 | static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq) |
553 | { |
554 | u64 x[5]; |
555 | int i; |
556 | |
557 | *freq = rps_set_check(rps, freq: *freq); |
558 | for (i = 0; i < 5; i++) |
559 | x[i] = __measure_frequency(cntr, duration_ms: 2); |
560 | *freq = (*freq + read_cagf(rps)) / 2; |
561 | |
562 | /* A simple triangle filter for better result stability */ |
563 | sort(base: x, num: 5, size: sizeof(*x), cmp_func: cmp_u64, NULL); |
564 | return div_u64(dividend: x[1] + 2 * x[2] + x[3], divisor: 4); |
565 | } |
566 | |
567 | static u64 __measure_cs_frequency(struct intel_engine_cs *engine, |
568 | int duration_ms) |
569 | { |
570 | u64 dc, dt; |
571 | |
572 | dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)); |
573 | dt = ktime_get(); |
574 | usleep_range(min: 1000 * duration_ms, max: 2000 * duration_ms); |
575 | dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc; |
576 | dt = ktime_get() - dt; |
577 | |
578 | return div64_u64(dividend: 1000 * 1000 * dc, divisor: dt); |
579 | } |
580 | |
581 | static u64 measure_cs_frequency_at(struct intel_rps *rps, |
582 | struct intel_engine_cs *engine, |
583 | int *freq) |
584 | { |
585 | u64 x[5]; |
586 | int i; |
587 | |
588 | *freq = rps_set_check(rps, freq: *freq); |
589 | for (i = 0; i < 5; i++) |
590 | x[i] = __measure_cs_frequency(engine, duration_ms: 2); |
591 | *freq = (*freq + read_cagf(rps)) / 2; |
592 | |
593 | /* A simple triangle filter for better result stability */ |
594 | sort(base: x, num: 5, size: sizeof(*x), cmp_func: cmp_u64, NULL); |
595 | return div_u64(dividend: x[1] + 2 * x[2] + x[3], divisor: 4); |
596 | } |
597 | |
/*
 * Return true when the ratio x / y lies strictly between f_n / f_d and
 * f_d / f_n, evaluated by cross-multiplication in 64-bit arithmetic (no
 * overflow checking is performed).
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	const bool above_lower = f_d * x > f_n * y;
	const bool below_upper = f_n * x < f_d * y;

	return above_lower && below_upper;
}
602 | |
603 | int live_rps_frequency_cs(void *arg) |
604 | { |
605 | void (*saved_work)(struct work_struct *wrk); |
606 | struct intel_gt *gt = arg; |
607 | struct intel_rps *rps = >->rps; |
608 | struct intel_engine_cs *engine; |
609 | struct pm_qos_request qos; |
610 | enum intel_engine_id id; |
611 | int err = 0; |
612 | |
613 | /* |
614 | * The premise is that the GPU does change frequency at our behest. |
615 | * Let's check there is a correspondence between the requested |
616 | * frequency, the actual frequency, and the observed clock rate. |
617 | */ |
618 | |
619 | if (!intel_rps_is_enabled(rps)) |
620 | return 0; |
621 | |
622 | if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ |
623 | return 0; |
624 | |
625 | if (CPU_LATENCY >= 0) |
626 | cpu_latency_qos_add_request(req: &qos, CPU_LATENCY); |
627 | |
628 | intel_gt_pm_wait_for_idle(gt); |
629 | saved_work = rps->work.func; |
630 | rps->work.func = dummy_rps_work; |
631 | |
632 | for_each_engine(engine, gt, id) { |
633 | struct i915_request *rq; |
634 | struct i915_vma *vma; |
635 | u32 *cancel, *cntr; |
636 | struct { |
637 | u64 count; |
638 | int freq; |
639 | } min, max; |
640 | |
641 | st_engine_heartbeat_disable(engine); |
642 | |
643 | vma = create_spin_counter(engine, |
644 | vm: engine->kernel_context->vm, srm: false, |
645 | cancel: &cancel, counter: &cntr); |
646 | if (IS_ERR(ptr: vma)) { |
647 | err = PTR_ERR(ptr: vma); |
648 | st_engine_heartbeat_enable(engine); |
649 | break; |
650 | } |
651 | |
652 | rq = intel_engine_create_kernel_request(engine); |
653 | if (IS_ERR(ptr: rq)) { |
654 | err = PTR_ERR(ptr: rq); |
655 | goto err_vma; |
656 | } |
657 | |
658 | err = i915_vma_move_to_active(vma, rq, flags: 0); |
659 | if (!err) |
660 | err = rq->engine->emit_bb_start(rq, |
661 | i915_vma_offset(vma), |
662 | PAGE_SIZE, 0); |
663 | i915_request_add(rq); |
664 | if (err) |
665 | goto err_vma; |
666 | |
667 | if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)), |
668 | 10)) { |
669 | pr_err("%s: timed loop did not start\n" , |
670 | engine->name); |
671 | goto err_vma; |
672 | } |
673 | |
674 | min.freq = rps->min_freq; |
675 | min.count = measure_cs_frequency_at(rps, engine, freq: &min.freq); |
676 | |
677 | max.freq = rps->max_freq; |
678 | max.count = measure_cs_frequency_at(rps, engine, freq: &max.freq); |
679 | |
680 | pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n" , |
681 | engine->name, |
682 | min.count, intel_gpu_freq(rps, min.freq), |
683 | max.count, intel_gpu_freq(rps, max.freq), |
684 | (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, |
685 | max.freq * min.count)); |
686 | |
687 | if (!scaled_within(x: max.freq * min.count, |
688 | y: min.freq * max.count, |
689 | f_n: 2, f_d: 3)) { |
690 | int f; |
691 | |
692 | pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n" , |
693 | engine->name, |
694 | max.freq * min.count, |
695 | min.freq * max.count); |
696 | show_pcu_config(rps); |
697 | |
698 | for (f = min.freq + 1; f <= rps->max_freq; f++) { |
699 | int act = f; |
700 | u64 count; |
701 | |
702 | count = measure_cs_frequency_at(rps, engine, freq: &act); |
703 | if (act < f) |
704 | break; |
705 | |
706 | pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n" , |
707 | engine->name, |
708 | act, intel_gpu_freq(rps, act), count, |
709 | (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, |
710 | act * min.count)); |
711 | |
712 | f = act; /* may skip ahead [pcu granularity] */ |
713 | } |
714 | |
715 | err = -EINTR; /* ignore error, continue on with test */ |
716 | } |
717 | |
718 | err_vma: |
719 | *cancel = MI_BATCH_BUFFER_END; |
720 | i915_gem_object_flush_map(obj: vma->obj); |
721 | i915_gem_object_unpin_map(obj: vma->obj); |
722 | i915_vma_unpin(vma); |
723 | i915_vma_unlock(vma); |
724 | i915_vma_put(vma); |
725 | |
726 | st_engine_heartbeat_enable(engine); |
727 | if (igt_flush_test(i915: gt->i915)) |
728 | err = -EIO; |
729 | if (err) |
730 | break; |
731 | } |
732 | |
733 | intel_gt_pm_wait_for_idle(gt); |
734 | rps->work.func = saved_work; |
735 | |
736 | if (CPU_LATENCY >= 0) |
737 | cpu_latency_qos_remove_request(req: &qos); |
738 | |
739 | return err; |
740 | } |
741 | |
742 | int live_rps_frequency_srm(void *arg) |
743 | { |
744 | void (*saved_work)(struct work_struct *wrk); |
745 | struct intel_gt *gt = arg; |
746 | struct intel_rps *rps = >->rps; |
747 | struct intel_engine_cs *engine; |
748 | struct pm_qos_request qos; |
749 | enum intel_engine_id id; |
750 | int err = 0; |
751 | |
752 | /* |
753 | * The premise is that the GPU does change frequency at our behest. |
754 | * Let's check there is a correspondence between the requested |
755 | * frequency, the actual frequency, and the observed clock rate. |
756 | */ |
757 | |
758 | if (!intel_rps_is_enabled(rps)) |
759 | return 0; |
760 | |
761 | if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ |
762 | return 0; |
763 | |
764 | if (CPU_LATENCY >= 0) |
765 | cpu_latency_qos_add_request(req: &qos, CPU_LATENCY); |
766 | |
767 | intel_gt_pm_wait_for_idle(gt); |
768 | saved_work = rps->work.func; |
769 | rps->work.func = dummy_rps_work; |
770 | |
771 | for_each_engine(engine, gt, id) { |
772 | struct i915_request *rq; |
773 | struct i915_vma *vma; |
774 | u32 *cancel, *cntr; |
775 | struct { |
776 | u64 count; |
777 | int freq; |
778 | } min, max; |
779 | |
780 | st_engine_heartbeat_disable(engine); |
781 | |
782 | vma = create_spin_counter(engine, |
783 | vm: engine->kernel_context->vm, srm: true, |
784 | cancel: &cancel, counter: &cntr); |
785 | if (IS_ERR(ptr: vma)) { |
786 | err = PTR_ERR(ptr: vma); |
787 | st_engine_heartbeat_enable(engine); |
788 | break; |
789 | } |
790 | |
791 | rq = intel_engine_create_kernel_request(engine); |
792 | if (IS_ERR(ptr: rq)) { |
793 | err = PTR_ERR(ptr: rq); |
794 | goto err_vma; |
795 | } |
796 | |
797 | err = i915_vma_move_to_active(vma, rq, flags: 0); |
798 | if (!err) |
799 | err = rq->engine->emit_bb_start(rq, |
800 | i915_vma_offset(vma), |
801 | PAGE_SIZE, 0); |
802 | i915_request_add(rq); |
803 | if (err) |
804 | goto err_vma; |
805 | |
806 | if (wait_for(READ_ONCE(*cntr), 10)) { |
807 | pr_err("%s: timed loop did not start\n" , |
808 | engine->name); |
809 | goto err_vma; |
810 | } |
811 | |
812 | min.freq = rps->min_freq; |
813 | min.count = measure_frequency_at(rps, cntr, freq: &min.freq); |
814 | |
815 | max.freq = rps->max_freq; |
816 | max.count = measure_frequency_at(rps, cntr, freq: &max.freq); |
817 | |
818 | pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n" , |
819 | engine->name, |
820 | min.count, intel_gpu_freq(rps, min.freq), |
821 | max.count, intel_gpu_freq(rps, max.freq), |
822 | (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, |
823 | max.freq * min.count)); |
824 | |
825 | if (!scaled_within(x: max.freq * min.count, |
826 | y: min.freq * max.count, |
827 | f_n: 1, f_d: 2)) { |
828 | int f; |
829 | |
830 | pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n" , |
831 | engine->name, |
832 | max.freq * min.count, |
833 | min.freq * max.count); |
834 | show_pcu_config(rps); |
835 | |
836 | for (f = min.freq + 1; f <= rps->max_freq; f++) { |
837 | int act = f; |
838 | u64 count; |
839 | |
840 | count = measure_frequency_at(rps, cntr, freq: &act); |
841 | if (act < f) |
842 | break; |
843 | |
844 | pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n" , |
845 | engine->name, |
846 | act, intel_gpu_freq(rps, act), count, |
847 | (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, |
848 | act * min.count)); |
849 | |
850 | f = act; /* may skip ahead [pcu granularity] */ |
851 | } |
852 | |
853 | err = -EINTR; /* ignore error, continue on with test */ |
854 | } |
855 | |
856 | err_vma: |
857 | *cancel = MI_BATCH_BUFFER_END; |
858 | i915_gem_object_flush_map(obj: vma->obj); |
859 | i915_gem_object_unpin_map(obj: vma->obj); |
860 | i915_vma_unpin(vma); |
861 | i915_vma_unlock(vma); |
862 | i915_vma_put(vma); |
863 | |
864 | st_engine_heartbeat_enable(engine); |
865 | if (igt_flush_test(i915: gt->i915)) |
866 | err = -EIO; |
867 | if (err) |
868 | break; |
869 | } |
870 | |
871 | intel_gt_pm_wait_for_idle(gt); |
872 | rps->work.func = saved_work; |
873 | |
874 | if (CPU_LATENCY >= 0) |
875 | cpu_latency_qos_remove_request(req: &qos); |
876 | |
877 | return err; |
878 | } |
879 | |
880 | static void sleep_for_ei(struct intel_rps *rps, int timeout_us) |
881 | { |
882 | /* Flush any previous EI */ |
883 | usleep_range(min: timeout_us, max: 2 * timeout_us); |
884 | |
885 | /* Reset the interrupt status */ |
886 | rps_disable_interrupts(rps); |
887 | GEM_BUG_ON(rps->pm_iir); |
888 | rps_enable_interrupts(rps); |
889 | |
890 | /* And then wait for the timeout, for real this time */ |
891 | usleep_range(min: 2 * timeout_us, max: 3 * timeout_us); |
892 | } |
893 | |
894 | static int __rps_up_interrupt(struct intel_rps *rps, |
895 | struct intel_engine_cs *engine, |
896 | struct igt_spinner *spin) |
897 | { |
898 | struct intel_uncore *uncore = engine->uncore; |
899 | struct i915_request *rq; |
900 | u32 timeout; |
901 | |
902 | if (!intel_engine_can_store_dword(engine)) |
903 | return 0; |
904 | |
905 | rps_set_check(rps, freq: rps->min_freq); |
906 | |
907 | rq = igt_spinner_create_request(spin, ce: engine->kernel_context, MI_NOOP); |
908 | if (IS_ERR(ptr: rq)) |
909 | return PTR_ERR(ptr: rq); |
910 | |
911 | i915_request_get(rq); |
912 | i915_request_add(rq); |
913 | |
914 | if (!igt_wait_for_spinner(spin, rq)) { |
915 | pr_err("%s: RPS spinner did not start\n" , |
916 | engine->name); |
917 | i915_request_put(rq); |
918 | intel_gt_set_wedged(gt: engine->gt); |
919 | return -EIO; |
920 | } |
921 | |
922 | if (!intel_rps_is_active(rps)) { |
923 | pr_err("%s: RPS not enabled on starting spinner\n" , |
924 | engine->name); |
925 | igt_spinner_end(spin); |
926 | i915_request_put(rq); |
927 | return -EINVAL; |
928 | } |
929 | |
930 | if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) { |
931 | pr_err("%s: RPS did not register UP interrupt\n" , |
932 | engine->name); |
933 | i915_request_put(rq); |
934 | return -EINVAL; |
935 | } |
936 | |
937 | if (rps->last_freq != rps->min_freq) { |
938 | pr_err("%s: RPS did not program min frequency\n" , |
939 | engine->name); |
940 | i915_request_put(rq); |
941 | return -EINVAL; |
942 | } |
943 | |
944 | timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI); |
945 | timeout = intel_gt_pm_interval_to_ns(gt: engine->gt, count: timeout); |
946 | timeout = DIV_ROUND_UP(timeout, 1000); |
947 | |
948 | sleep_for_ei(rps, timeout_us: timeout); |
949 | GEM_BUG_ON(i915_request_completed(rq)); |
950 | |
951 | igt_spinner_end(spin); |
952 | i915_request_put(rq); |
953 | |
954 | if (rps->cur_freq != rps->min_freq) { |
955 | pr_err("%s: Frequency unexpectedly changed [up], now %d!\n" , |
956 | engine->name, intel_rps_read_actual_frequency(rps)); |
957 | return -EINVAL; |
958 | } |
959 | |
960 | if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) { |
961 | pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n" , |
962 | engine->name, rps->pm_iir, |
963 | intel_uncore_read(uncore, GEN6_RP_PREV_UP), |
964 | intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD), |
965 | intel_uncore_read(uncore, GEN6_RP_UP_EI)); |
966 | return -EINVAL; |
967 | } |
968 | |
969 | return 0; |
970 | } |
971 | |
972 | static int __rps_down_interrupt(struct intel_rps *rps, |
973 | struct intel_engine_cs *engine) |
974 | { |
975 | struct intel_uncore *uncore = engine->uncore; |
976 | u32 timeout; |
977 | |
978 | rps_set_check(rps, freq: rps->max_freq); |
979 | |
980 | if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) { |
981 | pr_err("%s: RPS did not register DOWN interrupt\n" , |
982 | engine->name); |
983 | return -EINVAL; |
984 | } |
985 | |
986 | if (rps->last_freq != rps->max_freq) { |
987 | pr_err("%s: RPS did not program max frequency\n" , |
988 | engine->name); |
989 | return -EINVAL; |
990 | } |
991 | |
992 | timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); |
993 | timeout = intel_gt_pm_interval_to_ns(gt: engine->gt, count: timeout); |
994 | timeout = DIV_ROUND_UP(timeout, 1000); |
995 | |
996 | sleep_for_ei(rps, timeout_us: timeout); |
997 | |
998 | if (rps->cur_freq != rps->max_freq) { |
999 | pr_err("%s: Frequency unexpectedly changed [down], now %d!\n" , |
1000 | engine->name, |
1001 | intel_rps_read_actual_frequency(rps)); |
1002 | return -EINVAL; |
1003 | } |
1004 | |
1005 | if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) { |
1006 | pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n" , |
1007 | engine->name, rps->pm_iir, |
1008 | intel_uncore_read(uncore, GEN6_RP_PREV_DOWN), |
1009 | intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD), |
1010 | intel_uncore_read(uncore, GEN6_RP_DOWN_EI), |
1011 | intel_uncore_read(uncore, GEN6_RP_PREV_UP), |
1012 | intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD), |
1013 | intel_uncore_read(uncore, GEN6_RP_UP_EI)); |
1014 | return -EINVAL; |
1015 | } |
1016 | |
1017 | return 0; |
1018 | } |
1019 | |
1020 | int live_rps_interrupt(void *arg) |
1021 | { |
1022 | struct intel_gt *gt = arg; |
1023 | struct intel_rps *rps = >->rps; |
1024 | void (*saved_work)(struct work_struct *wrk); |
1025 | struct intel_engine_cs *engine; |
1026 | enum intel_engine_id id; |
1027 | struct igt_spinner spin; |
1028 | intel_wakeref_t wakeref; |
1029 | u32 pm_events; |
1030 | int err = 0; |
1031 | |
1032 | /* |
1033 | * First, let's check whether or not we are receiving interrupts. |
1034 | */ |
1035 | |
1036 | if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6) |
1037 | return 0; |
1038 | |
1039 | pm_events = 0; |
1040 | with_intel_gt_pm(gt, wakeref) |
1041 | pm_events = rps->pm_events; |
1042 | if (!pm_events) { |
1043 | pr_err("No RPS PM events registered, but RPS is enabled?\n" ); |
1044 | return -ENODEV; |
1045 | } |
1046 | |
1047 | if (igt_spinner_init(spin: &spin, gt)) |
1048 | return -ENOMEM; |
1049 | |
1050 | intel_gt_pm_wait_for_idle(gt); |
1051 | saved_work = rps->work.func; |
1052 | rps->work.func = dummy_rps_work; |
1053 | |
1054 | for_each_engine(engine, gt, id) { |
1055 | /* Keep the engine busy with a spinner; expect an UP! */ |
1056 | if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { |
1057 | intel_gt_pm_wait_for_idle(gt: engine->gt); |
1058 | GEM_BUG_ON(intel_rps_is_active(rps)); |
1059 | |
1060 | st_engine_heartbeat_disable(engine); |
1061 | |
1062 | err = __rps_up_interrupt(rps, engine, spin: &spin); |
1063 | |
1064 | st_engine_heartbeat_enable(engine); |
1065 | if (err) |
1066 | goto out; |
1067 | |
1068 | intel_gt_pm_wait_for_idle(gt: engine->gt); |
1069 | } |
1070 | |
1071 | /* Keep the engine awake but idle and check for DOWN */ |
1072 | if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { |
1073 | st_engine_heartbeat_disable(engine); |
1074 | intel_rc6_disable(rc6: >->rc6); |
1075 | |
1076 | err = __rps_down_interrupt(rps, engine); |
1077 | |
1078 | intel_rc6_enable(rc6: >->rc6); |
1079 | st_engine_heartbeat_enable(engine); |
1080 | if (err) |
1081 | goto out; |
1082 | } |
1083 | } |
1084 | |
1085 | out: |
1086 | if (igt_flush_test(i915: gt->i915)) |
1087 | err = -EIO; |
1088 | |
1089 | igt_spinner_fini(spin: &spin); |
1090 | |
1091 | intel_gt_pm_wait_for_idle(gt); |
1092 | rps->work.func = saved_work; |
1093 | |
1094 | return err; |
1095 | } |
1096 | |
1097 | static u64 __measure_power(int duration_ms) |
1098 | { |
1099 | u64 dE, dt; |
1100 | |
1101 | dE = librapl_energy_uJ(); |
1102 | dt = ktime_get(); |
1103 | usleep_range(min: 1000 * duration_ms, max: 2000 * duration_ms); |
1104 | dE = librapl_energy_uJ() - dE; |
1105 | dt = ktime_get() - dt; |
1106 | |
1107 | return div64_u64(dividend: 1000 * 1000 * dE, divisor: dt); |
1108 | } |
1109 | |
1110 | static u64 measure_power(struct intel_rps *rps, int *freq) |
1111 | { |
1112 | u64 x[5]; |
1113 | int i; |
1114 | |
1115 | for (i = 0; i < 5; i++) |
1116 | x[i] = __measure_power(duration_ms: 5); |
1117 | |
1118 | *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; |
1119 | |
1120 | /* A simple triangle filter for better result stability */ |
1121 | sort(base: x, num: 5, size: sizeof(*x), cmp_func: cmp_u64, NULL); |
1122 | return div_u64(dividend: x[1] + 2 * x[2] + x[3], divisor: 4); |
1123 | } |
1124 | |
1125 | static u64 measure_power_at(struct intel_rps *rps, int *freq) |
1126 | { |
1127 | *freq = rps_set_check(rps, freq: *freq); |
1128 | return measure_power(rps, freq); |
1129 | } |
1130 | |
1131 | int live_rps_power(void *arg) |
1132 | { |
1133 | struct intel_gt *gt = arg; |
1134 | struct intel_rps *rps = >->rps; |
1135 | void (*saved_work)(struct work_struct *wrk); |
1136 | struct intel_engine_cs *engine; |
1137 | enum intel_engine_id id; |
1138 | struct igt_spinner spin; |
1139 | int err = 0; |
1140 | |
1141 | /* |
1142 | * Our fundamental assumption is that running at lower frequency |
1143 | * actually saves power. Let's see if our RAPL measurement support |
1144 | * that theory. |
1145 | */ |
1146 | |
1147 | if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) |
1148 | return 0; |
1149 | |
1150 | if (!librapl_supported(i915: gt->i915)) |
1151 | return 0; |
1152 | |
1153 | if (igt_spinner_init(spin: &spin, gt)) |
1154 | return -ENOMEM; |
1155 | |
1156 | intel_gt_pm_wait_for_idle(gt); |
1157 | saved_work = rps->work.func; |
1158 | rps->work.func = dummy_rps_work; |
1159 | |
1160 | for_each_engine(engine, gt, id) { |
1161 | struct i915_request *rq; |
1162 | struct { |
1163 | u64 power; |
1164 | int freq; |
1165 | } min, max; |
1166 | |
1167 | if (!intel_engine_can_store_dword(engine)) |
1168 | continue; |
1169 | |
1170 | st_engine_heartbeat_disable(engine); |
1171 | |
1172 | rq = igt_spinner_create_request(spin: &spin, |
1173 | ce: engine->kernel_context, |
1174 | MI_NOOP); |
1175 | if (IS_ERR(ptr: rq)) { |
1176 | st_engine_heartbeat_enable(engine); |
1177 | err = PTR_ERR(ptr: rq); |
1178 | break; |
1179 | } |
1180 | |
1181 | i915_request_add(rq); |
1182 | |
1183 | if (!igt_wait_for_spinner(spin: &spin, rq)) { |
1184 | pr_err("%s: RPS spinner did not start\n" , |
1185 | engine->name); |
1186 | igt_spinner_end(spin: &spin); |
1187 | st_engine_heartbeat_enable(engine); |
1188 | intel_gt_set_wedged(gt: engine->gt); |
1189 | err = -EIO; |
1190 | break; |
1191 | } |
1192 | |
1193 | max.freq = rps->max_freq; |
1194 | max.power = measure_power_at(rps, freq: &max.freq); |
1195 | |
1196 | min.freq = rps->min_freq; |
1197 | min.power = measure_power_at(rps, freq: &min.freq); |
1198 | |
1199 | igt_spinner_end(spin: &spin); |
1200 | st_engine_heartbeat_enable(engine); |
1201 | |
1202 | pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n" , |
1203 | engine->name, |
1204 | min.power, intel_gpu_freq(rps, min.freq), |
1205 | max.power, intel_gpu_freq(rps, max.freq)); |
1206 | |
1207 | if (10 * min.freq >= 9 * max.freq) { |
1208 | pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n" , |
1209 | min.freq, intel_gpu_freq(rps, min.freq), |
1210 | max.freq, intel_gpu_freq(rps, max.freq)); |
1211 | continue; |
1212 | } |
1213 | |
1214 | if (11 * min.power > 10 * max.power) { |
1215 | pr_err("%s: did not conserve power when setting lower frequency!\n" , |
1216 | engine->name); |
1217 | err = -EINVAL; |
1218 | break; |
1219 | } |
1220 | |
1221 | if (igt_flush_test(i915: gt->i915)) { |
1222 | err = -EIO; |
1223 | break; |
1224 | } |
1225 | } |
1226 | |
1227 | igt_spinner_fini(spin: &spin); |
1228 | |
1229 | intel_gt_pm_wait_for_idle(gt); |
1230 | rps->work.func = saved_work; |
1231 | |
1232 | return err; |
1233 | } |
1234 | |
1235 | int live_rps_dynamic(void *arg) |
1236 | { |
1237 | struct intel_gt *gt = arg; |
1238 | struct intel_rps *rps = >->rps; |
1239 | struct intel_engine_cs *engine; |
1240 | enum intel_engine_id id; |
1241 | struct igt_spinner spin; |
1242 | int err = 0; |
1243 | |
1244 | /* |
1245 | * We've looked at the bascs, and have established that we |
1246 | * can change the clock frequency and that the HW will generate |
1247 | * interrupts based on load. Now we check how we integrate those |
1248 | * moving parts into dynamic reclocking based on load. |
1249 | */ |
1250 | |
1251 | if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) |
1252 | return 0; |
1253 | |
1254 | if (igt_spinner_init(spin: &spin, gt)) |
1255 | return -ENOMEM; |
1256 | |
1257 | if (intel_rps_has_interrupts(rps)) |
1258 | pr_info("RPS has interrupt support\n" ); |
1259 | if (intel_rps_uses_timer(rps)) |
1260 | pr_info("RPS has timer support\n" ); |
1261 | |
1262 | for_each_engine(engine, gt, id) { |
1263 | struct i915_request *rq; |
1264 | struct { |
1265 | ktime_t dt; |
1266 | u8 freq; |
1267 | } min, max; |
1268 | |
1269 | if (!intel_engine_can_store_dword(engine)) |
1270 | continue; |
1271 | |
1272 | intel_gt_pm_wait_for_idle(gt); |
1273 | GEM_BUG_ON(intel_rps_is_active(rps)); |
1274 | rps->cur_freq = rps->min_freq; |
1275 | |
1276 | intel_engine_pm_get(engine); |
1277 | intel_rc6_disable(rc6: >->rc6); |
1278 | GEM_BUG_ON(rps->last_freq != rps->min_freq); |
1279 | |
1280 | rq = igt_spinner_create_request(spin: &spin, |
1281 | ce: engine->kernel_context, |
1282 | MI_NOOP); |
1283 | if (IS_ERR(ptr: rq)) { |
1284 | err = PTR_ERR(ptr: rq); |
1285 | goto err; |
1286 | } |
1287 | |
1288 | i915_request_add(rq); |
1289 | |
1290 | max.dt = ktime_get(); |
1291 | max.freq = wait_for_freq(rps, freq: rps->max_freq, timeout_ms: 500); |
1292 | max.dt = ktime_sub(ktime_get(), max.dt); |
1293 | |
1294 | igt_spinner_end(spin: &spin); |
1295 | |
1296 | min.dt = ktime_get(); |
1297 | min.freq = wait_for_freq(rps, freq: rps->min_freq, timeout_ms: 2000); |
1298 | min.dt = ktime_sub(ktime_get(), min.dt); |
1299 | |
1300 | pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n" , |
1301 | engine->name, |
1302 | max.freq, intel_gpu_freq(rps, max.freq), |
1303 | ktime_to_ns(max.dt), |
1304 | min.freq, intel_gpu_freq(rps, min.freq), |
1305 | ktime_to_ns(min.dt)); |
1306 | if (min.freq >= max.freq) { |
1307 | pr_err("%s: dynamic reclocking of spinner failed\n!" , |
1308 | engine->name); |
1309 | err = -EINVAL; |
1310 | } |
1311 | |
1312 | err: |
1313 | intel_rc6_enable(rc6: >->rc6); |
1314 | intel_engine_pm_put(engine); |
1315 | |
1316 | if (igt_flush_test(i915: gt->i915)) |
1317 | err = -EIO; |
1318 | if (err) |
1319 | break; |
1320 | } |
1321 | |
1322 | igt_spinner_fini(spin: &spin); |
1323 | |
1324 | return err; |
1325 | } |
1326 | |