// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include <drm/i915_drm.h>

#include "display/intel_display.h"
#include "display/intel_display_irq.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_reg.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm.h"
#include "intel_gt_pm_irq.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_rps.h"
#include "vlv_sideband.h"
#include "../../../platform/x86/intel_ips.h"

#define BUSY_MAX_EI	20u /* ms */

/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return &gt->uc.guc.slpc;
}

static bool rps_uses_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return intel_uc_uses_guc_slpc(&gt->uc);
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}

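/*
 * Raw register write, bypassing uncore's implicit forcewake handling;
 * callers are assumed to hold (or not need) the relevant forcewake.
 */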
static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

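/*
 * Timer used when we rely on software busyness tracking (engine stats)
 * rather than hardware RPS interrupts: sample per-engine busy time over
 * the last interval and kick the RPS worker to raise or lower the
 * frequency, backing the evaluation interval off (doubling up to
 * BUSY_MAX_EI) while no change is needed.
 */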
static void rps_timer(struct timer_list *t)
{
	struct intel_rps *rps = from_timer(rps, t, timer);
	struct intel_gt *gt = rps_to_gt(rps);
	struct intel_engine_cs *engine;
	ktime_t dt, last, timestamp;
	enum intel_engine_id id;
	s64 max_busy[3] = {};

	timestamp = 0;
	for_each_engine(engine, gt, id) {
		s64 busy;
		int i;

		dt = intel_engine_get_busy_time(engine, &timestamp);
		last = engine->stats.rps;
		engine->stats.rps = dt;

		busy = ktime_to_ns(ktime_sub(dt, last));
		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
			if (busy > max_busy[i])
				swap(busy, max_busy[i]);
		}
	}

	last = rps->pm_timestamp;
	rps->pm_timestamp = timestamp;

	if (intel_rps_is_active(rps)) {
		s64 busy;
		int i;

		dt = ktime_sub(timestamp, last);

		/*
		 * Our goal is to evaluate each engine independently, so we run
		 * at the lowest clocks required to sustain the heaviest
		 * workload. However, a task may be split into sequential
		 * dependent operations across a set of engines, such that
		 * the independent contributions do not account for high load,
		 * but overall the task is GPU bound. For example, consider
		 * video decode on vcs followed by colour post-processing
		 * on vecs, followed by general post-processing on rcs.
		 * Since multiple engines being active does not necessarily
		 * imply a single continuous workload across all engines, we
		 * hedge our bets by only contributing a factor of the
		 * distributed load into our busyness calculation.
		 */
		busy = max_busy[0];
		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
			if (!max_busy[i])
				break;

			busy += div_u64(max_busy[i], 1 << i);
		}
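		/*
		 * Illustrative weighting: with max_busy = { 8ms, 4ms, 2ms },
		 * busy = 8 + 4/2 + 2/4 = 10.5ms; the heaviest engine counts
		 * in full, the others at 1/2 and 1/4 weight.
		 */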
		GT_TRACE(gt,
			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
			 busy, (int)div64_u64(100 * busy, dt),
			 max_busy[0], max_busy[1], max_busy[2],
			 rps->pm_interval);

		if (100 * busy > rps->power.up_threshold * dt &&
		    rps->cur_freq < rps->max_freq_softlimit) {
			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
			rps->pm_interval = 1;
			queue_work(gt->i915->unordered_wq, &rps->work);
		} else if (100 * busy < rps->power.down_threshold * dt &&
			   rps->cur_freq > rps->min_freq_softlimit) {
			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
			rps->pm_interval = 1;
			queue_work(gt->i915->unordered_wq, &rps->work);
		} else {
			rps->last_adj = 0;
		}

		mod_timer(&rps->timer,
			  jiffies + msecs_to_jiffies(rps->pm_interval));
		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
	}
}

static void rps_start_timer(struct intel_rps *rps)
{
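	/*
	 * While stopped, pm_timestamp holds the busy-clock offset recorded
	 * by rps_stop_timer(); subtracting it from ktime_get() again
	 * restores an absolute timestamp, so time spent parked is excluded
	 * from the next busyness evaluation.
	 */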
	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
	rps->pm_interval = 1;
	mod_timer(&rps->timer, jiffies + 1);
}

static void rps_stop_timer(struct intel_rps *rps)
{
	del_timer_sync(&rps->timer);
	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
	cancel_work_sync(&rps->work);
}

static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
	u32 mask = 0;

	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
	if (val > rps->min_freq_softlimit)
		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
			 GEN6_PM_RP_DOWN_THRESHOLD |
			 GEN6_PM_RP_DOWN_TIMEOUT);

	if (val < rps->max_freq_softlimit)
		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;

	mask &= rps->pm_events;

	return rps_pm_sanitize_mask(rps, ~mask);
}

static void rps_reset_ei(struct intel_rps *rps)
{
	memset(&rps->ei, 0, sizeof(rps->ei));
}

static void rps_enable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	GEM_BUG_ON(rps_uses_slpc(rps));

	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));

	rps_reset_ei(rps);

	spin_lock_irq(gt->irq_lock);
	gen6_gt_pm_enable_irq(gt, rps->pm_events);
	spin_unlock_irq(gt->irq_lock);

	intel_uncore_write(gt->uncore,
			   GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
}

static void gen6_rps_reset_interrupts(struct intel_rps *rps)
{
	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
}

static void gen11_rps_reset_interrupts(struct intel_rps *rps)
{
	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
		;
}

static void rps_reset_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	spin_lock_irq(gt->irq_lock);
	if (GRAPHICS_VER(gt->i915) >= 11)
		gen11_rps_reset_interrupts(rps);
	else
		gen6_rps_reset_interrupts(rps);

	rps->pm_iir = 0;
	spin_unlock_irq(gt->irq_lock);
}

static void rps_disable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	intel_uncore_write(gt->uncore,
			   GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));

	spin_lock_irq(gt->irq_lock);
	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
	spin_unlock_irq(gt->irq_lock);

	intel_synchronize_irq(gt->i915);

	/*
	 * Now that we will not be generating any more work, flush any
	 * outstanding tasks. As we are called on the RPS idle path,
	 * we will reset the GPU to minimum frequencies, so the current
	 * state of the worker can be discarded.
	 */
	cancel_work_sync(&rps->work);

	rps_reset_interrupts(rps);
	GT_TRACE(gt, "interrupts:off\n");
}

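/*
 * Coefficients for the IPS chipset power estimate: rows are selected by
 * the FSB frequency bucket (i) and memory frequency (t), yielding the
 * slope (m) and intercept (c) consumed by __ips_chipset_val().
 */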
static const struct cparams {
	u16 i;
	u16 t;
	u16 m;
	u16 c;
} cparams[] = {
	{ 1, 1333, 301, 28664 },
	{ 1, 1066, 294, 24460 },
	{ 1, 800, 294, 25192 },
	{ 0, 1333, 276, 27605 },
	{ 0, 1066, 276, 27605 },
	{ 0, 800, 231, 23784 },
};

static void gen5_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u8 fmax, fmin, fstart;
	u32 rgvmodectl;
	int c_m, i;

	if (i915->fsb_freq <= 3200)
		c_m = 0;
	else if (i915->fsb_freq <= 4800)
		c_m = 1;
	else
		c_m = 2;

	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
			rps->ips.m = cparams[i].m;
			rps->ips.c = cparams[i].c;
			break;
		}
	}

	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);

	/* Set up min, max, and cur for interrupt handling */
	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
		MEMMODE_FSTART_SHIFT;
	drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
		fmax, fmin, fstart);

	rps->min_freq = fmax;
	rps->efficient_freq = fstart;
	rps->max_freq = fmin;
}

static unsigned long
__ips_chipset_val(struct intel_ips *ips)
{
	struct intel_uncore *uncore =
		rps_to_uncore(container_of(ips, struct intel_rps, ips));
	unsigned long now = jiffies_to_msecs(jiffies), dt;
	unsigned long result;
	u64 total, delta;

	lockdep_assert_held(&mchdev_lock);

	/*
	 * Prevent division-by-zero if we are asking too fast.
	 * Also, we don't get interesting results if we are polling
	 * faster than once in 10ms, so just return the saved value
	 * in such cases.
	 */
	dt = now - ips->last_time1;
	if (dt <= 10)
		return ips->chipset_power;

	/* FIXME: handle per-counter overflow */
	total = intel_uncore_read(uncore, DMIEC);
	total += intel_uncore_read(uncore, DDREC);
	total += intel_uncore_read(uncore, CSIEC);

	delta = total - ips->last_count1;

	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);

	ips->last_count1 = total;
	ips->last_time1 = now;

	ips->chipset_power = result;

	return result;
}

static unsigned long ips_mch_val(struct intel_uncore *uncore)
{
	unsigned int m, x, b;
	u32 tsfs;

	tsfs = intel_uncore_read(uncore, TSFS);
	x = intel_uncore_read8(uncore, TR1);

	b = tsfs & TSFS_INTR_MASK;
	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;

	return m * x / 127 - b;
}

static int _pxvid_to_vd(u8 pxvid)
{
	if (pxvid == 0)
		return 0;

	if (pxvid >= 8 && pxvid < 31)
		pxvid = 31;

	return (pxvid + 2) * 125;
}

static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
{
	const int vd = _pxvid_to_vd(pxvid);

	if (INTEL_INFO(i915)->is_mobile)
		return max(vd - 1125, 0);

	return vd;
}

static void __gen5_ips_update(struct intel_ips *ips)
{
	struct intel_uncore *uncore =
		rps_to_uncore(container_of(ips, struct intel_rps, ips));
	u64 now, delta, dt;
	u32 count;

	lockdep_assert_held(&mchdev_lock);

	now = ktime_get_raw_ns();
	dt = now - ips->last_time2;
	do_div(dt, NSEC_PER_MSEC);

	/* Don't divide by 0 */
	if (dt <= 10)
		return;

	count = intel_uncore_read(uncore, GFXEC);
	delta = count - ips->last_count2;

	ips->last_count2 = count;
	ips->last_time2 = now;

	/* More magic constants... */
	ips->gfx_power = div_u64(delta * 1181, dt * 10);
}

static void gen5_rps_update(struct intel_rps *rps)
{
	spin_lock_irq(&mchdev_lock);
	__gen5_ips_update(&rps->ips);
	spin_unlock_irq(&mchdev_lock);
}

static unsigned int gen5_invert_freq(struct intel_rps *rps,
				     unsigned int val)
{
	/* Invert the frequency bin into an ips delay */
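	/* (endpoints swap: max_freq maps to min_freq and vice versa) */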
	val = rps->max_freq - val;
	val = rps->min_freq + val;

	return val;
}

static int __gen5_rps_set(struct intel_rps *rps, u8 val)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u16 rgvswctl;

	lockdep_assert_held(&mchdev_lock);

	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
	if (rgvswctl & MEMCTL_CMD_STS) {
		drm_dbg(&rps_to_i915(rps)->drm,
			"gpu busy, RCS change rejected\n");
		return -EBUSY; /* still busy with another command */
	}

	/* Invert the frequency bin into an ips delay */
	val = gen5_invert_freq(rps, val);

	rgvswctl =
		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
		(val << MEMCTL_FREQ_SHIFT) |
		MEMCTL_SFCAVM;
	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
	intel_uncore_posting_read16(uncore, MEMSWCTL);

	rgvswctl |= MEMCTL_CMD_STS;
	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);

	return 0;
}

static int gen5_rps_set(struct intel_rps *rps, u8 val)
{
	int err;

	spin_lock_irq(&mchdev_lock);
	err = __gen5_rps_set(rps, val);
	spin_unlock_irq(&mchdev_lock);

	return err;
}

static unsigned long intel_pxfreq(u32 vidfreq)
{
	int div = (vidfreq & 0x3f0000) >> 16;
	int post = (vidfreq & 0x3000) >> 12;
	int pre = (vidfreq & 0x7);

	if (!pre)
		return 0;

	return div * 133333 / (pre << post);
}

static unsigned int init_emon(struct intel_uncore *uncore)
{
	u8 pxw[16];
	int i;

	/* Disable to program */
	intel_uncore_write(uncore, ECR, 0);
	intel_uncore_posting_read(uncore, ECR);

	/* Program energy weights for various events */
	intel_uncore_write(uncore, SDEW, 0x15040d00);
	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
	intel_uncore_write(uncore, CSIEW2, 0x04000004);

	for (i = 0; i < 5; i++)
		intel_uncore_write(uncore, PEW(i), 0);
	for (i = 0; i < 3; i++)
		intel_uncore_write(uncore, DEW(i), 0);

	/* Program P-state weights to account for frequency power adjustment */
	for (i = 0; i < 16; i++) {
		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
		unsigned int freq = intel_pxfreq(pxvidfreq);
		unsigned int vid =
			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
		unsigned int val;

		val = vid * vid * freq / 1000 * 255;
		val /= 127 * 127 * 900;

		pxw[i] = val;
	}
	/* Render standby states get 0 weight */
	pxw[14] = 0;
	pxw[15] = 0;

	for (i = 0; i < 4; i++) {
		intel_uncore_write(uncore, PXW(i),
				   pxw[i * 4 + 0] << 24 |
				   pxw[i * 4 + 1] << 16 |
				   pxw[i * 4 + 2] << 8 |
				   pxw[i * 4 + 3] << 0);
	}

	/* Adjust magic regs to magic values (more experimental results) */
	intel_uncore_write(uncore, OGW0, 0);
	intel_uncore_write(uncore, OGW1, 0);
	intel_uncore_write(uncore, EG0, 0x00007f00);
	intel_uncore_write(uncore, EG1, 0x0000000e);
	intel_uncore_write(uncore, EG2, 0x000e0000);
	intel_uncore_write(uncore, EG3, 0x68000300);
	intel_uncore_write(uncore, EG4, 0x42000000);
	intel_uncore_write(uncore, EG5, 0x00140031);
	intel_uncore_write(uncore, EG6, 0);
	intel_uncore_write(uncore, EG7, 0);

	for (i = 0; i < 8; i++)
		intel_uncore_write(uncore, PXWL(i), 0);

	/* Enable PMON + select events */
	intel_uncore_write(uncore, ECR, 0x80000019);

	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
}

static bool gen5_rps_enable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u8 fstart, vstart;
	u32 rgvmodectl;

	spin_lock_irq(&mchdev_lock);

	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);

	/* Enable temp reporting */
	intel_uncore_write16(uncore, PMMISC,
			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
	intel_uncore_write16(uncore, TSC1,
			     intel_uncore_read16(uncore, TSC1) | TSE);

	/* 100ms RC evaluation intervals */
	intel_uncore_write(uncore, RCUPEI, 100000);
	intel_uncore_write(uncore, RCDNEI, 100000);

	/* Set max/min thresholds to 90ms and 80ms respectively */
	intel_uncore_write(uncore, RCBMAXAVG, 90000);
	intel_uncore_write(uncore, RCBMINAVG, 80000);

	intel_uncore_write(uncore, MEMIHYST, 1);

	/* Set up min, max, and cur for interrupt handling */
	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
		MEMMODE_FSTART_SHIFT;

	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;

	intel_uncore_write(uncore,
			   MEMINTREN,
			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);

	intel_uncore_write(uncore, VIDSTART, vstart);
	intel_uncore_posting_read(uncore, VIDSTART);

	rgvmodectl |= MEMMODE_SWMODE_EN;
	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);

	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
			     MEMCTL_CMD_STS) == 0, 10))
		drm_err(&uncore->i915->drm,
			"stuck trying to change perf mode\n");
	mdelay(1);

	__gen5_rps_set(rps, rps->cur_freq);

	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
	rps->ips.last_time1 = jiffies_to_msecs(jiffies);

	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
	rps->ips.last_time2 = ktime_get_raw_ns();

	spin_lock(&i915->irq_lock);
	ilk_enable_display_irq(i915, DE_PCU_EVENT);
	spin_unlock(&i915->irq_lock);

	spin_unlock_irq(&mchdev_lock);

	rps->ips.corr = init_emon(uncore);

	return true;
}

static void gen5_rps_disable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u16 rgvswctl;

	spin_lock_irq(&mchdev_lock);

	spin_lock(&i915->irq_lock);
	ilk_disable_display_irq(i915, DE_PCU_EVENT);
	spin_unlock(&i915->irq_lock);

	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);

	/* Ack interrupts, disable EFC interrupt */
	intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);

	/* Go back to the starting frequency */
	__gen5_rps_set(rps, rps->idle_freq);
	mdelay(1);
	rgvswctl |= MEMCTL_CMD_STS;
	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
	mdelay(1);

	spin_unlock_irq(&mchdev_lock);
}

static u32 rps_limits(struct intel_rps *rps, u8 val)
{
	u32 limits;

	/*
	 * Only set the down limit when we've reached the lowest level to avoid
	 * getting more interrupts, otherwise leave this clear. This prevents a
	 * race in the hw when coming out of rc6: There's a tiny window where
	 * the hw runs at the minimal clock before selecting the desired
	 * frequency, if the down threshold expires in that window we will not
	 * receive a down interrupt.
	 */
	if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
		limits = rps->max_freq_softlimit << 23;
		if (val <= rps->min_freq_softlimit)
			limits |= rps->min_freq_softlimit << 14;
	} else {
		limits = rps->max_freq_softlimit << 24;
		if (val <= rps->min_freq_softlimit)
			limits |= rps->min_freq_softlimit << 16;
	}

	return limits;
}

static void rps_set_power(struct intel_rps *rps, int new_power)
{
	struct intel_gt *gt = rps_to_gt(rps);
	struct intel_uncore *uncore = gt->uncore;
	u32 ei_up = 0, ei_down = 0;

	lockdep_assert_held(&rps->power.mutex);

	if (new_power == rps->power.mode)
		return;

	/* Note the units here are not exactly 1us, but 1280ns. */
	switch (new_power) {
	case LOW_POWER:
		ei_up = 16000;
		ei_down = 32000;
		break;

	case BETWEEN:
		ei_up = 13000;
		ei_down = 32000;
		break;

	case HIGH_POWER:
		ei_up = 10000;
		ei_down = 32000;
		break;
	}
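	/*
	 * e.g. HIGH_POWER requests ei_up = 10000us; the
	 * intel_gt_ns_to_pm_interval() calls below translate that 10ms
	 * into the hardware's own tick (nominally 1280ns, per the note
	 * above).
	 */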

	/* When byt can survive without system hang with dynamic
	 * sw freq adjustments, this restriction can be lifted.
	 */
	if (IS_VALLEYVIEW(gt->i915))
		goto skip_hw_write;

	GT_TRACE(gt,
		 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
		 new_power,
		 rps->power.up_threshold, ei_up,
		 rps->power.down_threshold, ei_down);

	set(uncore, GEN6_RP_UP_EI,
	    intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
	set(uncore, GEN6_RP_UP_THRESHOLD,
	    intel_gt_ns_to_pm_interval(gt,
				       ei_up * rps->power.up_threshold * 10));

	set(uncore, GEN6_RP_DOWN_EI,
	    intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
	set(uncore, GEN6_RP_DOWN_THRESHOLD,
	    intel_gt_ns_to_pm_interval(gt,
				       ei_down *
				       rps->power.down_threshold * 10));

	set(uncore, GEN6_RP_CONTROL,
	    (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
	    GEN6_RP_MEDIA_IS_GFX |
	    GEN6_RP_ENABLE |
	    GEN6_RP_UP_BUSY_AVG |
	    GEN6_RP_DOWN_IDLE_AVG);

skip_hw_write:
	rps->power.mode = new_power;
}

static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
{
	int new_power;

	new_power = rps->power.mode;
	switch (rps->power.mode) {
	case LOW_POWER:
		if (val > rps->efficient_freq + 1 &&
		    val > rps->cur_freq)
			new_power = BETWEEN;
		break;

	case BETWEEN:
		if (val <= rps->efficient_freq &&
		    val < rps->cur_freq)
			new_power = LOW_POWER;
		else if (val >= rps->rp0_freq &&
			 val > rps->cur_freq)
			new_power = HIGH_POWER;
		break;

	case HIGH_POWER:
		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
		    val < rps->cur_freq)
			new_power = BETWEEN;
		break;
	}
	/* Max/min bins are special */
	if (val <= rps->min_freq_softlimit)
		new_power = LOW_POWER;
	if (val >= rps->max_freq_softlimit)
		new_power = HIGH_POWER;

	mutex_lock(&rps->power.mutex);
	if (rps->power.interactive)
		new_power = HIGH_POWER;
	rps_set_power(rps, new_power);
	mutex_unlock(&rps->power.mutex);
}

void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n",
		 str_yes_no(interactive));

	mutex_lock(&rps->power.mutex);
	if (interactive) {
		if (!rps->power.interactive++ && intel_rps_is_active(rps))
			rps_set_power(rps, HIGH_POWER);
	} else {
		GEM_BUG_ON(!rps->power.interactive);
		rps->power.interactive--;
	}
	mutex_unlock(&rps->power.mutex);
}

static int gen6_rps_set(struct intel_rps *rps, u8 val)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 swreq;

	GEM_BUG_ON(rps_uses_slpc(rps));

	if (GRAPHICS_VER(i915) >= 9)
		swreq = GEN9_FREQUENCY(val);
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		swreq = HSW_FREQUENCY(val);
	else
		swreq = (GEN6_FREQUENCY(val) |
			 GEN6_OFFSET(0) |
			 GEN6_AGGRESSIVE_TURBO);
	set(uncore, GEN6_RPNSWREQ, swreq);

	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
		 val, intel_gpu_freq(rps, val), swreq);

	return 0;
}

static int vlv_rps_set(struct intel_rps *rps, u8 val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int err;

	vlv_punit_get(i915);
	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
	vlv_punit_put(i915);

	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
		 val, intel_gpu_freq(rps, val));

	return err;
}

static int rps_set(struct intel_rps *rps, u8 val, bool update)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int err;

	if (val == rps->last_freq)
		return 0;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		err = vlv_rps_set(rps, val);
	else if (GRAPHICS_VER(i915) >= 6)
		err = gen6_rps_set(rps, val);
	else
		err = gen5_rps_set(rps, val);
	if (err)
		return err;

	if (update && GRAPHICS_VER(i915) >= 6)
		gen6_rps_set_thresholds(rps, val);
	rps->last_freq = val;

	return 0;
}

void intel_rps_unpark(struct intel_rps *rps)
{
	if (!intel_rps_is_enabled(rps))
		return;

	GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);

	/*
	 * Use the user's desired frequency as a guide, but for better
	 * performance, jump directly to RPe as our starting frequency.
	 */
	mutex_lock(&rps->lock);

	intel_rps_set_active(rps);
	intel_rps_set(rps,
		      clamp(rps->cur_freq,
			    rps->min_freq_softlimit,
			    rps->max_freq_softlimit));

	mutex_unlock(&rps->lock);

	rps->pm_iir = 0;
	if (intel_rps_has_interrupts(rps))
		rps_enable_interrupts(rps);
	if (intel_rps_uses_timer(rps))
		rps_start_timer(rps);

	if (GRAPHICS_VER(rps_to_i915(rps)) == 5)
		gen5_rps_update(rps);
}

void intel_rps_park(struct intel_rps *rps)
{
	int adj;

	if (!intel_rps_is_enabled(rps))
		return;

	if (!intel_rps_clear_active(rps))
		return;

	if (intel_rps_uses_timer(rps))
		rps_stop_timer(rps);
	if (intel_rps_has_interrupts(rps))
		rps_disable_interrupts(rps);

	if (rps->last_freq <= rps->idle_freq)
		return;

	/*
	 * The punit delays the write of the frequency and voltage until it
	 * determines the GPU is awake. During normal usage we don't want to
	 * waste power changing the frequency if the GPU is sleeping (rc6).
	 * However, the GPU and driver are now idle and we do not want to delay
	 * switching to minimum voltage (reducing power whilst idle) as we do
	 * not expect to be woken in the near future and so must flush the
	 * change by waking the device.
	 *
	 * We choose to take the media powerwell (either would do to trick the
	 * punit into committing the voltage change) as that takes a lot less
	 * power than the render powerwell.
	 */
	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
	rps_set(rps, rps->idle_freq, false);
	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);

	/*
	 * Since we will try and restart from the previously requested
	 * frequency on unparking, treat this idle point as a downclock
	 * interrupt and reduce the frequency for resume. If we park/unpark
	 * more frequently than the rps worker can run, we will not respond
	 * to any EI and never see a change in frequency.
	 *
	 * (Note we accommodate Cherryview's limitation of only using an
	 * even bin by applying it to all.)
	 */
	adj = rps->last_adj;
	if (adj < 0)
		adj *= 2;
	else /* CHV needs even encode values */
		adj = -2;
	rps->last_adj = adj;
	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
	if (rps->cur_freq < rps->efficient_freq) {
		rps->cur_freq = rps->efficient_freq;
		rps->last_adj = 0;
	}

	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}

u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc;

	if (rps_uses_slpc(rps)) {
		slpc = rps_to_slpc(rps);

		return slpc->boost_freq;
	} else {
		return intel_gpu_freq(rps, rps->boost_freq);
	}
}

static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
{
	bool boost = false;

	/* Validate against (static) hardware limits */
	val = intel_freq_opcode(rps, val);
	if (val < rps->min_freq || val > rps->max_freq)
		return -EINVAL;

	mutex_lock(&rps->lock);
	if (val != rps->boost_freq) {
		rps->boost_freq = val;
		boost = atomic_read(&rps->num_waiters);
	}
	mutex_unlock(&rps->lock);
	if (boost)
		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);

	return 0;
}

int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
{
	struct intel_guc_slpc *slpc;

	if (rps_uses_slpc(rps)) {
		slpc = rps_to_slpc(rps);

		return intel_guc_slpc_set_boost_freq(slpc, freq);
	} else {
		return rps_set_boost_freq(rps, freq);
	}
}

void intel_rps_dec_waiters(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc;

	if (rps_uses_slpc(rps)) {
		slpc = rps_to_slpc(rps);

		intel_guc_slpc_dec_waiters(slpc);
	} else {
		atomic_dec(&rps->num_waiters);
	}
}

void intel_rps_boost(struct i915_request *rq)
{
	struct intel_guc_slpc *slpc;

	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
		return;

	/* Serializes with i915_request_retire() */
	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;

		if (rps_uses_slpc(rps)) {
			slpc = rps_to_slpc(rps);

			if (slpc->min_freq_softlimit >= slpc->boost_freq)
				return;

			/* Return if old value is non-zero */
			if (!atomic_fetch_inc(&slpc->num_waiters)) {
				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
					 rq->fence.context, rq->fence.seqno);
				queue_work(rps_to_gt(rps)->i915->unordered_wq,
					   &slpc->boost_work);
			}

			return;
		}

		if (atomic_fetch_inc(&rps->num_waiters))
			return;

		if (!intel_rps_is_active(rps))
			return;

		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
			 rq->fence.context, rq->fence.seqno);

		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);

		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
	}
}

int intel_rps_set(struct intel_rps *rps, u8 val)
{
	int err;

	lockdep_assert_held(&rps->lock);
	GEM_BUG_ON(val > rps->max_freq);
	GEM_BUG_ON(val < rps->min_freq);

	if (intel_rps_is_active(rps)) {
		err = rps_set(rps, val, true);
		if (err)
			return err;

		/*
		 * Make sure we continue to get interrupts
		 * until we hit the minimum or maximum frequencies.
		 */
		if (intel_rps_has_interrupts(rps)) {
			struct intel_uncore *uncore = rps_to_uncore(rps);

			set(uncore,
			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));

			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
		}
	}

	rps->cur_freq = val;
	return 0;
}

static u32 intel_rps_read_state_cap(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	if (IS_PONTEVECCHIO(i915))
		return intel_uncore_read(uncore, PVC_RP_STATE_CAP);
	else if (IS_XEHPSDV(i915))
		return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
	else if (IS_GEN9_LP(i915))
		return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
	else
		return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
}

static void
mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
		intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
		intel_uncore_read(uncore, MTL_RP_STATE_CAP);
	u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
		intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
		intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);

	/* MTL values are in units of 16.67 MHz */
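	/* (e.g. a raw field value of 18 decodes to 18 * 16.67 = ~300 MHz) */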
	caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
	caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
	caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
}

static void
__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 rp_state_cap;

	rp_state_cap = intel_rps_read_state_cap(rps);

	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
	if (IS_GEN9_LP(i915)) {
		caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
		caps->rp1_freq = (rp_state_cap >> 8) & 0xff;
		caps->min_freq = (rp_state_cap >> 0) & 0xff;
	} else {
		caps->rp0_freq = (rp_state_cap >> 0) & 0xff;
		if (GRAPHICS_VER(i915) >= 10)
			caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
						       intel_uncore_read(to_gt(i915)->uncore,
									 GEN10_FREQ_INFO_REC));
		else
			caps->rp1_freq = (rp_state_cap >> 8) & 0xff;
		caps->min_freq = (rp_state_cap >> 16) & 0xff;
	}

	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
		/*
		 * In this case rp_state_cap register reports frequencies in
		 * units of 50 MHz. Convert these to the actual "hw unit", i.e.
		 * units of 16.67 MHz
		 */
		caps->rp0_freq *= GEN9_FREQ_SCALER;
		caps->rp1_freq *= GEN9_FREQ_SCALER;
		caps->min_freq *= GEN9_FREQ_SCALER;
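		/*
		 * e.g. a 300 MHz cap reads as 6 (units of 50 MHz) and is
		 * stored as 18 (units of 16.67 MHz) after scaling.
		 */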
	}
}

/**
 * gen6_rps_get_freq_caps - Get freq caps exposed by HW
 * @rps: the intel_rps structure
 * @caps: returned freq caps
 *
 * Returned "caps" frequencies should be converted to MHz using
 * intel_gpu_freq()
 */
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return mtl_get_freq_caps(rps, caps);
	else
		return __gen6_rps_get_freq_caps(rps, caps);
}

static void gen6_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_rps_freq_caps caps;

	gen6_rps_get_freq_caps(rps, &caps);
	rps->rp0_freq = caps.rp0_freq;
	rps->rp1_freq = caps.rp1_freq;
	rps->min_freq = caps.min_freq;

	/* hw_max = RP0 until we check for overclocking */
	rps->max_freq = rps->rp0_freq;

	rps->efficient_freq = rps->rp1_freq;
	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
		u32 ddcc_status = 0;
		u32 mult = 1;

		if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
			mult = GEN9_FREQ_SCALER;
		if (snb_pcode_read(rps_to_gt(rps)->uncore,
				   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
				   &ddcc_status, NULL) == 0)
			rps->efficient_freq =
				clamp_t(u32,
					((ddcc_status >> 8) & 0xff) * mult,
					rps->min_freq,
					rps->max_freq);
	}
}

static bool rps_reset(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	/* force a reset */
	rps->power.mode = -1;
	rps->last_freq = -1;

	if (rps_set(rps, rps->min_freq, true)) {
		drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
		return false;
	}

	rps->cur_freq = rps->min_freq;
	return true;
}

/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);
	struct intel_uncore *uncore = gt->uncore;

	/* Program defaults and thresholds for RPS */
	if (GRAPHICS_VER(gt->i915) == 9)
		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
				      GEN9_FREQUENCY(rps->rp1_freq));

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);

	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;

	return rps_reset(rps);
}

static bool gen8_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
			      HSW_FREQUENCY(rps->rp1_freq));

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;

	return rps_reset(rps);
}

static bool gen6_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* Power down if completely idle for over 50ms */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
			  GEN6_PM_RP_DOWN_THRESHOLD |
			  GEN6_PM_RP_DOWN_TIMEOUT);

	return rps_reset(rps);
}

static int chv_rps_max_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_gt *gt = rps_to_gt(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);

	switch (gt->info.sseu.eu_total) {
	case 8:
		/* (2 * 4) config */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
		break;
	case 12:
		/* (2 * 6) config */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
		break;
	case 16:
		/* (2 * 8) config */
	default:
		/* Setting (2 * 8) Min RP0 for any other combination */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
		break;
	}

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static int chv_rps_rpe_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;

	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
}

static int chv_rps_guar_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static u32 chv_rps_min_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static bool chv_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	/* 1: Program defaults and thresholds for RPS */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	/* 2: Enable RPS */
	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
			      GEN6_RP_MEDIA_IS_GFX |
			      GEN6_RP_ENABLE |
			      GEN6_RP_UP_BUSY_AVG |
			      GEN6_RP_DOWN_IDLE_AVG);

	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
			  GEN6_PM_RP_DOWN_THRESHOLD |
			  GEN6_PM_RP_DOWN_TIMEOUT);

	/* Setting Fixed Bias */
	vlv_punit_get(i915);

	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);

	vlv_punit_put(i915);

	/* RPS code assumes GPLL is used */
	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
		      "GPLL not enabled\n");

	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
		str_yes_no(val & GPLLENABLE));
	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);

	return rps_reset(rps);
}

static int vlv_rps_guar_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rp1;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);

	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;

	return rp1;
}

static int vlv_rps_max_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rp0;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);

	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
	/* Clamp to max */
	rp0 = min_t(u32, rp0, 0xea);

	return rp0;
}

static int vlv_rps_rpe_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rpe;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;

	return rpe;
}

static int vlv_rps_min_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
	/*
	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
	 * a frequency Punit will not allow values below 0xc0. Clamp it to
	 * 0xc0 to make sure it matches what Punit accepts.
	 */
	return max_t(u32, val, 0xc0);
}

static bool vlv_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
			      GEN6_RP_MEDIA_TURBO |
			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
			      GEN6_RP_MEDIA_IS_GFX |
			      GEN6_RP_ENABLE |
			      GEN6_RP_UP_BUSY_AVG |
			      GEN6_RP_DOWN_IDLE_CONT);

	/* WaGsvRC0ResidencyMethod:vlv */
	rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;

	vlv_punit_get(i915);

	/* Setting Fixed Bias */
	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);

	vlv_punit_put(i915);

	/* RPS code assumes GPLL is used */
	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
		      "GPLL not enabled\n");

	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
		str_yes_no(val & GPLLENABLE));
	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);

	return rps_reset(rps);
}

static unsigned long __ips_gfx_val(struct intel_ips *ips)
{
	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	unsigned int t, state1, state2;
	u32 pxvid, ext_v;
	u64 corr, corr2;

	lockdep_assert_held(&mchdev_lock);

	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
	pxvid = (pxvid >> 24) & 0x7f;
	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);

	state1 = ext_v;

	/* Revel in the empirically derived constants */

	/* Correction factor in 1/100000 units */
	t = ips_mch_val(uncore);
	if (t > 80)
		corr = t * 2349 + 135940;
	else if (t >= 50)
		corr = t * 964 + 29317;
	else /* < 50 */
		corr = t * 301 + 1004;

	corr = div_u64(corr * 150142 * state1, 10000) - 78642;
	corr2 = div_u64(corr, 100000) * ips->corr;

	state2 = div_u64(corr2 * state1, 10000);
	state2 /= 100; /* convert to mW */

	__gen5_ips_update(ips);

	return ips->gfx_power + state2;
}

static bool has_busy_stats(struct intel_rps *rps)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, rps_to_gt(rps), id) {
		if (!intel_engine_supports_stats(engine))
			return false;
	}

	return true;
}

void intel_rps_enable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	bool enabled = false;

	if (!HAS_RPS(i915))
		return;

	if (rps_uses_slpc(rps))
		return;

	intel_gt_check_clock_frequency(rps_to_gt(rps));

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
	if (rps->max_freq <= rps->min_freq)
		/* leave disabled, no room for dynamic reclocking */;
	else if (IS_CHERRYVIEW(i915))
		enabled = chv_rps_enable(rps);
	else if (IS_VALLEYVIEW(i915))
		enabled = vlv_rps_enable(rps);
	else if (GRAPHICS_VER(i915) >= 9)
		enabled = gen9_rps_enable(rps);
	else if (GRAPHICS_VER(i915) >= 8)
		enabled = gen8_rps_enable(rps);
	else if (GRAPHICS_VER(i915) >= 6)
		enabled = gen6_rps_enable(rps);
	else if (IS_IRONLAKE_M(i915))
		enabled = gen5_rps_enable(rps);
	else
		MISSING_CASE(GRAPHICS_VER(i915));
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	if (!enabled)
		return;

	GT_TRACE(rps_to_gt(rps),
		 "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n",
		 rps->min_freq, rps->max_freq,
		 intel_gpu_freq(rps, rps->min_freq),
		 intel_gpu_freq(rps, rps->max_freq),
		 rps->power.up_threshold,
		 rps->power.down_threshold);

	GEM_BUG_ON(rps->max_freq < rps->min_freq);
	GEM_BUG_ON(rps->idle_freq > rps->max_freq);

	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);

	if (has_busy_stats(rps))
		intel_rps_set_timer(rps);
	else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
		intel_rps_set_interrupts(rps);
	else
		/* Ironlake currently uses intel_ips.ko */ {}

	intel_rps_set_enabled(rps);
}

static void gen6_rps_disable(struct intel_rps *rps)
{
	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
}

void intel_rps_disable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (!intel_rps_is_enabled(rps))
		return;

	intel_rps_clear_enabled(rps);
	intel_rps_clear_interrupts(rps);
	intel_rps_clear_timer(rps);

	if (GRAPHICS_VER(i915) >= 6)
		gen6_rps_disable(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_disable(rps);
}

static int byt_gpu_freq(struct intel_rps *rps, int val)
{
	/*
	 * N = val - 0xb7
	 * Slow = Fast = GPLL ref * N
	 */
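	/* e.g. val = 0xc0 gives N = 9, i.e. 9 * gpll_ref_freq / 1000 MHz */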
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
}

static int byt_freq_opcode(struct intel_rps *rps, int val)
{
	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
}

static int chv_gpu_freq(struct intel_rps *rps, int val)
{
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
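	/* e.g. val = 12 gives N = 6, i.e. 6 * gpll_ref_freq / 2000 MHz */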
1628 | return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); |
1629 | } |
1630 | |
1631 | static int chv_freq_opcode(struct intel_rps *rps, int val) |
1632 | { |
1633 | /* CHV needs even values */ |
1634 | return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; |
1635 | } |
1636 | |
1637 | int intel_gpu_freq(struct intel_rps *rps, int val) |
1638 | { |
1639 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1640 | |
1641 | if (GRAPHICS_VER(i915) >= 9) |
1642 | return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, |
1643 | GEN9_FREQ_SCALER); |
1644 | else if (IS_CHERRYVIEW(i915)) |
1645 | return chv_gpu_freq(rps, val); |
1646 | else if (IS_VALLEYVIEW(i915)) |
1647 | return byt_gpu_freq(rps, val); |
1648 | else if (GRAPHICS_VER(i915) >= 6) |
1649 | return val * GT_FREQUENCY_MULTIPLIER; |
1650 | else |
1651 | return val; |
1652 | } |
1653 | |
1654 | int intel_freq_opcode(struct intel_rps *rps, int val) |
1655 | { |
1656 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1657 | |
1658 | if (GRAPHICS_VER(i915) >= 9) |
1659 | return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, |
1660 | GT_FREQUENCY_MULTIPLIER); |
1661 | else if (IS_CHERRYVIEW(i915)) |
1662 | return chv_freq_opcode(rps, val); |
1663 | else if (IS_VALLEYVIEW(i915)) |
1664 | return byt_freq_opcode(rps, val); |
1665 | else if (GRAPHICS_VER(i915) >= 6) |
1666 | return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); |
1667 | else |
1668 | return val; |
1669 | } |
1670 | |
1671 | static void vlv_init_gpll_ref_freq(struct intel_rps *rps) |
1672 | { |
1673 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1674 | |
1675 | rps->gpll_ref_freq = |
1676 | vlv_get_cck_clock(dev_priv: i915, name: "GPLL ref" , |
1677 | CCK_GPLL_CLOCK_CONTROL, |
1678 | ref_freq: i915->czclk_freq); |
1679 | |
1680 | drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n" , |
1681 | rps->gpll_ref_freq); |
1682 | } |
1683 | |
1684 | static void vlv_rps_init(struct intel_rps *rps) |
1685 | { |
1686 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1687 | |
1688 | vlv_iosf_sb_get(i915, |
1689 | BIT(VLV_IOSF_SB_PUNIT) | |
1690 | BIT(VLV_IOSF_SB_NC) | |
1691 | BIT(VLV_IOSF_SB_CCK)); |
1692 | |
1693 | vlv_init_gpll_ref_freq(rps); |
1694 | |
1695 | rps->max_freq = vlv_rps_max_freq(rps); |
1696 | rps->rp0_freq = rps->max_freq; |
1697 | drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n" , |
1698 | intel_gpu_freq(rps, rps->max_freq), rps->max_freq); |
1699 | |
1700 | rps->efficient_freq = vlv_rps_rpe_freq(rps); |
1701 | drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n" , |
1702 | intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); |
1703 | |
1704 | rps->rp1_freq = vlv_rps_guar_freq(rps); |
1705 | drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n" , |
1706 | intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); |
1707 | |
1708 | rps->min_freq = vlv_rps_min_freq(rps); |
1709 | drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n" , |
1710 | intel_gpu_freq(rps, rps->min_freq), rps->min_freq); |
1711 | |
1712 | vlv_iosf_sb_put(i915, |
1713 | BIT(VLV_IOSF_SB_PUNIT) | |
1714 | BIT(VLV_IOSF_SB_NC) | |
1715 | BIT(VLV_IOSF_SB_CCK)); |
1716 | } |
1717 | |
1718 | static void chv_rps_init(struct intel_rps *rps) |
1719 | { |
1720 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1721 | |
1722 | vlv_iosf_sb_get(i915, |
1723 | BIT(VLV_IOSF_SB_PUNIT) | |
1724 | BIT(VLV_IOSF_SB_NC) | |
1725 | BIT(VLV_IOSF_SB_CCK)); |
1726 | |
1727 | vlv_init_gpll_ref_freq(rps); |
1728 | |
1729 | rps->max_freq = chv_rps_max_freq(rps); |
1730 | rps->rp0_freq = rps->max_freq; |
1731 | drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n" , |
1732 | intel_gpu_freq(rps, rps->max_freq), rps->max_freq); |
1733 | |
1734 | rps->efficient_freq = chv_rps_rpe_freq(rps); |
1735 | drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n" , |
1736 | intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); |
1737 | |
1738 | rps->rp1_freq = chv_rps_guar_freq(rps); |
1739 | drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n" , |
1740 | intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); |
1741 | |
1742 | rps->min_freq = chv_rps_min_freq(rps); |
1743 | drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n" , |
1744 | intel_gpu_freq(rps, rps->min_freq), rps->min_freq); |
1745 | |
1746 | vlv_iosf_sb_put(i915, |
1747 | BIT(VLV_IOSF_SB_PUNIT) | |
1748 | BIT(VLV_IOSF_SB_NC) | |
1749 | BIT(VLV_IOSF_SB_CCK)); |
1750 | |
1751 | drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | |
1752 | rps->rp1_freq | rps->min_freq) & 1, |
1753 | "Odd GPU freq values\n" ); |
1754 | } |
1755 | |
1756 | static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) |
1757 | { |
1758 | ei->ktime = ktime_get_raw(); |
1759 | ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); |
1760 | ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); |
1761 | } |
1762 | |
1763 | static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) |
1764 | { |
1765 | struct intel_uncore *uncore = rps_to_uncore(rps); |
1766 | const struct intel_rps_ei *prev = &rps->ei; |
1767 | struct intel_rps_ei now; |
1768 | u32 events = 0; |
1769 | |
1770 | if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) |
1771 | return 0; |
1772 | |
vlv_c0_read(uncore, &now);
1774 | |
1775 | if (prev->ktime) { |
1776 | u64 time, c0; |
1777 | u32 render, media; |
1778 | |
time = ktime_us_delta(now.ktime, prev->ktime);
1780 | |
1781 | time *= rps_to_i915(rps)->czclk_freq; |
1782 | |
1783 | /* Workload can be split between render + media, |
1784 | * e.g. SwapBuffers being blitted in X after being rendered in |
1785 | * mesa. To account for this we need to combine both engines |
1786 | * into our activity counter. |
1787 | */ |
1788 | render = now.render_c0 - prev->render_c0; |
1789 | media = now.media_c0 - prev->media_c0; |
1790 | c0 = max(render, media); |
1791 | c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ |
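
/*
 * After the scaling above both sides of the comparisons below are
 * in the same fixed-point units, so with the default up_threshold
 * of 95 the up event fires only once the busiest counter exceeds
 * 95% of the elapsed interval.
 */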
1792 | |
1793 | if (c0 > time * rps->power.up_threshold) |
1794 | events = GEN6_PM_RP_UP_THRESHOLD; |
1795 | else if (c0 < time * rps->power.down_threshold) |
1796 | events = GEN6_PM_RP_DOWN_THRESHOLD; |
1797 | } |
1798 | |
1799 | rps->ei = now; |
1800 | return events; |
1801 | } |
1802 | |
1803 | static void rps_work(struct work_struct *work) |
1804 | { |
1805 | struct intel_rps *rps = container_of(work, typeof(*rps), work); |
1806 | struct intel_gt *gt = rps_to_gt(rps); |
1807 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1808 | bool client_boost = false; |
1809 | int new_freq, adj, min, max; |
1810 | u32 pm_iir = 0; |
1811 | |
spin_lock_irq(gt->irq_lock);
pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(gt->irq_lock);
1816 | |
1817 | /* Make sure we didn't queue anything we're not going to process. */ |
1818 | if (!pm_iir && !client_boost) |
1819 | goto out; |
1820 | |
1821 | mutex_lock(&rps->lock); |
1822 | if (!intel_rps_is_active(rps)) { |
mutex_unlock(&rps->lock);
1824 | return; |
1825 | } |
1826 | |
1827 | pm_iir |= vlv_wa_c0_ei(rps, pm_iir); |
1828 | |
1829 | adj = rps->last_adj; |
1830 | new_freq = rps->cur_freq; |
1831 | min = rps->min_freq_softlimit; |
1832 | max = rps->max_freq_softlimit; |
1833 | if (client_boost) |
1834 | max = rps->max_freq; |
1835 | |
1836 | GT_TRACE(gt, |
1837 | "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n" , |
1838 | pm_iir, str_yes_no(client_boost), |
1839 | adj, new_freq, min, max); |
1840 | |
1841 | if (client_boost && new_freq < rps->boost_freq) { |
1842 | new_freq = rps->boost_freq; |
1843 | adj = 0; |
1844 | } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { |
1845 | if (adj > 0) |
1846 | adj *= 2; |
1847 | else /* CHV needs even encode values */ |
1848 | adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; |
1849 | |
1850 | if (new_freq >= rps->max_freq_softlimit) |
1851 | adj = 0; |
1852 | } else if (client_boost) { |
1853 | adj = 0; |
1854 | } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { |
1855 | if (rps->cur_freq > rps->efficient_freq) |
1856 | new_freq = rps->efficient_freq; |
1857 | else if (rps->cur_freq > rps->min_freq_softlimit) |
1858 | new_freq = rps->min_freq_softlimit; |
1859 | adj = 0; |
1860 | } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { |
1861 | if (adj < 0) |
1862 | adj *= 2; |
1863 | else /* CHV needs even encode values */ |
1864 | adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; |
1865 | |
1866 | if (new_freq <= rps->min_freq_softlimit) |
1867 | adj = 0; |
1868 | } else { /* unknown event */ |
1869 | adj = 0; |
1870 | } |
1871 | |
1872 | /* |
1873 | * sysfs frequency limits may have snuck in while |
1874 | * servicing the interrupt |
1875 | */ |
1876 | new_freq += adj; |
1877 | new_freq = clamp_t(int, new_freq, min, max); |
1878 | |
if (intel_rps_set(rps, new_freq)) {
drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
1881 | adj = 0; |
1882 | } |
1883 | rps->last_adj = adj; |
1884 | |
mutex_unlock(&rps->lock);
1886 | |
1887 | out: |
spin_lock_irq(gt->irq_lock);
gen6_gt_pm_unmask_irq(gt, rps->pm_events);
spin_unlock_irq(gt->irq_lock);
1891 | } |
1892 | |
1893 | void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) |
1894 | { |
1895 | struct intel_gt *gt = rps_to_gt(rps); |
1896 | const u32 events = rps->pm_events & pm_iir; |
1897 | |
1898 | lockdep_assert_held(gt->irq_lock); |
1899 | |
1900 | if (unlikely(!events)) |
1901 | return; |
1902 | |
1903 | GT_TRACE(gt, "irq events:%x\n" , events); |
1904 | |
1905 | gen6_gt_pm_mask_irq(gt, mask: events); |
1906 | |
1907 | rps->pm_iir |= events; |
1908 | queue_work(wq: gt->i915->unordered_wq, work: &rps->work); |
1909 | } |
1910 | |
1911 | void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) |
1912 | { |
1913 | struct intel_gt *gt = rps_to_gt(rps); |
1914 | u32 events; |
1915 | |
1916 | events = pm_iir & rps->pm_events; |
1917 | if (events) { |
spin_lock(gt->irq_lock);

GT_TRACE(gt, "irq events:%x\n", events);

gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;

queue_work(gt->i915->unordered_wq, &rps->work);
spin_unlock(gt->irq_lock);
1927 | } |
1928 | |
1929 | if (GRAPHICS_VER(gt->i915) >= 8) |
1930 | return; |
1931 | |
1932 | if (pm_iir & PM_VEBOX_USER_INTERRUPT) |
intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
1934 | |
1935 | if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) |
1936 | drm_dbg(&rps_to_i915(rps)->drm, |
1937 | "Command parser error, pm_iir 0x%08x\n" , pm_iir); |
1938 | } |
1939 | |
1940 | void gen5_rps_irq_handler(struct intel_rps *rps) |
1941 | { |
1942 | struct intel_uncore *uncore = rps_to_uncore(rps); |
1943 | u32 busy_up, busy_down, max_avg, min_avg; |
1944 | u8 new_freq; |
1945 | |
spin_lock(&mchdev_lock);
1947 | |
1948 | intel_uncore_write16(uncore, |
1949 | MEMINTRSTS, |
intel_uncore_read(uncore, MEMINTRSTS));
1951 | |
1952 | intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); |
1953 | busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); |
1954 | busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); |
1955 | max_avg = intel_uncore_read(uncore, RCBMAXAVG); |
1956 | min_avg = intel_uncore_read(uncore, RCBMINAVG); |
1957 | |
1958 | /* Handle RCS change request from hw */ |
1959 | new_freq = rps->cur_freq; |
1960 | if (busy_up > max_avg) |
1961 | new_freq++; |
1962 | else if (busy_down < min_avg) |
1963 | new_freq--; |
1964 | new_freq = clamp(new_freq, |
1965 | rps->min_freq_softlimit, |
1966 | rps->max_freq_softlimit); |
1967 | |
if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
1969 | rps->cur_freq = new_freq; |
1970 | |
spin_unlock(&mchdev_lock);
1972 | } |
1973 | |
1974 | void intel_rps_init_early(struct intel_rps *rps) |
1975 | { |
1976 | mutex_init(&rps->lock); |
1977 | mutex_init(&rps->power.mutex); |
1978 | |
1979 | INIT_WORK(&rps->work, rps_work); |
1980 | timer_setup(&rps->timer, rps_timer, 0); |
1981 | |
atomic_set(&rps->num_waiters, 0);
1983 | } |
1984 | |
1985 | void intel_rps_init(struct intel_rps *rps) |
1986 | { |
1987 | struct drm_i915_private *i915 = rps_to_i915(rps); |
1988 | |
1989 | if (rps_uses_slpc(rps)) |
1990 | return; |
1991 | |
1992 | if (IS_CHERRYVIEW(i915)) |
1993 | chv_rps_init(rps); |
1994 | else if (IS_VALLEYVIEW(i915)) |
1995 | vlv_rps_init(rps); |
1996 | else if (GRAPHICS_VER(i915) >= 6) |
1997 | gen6_rps_init(rps); |
1998 | else if (IS_IRONLAKE_M(i915)) |
1999 | gen5_rps_init(rps); |
2000 | |
2001 | /* Derive initial user preferences/limits from the hardware limits */ |
2002 | rps->max_freq_softlimit = rps->max_freq; |
2003 | rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit; |
2004 | rps->min_freq_softlimit = rps->min_freq; |
2005 | rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit; |
2006 | |
2007 | /* After setting max-softlimit, find the overclock max freq */ |
2008 | if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { |
2009 | u32 params = 0; |
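
/*
 * Illustrative decode of the pcode reply read below (the value is
 * made up): params = 0x8000001e has BIT(31) set, i.e. overclocking
 * is supported, and its low byte 0x1e gives an overclock ceiling of
 * 30 * 50 = 1500 MHz.
 */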
2010 | |
snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
if (params & BIT(31)) { /* OC supported */
drm_dbg(&i915->drm,
"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
2015 | (rps->max_freq & 0xff) * 50, |
2016 | (params & 0xff) * 50); |
2017 | rps->max_freq = params & 0xff; |
2018 | } |
2019 | } |
2020 | |
2021 | /* Set default thresholds in % */ |
2022 | rps->power.up_threshold = 95; |
2023 | rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold; |
2024 | rps->power.down_threshold = 85; |
2025 | rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold; |
2026 | |
2027 | /* Finally allow us to boost to max by default */ |
2028 | rps->boost_freq = rps->max_freq; |
2029 | rps->idle_freq = rps->min_freq; |
2030 | |
2031 | /* Start in the middle, from here we will autotune based on workload */ |
2032 | rps->cur_freq = rps->efficient_freq; |
2033 | |
2034 | rps->pm_intrmsk_mbz = 0; |
2035 | |
2036 | /* |
* SNB,IVB,HSW can hard hang, and VLV,CHV may hard hang, on a looping
* batchbuffer if GEN6_PM_UP_EI_EXPIRED is masked.
2039 | * |
2040 | * TODO: verify if this can be reproduced on VLV,CHV. |
2041 | */ |
2042 | if (GRAPHICS_VER(i915) <= 7) |
2043 | rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; |
2044 | |
2045 | if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) |
2046 | rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; |
2047 | |
2048 | /* GuC needs ARAT expired interrupt unmasked */ |
if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
2050 | rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; |
2051 | } |
2052 | |
2053 | void intel_rps_sanitize(struct intel_rps *rps) |
2054 | { |
2055 | if (rps_uses_slpc(rps)) |
2056 | return; |
2057 | |
2058 | if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) |
2059 | rps_disable_interrupts(rps); |
2060 | } |
2061 | |
2062 | u32 intel_rps_read_rpstat(struct intel_rps *rps) |
2063 | { |
2064 | struct drm_i915_private *i915 = rps_to_i915(rps); |
2065 | i915_reg_t rpstat; |
2066 | |
2067 | rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; |
2068 | |
return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
2070 | } |
2071 | |
2072 | static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) |
2073 | { |
2074 | struct drm_i915_private *i915 = rps_to_i915(rps); |
2075 | u32 cagf; |
2076 | |
2077 | if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) |
2078 | cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat); |
2079 | else if (GRAPHICS_VER(i915) >= 12) |
2080 | cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); |
2081 | else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) |
2082 | cagf = REG_FIELD_GET(RPE_MASK, rpstat); |
2083 | else if (GRAPHICS_VER(i915) >= 9) |
2084 | cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); |
2085 | else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) |
2086 | cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat); |
2087 | else if (GRAPHICS_VER(i915) >= 6) |
2088 | cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat); |
2089 | else |
2090 | cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat)); |
2091 | |
2092 | return cagf; |
2093 | } |
2094 | |
2095 | static u32 __read_cagf(struct intel_rps *rps, bool take_fw) |
2096 | { |
2097 | struct drm_i915_private *i915 = rps_to_i915(rps); |
2098 | struct intel_uncore *uncore = rps_to_uncore(rps); |
2099 | i915_reg_t r = INVALID_MMIO_REG; |
2100 | u32 freq; |
2101 | |
2102 | /* |
2103 | * For Gen12+ reading freq from HW does not need a forcewake and |
2104 | * registers will return 0 freq when GT is in RC6 |
2105 | */ |
2106 | if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { |
2107 | r = MTL_MIRROR_TARGET_WP1; |
2108 | } else if (GRAPHICS_VER(i915) >= 12) { |
2109 | r = GEN12_RPSTAT1; |
2110 | } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { |
2111 | vlv_punit_get(i915); |
2112 | freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); |
2113 | vlv_punit_put(i915); |
2114 | } else if (GRAPHICS_VER(i915) >= 6) { |
2115 | r = GEN6_RPSTAT1; |
2116 | } else { |
2117 | r = MEMSTAT_ILK; |
2118 | } |
2119 | |
2120 | if (i915_mmio_reg_valid(r)) |
freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);

return intel_rps_get_cagf(rps, freq);
2124 | } |
2125 | |
2126 | static u32 read_cagf(struct intel_rps *rps) |
2127 | { |
return __read_cagf(rps, true);
2129 | } |
2130 | |
2131 | u32 intel_rps_read_actual_frequency(struct intel_rps *rps) |
2132 | { |
2133 | struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; |
2134 | intel_wakeref_t wakeref; |
2135 | u32 freq = 0; |
2136 | |
2137 | with_intel_runtime_pm_if_in_use(rpm, wakeref) |
freq = intel_gpu_freq(rps, read_cagf(rps));
2139 | |
2140 | return freq; |
2141 | } |
2142 | |
2143 | u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) |
2144 | { |
return intel_gpu_freq(rps, __read_cagf(rps, false));
2146 | } |
2147 | |
2148 | static u32 intel_rps_read_punit_req(struct intel_rps *rps) |
2149 | { |
2150 | struct intel_uncore *uncore = rps_to_uncore(rps); |
2151 | struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; |
2152 | intel_wakeref_t wakeref; |
2153 | u32 freq = 0; |
2154 | |
2155 | with_intel_runtime_pm_if_in_use(rpm, wakeref) |
2156 | freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); |
2157 | |
2158 | return freq; |
2159 | } |
2160 | |
2161 | static u32 intel_rps_get_req(u32 pureq) |
2162 | { |
2163 | u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; |
2164 | |
2165 | return req; |
2166 | } |
2167 | |
2168 | u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) |
2169 | { |
u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
2171 | |
return intel_gpu_freq(rps, freq);
2173 | } |
2174 | |
2175 | u32 intel_rps_get_requested_frequency(struct intel_rps *rps) |
2176 | { |
2177 | if (rps_uses_slpc(rps)) |
2178 | return intel_rps_read_punit_req_frequency(rps); |
2179 | else |
return intel_gpu_freq(rps, rps->cur_freq);
2181 | } |
2182 | |
2183 | u32 intel_rps_get_max_frequency(struct intel_rps *rps) |
2184 | { |
2185 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2186 | |
2187 | if (rps_uses_slpc(rps)) |
2188 | return slpc->max_freq_softlimit; |
2189 | else |
return intel_gpu_freq(rps, rps->max_freq_softlimit);
2191 | } |
2192 | |
2193 | /** |
* intel_rps_get_max_raw_freq - returns the max frequency in raw hardware units.
2195 | * @rps: the intel_rps structure |
2196 | * |
* Returns the max frequency in raw hardware units. On newer platforms the
* raw unit is 50 MHz.
2199 | */ |
2200 | u32 intel_rps_get_max_raw_freq(struct intel_rps *rps) |
2201 | { |
2202 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2203 | u32 freq; |
2204 | |
2205 | if (rps_uses_slpc(rps)) { |
2206 | return DIV_ROUND_CLOSEST(slpc->rp0_freq, |
2207 | GT_FREQUENCY_MULTIPLIER); |
2208 | } else { |
2209 | freq = rps->max_freq; |
2210 | if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { |
2211 | /* Convert GT frequency to 50 MHz units */ |
2212 | freq /= GEN9_FREQ_SCALER; |
2213 | } |
2214 | return freq; |
2215 | } |
2216 | } |
2217 | |
2218 | u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) |
2219 | { |
2220 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2221 | |
2222 | if (rps_uses_slpc(rps)) |
2223 | return slpc->rp0_freq; |
2224 | else |
return intel_gpu_freq(rps, rps->rp0_freq);
2226 | } |
2227 | |
2228 | u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) |
2229 | { |
2230 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2231 | |
2232 | if (rps_uses_slpc(rps)) |
2233 | return slpc->rp1_freq; |
2234 | else |
return intel_gpu_freq(rps, rps->rp1_freq);
2236 | } |
2237 | |
2238 | u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) |
2239 | { |
2240 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2241 | |
2242 | if (rps_uses_slpc(rps)) |
2243 | return slpc->min_freq; |
2244 | else |
return intel_gpu_freq(rps, rps->min_freq);
2246 | } |
2247 | |
2248 | static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) |
2249 | { |
2250 | struct intel_gt *gt = rps_to_gt(rps); |
2251 | struct drm_i915_private *i915 = gt->i915; |
2252 | struct intel_uncore *uncore = gt->uncore; |
2253 | struct intel_rps_freq_caps caps; |
2254 | u32 rp_state_limits; |
2255 | u32 gt_perf_status; |
2256 | u32 rpmodectl, rpinclimit, rpdeclimit; |
2257 | u32 rpstat, cagf, reqf; |
2258 | u32 rpcurupei, rpcurup, rpprevup; |
2259 | u32 rpcurdownei, rpcurdown, rpprevdown; |
2260 | u32 rpupei, rpupt, rpdownei, rpdownt; |
2261 | u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; |
2262 | |
2263 | rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); |
gen6_rps_get_freq_caps(rps, &caps);
2265 | if (IS_GEN9_LP(i915)) |
2266 | gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); |
2267 | else |
2268 | gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); |
2269 | |
2270 | /* RPSTAT1 is in the GT power well */ |
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
2272 | |
2273 | reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); |
2274 | if (GRAPHICS_VER(i915) >= 9) { |
2275 | reqf >>= 23; |
2276 | } else { |
2277 | reqf &= ~GEN6_TURBO_DISABLE; |
2278 | if (IS_HASWELL(i915) || IS_BROADWELL(i915)) |
2279 | reqf >>= 24; |
2280 | else |
2281 | reqf >>= 25; |
2282 | } |
reqf = intel_gpu_freq(rps, reqf);
2284 | |
2285 | rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); |
2286 | rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); |
2287 | rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); |
2288 | |
2289 | rpstat = intel_rps_read_rpstat(rps); |
2290 | rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; |
2291 | rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; |
2292 | rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; |
2293 | rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; |
2294 | rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; |
2295 | rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; |
2296 | |
2297 | rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); |
2298 | rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); |
2299 | |
2300 | rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); |
2301 | rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); |
2302 | |
2303 | cagf = intel_rps_read_actual_frequency(rps); |
2304 | |
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
2306 | |
2307 | if (GRAPHICS_VER(i915) >= 11) { |
2308 | pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); |
2309 | pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); |
2310 | /* |
2311 | * The equivalent to the PM ISR & IIR cannot be read |
2312 | * without affecting the current state of the system |
2313 | */ |
2314 | pm_isr = 0; |
2315 | pm_iir = 0; |
2316 | } else if (GRAPHICS_VER(i915) >= 8) { |
2317 | pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); |
2318 | pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); |
2319 | pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); |
2320 | pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); |
2321 | } else { |
2322 | pm_ier = intel_uncore_read(uncore, GEN6_PMIER); |
2323 | pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); |
2324 | pm_isr = intel_uncore_read(uncore, GEN6_PMISR); |
2325 | pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); |
2326 | } |
2327 | pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); |
2328 | |
2329 | drm_printf(p, f: "Video Turbo Mode: %s\n" , |
2330 | str_yes_no(v: rpmodectl & GEN6_RP_MEDIA_TURBO)); |
2331 | drm_printf(p, f: "HW control enabled: %s\n" , |
2332 | str_yes_no(v: rpmodectl & GEN6_RP_ENABLE)); |
2333 | drm_printf(p, f: "SW control enabled: %s\n" , |
2334 | str_yes_no(v: (rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); |
2335 | |
2336 | drm_printf(p, f: "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n" , |
2337 | pm_ier, pm_imr, pm_mask); |
2338 | if (GRAPHICS_VER(i915) <= 10) |
2339 | drm_printf(p, f: "PM ISR=0x%08x IIR=0x%08x\n" , |
2340 | pm_isr, pm_iir); |
2341 | drm_printf(p, f: "pm_intrmsk_mbz: 0x%08x\n" , |
2342 | rps->pm_intrmsk_mbz); |
2343 | drm_printf(p, f: "GT_PERF_STATUS: 0x%08x\n" , gt_perf_status); |
2344 | drm_printf(p, f: "Render p-state ratio: %d\n" , |
2345 | (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); |
2346 | drm_printf(p, f: "Render p-state VID: %d\n" , |
2347 | gt_perf_status & 0xff); |
2348 | drm_printf(p, f: "Render p-state limit: %d\n" , |
2349 | rp_state_limits & 0xff); |
2350 | drm_printf(p, f: "RPSTAT1: 0x%08x\n" , rpstat); |
2351 | drm_printf(p, f: "RPMODECTL: 0x%08x\n" , rpmodectl); |
2352 | drm_printf(p, f: "RPINCLIMIT: 0x%08x\n" , rpinclimit); |
2353 | drm_printf(p, f: "RPDECLIMIT: 0x%08x\n" , rpdeclimit); |
2354 | drm_printf(p, f: "RPNSWREQ: %dMHz\n" , reqf); |
2355 | drm_printf(p, f: "CAGF: %dMHz\n" , cagf); |
2356 | drm_printf(p, f: "RP CUR UP EI: %d (%lldns)\n" , |
2357 | rpcurupei, |
2358 | intel_gt_pm_interval_to_ns(gt, count: rpcurupei)); |
2359 | drm_printf(p, f: "RP CUR UP: %d (%lldns)\n" , |
2360 | rpcurup, intel_gt_pm_interval_to_ns(gt, count: rpcurup)); |
2361 | drm_printf(p, f: "RP PREV UP: %d (%lldns)\n" , |
2362 | rpprevup, intel_gt_pm_interval_to_ns(gt, count: rpprevup)); |
2363 | drm_printf(p, f: "Up threshold: %d%%\n" , |
2364 | rps->power.up_threshold); |
2365 | drm_printf(p, f: "RP UP EI: %d (%lldns)\n" , |
2366 | rpupei, intel_gt_pm_interval_to_ns(gt, count: rpupei)); |
2367 | drm_printf(p, f: "RP UP THRESHOLD: %d (%lldns)\n" , |
2368 | rpupt, intel_gt_pm_interval_to_ns(gt, count: rpupt)); |
2369 | |
2370 | drm_printf(p, f: "RP CUR DOWN EI: %d (%lldns)\n" , |
2371 | rpcurdownei, |
2372 | intel_gt_pm_interval_to_ns(gt, count: rpcurdownei)); |
2373 | drm_printf(p, f: "RP CUR DOWN: %d (%lldns)\n" , |
2374 | rpcurdown, |
2375 | intel_gt_pm_interval_to_ns(gt, count: rpcurdown)); |
2376 | drm_printf(p, f: "RP PREV DOWN: %d (%lldns)\n" , |
2377 | rpprevdown, |
2378 | intel_gt_pm_interval_to_ns(gt, count: rpprevdown)); |
2379 | drm_printf(p, f: "Down threshold: %d%%\n" , |
2380 | rps->power.down_threshold); |
2381 | drm_printf(p, f: "RP DOWN EI: %d (%lldns)\n" , |
2382 | rpdownei, intel_gt_pm_interval_to_ns(gt, count: rpdownei)); |
2383 | drm_printf(p, f: "RP DOWN THRESHOLD: %d (%lldns)\n" , |
2384 | rpdownt, intel_gt_pm_interval_to_ns(gt, count: rpdownt)); |
2385 | |
2386 | drm_printf(p, f: "Lowest (RPN) frequency: %dMHz\n" , |
2387 | intel_gpu_freq(rps, val: caps.min_freq)); |
2388 | drm_printf(p, f: "Nominal (RP1) frequency: %dMHz\n" , |
2389 | intel_gpu_freq(rps, val: caps.rp1_freq)); |
2390 | drm_printf(p, f: "Max non-overclocked (RP0) frequency: %dMHz\n" , |
2391 | intel_gpu_freq(rps, val: caps.rp0_freq)); |
2392 | drm_printf(p, f: "Max overclocked frequency: %dMHz\n" , |
2393 | intel_gpu_freq(rps, val: rps->max_freq)); |
2394 | |
2395 | drm_printf(p, f: "Current freq: %d MHz\n" , |
2396 | intel_gpu_freq(rps, val: rps->cur_freq)); |
2397 | drm_printf(p, f: "Actual freq: %d MHz\n" , cagf); |
2398 | drm_printf(p, f: "Idle freq: %d MHz\n" , |
2399 | intel_gpu_freq(rps, val: rps->idle_freq)); |
2400 | drm_printf(p, f: "Min freq: %d MHz\n" , |
2401 | intel_gpu_freq(rps, val: rps->min_freq)); |
2402 | drm_printf(p, f: "Boost freq: %d MHz\n" , |
2403 | intel_gpu_freq(rps, val: rps->boost_freq)); |
2404 | drm_printf(p, f: "Max freq: %d MHz\n" , |
2405 | intel_gpu_freq(rps, val: rps->max_freq)); |
2406 | drm_printf(p, |
2407 | f: "efficient (RPe) frequency: %d MHz\n" , |
2408 | intel_gpu_freq(rps, val: rps->efficient_freq)); |
2409 | } |
2410 | |
2411 | static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) |
2412 | { |
2413 | struct intel_gt *gt = rps_to_gt(rps); |
2414 | struct intel_uncore *uncore = gt->uncore; |
2415 | struct intel_rps_freq_caps caps; |
2416 | u32 pm_mask; |
2417 | |
gen6_rps_get_freq_caps(rps, &caps);
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);

drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.min_freq));
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp1_freq));
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp0_freq));
drm_printf(p, "Current freq: %d MHz\n",
intel_rps_get_requested_frequency(rps));
drm_printf(p, "Actual freq: %d MHz\n",
intel_rps_read_actual_frequency(rps));
drm_printf(p, "Min freq: %d MHz\n",
intel_rps_get_min_frequency(rps));
drm_printf(p, "Boost freq: %d MHz\n",
intel_rps_get_boost_frequency(rps));
drm_printf(p, "Max freq: %d MHz\n",
intel_rps_get_max_frequency(rps));
drm_printf(p,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, caps.rp1_freq));
2445 | } |
2446 | |
2447 | void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) |
2448 | { |
2449 | if (rps_uses_slpc(rps)) |
2450 | return slpc_frequency_dump(rps, p); |
2451 | else |
2452 | return rps_frequency_dump(rps, p); |
2453 | } |
2454 | |
2455 | static int set_max_freq(struct intel_rps *rps, u32 val) |
2456 | { |
2457 | struct drm_i915_private *i915 = rps_to_i915(rps); |
2458 | int ret = 0; |
2459 | |
2460 | mutex_lock(&rps->lock); |
2461 | |
2462 | val = intel_freq_opcode(rps, val); |
2463 | if (val < rps->min_freq || |
2464 | val > rps->max_freq || |
2465 | val < rps->min_freq_softlimit) { |
2466 | ret = -EINVAL; |
2467 | goto unlock; |
2468 | } |
2469 | |
2470 | if (val > rps->rp0_freq) |
2471 | drm_dbg(&i915->drm, "User requested overclocking to %d\n" , |
2472 | intel_gpu_freq(rps, val)); |
2473 | |
2474 | rps->max_freq_softlimit = val; |
2475 | |
2476 | val = clamp_t(int, rps->cur_freq, |
2477 | rps->min_freq_softlimit, |
2478 | rps->max_freq_softlimit); |
2479 | |
2480 | /* |
2481 | * We still need *_set_rps to process the new max_delay and |
2482 | * update the interrupt limits and PMINTRMSK even though |
2483 | * frequency request may be unchanged. |
2484 | */ |
2485 | intel_rps_set(rps, val); |
2486 | |
2487 | unlock: |
mutex_unlock(&rps->lock);
2489 | |
2490 | return ret; |
2491 | } |
2492 | |
2493 | int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) |
2494 | { |
2495 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2496 | |
2497 | if (rps_uses_slpc(rps)) |
2498 | return intel_guc_slpc_set_max_freq(slpc, val); |
2499 | else |
2500 | return set_max_freq(rps, val); |
2501 | } |
2502 | |
2503 | u32 intel_rps_get_min_frequency(struct intel_rps *rps) |
2504 | { |
2505 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2506 | |
2507 | if (rps_uses_slpc(rps)) |
2508 | return slpc->min_freq_softlimit; |
2509 | else |
return intel_gpu_freq(rps, rps->min_freq_softlimit);
2511 | } |
2512 | |
2513 | /** |
* intel_rps_get_min_raw_freq - returns the min frequency in raw hardware units.
2515 | * @rps: the intel_rps structure |
2516 | * |
* Returns the min frequency in raw hardware units. On newer platforms the
* raw unit is 50 MHz.
2519 | */ |
2520 | u32 intel_rps_get_min_raw_freq(struct intel_rps *rps) |
2521 | { |
2522 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2523 | u32 freq; |
2524 | |
2525 | if (rps_uses_slpc(rps)) { |
2526 | return DIV_ROUND_CLOSEST(slpc->min_freq, |
2527 | GT_FREQUENCY_MULTIPLIER); |
2528 | } else { |
2529 | freq = rps->min_freq; |
2530 | if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { |
2531 | /* Convert GT frequency to 50 MHz units */ |
2532 | freq /= GEN9_FREQ_SCALER; |
2533 | } |
2534 | return freq; |
2535 | } |
2536 | } |
2537 | |
2538 | static int set_min_freq(struct intel_rps *rps, u32 val) |
2539 | { |
2540 | int ret = 0; |
2541 | |
2542 | mutex_lock(&rps->lock); |
2543 | |
2544 | val = intel_freq_opcode(rps, val); |
2545 | if (val < rps->min_freq || |
2546 | val > rps->max_freq || |
2547 | val > rps->max_freq_softlimit) { |
2548 | ret = -EINVAL; |
2549 | goto unlock; |
2550 | } |
2551 | |
2552 | rps->min_freq_softlimit = val; |
2553 | |
2554 | val = clamp_t(int, rps->cur_freq, |
2555 | rps->min_freq_softlimit, |
2556 | rps->max_freq_softlimit); |
2557 | |
2558 | /* |
2559 | * We still need *_set_rps to process the new min_delay and |
2560 | * update the interrupt limits and PMINTRMSK even though |
2561 | * frequency request may be unchanged. |
2562 | */ |
2563 | intel_rps_set(rps, val); |
2564 | |
2565 | unlock: |
mutex_unlock(&rps->lock);
2567 | |
2568 | return ret; |
2569 | } |
2570 | |
2571 | int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) |
2572 | { |
2573 | struct intel_guc_slpc *slpc = rps_to_slpc(rps); |
2574 | |
2575 | if (rps_uses_slpc(rps)) |
2576 | return intel_guc_slpc_set_min_freq(slpc, val); |
2577 | else |
2578 | return set_min_freq(rps, val); |
2579 | } |
2580 | |
2581 | u8 intel_rps_get_up_threshold(struct intel_rps *rps) |
2582 | { |
2583 | return rps->power.up_threshold; |
2584 | } |
2585 | |
2586 | static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val) |
2587 | { |
2588 | int ret; |
2589 | |
2590 | if (val > 100) |
2591 | return -EINVAL; |
2592 | |
2593 | ret = mutex_lock_interruptible(&rps->lock); |
2594 | if (ret) |
2595 | return ret; |
2596 | |
2597 | if (*threshold == val) |
2598 | goto out_unlock; |
2599 | |
2600 | *threshold = val; |
2601 | |
2602 | /* Force reset. */ |
2603 | rps->last_freq = -1; |
2604 | mutex_lock(&rps->power.mutex); |
2605 | rps->power.mode = -1; |
mutex_unlock(&rps->power.mutex);
2607 | |
2608 | intel_rps_set(rps, clamp(rps->cur_freq, |
2609 | rps->min_freq_softlimit, |
2610 | rps->max_freq_softlimit)); |
2611 | |
2612 | out_unlock: |
mutex_unlock(&rps->lock);
2614 | |
2615 | return ret; |
2616 | } |
2617 | |
2618 | int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold) |
2619 | { |
return rps_set_threshold(rps, &rps->power.up_threshold, threshold);
2621 | } |
2622 | |
2623 | u8 intel_rps_get_down_threshold(struct intel_rps *rps) |
2624 | { |
2625 | return rps->power.down_threshold; |
2626 | } |
2627 | |
2628 | int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold) |
2629 | { |
return rps_set_threshold(rps, &rps->power.down_threshold, threshold);
2631 | } |
2632 | |
2633 | static void intel_rps_set_manual(struct intel_rps *rps, bool enable) |
2634 | { |
2635 | struct intel_uncore *uncore = rps_to_uncore(rps); |
2636 | u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE; |
2637 | |
2638 | /* Allow punit to process software requests */ |
intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
2640 | } |
2641 | |
2642 | void intel_rps_raise_unslice(struct intel_rps *rps) |
2643 | { |
2644 | struct intel_uncore *uncore = rps_to_uncore(rps); |
2645 | |
2646 | mutex_lock(&rps->lock); |
2647 | |
2648 | if (rps_uses_slpc(rps)) { |
2649 | /* RP limits have not been initialized yet for SLPC path */ |
2650 | struct intel_rps_freq_caps caps; |
2651 | |
gen6_rps_get_freq_caps(rps, &caps);

intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
((caps.rp0_freq <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
} else {
intel_rps_set(rps, rps->rp0_freq);
}

mutex_unlock(&rps->lock);
2665 | } |
2666 | |
2667 | void intel_rps_lower_unslice(struct intel_rps *rps) |
2668 | { |
2669 | struct intel_uncore *uncore = rps_to_uncore(rps); |
2670 | |
2671 | mutex_lock(&rps->lock); |
2672 | |
2673 | if (rps_uses_slpc(rps)) { |
2674 | /* RP limits have not been initialized yet for SLPC path */ |
2675 | struct intel_rps_freq_caps caps; |
2676 | |
gen6_rps_get_freq_caps(rps, &caps);

intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
((caps.min_freq <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
} else {
intel_rps_set(rps, rps->min_freq);
}

mutex_unlock(&rps->lock);
2690 | } |
2691 | |
2692 | static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32) |
2693 | { |
2694 | struct intel_gt *gt = rps_to_gt(rps); |
2695 | intel_wakeref_t wakeref; |
2696 | u32 val; |
2697 | |
2698 | with_intel_runtime_pm(gt->uncore->rpm, wakeref) |
val = intel_uncore_read(gt->uncore, reg32);
2700 | |
2701 | return val; |
2702 | } |
2703 | |
2704 | bool rps_read_mask_mmio(struct intel_rps *rps, |
2705 | i915_reg_t reg32, u32 mask) |
2706 | { |
2707 | return rps_read_mmio(rps, reg32) & mask; |
2708 | } |
2709 | |
2710 | /* External interface for intel_ips.ko */ |
2711 | |
2712 | static struct drm_i915_private __rcu *ips_mchdev; |
2713 | |
2714 | /* |
2715 | * Tells the intel_ips driver that the i915 driver is now loaded, if |
2716 | * IPS got loaded first. |
2717 | * |
2718 | * This awkward dance is so that neither module has to depend on the |
2719 | * other in order for IPS to do the appropriate communication of |
2720 | * GPU turbo limits to i915. |
2721 | */ |
2722 | static void |
2723 | ips_ping_for_i915_load(void) |
2724 | { |
2725 | void (*link)(void); |
2726 | |
2727 | link = symbol_get(ips_link_to_i915_driver); |
2728 | if (link) { |
2729 | link(); |
2730 | symbol_put(ips_link_to_i915_driver); |
2731 | } |
2732 | } |
2733 | |
2734 | void intel_rps_driver_register(struct intel_rps *rps) |
2735 | { |
2736 | struct intel_gt *gt = rps_to_gt(rps); |
2737 | |
2738 | /* |
2739 | * We only register the i915 ips part with intel-ips once everything is |
2740 | * set up, to avoid intel-ips sneaking in and reading bogus values. |
2741 | */ |
2742 | if (GRAPHICS_VER(gt->i915) == 5) { |
2743 | GEM_BUG_ON(ips_mchdev); |
2744 | rcu_assign_pointer(ips_mchdev, gt->i915); |
2745 | ips_ping_for_i915_load(); |
2746 | } |
2747 | } |
2748 | |
2749 | void intel_rps_driver_unregister(struct intel_rps *rps) |
2750 | { |
2751 | if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) |
2752 | rcu_assign_pointer(ips_mchdev, NULL); |
2753 | } |
2754 | |
2755 | static struct drm_i915_private *mchdev_get(void) |
2756 | { |
2757 | struct drm_i915_private *i915; |
2758 | |
2759 | rcu_read_lock(); |
2760 | i915 = rcu_dereference(ips_mchdev); |
if (i915 && !kref_get_unless_zero(&i915->drm.ref))
2762 | i915 = NULL; |
2763 | rcu_read_unlock(); |
2764 | |
2765 | return i915; |
2766 | } |
2767 | |
2768 | /** |
2769 | * i915_read_mch_val - return value for IPS use |
2770 | * |
2771 | * Calculate and return a value for the IPS driver to use when deciding whether |
2772 | * we have thermal and power headroom to increase CPU or GPU power budget. |
2773 | */ |
2774 | unsigned long i915_read_mch_val(void) |
2775 | { |
2776 | struct drm_i915_private *i915; |
2777 | unsigned long chipset_val = 0; |
2778 | unsigned long graphics_val = 0; |
2779 | intel_wakeref_t wakeref; |
2780 | |
2781 | i915 = mchdev_get(); |
2782 | if (!i915) |
2783 | return 0; |
2784 | |
2785 | with_intel_runtime_pm(&i915->runtime_pm, wakeref) { |
2786 | struct intel_ips *ips = &to_gt(i915)->rps.ips; |
2787 | |
spin_lock_irq(&mchdev_lock);
chipset_val = __ips_chipset_val(ips);
graphics_val = __ips_gfx_val(ips);
spin_unlock_irq(&mchdev_lock);
}

drm_dev_put(&i915->drm);
2795 | return chipset_val + graphics_val; |
2796 | } |
2797 | EXPORT_SYMBOL_GPL(i915_read_mch_val); |
2798 | |
2799 | /** |
2800 | * i915_gpu_raise - raise GPU frequency limit |
2801 | * |
2802 | * Raise the limit; IPS indicates we have thermal headroom. |
2803 | */ |
2804 | bool i915_gpu_raise(void) |
2805 | { |
2806 | struct drm_i915_private *i915; |
2807 | struct intel_rps *rps; |
2808 | |
2809 | i915 = mchdev_get(); |
2810 | if (!i915) |
2811 | return false; |
2812 | |
2813 | rps = &to_gt(i915)->rps; |
2814 | |
spin_lock_irq(&mchdev_lock);
if (rps->max_freq_softlimit < rps->max_freq)
rps->max_freq_softlimit++;
spin_unlock_irq(&mchdev_lock);

drm_dev_put(&i915->drm);
2821 | return true; |
2822 | } |
2823 | EXPORT_SYMBOL_GPL(i915_gpu_raise); |
2824 | |
2825 | /** |
2826 | * i915_gpu_lower - lower GPU frequency limit |
2827 | * |
2828 | * IPS indicates we're close to a thermal limit, so throttle back the GPU |
2829 | * frequency maximum. |
2830 | */ |
2831 | bool i915_gpu_lower(void) |
2832 | { |
2833 | struct drm_i915_private *i915; |
2834 | struct intel_rps *rps; |
2835 | |
2836 | i915 = mchdev_get(); |
2837 | if (!i915) |
2838 | return false; |
2839 | |
2840 | rps = &to_gt(i915)->rps; |
2841 | |
spin_lock_irq(&mchdev_lock);
if (rps->max_freq_softlimit > rps->min_freq)
rps->max_freq_softlimit--;
spin_unlock_irq(&mchdev_lock);

drm_dev_put(&i915->drm);
2848 | return true; |
2849 | } |
2850 | EXPORT_SYMBOL_GPL(i915_gpu_lower); |
2851 | |
2852 | /** |
2853 | * i915_gpu_busy - indicate GPU business to IPS |
2854 | * |
2855 | * Tell the IPS driver whether or not the GPU is busy. |
2856 | */ |
2857 | bool i915_gpu_busy(void) |
2858 | { |
2859 | struct drm_i915_private *i915; |
2860 | bool ret; |
2861 | |
2862 | i915 = mchdev_get(); |
2863 | if (!i915) |
2864 | return false; |
2865 | |
2866 | ret = to_gt(i915)->awake; |
2867 | |
drm_dev_put(&i915->drm);
2869 | return ret; |
2870 | } |
2871 | EXPORT_SYMBOL_GPL(i915_gpu_busy); |
2872 | |
2873 | /** |
2874 | * i915_gpu_turbo_disable - disable graphics turbo |
2875 | * |
2876 | * Disable graphics turbo by resetting the max frequency and setting the |
2877 | * current frequency to the default. |
2878 | */ |
2879 | bool i915_gpu_turbo_disable(void) |
2880 | { |
2881 | struct drm_i915_private *i915; |
2882 | struct intel_rps *rps; |
2883 | bool ret; |
2884 | |
2885 | i915 = mchdev_get(); |
2886 | if (!i915) |
2887 | return false; |
2888 | |
2889 | rps = &to_gt(i915)->rps; |
2890 | |
spin_lock_irq(&mchdev_lock);
rps->max_freq_softlimit = rps->min_freq;
ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
spin_unlock_irq(&mchdev_lock);

drm_dev_put(&i915->drm);
2897 | return ret; |
2898 | } |
2899 | EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); |
2900 | |
2901 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
2902 | #include "selftest_rps.c" |
2903 | #include "selftest_slpc.c" |
2904 | #endif |
2905 | |