/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
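/*
 * Illustrative arithmetic (not functional code): with FREQUENCY == 200,
 * NSEC_PER_SEC / FREQUENCY == 5000000, so PERIOD resolves to a 5ms sampling
 * tick; the 10000ns floor only matters for much higher sampling frequencies.
 */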

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

static cpumask_t i915_pmu_cpumask;
static unsigned int i915_pmu_target_cpu = -1;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(const u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_gt_id(const u64 config)
{
	return config >> __I915_PMU_GT_SHIFT;
}

static u64 config_counter(const u64 config)
{
	return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
}
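/*
 * Example of the encoding split above (a sketch, assuming the uapi macros
 * from i915_drm.h): ___I915_PMU_OTHER(1, 0) encodes I915_PMU_ACTUAL_FREQUENCY
 * for GT 1 by placing the GT id in the bits at and above __I915_PMU_GT_SHIFT,
 * so config_gt_id() returns 1 while config_counter() returns the same value
 * as the GT 0 encoding, I915_PMU_ACTUAL_FREQUENCY.
 */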

static unsigned int other_bit(const u64 config)
{
	unsigned int val;

	switch (config_counter(config)) {
	case I915_PMU_ACTUAL_FREQUENCY:
		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
		break;
	case I915_PMU_REQUESTED_FREQUENCY:
		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
		break;
	case I915_PMU_RC6_RESIDENCY:
		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
		break;
	default:
		/*
		 * Events that do not require sampling, or tracking state
		 * transitions between enabled and disabled can be ignored.
		 */
		return -1;
	}

	return I915_ENGINE_SAMPLE_COUNT +
	       config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
	       val;
}
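/*
 * Resulting layout of the pmu->enable bitmask (a sketch derived from the
 * helper above): bits 0..I915_ENGINE_SAMPLE_COUNT-1 mirror the engine sample
 * types (busy/wait/sema), followed by one group of
 * __I915_PMU_TRACKED_EVENT_COUNT bits per GT for the tracked global events
 * (actual/requested frequency and RC6 residency).
 */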

static unsigned int config_bit(const u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return other_bit(config);
}

static u32 config_mask(const u64 config)
{
	unsigned int bit = config_bit(config);

	if (__builtin_constant_p(config))
		BUILD_BUG_ON(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							  enable)) - 1);
	else
		WARN_ON_ONCE(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							 enable)) - 1);

	return BIT(config_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_bit(struct perf_event *event)
{
	return config_bit(event->attr.config);
}

static u32 frequency_enabled_mask(void)
{
	unsigned int i;
	u32 mask = 0;

	for (i = 0; i < I915_PMU_MAX_GT; i++)
		mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
			config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));

	return mask;
}

static bool pmu_needs_timer(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	u32 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);

	return val;
}

static inline s64 ktime_since_raw(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}

static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
	return pmu->sample[gt_id][sample].cur;
}

static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{
	pmu->sample[gt_id][sample].cur = val;
}

static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
	pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
}

static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int gt_id = gt->info.id;
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;
	bool awake = false;
	u64 val;

	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt);
		awake = true;
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (awake) {
		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since_raw(pmu->sleep_last[gt_id]);
		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
	}

	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
	else
		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}
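/*
 * Worked example for the suspended path above (numbers invented): if
 * __I915_SAMPLE_RC6 was last stored as 1000000ns when the GT parked, and
 * ktime_get_raw() has since advanced by 250000ns, get_rc6() reports
 * 1250000ns -- the whole runtime-suspended period is credited as RC6,
 * kept monotonic via __I915_SAMPLE_RC6_LAST_REPORTED.
 */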

static void init_rc6(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
			u64 val = __get_rc6(gt);

			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
				     val);
			pmu->sleep_last[i] = ktime_get_raw();
		}
	}
}

static void park_rc6(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
	pmu->sleep_last[gt->info.id] = ktime_get_raw();
}

static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_parked(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(gt);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->unparked &= ~BIT(gt->info.id);
	if (pmu->unparked == 0)
		pmu->timer_enabled = false;

	spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	if (pmu->unparked == 0)
		__i915_pmu_maybe_start_timer(pmu);

	pmu->unparked |= BIT(gt->info.id);

	spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang. For a fun history lesson dig out the old
	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
	 */
	return GRAPHICS_VER(i915) == 7;
}

static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	bool busy;
	u32 val;

	val = ENGINE_READ_FW(engine, RING_CTL);
	if (val == 0) /* powerwell off => engine idle */
		return;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* No need to sample when busy stats are supported. */
	if (intel_engine_supports_stats(engine))
		return;

	/*
	 * While waiting on a semaphore or event, MI_MODE reports the
	 * ring as idle. However, previously using the seqno, and with
	 * execlists sampling, we account for the ring waiting as the
	 * engine being busy. Therefore, we record the sample as being
	 * busy if either waiting or !idle.
	 */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = ENGINE_READ_FW(engine, RING_MI_MODE);
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!intel_gt_pm_is_awake(gt))
		return;

	for_each_engine(engine, gt, id) {
		if (!engine->pmu.enable)
			continue;

		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		if (exclusive_mmio_access(i915)) {
			spin_lock_irqsave(&engine->uncore->lock, flags);
			engine_sample(engine, period_ns);
			spin_unlock_irqrestore(&engine->uncore->lock, flags);
		} else {
			engine_sample(engine, period_ns);
		}

		intel_engine_pm_put_async(engine);
	}
}

static bool
frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
{
	return pmu->enable &
	       (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
		config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
}

static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int gt_id = gt->info.id;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;

	if (!frequency_sampling_enabled(pmu, gt_id))
		return;

	/* Report 0/0 (actual/requested) frequency while parked. */
	if (!intel_gt_pm_get_if_awake(gt))
		return;

	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
		u32 val;

		/*
		 * We take a quick peek here without using forcewake
		 * so that we don't perturb the system under observation
		 * (forcewake => !rc6 => increased power use). We expect
		 * that if the read fails because it is outside of the
		 * mmio power well, then it will return 0 -- in which
		 * case we assume the system is running at the intended
		 * frequency. Fortunately, the read should rarely fail!
		 */
		val = intel_rps_read_actual_frequency_fw(rps);
		if (!val)
			val = intel_gpu_freq(rps, rps->cur_freq);

		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
				val, period_ns / 1000);
	}

	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
				intel_rps_get_requested_frequency(rps),
				period_ns / 1000);
	}

	intel_gt_pm_put_async(gt);
}
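/*
 * Unit bookkeeping for the frequency samples (illustrative): each tick
 * accumulates MHz * us, e.g. 5ms at 1000MHz adds 1000 * 5000 = 5000000.
 * __i915_pmu_event_read() later divides by USEC_PER_SEC, so the exported
 * counter advances by 5 for that tick, and perf's delta over elapsed time
 * recovers the average frequency in MHz.
 */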

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned int period_ns;
	struct intel_gt *gt;
	unsigned int i;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */

	for_each_gt(gt, i915, i) {
		if (!(pmu->unparked & BIT(i)))
			continue;

		engines_sample(gt, period_ns);
		frequency_sample(gt, period_ns);
	}

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	drm_WARN_ON(&i915->drm, event->parent);

	drm_dev_put(&i915->drm);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (GRAPHICS_VER(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	struct intel_gt *gt = to_gt(i915);

	unsigned int gt_id = config_gt_id(config);
	unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;

	if (gt_id > max_gt_id)
		return -ENOENT;

	switch (config_counter(config)) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		fallthrough;
	case I915_PMU_REQUESTED_FREQUENCY:
		if (GRAPHICS_VER(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		if (gt_id)
			return -ENOENT;
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!gt->rc6.supported)
			return -ENODEV;
		break;
	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	return engine_event_status(engine, engine_event_sample(event));
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&i915->drm);
		event->destroy = i915_pmu_event_destroy;
	}

	return 0;
}
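/*
 * Userspace sketch (a minimal, simplified example of opening one of these
 * counters via the perf uAPI; the sysfs lookup and error handling are
 * elided):
 *
 *	struct perf_event_attr attr = {
 *		.type = <value of /sys/bus/event_source/devices/i915/type>,
 *		.size = sizeof(attr),
 *		.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *	u64 busy_ns;
 *	read(fd, &busy_ns, sizeof(busy_ns));
 *
 * Note pid must be -1 (task_ctx_nr is perf_invalid_context, so these are
 * system-wide counters) and the cpu must be in the advertised cpumask.
 */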

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			ktime_t unused;

			val = ktime_to_ns(intel_engine_get_busy_time(engine,
								     &unused));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		const unsigned int gt_id = config_gt_id(event->attr.config);
		const u64 config = config_counter(event->attr.config);

		switch (config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(read_sample(pmu, gt_id,
					       __I915_SAMPLE_FREQ_ACT),
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(read_sample(pmu, gt_id,
					       __I915_SAMPLE_FREQ_REQ),
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = READ_ONCE(pmu->irq_count);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915->gt[gt_id]);
			break;
		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct hw_perf_event *hwc = &event->hw;
	struct i915_pmu *pmu = &i915->pmu;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}

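	/*
	 * Read-side retry: on failure local64_try_cmpxchg() updates @prev
	 * with the current prev_count, so the loop re-reads the counter
	 * until it can publish a consistent (prev, new) pair even against
	 * concurrent readers.
	 */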
	prev = local64_read(&hwc->prev_count);
	do {
		new = __i915_pmu_event_read(event);
	} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	const unsigned int bit = event_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	if (bit == -1)
		goto update;

	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);

	pmu->enable |= BIT(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

update:
	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	const unsigned int bit = event_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	if (bit == -1)
		return;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return;

	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		goto out;

	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);

out:
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};
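/*
 * Through sysfs this exposes, for example (path shown for illustration,
 * following the standard perf event_source layout):
 *
 *	$ cat /sys/bus/event_source/devices/i915/format/i915_eventid
 *	config:0-20
 *
 * which tells tools like perf how event ids map onto attr.config bits.
 */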

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t cpumask_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

#define __event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = false, \
}

#define __global_event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = true, \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	static const struct {
		unsigned int counter;
		const char *name;
		const char *unit;
		bool global;
	} events[] = {
		__event(0, "actual-frequency", "M"),
		__event(1, "requested-frequency", "M"),
		__global_event(2, "interrupts", NULL),
		__event(3, "rc6-residency", "ns"),
		__event(4, "software-gt-awake-time", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	struct intel_gt *gt;
	unsigned int i, j;

	/* Count how many counters we will be exposing. */
	for_each_gt(gt, i915, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___I915_PMU_OTHER(j, events[i].counter);

			if (!config_status(i915, config))
				count++;
		}
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for_each_gt(gt, i915, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___I915_PMU_OTHER(j, events[i].counter);
			char *str;

			if (config_status(i915, config))
				continue;

			if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
				str = kstrdup(events[i].name, GFP_KERNEL);
			else
				str = kasprintf(GFP_KERNEL, "%s-gt%u",
						events[i].name, j);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter = add_i915_attr(i915_iter, str, config);

			if (events[i].unit) {
				if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
					str = kasprintf(GFP_KERNEL, "%s.unit",
							events[i].name);
				else
					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
							events[i].name, j);
				if (!str)
					goto err;

				*attr_iter++ = &pmu_iter->attr.attr;
				pmu_iter = add_pmu_attr(pmu_iter, str,
							events[i].unit);
			}
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}
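/*
 * The resulting events directory then contains entries like (illustrative,
 * engine names depend on the platform):
 *
 *	events/rcs0-busy        -> "config=0x0"
 *	events/rcs0-busy.unit   -> "ns"
 *	events/actual-frequency -> "config=0x...", with an "M" unit file
 *
 * so tools can resolve counters by name, e.g. 'perf stat -e i915/rcs0-busy/'.
 */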

static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (cpumask_empty(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = i915_pmu_target_cpu;

	GEM_BUG_ON(!pmu->base.event_init);

	/*
	 * Unregistering an instance generates a CPU offline event which we must
	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			i915_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

int i915_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;

	return 0;
}

void i915_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static bool is_igp(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);

	/* IGP is 0000:00:02.0 */
	return pci_domain_nr(pdev->bus) == 0 &&
	       pdev->bus->number == 0 &&
	       PCI_SLOT(pdev->devfn) == 2 &&
	       PCI_FUNC(pdev->devfn) == 0;
}
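/*
 * Naming consequence (illustrative): the integrated GPU at 0000:00:02.0
 * registers plainly as "i915", while anything else derives its name from
 * dev_name(), e.g. a discrete card at 0000:03:00.0 would become
 * "i915_0000_03_00.0" after the colon replacement below (tools/perf
 * reserves ':' as a separator).
 */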

void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	const struct attribute_group *attr_groups[] = {
		&i915_pmu_format_attr_group,
		&pmu->events_attr_group,
		&i915_pmu_cpumask_attr_group,
		NULL
	};

	int ret = -ENOMEM;

	if (GRAPHICS_VER(i915) <= 2) {
		drm_info(&i915->drm, "PMU not supported for this GPU.");
		return;
	}

	spin_lock_init(&pmu->lock);
	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	pmu->timer.function = i915_sample;
	pmu->cpuhp.cpu = -1;
	init_rc6(pmu);

	if (!is_igp(i915)) {
		pmu->name = kasprintf(GFP_KERNEL,
				      "i915_%s",
				      dev_name(i915->drm.dev));
		if (pmu->name) {
			/* tools/perf reserves colons as special. */
			strreplace((char *)pmu->name, ':', '_');
		}
	} else {
		pmu->name = "i915";
	}
	if (!pmu->name)
		goto err;

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module = THIS_MODULE;
	pmu->base.task_ctx_nr = perf_invalid_context;
	pmu->base.event_init = i915_pmu_event_init;
	pmu->base.add = i915_pmu_event_add;
	pmu->base.del = i915_pmu_event_del;
	pmu->base.start = i915_pmu_event_start;
	pmu->base.stop = i915_pmu_event_stop;
	pmu->base.read = i915_pmu_event_read;
	pmu->base.event_idx = i915_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = i915_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	if (!is_igp(i915))
		kfree(pmu->name);
err:
	drm_notice(&i915->drm, "Failed to register PMU!\n");
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
	 * ensures all currently executing ones will have exited before we
	 * proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	if (!is_igp(i915))
		kfree(pmu->name);
	free_event_attributes(pmu);
}