1 | /* SPDX-License-Identifier: MIT */ |
2 | /* |
3 | * Copyright © 2019 Intel Corporation |
4 | */ |
5 | |
6 | #ifndef _I915_PERF_TYPES_H_ |
7 | #define _I915_PERF_TYPES_H_ |
8 | |
9 | #include <linux/atomic.h> |
10 | #include <linux/device.h> |
11 | #include <linux/hrtimer.h> |
12 | #include <linux/llist.h> |
13 | #include <linux/poll.h> |
14 | #include <linux/sysfs.h> |
15 | #include <linux/types.h> |
16 | #include <linux/uuid.h> |
17 | #include <linux/wait.h> |
18 | #include <uapi/drm/i915_drm.h> |
19 | |
20 | #include "gt/intel_engine_types.h" |
21 | #include "gt/intel_sseu.h" |
22 | #include "i915_reg_defs.h" |
23 | #include "intel_uncore.h" |
24 | #include "intel_wakeref.h" |
25 | |
26 | struct drm_i915_private; |
27 | struct file; |
28 | struct i915_active; |
29 | struct i915_gem_context; |
30 | struct i915_perf; |
31 | struct i915_vma; |
32 | struct intel_context; |
33 | struct intel_engine_cs; |
34 | |
35 | enum { |
36 | PERF_GROUP_OAG = 0, |
37 | PERF_GROUP_OAM_SAMEDIA_0 = 0, |
38 | |
39 | PERF_GROUP_MAX, |
40 | PERF_GROUP_INVALID = U32_MAX, |
41 | }; |
42 | |
43 | enum { |
44 | HDR_32_BIT = 0, |
45 | HDR_64_BIT, |
46 | }; |
47 | |
48 | struct i915_perf_regs { |
49 | u32 base; |
50 | i915_reg_t oa_head_ptr; |
51 | i915_reg_t oa_tail_ptr; |
52 | i915_reg_t oa_buffer; |
53 | i915_reg_t oa_ctx_ctrl; |
54 | i915_reg_t oa_ctrl; |
55 | i915_reg_t oa_debug; |
56 | i915_reg_t oa_status; |
57 | u32 oa_ctrl_counter_format_shift; |
58 | }; |
59 | |
60 | enum oa_type { |
61 | TYPE_OAG, |
62 | TYPE_OAM, |
63 | }; |
64 | |
65 | struct i915_oa_format { |
66 | u32 format; |
67 | int size; |
68 | int type; |
69 | enum report_header ; |
70 | }; |
71 | |
72 | struct i915_oa_reg { |
73 | i915_reg_t addr; |
74 | u32 value; |
75 | }; |
76 | |
77 | struct i915_oa_config { |
78 | struct i915_perf *perf; |
79 | |
80 | char uuid[UUID_STRING_LEN + 1]; |
81 | int id; |
82 | |
83 | const struct i915_oa_reg *mux_regs; |
84 | u32 mux_regs_len; |
85 | const struct i915_oa_reg *b_counter_regs; |
86 | u32 b_counter_regs_len; |
87 | const struct i915_oa_reg *flex_regs; |
88 | u32 flex_regs_len; |
89 | |
90 | struct attribute_group sysfs_metric; |
91 | struct attribute *attrs[2]; |
92 | struct kobj_attribute sysfs_metric_id; |
93 | |
94 | struct kref ref; |
95 | struct rcu_head rcu; |
96 | }; |
97 | |
98 | struct i915_perf_stream; |
99 | |
100 | /** |
101 | * struct i915_perf_stream_ops - the OPs to support a specific stream type |
102 | */ |
103 | struct i915_perf_stream_ops { |
104 | /** |
105 | * @enable: Enables the collection of HW samples, either in response to |
106 | * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened |
107 | * without `I915_PERF_FLAG_DISABLED`. |
108 | */ |
109 | void (*enable)(struct i915_perf_stream *stream); |
110 | |
111 | /** |
112 | * @disable: Disables the collection of HW samples, either in response |
113 | * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying |
114 | * the stream. |
115 | */ |
116 | void (*disable)(struct i915_perf_stream *stream); |
117 | |
118 | /** |
119 | * @poll_wait: Call poll_wait, passing a wait queue that will be woken |
120 | * once there is something ready to read() for the stream |
121 | */ |
122 | void (*poll_wait)(struct i915_perf_stream *stream, |
123 | struct file *file, |
124 | poll_table *wait); |
125 | |
126 | /** |
127 | * @wait_unlocked: For handling a blocking read, wait until there is |
128 | * something to ready to read() for the stream. E.g. wait on the same |
129 | * wait queue that would be passed to poll_wait(). |
130 | */ |
131 | int (*wait_unlocked)(struct i915_perf_stream *stream); |
132 | |
133 | /** |
134 | * @read: Copy buffered metrics as records to userspace |
135 | * **buf**: the userspace, destination buffer |
136 | * **count**: the number of bytes to copy, requested by userspace |
137 | * **offset**: zero at the start of the read, updated as the read |
138 | * proceeds, it represents how many bytes have been copied so far and |
139 | * the buffer offset for copying the next record. |
140 | * |
141 | * Copy as many buffered i915 perf samples and records for this stream |
142 | * to userspace as will fit in the given buffer. |
143 | * |
144 | * Only write complete records; returning -%ENOSPC if there isn't room |
145 | * for a complete record. |
146 | * |
147 | * Return any error condition that results in a short read such as |
148 | * -%ENOSPC or -%EFAULT, even though these may be squashed before |
149 | * returning to userspace. |
150 | */ |
151 | int (*read)(struct i915_perf_stream *stream, |
152 | char __user *buf, |
153 | size_t count, |
154 | size_t *offset); |
155 | |
156 | /** |
157 | * @destroy: Cleanup any stream specific resources. |
158 | * |
159 | * The stream will always be disabled before this is called. |
160 | */ |
161 | void (*destroy)(struct i915_perf_stream *stream); |
162 | }; |
163 | |
164 | /** |
165 | * struct i915_perf_stream - state for a single open stream FD |
166 | */ |
167 | struct i915_perf_stream { |
168 | /** |
169 | * @perf: i915_perf backpointer |
170 | */ |
171 | struct i915_perf *perf; |
172 | |
173 | /** |
174 | * @uncore: mmio access path |
175 | */ |
176 | struct intel_uncore *uncore; |
177 | |
178 | /** |
179 | * @engine: Engine associated with this performance stream. |
180 | */ |
181 | struct intel_engine_cs *engine; |
182 | |
183 | /** |
184 | * @lock: Lock associated with operations on stream |
185 | */ |
186 | struct mutex lock; |
187 | |
188 | /** |
189 | * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` |
190 | * properties given when opening a stream, representing the contents |
191 | * of a single sample as read() by userspace. |
192 | */ |
193 | u32 sample_flags; |
194 | |
195 | /** |
196 | * @sample_size: Considering the configured contents of a sample |
197 | * combined with the required header size, this is the total size |
198 | * of a single sample record. |
199 | */ |
200 | int sample_size; |
201 | |
202 | /** |
203 | * @ctx: %NULL if measuring system-wide across all contexts or a |
204 | * specific context that is being monitored. |
205 | */ |
206 | struct i915_gem_context *ctx; |
207 | |
208 | /** |
209 | * @enabled: Whether the stream is currently enabled, considering |
210 | * whether the stream was opened in a disabled state and based |
211 | * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. |
212 | */ |
213 | bool enabled; |
214 | |
215 | /** |
216 | * @hold_preemption: Whether preemption is put on hold for command |
217 | * submissions done on the @ctx. This is useful for some drivers that |
218 | * cannot easily post process the OA buffer context to subtract delta |
219 | * of performance counters not associated with @ctx. |
220 | */ |
221 | bool hold_preemption; |
222 | |
223 | /** |
224 | * @ops: The callbacks providing the implementation of this specific |
225 | * type of configured stream. |
226 | */ |
227 | const struct i915_perf_stream_ops *ops; |
228 | |
229 | /** |
230 | * @oa_config: The OA configuration used by the stream. |
231 | */ |
232 | struct i915_oa_config *oa_config; |
233 | |
234 | /** |
235 | * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily |
236 | * each time @oa_config changes. |
237 | */ |
238 | struct llist_head oa_config_bos; |
239 | |
240 | /** |
241 | * @pinned_ctx: The OA context specific information. |
242 | */ |
243 | struct intel_context *pinned_ctx; |
244 | |
245 | /** |
246 | * @specific_ctx_id: The id of the specific context. |
247 | */ |
248 | u32 specific_ctx_id; |
249 | |
250 | /** |
251 | * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. |
252 | */ |
253 | u32 specific_ctx_id_mask; |
254 | |
255 | /** |
256 | * @poll_check_timer: High resolution timer that will periodically |
257 | * check for data in the circular OA buffer for notifying userspace |
258 | * (e.g. during a read() or poll()). |
259 | */ |
260 | struct hrtimer poll_check_timer; |
261 | |
262 | /** |
263 | * @poll_wq: The wait queue that hrtimer callback wakes when it |
264 | * sees data ready to read in the circular OA buffer. |
265 | */ |
266 | wait_queue_head_t poll_wq; |
267 | |
268 | /** |
269 | * @pollin: Whether there is data available to read. |
270 | */ |
271 | bool pollin; |
272 | |
273 | /** |
274 | * @periodic: Whether periodic sampling is currently enabled. |
275 | */ |
276 | bool periodic; |
277 | |
278 | /** |
279 | * @period_exponent: The OA unit sampling frequency is derived from this. |
280 | */ |
281 | int period_exponent; |
282 | |
283 | /** |
284 | * @oa_buffer: State of the OA buffer. |
285 | */ |
286 | struct { |
287 | const struct i915_oa_format *format; |
288 | struct i915_vma *vma; |
289 | u8 *vaddr; |
290 | u32 last_ctx_id; |
291 | int size_exponent; |
292 | |
293 | /** |
294 | * @ptr_lock: Locks reads and writes to all head/tail state |
295 | * |
296 | * Consider: the head and tail pointer state needs to be read |
297 | * consistently from a hrtimer callback (atomic context) and |
298 | * read() fop (user context) with tail pointer updates happening |
299 | * in atomic context and head updates in user context and the |
300 | * (unlikely) possibility of read() errors needing to reset all |
301 | * head/tail state. |
302 | * |
303 | * Note: Contention/performance aren't currently a significant |
304 | * concern here considering the relatively low frequency of |
305 | * hrtimer callbacks (5ms period) and that reads typically only |
306 | * happen in response to a hrtimer event and likely complete |
307 | * before the next callback. |
308 | * |
309 | * Note: This lock is not held *while* reading and copying data |
310 | * to userspace so the value of head observed in htrimer |
311 | * callbacks won't represent any partial consumption of data. |
312 | */ |
313 | spinlock_t ptr_lock; |
314 | |
315 | /** |
316 | * @head: Although we can always read back the head pointer register, |
317 | * we prefer to avoid trusting the HW state, just to avoid any |
318 | * risk that some hardware condition could * somehow bump the |
319 | * head pointer unpredictably and cause us to forward the wrong |
320 | * OA buffer data to userspace. |
321 | */ |
322 | u32 head; |
323 | |
324 | /** |
325 | * @tail: The last verified tail that can be read by userspace. |
326 | */ |
327 | u32 tail; |
328 | } oa_buffer; |
329 | |
330 | /** |
331 | * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be |
332 | * reprogrammed. |
333 | */ |
334 | struct i915_vma *noa_wait; |
335 | |
336 | /** |
337 | * @poll_oa_period: The period in nanoseconds at which the OA |
338 | * buffer should be checked for available data. |
339 | */ |
340 | u64 poll_oa_period; |
341 | }; |
342 | |
343 | /** |
344 | * struct i915_oa_ops - Gen specific implementation of an OA unit stream |
345 | */ |
346 | struct i915_oa_ops { |
347 | /** |
348 | * @is_valid_b_counter_reg: Validates register's address for |
349 | * programming boolean counters for a particular platform. |
350 | */ |
351 | bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); |
352 | |
353 | /** |
354 | * @is_valid_mux_reg: Validates register's address for programming mux |
355 | * for a particular platform. |
356 | */ |
357 | bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); |
358 | |
359 | /** |
360 | * @is_valid_flex_reg: Validates register's address for programming |
361 | * flex EU filtering for a particular platform. |
362 | */ |
363 | bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); |
364 | |
365 | /** |
366 | * @enable_metric_set: Selects and applies any MUX configuration to set |
367 | * up the Boolean and Custom (B/C) counters that are part of the |
368 | * counter reports being sampled. May apply system constraints such as |
369 | * disabling EU clock gating as required. |
370 | */ |
371 | int (*enable_metric_set)(struct i915_perf_stream *stream, |
372 | struct i915_active *active); |
373 | |
374 | /** |
375 | * @disable_metric_set: Remove system constraints associated with using |
376 | * the OA unit. |
377 | */ |
378 | void (*disable_metric_set)(struct i915_perf_stream *stream); |
379 | |
380 | /** |
381 | * @oa_enable: Enable periodic sampling |
382 | */ |
383 | void (*oa_enable)(struct i915_perf_stream *stream); |
384 | |
385 | /** |
386 | * @oa_disable: Disable periodic sampling |
387 | */ |
388 | void (*oa_disable)(struct i915_perf_stream *stream); |
389 | |
390 | /** |
391 | * @read: Copy data from the circular OA buffer into a given userspace |
392 | * buffer. |
393 | */ |
394 | int (*read)(struct i915_perf_stream *stream, |
395 | char __user *buf, |
396 | size_t count, |
397 | size_t *offset); |
398 | |
399 | /** |
400 | * @oa_hw_tail_read: read the OA tail pointer register |
401 | * |
402 | * In particular this enables us to share all the fiddly code for |
403 | * handling the OA unit tail pointer race that affects multiple |
404 | * generations. |
405 | */ |
406 | u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); |
407 | }; |
408 | |
409 | struct i915_perf_group { |
410 | /* |
411 | * @exclusive_stream: The stream currently using the OA unit. This is |
412 | * sometimes accessed outside a syscall associated to its file |
413 | * descriptor. |
414 | */ |
415 | struct i915_perf_stream *exclusive_stream; |
416 | |
417 | /* |
418 | * @num_engines: The number of engines using this OA unit. |
419 | */ |
420 | u32 num_engines; |
421 | |
422 | /* |
423 | * @regs: OA buffer register group for programming the OA unit. |
424 | */ |
425 | struct i915_perf_regs regs; |
426 | |
427 | /* |
428 | * @type: Type of OA unit - OAM, OAG etc. |
429 | */ |
430 | enum oa_type type; |
431 | }; |
432 | |
433 | struct i915_perf_gt { |
434 | /* |
435 | * Lock associated with anything below within this structure. |
436 | */ |
437 | struct mutex lock; |
438 | |
439 | /** |
440 | * @sseu: sseu configuration selected to run while perf is active, |
441 | * applies to all contexts. |
442 | */ |
443 | struct intel_sseu sseu; |
444 | |
445 | /** |
446 | * @num_perf_groups: number of perf groups per gt. |
447 | */ |
448 | u32 num_perf_groups; |
449 | |
450 | /* |
451 | * @group: list of OA groups - one for each OA buffer. |
452 | */ |
453 | struct i915_perf_group *group; |
454 | }; |
455 | |
456 | struct i915_perf { |
457 | struct drm_i915_private *i915; |
458 | |
459 | struct kobject *metrics_kobj; |
460 | |
461 | /* |
462 | * Lock associated with adding/modifying/removing OA configs |
463 | * in perf->metrics_idr. |
464 | */ |
465 | struct mutex metrics_lock; |
466 | |
467 | /* |
468 | * List of dynamic configurations (struct i915_oa_config), you |
469 | * need to hold perf->metrics_lock to access it. |
470 | */ |
471 | struct idr metrics_idr; |
472 | |
473 | /** |
474 | * For rate limiting any notifications of spurious |
475 | * invalid OA reports |
476 | */ |
477 | struct ratelimit_state spurious_report_rs; |
478 | |
479 | /** |
480 | * For rate limiting any notifications of tail pointer |
481 | * race. |
482 | */ |
483 | struct ratelimit_state tail_pointer_race; |
484 | |
485 | u32 gen7_latched_oastatus1; |
486 | u32 ctx_oactxctrl_offset; |
487 | u32 ctx_flexeu0_offset; |
488 | |
489 | /** |
490 | * The RPT_ID/reason field for Gen8+ includes a bit |
491 | * to determine if the CTX ID in the report is valid |
492 | * but the specific bit differs between Gen 8 and 9 |
493 | */ |
494 | u32 gen8_valid_ctx_bit; |
495 | |
496 | struct i915_oa_ops ops; |
497 | const struct i915_oa_format *oa_formats; |
498 | |
499 | /** |
500 | * Use a format mask to store the supported formats |
501 | * for a platform. |
502 | */ |
503 | #define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG) |
504 | unsigned long format_mask[FORMAT_MASK_SIZE]; |
505 | |
506 | atomic64_t noa_programming_delay; |
507 | }; |
508 | |
509 | #endif /* _I915_PERF_TYPES_H_ */ |
510 | |