/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_PERF_TYPES_H_
#define _I915_PERF_TYPES_H_

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
#include <linux/llist.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/wait.h>
#include <uapi/drm/i915_drm.h>

#include "gt/intel_engine_types.h"
#include "gt/intel_sseu.h"
#include "i915_reg_defs.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"

struct drm_i915_private;
struct file;
struct i915_active;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;

enum {
	PERF_GROUP_OAG = 0,
	PERF_GROUP_OAM_SAMEDIA_0 = 0,

	PERF_GROUP_MAX,
	PERF_GROUP_INVALID = U32_MAX,
};

enum report_header {
	HDR_32_BIT = 0,
	HDR_64_BIT,
};

struct i915_perf_regs {
	u32 base;
	i915_reg_t oa_head_ptr;
	i915_reg_t oa_tail_ptr;
	i915_reg_t oa_buffer;
	i915_reg_t oa_ctx_ctrl;
	i915_reg_t oa_ctrl;
	i915_reg_t oa_debug;
	i915_reg_t oa_status;
	u32 oa_ctrl_counter_format_shift;
};

enum oa_type {
	TYPE_OAG,
	TYPE_OAM,
};

struct i915_oa_format {
	u32 format;
	int size;
	int type;
	enum report_header header;
};

struct i915_oa_reg {
	i915_reg_t addr;
	u32 value;
};

struct i915_oa_config {
	struct i915_perf *perf;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct i915_oa_reg *mux_regs;
	u32 mux_regs_len;
	const struct i915_oa_reg *b_counter_regs;
	u32 b_counter_regs_len;
	const struct i915_oa_reg *flex_regs;
	u32 flex_regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct kobj_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};
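
/*
 * Illustrative sketch (not driver code): a dynamic i915_oa_config is normally
 * created from userspace via DRM_IOCTL_I915_PERF_ADD_CONFIG, whose register
 * lists end up in mux_regs/b_counter_regs/flex_regs above. The register
 * address/value pair below is a hypothetical placeholder.
 *
 *	struct drm_i915_perf_oa_config cfg = {};
 *	__u32 mux[] = { 0x9888, 0x10800000 };	// one addr/value pair (example only)
 *
 *	memcpy(cfg.uuid, "01234567-0123-0123-0123-0123456789ab", 36);
 *	cfg.n_mux_regs = 1;
 *	cfg.mux_regs_ptr = (uintptr_t)mux;
 *	// n_boolean_regs / n_flex_regs left at zero in this sketch
 *	int metric_id = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &cfg);
 */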

struct i915_perf_stream;

/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 */
struct i915_perf_stream_ops {
	/**
	 * @enable: Enables the collection of HW samples, either in response to
	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when the stream is
	 * opened without `I915_PERF_FLAG_DISABLED`.
	 */
	void (*enable)(struct i915_perf_stream *stream);

	/**
	 * @disable: Disables the collection of HW samples, either in response
	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
	 * the stream.
	 */
	void (*disable)(struct i915_perf_stream *stream);

	/**
	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
	 * once there is something ready to read() for the stream.
	 */
	void (*poll_wait)(struct i915_perf_stream *stream,
			  struct file *file,
			  poll_table *wait);

	/**
	 * @wait_unlocked: For handling a blocking read, wait until there is
	 * something ready to read() for the stream. E.g. wait on the same
	 * wait queue that would be passed to poll_wait().
	 */
	int (*wait_unlocked)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy buffered metrics as records to userspace
	 * **buf**: the userspace destination buffer
	 * **count**: the number of bytes to copy, requested by userspace
	 * **offset**: zero at the start of the read, updated as the read
	 * proceeds; it represents how many bytes have been copied so far and
	 * the buffer offset for copying the next record.
	 *
	 * Copy as many buffered i915 perf samples and records for this stream
	 * to userspace as will fit in the given buffer.
	 *
	 * Only write complete records; return -%ENOSPC if there isn't room
	 * for a complete record.
	 *
	 * Return any error condition that results in a short read, such as
	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
	 * returning to userspace.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @destroy: Cleanup any stream specific resources.
	 *
	 * The stream will always be disabled before this is called.
	 */
	void (*destroy)(struct i915_perf_stream *stream);
};
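
/*
 * Minimal sketch (assumed, not driver code) of a @read implementation that
 * honours the contract above: copy whole records only, advance *offset as
 * records are copied, and report -ENOSPC once the next record no longer fits.
 * next_record()/record_size() are hypothetical helpers for illustration.
 *
 *	static int example_read(struct i915_perf_stream *stream,
 *				char __user *buf, size_t count, size_t *offset)
 *	{
 *		const void *rec;
 *
 *		while ((rec = next_record(stream))) {
 *			size_t len = record_size(rec);
 *
 *			if (*offset + len > count)
 *				return -ENOSPC;	// never split a record
 *			if (copy_to_user(buf + *offset, rec, len))
 *				return -EFAULT;
 *			*offset += len;
 *		}
 *		return 0;
 *	}
 */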

/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
	/**
	 * @perf: i915_perf backpointer
	 */
	struct i915_perf *perf;

	/**
	 * @uncore: mmio access path
	 */
	struct intel_uncore *uncore;

	/**
	 * @engine: Engine associated with this performance stream.
	 */
	struct intel_engine_cs *engine;

	/**
	 * @lock: Lock associated with operations on the stream
	 */
	struct mutex lock;

	/**
	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
	 * properties given when opening a stream, representing the contents
	 * of a single sample as read() by userspace.
	 */
	u32 sample_flags;

	/**
	 * @sample_size: Considering the configured contents of a sample
	 * combined with the required header size, this is the total size
	 * of a single sample record.
	 */
	int sample_size;

	/**
	 * @ctx: %NULL if measuring system-wide across all contexts, otherwise
	 * the specific context being monitored.
	 */
	struct i915_gem_context *ctx;

	/**
	 * @enabled: Whether the stream is currently enabled, considering
	 * whether the stream was opened in a disabled state and based
	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
	 */
	bool enabled;

	/**
	 * @hold_preemption: Whether preemption is put on hold for command
	 * submissions done on the @ctx. This is useful for some drivers that
	 * cannot easily post-process the OA buffer context to subtract the
	 * delta of performance counters not associated with @ctx.
	 */
	bool hold_preemption;

	/**
	 * @ops: The callbacks providing the implementation of this specific
	 * type of configured stream.
	 */
	const struct i915_perf_stream_ops *ops;

	/**
	 * @oa_config: The OA configuration used by the stream.
	 */
	struct i915_oa_config *oa_config;

	/**
	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
	 * each time @oa_config changes.
	 */
	struct llist_head oa_config_bos;

	/**
	 * @pinned_ctx: The OA context specific information.
	 */
	struct intel_context *pinned_ctx;

	/**
	 * @specific_ctx_id: The id of the specific context.
	 */
	u32 specific_ctx_id;

	/**
	 * @specific_ctx_id_mask: The mask used to mask the specific_ctx_id
	 * bits.
	 */
	u32 specific_ctx_id_mask;
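
	/*
	 * Roughly how the two fields above are used when filtering OA reports
	 * (simplified sketch; report layouts differ between generations, and
	 * report_ctx_id_field is a placeholder for the context ID read out of
	 * a report):
	 *
	 *	u32 ctx_id = report_ctx_id_field & stream->specific_ctx_id_mask;
	 *
	 *	if (ctx_id == stream->specific_ctx_id)
	 *		// the report belongs to the monitored context
	 */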

	/**
	 * @poll_check_timer: High resolution timer that periodically checks
	 * for data in the circular OA buffer so that userspace can be
	 * notified (e.g. during a read() or poll()).
	 */
	struct hrtimer poll_check_timer;

	/**
	 * @poll_wq: The wait queue that the hrtimer callback wakes when it
	 * sees data ready to read in the circular OA buffer.
	 */
	wait_queue_head_t poll_wq;

	/**
	 * @pollin: Whether there is data available to read.
	 */
	bool pollin;

	/**
	 * @periodic: Whether periodic sampling is currently enabled.
	 */
	bool periodic;

	/**
	 * @period_exponent: The OA unit sampling frequency is derived from
	 * this exponent.
	 */
	int period_exponent;
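
	/*
	 * For reference (not normative; the exact conversion lives in
	 * i915_perf.c): the sampling period corresponds to roughly
	 * 2^(period_exponent + 1) GPU timestamp ticks, i.e.
	 *
	 *	period_ns ~= (2ULL << period_exponent) * NSEC_PER_SEC /
	 *		     timestamp_frequency_hz;
	 *
	 * so larger exponents mean exponentially lower sampling frequencies.
	 */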

	/**
	 * @oa_buffer: State of the OA buffer.
	 */
	struct {
		const struct i915_oa_format *format;
		struct i915_vma *vma;
		u8 *vaddr;
		u32 last_ctx_id;
		int size_exponent;

		/**
		 * @ptr_lock: Locks reads and writes to all head/tail state
		 *
		 * Consider: the head and tail pointer state needs to be read
		 * consistently from a hrtimer callback (atomic context) and
		 * read() fop (user context) with tail pointer updates happening
		 * in atomic context and head updates in user context and the
		 * (unlikely) possibility of read() errors needing to reset all
		 * head/tail state.
		 *
		 * Note: Contention/performance aren't currently a significant
		 * concern here considering the relatively low frequency of
		 * hrtimer callbacks (5ms period) and that reads typically only
		 * happen in response to a hrtimer event and likely complete
		 * before the next callback.
		 *
		 * Note: This lock is not held *while* reading and copying data
		 * to userspace so the value of head observed in hrtimer
		 * callbacks won't represent any partial consumption of data.
		 */
		spinlock_t ptr_lock;

		/**
		 * @head: Although we can always read back the head pointer register,
		 * we prefer to avoid trusting the HW state, just to avoid any
		 * risk that some hardware condition could somehow bump the
		 * head pointer unpredictably and cause us to forward the wrong
		 * OA buffer data to userspace.
		 */
		u32 head;

		/**
		 * @tail: The last verified tail that can be read by userspace.
		 */
		u32 tail;
	} oa_buffer;
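
	/*
	 * Illustrative sketch (assumed, simplified) of how @oa_buffer.head,
	 * @oa_buffer.tail and @oa_buffer.ptr_lock cooperate during a read;
	 * the real copy loop lives in i915_perf.c and also handles
	 * wrap-around, report validation, etc.:
	 *
	 *	u32 head, tail;
	 *
	 *	spin_lock_irq(&stream->oa_buffer.ptr_lock);
	 *	head = stream->oa_buffer.head;
	 *	tail = stream->oa_buffer.tail;	// last verified by the hrtimer
	 *	spin_unlock_irq(&stream->oa_buffer.ptr_lock);
	 *
	 *	// copy reports in [head, tail) to userspace without the lock held
	 *
	 *	spin_lock_irq(&stream->oa_buffer.ptr_lock);
	 *	stream->oa_buffer.head = head;	// publish what was consumed
	 *	spin_unlock_irq(&stream->oa_buffer.ptr_lock);
	 */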

	/**
	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
	 * reprogrammed.
	 */
	struct i915_vma *noa_wait;

	/**
	 * @poll_oa_period: The period in nanoseconds at which the OA
	 * buffer should be checked for available data.
	 */
	u64 poll_oa_period;
};

/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 */
struct i915_oa_ops {
	/**
	 * @is_valid_b_counter_reg: Validates a register's address for
	 * programming boolean counters on a particular platform.
	 */
	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_mux_reg: Validates a register's address for programming
	 * the mux on a particular platform.
	 */
	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_flex_reg: Validates a register's address for programming
	 * flex EU filtering on a particular platform.
	 */
	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @enable_metric_set: Selects and applies any MUX configuration to set
	 * up the Boolean and Custom (B/C) counters that are part of the
	 * counter reports being sampled. May apply system constraints such as
	 * disabling EU clock gating as required.
	 */
	int (*enable_metric_set)(struct i915_perf_stream *stream,
				 struct i915_active *active);

	/**
	 * @disable_metric_set: Remove system constraints associated with using
	 * the OA unit.
	 */
	void (*disable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @oa_enable: Enable periodic sampling
	 */
	void (*oa_enable)(struct i915_perf_stream *stream);

	/**
	 * @oa_disable: Disable periodic sampling
	 */
	void (*oa_disable)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy data from the circular OA buffer into a given userspace
	 * buffer.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @oa_hw_tail_read: read the OA tail pointer register
	 *
	 * In particular this enables us to share all the fiddly code for
	 * handling the OA unit tail pointer race that affects multiple
	 * generations.
	 */
	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};

struct i915_perf_group {
	/*
	 * @exclusive_stream: The stream currently using the OA unit. This is
	 * sometimes accessed outside a syscall associated with its file
	 * descriptor.
	 */
	struct i915_perf_stream *exclusive_stream;

	/*
	 * @num_engines: The number of engines using this OA unit.
	 */
	u32 num_engines;

	/*
	 * @regs: OA buffer register group for programming the OA unit.
	 */
	struct i915_perf_regs regs;

	/*
	 * @type: Type of OA unit - OAM, OAG, etc.
	 */
	enum oa_type type;
};

struct i915_perf_gt {
	/*
	 * Lock protecting everything below within this structure.
	 */
	struct mutex lock;

	/**
	 * @sseu: sseu configuration selected to run while perf is active,
	 * applies to all contexts.
	 */
	struct intel_sseu sseu;

	/**
	 * @num_perf_groups: number of perf groups per gt.
	 */
	u32 num_perf_groups;

	/*
	 * @group: list of OA groups - one for each OA buffer.
	 */
	struct i915_perf_group *group;
};

struct i915_perf {
	struct drm_i915_private *i915;

	struct kobject *metrics_kobj;

	/*
	 * Lock associated with adding/modifying/removing OA configs
	 * in perf->metrics_idr.
	 */
	struct mutex metrics_lock;

	/*
	 * List of dynamic configurations (struct i915_oa_config); you
	 * need to hold perf->metrics_lock to access it.
	 */
	struct idr metrics_idr;

	/**
	 * For rate limiting any notifications of spurious
	 * invalid OA reports
	 */
	struct ratelimit_state spurious_report_rs;

	/**
	 * For rate limiting any notifications of tail pointer
	 * race.
	 */
	struct ratelimit_state tail_pointer_race;

	u32 gen7_latched_oastatus1;
	u32 ctx_oactxctrl_offset;
	u32 ctx_flexeu0_offset;

	/**
	 * The RPT_ID/reason field for Gen8+ includes a bit
	 * to determine if the CTX ID in the report is valid,
	 * but the specific bit differs between Gen 8 and 9.
	 */
	u32 gen8_valid_ctx_bit;

	struct i915_oa_ops ops;
	const struct i915_oa_format *oa_formats;

	/**
	 * Use a format mask to store the supported formats
	 * for a platform.
	 */
#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
	unsigned long format_mask[FORMAT_MASK_SIZE];
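
	/*
	 * Sketch (assumed, mirroring the usual kernel bitmap idiom) of how a
	 * supported format would be recorded in and queried against this mask:
	 *
	 *	__set_bit(format, perf->format_mask);		// mark supported
	 *	supported = test_bit(format, perf->format_mask);
	 *
	 * where format is an enum drm_i915_oa_format value and therefore
	 * always below I915_OA_FORMAT_MAX, matching FORMAT_MASK_SIZE above.
	 */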

	atomic64_t noa_programming_delay;
};

#endif /* _I915_PERF_TYPES_H_ */