1 | /* |
2 | * Performance events: |
3 | * |
4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra |
7 | * |
8 | * Data type definitions, declarations, prototypes. |
9 | * |
10 | * Started by: Thomas Gleixner and Ingo Molnar |
11 | * |
12 | * For licencing details see kernel-base/COPYING |
13 | */ |
14 | #ifndef _LINUX_PERF_EVENT_H |
15 | #define _LINUX_PERF_EVENT_H |
16 | |
17 | #include <uapi/linux/perf_event.h> |
18 | #include <uapi/linux/bpf_perf_event.h> |
19 | |
20 | /* |
21 | * Kernel-internal data types and definitions: |
22 | */ |
23 | |
24 | #ifdef CONFIG_PERF_EVENTS |
25 | # include <asm/perf_event.h> |
26 | # include <asm/local64.h> |
27 | #endif |
28 | |
29 | #define PERF_GUEST_ACTIVE 0x01 |
30 | #define PERF_GUEST_USER 0x02 |
31 | |
32 | struct perf_guest_info_callbacks { |
33 | unsigned int (*state)(void); |
34 | unsigned long (*get_ip)(void); |
35 | unsigned int (*handle_intel_pt_intr)(void); |
36 | }; |
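
/*
 * Example (illustrative sketch only, not lifted from a real hypervisor):
 * a guest-aware subsystem fills in these callbacks and hands them to perf
 * via perf_register_guest_info_callbacks(), declared further down. All
 * my_* names are hypothetical; handle_intel_pt_intr may be left NULL.
 *
 *	static unsigned int my_guest_state(void)
 *	{
 *		return my_guest_running ? PERF_GUEST_ACTIVE : 0;
 *	}
 *
 *	static unsigned long my_guest_get_ip(void)
 *	{
 *		return my_guest_saved_ip;
 *	}
 *
 *	static struct perf_guest_info_callbacks my_guest_cbs = {
 *		.state	= my_guest_state,
 *		.get_ip	= my_guest_get_ip,
 *	};
 *
 *	perf_register_guest_info_callbacks(&my_guest_cbs);
 */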
37 | |
38 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
39 | #include <asm/hw_breakpoint.h> |
40 | #endif |
41 | |
42 | #include <linux/list.h> |
43 | #include <linux/mutex.h> |
44 | #include <linux/rculist.h> |
45 | #include <linux/rcupdate.h> |
46 | #include <linux/spinlock.h> |
47 | #include <linux/hrtimer.h> |
48 | #include <linux/fs.h> |
49 | #include <linux/pid_namespace.h> |
50 | #include <linux/workqueue.h> |
51 | #include <linux/ftrace.h> |
52 | #include <linux/cpu.h> |
53 | #include <linux/irq_work.h> |
54 | #include <linux/static_key.h> |
55 | #include <linux/jump_label_ratelimit.h> |
56 | #include <linux/atomic.h> |
57 | #include <linux/sysfs.h> |
58 | #include <linux/perf_regs.h> |
59 | #include <linux/cgroup.h> |
60 | #include <linux/refcount.h> |
61 | #include <linux/security.h> |
62 | #include <linux/static_call.h> |
63 | #include <asm/local.h> |
64 | |
65 | struct perf_callchain_entry { |
66 | __u64 nr; |
67 | __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ |
68 | }; |
69 | |
70 | struct perf_callchain_entry_ctx { |
71 | struct perf_callchain_entry *entry; |
72 | u32 max_stack; |
73 | u32 nr; |
74 | short contexts; |
75 | bool contexts_maxed; |
76 | }; |
77 | |
78 | typedef unsigned long (*perf_copy_f)(void *dst, const void *src, |
79 | unsigned long off, unsigned long len); |
80 | |
81 | struct perf_raw_frag { |
82 | union { |
83 | struct perf_raw_frag *next; |
84 | unsigned long pad; |
85 | }; |
86 | perf_copy_f copy; |
87 | void *data; |
88 | u32 size; |
89 | } __packed; |
90 | |
91 | struct perf_raw_record { |
92 | struct perf_raw_frag frag; |
93 | u32 size; |
94 | }; |
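
/*
 * Illustrative sketch of the usual single-fragment producer pattern (as
 * used by tracepoint/BPF style output paths); the payload variables are
 * assumptions. The overall record size is filled in by the core during
 * sample preparation.
 *
 *	struct perf_sample_data sd;
 *	struct perf_raw_record raw = {
 *		.frag = {
 *			.size	= payload_size,
 *			.data	= payload,
 *		},
 *	};
 *
 *	perf_sample_data_init(&sd, 0, 0);
 *	sd.raw = &raw;
 *	perf_event_output(event, &sd, regs);
 */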
95 | |
96 | /* |
97 | * branch stack layout: |
98 | * nr: number of taken branches stored in entries[] |
99 | * hw_idx: The low level index of raw branch records |
100 | * for the most recent branch. |
101 | * -1ULL means invalid/unknown. |
102 | * |
 * Note that nr can vary from sample to sample.
 * Branches (to, from) are stored from most recent
 * to least recent, i.e., entries[0] contains the most
 * recent branch.
107 | * The entries[] is an abstraction of raw branch records, |
108 | * which may not be stored in age order in HW, e.g. Intel LBR. |
109 | * The hw_idx is to expose the low level index of raw |
110 | * branch record for the most recent branch aka entries[0]. |
111 | * The hw_idx index is between -1 (unknown) and max depth, |
112 | * which can be retrieved in /sys/devices/cpu/caps/branches. |
113 | * For the architectures whose raw branch records are |
114 | * already stored in age order, the hw_idx should be 0. |
115 | */ |
116 | struct perf_branch_stack { |
117 | __u64 nr; |
118 | __u64 hw_idx; |
119 | struct perf_branch_entry entries[]; |
120 | }; |
121 | |
122 | struct task_struct; |
123 | |
124 | /* |
125 | * extra PMU register associated with an event |
126 | */ |
struct hw_perf_event_extra {
	u64		config;	/* register value */
	unsigned int	reg;	/* register address or index */
	int		alloc;	/* extra register already allocated */
	int		idx;	/* index in shared_regs->regs[] */
};
133 | |
134 | /** |
135 | * hw_perf_event::flag values |
136 | * |
137 | * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific |
138 | * usage. |
139 | */ |
140 | #define PERF_EVENT_FLAG_ARCH 0x0000ffff |
141 | #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 |
142 | |
143 | /** |
144 | * struct hw_perf_event - performance event hardware details: |
145 | */ |
146 | struct hw_perf_event { |
147 | #ifdef CONFIG_PERF_EVENTS |
148 | union { |
149 | struct { /* hardware */ |
150 | u64 config; |
151 | u64 last_tag; |
152 | unsigned long config_base; |
153 | unsigned long event_base; |
154 | int event_base_rdpmc; |
155 | int idx; |
156 | int last_cpu; |
157 | int flags; |
158 | |
159 | struct hw_perf_event_extra extra_reg; |
160 | struct hw_perf_event_extra branch_reg; |
161 | }; |
162 | struct { /* software */ |
163 | struct hrtimer hrtimer; |
164 | }; |
165 | struct { /* tracepoint */ |
166 | /* for tp_event->class */ |
167 | struct list_head tp_list; |
168 | }; |
169 | struct { /* amd_power */ |
170 | u64 pwr_acc; |
171 | u64 ptsc; |
172 | }; |
173 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
174 | struct { /* breakpoint */ |
175 | /* |
176 | * Crufty hack to avoid the chicken and egg |
177 | * problem hw_breakpoint has with context |
	 * creation and event initialization.
179 | */ |
180 | struct arch_hw_breakpoint info; |
181 | struct list_head bp_list; |
182 | }; |
183 | #endif |
184 | struct { /* amd_iommu */ |
185 | u8 iommu_bank; |
186 | u8 iommu_cntr; |
187 | u16 padding; |
188 | u64 conf; |
189 | u64 conf1; |
190 | }; |
191 | }; |
192 | /* |
193 | * If the event is a per task event, this will point to the task in |
194 | * question. See the comment in perf_event_alloc(). |
195 | */ |
196 | struct task_struct *target; |
197 | |
198 | /* |
199 | * PMU would store hardware filter configuration |
200 | * here. |
201 | */ |
202 | void *addr_filters; |
203 | |
204 | /* Last sync'ed generation of filters */ |
205 | unsigned long addr_filters_gen; |
206 | |
207 | /* |
208 | * hw_perf_event::state flags; used to track the PERF_EF_* state. |
209 | */ |
210 | #define PERF_HES_STOPPED 0x01 /* the counter is stopped */ |
211 | #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ |
212 | #define PERF_HES_ARCH 0x04 |
213 | |
214 | int state; |
215 | |
216 | /* |
217 | * The last observed hardware counter value, updated with a |
218 | * local64_cmpxchg() such that pmu::read() can be called nested. |
219 | */ |
220 | local64_t prev_count; |
221 | |
222 | /* |
223 | * The period to start the next sample with. |
224 | */ |
225 | u64 sample_period; |
226 | |
227 | union { |
228 | struct { /* Sampling */ |
229 | /* |
230 | * The period we started this sample with. |
231 | */ |
232 | u64 last_period; |
233 | |
234 | /* |
235 | * However much is left of the current period; |
236 | * note that this is a full 64bit value and |
237 | * allows for generation of periods longer |
238 | * than hardware might allow. |
239 | */ |
240 | local64_t period_left; |
241 | }; |
242 | struct { /* Topdown events counting for context switch */ |
243 | u64 saved_metric; |
244 | u64 saved_slots; |
245 | }; |
246 | }; |
247 | |
248 | /* |
249 | * State for throttling the event, see __perf_event_overflow() and |
250 | * perf_adjust_freq_unthr_context(). |
251 | */ |
252 | u64 interrupts_seq; |
253 | u64 interrupts; |
254 | |
255 | /* |
256 | * State for freq target events, see __perf_event_overflow() and |
257 | * perf_adjust_freq_unthr_context(). |
258 | */ |
259 | u64 freq_time_stamp; |
260 | u64 freq_count_stamp; |
261 | #endif |
262 | }; |
263 | |
264 | struct perf_event; |
265 | |
266 | /* |
267 | * Common implementation detail of pmu::{start,commit,cancel}_txn |
268 | */ |
269 | #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ |
270 | #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ |
271 | |
272 | /** |
273 | * pmu::capabilities flags |
274 | */ |
275 | #define PERF_PMU_CAP_NO_INTERRUPT 0x0001 |
276 | #define PERF_PMU_CAP_NO_NMI 0x0002 |
277 | #define PERF_PMU_CAP_AUX_NO_SG 0x0004 |
278 | #define PERF_PMU_CAP_EXTENDED_REGS 0x0008 |
279 | #define PERF_PMU_CAP_EXCLUSIVE 0x0010 |
280 | #define PERF_PMU_CAP_ITRACE 0x0020 |
281 | #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040 |
282 | #define PERF_PMU_CAP_NO_EXCLUDE 0x0080 |
283 | #define PERF_PMU_CAP_AUX_OUTPUT 0x0100 |
284 | #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200 |
285 | |
286 | struct perf_output_handle; |
287 | |
288 | /** |
289 | * struct pmu - generic performance monitoring unit |
290 | */ |
291 | struct pmu { |
292 | struct list_head entry; |
293 | |
294 | struct module *module; |
295 | struct device *dev; |
296 | const struct attribute_group **attr_groups; |
297 | const struct attribute_group **attr_update; |
298 | const char *name; |
299 | int type; |
300 | |
301 | /* |
302 | * various common per-pmu feature flags |
303 | */ |
304 | int capabilities; |
305 | |
306 | int __percpu *pmu_disable_count; |
307 | struct perf_cpu_context __percpu *pmu_cpu_context; |
308 | atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ |
309 | int task_ctx_nr; |
310 | int hrtimer_interval_ms; |
311 | |
312 | /* number of address filters this PMU can do */ |
313 | unsigned int nr_addr_filters; |
314 | |
315 | /* |
316 | * Fully disable/enable this PMU, can be used to protect from the PMI |
317 | * as well as for lazy/batch writing of the MSRs. |
318 | */ |
319 | void (*pmu_enable) (struct pmu *pmu); /* optional */ |
320 | void (*pmu_disable) (struct pmu *pmu); /* optional */ |
321 | |
322 | /* |
323 | * Try and initialize the event for this PMU. |
324 | * |
325 | * Returns: |
326 | * -ENOENT -- @event is not for this PMU |
327 | * |
328 | * -ENODEV -- @event is for this PMU but PMU not present |
329 | * -EBUSY -- @event is for this PMU but PMU temporarily unavailable |
330 | * -EINVAL -- @event is for this PMU but @event is not valid |
331 | * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported |
332 | * -EACCES -- @event is for this PMU, @event is valid, but no privileges |
333 | * |
334 | * 0 -- @event is for this PMU and valid |
335 | * |
336 | * Other error return values are allowed. |
337 | */ |
338 | int (*event_init) (struct perf_event *event); |
339 | |
340 | /* |
341 | * Notification that the event was mapped or unmapped. Called |
342 | * in the context of the mapping task. |
343 | */ |
344 | void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ |
345 | void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ |
346 | |
347 | /* |
	 * Flags for ->add()/->del()/->start()/->stop(). There are
349 | * matching hw_perf_event::state flags. |
350 | */ |
351 | #define PERF_EF_START 0x01 /* start the counter when adding */ |
352 | #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ |
353 | #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ |
354 | |
355 | /* |
356 | * Adds/Removes a counter to/from the PMU, can be done inside a |
357 | * transaction, see the ->*_txn() methods. |
358 | * |
	 * The add/del callbacks will reserve all hardware resources required
	 * to service the event; this includes any counter constraint
	 * scheduling, etc.
362 | * |
363 | * Called with IRQs disabled and the PMU disabled on the CPU the event |
364 | * is on. |
365 | * |
366 | * ->add() called without PERF_EF_START should result in the same state |
367 | * as ->add() followed by ->stop(). |
368 | * |
	 * ->del() must always stop the event with PERF_EF_UPDATE semantics.
	 * If it calls ->stop(), that ->stop() must deal with already being
	 * stopped without PERF_EF_UPDATE.
372 | */ |
373 | int (*add) (struct perf_event *event, int flags); |
374 | void (*del) (struct perf_event *event, int flags); |
375 | |
376 | /* |
377 | * Starts/Stops a counter present on the PMU. |
378 | * |
379 | * The PMI handler should stop the counter when perf_event_overflow() |
380 | * returns !0. ->start() will be used to continue. |
381 | * |
382 | * Also used to change the sample period. |
383 | * |
384 | * Called with IRQs disabled and the PMU disabled on the CPU the event |
	 * is on -- will be called from NMI context when the PMU generates
	 * NMIs.
387 | * |
388 | * ->stop() with PERF_EF_UPDATE will read the counter and update |
389 | * period/count values like ->read() would. |
390 | * |
391 | * ->start() with PERF_EF_RELOAD will reprogram the counter |
392 | * value, must be preceded by a ->stop() with PERF_EF_UPDATE. |
393 | */ |
394 | void (*start) (struct perf_event *event, int flags); |
395 | void (*stop) (struct perf_event *event, int flags); |
396 | |
397 | /* |
398 | * Updates the counter value of the event. |
399 | * |
400 | * For sampling capable PMUs this will also update the software period |
401 | * hw_perf_event::period_left field. |
402 | */ |
403 | void (*read) (struct perf_event *event); |
404 | |
405 | /* |
406 | * Group events scheduling is treated as a transaction, add |
407 | * group events as a whole and perform one schedulability test. |
408 | * If the test fails, roll back the whole group |
409 | * |
410 | * Start the transaction, after this ->add() doesn't need to |
411 | * do schedulability tests. |
412 | * |
413 | * Optional. |
414 | */ |
415 | void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); |
416 | /* |
417 | * If ->start_txn() disabled the ->add() schedulability test |
418 | * then ->commit_txn() is required to perform one. On success |
419 | * the transaction is closed. On error the transaction is kept |
420 | * open until ->cancel_txn() is called. |
421 | * |
422 | * Optional. |
423 | */ |
424 | int (*commit_txn) (struct pmu *pmu); |
425 | /* |
426 | * Will cancel the transaction, assumes ->del() is called |
427 | * for each successful ->add() during the transaction. |
428 | * |
429 | * Optional. |
430 | */ |
431 | void (*cancel_txn) (struct pmu *pmu); |
432 | |
	/*
	 * Will return the value for perf_event_mmap_page::index for this event;
	 * if no implementation is provided it will default to: event->hw.idx + 1.
	 */
	int (*event_idx)		(struct perf_event *event); /* optional */
438 | |
439 | /* |
440 | * context-switches callback |
441 | */ |
442 | void (*sched_task) (struct perf_event_context *ctx, |
443 | bool sched_in); |
444 | |
445 | /* |
446 | * Kmem cache of PMU specific data |
447 | */ |
448 | struct kmem_cache *task_ctx_cache; |
449 | |
450 | /* |
451 | * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) |
452 | * can be synchronized using this function. See Intel LBR callstack support |
453 | * implementation and Perf core context switch handling callbacks for usage |
454 | * examples. |
455 | */ |
456 | void (*swap_task_ctx) (struct perf_event_context *prev, |
457 | struct perf_event_context *next); |
458 | /* optional */ |
459 | |
460 | /* |
461 | * Set up pmu-private data structures for an AUX area |
462 | */ |
463 | void *(*setup_aux) (struct perf_event *event, void **pages, |
464 | int nr_pages, bool overwrite); |
465 | /* optional */ |
466 | |
467 | /* |
468 | * Free pmu-private AUX data structures |
469 | */ |
470 | void (*free_aux) (void *aux); /* optional */ |
471 | |
472 | /* |
473 | * Take a snapshot of the AUX buffer without touching the event |
474 | * state, so that preempting ->start()/->stop() callbacks does |
475 | * not interfere with their logic. Called in PMI context. |
476 | * |
477 | * Returns the size of AUX data copied to the output handle. |
478 | * |
479 | * Optional. |
480 | */ |
481 | long (*snapshot_aux) (struct perf_event *event, |
482 | struct perf_output_handle *handle, |
483 | unsigned long size); |
484 | |
485 | /* |
486 | * Validate address range filters: make sure the HW supports the |
487 | * requested configuration and number of filters; return 0 if the |
488 | * supplied filters are valid, -errno otherwise. |
489 | * |
490 | * Runs in the context of the ioctl()ing process and is not serialized |
491 | * with the rest of the PMU callbacks. |
492 | */ |
493 | int (*addr_filters_validate) (struct list_head *filters); |
494 | /* optional */ |
495 | |
496 | /* |
497 | * Synchronize address range filter configuration: |
498 | * translate hw-agnostic filters into hardware configuration in |
499 | * event::hw::addr_filters. |
500 | * |
501 | * Runs as a part of filter sync sequence that is done in ->start() |
502 | * callback by calling perf_event_addr_filters_sync(). |
503 | * |
504 | * May (and should) traverse event::addr_filters::list, for which its |
505 | * caller provides necessary serialization. |
506 | */ |
507 | void (*addr_filters_sync) (struct perf_event *event); |
508 | /* optional */ |
509 | |
510 | /* |
511 | * Check if event can be used for aux_output purposes for |
512 | * events of this PMU. |
513 | * |
514 | * Runs from perf_event_open(). Should return 0 for "no match" |
515 | * or non-zero for "match". |
516 | */ |
517 | int (*aux_output_match) (struct perf_event *event); |
518 | /* optional */ |
519 | |
520 | /* |
521 | * Filter events for PMU-specific reasons. |
522 | */ |
523 | int (*filter_match) (struct perf_event *event); /* optional */ |
524 | |
525 | /* |
526 | * Check period value for PERF_EVENT_IOC_PERIOD ioctl. |
527 | */ |
528 | int (*check_period) (struct perf_event *event, u64 value); /* optional */ |
529 | }; |
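
/*
 * Minimal registration sketch. A real driver must implement the mandatory
 * callbacks (event_init, add, del, start, stop, read) properly; this only
 * shows the wiring, and every my_* name is hypothetical.
 *
 *	static int my_event_init(struct perf_event *event)
 *	{
 *		if (event->attr.type != event->pmu->type)
 *			return -ENOENT;
 *		return 0;
 *	}
 *
 *	static struct pmu my_pmu = {
 *		.task_ctx_nr	= perf_invalid_context,
 *		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 *		.event_init	= my_event_init,
 *		.add		= my_add,
 *		.del		= my_del,
 *		.start		= my_start,
 *		.stop		= my_stop,
 *		.read		= my_read,
 *	};
 *
 *	ret = perf_pmu_register(&my_pmu, "my_pmu", -1);
 */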
530 | |
531 | enum perf_addr_filter_action_t { |
532 | PERF_ADDR_FILTER_ACTION_STOP = 0, |
533 | PERF_ADDR_FILTER_ACTION_START, |
534 | PERF_ADDR_FILTER_ACTION_FILTER, |
535 | }; |
536 | |
537 | /** |
538 | * struct perf_addr_filter - address range filter definition |
539 | * @entry: event's filter list linkage |
540 | * @path: object file's path for file-based filters |
541 | * @offset: filter range offset |
542 | * @size: filter range size (size==0 means single address trigger) |
543 | * @action: filter/start/stop |
544 | * |
545 | * This is a hardware-agnostic filter configuration as specified by the user. |
546 | */ |
547 | struct perf_addr_filter { |
548 | struct list_head entry; |
549 | struct path path; |
550 | unsigned long offset; |
551 | unsigned long size; |
552 | enum perf_addr_filter_action_t action; |
553 | }; |
554 | |
555 | /** |
556 | * struct perf_addr_filters_head - container for address range filters |
557 | * @list: list of filters for this event |
558 | * @lock: spinlock that serializes accesses to the @list and event's |
559 | * (and its children's) filter generations. |
560 | * @nr_file_filters: number of file-based filters |
561 | * |
562 | * A child event will use parent's @list (and therefore @lock), so they are |
563 | * bundled together; see perf_event_addr_filters(). |
564 | */ |
565 | struct perf_addr_filters_head { |
566 | struct list_head list; |
567 | raw_spinlock_t lock; |
568 | unsigned int nr_file_filters; |
569 | }; |
570 | |
571 | struct perf_addr_filter_range { |
572 | unsigned long start; |
573 | unsigned long size; |
574 | }; |
575 | |
576 | /** |
577 | * enum perf_event_state - the states of an event: |
578 | */ |
579 | enum perf_event_state { |
580 | PERF_EVENT_STATE_DEAD = -4, |
581 | PERF_EVENT_STATE_EXIT = -3, |
582 | PERF_EVENT_STATE_ERROR = -2, |
583 | PERF_EVENT_STATE_OFF = -1, |
584 | PERF_EVENT_STATE_INACTIVE = 0, |
585 | PERF_EVENT_STATE_ACTIVE = 1, |
586 | }; |
587 | |
588 | struct file; |
589 | struct perf_sample_data; |
590 | |
591 | typedef void (*perf_overflow_handler_t)(struct perf_event *, |
592 | struct perf_sample_data *, |
593 | struct pt_regs *regs); |
594 | |
595 | /* |
596 | * Event capabilities. For event_caps and groups caps. |
597 | * |
598 | * PERF_EV_CAP_SOFTWARE: Is a software event. |
599 | * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read |
600 | * from any CPU in the package where it is active. |
601 | * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and |
602 | * cannot be a group leader. If an event with this flag is detached from the |
603 | * group it is scheduled out and moved into an unrecoverable ERROR state. |
604 | */ |
605 | #define PERF_EV_CAP_SOFTWARE BIT(0) |
606 | #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) |
607 | #define PERF_EV_CAP_SIBLING BIT(2) |
608 | |
609 | #define SWEVENT_HLIST_BITS 8 |
610 | #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) |
611 | |
612 | struct swevent_hlist { |
613 | struct hlist_head heads[SWEVENT_HLIST_SIZE]; |
614 | struct rcu_head rcu_head; |
615 | }; |
616 | |
617 | #define PERF_ATTACH_CONTEXT 0x01 |
618 | #define PERF_ATTACH_GROUP 0x02 |
619 | #define PERF_ATTACH_TASK 0x04 |
620 | #define PERF_ATTACH_TASK_DATA 0x08 |
621 | #define PERF_ATTACH_ITRACE 0x10 |
622 | #define PERF_ATTACH_SCHED_CB 0x20 |
623 | #define PERF_ATTACH_CHILD 0x40 |
624 | |
625 | struct bpf_prog; |
626 | struct perf_cgroup; |
627 | struct perf_buffer; |
628 | |
629 | struct pmu_event_list { |
630 | raw_spinlock_t lock; |
631 | struct list_head list; |
632 | }; |
633 | |
634 | #define for_each_sibling_event(sibling, event) \ |
635 | if ((event)->group_leader == (event)) \ |
636 | list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) |
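
/*
 * Typical use (sketch): starting from a group leader, iterate its siblings.
 * The 'leader' variable and the error value are purely illustrative; the
 * locking rules for sibling_list are documented in struct perf_event below.
 *
 *	struct perf_event *sibling;
 *
 *	for_each_sibling_event(sibling, leader) {
 *		if (sibling->pmu != leader->pmu)
 *			return -EINVAL;
 *	}
 */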
637 | |
638 | /** |
639 | * struct perf_event - performance event kernel representation: |
640 | */ |
641 | struct perf_event { |
642 | #ifdef CONFIG_PERF_EVENTS |
643 | /* |
644 | * entry onto perf_event_context::event_list; |
	 * modifications require ctx->lock;
	 * iterations are RCU-safe.
647 | */ |
648 | struct list_head event_entry; |
649 | |
650 | /* |
651 | * Locked for modification by both ctx->mutex and ctx->lock; holding |
	 * either suffices for read.
653 | */ |
654 | struct list_head sibling_list; |
655 | struct list_head active_list; |
656 | /* |
657 | * Node on the pinned or flexible tree located at the event context; |
658 | */ |
659 | struct rb_node group_node; |
660 | u64 group_index; |
661 | /* |
662 | * We need storage to track the entries in perf_pmu_migrate_context; we |
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact, which avoids us using the other two entries.
665 | */ |
666 | struct list_head migrate_entry; |
667 | |
668 | struct hlist_node hlist_entry; |
669 | struct list_head active_entry; |
670 | int nr_siblings; |
671 | |
672 | /* Not serialized. Only written during event initialization. */ |
673 | int event_caps; |
674 | /* The cumulative AND of all event_caps for events in this group. */ |
675 | int group_caps; |
676 | |
677 | struct perf_event *group_leader; |
678 | struct pmu *pmu; |
679 | void *pmu_private; |
680 | |
681 | enum perf_event_state state; |
682 | unsigned int attach_state; |
683 | local64_t count; |
684 | atomic64_t child_count; |
685 | |
686 | /* |
687 | * These are the total time in nanoseconds that the event |
688 | * has been enabled (i.e. eligible to run, and the task has |
689 | * been scheduled in, if this is a per-task event) |
690 | * and running (scheduled onto the CPU), respectively. |
691 | */ |
692 | u64 total_time_enabled; |
693 | u64 total_time_running; |
694 | u64 tstamp; |
695 | |
696 | struct perf_event_attr attr; |
697 | u16 header_size; |
698 | u16 id_header_size; |
699 | u16 read_size; |
700 | struct hw_perf_event hw; |
701 | |
702 | struct perf_event_context *ctx; |
703 | atomic_long_t refcount; |
704 | |
705 | /* |
706 | * These accumulate total time (in nanoseconds) that children |
707 | * events have been enabled and running, respectively. |
708 | */ |
709 | atomic64_t child_total_time_enabled; |
710 | atomic64_t child_total_time_running; |
711 | |
712 | /* |
713 | * Protect attach/detach and child_list: |
714 | */ |
715 | struct mutex child_mutex; |
716 | struct list_head child_list; |
717 | struct perf_event *parent; |
718 | |
719 | int oncpu; |
720 | int cpu; |
721 | |
722 | struct list_head owner_entry; |
723 | struct task_struct *owner; |
724 | |
725 | /* mmap bits */ |
726 | struct mutex mmap_mutex; |
727 | atomic_t mmap_count; |
728 | |
729 | struct perf_buffer *rb; |
730 | struct list_head rb_entry; |
731 | unsigned long rcu_batches; |
732 | int rcu_pending; |
733 | |
734 | /* poll related */ |
735 | wait_queue_head_t waitq; |
736 | struct fasync_struct *fasync; |
737 | |
738 | /* delayed work for NMIs and such */ |
739 | int pending_wakeup; |
740 | int pending_kill; |
741 | int pending_disable; |
742 | unsigned long pending_addr; /* SIGTRAP */ |
743 | struct irq_work pending; |
744 | |
745 | atomic_t event_limit; |
746 | |
747 | /* address range filters */ |
748 | struct perf_addr_filters_head addr_filters; |
	/* vma address array for file-based filters */
750 | struct perf_addr_filter_range *addr_filter_ranges; |
751 | unsigned long addr_filters_gen; |
752 | |
753 | /* for aux_output events */ |
754 | struct perf_event *aux_event; |
755 | |
756 | void (*destroy)(struct perf_event *); |
757 | struct rcu_head rcu_head; |
758 | |
759 | struct pid_namespace *ns; |
760 | u64 id; |
761 | |
762 | atomic64_t lost_samples; |
763 | |
764 | u64 (*clock)(void); |
765 | perf_overflow_handler_t overflow_handler; |
766 | void *overflow_handler_context; |
767 | #ifdef CONFIG_BPF_SYSCALL |
768 | perf_overflow_handler_t orig_overflow_handler; |
769 | struct bpf_prog *prog; |
770 | u64 bpf_cookie; |
771 | #endif |
772 | |
773 | #ifdef CONFIG_EVENT_TRACING |
774 | struct trace_event_call *tp_event; |
775 | struct event_filter *filter; |
776 | #ifdef CONFIG_FUNCTION_TRACER |
777 | struct ftrace_ops ftrace_ops; |
778 | #endif |
779 | #endif |
780 | |
781 | #ifdef CONFIG_CGROUP_PERF |
	struct perf_cgroup		*cgrp; /* cgroup the event is attached to */
783 | #endif |
784 | |
785 | #ifdef CONFIG_SECURITY |
786 | void *security; |
787 | #endif |
788 | struct list_head sb_list; |
789 | #endif /* CONFIG_PERF_EVENTS */ |
790 | }; |
791 | |
792 | |
793 | struct perf_event_groups { |
794 | struct rb_root tree; |
795 | u64 index; |
796 | }; |
797 | |
798 | /** |
799 | * struct perf_event_context - event context structure |
800 | * |
801 | * Used as a container for task events and CPU events as well: |
802 | */ |
803 | struct perf_event_context { |
804 | struct pmu *pmu; |
805 | /* |
806 | * Protect the states of the events in the list, |
807 | * nr_active, and the list: |
808 | */ |
809 | raw_spinlock_t lock; |
810 | /* |
811 | * Protect the list of events. Locking either mutex or lock |
812 | * is sufficient to ensure the list doesn't change; to change |
813 | * the list you need to lock both the mutex and the spinlock. |
814 | */ |
815 | struct mutex mutex; |
816 | |
817 | struct list_head active_ctx_list; |
818 | struct perf_event_groups pinned_groups; |
819 | struct perf_event_groups flexible_groups; |
820 | struct list_head event_list; |
821 | |
822 | struct list_head pinned_active; |
823 | struct list_head flexible_active; |
824 | |
825 | int nr_events; |
826 | int nr_active; |
827 | int nr_user; |
828 | int is_active; |
829 | int nr_stat; |
830 | int nr_freq; |
831 | int rotate_disable; |
	/*
	 * Set when nr_events != nr_active, except that it is tolerant of
	 * events that do not need to be active due to scheduling constraints,
	 * such as cgroups.
	 */
836 | int rotate_necessary; |
837 | refcount_t refcount; |
838 | struct task_struct *task; |
839 | |
840 | /* |
841 | * Context clock, runs when context enabled. |
842 | */ |
843 | u64 time; |
844 | u64 timestamp; |
845 | u64 timeoffset; |
846 | |
847 | /* |
848 | * These fields let us detect when two contexts have both |
849 | * been cloned (inherited) from a common ancestor. |
850 | */ |
851 | struct perf_event_context *parent_ctx; |
852 | u64 parent_gen; |
853 | u64 generation; |
854 | int pin_count; |
855 | #ifdef CONFIG_CGROUP_PERF |
856 | int nr_cgroups; /* cgroup evts */ |
857 | #endif |
858 | void *task_ctx_data; /* pmu specific data */ |
859 | struct rcu_head rcu_head; |
860 | }; |
861 | |
862 | /* |
863 | * Number of contexts where an event can trigger: |
864 | * task, softirq, hardirq, nmi. |
865 | */ |
866 | #define PERF_NR_CONTEXTS 4 |
867 | |
868 | /** |
869 | * struct perf_cpu_context - per cpu event context structure |
870 | */ |
871 | struct perf_cpu_context { |
872 | struct perf_event_context ctx; |
873 | struct perf_event_context *task_ctx; |
874 | int active_oncpu; |
875 | int exclusive; |
876 | |
877 | raw_spinlock_t hrtimer_lock; |
878 | struct hrtimer hrtimer; |
879 | ktime_t hrtimer_interval; |
880 | unsigned int hrtimer_active; |
881 | |
882 | #ifdef CONFIG_CGROUP_PERF |
883 | struct perf_cgroup *cgrp; |
884 | struct list_head cgrp_cpuctx_entry; |
885 | #endif |
886 | |
887 | struct list_head sched_cb_entry; |
888 | int sched_cb_usage; |
889 | |
890 | int online; |
891 | /* |
892 | * Per-CPU storage for iterators used in visit_groups_merge. The default |
893 | * storage is of size 2 to hold the CPU and any CPU event iterators. |
894 | */ |
895 | int heap_size; |
896 | struct perf_event **heap; |
897 | struct perf_event *heap_default[2]; |
898 | }; |
899 | |
900 | struct perf_output_handle { |
901 | struct perf_event *event; |
902 | struct perf_buffer *rb; |
903 | unsigned long wakeup; |
904 | unsigned long size; |
905 | u64 aux_flags; |
906 | union { |
907 | void *addr; |
908 | unsigned long head; |
909 | }; |
910 | int page; |
911 | }; |
912 | |
913 | struct bpf_perf_event_data_kern { |
914 | bpf_user_pt_regs_t *regs; |
915 | struct perf_sample_data *data; |
916 | struct perf_event *event; |
917 | }; |
918 | |
919 | #ifdef CONFIG_CGROUP_PERF |
920 | |
921 | /* |
922 | * perf_cgroup_info keeps track of time_enabled for a cgroup. |
923 | * This is a per-cpu dynamically allocated data structure. |
924 | */ |
925 | struct perf_cgroup_info { |
926 | u64 time; |
927 | u64 timestamp; |
928 | u64 timeoffset; |
929 | int active; |
930 | }; |
931 | |
932 | struct perf_cgroup { |
933 | struct cgroup_subsys_state css; |
934 | struct perf_cgroup_info __percpu *info; |
935 | }; |
936 | |
937 | /* |
938 | * Must ensure cgroup is pinned (css_get) before calling |
939 | * this function. In other words, we cannot call this function |
940 | * if there is no cgroup event for the current CPU context. |
941 | */ |
942 | static inline struct perf_cgroup * |
943 | perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) |
944 | { |
945 | return container_of(task_css_check(task, perf_event_cgrp_id, |
946 | ctx ? lockdep_is_held(&ctx->lock) |
947 | : true), |
948 | struct perf_cgroup, css); |
949 | } |
950 | #endif /* CONFIG_CGROUP_PERF */ |
951 | |
952 | #ifdef CONFIG_PERF_EVENTS |
953 | |
954 | extern void *perf_aux_output_begin(struct perf_output_handle *handle, |
955 | struct perf_event *event); |
956 | extern void perf_aux_output_end(struct perf_output_handle *handle, |
957 | unsigned long size); |
958 | extern int perf_aux_output_skip(struct perf_output_handle *handle, |
959 | unsigned long size); |
960 | extern void *perf_get_aux(struct perf_output_handle *handle); |
961 | extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); |
962 | extern void perf_event_itrace_started(struct perf_event *event); |
963 | |
964 | extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); |
965 | extern void perf_pmu_unregister(struct pmu *pmu); |
966 | |
967 | extern void __perf_event_task_sched_in(struct task_struct *prev, |
968 | struct task_struct *task); |
969 | extern void __perf_event_task_sched_out(struct task_struct *prev, |
970 | struct task_struct *next); |
971 | extern int perf_event_init_task(struct task_struct *child, u64 clone_flags); |
972 | extern void perf_event_exit_task(struct task_struct *child); |
973 | extern void perf_event_free_task(struct task_struct *task); |
974 | extern void perf_event_delayed_put(struct task_struct *task); |
975 | extern struct file *perf_event_get(unsigned int fd); |
976 | extern const struct perf_event *perf_get_event(struct file *file); |
977 | extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); |
978 | extern void perf_event_print_debug(void); |
979 | extern void perf_pmu_disable(struct pmu *pmu); |
980 | extern void perf_pmu_enable(struct pmu *pmu); |
981 | extern void perf_sched_cb_dec(struct pmu *pmu); |
982 | extern void perf_sched_cb_inc(struct pmu *pmu); |
983 | extern int perf_event_task_disable(void); |
984 | extern int perf_event_task_enable(void); |
985 | |
986 | extern void perf_pmu_resched(struct pmu *pmu); |
987 | |
988 | extern int perf_event_refresh(struct perf_event *event, int refresh); |
989 | extern void perf_event_update_userpage(struct perf_event *event); |
990 | extern int perf_event_release_kernel(struct perf_event *event); |
991 | extern struct perf_event * |
992 | perf_event_create_kernel_counter(struct perf_event_attr *attr, |
993 | int cpu, |
994 | struct task_struct *task, |
995 | perf_overflow_handler_t callback, |
996 | void *context); |
997 | extern void perf_pmu_migrate_context(struct pmu *pmu, |
998 | int src_cpu, int dst_cpu); |
999 | int perf_event_read_local(struct perf_event *event, u64 *value, |
1000 | u64 *enabled, u64 *running); |
1001 | extern u64 perf_event_read_value(struct perf_event *event, |
1002 | u64 *enabled, u64 *running); |
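
/*
 * In-kernel usage sketch (error handling trimmed, attribute values purely
 * an example): create a pinned CPU-bound cycle counter, read it, and
 * release it again.
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_CPU_CYCLES,
 *		.size		= sizeof(attr),
 *		.pinned		= 1,
 *	};
 *	struct perf_event *event;
 *	u64 enabled, running, count;
 *
 *	event = perf_event_create_kernel_counter(&attr, cpu, NULL, NULL, NULL);
 *	if (IS_ERR(event))
 *		return PTR_ERR(event);
 *
 *	count = perf_event_read_value(event, &enabled, &running);
 *	perf_event_release_kernel(event);
 */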
1003 | |
1004 | |
1005 | struct perf_sample_data { |
1006 | /* |
	 * Fields set by perf_sample_data_init(), grouped so as to
1008 | * minimize the cachelines touched. |
1009 | */ |
1010 | u64 addr; |
1011 | struct perf_raw_record *raw; |
1012 | struct perf_branch_stack *br_stack; |
1013 | u64 period; |
1014 | union perf_sample_weight weight; |
1015 | u64 txn; |
1016 | union perf_mem_data_src data_src; |
1017 | |
1018 | /* |
1019 | * The other fields, optionally {set,used} by |
1020 | * perf_{prepare,output}_sample(). |
1021 | */ |
1022 | u64 type; |
1023 | u64 ip; |
1024 | struct { |
1025 | u32 pid; |
1026 | u32 tid; |
1027 | } tid_entry; |
1028 | u64 time; |
1029 | u64 id; |
1030 | u64 stream_id; |
1031 | struct { |
1032 | u32 cpu; |
1033 | u32 reserved; |
1034 | } cpu_entry; |
1035 | struct perf_callchain_entry *callchain; |
1036 | u64 aux_size; |
1037 | |
1038 | struct perf_regs regs_user; |
1039 | struct perf_regs regs_intr; |
1040 | u64 stack_user_size; |
1041 | |
1042 | u64 phys_addr; |
1043 | u64 cgroup; |
1044 | u64 data_page_size; |
1045 | u64 code_page_size; |
1046 | } ____cacheline_aligned; |
1047 | |
1048 | /* default value for data source */ |
1049 | #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ |
1050 | PERF_MEM_S(LVL, NA) |\ |
1051 | PERF_MEM_S(SNOOP, NA) |\ |
1052 | PERF_MEM_S(LOCK, NA) |\ |
1053 | PERF_MEM_S(TLB, NA)) |
1054 | |
1055 | static inline void perf_sample_data_init(struct perf_sample_data *data, |
1056 | u64 addr, u64 period) |
1057 | { |
1058 | /* remaining struct members initialized in perf_prepare_sample() */ |
1059 | data->addr = addr; |
1060 | data->raw = NULL; |
1061 | data->br_stack = NULL; |
1062 | data->period = period; |
1063 | data->weight.full = 0; |
1064 | data->data_src.val = PERF_MEM_NA; |
1065 | data->txn = 0; |
1066 | } |
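
/*
 * Sketch of the common overflow path in a PMU interrupt handler (the stop
 * helper name is hypothetical): initialize the sample data with the current
 * period and let the generic overflow code decide whether the event must be
 * stopped (e.g. when throttled).
 *
 *	struct perf_sample_data data;
 *
 *	perf_sample_data_init(&data, 0, event->hw.last_period);
 *	if (perf_event_overflow(event, &data, regs))
 *		my_pmu_stop(event, 0);
 */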
1067 | |
1068 | /* |
1069 | * Clear all bitfields in the perf_branch_entry. |
1070 | * The to and from fields are not cleared because they are |
 * systematically modified by the caller.
1072 | */ |
1073 | static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br) |
1074 | { |
1075 | br->mispred = 0; |
1076 | br->predicted = 0; |
1077 | br->in_tx = 0; |
1078 | br->abort = 0; |
1079 | br->cycles = 0; |
1080 | br->type = 0; |
1081 | br->reserved = 0; |
1082 | } |
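
/*
 * Intended use (sketch; the storage and flag sources are hypothetical):
 * clear the flag bits once, then set only the ones the hardware actually
 * reported, together with the from/to pair.
 *
 *	struct perf_branch_entry *br = &entries[i];
 *
 *	perf_clear_branch_entry_bitfields(br);
 *	br->from	= from;
 *	br->to		= to;
 *	br->mispred	= mispred;
 *	br->predicted	= !mispred;
 */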
1083 | |
1084 | extern void perf_output_sample(struct perf_output_handle *handle, |
1085 | struct perf_event_header *header, |
1086 | struct perf_sample_data *data, |
1087 | struct perf_event *event); |
1088 | extern void perf_prepare_sample(struct perf_event_header *header, |
1089 | struct perf_sample_data *data, |
1090 | struct perf_event *event, |
1091 | struct pt_regs *regs); |
1092 | |
1093 | extern int perf_event_overflow(struct perf_event *event, |
1094 | struct perf_sample_data *data, |
1095 | struct pt_regs *regs); |
1096 | |
1097 | extern void perf_event_output_forward(struct perf_event *event, |
1098 | struct perf_sample_data *data, |
1099 | struct pt_regs *regs); |
1100 | extern void perf_event_output_backward(struct perf_event *event, |
1101 | struct perf_sample_data *data, |
1102 | struct pt_regs *regs); |
1103 | extern int perf_event_output(struct perf_event *event, |
1104 | struct perf_sample_data *data, |
1105 | struct pt_regs *regs); |
1106 | |
1107 | static inline bool |
1108 | is_default_overflow_handler(struct perf_event *event) |
1109 | { |
1110 | if (likely(event->overflow_handler == perf_event_output_forward)) |
1111 | return true; |
1112 | if (unlikely(event->overflow_handler == perf_event_output_backward)) |
1113 | return true; |
1114 | return false; |
1115 | } |
1116 | |
1117 | extern void |
1118 | perf_event_header__init_id(struct perf_event_header *header, |
1119 | struct perf_sample_data *data, |
1120 | struct perf_event *event); |
1121 | extern void |
1122 | perf_event__output_id_sample(struct perf_event *event, |
1123 | struct perf_output_handle *handle, |
1124 | struct perf_sample_data *sample); |
1125 | |
1126 | extern void |
1127 | perf_log_lost_samples(struct perf_event *event, u64 lost); |
1128 | |
1129 | static inline bool event_has_any_exclude_flag(struct perf_event *event) |
1130 | { |
1131 | struct perf_event_attr *attr = &event->attr; |
1132 | |
1133 | return attr->exclude_idle || attr->exclude_user || |
1134 | attr->exclude_kernel || attr->exclude_hv || |
1135 | attr->exclude_guest || attr->exclude_host; |
1136 | } |
1137 | |
1138 | static inline bool is_sampling_event(struct perf_event *event) |
1139 | { |
1140 | return event->attr.sample_period != 0; |
1141 | } |
1142 | |
1143 | /* |
1144 | * Return 1 for a software event, 0 for a hardware event |
1145 | */ |
1146 | static inline int is_software_event(struct perf_event *event) |
1147 | { |
1148 | return event->event_caps & PERF_EV_CAP_SOFTWARE; |
1149 | } |
1150 | |
1151 | /* |
1152 | * Return 1 for event in sw context, 0 for event in hw context |
1153 | */ |
1154 | static inline int in_software_context(struct perf_event *event) |
1155 | { |
1156 | return event->ctx->pmu->task_ctx_nr == perf_sw_context; |
1157 | } |
1158 | |
1159 | static inline int is_exclusive_pmu(struct pmu *pmu) |
1160 | { |
1161 | return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE; |
1162 | } |
1163 | |
1164 | extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
1165 | |
1166 | extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); |
1167 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); |
1168 | |
1169 | #ifndef perf_arch_fetch_caller_regs |
1170 | static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } |
1171 | #endif |
1172 | |
1173 | /* |
1174 | * When generating a perf sample in-line, instead of from an interrupt / |
1175 | * exception, we lack a pt_regs. This is typically used from software events |
1176 | * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints. |
1177 | * |
1178 | * We typically don't need a full set, but (for x86) do require: |
1179 | * - ip for PERF_SAMPLE_IP |
1180 | * - cs for user_mode() tests |
1181 | * - sp for PERF_SAMPLE_CALLCHAIN |
1182 | * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs()) |
1183 | * |
1184 | * NOTE: assumes @regs is otherwise already 0 filled; this is important for |
1185 | * things like PERF_SAMPLE_REGS_INTR. |
1186 | */ |
1187 | static inline void perf_fetch_caller_regs(struct pt_regs *regs) |
1188 | { |
1189 | perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); |
1190 | } |
1191 | |
1192 | static __always_inline void |
1193 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
1194 | { |
1195 | if (static_key_false(&perf_swevent_enabled[event_id])) |
1196 | __perf_sw_event(event_id, nr, regs, addr); |
1197 | } |
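
/*
 * Example caller, modelled on the common fault-path usage: software events
 * are emitted from ordinary kernel context with the pt_regs of the spot
 * being instrumented.
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */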
1198 | |
1199 | DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); |
1200 | |
1201 | /* |
1202 | * 'Special' version for the scheduler, it hard assumes no recursion, |
1203 | * which is guaranteed by us not actually scheduling inside other swevents |
1204 | * because those disable preemption. |
1205 | */ |
1206 | static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) |
1207 | { |
1208 | struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); |
1209 | |
1210 | perf_fetch_caller_regs(regs); |
1211 | ___perf_sw_event(event_id, nr, regs, addr); |
1212 | } |
1213 | |
1214 | extern struct static_key_false perf_sched_events; |
1215 | |
1216 | static __always_inline bool __perf_sw_enabled(int swevt) |
1217 | { |
1218 | return static_key_false(&perf_swevent_enabled[swevt]); |
1219 | } |
1220 | |
1221 | static inline void perf_event_task_migrate(struct task_struct *task) |
1222 | { |
1223 | if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS)) |
1224 | task->sched_migrated = 1; |
1225 | } |
1226 | |
1227 | static inline void perf_event_task_sched_in(struct task_struct *prev, |
1228 | struct task_struct *task) |
1229 | { |
1230 | if (static_branch_unlikely(&perf_sched_events)) |
1231 | __perf_event_task_sched_in(prev, task); |
1232 | |
1233 | if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) && |
1234 | task->sched_migrated) { |
1235 | __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); |
1236 | task->sched_migrated = 0; |
1237 | } |
1238 | } |
1239 | |
1240 | static inline void perf_event_task_sched_out(struct task_struct *prev, |
1241 | struct task_struct *next) |
1242 | { |
1243 | if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES)) |
1244 | __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); |
1245 | |
1246 | #ifdef CONFIG_CGROUP_PERF |
1247 | if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) && |
1248 | perf_cgroup_from_task(prev, NULL) != |
1249 | perf_cgroup_from_task(next, NULL)) |
1250 | __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0); |
1251 | #endif |
1252 | |
1253 | if (static_branch_unlikely(&perf_sched_events)) |
1254 | __perf_event_task_sched_out(prev, next); |
1255 | } |
1256 | |
1257 | extern void perf_event_mmap(struct vm_area_struct *vma); |
1258 | |
1259 | extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, |
1260 | bool unregister, const char *sym); |
1261 | extern void perf_event_bpf_event(struct bpf_prog *prog, |
1262 | enum perf_bpf_event_type type, |
1263 | u16 flags); |
1264 | |
1265 | #ifdef CONFIG_GUEST_PERF_EVENTS |
1266 | extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; |
1267 | |
1268 | DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); |
1269 | DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); |
1270 | DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); |
1271 | |
1272 | static inline unsigned int perf_guest_state(void) |
1273 | { |
1274 | return static_call(__perf_guest_state)(); |
1275 | } |
1276 | static inline unsigned long perf_guest_get_ip(void) |
1277 | { |
1278 | return static_call(__perf_guest_get_ip)(); |
1279 | } |
1280 | static inline unsigned int perf_guest_handle_intel_pt_intr(void) |
1281 | { |
1282 | return static_call(__perf_guest_handle_intel_pt_intr)(); |
1283 | } |
1284 | extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); |
1285 | extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); |
1286 | #else |
1287 | static inline unsigned int perf_guest_state(void) { return 0; } |
1288 | static inline unsigned long perf_guest_get_ip(void) { return 0; } |
1289 | static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } |
1290 | #endif /* CONFIG_GUEST_PERF_EVENTS */ |
1291 | |
1292 | extern void perf_event_exec(void); |
1293 | extern void perf_event_comm(struct task_struct *tsk, bool exec); |
1294 | extern void perf_event_namespaces(struct task_struct *tsk); |
1295 | extern void perf_event_fork(struct task_struct *tsk); |
1296 | extern void perf_event_text_poke(const void *addr, |
1297 | const void *old_bytes, size_t old_len, |
1298 | const void *new_bytes, size_t new_len); |
1299 | |
1300 | /* Callchains */ |
1301 | DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); |
1302 | |
1303 | extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); |
1304 | extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); |
1305 | extern struct perf_callchain_entry * |
1306 | get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, |
1307 | u32 max_stack, bool crosstask, bool add_mark); |
1308 | extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); |
1309 | extern int get_callchain_buffers(int max_stack); |
1310 | extern void put_callchain_buffers(void); |
1311 | extern struct perf_callchain_entry *get_callchain_entry(int *rctx); |
1312 | extern void put_callchain_entry(int rctx); |
1313 | |
1314 | extern int sysctl_perf_event_max_stack; |
1315 | extern int sysctl_perf_event_max_contexts_per_stack; |
1316 | |
1317 | static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) |
1318 | { |
1319 | if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { |
1320 | struct perf_callchain_entry *entry = ctx->entry; |
1321 | entry->ip[entry->nr++] = ip; |
1322 | ++ctx->contexts; |
1323 | return 0; |
1324 | } else { |
1325 | ctx->contexts_maxed = true; |
1326 | return -1; /* no more room, stop walking the stack */ |
1327 | } |
1328 | } |
1329 | |
1330 | static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) |
1331 | { |
1332 | if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { |
1333 | struct perf_callchain_entry *entry = ctx->entry; |
1334 | entry->ip[entry->nr++] = ip; |
1335 | ++ctx->nr; |
1336 | return 0; |
1337 | } else { |
1338 | return -1; /* no more room, stop walking the stack */ |
1339 | } |
1340 | } |
1341 | |
1342 | extern int sysctl_perf_event_paranoid; |
1343 | extern int sysctl_perf_event_mlock; |
1344 | extern int sysctl_perf_event_sample_rate; |
1345 | extern int sysctl_perf_cpu_time_max_percent; |
1346 | |
1347 | extern void perf_sample_event_took(u64 sample_len_ns); |
1348 | |
1349 | int perf_proc_update_handler(struct ctl_table *table, int write, |
1350 | void *buffer, size_t *lenp, loff_t *ppos); |
1351 | int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, |
1352 | void *buffer, size_t *lenp, loff_t *ppos); |
1353 | int perf_event_max_stack_handler(struct ctl_table *table, int write, |
1354 | void *buffer, size_t *lenp, loff_t *ppos); |
1355 | |
1356 | /* Access to perf_event_open(2) syscall. */ |
1357 | #define PERF_SECURITY_OPEN 0 |
1358 | |
1359 | /* Finer grained perf_event_open(2) access control. */ |
1360 | #define PERF_SECURITY_CPU 1 |
1361 | #define PERF_SECURITY_KERNEL 2 |
1362 | #define PERF_SECURITY_TRACEPOINT 3 |
1363 | |
1364 | static inline int perf_is_paranoid(void) |
1365 | { |
1366 | return sysctl_perf_event_paranoid > -1; |
1367 | } |
1368 | |
1369 | static inline int perf_allow_kernel(struct perf_event_attr *attr) |
1370 | { |
1371 | if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) |
1372 | return -EACCES; |
1373 | |
1374 | return security_perf_event_open(attr, PERF_SECURITY_KERNEL); |
1375 | } |
1376 | |
1377 | static inline int perf_allow_cpu(struct perf_event_attr *attr) |
1378 | { |
1379 | if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) |
1380 | return -EACCES; |
1381 | |
1382 | return security_perf_event_open(attr, PERF_SECURITY_CPU); |
1383 | } |
1384 | |
1385 | static inline int perf_allow_tracepoint(struct perf_event_attr *attr) |
1386 | { |
1387 | if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) |
1388 | return -EPERM; |
1389 | |
1390 | return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); |
1391 | } |
1392 | |
1393 | extern void perf_event_init(void); |
1394 | extern void perf_tp_event(u16 event_type, u64 count, void *record, |
1395 | int entry_size, struct pt_regs *regs, |
1396 | struct hlist_head *head, int rctx, |
1397 | struct task_struct *task); |
1398 | extern void perf_bp_event(struct perf_event *event, void *data); |
1399 | |
1400 | #ifndef perf_misc_flags |
1401 | # define perf_misc_flags(regs) \ |
1402 | (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) |
1403 | # define perf_instruction_pointer(regs) instruction_pointer(regs) |
1404 | #endif |
1405 | #ifndef perf_arch_bpf_user_pt_regs |
1406 | # define perf_arch_bpf_user_pt_regs(regs) regs |
1407 | #endif |
1408 | |
1409 | static inline bool has_branch_stack(struct perf_event *event) |
1410 | { |
1411 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; |
1412 | } |
1413 | |
1414 | static inline bool needs_branch_stack(struct perf_event *event) |
1415 | { |
1416 | return event->attr.branch_sample_type != 0; |
1417 | } |
1418 | |
1419 | static inline bool has_aux(struct perf_event *event) |
1420 | { |
1421 | return event->pmu->setup_aux; |
1422 | } |
1423 | |
1424 | static inline bool is_write_backward(struct perf_event *event) |
1425 | { |
1426 | return !!event->attr.write_backward; |
1427 | } |
1428 | |
1429 | static inline bool has_addr_filter(struct perf_event *event) |
1430 | { |
1431 | return event->pmu->nr_addr_filters; |
1432 | } |
1433 | |
1434 | /* |
1435 | * An inherited event uses parent's filters |
1436 | */ |
1437 | static inline struct perf_addr_filters_head * |
1438 | perf_event_addr_filters(struct perf_event *event) |
1439 | { |
1440 | struct perf_addr_filters_head *ifh = &event->addr_filters; |
1441 | |
1442 | if (event->parent) |
1443 | ifh = &event->parent->addr_filters; |
1444 | |
1445 | return ifh; |
1446 | } |
1447 | |
1448 | extern void perf_event_addr_filters_sync(struct perf_event *event); |
1449 | extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); |
1450 | |
1451 | extern int perf_output_begin(struct perf_output_handle *handle, |
1452 | struct perf_sample_data *data, |
1453 | struct perf_event *event, unsigned int size); |
1454 | extern int perf_output_begin_forward(struct perf_output_handle *handle, |
1455 | struct perf_sample_data *data, |
1456 | struct perf_event *event, |
1457 | unsigned int size); |
1458 | extern int perf_output_begin_backward(struct perf_output_handle *handle, |
1459 | struct perf_sample_data *data, |
1460 | struct perf_event *event, |
1461 | unsigned int size); |
1462 | |
1463 | extern void perf_output_end(struct perf_output_handle *handle); |
1464 | extern unsigned int perf_output_copy(struct perf_output_handle *handle, |
1465 | const void *buf, unsigned int len); |
1466 | extern unsigned int perf_output_skip(struct perf_output_handle *handle, |
1467 | unsigned int len); |
1468 | extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, |
1469 | struct perf_output_handle *handle, |
1470 | unsigned long from, unsigned long to); |
1471 | extern int perf_swevent_get_recursion_context(void); |
1472 | extern void perf_swevent_put_recursion_context(int rctx); |
1473 | extern u64 perf_swevent_set_period(struct perf_event *event); |
1474 | extern void perf_event_enable(struct perf_event *event); |
1475 | extern void perf_event_disable(struct perf_event *event); |
1476 | extern void perf_event_disable_local(struct perf_event *event); |
1477 | extern void perf_event_disable_inatomic(struct perf_event *event); |
1478 | extern void perf_event_task_tick(void); |
1479 | extern int perf_event_account_interrupt(struct perf_event *event); |
1480 | extern int perf_event_period(struct perf_event *event, u64 value); |
1481 | extern u64 perf_event_pause(struct perf_event *event, bool reset); |
1482 | #else /* !CONFIG_PERF_EVENTS: */ |
1483 | static inline void * |
1484 | perf_aux_output_begin(struct perf_output_handle *handle, |
1485 | struct perf_event *event) { return NULL; } |
1486 | static inline void |
1487 | perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) |
1488 | { } |
1489 | static inline int |
1490 | perf_aux_output_skip(struct perf_output_handle *handle, |
1491 | unsigned long size) { return -EINVAL; } |
1492 | static inline void * |
1493 | perf_get_aux(struct perf_output_handle *handle) { return NULL; } |
1494 | static inline void |
1495 | perf_event_task_migrate(struct task_struct *task) { } |
1496 | static inline void |
1497 | perf_event_task_sched_in(struct task_struct *prev, |
1498 | struct task_struct *task) { } |
1499 | static inline void |
1500 | perf_event_task_sched_out(struct task_struct *prev, |
1501 | struct task_struct *next) { } |
1502 | static inline int perf_event_init_task(struct task_struct *child, |
1503 | u64 clone_flags) { return 0; } |
1504 | static inline void perf_event_exit_task(struct task_struct *child) { } |
1505 | static inline void perf_event_free_task(struct task_struct *task) { } |
1506 | static inline void perf_event_delayed_put(struct task_struct *task) { } |
1507 | static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } |
1508 | static inline const struct perf_event *perf_get_event(struct file *file) |
1509 | { |
1510 | return ERR_PTR(-EINVAL); |
1511 | } |
1512 | static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) |
1513 | { |
1514 | return ERR_PTR(-EINVAL); |
1515 | } |
1516 | static inline int perf_event_read_local(struct perf_event *event, u64 *value, |
1517 | u64 *enabled, u64 *running) |
1518 | { |
1519 | return -EINVAL; |
1520 | } |
1521 | static inline void perf_event_print_debug(void) { } |
1522 | static inline int perf_event_task_disable(void) { return -EINVAL; } |
1523 | static inline int perf_event_task_enable(void) { return -EINVAL; } |
1524 | static inline int perf_event_refresh(struct perf_event *event, int refresh) |
1525 | { |
1526 | return -EINVAL; |
1527 | } |
1528 | |
1529 | static inline void |
1530 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } |
1531 | static inline void |
1532 | perf_bp_event(struct perf_event *event, void *data) { } |
1533 | |
1534 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } |
1535 | |
1536 | typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); |
1537 | static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, |
1538 | bool unregister, const char *sym) { } |
1539 | static inline void perf_event_bpf_event(struct bpf_prog *prog, |
1540 | enum perf_bpf_event_type type, |
1541 | u16 flags) { } |
1542 | static inline void perf_event_exec(void) { } |
1543 | static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } |
1544 | static inline void perf_event_namespaces(struct task_struct *tsk) { } |
1545 | static inline void perf_event_fork(struct task_struct *tsk) { } |
1546 | static inline void perf_event_text_poke(const void *addr, |
1547 | const void *old_bytes, |
1548 | size_t old_len, |
1549 | const void *new_bytes, |
1550 | size_t new_len) { } |
1551 | static inline void perf_event_init(void) { } |
1552 | static inline int perf_swevent_get_recursion_context(void) { return -1; } |
1553 | static inline void perf_swevent_put_recursion_context(int rctx) { } |
1554 | static inline u64 perf_swevent_set_period(struct perf_event *event) { return 0; } |
1555 | static inline void perf_event_enable(struct perf_event *event) { } |
1556 | static inline void perf_event_disable(struct perf_event *event) { } |
1557 | static inline int __perf_event_disable(void *info) { return -1; } |
1558 | static inline void perf_event_task_tick(void) { } |
1559 | static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } |
1560 | static inline int perf_event_period(struct perf_event *event, u64 value) |
1561 | { |
1562 | return -EINVAL; |
1563 | } |
1564 | static inline u64 perf_event_pause(struct perf_event *event, bool reset) |
1565 | { |
1566 | return 0; |
1567 | } |
1568 | #endif |
1569 | |
1570 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) |
1571 | extern void perf_restore_debug_store(void); |
1572 | #else |
1573 | static inline void perf_restore_debug_store(void) { } |
1574 | #endif |
1575 | |
1576 | static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) |
1577 | { |
1578 | return frag->pad < sizeof(u64); |
1579 | } |
1580 | |
1581 | #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) |
1582 | |
1583 | struct perf_pmu_events_attr { |
1584 | struct device_attribute attr; |
1585 | u64 id; |
1586 | const char *event_str; |
1587 | }; |
1588 | |
1589 | struct perf_pmu_events_ht_attr { |
1590 | struct device_attribute attr; |
1591 | u64 id; |
1592 | const char *event_str_ht; |
1593 | const char *event_str_noht; |
1594 | }; |
1595 | |
1596 | struct perf_pmu_events_hybrid_attr { |
1597 | struct device_attribute attr; |
1598 | u64 id; |
1599 | const char *event_str; |
1600 | u64 pmu_type; |
1601 | }; |
1602 | |
1603 | struct perf_pmu_format_hybrid_attr { |
1604 | struct device_attribute attr; |
1605 | u64 pmu_type; |
1606 | }; |
1607 | |
1608 | ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, |
1609 | char *page); |
1610 | |
1611 | #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ |
1612 | static struct perf_pmu_events_attr _var = { \ |
1613 | .attr = __ATTR(_name, 0444, _show, NULL), \ |
1614 | .id = _id, \ |
1615 | }; |
1616 | |
1617 | #define PMU_EVENT_ATTR_STRING(_name, _var, _str) \ |
1618 | static struct perf_pmu_events_attr _var = { \ |
1619 | .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ |
1620 | .id = 0, \ |
1621 | .event_str = _str, \ |
1622 | }; |
1623 | |
1624 | #define PMU_EVENT_ATTR_ID(_name, _show, _id) \ |
1625 | (&((struct perf_pmu_events_attr[]) { \ |
1626 | { .attr = __ATTR(_name, 0444, _show, NULL), \ |
1627 | .id = _id, } \ |
1628 | })[0].attr.attr) |
1629 | |
1630 | #define PMU_FORMAT_ATTR(_name, _format) \ |
1631 | static ssize_t \ |
1632 | _name##_show(struct device *dev, \ |
1633 | struct device_attribute *attr, \ |
1634 | char *page) \ |
1635 | { \ |
1636 | BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ |
1637 | return sprintf(page, _format "\n"); \ |
1638 | } \ |
1639 | \ |
1640 | static struct device_attribute format_attr_##_name = __ATTR_RO(_name) |
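
/*
 * Sketch of how a PMU driver typically wires these helpers into its sysfs
 * groups (attribute names and values are made up for the example):
 *
 *	PMU_FORMAT_ATTR(event, "config:0-7");
 *	PMU_EVENT_ATTR_STRING(cycles, my_attr_cycles, "event=0x11");
 *
 *	static struct attribute *my_format_attrs[] = {
 *		&format_attr_event.attr,
 *		NULL,
 *	};
 *
 *	static const struct attribute_group my_format_group = {
 *		.name	= "format",
 *		.attrs	= my_format_attrs,
 *	};
 */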
1641 | |
1642 | /* Performance counter hotplug functions */ |
1643 | #ifdef CONFIG_PERF_EVENTS |
1644 | int perf_event_init_cpu(unsigned int cpu); |
1645 | int perf_event_exit_cpu(unsigned int cpu); |
1646 | #else |
1647 | #define perf_event_init_cpu NULL |
1648 | #define perf_event_exit_cpu NULL |
1649 | #endif |
1650 | |
1651 | extern void __weak arch_perf_update_userpage(struct perf_event *event, |
1652 | struct perf_event_mmap_page *userpg, |
1653 | u64 now); |
1654 | |
1655 | #ifdef CONFIG_MMU |
1656 | extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); |
1657 | #endif |
1658 | |
1659 | /* |
1660 | * Snapshot branch stack on software events. |
1661 | * |
1662 | * Branch stack can be very useful in understanding software events. For |
1663 | * example, when a long function, e.g. sys_perf_event_open, returns an |
1664 | * errno, it is not obvious why the function failed. Branch stack could |
 * provide very helpful information in this type of scenario.
 *
 * On a software event, it is necessary to stop the hardware branch recorder
 * quickly. Otherwise, the hardware register/buffer will be flushed with
1669 | * entries of the triggering event. Therefore, static call is used to |
1670 | * stop the hardware recorder. |
1671 | */ |
1672 | |
1673 | /* |
 * cnt is the number of entries allocated for entries.
 * Return the number of entries copied to entries.
1676 | */ |
1677 | typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries, |
1678 | unsigned int cnt); |
1679 | DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t); |
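
/*
 * Consumer sketch (e.g. a BPF helper): take the snapshot as early as
 * possible so the recorder is not polluted with the consumer's own
 * branches; 'entries' and 'cnt' are whatever buffer the caller owns.
 *
 *	int copied;
 *
 *	copied = static_call(perf_snapshot_branch_stack)(entries, cnt);
 */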
1680 | |
1681 | #ifndef PERF_NEEDS_LOPWR_CB |
1682 | static inline void perf_lopwr_cb(bool mode) |
1683 | { |
1684 | } |
1685 | #endif |
1686 | |
1687 | #endif /* _LINUX_PERF_EVENT_H */ |
1688 | |