/*
 * Performance events x86 architecture header
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 * Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 * For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>

#include <asm/fpu/xstate.h>
#include <asm/intel_ds.h>
#include <asm/cpu.h>
#include <asm/msr.h>

/* To enable MSR tracing please use the generic trace points. */

/*
 *          |   NHM/WSM    |      SNB     |
 * register -------------------------------
 *          |  HT  | no HT |  HT  | no HT |
 *-----------------------------------------
 * offcore  | core | core  | cpu  | core  |
 * lbr_sel  | core | core  | cpu  | core  |
 * ld_lat   | cpu  | core  | cpu  | core  |
 *-----------------------------------------
 *
 * Given that there is a small number of shared regs,
 * we can pre-allocate their slot in the per-cpu
 * per-core reg tables.
 */
enum extra_reg_type {
        EXTRA_REG_NONE    = -1, /* not used */

        EXTRA_REG_RSP_0   = 0,  /* offcore_response_0 */
        EXTRA_REG_RSP_1   = 1,  /* offcore_response_1 */
        EXTRA_REG_LBR     = 2,  /* lbr_select */
        EXTRA_REG_LDLAT   = 3,  /* ld_lat_threshold */
        EXTRA_REG_FE      = 4,  /* fe_* */
        EXTRA_REG_SNOOP_0 = 5,  /* snoop response 0 */
        EXTRA_REG_SNOOP_1 = 6,  /* snoop response 1 */

        EXTRA_REG_MAX           /* number of entries needed */
};

struct event_constraint {
        union {
                unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
                u64 idxmsk64;
        };
        u64 code;
        u64 cmask;
        int weight;
        int overlap;
        int flags;
        unsigned int size;
};

static inline bool constraint_match(struct event_constraint *c, u64 ecode)
{
        return ((ecode & c->cmask) - c->code) <= (u64)c->size;
}
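
/*
 * Worked example (illustrative only): constraint_match() exploits u64
 * wrap-around to do a range check with a single compare. For a range
 * constraint with .code = 0xd0 and .size = 0x0f (events 0xd0-0xdf), an
 * event code of 0xd4 gives 0xd4 - 0xd0 = 0x4 <= 0xf and matches, while
 * 0xc0 gives 0xc0 - 0xd0, which wraps to a huge unsigned value and
 * fails. A plain (non-range) constraint has .size = 0, so only an exact
 * match of the masked code passes.
 */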

#define PERF_ARCH(name, val) \
        PERF_X86_EVENT_##name = val,

/*
 * struct hw_perf_event.flags flags
 */
enum {
#include "perf_event_flags.h"
};

#undef PERF_ARCH

#define PERF_ARCH(name, val)                                            \
        static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \
                      PERF_X86_EVENT_##name);

#include "perf_event_flags.h"

#undef PERF_ARCH

static inline bool is_topdown_count(struct perf_event *event)
{
        return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
}

static inline bool is_metric_event(struct perf_event *event)
{
        u64 config = event->attr.config;

        return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
                ((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
                ((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
}

static inline bool is_slots_event(struct perf_event *event)
{
        return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
}

static inline bool is_topdown_event(struct perf_event *event)
{
        return is_metric_event(event) || is_slots_event(event);
}

int is_x86_event(struct perf_event *event);

static inline bool check_leader_group(struct perf_event *leader, int flags)
{
        return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false;
}

static inline bool is_branch_counters_group(struct perf_event *event)
{
        return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS);
}

static inline bool is_pebs_counter_event_group(struct perf_event *event)
{
        return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR);
}

static inline bool is_acr_event_group(struct perf_event *event)
{
        return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
}

struct amd_nb {
        int nb_id;  /* NorthBridge id */
        int refcnt; /* reference count */
        struct perf_event *owners[X86_PMC_IDX_MAX];
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

#define PEBS_COUNTER_MASK          ((1ULL << MAX_PEBS_EVENTS) - 1)
#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
#define PEBS_OUTPUT_OFFSET         61
#define PEBS_OUTPUT_MASK           (3ull << PEBS_OUTPUT_OFFSET)
#define PEBS_OUTPUT_PT             (1ull << PEBS_OUTPUT_OFFSET)
#define PEBS_VIA_PT_MASK           (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)

/*
 * Flags PEBS can handle without a PMI.
 *
 * TID can only be handled by flushing at context switch.
 * REGS_USER can be handled for events limited to ring 3.
 *
 */
#define LARGE_PEBS_FLAGS \
        (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
        PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
        PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
        PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
        PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
        PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \
        PERF_SAMPLE_WEIGHT_TYPE)

#define PEBS_GP_REGS                    \
        ((1ULL << PERF_REG_X86_AX)    | \
         (1ULL << PERF_REG_X86_BX)    | \
         (1ULL << PERF_REG_X86_CX)    | \
         (1ULL << PERF_REG_X86_DX)    | \
         (1ULL << PERF_REG_X86_DI)    | \
         (1ULL << PERF_REG_X86_SI)    | \
         (1ULL << PERF_REG_X86_SP)    | \
         (1ULL << PERF_REG_X86_BP)    | \
         (1ULL << PERF_REG_X86_IP)    | \
         (1ULL << PERF_REG_X86_FLAGS) | \
         (1ULL << PERF_REG_X86_R8)    | \
         (1ULL << PERF_REG_X86_R9)    | \
         (1ULL << PERF_REG_X86_R10)   | \
         (1ULL << PERF_REG_X86_R11)   | \
         (1ULL << PERF_REG_X86_R12)   | \
         (1ULL << PERF_REG_X86_R13)   | \
         (1ULL << PERF_REG_X86_R14)   | \
         (1ULL << PERF_REG_X86_R15))

/*
 * Per register state.
 */
struct er_account {
        raw_spinlock_t lock;    /* per-core: protect structure */
        u64 config;             /* extra MSR config */
        u64 reg;                /* extra MSR number */
        atomic_t ref;           /* reference count */
};

/*
 * Per core/cpu state
 *
 * Used to coordinate shared registers between HT threads or
 * among events on a single PMU.
 */
struct intel_shared_regs {
        struct er_account regs[EXTRA_REG_MAX];
        int refcnt;             /* per-core: #HT threads */
        unsigned core_id;       /* per-core: core id */
};

enum intel_excl_state_type {
        INTEL_EXCL_UNUSED = 0,    /* counter is unused */
        INTEL_EXCL_SHARED = 1,    /* counter can be used by both threads */
        INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
};

struct intel_excl_states {
        enum intel_excl_state_type state[X86_PMC_IDX_MAX];
        bool sched_started; /* true if scheduling has started */
};

struct intel_excl_cntrs {
        raw_spinlock_t lock;

        struct intel_excl_states states[2];

        union {
                u16 has_exclusive[2];
                u32 exclusive_present;
        };

        int refcnt;             /* per-core: #HT threads */
        unsigned core_id;       /* per-core: core id */
};

struct x86_perf_task_context;
#define MAX_LBR_ENTRIES 32

enum {
        LBR_FORMAT_32           = 0x00,
        LBR_FORMAT_LIP          = 0x01,
        LBR_FORMAT_EIP          = 0x02,
        LBR_FORMAT_EIP_FLAGS    = 0x03,
        LBR_FORMAT_EIP_FLAGS2   = 0x04,
        LBR_FORMAT_INFO         = 0x05,
        LBR_FORMAT_TIME         = 0x06,
        LBR_FORMAT_INFO2        = 0x07,
        LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO2,
};

enum {
        X86_PERF_KFREE_SHARED = 0,
        X86_PERF_KFREE_EXCL = 1,
        X86_PERF_KFREE_MAX
};

struct cpu_hw_events {
        /*
         * Generic x86 PMC bits
         */
        struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        int enabled;

        int n_events;     /* the # of events in the below arrays */
        int n_added;      /* the # last events in the below arrays;
                             they've never been enabled yet */
        int n_txn;        /* the # last events in the below arrays;
                             added in the current transaction */
        int n_txn_pair;
        int n_txn_metric;
        int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64 tags[X86_PMC_IDX_MAX];

        struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
        struct event_constraint *event_constraint[X86_PMC_IDX_MAX];

        int n_excl;       /* the number of exclusive events */
        int n_late_setup; /* the number of events that need late setup */

        unsigned int txn_flags;
        int is_fake;

        /*
         * Intel DebugStore bits
         */
        struct debug_store *ds;
        void *ds_pebs_vaddr;
        void *ds_bts_vaddr;
        u64 pebs_enabled;
        int n_pebs;
        int n_large_pebs;
        int n_pebs_via_pt;
        int pebs_output;

        /* Current super set of events hardware configuration */
        u64 pebs_data_cfg;
        u64 active_pebs_data_cfg;
        int pebs_record_size;

        /* Intel Fixed counter configuration */
        u64 fixed_ctrl_val;
        u64 active_fixed_ctrl_val;

        /* Intel ACR configuration */
        u64 acr_cfg_b[X86_PMC_IDX_MAX];
        u64 acr_cfg_c[X86_PMC_IDX_MAX];

        /*
         * Intel LBR bits
         */
        int lbr_users;
        int lbr_pebs_users;
        struct perf_branch_stack lbr_stack;
        struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
        u64 lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
        union {
                struct er_account *lbr_sel;
                struct er_account *lbr_ctl;
        };
        u64 br_sel;
        void *last_task_ctx;
        int last_log_id;
        int lbr_select;
        void *lbr_xsave;

        /*
         * Intel host/guest exclude bits
         */
        u64 intel_ctrl_guest_mask;
        u64 intel_ctrl_host_mask;
        struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];

        /*
         * Intel checkpoint mask
         */
        u64 intel_cp_status;

        /*
         * manage shared (per-core, per-cpu) registers
         * used on Intel NHM/WSM/SNB
         */
        struct intel_shared_regs *shared_regs;
        /*
         * manage exclusive counter access between hyperthreads
         */
        struct event_constraint *constraint_list; /* in enable order */
        struct intel_excl_cntrs *excl_cntrs;
        int excl_thread_id; /* 0 or 1 */

        /*
         * SKL TSX_FORCE_ABORT shadow
         */
        u64 tfa_shadow;

        /*
         * Perf Metrics
         */
        /* number of accepted metrics events */
        int n_metric;

        /*
         * AMD specific bits
         */
        struct amd_nb *amd_nb;
        int brs_active; /* BRS is enabled */

        /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
        u64 perf_ctr_virt_mask;
        int n_pair; /* Large increment events */

        void *kfree_on_online[X86_PERF_KFREE_MAX];

        struct pmu *pmu;
};

#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \
        { .idxmsk64 = (n) },            \
        .code = (c),                    \
        .size = (e) - (c),              \
        .cmask = (m),                   \
        .weight = (w),                  \
        .overlap = (o),                 \
        .flags = f,                     \
}

#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \
        __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)

#define EVENT_CONSTRAINT(c, n, m) \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
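
/*
 * Usage sketch (hypothetical values, not a real constraint table entry):
 * tie event code 0x3c to general-purpose counters 0 and 1. The counter
 * bitmask 0x3 gives the constraint a scheduler weight of HWEIGHT(0x3) == 2.
 *
 *   static struct event_constraint example_constraint =
 *           EVENT_CONSTRAINT(0x3c, 0x3, ARCH_PERFMON_EVENTSEL_EVENT);
 */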

/*
 * The constraint_match() function only works for 'simple' event codes
 * and not for extended (AMD64_EVENTSEL_EVENT) event codes.
 */
#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
        __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)

#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
                           0, PERF_X86_EVENT_EXCL)

/*
 * The overlap flag marks event constraints with overlapping counter
 * masks. This is the case if the counter mask of such an event is not
 * a subset of any other counter mask of a constraint with an equal or
 * higher weight, e.g.:
 *
 *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
 *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
 *
 * The event scheduler may not select the correct counter in the first
 * cycle because it needs to know which subsequent events will be
 * scheduled. It may fail to schedule the events then. So we set the
 * overlap flag for such constraints to give the scheduler a hint which
 * events to select for counter rescheduling.
 *
 * Care must be taken as the rescheduling algorithm is O(n!) which
 * will increase scheduling cycles for an over-committed system
 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
 * and their counter masks must be kept at a minimum.
 */
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
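
/*
 * To make the example above concrete: mask 0x09 (counters 0 and 3, weight
 * 2) is not a subset of 0x07 (counters 0-2) or 0x38 (counters 3-5), both
 * of weight 3. If c_overlaps is placed on counter 0 first, events bound
 * to 0x07 may no longer fit even though moving c_overlaps to counter 3
 * would make everything schedulable; the overlap bit tells the scheduler
 * to retry such alternatives.
 */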

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n) \
        EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on a range of Event codes
 */
#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \
        EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  - in_tx
 *  - in_tx_checkpointed
 * The other filters are supported by fixed counters.
 * The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n) \
        EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
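
/*
 * Example (this is how the Intel model code uses it): INST_RETIRED.ANY,
 * event code 0x00c0, is pinned to fixed counter 0, which the scheduler
 * addresses as index bit 32 + 0:
 *
 *   FIXED_EVENT_CONSTRAINT(0x00c0, 0)
 */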

/*
 * The special metric counters do not actually exist. They are calculated from
 * the combination of the FxCtr3 + MSR_PERF_METRICS.
 *
 * The special metric counters are mapped to a dummy offset for the scheduler.
 * The sharing between multiple users of the same metric without multiplexing
 * is not allowed, even though the hardware supports that in principle.
 */

#define METRIC_EVENT_CONSTRAINT(c, n)                                   \
        EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)),  \
                         INTEL_ARCH_EVENT_MASK)

/*
 * Constraint on the Event code + UMask
 */
#define INTEL_UEVENT_CONSTRAINT(c, n) \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

/* Constraint on specific umask bit only + event */
#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \
        EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))

/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)

#define INTEL_EXCLUEVT_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)

#define INTEL_PLD_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

#define INTEL_PSD_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)

#define INTEL_PST_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

#define INTEL_HYBRID_LAT_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)

#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)

/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
        EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)

#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \
        EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)

/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
        EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)

/* Check flags and event code, and set the HSW store flag */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

/* Check flags and event code, and set the HSW load flag */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \
        __EVENT_CONSTRAINT_RANGE(code, end, n, \
                           ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, \
                           PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask, and set the HSW store flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, \
                           PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask, and set the HSW load flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, \
                           PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask, and set the HSW N/A flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
        __EVENT_CONSTRAINT(code, n, \
                           INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)

/*
 * We define the end marker as having a weight of -1
 * to enable blacklisting of events using a counter bitmask
 * of zero and thus a weight of zero.
 * The end marker has a weight that cannot possibly be
 * obtained from counting the bits in the bitmask.
 */
#define EVENT_CONSTRAINT_END { .weight = -1 }

/*
 * Check for end marker with weight == -1
 */
#define for_each_event_constraint(e, c) \
        for ((e) = (c); (e)->weight != -1; (e)++)
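
/*
 * Usage sketch (hypothetical table): constraint tables are arrays
 * terminated by EVENT_CONSTRAINT_END and walked with
 * for_each_event_constraint():
 *
 *   static struct event_constraint example_constraints[] = {
 *           INTEL_EVENT_CONSTRAINT(0x3c, 0x3),
 *           EVENT_CONSTRAINT_END
 *   };
 *
 *   struct event_constraint *c;
 *
 *   for_each_event_constraint(c, example_constraints) {
 *           if (constraint_match(c, event->hw.config))
 *                   return c;
 *   }
 */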

/*
 * Extra registers for specific events.
 *
 * Some events need large masks and require external MSRs.
 * Those extra MSRs end up being shared for all events on
 * a PMU and sometimes between PMU of sibling HT threads.
 * In either case, the kernel needs to handle conflicting
 * accesses to those extra, shared, regs. The data structure
 * to manage those registers is stored in cpu_hw_event.
 */
struct extra_reg {
        unsigned int event;
        unsigned int msr;
        u64 config_mask;
        u64 valid_mask;
        int idx; /* per_xxx->regs[] reg index */
        bool extra_msr_access;
};

#define EVENT_EXTRA_REG(e, ms, m, vm, i) {      \
        .event = (e),                   \
        .msr = (ms),                    \
        .config_mask = (m),             \
        .valid_mask = (vm),             \
        .idx = EXTRA_REG_##i,           \
        .extra_msr_access = true,       \
        }

#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
        EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)

#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
        EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
                        ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)

#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
        INTEL_UEVENT_EXTRA_REG(c, \
                               MSR_PEBS_LD_LAT_THRESHOLD, \
                               0xffff, \
                               LDLAT)

#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
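
/*
 * Usage sketch (modelled on the SNB code; the valid_mask value here is a
 * placeholder, not a real one): extra_reg tables route offcore response
 * and load-latency events to their extra MSRs and are terminated by
 * EVENT_EXTRA_END:
 *
 *   static struct extra_reg example_extra_regs[] = {
 *           INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffULL, RSP_0),
 *           INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 *           EVENT_EXTRA_END
 *   };
 */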

union perf_capabilities {
        struct {
                u64 lbr_format:6;
                u64 pebs_trap:1;
                u64 pebs_arch_reg:1;
                u64 pebs_format:4;
                u64 smm_freeze:1;
                /*
                 * PMU supports separate counter range for writing
                 * values > 32bit.
                 */
                u64 full_width_write:1;
                u64 pebs_baseline:1;
                u64 perf_metrics:1;
                u64 pebs_output_pt_available:1;
                u64 pebs_timing_info:1;
                u64 anythread_deprecated:1;
                u64 rdpmc_metrics_clear:1;
        };
        u64 capabilities;
};

struct x86_pmu_quirk {
        struct x86_pmu_quirk *next;
        void (*func)(void);
};

union x86_pmu_config {
        struct {
                u64 event:8,
                    umask:8,
                    usr:1,
                    os:1,
                    edge:1,
                    pc:1,
                    interrupt:1,
                    __reserved1:1,
                    en:1,
                    inv:1,
                    cmask:8,
                    event2:4,
                    __reserved2:4,
                    go:1,
                    ho:1;
        } bits;
        u64 value;
};

#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
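
/*
 * Example (this pattern appears in the Intel PEBS alias code): encode
 * "instructions retired, inverted, counter mask 16" as a raw config
 * value using designated initializers:
 *
 *   u64 cfg = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
 */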

enum {
        x86_lbr_exclusive_lbr,
        x86_lbr_exclusive_bts,
        x86_lbr_exclusive_pt,
        x86_lbr_exclusive_max,
};

#define PERF_PEBS_DATA_SOURCE_MAX       0x100
#define PERF_PEBS_DATA_SOURCE_MASK      (PERF_PEBS_DATA_SOURCE_MAX - 1)
#define PERF_PEBS_DATA_SOURCE_GRT_MAX   0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK  (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)

#define X86_HYBRID_PMU_ATOM_IDX         0
#define X86_HYBRID_PMU_CORE_IDX         1
#define X86_HYBRID_PMU_TINY_IDX         2

enum hybrid_pmu_type {
        not_hybrid,
        hybrid_small    = BIT(X86_HYBRID_PMU_ATOM_IDX),
        hybrid_big      = BIT(X86_HYBRID_PMU_CORE_IDX),
        hybrid_tiny     = BIT(X86_HYBRID_PMU_TINY_IDX),

        /* The combinations below are only used for matching */
        hybrid_big_small = hybrid_big | hybrid_small,
        hybrid_small_tiny = hybrid_small | hybrid_tiny,
        hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};

struct x86_hybrid_pmu {
        struct pmu pmu;
        const char *name;
        enum hybrid_pmu_type pmu_type;
        cpumask_t supported_cpus;
        union perf_capabilities intel_cap;
        u64 intel_ctrl;
        u64 pebs_events_mask;
        u64 config_mask;
        union {
                u64 cntr_mask64;
                unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        union {
                u64 fixed_cntr_mask64;
                unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };

        union {
                u64 acr_cntr_mask64;
                unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        union {
                u64 acr_cause_mask64;
                unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        struct event_constraint unconstrained;

        u64 hw_cache_event_ids
                        [PERF_COUNT_HW_CACHE_MAX]
                        [PERF_COUNT_HW_CACHE_OP_MAX]
                        [PERF_COUNT_HW_CACHE_RESULT_MAX];
        u64 hw_cache_extra_regs
                        [PERF_COUNT_HW_CACHE_MAX]
                        [PERF_COUNT_HW_CACHE_OP_MAX]
                        [PERF_COUNT_HW_CACHE_RESULT_MAX];
        struct event_constraint *event_constraints;
        struct event_constraint *pebs_constraints;
        struct extra_reg *extra_regs;

        unsigned int late_ack :1,
                     mid_ack :1,
                     enabled_ack :1;

        u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
};

static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
{
        return container_of(pmu, struct x86_hybrid_pmu, pmu);
}

extern struct static_key_false perf_is_hybrid;
#define is_hybrid() static_branch_unlikely(&perf_is_hybrid)

#define hybrid(_pmu, _field)                            \
(*({                                                    \
        typeof(&x86_pmu._field) __Fp = &x86_pmu._field; \
                                                        \
        if (is_hybrid() && (_pmu))                      \
                __Fp = &hybrid_pmu(_pmu)->_field;       \
                                                        \
        __Fp;                                           \
}))

#define hybrid_var(_pmu, _var)                          \
(*({                                                    \
        typeof(&_var) __Fp = &_var;                     \
                                                        \
        if (is_hybrid() && (_pmu))                      \
                __Fp = &hybrid_pmu(_pmu)->_var;         \
                                                        \
        __Fp;                                           \
}))

#define hybrid_bit(_pmu, _field)                        \
({                                                      \
        bool __Fp = x86_pmu._field;                     \
                                                        \
        if (is_hybrid() && (_pmu))                      \
                __Fp = hybrid_pmu(_pmu)->_field;        \
                                                        \
        __Fp;                                           \
})
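
/*
 * Usage sketch: the hybrid accessors let callers read a capability with
 * one expression whether or not the system is hybrid. On hybrid parts
 * with a valid PMU pointer they resolve to the per-PMU copy, otherwise
 * to the global x86_pmu field:
 *
 *   u64 ctrl = hybrid(event->pmu, intel_ctrl);
 *   u64 mask = hybrid(pmu, cntr_mask64);
 */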

/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
        /*
         * Generic x86 PMC bits
         */
        const char *name;
        int version;
        int (*handle_irq)(struct pt_regs *);
        void (*disable_all)(void);
        void (*enable_all)(int added);
        void (*enable)(struct perf_event *);
        void (*disable)(struct perf_event *);
        void (*assign)(struct perf_event *event, int idx);
        void (*add)(struct perf_event *);
        void (*del)(struct perf_event *);
        void (*read)(struct perf_event *event);
        int (*set_period)(struct perf_event *event);
        u64 (*update)(struct perf_event *event);
        int (*hw_config)(struct perf_event *event);
        int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
        void (*late_setup)(void);
        void (*pebs_enable)(struct perf_event *event);
        void (*pebs_disable)(struct perf_event *event);
        void (*pebs_enable_all)(void);
        void (*pebs_disable_all)(void);
        unsigned eventsel;
        unsigned perfctr;
        unsigned fixedctr;
        int (*addr_offset)(int index, bool eventsel);
        int (*rdpmc_index)(int index);
        u64 (*event_map)(int);
        int max_events;
        u64 config_mask;
        union {
                u64 cntr_mask64;
                unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        union {
                u64 fixed_cntr_mask64;
                unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        union {
                u64 acr_cntr_mask64;
                unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        union {
                u64 acr_cause_mask64;
                unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        };
        int cntval_bits;
        u64 cntval_mask;
        union {
                unsigned long events_maskl;
                unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
        };
        int events_mask_len;
        int apic;
        u64 max_period;
        struct event_constraint *
                        (*get_event_constraints)(struct cpu_hw_events *cpuc,
                                                 int idx,
                                                 struct perf_event *event);

        void (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                      struct perf_event *event);

        void (*start_scheduling)(struct cpu_hw_events *cpuc);

        void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

        void (*stop_scheduling)(struct cpu_hw_events *cpuc);

        struct event_constraint *event_constraints;
        struct x86_pmu_quirk *quirks;
        void (*limit_period)(struct perf_event *event, s64 *l);

        /* PMI handler bits */
        unsigned int late_ack :1,
                     mid_ack :1,
                     enabled_ack :1;
        /*
         * sysfs attrs
         */
        int attr_rdpmc_broken;
        int attr_rdpmc;
        struct attribute **format_attrs;

        ssize_t (*events_sysfs_show)(char *page, u64 config);
        const struct attribute_group **attr_update;

        unsigned long attr_freeze_on_smi;

        /*
         * CPU Hotplug hooks
         */
        int (*cpu_prepare)(int cpu);
        void (*cpu_starting)(int cpu);
        void (*cpu_dying)(int cpu);
        void (*cpu_dead)(int cpu);

        void (*check_microcode)(void);
        void (*sched_task)(struct perf_event_pmu_context *pmu_ctx,
                           struct task_struct *task, bool sched_in);

        /*
         * Intel Arch Perfmon v2+
         */
        u64 intel_ctrl;
        union perf_capabilities intel_cap;

        /*
         * Intel DebugStore bits
         */
        unsigned int bts :1,
                     bts_active :1,
                     ds_pebs :1,
                     pebs_active :1,
                     pebs_broken :1,
                     pebs_prec_dist :1,
                     pebs_no_tlb :1,
                     pebs_no_isolation :1,
                     pebs_block :1,
                     pebs_ept :1;
        int pebs_record_size;
        int pebs_buffer_size;
        u64 pebs_events_mask;
        void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
        struct event_constraint *pebs_constraints;
        void (*pebs_aliases)(struct perf_event *event);
        u64 (*pebs_latency_data)(struct perf_event *event, u64 status);
        unsigned long large_pebs_flags;
        u64 rtm_abort_event;
        u64 pebs_capable;

        /*
         * Intel LBR
         */
        unsigned int lbr_tos, lbr_from, lbr_to,
                     lbr_info, lbr_nr; /* LBR base regs and size */
        union {
                u64 lbr_sel_mask; /* LBR_SELECT valid bits */
                u64 lbr_ctl_mask; /* LBR_CTL valid bits */
        };
        union {
                const int *lbr_sel_map; /* lbr_select mappings */
                int *lbr_ctl_map;       /* LBR_CTL mappings */
        };
        u64 lbr_callstack_users; /* lbr callstack system wide users */
        bool lbr_double_abort;   /* duplicated lbr aborts */
        bool lbr_pt_coexist;     /* (LBR|BTS) may coexist with PT */

        unsigned int lbr_has_info:1;
        unsigned int lbr_has_tsx:1;
        unsigned int lbr_from_flags:1;
        unsigned int lbr_to_cycles:1;

        /*
         * Intel Architectural LBR CPUID Enumeration
         */
        unsigned int lbr_depth_mask:8;
        unsigned int lbr_deep_c_reset:1;
        unsigned int lbr_lip:1;
        unsigned int lbr_cpl:1;
        unsigned int lbr_filter:1;
        unsigned int lbr_call_stack:1;
        unsigned int lbr_mispred:1;
        unsigned int lbr_timed_lbr:1;
        unsigned int lbr_br_type:1;
        unsigned int lbr_counters:4;

        void (*lbr_reset)(void);
        void (*lbr_read)(struct cpu_hw_events *cpuc);
        void (*lbr_save)(void *ctx);
        void (*lbr_restore)(void *ctx);

        /*
         * Intel PT/LBR/BTS are exclusive
         */
        atomic_t lbr_exclusive[x86_lbr_exclusive_max];

        /*
         * Intel perf metrics
         */
        int num_topdown_events;

        /*
         * AMD bits
         */
        unsigned int amd_nb_constraints : 1;
        u64 perf_ctr_pair_en;

        /*
         * Extra registers for events
         */
        struct extra_reg *extra_regs;
        unsigned int flags;

        /*
         * Intel host/guest support (KVM)
         */
        struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data);

        /*
         * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
         */
        int (*check_period)(struct perf_event *event, u64 period);

        int (*aux_output_match)(struct perf_event *event);

        void (*filter)(struct pmu *pmu, int cpu, bool *ret);
        /*
         * Hybrid support
         *
         * Most PMU capabilities are the same among different hybrid PMUs.
         * The global x86_pmu saves the architecture capabilities, which
         * are available for all PMUs. The hybrid_pmu only includes the
         * unique capabilities.
         */
        int num_hybrid_pmus;
        struct x86_hybrid_pmu *hybrid_pmu;
        enum intel_cpu_type (*get_hybrid_cpu_type)(void);
};

struct x86_perf_task_context_opt {
        int lbr_callstack_users;
        int lbr_stack_state;
        int log_id;
};

struct x86_perf_task_context {
        u64 lbr_sel;
        int tos;
        int valid_lbrs;
        struct x86_perf_task_context_opt opt;
        struct lbr_entry lbr[MAX_LBR_ENTRIES];
};

struct x86_perf_task_context_arch_lbr {
        struct x86_perf_task_context_opt opt;
        struct lbr_entry entries[];
};

/*
 * Add padding to guarantee the 64-byte alignment of the state buffer.
 *
 * The structure is dynamically allocated. The size of the LBR state may vary
 * based on the number of LBR registers.
 *
 * Do not put anything after the LBR state.
 */
struct x86_perf_task_context_arch_lbr_xsave {
        struct x86_perf_task_context_opt opt;

        union {
                struct xregs_state xsave;
                struct {
                        struct fxregs_state i387;
                        struct xstate_header header;
                        struct arch_lbr_state lbr;
                } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
        };
};

#define x86_add_quirk(func_)                                            \
do {                                                                    \
        static struct x86_pmu_quirk __quirk __initdata = {              \
                .func = func_,                                          \
        };                                                              \
        __quirk.next = x86_pmu.quirks;                                  \
        x86_pmu.quirks = &__quirk;                                      \
} while (0)
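
/*
 * Usage sketch (hypothetical quirk): quirks are queued by the model
 * setup code and run once from the PMU init path:
 *
 *   static __init void example_quirk(void)
 *   {
 *           pr_info("applying example PMU quirk\n");
 *   }
 *
 *   x86_add_quirk(example_quirk);
 */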

/*
 * x86_pmu flags
 */
#define PMU_FL_NO_HT_SHARING    0x1   /* no hyper-threading resource sharing */
#define PMU_FL_HAS_RSP_1        0x2   /* has 2 equivalent offcore_rsp regs */
#define PMU_FL_EXCL_CNTRS       0x4   /* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED     0x8   /* exclusive counter active */
#define PMU_FL_PEBS_ALL         0x10  /* all events are valid PEBS events */
#define PMU_FL_TFA              0x20  /* deal with TSX force abort */
#define PMU_FL_PAIR             0x40  /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY    0x80  /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX    0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY   0x200 /* Support Retire Latency in PEBS */
#define PMU_FL_BR_CNTR          0x400 /* Support branch counter logging */
#define PMU_FL_DYN_CONSTRAINT   0x800 /* Needs dynamic constraint */

#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr

#define EVENT_ATTR(_name, _id)                                          \
static struct perf_pmu_events_attr EVENT_VAR(_id) = {                   \
        .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
        .id             = PERF_COUNT_HW_##_id,                          \
        .event_str      = NULL,                                         \
};

#define EVENT_ATTR_STR(_name, v, str)                                   \
static struct perf_pmu_events_attr event_attr_##v = {                   \
        .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
        .id             = 0,                                            \
        .event_str      = str,                                          \
};

#define EVENT_ATTR_STR_HT(_name, v, noht, ht)                           \
static struct perf_pmu_events_ht_attr event_attr_##v = {                \
        .attr           = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
        .id             = 0,                                            \
        .event_str_noht = noht,                                         \
        .event_str_ht   = ht,                                           \
}

#define EVENT_ATTR_STR_HYBRID(_name, v, str, _pmu)                      \
static struct perf_pmu_events_hybrid_attr event_attr_##v = {            \
        .attr           = __ATTR(_name, 0444, events_hybrid_sysfs_show, NULL),\
        .id             = 0,                                            \
        .event_str      = str,                                          \
        .pmu_type       = _pmu,                                         \
}

#define FORMAT_HYBRID_PTR(_id) (&format_attr_hybrid_##_id.attr.attr)

#define FORMAT_ATTR_HYBRID(_name, _pmu)                                 \
static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
        .attr           = __ATTR_RO(_name),                             \
        .pmu_type       = _pmu,                                         \
}

int is_x86_event(struct perf_event *event);
struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly;

DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup);
DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);

static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
                return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt;

        return &((struct x86_perf_task_context *)ctx)->opt;
}

static inline bool x86_pmu_has_lbr_callstack(void)
{
        return x86_pmu.lbr_sel_map &&
               x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
}

DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);

int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

extern u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
extern u64 __read_mostly hw_cache_extra_regs
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];

u64 x86_perf_event_update(struct perf_event *event);

static inline u64 intel_pmu_topdown_event_update(struct perf_event *event, u64 *val)
{
        return x86_perf_event_update(event);
}
DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);

static inline unsigned int x86_pmu_config_addr(int index)
{
        return x86_pmu.eventsel + (x86_pmu.addr_offset ?
                                   x86_pmu.addr_offset(index, true) : index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
        return x86_pmu.perfctr + (x86_pmu.addr_offset ?
                                  x86_pmu.addr_offset(index, false) : index);
}

static inline unsigned int x86_pmu_fixed_ctr_addr(int index)
{
        return x86_pmu.fixedctr + (x86_pmu.addr_offset ?
                                   x86_pmu.addr_offset(index, false) : index);
}
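
/*
 * Worked example (Intel without an addr_offset callback): with
 * x86_pmu.eventsel == MSR_ARCH_PERFMON_EVENTSEL0 (0x186) and
 * x86_pmu.perfctr == MSR_ARCH_PERFMON_PERFCTR0 (0xc1), counter index 2
 * maps to config MSR 0x188 and counter MSR 0xc3. AMD family 15h+ cores
 * install addr_offset() because their eventsel/perfctr MSRs are
 * interleaved rather than consecutive.
 */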

static inline int x86_pmu_rdpmc_index(int index)
{
        return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}

bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
                     unsigned long *fixed_cntr_mask);

int x86_add_exclusive(unsigned int what);

void x86_del_exclusive(unsigned int what);

int x86_reserve_hardware(void);

void x86_release_hardware(void);

int x86_pmu_max_precise(void);

void hw_perf_lbr_event_destroy(struct perf_event *event);

int x86_setup_perfctr(struct perf_event *event);

int x86_pmu_hw_config(struct perf_event *event);

void x86_pmu_disable_all(void);

static inline bool has_amd_brs(struct hw_perf_event *hwc)
{
        return hwc->flags & PERF_X86_EVENT_AMD_BRS;
}

static inline bool is_counter_pair(struct hw_perf_event *hwc)
{
        return hwc->flags & PERF_X86_EVENT_PAIR;
}

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                          u64 enable_mask)
{
        u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);

        if (hwc->extra_reg.reg)
                wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);

        /*
         * Add enabled Merge event on next counter
         * if large increment event being enabled on this counter
         */
        if (is_counter_pair(hwc))
                wrmsrq(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);

        wrmsrq(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}

void x86_pmu_enable_all(int added);

int perf_assign_events(struct event_constraint **constraints, int n,
                       int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

void x86_pmu_stop(struct perf_event *event, int flags);

static inline void x86_pmu_disable_event(struct perf_event *event)
{
        u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
        struct hw_perf_event *hwc = &event->hw;

        wrmsrq(hwc->config_base, hwc->config & ~disable_mask);

        if (is_counter_pair(hwc))
                wrmsrq(x86_pmu_config_addr(hwc->idx + 1), 0);
}

void x86_pmu_enable_event(struct perf_event *event);

int x86_pmu_handle_irq(struct pt_regs *regs);

void x86_pmu_show_pmu_cap(struct pmu *pmu);

static inline int x86_pmu_num_counters(struct pmu *pmu)
{
        return hweight64(hybrid(pmu, cntr_mask64));
}

static inline int x86_pmu_max_num_counters(struct pmu *pmu)
{
        return fls64(hybrid(pmu, cntr_mask64));
}

static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
{
        return hweight64(hybrid(pmu, fixed_cntr_mask64));
}

static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
{
        return fls64(hybrid(pmu, fixed_cntr_mask64));
}

static inline u64 x86_pmu_get_event_config(struct perf_event *event)
{
        return event->attr.config & hybrid(event->pmu, config_mask);
}

extern struct event_constraint emptyconstraint;

extern struct event_constraint unconstrained;

static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
        return ip > PAGE_OFFSET;
#else
        return (long)ip < 0;
#endif
}
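
/*
 * Example: on x86-64 the kernel occupies the upper canonical half of the
 * address space (bit 63 set for every kernel text address), so the
 * signed comparison (long)ip < 0 is a cheap kernel/user range check.
 */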

/*
 * Not all PMUs provide the right context information to place the reported IP
 * into full context. Specifically segment registers are typically not
 * supplied.
 *
 * Assuming the address is a linear address (it is for IBS), we fake the CS and
 * vm86 mode using the known zero-based code segment and 'fix up' the registers
 * to reflect this.
 *
 * Intel PEBS/LBR appear to typically provide the effective address, nothing
 * much we can do about that but pray and treat it like a linear address.
 */
static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
{
        regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
        if (regs->flags & X86_VM_MASK)
                regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
        regs->ip = ip;
}

/*
 * x86 control flow change classification:
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
        X86_BR_NONE             = 0,      /* unknown */

        X86_BR_USER             = 1 << 0, /* branch target is user */
        X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */

        X86_BR_CALL             = 1 << 2, /* call */
        X86_BR_RET              = 1 << 3, /* return */
        X86_BR_SYSCALL          = 1 << 4, /* syscall */
        X86_BR_SYSRET           = 1 << 5, /* syscall return */
        X86_BR_INT              = 1 << 6, /* sw interrupt */
        X86_BR_IRET             = 1 << 7, /* return from interrupt */
        X86_BR_JCC              = 1 << 8, /* conditional */
        X86_BR_JMP              = 1 << 9, /* jump */
        X86_BR_IRQ              = 1 << 10, /* hw interrupt or trap or fault */
        X86_BR_IND_CALL         = 1 << 11, /* indirect calls */
        X86_BR_ABORT            = 1 << 12, /* transaction abort */
        X86_BR_IN_TX            = 1 << 13, /* in transaction */
        X86_BR_NO_TX            = 1 << 14, /* not in transaction */
        X86_BR_ZERO_CALL        = 1 << 15, /* zero length call */
        X86_BR_CALL_STACK       = 1 << 16, /* call stack */
        X86_BR_IND_JMP          = 1 << 17, /* indirect jump */

        X86_BR_TYPE_SAVE        = 1 << 18, /* indicate to save branch type */

};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY \
        (X86_BR_CALL |\
         X86_BR_RET |\
         X86_BR_SYSCALL |\
         X86_BR_SYSRET |\
         X86_BR_INT |\
         X86_BR_IRET |\
         X86_BR_JCC |\
         X86_BR_JMP |\
         X86_BR_IRQ |\
         X86_BR_ABORT |\
         X86_BR_IND_CALL |\
         X86_BR_IND_JMP |\
         X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL \
        (X86_BR_CALL |\
         X86_BR_IND_CALL |\
         X86_BR_ZERO_CALL |\
         X86_BR_SYSCALL |\
         X86_BR_IRQ |\
         X86_BR_INT)

int common_branch_type(int type);
int branch_type(unsigned long from, unsigned long to, int abort);
int branch_type_fused(unsigned long from, unsigned long to, int abort,
                      int *offset);

ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);

ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
                          char *page);
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
                             char *page);
ssize_t events_hybrid_sysfs_show(struct device *dev,
                                 struct device_attribute *attr,
                                 char *page);

static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
{
        u64 intel_ctrl = hybrid(pmu, intel_ctrl);

        return !(intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
}
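
/*
 * Worked example: intel_ctrl mirrors the GLOBAL_CTRL enable mask, where
 * fixed counter i lives at bit (32 + i) == (i + INTEL_PMC_IDX_FIXED).
 * Shifting right by that amount leaves the counter's enable bit (and all
 * higher ones) in the low bits, so a zero result means fixed counter i
 * and everything above it is absent or disabled.
 */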
1422 | |
1423 | #ifdef CONFIG_CPU_SUP_AMD |
1424 | |
1425 | int amd_pmu_init(void); |
1426 | |
1427 | int amd_pmu_lbr_init(void); |
1428 | void amd_pmu_lbr_reset(void); |
1429 | void amd_pmu_lbr_read(void); |
1430 | void amd_pmu_lbr_add(struct perf_event *event); |
1431 | void amd_pmu_lbr_del(struct perf_event *event); |
1432 | void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, |
1433 | struct task_struct *task, bool sched_in); |
1434 | void amd_pmu_lbr_enable_all(void); |
1435 | void amd_pmu_lbr_disable_all(void); |
1436 | int amd_pmu_lbr_hw_config(struct perf_event *event); |
1437 | |
1438 | static __always_inline void __amd_pmu_lbr_disable(void) |
1439 | { |
1440 | u64 dbg_ctl, dbg_extn_cfg; |
1441 | |
1442 | rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); |
1443 | wrmsrq(MSR_AMD_DBG_EXTN_CFG, val: dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); |
1444 | |
1445 | if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { |
1446 | rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl); |
1447 | wrmsrq(MSR_IA32_DEBUGCTLMSR, val: dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); |
1448 | } |
1449 | } |
1450 | |
1451 | #ifdef CONFIG_PERF_EVENTS_AMD_BRS |
1452 | |
1453 | #define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */ |
1454 | |
1455 | int amd_brs_init(void); |
1456 | void amd_brs_disable(void); |
1457 | void amd_brs_enable(void); |
1458 | void amd_brs_enable_all(void); |
1459 | void amd_brs_disable_all(void); |
1460 | void amd_brs_drain(void); |
1461 | void amd_brs_lopwr_init(void); |
1462 | int amd_brs_hw_config(struct perf_event *event); |
1463 | void amd_brs_reset(void); |
1464 | |
1465 | static inline void amd_pmu_brs_add(struct perf_event *event) |
1466 | { |
1467 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1468 | |
1469 | perf_sched_cb_inc(pmu: event->pmu); |
1470 | cpuc->lbr_users++; |
1471 | /* |
1472 | * No need to reset BRS because it is reset |
1473 | * on brs_enable() and it is saturating |
1474 | */ |
1475 | } |
1476 | |
1477 | static inline void amd_pmu_brs_del(struct perf_event *event) |
1478 | { |
1479 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1480 | |
1481 | cpuc->lbr_users--; |
1482 | WARN_ON_ONCE(cpuc->lbr_users < 0); |
1483 | |
1484 | perf_sched_cb_dec(pmu: event->pmu); |
1485 | } |
1486 | |
1487 | void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, |
1488 | struct task_struct *task, bool sched_in); |
1489 | #else |
1490 | static inline int amd_brs_init(void) |
1491 | { |
1492 | return 0; |
1493 | } |
1494 | static inline void amd_brs_disable(void) {} |
1495 | static inline void amd_brs_enable(void) {} |
1496 | static inline void amd_brs_drain(void) {} |
1497 | static inline void amd_brs_lopwr_init(void) {} |
1498 | static inline void amd_brs_disable_all(void) {} |
1499 | static inline int amd_brs_hw_config(struct perf_event *event) |
1500 | { |
1501 | return 0; |
1502 | } |
1503 | static inline void amd_brs_reset(void) {} |
1504 | |
1505 | static inline void amd_pmu_brs_add(struct perf_event *event) |
1506 | { |
1507 | } |
1508 | |
1509 | static inline void amd_pmu_brs_del(struct perf_event *event) |
1510 | { |
1511 | } |
1512 | |
1513 | static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, |
1514 | struct task_struct *task, bool sched_in) |
1515 | { |
1516 | } |
1517 | |
1518 | static inline void amd_brs_enable_all(void) |
1519 | { |
1520 | } |
1521 | |
1522 | #endif |
1523 | |
1524 | #else /* CONFIG_CPU_SUP_AMD */ |
1525 | |
1526 | static inline int amd_pmu_init(void) |
1527 | { |
1528 | return 0; |
1529 | } |
1530 | |
1531 | static inline int amd_brs_init(void) |
1532 | { |
1533 | return -EOPNOTSUPP; |
1534 | } |
1535 | |
1536 | static inline void amd_brs_drain(void) |
1537 | { |
1538 | } |
1539 | |
1540 | static inline void amd_brs_enable_all(void) |
1541 | { |
1542 | } |
1543 | |
1544 | static inline void amd_brs_disable_all(void) |
1545 | { |
1546 | } |
1547 | #endif /* CONFIG_CPU_SUP_AMD */ |
1548 | |
1549 | static inline int is_pebs_pt(struct perf_event *event) |
1550 | { |
1551 | return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT); |
1552 | } |
1553 | |
1554 | #ifdef CONFIG_CPU_SUP_INTEL |
1555 | |
1556 | static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) |
1557 | { |
1558 | struct hw_perf_event *hwc = &event->hw; |
1559 | unsigned int hw_event, bts_event; |
1560 | |
1561 | if (event->attr.freq) |
1562 | return false; |
1563 | |
1564 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
1565 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
1566 | |
1567 | return hw_event == bts_event && period == 1; |
1568 | } |
1569 | |
1570 | static inline bool intel_pmu_has_bts(struct perf_event *event) |
1571 | { |
1572 | struct hw_perf_event *hwc = &event->hw; |
1573 | |
1574 | return intel_pmu_has_bts_period(event, period: hwc->sample_period); |
1575 | } |
1576 | |
1577 | static __always_inline void __intel_pmu_pebs_disable_all(void) |
1578 | { |
1579 | wrmsrq(MSR_IA32_PEBS_ENABLE, val: 0); |
1580 | } |
1581 | |
1582 | static __always_inline void __intel_pmu_arch_lbr_disable(void) |
1583 | { |
1584 | wrmsrq(MSR_ARCH_LBR_CTL, val: 0); |
1585 | } |
1586 | |
1587 | static __always_inline void __intel_pmu_lbr_disable(void) |
1588 | { |
1589 | u64 debugctl; |
1590 | |
1591 | rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); |
1592 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); |
1593 | wrmsrq(MSR_IA32_DEBUGCTLMSR, val: debugctl); |
1594 | } |

int intel_pmu_save_and_restart(struct perf_event *event);

struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event);

extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu);
extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);

int intel_pmu_init(void);

void init_debug_store_on_cpu(int cpu);

void fini_debug_store_on_cpu(int cpu);

void release_ds_buffers(void);

void reserve_ds_buffers(void);

void release_lbr_buffers(void);

void reserve_lbr_buffers(void);

extern struct event_constraint bts_constraint;
extern struct event_constraint vlbr_constraint;

void intel_pmu_enable_bts(u64 config);

void intel_pmu_disable_bts(void);

int intel_pmu_drain_bts_buffer(void);

void intel_pmu_late_setup(void);

u64 grt_latency_data(struct perf_event *event, u64 status);

u64 cmt_latency_data(struct perf_event *event, u64 status);

u64 lnl_latency_data(struct perf_event *event, u64 status);

u64 arl_h_latency_data(struct perf_event *event, u64 status);

extern struct event_constraint intel_core2_pebs_event_constraints[];

extern struct event_constraint intel_atom_pebs_event_constraints[];

extern struct event_constraint intel_slm_pebs_event_constraints[];

extern struct event_constraint intel_glm_pebs_event_constraints[];

extern struct event_constraint intel_glp_pebs_event_constraints[];

extern struct event_constraint intel_grt_pebs_event_constraints[];

extern struct event_constraint intel_nehalem_pebs_event_constraints[];

extern struct event_constraint intel_westmere_pebs_event_constraints[];

extern struct event_constraint intel_snb_pebs_event_constraints[];

extern struct event_constraint intel_ivb_pebs_event_constraints[];

extern struct event_constraint intel_hsw_pebs_event_constraints[];

extern struct event_constraint intel_bdw_pebs_event_constraints[];

extern struct event_constraint intel_skl_pebs_event_constraints[];

extern struct event_constraint intel_icl_pebs_event_constraints[];

extern struct event_constraint intel_glc_pebs_event_constraints[];

extern struct event_constraint intel_lnc_pebs_event_constraints[];

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

void intel_pmu_pebs_add(struct perf_event *event);

void intel_pmu_pebs_del(struct perf_event *event);

void intel_pmu_pebs_enable(struct perf_event *event);

void intel_pmu_pebs_disable(struct perf_event *event);

void intel_pmu_pebs_enable_all(void);

void intel_pmu_pebs_disable_all(void);

void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);

void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc);

void intel_pmu_drain_pebs_buffer(void);

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);

void intel_pebs_init(void);

void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
				struct cpu_hw_events *cpuc,
				struct perf_event *event);

void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
			      struct task_struct *task, bool sched_in);

u64 lbr_from_signext_quirk_wr(u64 val);

void intel_pmu_lbr_reset(void);

void intel_pmu_lbr_reset_32(void);

void intel_pmu_lbr_reset_64(void);

void intel_pmu_lbr_add(struct perf_event *event);

void intel_pmu_lbr_del(struct perf_event *event);

void intel_pmu_lbr_enable_all(bool pmi);

void intel_pmu_lbr_disable_all(void);

void intel_pmu_lbr_read(void);

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc);

void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc);

void intel_pmu_lbr_save(void *ctx);

void intel_pmu_lbr_restore(void *ctx);

void intel_pmu_lbr_init_core(void);

void intel_pmu_lbr_init_nhm(void);

void intel_pmu_lbr_init_atom(void);

void intel_pmu_lbr_init_slm(void);

void intel_pmu_lbr_init_snb(void);

void intel_pmu_lbr_init_hsw(void);

void intel_pmu_lbr_init_skl(void);

void intel_pmu_lbr_init_knl(void);

void intel_pmu_lbr_init(void);

void intel_pmu_arch_lbr_init(void);

void intel_pmu_pebs_data_source_nhm(void);

void intel_pmu_pebs_data_source_skl(bool pmem);

void intel_pmu_pebs_data_source_adl(void);

void intel_pmu_pebs_data_source_grt(void);

void intel_pmu_pebs_data_source_mtl(void);

void intel_pmu_pebs_data_source_arl_h(void);

void intel_pmu_pebs_data_source_cmt(void);

void intel_pmu_pebs_data_source_lnl(void);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);

int intel_bts_interrupt(void);

void intel_bts_enable_local(void);

void intel_bts_disable_local(void);

int p4_pmu_init(void);

int p6_pmu_init(void);

int knc_pmu_init(void);

static inline int is_ht_workaround_enabled(void)
{
	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
}

static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
{
	return MAX_PEBS_EVENTS_MASK & cntr_mask;
}
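
/*
 * Worked example (values assumed for illustration): with
 * MAX_PEBS_EVENTS == 32, MAX_PEBS_EVENTS_MASK covers bits 0-31, so a
 * PMU exposing counters 0-9 (cntr_mask == 0x3ff) yields a PEBS mask of
 * 0x3ff: every general-purpose counter stays eligible, capped at 32.
 */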

static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
{
	static_assert(MAX_PEBS_EVENTS == 32);
	return fls((u32)hybrid(pmu, pebs_events_mask));
}
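
/*
 * Example (assumed mask value): pebs_events_mask == 0xff gives
 * fls() == 8, i.e. PEBS events may land on counters 0-7 only.
 */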

#else /* CONFIG_CPU_SUP_INTEL */

static inline void reserve_ds_buffers(void)
{
}

static inline void release_ds_buffers(void)
{
}

static inline void release_lbr_buffers(void)
{
}

static inline void reserve_lbr_buffers(void)
{
}

static inline int intel_pmu_init(void)
{
	return 0;
}

static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
	return 0;
}

static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc)
{
}

static inline int is_ht_workaround_enabled(void)
{
	return 0;
}
#endif /* CONFIG_CPU_SUP_INTEL */

#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN))
int zhaoxin_pmu_init(void);
#else
static inline int zhaoxin_pmu_init(void)
{
	return 0;
}
#endif /* CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN */