1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Performance event support for sparc64. |
3 | * |
4 | * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net> |
5 | * |
6 | * This code is based almost entirely upon the x86 perf event |
7 | * code, which is: |
8 | * |
9 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
10 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar |
11 | * Copyright (C) 2009 Jaswinder Singh Rajput |
12 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
13 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra |
14 | */ |
15 | |
16 | #include <linux/perf_event.h> |
17 | #include <linux/kprobes.h> |
18 | #include <linux/ftrace.h> |
19 | #include <linux/kernel.h> |
20 | #include <linux/kdebug.h> |
21 | #include <linux/mutex.h> |
22 | |
23 | #include <asm/stacktrace.h> |
24 | #include <asm/cpudata.h> |
25 | #include <linux/uaccess.h> |
26 | #include <linux/atomic.h> |
27 | #include <linux/sched/clock.h> |
28 | #include <asm/nmi.h> |
29 | #include <asm/pcr.h> |
30 | #include <asm/cacheflush.h> |
31 | |
32 | #include "kernel.h" |
33 | #include "kstack.h" |
34 | |
35 | /* Two classes of sparc64 chips currently exist, all of which have |
36 | * 32-bit counters which can generate overflow interrupts on the |
37 | * transition from 0xffffffff to 0. |
38 | * |
39 | * All chips up to and including SPARC-T3 have two performance |
40 | * counters. The two 32-bit counters are accessed in one go using a |
41 | * single 64-bit register. |
42 | * |
43 | * On these older chips both counters are controlled using a single |
44 | * control register. The only way to stop all sampling is to clear |
45 | * all of the context (user, supervisor, hypervisor) sampling enable |
46 | * bits. But these bits apply to both counters, thus the two counters |
47 | * can't be enabled/disabled individually. |
48 | * |
49 | * Furthermore, the control register on these older chips has two |
50 | * event fields, one for each of the two counters. It's thus nearly |
51 | * impossible to have one counter going while keeping the other one |
52 | * stopped. Therefore it is possible to get overflow interrupts for |
53 | * counters not currently "in use" and that condition must be checked |
54 | * in the overflow interrupt handler. |
55 | * |
56 | * So we use a hack, in that we program inactive counters with the |
57 | * "sw_count0" and "sw_count1" events. These count how many times |
58 | * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an |
59 | * unusual way to encode a NOP and therefore will not trigger in |
60 | * normal code. |
61 | * |
62 | * Starting with SPARC-T4 we have one control register per counter. |
63 | * And the counters are stored in individual registers. The registers |
64 | * for the counters are 64-bit but only a 32-bit counter is |
65 | * implemented. The event selections on SPARC-T4 lack any |
66 | * restrictions, therefore we can elide all of the complicated |
67 | * conflict resolution code we have for SPARC-T3 and earlier chips. |
68 | */ |
69 | |
/* Maximum number of hardware events and %pcr registers handled. */
#define MAX_HWEVENTS			4
#define MAX_PCRS			4

/* Largest period that fits the 32-bit counters. */
#define MAX_PERIOD			((1UL << 32) - 1)

/* Counter slot indices.  PIC_NO_INDEX means "no counter assigned"; it is
 * parenthesised so the negative constant stays a single operand wherever
 * the macro is expanded.
 */
#define PIC_UPPER_INDEX			0
#define PIC_LOWER_INDEX			1
#define PIC_NO_INDEX			(-1)
77 | |
/* Per-cpu bookkeeping for the events scheduled onto the local PMU. */
struct cpu_hw_events {
	/* Number of events currently scheduled onto this cpu.
	 * This tells how many entries in the arrays below
	 * are valid.
	 */
	int n_events;

	/* Number of new events added since the last hw_perf_disable().
	 * This works because the perf event layer always adds new
	 * events inside of a perf_{disable,enable}() sequence.
	 */
	int n_added;

	/* Array of events currently scheduled on this cpu. */
	struct perf_event *event[MAX_HWEVENTS];

	/* Array of encoded longs, specifying the %pcr register
	 * encoding and the mask of PIC counters this event can
	 * be scheduled on.  See perf_event_encode() et al.
	 */
	unsigned long events[MAX_HWEVENTS];

	/* The current counter index assigned to an event.  When the
	 * event hasn't been programmed into the cpu yet, this will
	 * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
	 * we ought to schedule the event.
	 */
	int current_idx[MAX_HWEVENTS];

	/* Software copy of %pcr register(s) on this cpu. */
	u64 pcr[MAX_HWEVENTS];

	/* Enabled/disabled state. */
	int enabled;

	unsigned int txn_flags;
};
/* One instance per cpu; every cpu starts out in the enabled state. */
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
116 | |
117 | /* An event map describes the characteristics of a performance |
118 | * counter event. In particular it gives the encoding as well as |
119 | * a mask telling which counters the event can be measured on. |
120 | * |
121 | * The mask is unused on SPARC-T4 and later. |
122 | */ |
123 | struct perf_event_map { |
124 | u16 encoding; |
125 | u8 pic_mask; |
126 | #define PIC_NONE 0x00 |
127 | #define PIC_UPPER 0x01 |
128 | #define PIC_LOWER 0x02 |
129 | }; |
130 | |
131 | /* Encode a perf_event_map entry into a long. */ |
132 | static unsigned long perf_event_encode(const struct perf_event_map *pmap) |
133 | { |
134 | return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; |
135 | } |
136 | |
137 | static u8 perf_event_get_msk(unsigned long val) |
138 | { |
139 | return val & 0xff; |
140 | } |
141 | |
142 | static u64 perf_event_get_enc(unsigned long val) |
143 | { |
144 | return val >> 16; |
145 | } |
146 | |
/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used as indices
 * in the cache maps.
 */
#define C(x) PERF_COUNT_HW_CACHE_##x

/* Sentinel encodings placed in cache map slots that do not describe a
 * real event.
 */
#define CACHE_OP_UNSUPPORTED 0xfffe
#define CACHE_OP_NONSENSE 0xffff

/* A cache map holds one perf_event_map per [cache][operation][result]. */
typedef struct perf_event_map cache_map_t
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
156 | |
157 | struct sparc_pmu { |
158 | const struct perf_event_map *(*event_map)(int); |
159 | const cache_map_t *cache_map; |
160 | int max_events; |
161 | u32 (*read_pmc)(int); |
162 | void (*write_pmc)(int, u64); |
163 | int upper_shift; |
164 | int lower_shift; |
165 | int event_mask; |
166 | int user_bit; |
167 | int priv_bit; |
168 | int hv_bit; |
169 | int irq_bit; |
170 | int upper_nop; |
171 | int lower_nop; |
172 | unsigned int flags; |
173 | #define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 |
174 | #define SPARC_PMU_HAS_CONFLICTS 0x00000002 |
175 | int max_hw_events; |
176 | int num_pcrs; |
177 | int num_pic_regs; |
178 | }; |
179 | |
180 | static u32 sparc_default_read_pmc(int idx) |
181 | { |
182 | u64 val; |
183 | |
184 | val = pcr_ops->read_pic(0); |
185 | if (idx == PIC_UPPER_INDEX) |
186 | val >>= 32; |
187 | |
188 | return val & 0xffffffff; |
189 | } |
190 | |
191 | static void sparc_default_write_pmc(int idx, u64 val) |
192 | { |
193 | u64 shift, mask, pic; |
194 | |
195 | shift = 0; |
196 | if (idx == PIC_UPPER_INDEX) |
197 | shift = 32; |
198 | |
199 | mask = ((u64) 0xffffffff) << shift; |
200 | val <<= shift; |
201 | |
202 | pic = pcr_ops->read_pic(0); |
203 | pic &= ~mask; |
204 | pic |= val; |
205 | pcr_ops->write_pic(0, pic); |
206 | } |
207 | |
208 | static const struct perf_event_map ultra3_perfmon_event_map[] = { |
209 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, |
210 | [PERF_COUNT_HW_INSTRUCTIONS] = { .encoding: 0x0001, PIC_UPPER | PIC_LOWER }, |
211 | [PERF_COUNT_HW_CACHE_REFERENCES] = { .encoding: 0x0009, PIC_LOWER }, |
212 | [PERF_COUNT_HW_CACHE_MISSES] = { .encoding: 0x0009, PIC_UPPER }, |
213 | }; |
214 | |
215 | static const struct perf_event_map *ultra3_event_map(int event_id) |
216 | { |
217 | return &ultra3_perfmon_event_map[event_id]; |
218 | } |
219 | |
220 | static const cache_map_t ultra3_cache_map = { |
221 | [C(L1D)] = { |
222 | [C(OP_READ)] = { |
223 | [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, |
224 | [C(RESULT_MISS)] = { .encoding: 0x09, PIC_UPPER, }, |
225 | }, |
226 | [C(OP_WRITE)] = { |
227 | [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, |
228 | [C(RESULT_MISS)] = { .encoding: 0x0a, PIC_UPPER }, |
229 | }, |
230 | [C(OP_PREFETCH)] = { |
231 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
232 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
233 | }, |
234 | }, |
235 | [C(L1I)] = { |
236 | [C(OP_READ)] = { |
237 | [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, |
238 | [C(RESULT_MISS)] = { .encoding: 0x09, PIC_UPPER, }, |
239 | }, |
240 | [ C(OP_WRITE) ] = { |
241 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, |
242 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, |
243 | }, |
244 | [ C(OP_PREFETCH) ] = { |
245 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
246 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
247 | }, |
248 | }, |
249 | [C(LL)] = { |
250 | [C(OP_READ)] = { |
251 | [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, |
252 | [C(RESULT_MISS)] = { .encoding: 0x0c, PIC_UPPER, }, |
253 | }, |
254 | [C(OP_WRITE)] = { |
255 | [C(RESULT_ACCESS)] = { .encoding: 0x0c, PIC_LOWER }, |
256 | [C(RESULT_MISS)] = { .encoding: 0x0c, PIC_UPPER }, |
257 | }, |
258 | [C(OP_PREFETCH)] = { |
259 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
260 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
261 | }, |
262 | }, |
263 | [C(DTLB)] = { |
264 | [C(OP_READ)] = { |
265 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
266 | [C(RESULT_MISS)] = { .encoding: 0x12, PIC_UPPER, }, |
267 | }, |
268 | [ C(OP_WRITE) ] = { |
269 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
270 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
271 | }, |
272 | [ C(OP_PREFETCH) ] = { |
273 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
274 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
275 | }, |
276 | }, |
277 | [C(ITLB)] = { |
278 | [C(OP_READ)] = { |
279 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
280 | [C(RESULT_MISS)] = { .encoding: 0x11, PIC_UPPER, }, |
281 | }, |
282 | [ C(OP_WRITE) ] = { |
283 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
284 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
285 | }, |
286 | [ C(OP_PREFETCH) ] = { |
287 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
288 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
289 | }, |
290 | }, |
291 | [C(BPU)] = { |
292 | [C(OP_READ)] = { |
293 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
294 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
295 | }, |
296 | [ C(OP_WRITE) ] = { |
297 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
298 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
299 | }, |
300 | [ C(OP_PREFETCH) ] = { |
301 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
302 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
303 | }, |
304 | }, |
305 | [C(NODE)] = { |
306 | [C(OP_READ)] = { |
307 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
308 | [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
309 | }, |
310 | [ C(OP_WRITE) ] = { |
311 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
312 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
313 | }, |
314 | [ C(OP_PREFETCH) ] = { |
315 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
316 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
317 | }, |
318 | }, |
319 | }; |
320 | |
321 | static const struct sparc_pmu ultra3_pmu = { |
322 | .event_map = ultra3_event_map, |
323 | .cache_map = &ultra3_cache_map, |
324 | .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), |
325 | .read_pmc = sparc_default_read_pmc, |
326 | .write_pmc = sparc_default_write_pmc, |
327 | .upper_shift = 11, |
328 | .lower_shift = 4, |
329 | .event_mask = 0x3f, |
330 | .user_bit = PCR_UTRACE, |
331 | .priv_bit = PCR_STRACE, |
332 | .upper_nop = 0x1c, |
333 | .lower_nop = 0x14, |
334 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | |
335 | SPARC_PMU_HAS_CONFLICTS), |
336 | .max_hw_events = 2, |
337 | .num_pcrs = 1, |
338 | .num_pic_regs = 1, |
339 | }; |
340 | |
341 | /* Niagara1 is very limited. The upper PIC is hard-locked to count |
342 | * only instructions, so it is free running which creates all kinds of |
343 | * problems. Some hardware designs make one wonder if the creator |
344 | * even looked at how this stuff gets used by software. |
345 | */ |
346 | static const struct perf_event_map niagara1_perfmon_event_map[] = { |
347 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, |
348 | [PERF_COUNT_HW_INSTRUCTIONS] = { .encoding: 0x00, PIC_UPPER }, |
349 | [PERF_COUNT_HW_CACHE_REFERENCES] = { .encoding: 0, PIC_NONE }, |
350 | [PERF_COUNT_HW_CACHE_MISSES] = { .encoding: 0x03, PIC_LOWER }, |
351 | }; |
352 | |
353 | static const struct perf_event_map *niagara1_event_map(int event_id) |
354 | { |
355 | return &niagara1_perfmon_event_map[event_id]; |
356 | } |
357 | |
358 | static const cache_map_t niagara1_cache_map = { |
359 | [C(L1D)] = { |
360 | [C(OP_READ)] = { |
361 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
362 | [C(RESULT_MISS)] = { .encoding: 0x03, PIC_LOWER, }, |
363 | }, |
364 | [C(OP_WRITE)] = { |
365 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
366 | [C(RESULT_MISS)] = { .encoding: 0x03, PIC_LOWER, }, |
367 | }, |
368 | [C(OP_PREFETCH)] = { |
369 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
370 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
371 | }, |
372 | }, |
373 | [C(L1I)] = { |
374 | [C(OP_READ)] = { |
375 | [C(RESULT_ACCESS)] = { .encoding: 0x00, PIC_UPPER }, |
376 | [C(RESULT_MISS)] = { .encoding: 0x02, PIC_LOWER, }, |
377 | }, |
378 | [ C(OP_WRITE) ] = { |
379 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, |
380 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, |
381 | }, |
382 | [ C(OP_PREFETCH) ] = { |
383 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
384 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
385 | }, |
386 | }, |
387 | [C(LL)] = { |
388 | [C(OP_READ)] = { |
389 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
390 | [C(RESULT_MISS)] = { .encoding: 0x07, PIC_LOWER, }, |
391 | }, |
392 | [C(OP_WRITE)] = { |
393 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
394 | [C(RESULT_MISS)] = { .encoding: 0x07, PIC_LOWER, }, |
395 | }, |
396 | [C(OP_PREFETCH)] = { |
397 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
398 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
399 | }, |
400 | }, |
401 | [C(DTLB)] = { |
402 | [C(OP_READ)] = { |
403 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
404 | [C(RESULT_MISS)] = { .encoding: 0x05, PIC_LOWER, }, |
405 | }, |
406 | [ C(OP_WRITE) ] = { |
407 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
408 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
409 | }, |
410 | [ C(OP_PREFETCH) ] = { |
411 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
412 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
413 | }, |
414 | }, |
415 | [C(ITLB)] = { |
416 | [C(OP_READ)] = { |
417 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
418 | [C(RESULT_MISS)] = { .encoding: 0x04, PIC_LOWER, }, |
419 | }, |
420 | [ C(OP_WRITE) ] = { |
421 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
422 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
423 | }, |
424 | [ C(OP_PREFETCH) ] = { |
425 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
426 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
427 | }, |
428 | }, |
429 | [C(BPU)] = { |
430 | [C(OP_READ)] = { |
431 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
432 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
433 | }, |
434 | [ C(OP_WRITE) ] = { |
435 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
436 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
437 | }, |
438 | [ C(OP_PREFETCH) ] = { |
439 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
440 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
441 | }, |
442 | }, |
443 | [C(NODE)] = { |
444 | [C(OP_READ)] = { |
445 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
446 | [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
447 | }, |
448 | [ C(OP_WRITE) ] = { |
449 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
450 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
451 | }, |
452 | [ C(OP_PREFETCH) ] = { |
453 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
454 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
455 | }, |
456 | }, |
457 | }; |
458 | |
459 | static const struct sparc_pmu niagara1_pmu = { |
460 | .event_map = niagara1_event_map, |
461 | .cache_map = &niagara1_cache_map, |
462 | .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), |
463 | .read_pmc = sparc_default_read_pmc, |
464 | .write_pmc = sparc_default_write_pmc, |
465 | .upper_shift = 0, |
466 | .lower_shift = 4, |
467 | .event_mask = 0x7, |
468 | .user_bit = PCR_UTRACE, |
469 | .priv_bit = PCR_STRACE, |
470 | .upper_nop = 0x0, |
471 | .lower_nop = 0x0, |
472 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | |
473 | SPARC_PMU_HAS_CONFLICTS), |
474 | .max_hw_events = 2, |
475 | .num_pcrs = 1, |
476 | .num_pic_regs = 1, |
477 | }; |
478 | |
479 | static const struct perf_event_map niagara2_perfmon_event_map[] = { |
480 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, |
481 | [PERF_COUNT_HW_INSTRUCTIONS] = { .encoding: 0x02ff, PIC_UPPER | PIC_LOWER }, |
482 | [PERF_COUNT_HW_CACHE_REFERENCES] = { .encoding: 0x0208, PIC_UPPER | PIC_LOWER }, |
483 | [PERF_COUNT_HW_CACHE_MISSES] = { .encoding: 0x0302, PIC_UPPER | PIC_LOWER }, |
484 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { .encoding: 0x0201, PIC_UPPER | PIC_LOWER }, |
485 | [PERF_COUNT_HW_BRANCH_MISSES] = { .encoding: 0x0202, PIC_UPPER | PIC_LOWER }, |
486 | }; |
487 | |
488 | static const struct perf_event_map *niagara2_event_map(int event_id) |
489 | { |
490 | return &niagara2_perfmon_event_map[event_id]; |
491 | } |
492 | |
493 | static const cache_map_t niagara2_cache_map = { |
494 | [C(L1D)] = { |
495 | [C(OP_READ)] = { |
496 | [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, |
497 | [C(RESULT_MISS)] = { .encoding: 0x0302, PIC_UPPER | PIC_LOWER, }, |
498 | }, |
499 | [C(OP_WRITE)] = { |
500 | [C(RESULT_ACCESS)] = { .encoding: 0x0210, PIC_UPPER | PIC_LOWER, }, |
501 | [C(RESULT_MISS)] = { .encoding: 0x0302, PIC_UPPER | PIC_LOWER, }, |
502 | }, |
503 | [C(OP_PREFETCH)] = { |
504 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
505 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
506 | }, |
507 | }, |
508 | [C(L1I)] = { |
509 | [C(OP_READ)] = { |
510 | [C(RESULT_ACCESS)] = { .encoding: 0x02ff, PIC_UPPER | PIC_LOWER, }, |
511 | [C(RESULT_MISS)] = { .encoding: 0x0301, PIC_UPPER | PIC_LOWER, }, |
512 | }, |
513 | [ C(OP_WRITE) ] = { |
514 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, |
515 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, |
516 | }, |
517 | [ C(OP_PREFETCH) ] = { |
518 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
519 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
520 | }, |
521 | }, |
522 | [C(LL)] = { |
523 | [C(OP_READ)] = { |
524 | [C(RESULT_ACCESS)] = { .encoding: 0x0208, PIC_UPPER | PIC_LOWER, }, |
525 | [C(RESULT_MISS)] = { .encoding: 0x0330, PIC_UPPER | PIC_LOWER, }, |
526 | }, |
527 | [C(OP_WRITE)] = { |
528 | [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, |
529 | [C(RESULT_MISS)] = { .encoding: 0x0320, PIC_UPPER | PIC_LOWER, }, |
530 | }, |
531 | [C(OP_PREFETCH)] = { |
532 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
533 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
534 | }, |
535 | }, |
536 | [C(DTLB)] = { |
537 | [C(OP_READ)] = { |
538 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
539 | [C(RESULT_MISS)] = { .encoding: 0x0b08, PIC_UPPER | PIC_LOWER, }, |
540 | }, |
541 | [ C(OP_WRITE) ] = { |
542 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
543 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
544 | }, |
545 | [ C(OP_PREFETCH) ] = { |
546 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
547 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
548 | }, |
549 | }, |
550 | [C(ITLB)] = { |
551 | [C(OP_READ)] = { |
552 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
553 | [C(RESULT_MISS)] = { .encoding: 0xb04, PIC_UPPER | PIC_LOWER, }, |
554 | }, |
555 | [ C(OP_WRITE) ] = { |
556 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
557 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
558 | }, |
559 | [ C(OP_PREFETCH) ] = { |
560 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
561 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
562 | }, |
563 | }, |
564 | [C(BPU)] = { |
565 | [C(OP_READ)] = { |
566 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
567 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
568 | }, |
569 | [ C(OP_WRITE) ] = { |
570 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
571 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
572 | }, |
573 | [ C(OP_PREFETCH) ] = { |
574 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
575 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
576 | }, |
577 | }, |
578 | [C(NODE)] = { |
579 | [C(OP_READ)] = { |
580 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
581 | [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
582 | }, |
583 | [ C(OP_WRITE) ] = { |
584 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
585 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
586 | }, |
587 | [ C(OP_PREFETCH) ] = { |
588 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
589 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
590 | }, |
591 | }, |
592 | }; |
593 | |
594 | static const struct sparc_pmu niagara2_pmu = { |
595 | .event_map = niagara2_event_map, |
596 | .cache_map = &niagara2_cache_map, |
597 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), |
598 | .read_pmc = sparc_default_read_pmc, |
599 | .write_pmc = sparc_default_write_pmc, |
600 | .upper_shift = 19, |
601 | .lower_shift = 6, |
602 | .event_mask = 0xfff, |
603 | .user_bit = PCR_UTRACE, |
604 | .priv_bit = PCR_STRACE, |
605 | .hv_bit = PCR_N2_HTRACE, |
606 | .irq_bit = 0x30, |
607 | .upper_nop = 0x220, |
608 | .lower_nop = 0x220, |
609 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | |
610 | SPARC_PMU_HAS_CONFLICTS), |
611 | .max_hw_events = 2, |
612 | .num_pcrs = 1, |
613 | .num_pic_regs = 1, |
614 | }; |
615 | |
616 | static const struct perf_event_map niagara4_perfmon_event_map[] = { |
617 | [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) }, |
618 | [PERF_COUNT_HW_INSTRUCTIONS] = { .encoding: (3 << 6) | 0x3f }, |
619 | [PERF_COUNT_HW_CACHE_REFERENCES] = { .encoding: (3 << 6) | 0x04 }, |
620 | [PERF_COUNT_HW_CACHE_MISSES] = { .encoding: (16 << 6) | 0x07 }, |
621 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { .encoding: (4 << 6) | 0x01 }, |
622 | [PERF_COUNT_HW_BRANCH_MISSES] = { .encoding: (25 << 6) | 0x0f }, |
623 | }; |
624 | |
625 | static const struct perf_event_map *niagara4_event_map(int event_id) |
626 | { |
627 | return &niagara4_perfmon_event_map[event_id]; |
628 | } |
629 | |
630 | static const cache_map_t niagara4_cache_map = { |
631 | [C(L1D)] = { |
632 | [C(OP_READ)] = { |
633 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, |
634 | [C(RESULT_MISS)] = { .encoding: (16 << 6) | 0x07 }, |
635 | }, |
636 | [C(OP_WRITE)] = { |
637 | [C(RESULT_ACCESS)] = { .encoding: (3 << 6) | 0x08 }, |
638 | [C(RESULT_MISS)] = { .encoding: (16 << 6) | 0x07 }, |
639 | }, |
640 | [C(OP_PREFETCH)] = { |
641 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
642 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
643 | }, |
644 | }, |
645 | [C(L1I)] = { |
646 | [C(OP_READ)] = { |
647 | [C(RESULT_ACCESS)] = { .encoding: (3 << 6) | 0x3f }, |
648 | [C(RESULT_MISS)] = { .encoding: (11 << 6) | 0x03 }, |
649 | }, |
650 | [ C(OP_WRITE) ] = { |
651 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, |
652 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, |
653 | }, |
654 | [ C(OP_PREFETCH) ] = { |
655 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
656 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
657 | }, |
658 | }, |
659 | [C(LL)] = { |
660 | [C(OP_READ)] = { |
661 | [C(RESULT_ACCESS)] = { .encoding: (3 << 6) | 0x04 }, |
662 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
663 | }, |
664 | [C(OP_WRITE)] = { |
665 | [C(RESULT_ACCESS)] = { .encoding: (3 << 6) | 0x08 }, |
666 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
667 | }, |
668 | [C(OP_PREFETCH)] = { |
669 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
670 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
671 | }, |
672 | }, |
673 | [C(DTLB)] = { |
674 | [C(OP_READ)] = { |
675 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
676 | [C(RESULT_MISS)] = { .encoding: (17 << 6) | 0x3f }, |
677 | }, |
678 | [ C(OP_WRITE) ] = { |
679 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
680 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
681 | }, |
682 | [ C(OP_PREFETCH) ] = { |
683 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
684 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
685 | }, |
686 | }, |
687 | [C(ITLB)] = { |
688 | [C(OP_READ)] = { |
689 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
690 | [C(RESULT_MISS)] = { .encoding: (6 << 6) | 0x3f }, |
691 | }, |
692 | [ C(OP_WRITE) ] = { |
693 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
694 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
695 | }, |
696 | [ C(OP_PREFETCH) ] = { |
697 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
698 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
699 | }, |
700 | }, |
701 | [C(BPU)] = { |
702 | [C(OP_READ)] = { |
703 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
704 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, |
705 | }, |
706 | [ C(OP_WRITE) ] = { |
707 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
708 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
709 | }, |
710 | [ C(OP_PREFETCH) ] = { |
711 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
712 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
713 | }, |
714 | }, |
715 | [C(NODE)] = { |
716 | [C(OP_READ)] = { |
717 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, |
718 | [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
719 | }, |
720 | [ C(OP_WRITE) ] = { |
721 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
722 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
723 | }, |
724 | [ C(OP_PREFETCH) ] = { |
725 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, |
726 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, |
727 | }, |
728 | }, |
729 | }; |
730 | |
731 | static u32 sparc_vt_read_pmc(int idx) |
732 | { |
733 | u64 val = pcr_ops->read_pic(idx); |
734 | |
735 | return val & 0xffffffff; |
736 | } |
737 | |
738 | static void sparc_vt_write_pmc(int idx, u64 val) |
739 | { |
740 | u64 pcr; |
741 | |
742 | pcr = pcr_ops->read_pcr(idx); |
743 | /* ensure ov and ntc are reset */ |
744 | pcr &= ~(PCR_N4_OV | PCR_N4_NTC); |
745 | |
746 | pcr_ops->write_pic(idx, val & 0xffffffff); |
747 | |
748 | pcr_ops->write_pcr(idx, pcr); |
749 | } |
750 | |
751 | static const struct sparc_pmu niagara4_pmu = { |
752 | .event_map = niagara4_event_map, |
753 | .cache_map = &niagara4_cache_map, |
754 | .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), |
755 | .read_pmc = sparc_vt_read_pmc, |
756 | .write_pmc = sparc_vt_write_pmc, |
757 | .upper_shift = 5, |
758 | .lower_shift = 5, |
759 | .event_mask = 0x7ff, |
760 | .user_bit = PCR_N4_UTRACE, |
761 | .priv_bit = PCR_N4_STRACE, |
762 | |
763 | /* We explicitly don't support hypervisor tracing. The T4 |
764 | * generates the overflow event for precise events via a trap |
765 | * which will not be generated (ie. it's completely lost) if |
766 | * we happen to be in the hypervisor when the event triggers. |
767 | * Essentially, the overflow event reporting is completely |
768 | * unusable when you have hypervisor mode tracing enabled. |
769 | */ |
770 | .hv_bit = 0, |
771 | |
772 | .irq_bit = PCR_N4_TOE, |
773 | .upper_nop = 0, |
774 | .lower_nop = 0, |
775 | .flags = 0, |
776 | .max_hw_events = 4, |
777 | .num_pcrs = 4, |
778 | .num_pic_regs = 4, |
779 | }; |
780 | |
781 | static const struct sparc_pmu sparc_m7_pmu = { |
782 | .event_map = niagara4_event_map, |
783 | .cache_map = &niagara4_cache_map, |
784 | .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), |
785 | .read_pmc = sparc_vt_read_pmc, |
786 | .write_pmc = sparc_vt_write_pmc, |
787 | .upper_shift = 5, |
788 | .lower_shift = 5, |
789 | .event_mask = 0x7ff, |
790 | .user_bit = PCR_N4_UTRACE, |
791 | .priv_bit = PCR_N4_STRACE, |
792 | |
793 | /* We explicitly don't support hypervisor tracing. */ |
794 | .hv_bit = 0, |
795 | |
796 | .irq_bit = PCR_N4_TOE, |
797 | .upper_nop = 0, |
798 | .lower_nop = 0, |
799 | .flags = 0, |
800 | .max_hw_events = 4, |
801 | .num_pcrs = 4, |
802 | .num_pic_regs = 4, |
803 | }; |
/* The PMU description in use; the helpers below read it without a NULL
 * check.  It is assigned outside this part of the file.
 */
static const struct sparc_pmu *sparc_pmu __read_mostly;
805 | |
806 | static u64 event_encoding(u64 event_id, int idx) |
807 | { |
808 | if (idx == PIC_UPPER_INDEX) |
809 | event_id <<= sparc_pmu->upper_shift; |
810 | else |
811 | event_id <<= sparc_pmu->lower_shift; |
812 | return event_id; |
813 | } |
814 | |
815 | static u64 mask_for_index(int idx) |
816 | { |
817 | return event_encoding(event_id: sparc_pmu->event_mask, idx); |
818 | } |
819 | |
820 | static u64 nop_for_index(int idx) |
821 | { |
822 | return event_encoding(event_id: idx == PIC_UPPER_INDEX ? |
823 | sparc_pmu->upper_nop : |
824 | sparc_pmu->lower_nop, idx); |
825 | } |
826 | |
827 | static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
828 | { |
829 | u64 enc, val, mask = mask_for_index(idx); |
830 | int pcr_index = 0; |
831 | |
832 | if (sparc_pmu->num_pcrs > 1) |
833 | pcr_index = idx; |
834 | |
835 | enc = perf_event_get_enc(val: cpuc->events[idx]); |
836 | |
837 | val = cpuc->pcr[pcr_index]; |
838 | val &= ~mask; |
839 | val |= event_encoding(event_id: enc, idx); |
840 | cpuc->pcr[pcr_index] = val; |
841 | |
842 | pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); |
843 | } |
844 | |
845 | static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
846 | { |
847 | u64 mask = mask_for_index(idx); |
848 | u64 nop = nop_for_index(idx); |
849 | int pcr_index = 0; |
850 | u64 val; |
851 | |
852 | if (sparc_pmu->num_pcrs > 1) |
853 | pcr_index = idx; |
854 | |
855 | val = cpuc->pcr[pcr_index]; |
856 | val &= ~mask; |
857 | val |= nop; |
858 | cpuc->pcr[pcr_index] = val; |
859 | |
860 | pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); |
861 | } |
862 | |
863 | static u64 sparc_perf_event_update(struct perf_event *event, |
864 | struct hw_perf_event *hwc, int idx) |
865 | { |
866 | int shift = 64 - 32; |
867 | u64 prev_raw_count, new_raw_count; |
868 | s64 delta; |
869 | |
870 | again: |
871 | prev_raw_count = local64_read(&hwc->prev_count); |
872 | new_raw_count = sparc_pmu->read_pmc(idx); |
873 | |
874 | if (local64_cmpxchg(l: &hwc->prev_count, old: prev_raw_count, |
875 | new: new_raw_count) != prev_raw_count) |
876 | goto again; |
877 | |
878 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
879 | delta >>= shift; |
880 | |
881 | local64_add(delta, &event->count); |
882 | local64_sub(delta, &hwc->period_left); |
883 | |
884 | return new_raw_count; |
885 | } |
886 | |
887 | static int sparc_perf_event_set_period(struct perf_event *event, |
888 | struct hw_perf_event *hwc, int idx) |
889 | { |
890 | s64 left = local64_read(&hwc->period_left); |
891 | s64 period = hwc->sample_period; |
892 | int ret = 0; |
893 | |
894 | /* The period may have been changed by PERF_EVENT_IOC_PERIOD */ |
895 | if (unlikely(period != hwc->last_period)) |
896 | left = period - (hwc->last_period - left); |
897 | |
898 | if (unlikely(left <= -period)) { |
899 | left = period; |
900 | local64_set(&hwc->period_left, left); |
901 | hwc->last_period = period; |
902 | ret = 1; |
903 | } |
904 | |
905 | if (unlikely(left <= 0)) { |
906 | left += period; |
907 | local64_set(&hwc->period_left, left); |
908 | hwc->last_period = period; |
909 | ret = 1; |
910 | } |
911 | if (left > MAX_PERIOD) |
912 | left = MAX_PERIOD; |
913 | |
914 | local64_set(&hwc->prev_count, (u64)-left); |
915 | |
916 | sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff); |
917 | |
918 | perf_event_update_userpage(event); |
919 | |
920 | return ret; |
921 | } |
922 | |
923 | static void read_in_all_counters(struct cpu_hw_events *cpuc) |
924 | { |
925 | int i; |
926 | |
927 | for (i = 0; i < cpuc->n_events; i++) { |
928 | struct perf_event *cp = cpuc->event[i]; |
929 | |
930 | if (cpuc->current_idx[i] != PIC_NO_INDEX && |
931 | cpuc->current_idx[i] != cp->hw.idx) { |
932 | sparc_perf_event_update(event: cp, hwc: &cp->hw, |
933 | idx: cpuc->current_idx[i]); |
934 | cpuc->current_idx[i] = PIC_NO_INDEX; |
935 | if (cp->hw.state & PERF_HES_STOPPED) |
936 | cp->hw.state |= PERF_HES_ARCH; |
937 | } |
938 | } |
939 | } |
940 | |
941 | /* On this PMU all PICs are programmed using a single PCR. Calculate |
942 | * the combined control register value. |
943 | * |
944 | * For such chips we require that all of the events have the same |
945 | * configuration, so just fetch the settings from the first entry. |
946 | */ |
947 | static void calculate_single_pcr(struct cpu_hw_events *cpuc) |
948 | { |
949 | int i; |
950 | |
951 | if (!cpuc->n_added) |
952 | goto out; |
953 | |
954 | /* Assign to counters all unassigned events. */ |
955 | for (i = 0; i < cpuc->n_events; i++) { |
956 | struct perf_event *cp = cpuc->event[i]; |
957 | struct hw_perf_event *hwc = &cp->hw; |
958 | int idx = hwc->idx; |
959 | u64 enc; |
960 | |
961 | if (cpuc->current_idx[i] != PIC_NO_INDEX) |
962 | continue; |
963 | |
964 | sparc_perf_event_set_period(event: cp, hwc, idx); |
965 | cpuc->current_idx[i] = idx; |
966 | |
967 | enc = perf_event_get_enc(val: cpuc->events[i]); |
968 | cpuc->pcr[0] &= ~mask_for_index(idx); |
969 | if (hwc->state & PERF_HES_ARCH) { |
970 | cpuc->pcr[0] |= nop_for_index(idx); |
971 | } else { |
972 | cpuc->pcr[0] |= event_encoding(event_id: enc, idx); |
973 | hwc->state = 0; |
974 | } |
975 | } |
976 | out: |
977 | cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; |
978 | } |
979 | |
980 | static void sparc_pmu_start(struct perf_event *event, int flags); |
981 | |
982 | /* On this PMU each PIC has its own PCR control register. */ |
983 | static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) |
984 | { |
985 | int i; |
986 | |
987 | if (!cpuc->n_added) |
988 | goto out; |
989 | |
990 | for (i = 0; i < cpuc->n_events; i++) { |
991 | struct perf_event *cp = cpuc->event[i]; |
992 | struct hw_perf_event *hwc = &cp->hw; |
993 | int idx = hwc->idx; |
994 | |
995 | if (cpuc->current_idx[i] != PIC_NO_INDEX) |
996 | continue; |
997 | |
998 | cpuc->current_idx[i] = idx; |
999 | |
1000 | if (cp->hw.state & PERF_HES_ARCH) |
1001 | continue; |
1002 | |
1003 | sparc_pmu_start(event: cp, PERF_EF_RELOAD); |
1004 | } |
1005 | out: |
1006 | for (i = 0; i < cpuc->n_events; i++) { |
1007 | struct perf_event *cp = cpuc->event[i]; |
1008 | int idx = cp->hw.idx; |
1009 | |
1010 | cpuc->pcr[idx] |= cp->hw.config_base; |
1011 | } |
1012 | } |
1013 | |
1014 | /* If performance event entries have been added, move existing events |
1015 | * around (if necessary) and then assign new entries to counters. |
1016 | */ |
1017 | static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) |
1018 | { |
1019 | if (cpuc->n_added) |
1020 | read_in_all_counters(cpuc); |
1021 | |
1022 | if (sparc_pmu->num_pcrs == 1) { |
1023 | calculate_single_pcr(cpuc); |
1024 | } else { |
1025 | calculate_multiple_pcrs(cpuc); |
1026 | } |
1027 | } |
1028 | |
1029 | static void sparc_pmu_enable(struct pmu *pmu) |
1030 | { |
1031 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1032 | int i; |
1033 | |
1034 | if (cpuc->enabled) |
1035 | return; |
1036 | |
1037 | cpuc->enabled = 1; |
1038 | barrier(); |
1039 | |
1040 | if (cpuc->n_events) |
1041 | update_pcrs_for_enable(cpuc); |
1042 | |
1043 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1044 | pcr_ops->write_pcr(i, cpuc->pcr[i]); |
1045 | } |
1046 | |
1047 | static void sparc_pmu_disable(struct pmu *pmu) |
1048 | { |
1049 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1050 | int i; |
1051 | |
1052 | if (!cpuc->enabled) |
1053 | return; |
1054 | |
1055 | cpuc->enabled = 0; |
1056 | cpuc->n_added = 0; |
1057 | |
1058 | for (i = 0; i < sparc_pmu->num_pcrs; i++) { |
1059 | u64 val = cpuc->pcr[i]; |
1060 | |
1061 | val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | |
1062 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); |
1063 | cpuc->pcr[i] = val; |
1064 | pcr_ops->write_pcr(i, cpuc->pcr[i]); |
1065 | } |
1066 | } |
1067 | |
1068 | static int active_event_index(struct cpu_hw_events *cpuc, |
1069 | struct perf_event *event) |
1070 | { |
1071 | int i; |
1072 | |
1073 | for (i = 0; i < cpuc->n_events; i++) { |
1074 | if (cpuc->event[i] == event) |
1075 | break; |
1076 | } |
1077 | BUG_ON(i == cpuc->n_events); |
1078 | return cpuc->current_idx[i]; |
1079 | } |
1080 | |
1081 | static void sparc_pmu_start(struct perf_event *event, int flags) |
1082 | { |
1083 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1084 | int idx = active_event_index(cpuc, event); |
1085 | |
1086 | if (flags & PERF_EF_RELOAD) { |
1087 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); |
1088 | sparc_perf_event_set_period(event, hwc: &event->hw, idx); |
1089 | } |
1090 | |
1091 | event->hw.state = 0; |
1092 | |
1093 | sparc_pmu_enable_event(cpuc, hwc: &event->hw, idx); |
1094 | |
1095 | perf_event_update_userpage(event); |
1096 | } |
1097 | |
1098 | static void sparc_pmu_stop(struct perf_event *event, int flags) |
1099 | { |
1100 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1101 | int idx = active_event_index(cpuc, event); |
1102 | |
1103 | if (!(event->hw.state & PERF_HES_STOPPED)) { |
1104 | sparc_pmu_disable_event(cpuc, hwc: &event->hw, idx); |
1105 | event->hw.state |= PERF_HES_STOPPED; |
1106 | } |
1107 | |
1108 | if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { |
1109 | sparc_perf_event_update(event, hwc: &event->hw, idx); |
1110 | event->hw.state |= PERF_HES_UPTODATE; |
1111 | } |
1112 | } |
1113 | |
1114 | static void sparc_pmu_del(struct perf_event *event, int _flags) |
1115 | { |
1116 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1117 | unsigned long flags; |
1118 | int i; |
1119 | |
1120 | local_irq_save(flags); |
1121 | |
1122 | for (i = 0; i < cpuc->n_events; i++) { |
1123 | if (event == cpuc->event[i]) { |
1124 | /* Absorb the final count and turn off the |
1125 | * event. |
1126 | */ |
1127 | sparc_pmu_stop(event, PERF_EF_UPDATE); |
1128 | |
1129 | /* Shift remaining entries down into |
1130 | * the existing slot. |
1131 | */ |
1132 | while (++i < cpuc->n_events) { |
1133 | cpuc->event[i - 1] = cpuc->event[i]; |
1134 | cpuc->events[i - 1] = cpuc->events[i]; |
1135 | cpuc->current_idx[i - 1] = |
1136 | cpuc->current_idx[i]; |
1137 | } |
1138 | |
1139 | perf_event_update_userpage(event); |
1140 | |
1141 | cpuc->n_events--; |
1142 | break; |
1143 | } |
1144 | } |
1145 | |
1146 | local_irq_restore(flags); |
1147 | } |
1148 | |
1149 | static void sparc_pmu_read(struct perf_event *event) |
1150 | { |
1151 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1152 | int idx = active_event_index(cpuc, event); |
1153 | struct hw_perf_event *hwc = &event->hw; |
1154 | |
1155 | sparc_perf_event_update(event, hwc, idx); |
1156 | } |
1157 | |
/* Number of perf events holding the PMU: raised in perf_event_grab_pmc(),
 * dropped in perf_event_release_pmc().
 */
static atomic_t active_events = ATOMIC_INIT(0);
/* Taken around the hand-over of the counters between the NMI watchdog
 * and perf when active_events leaves or reaches zero.
 */
static DEFINE_MUTEX(pmc_grab_mutex);
1160 | |
1161 | static void perf_stop_nmi_watchdog(void *unused) |
1162 | { |
1163 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1164 | int i; |
1165 | |
1166 | stop_nmi_watchdog(NULL); |
1167 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1168 | cpuc->pcr[i] = pcr_ops->read_pcr(i); |
1169 | } |
1170 | |
1171 | static void perf_event_grab_pmc(void) |
1172 | { |
1173 | if (atomic_inc_not_zero(v: &active_events)) |
1174 | return; |
1175 | |
1176 | mutex_lock(&pmc_grab_mutex); |
1177 | if (atomic_read(v: &active_events) == 0) { |
1178 | if (atomic_read(&nmi_active) > 0) { |
1179 | on_each_cpu(func: perf_stop_nmi_watchdog, NULL, wait: 1); |
1180 | BUG_ON(atomic_read(&nmi_active) != 0); |
1181 | } |
1182 | atomic_inc(v: &active_events); |
1183 | } |
1184 | mutex_unlock(lock: &pmc_grab_mutex); |
1185 | } |
1186 | |
1187 | static void perf_event_release_pmc(void) |
1188 | { |
1189 | if (atomic_dec_and_mutex_lock(cnt: &active_events, lock: &pmc_grab_mutex)) { |
1190 | if (atomic_read(&nmi_active) == 0) |
1191 | on_each_cpu(start_nmi_watchdog, NULL, 1); |
1192 | mutex_unlock(lock: &pmc_grab_mutex); |
1193 | } |
1194 | } |
1195 | |
1196 | static const struct perf_event_map *sparc_map_cache_event(u64 config) |
1197 | { |
1198 | unsigned int cache_type, cache_op, cache_result; |
1199 | const struct perf_event_map *pmap; |
1200 | |
1201 | if (!sparc_pmu->cache_map) |
1202 | return ERR_PTR(error: -ENOENT); |
1203 | |
1204 | cache_type = (config >> 0) & 0xff; |
1205 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) |
1206 | return ERR_PTR(error: -EINVAL); |
1207 | |
1208 | cache_op = (config >> 8) & 0xff; |
1209 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) |
1210 | return ERR_PTR(error: -EINVAL); |
1211 | |
1212 | cache_result = (config >> 16) & 0xff; |
1213 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) |
1214 | return ERR_PTR(error: -EINVAL); |
1215 | |
1216 | pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); |
1217 | |
1218 | if (pmap->encoding == CACHE_OP_UNSUPPORTED) |
1219 | return ERR_PTR(error: -ENOENT); |
1220 | |
1221 | if (pmap->encoding == CACHE_OP_NONSENSE) |
1222 | return ERR_PTR(error: -EINVAL); |
1223 | |
1224 | return pmap; |
1225 | } |
1226 | |
/* event->destroy hook installed by sparc_pmu_event_init(): gives back the
 * PMU reference the event took with perf_event_grab_pmc().
 */
static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}
1231 | |
1232 | /* Make sure all events can be scheduled into the hardware at |
1233 | * the same time. This is simplified by the fact that we only |
1234 | * need to support 2 simultaneous HW events. |
1235 | * |
1236 | * As a side effect, the evts[]->hw.idx values will be assigned |
1237 | * on success. These are pending indexes. When the events are |
1238 | * actually programmed into the chip, these values will propagate |
1239 | * to the per-cpu cpuc->current_idx[] slots, see the code in |
1240 | * maybe_change_configuration() for details. |
1241 | */ |
1242 | static int sparc_check_constraints(struct perf_event **evts, |
1243 | unsigned long *events, int n_ev) |
1244 | { |
1245 | u8 msk0 = 0, msk1 = 0; |
1246 | int idx0 = 0; |
1247 | |
1248 | /* This case is possible when we are invoked from |
1249 | * hw_perf_group_sched_in(). |
1250 | */ |
1251 | if (!n_ev) |
1252 | return 0; |
1253 | |
1254 | if (n_ev > sparc_pmu->max_hw_events) |
1255 | return -1; |
1256 | |
1257 | if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { |
1258 | int i; |
1259 | |
1260 | for (i = 0; i < n_ev; i++) |
1261 | evts[i]->hw.idx = i; |
1262 | return 0; |
1263 | } |
1264 | |
1265 | msk0 = perf_event_get_msk(val: events[0]); |
1266 | if (n_ev == 1) { |
1267 | if (msk0 & PIC_LOWER) |
1268 | idx0 = 1; |
1269 | goto success; |
1270 | } |
1271 | BUG_ON(n_ev != 2); |
1272 | msk1 = perf_event_get_msk(val: events[1]); |
1273 | |
1274 | /* If both events can go on any counter, OK. */ |
1275 | if (msk0 == (PIC_UPPER | PIC_LOWER) && |
1276 | msk1 == (PIC_UPPER | PIC_LOWER)) |
1277 | goto success; |
1278 | |
1279 | /* If one event is limited to a specific counter, |
1280 | * and the other can go on both, OK. |
1281 | */ |
1282 | if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) && |
1283 | msk1 == (PIC_UPPER | PIC_LOWER)) { |
1284 | if (msk0 & PIC_LOWER) |
1285 | idx0 = 1; |
1286 | goto success; |
1287 | } |
1288 | |
1289 | if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && |
1290 | msk0 == (PIC_UPPER | PIC_LOWER)) { |
1291 | if (msk1 & PIC_UPPER) |
1292 | idx0 = 1; |
1293 | goto success; |
1294 | } |
1295 | |
1296 | /* If the events are fixed to different counters, OK. */ |
1297 | if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) || |
1298 | (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) { |
1299 | if (msk0 & PIC_LOWER) |
1300 | idx0 = 1; |
1301 | goto success; |
1302 | } |
1303 | |
1304 | /* Otherwise, there is a conflict. */ |
1305 | return -1; |
1306 | |
1307 | success: |
1308 | evts[0]->hw.idx = idx0; |
1309 | if (n_ev == 2) |
1310 | evts[1]->hw.idx = idx0 ^ 1; |
1311 | return 0; |
1312 | } |
1313 | |
1314 | static int check_excludes(struct perf_event **evts, int n_prev, int n_new) |
1315 | { |
1316 | int eu = 0, ek = 0, eh = 0; |
1317 | struct perf_event *event; |
1318 | int i, n, first; |
1319 | |
1320 | if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME)) |
1321 | return 0; |
1322 | |
1323 | n = n_prev + n_new; |
1324 | if (n <= 1) |
1325 | return 0; |
1326 | |
1327 | first = 1; |
1328 | for (i = 0; i < n; i++) { |
1329 | event = evts[i]; |
1330 | if (first) { |
1331 | eu = event->attr.exclude_user; |
1332 | ek = event->attr.exclude_kernel; |
1333 | eh = event->attr.exclude_hv; |
1334 | first = 0; |
1335 | } else if (event->attr.exclude_user != eu || |
1336 | event->attr.exclude_kernel != ek || |
1337 | event->attr.exclude_hv != eh) { |
1338 | return -EAGAIN; |
1339 | } |
1340 | } |
1341 | |
1342 | return 0; |
1343 | } |
1344 | |
1345 | static int collect_events(struct perf_event *group, int max_count, |
1346 | struct perf_event *evts[], unsigned long *events, |
1347 | int *current_idx) |
1348 | { |
1349 | struct perf_event *event; |
1350 | int n = 0; |
1351 | |
1352 | if (!is_software_event(event: group)) { |
1353 | if (n >= max_count) |
1354 | return -1; |
1355 | evts[n] = group; |
1356 | events[n] = group->hw.event_base; |
1357 | current_idx[n++] = PIC_NO_INDEX; |
1358 | } |
1359 | for_each_sibling_event(event, group) { |
1360 | if (!is_software_event(event) && |
1361 | event->state != PERF_EVENT_STATE_OFF) { |
1362 | if (n >= max_count) |
1363 | return -1; |
1364 | evts[n] = event; |
1365 | events[n] = event->hw.event_base; |
1366 | current_idx[n++] = PIC_NO_INDEX; |
1367 | } |
1368 | } |
1369 | return n; |
1370 | } |
1371 | |
1372 | static int sparc_pmu_add(struct perf_event *event, int ef_flags) |
1373 | { |
1374 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1375 | int n0, ret = -EAGAIN; |
1376 | unsigned long flags; |
1377 | |
1378 | local_irq_save(flags); |
1379 | |
1380 | n0 = cpuc->n_events; |
1381 | if (n0 >= sparc_pmu->max_hw_events) |
1382 | goto out; |
1383 | |
1384 | cpuc->event[n0] = event; |
1385 | cpuc->events[n0] = event->hw.event_base; |
1386 | cpuc->current_idx[n0] = PIC_NO_INDEX; |
1387 | |
1388 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
1389 | if (!(ef_flags & PERF_EF_START)) |
1390 | event->hw.state |= PERF_HES_ARCH; |
1391 | |
1392 | /* |
1393 | * If group events scheduling transaction was started, |
1394 | * skip the schedulability test here, it will be performed |
1395 | * at commit time(->commit_txn) as a whole |
1396 | */ |
1397 | if (cpuc->txn_flags & PERF_PMU_TXN_ADD) |
1398 | goto nocheck; |
1399 | |
1400 | if (check_excludes(evts: cpuc->event, n_prev: n0, n_new: 1)) |
1401 | goto out; |
1402 | if (sparc_check_constraints(evts: cpuc->event, events: cpuc->events, n_ev: n0 + 1)) |
1403 | goto out; |
1404 | |
1405 | nocheck: |
1406 | cpuc->n_events++; |
1407 | cpuc->n_added++; |
1408 | |
1409 | ret = 0; |
1410 | out: |
1411 | local_irq_restore(flags); |
1412 | return ret; |
1413 | } |
1414 | |
1415 | static int sparc_pmu_event_init(struct perf_event *event) |
1416 | { |
1417 | struct perf_event_attr *attr = &event->attr; |
1418 | struct perf_event *evts[MAX_HWEVENTS]; |
1419 | struct hw_perf_event *hwc = &event->hw; |
1420 | unsigned long events[MAX_HWEVENTS]; |
1421 | int current_idx_dmy[MAX_HWEVENTS]; |
1422 | const struct perf_event_map *pmap; |
1423 | int n; |
1424 | |
1425 | if (atomic_read(&nmi_active) < 0) |
1426 | return -ENODEV; |
1427 | |
1428 | /* does not support taken branch sampling */ |
1429 | if (has_branch_stack(event)) |
1430 | return -EOPNOTSUPP; |
1431 | |
1432 | switch (attr->type) { |
1433 | case PERF_TYPE_HARDWARE: |
1434 | if (attr->config >= sparc_pmu->max_events) |
1435 | return -EINVAL; |
1436 | pmap = sparc_pmu->event_map(attr->config); |
1437 | break; |
1438 | |
1439 | case PERF_TYPE_HW_CACHE: |
1440 | pmap = sparc_map_cache_event(config: attr->config); |
1441 | if (IS_ERR(ptr: pmap)) |
1442 | return PTR_ERR(ptr: pmap); |
1443 | break; |
1444 | |
1445 | case PERF_TYPE_RAW: |
1446 | pmap = NULL; |
1447 | break; |
1448 | |
1449 | default: |
1450 | return -ENOENT; |
1451 | |
1452 | } |
1453 | |
1454 | if (pmap) { |
1455 | hwc->event_base = perf_event_encode(pmap); |
1456 | } else { |
1457 | /* |
1458 | * User gives us "(encoding << 16) | pic_mask" for |
1459 | * PERF_TYPE_RAW events. |
1460 | */ |
1461 | hwc->event_base = attr->config; |
1462 | } |
1463 | |
1464 | /* We save the enable bits in the config_base. */ |
1465 | hwc->config_base = sparc_pmu->irq_bit; |
1466 | if (!attr->exclude_user) |
1467 | hwc->config_base |= sparc_pmu->user_bit; |
1468 | if (!attr->exclude_kernel) |
1469 | hwc->config_base |= sparc_pmu->priv_bit; |
1470 | if (!attr->exclude_hv) |
1471 | hwc->config_base |= sparc_pmu->hv_bit; |
1472 | |
1473 | n = 0; |
1474 | if (event->group_leader != event) { |
1475 | n = collect_events(group: event->group_leader, |
1476 | max_count: sparc_pmu->max_hw_events - 1, |
1477 | evts, events, current_idx: current_idx_dmy); |
1478 | if (n < 0) |
1479 | return -EINVAL; |
1480 | } |
1481 | events[n] = hwc->event_base; |
1482 | evts[n] = event; |
1483 | |
1484 | if (check_excludes(evts, n_prev: n, n_new: 1)) |
1485 | return -EINVAL; |
1486 | |
1487 | if (sparc_check_constraints(evts, events, n_ev: n + 1)) |
1488 | return -EINVAL; |
1489 | |
1490 | hwc->idx = PIC_NO_INDEX; |
1491 | |
1492 | /* Try to do all error checking before this point, as unwinding |
1493 | * state after grabbing the PMC is difficult. |
1494 | */ |
1495 | perf_event_grab_pmc(); |
1496 | event->destroy = hw_perf_event_destroy; |
1497 | |
1498 | if (!hwc->sample_period) { |
1499 | hwc->sample_period = MAX_PERIOD; |
1500 | hwc->last_period = hwc->sample_period; |
1501 | local64_set(&hwc->period_left, hwc->sample_period); |
1502 | } |
1503 | |
1504 | return 0; |
1505 | } |
1506 | |
1507 | /* |
1508 | * Start group events scheduling transaction |
1509 | * Set the flag to make pmu::enable() not perform the |
1510 | * schedulability test, it will be performed at commit time |
1511 | */ |
1512 | static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) |
1513 | { |
1514 | struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); |
1515 | |
1516 | WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ |
1517 | |
1518 | cpuhw->txn_flags = txn_flags; |
1519 | if (txn_flags & ~PERF_PMU_TXN_ADD) |
1520 | return; |
1521 | |
1522 | perf_pmu_disable(pmu); |
1523 | } |
1524 | |
1525 | /* |
1526 | * Stop group events scheduling transaction |
1527 | * Clear the flag and pmu::enable() will perform the |
1528 | * schedulability test. |
1529 | */ |
1530 | static void sparc_pmu_cancel_txn(struct pmu *pmu) |
1531 | { |
1532 | struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); |
1533 | unsigned int txn_flags; |
1534 | |
1535 | WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ |
1536 | |
1537 | txn_flags = cpuhw->txn_flags; |
1538 | cpuhw->txn_flags = 0; |
1539 | if (txn_flags & ~PERF_PMU_TXN_ADD) |
1540 | return; |
1541 | |
1542 | perf_pmu_enable(pmu); |
1543 | } |
1544 | |
1545 | /* |
1546 | * Commit group events scheduling transaction |
1547 | * Perform the group schedulability test as a whole |
1548 | * Return 0 if success |
1549 | */ |
1550 | static int sparc_pmu_commit_txn(struct pmu *pmu) |
1551 | { |
1552 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1553 | int n; |
1554 | |
1555 | if (!sparc_pmu) |
1556 | return -EINVAL; |
1557 | |
1558 | WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ |
1559 | |
1560 | if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { |
1561 | cpuc->txn_flags = 0; |
1562 | return 0; |
1563 | } |
1564 | |
1565 | n = cpuc->n_events; |
1566 | if (check_excludes(evts: cpuc->event, n_prev: 0, n_new: n)) |
1567 | return -EINVAL; |
1568 | if (sparc_check_constraints(evts: cpuc->event, events: cpuc->events, n_ev: n)) |
1569 | return -EAGAIN; |
1570 | |
1571 | cpuc->txn_flags = 0; |
1572 | perf_pmu_enable(pmu); |
1573 | return 0; |
1574 | } |
1575 | |
/* Callback table registered with the perf core by init_hw_perf_events();
 * every hook is implemented in this file.
 */
static struct pmu pmu = {
	.pmu_enable	= sparc_pmu_enable,
	.pmu_disable	= sparc_pmu_disable,
	.event_init	= sparc_pmu_event_init,
	.add		= sparc_pmu_add,
	.del		= sparc_pmu_del,
	.start		= sparc_pmu_start,
	.stop		= sparc_pmu_stop,
	.read		= sparc_pmu_read,
	/* Group scheduling transactions. */
	.start_txn	= sparc_pmu_start_txn,
	.cancel_txn	= sparc_pmu_cancel_txn,
	.commit_txn	= sparc_pmu_commit_txn,
};
1589 | |
1590 | void perf_event_print_debug(void) |
1591 | { |
1592 | unsigned long flags; |
1593 | int cpu, i; |
1594 | |
1595 | if (!sparc_pmu) |
1596 | return; |
1597 | |
1598 | local_irq_save(flags); |
1599 | |
1600 | cpu = smp_processor_id(); |
1601 | |
1602 | pr_info("\n" ); |
1603 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1604 | pr_info("CPU#%d: PCR%d[%016llx]\n" , |
1605 | cpu, i, pcr_ops->read_pcr(i)); |
1606 | for (i = 0; i < sparc_pmu->num_pic_regs; i++) |
1607 | pr_info("CPU#%d: PIC%d[%016llx]\n" , |
1608 | cpu, i, pcr_ops->read_pic(i)); |
1609 | |
1610 | local_irq_restore(flags); |
1611 | } |
1612 | |
1613 | static int __kprobes perf_event_nmi_handler(struct notifier_block *self, |
1614 | unsigned long cmd, void *__args) |
1615 | { |
1616 | struct die_args *args = __args; |
1617 | struct perf_sample_data data; |
1618 | struct cpu_hw_events *cpuc; |
1619 | struct pt_regs *regs; |
1620 | u64 finish_clock; |
1621 | u64 start_clock; |
1622 | int i; |
1623 | |
1624 | if (!atomic_read(v: &active_events)) |
1625 | return NOTIFY_DONE; |
1626 | |
1627 | switch (cmd) { |
1628 | case DIE_NMI: |
1629 | break; |
1630 | |
1631 | default: |
1632 | return NOTIFY_DONE; |
1633 | } |
1634 | |
1635 | start_clock = sched_clock(); |
1636 | |
1637 | regs = args->regs; |
1638 | |
1639 | cpuc = this_cpu_ptr(&cpu_hw_events); |
1640 | |
1641 | /* If the PMU has the TOE IRQ enable bits, we need to do a |
1642 | * dummy write to the %pcr to clear the overflow bits and thus |
1643 | * the interrupt. |
1644 | * |
1645 | * Do this before we peek at the counters to determine |
1646 | * overflow so we don't lose any events. |
1647 | */ |
1648 | if (sparc_pmu->irq_bit && |
1649 | sparc_pmu->num_pcrs == 1) |
1650 | pcr_ops->write_pcr(0, cpuc->pcr[0]); |
1651 | |
1652 | for (i = 0; i < cpuc->n_events; i++) { |
1653 | struct perf_event *event = cpuc->event[i]; |
1654 | int idx = cpuc->current_idx[i]; |
1655 | struct hw_perf_event *hwc; |
1656 | u64 val; |
1657 | |
1658 | if (sparc_pmu->irq_bit && |
1659 | sparc_pmu->num_pcrs > 1) |
1660 | pcr_ops->write_pcr(idx, cpuc->pcr[idx]); |
1661 | |
1662 | hwc = &event->hw; |
1663 | val = sparc_perf_event_update(event, hwc, idx); |
1664 | if (val & (1ULL << 31)) |
1665 | continue; |
1666 | |
1667 | perf_sample_data_init(data: &data, addr: 0, period: hwc->last_period); |
1668 | if (!sparc_perf_event_set_period(event, hwc, idx)) |
1669 | continue; |
1670 | |
1671 | if (perf_event_overflow(event, data: &data, regs)) |
1672 | sparc_pmu_stop(event, flags: 0); |
1673 | } |
1674 | |
1675 | finish_clock = sched_clock(); |
1676 | |
1677 | perf_sample_event_took(sample_len_ns: finish_clock - start_clock); |
1678 | |
1679 | return NOTIFY_STOP; |
1680 | } |
1681 | |
/* Die-chain hook that routes notifications to perf_event_nmi_handler();
 * registered by init_hw_perf_events().
 */
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
};
1685 | |
1686 | static bool __init supported_pmu(void) |
1687 | { |
1688 | if (!strcmp(sparc_pmu_type, "ultra3" ) || |
1689 | !strcmp(sparc_pmu_type, "ultra3+" ) || |
1690 | !strcmp(sparc_pmu_type, "ultra3i" ) || |
1691 | !strcmp(sparc_pmu_type, "ultra4+" )) { |
1692 | sparc_pmu = &ultra3_pmu; |
1693 | return true; |
1694 | } |
1695 | if (!strcmp(sparc_pmu_type, "niagara" )) { |
1696 | sparc_pmu = &niagara1_pmu; |
1697 | return true; |
1698 | } |
1699 | if (!strcmp(sparc_pmu_type, "niagara2" ) || |
1700 | !strcmp(sparc_pmu_type, "niagara3" )) { |
1701 | sparc_pmu = &niagara2_pmu; |
1702 | return true; |
1703 | } |
1704 | if (!strcmp(sparc_pmu_type, "niagara4" ) || |
1705 | !strcmp(sparc_pmu_type, "niagara5" )) { |
1706 | sparc_pmu = &niagara4_pmu; |
1707 | return true; |
1708 | } |
1709 | if (!strcmp(sparc_pmu_type, "sparc-m7" )) { |
1710 | sparc_pmu = &sparc_m7_pmu; |
1711 | return true; |
1712 | } |
1713 | return false; |
1714 | } |
1715 | |
1716 | static int __init init_hw_perf_events(void) |
1717 | { |
1718 | int err; |
1719 | |
1720 | pr_info("Performance events: " ); |
1721 | |
1722 | err = pcr_arch_init(); |
1723 | if (err || !supported_pmu()) { |
1724 | pr_cont("No support for PMU type '%s'\n" , sparc_pmu_type); |
1725 | return 0; |
1726 | } |
1727 | |
1728 | pr_cont("Supported PMU type is '%s'\n" , sparc_pmu_type); |
1729 | |
1730 | perf_pmu_register(pmu: &pmu, name: "cpu" , type: PERF_TYPE_RAW); |
1731 | register_die_notifier(nb: &perf_event_nmi_notifier); |
1732 | |
1733 | return 0; |
1734 | } |
1735 | pure_initcall(init_hw_perf_events); |
1736 | |
1737 | void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, |
1738 | struct pt_regs *regs) |
1739 | { |
1740 | unsigned long ksp, fp; |
1741 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1742 | int graph = 0; |
1743 | #endif |
1744 | |
1745 | stack_trace_flush(); |
1746 | |
1747 | perf_callchain_store(ctx: entry, ip: regs->tpc); |
1748 | |
1749 | ksp = regs->u_regs[UREG_I6]; |
1750 | fp = ksp + STACK_BIAS; |
1751 | do { |
1752 | struct sparc_stackf *sf; |
1753 | struct pt_regs *regs; |
1754 | unsigned long pc; |
1755 | |
1756 | if (!kstack_valid(current_thread_info(), sp: fp)) |
1757 | break; |
1758 | |
1759 | sf = (struct sparc_stackf *) fp; |
1760 | regs = (struct pt_regs *) (sf + 1); |
1761 | |
1762 | if (kstack_is_trap_frame(current_thread_info(), regs)) { |
1763 | if (user_mode(regs)) |
1764 | break; |
1765 | pc = regs->tpc; |
1766 | fp = regs->u_regs[UREG_I6] + STACK_BIAS; |
1767 | } else { |
1768 | pc = sf->callers_pc; |
1769 | fp = (unsigned long)sf->fp + STACK_BIAS; |
1770 | } |
1771 | perf_callchain_store(ctx: entry, ip: pc); |
1772 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1773 | if ((pc + 8UL) == (unsigned long) &return_to_handler) { |
1774 | struct ftrace_ret_stack *ret_stack; |
1775 | ret_stack = ftrace_graph_get_ret_stack(current, |
1776 | idx: graph); |
1777 | if (ret_stack) { |
1778 | pc = ret_stack->ret; |
1779 | perf_callchain_store(ctx: entry, ip: pc); |
1780 | graph++; |
1781 | } |
1782 | } |
1783 | #endif |
1784 | } while (entry->nr < entry->max_stack); |
1785 | } |
1786 | |
1787 | static inline int |
1788 | valid_user_frame(const void __user *fp, unsigned long size) |
1789 | { |
1790 | /* addresses should be at least 4-byte aligned */ |
1791 | if (((unsigned long) fp) & 3) |
1792 | return 0; |
1793 | |
1794 | return (__range_not_ok(fp, size, TASK_SIZE) == 0); |
1795 | } |
1796 | |
1797 | static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, |
1798 | struct pt_regs *regs) |
1799 | { |
1800 | unsigned long ufp; |
1801 | |
1802 | ufp = regs->u_regs[UREG_FP] + STACK_BIAS; |
1803 | do { |
1804 | struct sparc_stackf __user *usf; |
1805 | struct sparc_stackf sf; |
1806 | unsigned long pc; |
1807 | |
1808 | usf = (struct sparc_stackf __user *)ufp; |
1809 | if (!valid_user_frame(fp: usf, size: sizeof(sf))) |
1810 | break; |
1811 | |
1812 | if (__copy_from_user_inatomic(to: &sf, from: usf, n: sizeof(sf))) |
1813 | break; |
1814 | |
1815 | pc = sf.callers_pc; |
1816 | ufp = (unsigned long)sf.fp + STACK_BIAS; |
1817 | perf_callchain_store(ctx: entry, ip: pc); |
1818 | } while (entry->nr < entry->max_stack); |
1819 | } |
1820 | |
1821 | static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, |
1822 | struct pt_regs *regs) |
1823 | { |
1824 | unsigned long ufp; |
1825 | |
1826 | ufp = regs->u_regs[UREG_FP] & 0xffffffffUL; |
1827 | do { |
1828 | unsigned long pc; |
1829 | |
1830 | if (thread32_stack_is_64bit(ufp)) { |
1831 | struct sparc_stackf __user *usf; |
1832 | struct sparc_stackf sf; |
1833 | |
1834 | ufp += STACK_BIAS; |
1835 | usf = (struct sparc_stackf __user *)ufp; |
1836 | if (__copy_from_user_inatomic(to: &sf, from: usf, n: sizeof(sf))) |
1837 | break; |
1838 | pc = sf.callers_pc & 0xffffffff; |
1839 | ufp = ((unsigned long) sf.fp) & 0xffffffff; |
1840 | } else { |
1841 | struct sparc_stackf32 __user *usf; |
1842 | struct sparc_stackf32 sf; |
1843 | usf = (struct sparc_stackf32 __user *)ufp; |
1844 | if (__copy_from_user_inatomic(to: &sf, from: usf, n: sizeof(sf))) |
1845 | break; |
1846 | pc = sf.callers_pc; |
1847 | ufp = (unsigned long)sf.fp; |
1848 | } |
1849 | perf_callchain_store(ctx: entry, ip: pc); |
1850 | } while (entry->nr < entry->max_stack); |
1851 | } |
1852 | |
1853 | void |
1854 | perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) |
1855 | { |
1856 | u64 saved_fault_address = current_thread_info()->fault_address; |
1857 | u8 saved_fault_code = get_thread_fault_code(); |
1858 | |
1859 | perf_callchain_store(ctx: entry, ip: regs->tpc); |
1860 | |
1861 | if (!current->mm) |
1862 | return; |
1863 | |
1864 | flushw_user(); |
1865 | |
1866 | pagefault_disable(); |
1867 | |
1868 | if (test_thread_flag(TIF_32BIT)) |
1869 | perf_callchain_user_32(entry, regs); |
1870 | else |
1871 | perf_callchain_user_64(entry, regs); |
1872 | |
1873 | pagefault_enable(); |
1874 | |
1875 | set_thread_fault_code(saved_fault_code); |
1876 | current_thread_info()->fault_address = saved_fault_address; |
1877 | } |
1878 | |