1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Linux performance counter support for LoongArch. |
4 | * |
5 | * Copyright (C) 2022 Loongson Technology Corporation Limited |
6 | * |
7 | * Derived from MIPS: |
8 | * Copyright (C) 2010 MIPS Technologies, Inc. |
9 | * Copyright (C) 2011 Cavium Networks, Inc. |
10 | * Author: Deng-Cheng Zhu |
11 | */ |
12 | |
13 | #include <linux/cpumask.h> |
14 | #include <linux/interrupt.h> |
15 | #include <linux/smp.h> |
16 | #include <linux/kernel.h> |
17 | #include <linux/perf_event.h> |
18 | #include <linux/uaccess.h> |
19 | #include <linux/sched/task_stack.h> |
20 | |
21 | #include <asm/irq.h> |
22 | #include <asm/irq_regs.h> |
23 | #include <asm/stacktrace.h> |
24 | #include <asm/unwind.h> |
25 | |
26 | /* |
27 | * Get the return address for a single stackframe and return a pointer to the |
28 | * next frame tail. |
29 | */ |
30 | static unsigned long |
31 | user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) |
32 | { |
33 | unsigned long err; |
34 | unsigned long __user *user_frame_tail; |
35 | struct stack_frame buftail; |
36 | |
37 | user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame)); |
38 | |
	/* Also check accessibility of one struct stack_frame beyond */
40 | if (!access_ok(user_frame_tail, sizeof(buftail))) |
41 | return 0; |
42 | |
43 | pagefault_disable(); |
	err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
45 | pagefault_enable(); |
46 | |
47 | if (err || (unsigned long)user_frame_tail >= buftail.fp) |
48 | return 0; |
49 | |
	perf_callchain_store(entry, buftail.ra);
51 | |
52 | return buftail.fp; |
53 | } |
54 | |
55 | void perf_callchain_user(struct perf_callchain_entry_ctx *entry, |
56 | struct pt_regs *regs) |
57 | { |
58 | unsigned long fp; |
59 | |
60 | if (perf_guest_state()) { |
		/* Guest OS callchains are not supported yet */
62 | return; |
63 | } |
64 | |
	perf_callchain_store(entry, regs->csr_era);
66 | |
67 | fp = regs->regs[22]; |
68 | |
69 | while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) |
70 | fp = user_backtrace(entry, fp); |
71 | } |
72 | |
73 | void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, |
74 | struct pt_regs *regs) |
75 | { |
76 | struct unwind_state state; |
77 | unsigned long addr; |
78 | |
	for (unwind_start(&state, current, regs);
	     !unwind_done(&state); unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || perf_callchain_store(entry, addr))
83 | return; |
84 | } |
85 | } |
86 | |
87 | #define LOONGARCH_MAX_HWEVENTS 32 |
88 | |
89 | struct cpu_hw_events { |
90 | /* Array of events on this cpu. */ |
91 | struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; |
92 | |
93 | /* |
94 | * Set the bit (indexed by the counter number) when the counter |
95 | * is used for an event. |
96 | */ |
97 | unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; |
98 | |
99 | /* |
100 | * Software copy of the control register for each performance counter. |
101 | */ |
102 | unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; |
103 | }; |
104 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
105 | .saved_ctrl = {0}, |
106 | }; |
107 | |
108 | /* The description of LoongArch performance events. */ |
109 | struct loongarch_perf_event { |
110 | unsigned int event_id; |
111 | }; |
112 | |
113 | static struct loongarch_perf_event raw_event; |
114 | static DEFINE_MUTEX(raw_event_mutex); |
115 | |
116 | #define C(x) PERF_COUNT_HW_CACHE_##x |
117 | #define HW_OP_UNSUPPORTED 0xffffffff |
118 | #define CACHE_OP_UNSUPPORTED 0xffffffff |
119 | |
120 | #define PERF_MAP_ALL_UNSUPPORTED \ |
121 | [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED} |
122 | |
123 | #define PERF_CACHE_MAP_ALL_UNSUPPORTED \ |
124 | [0 ... C(MAX) - 1] = { \ |
125 | [0 ... C(OP_MAX) - 1] = { \ |
126 | [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED}, \ |
127 | }, \ |
128 | } |
129 | |
130 | struct loongarch_pmu { |
131 | u64 max_period; |
132 | u64 valid_count; |
133 | u64 overflow; |
134 | const char *name; |
135 | unsigned int num_counters; |
136 | u64 (*read_counter)(unsigned int idx); |
137 | void (*write_counter)(unsigned int idx, u64 val); |
138 | const struct loongarch_perf_event *(*map_raw_event)(u64 config); |
139 | const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; |
140 | const struct loongarch_perf_event (*cache_event_map) |
141 | [PERF_COUNT_HW_CACHE_MAX] |
142 | [PERF_COUNT_HW_CACHE_OP_MAX] |
143 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
144 | }; |
145 | |
146 | static struct loongarch_pmu loongarch_pmu; |
147 | |
#define M_PERFCTL_EVENT(event)	((event) & CSR_PERFCTRL_EVENT)
149 | |
150 | #define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ |
151 | CSR_PERFCTRL_PLV1 | \ |
152 | CSR_PERFCTRL_PLV2 | \ |
153 | CSR_PERFCTRL_PLV3 | \ |
154 | CSR_PERFCTRL_IE) |
155 | |
156 | #define M_PERFCTL_CONFIG_MASK 0x1f0000 |
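
/*
 * Assumed bit layout (see the CSR_PERFCTRL_* definitions): the config mask
 * above covers the PLV0-PLV3 privilege-level enable bits plus the interrupt
 * enable bit, i.e. every control bit that is not part of the event code.
 */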
157 | |
158 | static void pause_local_counters(void); |
159 | static void resume_local_counters(void); |
160 | |
161 | static u64 loongarch_pmu_read_counter(unsigned int idx) |
162 | { |
163 | u64 val = -1; |
164 | |
165 | switch (idx) { |
166 | case 0: |
167 | val = read_csr_perfcntr0(); |
168 | break; |
169 | case 1: |
170 | val = read_csr_perfcntr1(); |
171 | break; |
172 | case 2: |
173 | val = read_csr_perfcntr2(); |
174 | break; |
175 | case 3: |
176 | val = read_csr_perfcntr3(); |
177 | break; |
178 | default: |
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
180 | return 0; |
181 | } |
182 | |
183 | return val; |
184 | } |
185 | |
186 | static void loongarch_pmu_write_counter(unsigned int idx, u64 val) |
187 | { |
188 | switch (idx) { |
189 | case 0: |
190 | write_csr_perfcntr0(val); |
191 | return; |
192 | case 1: |
193 | write_csr_perfcntr1(val); |
194 | return; |
195 | case 2: |
196 | write_csr_perfcntr2(val); |
197 | return; |
198 | case 3: |
199 | write_csr_perfcntr3(val); |
200 | return; |
201 | default: |
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
203 | return; |
204 | } |
205 | } |
206 | |
207 | static unsigned int loongarch_pmu_read_control(unsigned int idx) |
208 | { |
209 | unsigned int val = -1; |
210 | |
211 | switch (idx) { |
212 | case 0: |
213 | val = read_csr_perfctrl0(); |
214 | break; |
215 | case 1: |
216 | val = read_csr_perfctrl1(); |
217 | break; |
218 | case 2: |
219 | val = read_csr_perfctrl2(); |
220 | break; |
221 | case 3: |
222 | val = read_csr_perfctrl3(); |
223 | break; |
224 | default: |
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
226 | return 0; |
227 | } |
228 | |
229 | return val; |
230 | } |
231 | |
232 | static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) |
233 | { |
234 | switch (idx) { |
235 | case 0: |
236 | write_csr_perfctrl0(val); |
237 | return; |
238 | case 1: |
239 | write_csr_perfctrl1(val); |
240 | return; |
241 | case 2: |
242 | write_csr_perfctrl2(val); |
243 | return; |
244 | case 3: |
245 | write_csr_perfctrl3(val); |
246 | return; |
247 | default: |
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
249 | return; |
250 | } |
251 | } |
252 | |
253 | static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) |
254 | { |
255 | int i; |
256 | |
257 | for (i = 0; i < loongarch_pmu.num_counters; i++) { |
		if (!test_and_set_bit(i, cpuc->used_mask))
259 | return i; |
260 | } |
261 | |
262 | return -EAGAIN; |
263 | } |
264 | |
265 | static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) |
266 | { |
267 | unsigned int cpu; |
268 | struct perf_event *event = container_of(evt, struct perf_event, hw); |
269 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
270 | |
271 | WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); |
272 | |
273 | /* Make sure interrupt enabled. */ |
274 | cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) | |
275 | (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; |
276 | |
277 | cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); |
278 | |
279 | /* |
280 | * We do not actually let the counter run. Leave it until start(). |
281 | */ |
	pr_debug("Enabling perf counter for CPU%d\n", cpu);
283 | } |
284 | |
285 | static void loongarch_pmu_disable_event(int idx) |
286 | { |
287 | unsigned long flags; |
288 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
289 | |
290 | WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); |
291 | |
292 | local_irq_save(flags); |
293 | cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & |
294 | ~M_PERFCTL_COUNT_EVENT_WHENEVER; |
	loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
296 | local_irq_restore(flags); |
297 | } |
298 | |
299 | static int loongarch_pmu_event_set_period(struct perf_event *event, |
300 | struct hw_perf_event *hwc, |
301 | int idx) |
302 | { |
303 | int ret = 0; |
304 | u64 left = local64_read(&hwc->period_left); |
305 | u64 period = hwc->sample_period; |
306 | |
307 | if (unlikely((left + period) & (1ULL << 63))) { |
308 | /* left underflowed by more than period. */ |
309 | left = period; |
310 | local64_set(&hwc->period_left, left); |
311 | hwc->last_period = period; |
312 | ret = 1; |
313 | } else if (unlikely((left + period) <= period)) { |
314 | /* left underflowed by less than period. */ |
315 | left += period; |
316 | local64_set(&hwc->period_left, left); |
317 | hwc->last_period = period; |
318 | ret = 1; |
319 | } |
320 | |
321 | if (left > loongarch_pmu.max_period) { |
322 | left = loongarch_pmu.max_period; |
323 | local64_set(&hwc->period_left, left); |
324 | } |
325 | |
326 | local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); |
327 | |
328 | loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); |
329 | |
330 | perf_event_update_userpage(event); |
331 | |
332 | return ret; |
333 | } |
334 | |
335 | static void loongarch_pmu_event_update(struct perf_event *event, |
336 | struct hw_perf_event *hwc, |
337 | int idx) |
338 | { |
339 | u64 delta; |
340 | u64 prev_raw_count, new_raw_count; |
341 | |
342 | again: |
343 | prev_raw_count = local64_read(&hwc->prev_count); |
344 | new_raw_count = loongarch_pmu.read_counter(idx); |
345 | |
	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
348 | goto again; |
349 | |
350 | delta = new_raw_count - prev_raw_count; |
351 | |
352 | local64_add(delta, &event->count); |
353 | local64_sub(delta, &hwc->period_left); |
354 | } |
355 | |
356 | static void loongarch_pmu_start(struct perf_event *event, int flags) |
357 | { |
358 | struct hw_perf_event *hwc = &event->hw; |
359 | |
360 | if (flags & PERF_EF_RELOAD) |
361 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); |
362 | |
363 | hwc->state = 0; |
364 | |
365 | /* Set the period for the event. */ |
	loongarch_pmu_event_set_period(event, hwc, hwc->idx);
367 | |
368 | /* Enable the event. */ |
	loongarch_pmu_enable_event(hwc, hwc->idx);
370 | } |
371 | |
372 | static void loongarch_pmu_stop(struct perf_event *event, int flags) |
373 | { |
374 | struct hw_perf_event *hwc = &event->hw; |
375 | |
376 | if (!(hwc->state & PERF_HES_STOPPED)) { |
377 | /* We are working on a local event. */ |
		loongarch_pmu_disable_event(hwc->idx);
379 | barrier(); |
		loongarch_pmu_event_update(event, hwc, hwc->idx);
381 | hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
382 | } |
383 | } |
384 | |
385 | static int loongarch_pmu_add(struct perf_event *event, int flags) |
386 | { |
387 | int idx, err = 0; |
388 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
389 | struct hw_perf_event *hwc = &event->hw; |
390 | |
	perf_pmu_disable(event->pmu);
392 | |
	/* Look for a free counter for this event. */
394 | idx = loongarch_pmu_alloc_counter(cpuc, hwc); |
395 | if (idx < 0) { |
396 | err = idx; |
397 | goto out; |
398 | } |
399 | |
400 | /* |
401 | * If there is an event in the counter we are going to use then |
402 | * make sure it is disabled. |
403 | */ |
404 | event->hw.idx = idx; |
405 | loongarch_pmu_disable_event(idx); |
406 | cpuc->events[idx] = event; |
407 | |
408 | hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; |
409 | if (flags & PERF_EF_START) |
410 | loongarch_pmu_start(event, PERF_EF_RELOAD); |
411 | |
412 | /* Propagate our changes to the userspace mapping. */ |
413 | perf_event_update_userpage(event); |
414 | |
415 | out: |
	perf_pmu_enable(event->pmu);
417 | return err; |
418 | } |
419 | |
420 | static void loongarch_pmu_del(struct perf_event *event, int flags) |
421 | { |
422 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
423 | struct hw_perf_event *hwc = &event->hw; |
424 | int idx = hwc->idx; |
425 | |
426 | WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); |
427 | |
428 | loongarch_pmu_stop(event, PERF_EF_UPDATE); |
429 | cpuc->events[idx] = NULL; |
	clear_bit(idx, cpuc->used_mask);
431 | |
432 | perf_event_update_userpage(event); |
433 | } |
434 | |
435 | static void loongarch_pmu_read(struct perf_event *event) |
436 | { |
437 | struct hw_perf_event *hwc = &event->hw; |
438 | |
439 | /* Don't read disabled counters! */ |
440 | if (hwc->idx < 0) |
441 | return; |
442 | |
	loongarch_pmu_event_update(event, hwc, hwc->idx);
444 | } |
445 | |
446 | static void loongarch_pmu_enable(struct pmu *pmu) |
447 | { |
448 | resume_local_counters(); |
449 | } |
450 | |
451 | static void loongarch_pmu_disable(struct pmu *pmu) |
452 | { |
453 | pause_local_counters(); |
454 | } |
455 | |
456 | static DEFINE_MUTEX(pmu_reserve_mutex); |
457 | static atomic_t active_events = ATOMIC_INIT(0); |
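
/*
 * INT_PCOV is the performance counter overflow interrupt source; it is
 * mapped through the CPU interrupt controller domain (cpuintc_handle) below.
 */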
458 | |
459 | static int get_pmc_irq(void) |
460 | { |
461 | struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); |
462 | |
463 | if (d) |
464 | return irq_create_mapping(d, INT_PCOV); |
465 | |
466 | return -EINVAL; |
467 | } |
468 | |
469 | static void reset_counters(void *arg); |
470 | static int __hw_perf_event_init(struct perf_event *event); |
471 | |
472 | static void hw_perf_event_destroy(struct perf_event *event) |
473 | { |
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		on_each_cpu(reset_counters, NULL, 1);
		free_irq(get_pmc_irq(), &loongarch_pmu);
		mutex_unlock(&pmu_reserve_mutex);
478 | } |
479 | } |
480 | |
481 | static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, |
482 | struct perf_sample_data *data, struct pt_regs *regs) |
483 | { |
484 | struct perf_event *event = cpuc->events[idx]; |
485 | struct hw_perf_event *hwc = &event->hw; |
486 | |
487 | loongarch_pmu_event_update(event, hwc, idx); |
488 | data->period = event->hw.last_period; |
489 | if (!loongarch_pmu_event_set_period(event, hwc, idx)) |
490 | return; |
491 | |
492 | if (perf_event_overflow(event, data, regs)) |
493 | loongarch_pmu_disable_event(idx); |
494 | } |
495 | |
496 | static irqreturn_t pmu_handle_irq(int irq, void *dev) |
497 | { |
498 | int n; |
499 | int handled = IRQ_NONE; |
500 | uint64_t counter; |
501 | struct pt_regs *regs; |
502 | struct perf_sample_data data; |
503 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
504 | |
505 | /* |
506 | * First we pause the local counters, so that when we are locked |
507 | * here, the counters are all paused. When it gets locked due to |
508 | * perf_disable(), the timer interrupt handler will be delayed. |
509 | * |
510 | * See also loongarch_pmu_start(). |
511 | */ |
512 | pause_local_counters(); |
513 | |
514 | regs = get_irq_regs(); |
515 | |
	perf_sample_data_init(&data, 0, 0);
517 | |
518 | for (n = 0; n < loongarch_pmu.num_counters; n++) { |
519 | if (test_bit(n, cpuc->used_mask)) { |
520 | counter = loongarch_pmu.read_counter(n); |
521 | if (counter & loongarch_pmu.overflow) { |
				handle_associated_event(cpuc, n, &data, regs);
523 | handled = IRQ_HANDLED; |
524 | } |
525 | } |
526 | } |
527 | |
528 | resume_local_counters(); |
529 | |
530 | /* |
531 | * Do all the work for the pending perf events. We can do this |
532 | * in here because the performance counter interrupt is a regular |
533 | * interrupt, not NMI. |
534 | */ |
535 | if (handled == IRQ_HANDLED) |
536 | irq_work_run(); |
537 | |
538 | return handled; |
539 | } |
540 | |
541 | static int loongarch_pmu_event_init(struct perf_event *event) |
542 | { |
543 | int r, irq; |
544 | unsigned long flags; |
545 | |
546 | /* does not support taken branch sampling */ |
547 | if (has_branch_stack(event)) |
548 | return -EOPNOTSUPP; |
549 | |
550 | switch (event->attr.type) { |
551 | case PERF_TYPE_RAW: |
552 | case PERF_TYPE_HARDWARE: |
553 | case PERF_TYPE_HW_CACHE: |
554 | break; |
555 | |
556 | default: |
		/* Initialize it to avoid a false match in validate_group() */
558 | event->hw.event_base = 0xffffffff; |
559 | return -ENOENT; |
560 | } |
561 | |
	if (event->cpu >= 0 && !cpu_online(event->cpu))
563 | return -ENODEV; |
564 | |
565 | irq = get_pmc_irq(); |
566 | flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; |
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmu_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
			if (r < 0) {
				mutex_unlock(&pmu_reserve_mutex);
				pr_warn("PMU IRQ request failed\n");
				return -ENODEV;
			}
		}
		atomic_inc(&active_events);
		mutex_unlock(&pmu_reserve_mutex);
579 | } |
580 | |
581 | return __hw_perf_event_init(event); |
582 | } |
583 | |
584 | static struct pmu pmu = { |
585 | .pmu_enable = loongarch_pmu_enable, |
586 | .pmu_disable = loongarch_pmu_disable, |
587 | .event_init = loongarch_pmu_event_init, |
588 | .add = loongarch_pmu_add, |
589 | .del = loongarch_pmu_del, |
590 | .start = loongarch_pmu_start, |
591 | .stop = loongarch_pmu_stop, |
592 | .read = loongarch_pmu_read, |
593 | }; |
594 | |
595 | static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) |
596 | { |
597 | return M_PERFCTL_EVENT(pev->event_id); |
598 | } |
599 | |
600 | static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) |
601 | { |
602 | const struct loongarch_perf_event *pev; |
603 | |
604 | pev = &(*loongarch_pmu.general_event_map)[idx]; |
605 | |
606 | if (pev->event_id == HW_OP_UNSUPPORTED) |
		return ERR_PTR(-ENOENT);
608 | |
609 | return pev; |
610 | } |
611 | |
612 | static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) |
613 | { |
614 | unsigned int cache_type, cache_op, cache_result; |
615 | const struct loongarch_perf_event *pev; |
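	/*
	 * The generic perf ABI packs the cache event selector into byte
	 * lanes: config = type | (op << 8) | (result << 16). For example
	 * (illustrative), an L1D read miss is encoded as
	 * C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16) = 0x10000.
	 */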
616 | |
617 | cache_type = (config >> 0) & 0xff; |
618 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) |
		return ERR_PTR(-EINVAL);
620 | |
621 | cache_op = (config >> 8) & 0xff; |
622 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) |
		return ERR_PTR(-EINVAL);
624 | |
625 | cache_result = (config >> 16) & 0xff; |
626 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) |
		return ERR_PTR(-EINVAL);
628 | |
629 | pev = &((*loongarch_pmu.cache_event_map) |
630 | [cache_type] |
631 | [cache_op] |
632 | [cache_result]); |
633 | |
634 | if (pev->event_id == CACHE_OP_UNSUPPORTED) |
		return ERR_PTR(-ENOENT);
636 | |
637 | return pev; |
638 | } |
639 | |
640 | static int validate_group(struct perf_event *event) |
641 | { |
642 | struct cpu_hw_events fake_cpuc; |
643 | struct perf_event *sibling, *leader = event->group_leader; |
644 | |
645 | memset(&fake_cpuc, 0, sizeof(fake_cpuc)); |
646 | |
	if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
648 | return -EINVAL; |
649 | |
650 | for_each_sibling_event(sibling, leader) { |
		if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
652 | return -EINVAL; |
653 | } |
654 | |
	if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
656 | return -EINVAL; |
657 | |
658 | return 0; |
659 | } |
660 | |
661 | static void reset_counters(void *arg) |
662 | { |
663 | int n; |
664 | int counters = loongarch_pmu.num_counters; |
665 | |
666 | for (n = 0; n < counters; n++) { |
		loongarch_pmu_write_control(n, 0);
668 | loongarch_pmu.write_counter(n, 0); |
669 | } |
670 | } |
671 | |
672 | static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = { |
673 | PERF_MAP_ALL_UNSUPPORTED, |
674 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, |
675 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, |
676 | [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, |
677 | [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, |
678 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, |
679 | [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, |
680 | }; |
681 | |
682 | static const struct loongarch_perf_event loongson_cache_map |
683 | [PERF_COUNT_HW_CACHE_MAX] |
684 | [PERF_COUNT_HW_CACHE_OP_MAX] |
685 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
686 | PERF_CACHE_MAP_ALL_UNSUPPORTED, |
687 | [C(L1D)] = { |
688 | /* |
689 | * Like some other architectures (e.g. ARM), the performance |
690 | * counters don't differentiate between read and write |
691 | * accesses/misses, so this isn't strictly correct, but it's the |
692 | * best we can do. Writes and reads get combined. |
693 | */ |
694 | [C(OP_READ)] = { |
695 | [C(RESULT_ACCESS)] = { 0x8 }, |
		[C(RESULT_MISS)] = { 0x9 },
697 | }, |
698 | [C(OP_WRITE)] = { |
699 | [C(RESULT_ACCESS)] = { 0x8 }, |
		[C(RESULT_MISS)] = { 0x9 },
701 | }, |
702 | [C(OP_PREFETCH)] = { |
703 | [C(RESULT_ACCESS)] = { 0xaa }, |
		[C(RESULT_MISS)] = { 0xa9 },
705 | }, |
706 | }, |
707 | [C(L1I)] = { |
708 | [C(OP_READ)] = { |
709 | [C(RESULT_ACCESS)] = { 0x6 }, |
		[C(RESULT_MISS)] = { 0x7 },
711 | }, |
712 | }, |
713 | [C(LL)] = { |
714 | [C(OP_READ)] = { |
		[C(RESULT_ACCESS)] = { 0xc },
		[C(RESULT_MISS)] = { 0xd },
717 | }, |
718 | [C(OP_WRITE)] = { |
		[C(RESULT_ACCESS)] = { 0xc },
		[C(RESULT_MISS)] = { 0xd },
721 | }, |
722 | }, |
723 | [C(ITLB)] = { |
724 | [C(OP_READ)] = { |
		[C(RESULT_MISS)] = { 0x3b },
726 | }, |
727 | }, |
728 | [C(DTLB)] = { |
729 | [C(OP_READ)] = { |
		[C(RESULT_ACCESS)] = { 0x4 },
		[C(RESULT_MISS)] = { 0x3c },
732 | }, |
733 | [C(OP_WRITE)] = { |
734 | [C(RESULT_ACCESS)] = { 0x4 }, |
		[C(RESULT_MISS)] = { 0x3c },
736 | }, |
737 | }, |
738 | [C(BPU)] = { |
739 | /* Using the same code for *HW_BRANCH* */ |
740 | [C(OP_READ)] = { |
		[C(RESULT_ACCESS)] = { 0x02 },
		[C(RESULT_MISS)] = { 0x03 },
743 | }, |
744 | }, |
745 | }; |
746 | |
747 | static int __hw_perf_event_init(struct perf_event *event) |
748 | { |
749 | int err; |
750 | struct hw_perf_event *hwc = &event->hw; |
751 | struct perf_event_attr *attr = &event->attr; |
752 | const struct loongarch_perf_event *pev; |
753 | |
754 | /* Returning LoongArch event descriptor for generic perf event. */ |
755 | if (PERF_TYPE_HARDWARE == event->attr.type) { |
756 | if (event->attr.config >= PERF_COUNT_HW_MAX) |
757 | return -EINVAL; |
		pev = loongarch_pmu_map_general_event(event->attr.config);
759 | } else if (PERF_TYPE_HW_CACHE == event->attr.type) { |
		pev = loongarch_pmu_map_cache_event(event->attr.config);
761 | } else if (PERF_TYPE_RAW == event->attr.type) { |
762 | /* We are working on the global raw event. */ |
763 | mutex_lock(&raw_event_mutex); |
764 | pev = loongarch_pmu.map_raw_event(event->attr.config); |
765 | } else { |
766 | /* The event type is not (yet) supported. */ |
767 | return -EOPNOTSUPP; |
768 | } |
769 | |
	if (IS_ERR(pev)) {
		if (PERF_TYPE_RAW == event->attr.type)
			mutex_unlock(&raw_event_mutex);
		return PTR_ERR(pev);
774 | } |
775 | |
776 | /* |
777 | * We allow max flexibility on how each individual counter shared |
778 | * by the single CPU operates (the mode exclusion and the range). |
779 | */ |
780 | hwc->config_base = CSR_PERFCTRL_IE; |
781 | |
782 | hwc->event_base = loongarch_pmu_perf_event_encode(pev); |
783 | if (PERF_TYPE_RAW == event->attr.type) |
		mutex_unlock(&raw_event_mutex);
785 | |
786 | if (!attr->exclude_user) { |
787 | hwc->config_base |= CSR_PERFCTRL_PLV3; |
788 | hwc->config_base |= CSR_PERFCTRL_PLV2; |
789 | } |
	if (!attr->exclude_kernel)
		hwc->config_base |= CSR_PERFCTRL_PLV0;
	if (!attr->exclude_hv)
		hwc->config_base |= CSR_PERFCTRL_PLV1;
796 | |
797 | hwc->config_base &= M_PERFCTL_CONFIG_MASK; |
798 | /* |
799 | * The event can belong to another cpu. We do not assign a local |
800 | * counter for it for now. |
801 | */ |
802 | hwc->idx = -1; |
803 | hwc->config = 0; |
804 | |
805 | if (!hwc->sample_period) { |
806 | hwc->sample_period = loongarch_pmu.max_period; |
807 | hwc->last_period = hwc->sample_period; |
808 | local64_set(&hwc->period_left, hwc->sample_period); |
809 | } |
810 | |
811 | err = 0; |
812 | if (event->group_leader != event) |
813 | err = validate_group(event); |
814 | |
815 | event->destroy = hw_perf_event_destroy; |
816 | |
817 | if (err) |
818 | event->destroy(event); |
819 | |
820 | return err; |
821 | } |
822 | |
823 | static void pause_local_counters(void) |
824 | { |
825 | unsigned long flags; |
826 | int ctr = loongarch_pmu.num_counters; |
827 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
828 | |
829 | local_irq_save(flags); |
830 | do { |
831 | ctr--; |
		cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
833 | loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] & |
834 | ~M_PERFCTL_COUNT_EVENT_WHENEVER); |
835 | } while (ctr > 0); |
836 | local_irq_restore(flags); |
837 | } |
838 | |
839 | static void resume_local_counters(void) |
840 | { |
841 | int ctr = loongarch_pmu.num_counters; |
842 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
843 | |
844 | do { |
845 | ctr--; |
		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
847 | } while (ctr > 0); |
848 | } |
849 | |
850 | static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) |
851 | { |
852 | raw_event.event_id = M_PERFCTL_EVENT(config); |
853 | |
854 | return &raw_event; |
855 | } |
856 | |
857 | static int __init init_hw_perf_events(void) |
858 | { |
859 | int counters; |
860 | |
861 | if (!cpu_has_pmp) |
862 | return -ENODEV; |
863 | |
	pr_info("Performance counters: ");
865 | counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; |
866 | |
867 | loongarch_pmu.num_counters = counters; |
868 | loongarch_pmu.max_period = (1ULL << 63) - 1; |
869 | loongarch_pmu.valid_count = (1ULL << 63) - 1; |
870 | loongarch_pmu.overflow = 1ULL << 63; |
	loongarch_pmu.name = "loongarch/loongson64";
872 | loongarch_pmu.read_counter = loongarch_pmu_read_counter; |
873 | loongarch_pmu.write_counter = loongarch_pmu_write_counter; |
874 | loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event; |
875 | loongarch_pmu.general_event_map = &loongson_event_map; |
876 | loongarch_pmu.cache_event_map = &loongson_cache_map; |
877 | |
	on_each_cpu(reset_counters, NULL, 1);
879 | |
	pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
881 | loongarch_pmu.name, counters, 64); |
882 | |
	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
884 | |
885 | return 0; |
886 | } |
887 | early_initcall(init_hw_perf_events); |
888 | |