// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This code is based on ARM perf event code which is in turn based on
 * sparc64 and x86 code.
 */

#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt

#include <linux/mod_devicetable.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
#include <linux/of.h>
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>

#include <asm/errata_list.h>
#include <asm/sbi.h>
#include <asm/hwcap.h>

#define SYSCTL_NO_USER_ACCESS	0
#define SYSCTL_USER_ACCESS	1
#define SYSCTL_LEGACY		2

#define PERF_EVENT_FLAG_NO_USER_ACCESS	BIT(SYSCTL_NO_USER_ACCESS)
#define PERF_EVENT_FLAG_USER_ACCESS	BIT(SYSCTL_USER_ACCESS)
#define PERF_EVENT_FLAG_LEGACY		BIT(SYSCTL_LEGACY)

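/*
 * The "format" attributes below describe how a raw event is encoded in
 * perf_event_attr::config: "event" occupies config bits 0-47 and "firmware"
 * is config bit 63, which selects an SBI firmware event. From user space
 * that looks like, e.g. (the event number itself is platform-specific):
 *
 *   perf stat -e cpu/event=0xNN/ ...
 */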
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");

static struct attribute *riscv_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_firmware.attr,
	NULL,
};

static struct attribute_group riscv_pmu_format_group = {
	.name = "format",
	.attrs = riscv_arch_formats_attr,
};

static const struct attribute_group *riscv_pmu_attr_groups[] = {
	&riscv_pmu_format_group,
	NULL,
};

/* Allow user mode access by default */
static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;

/*
 * RISC-V doesn't have heterogeneous harts yet. This needs to be part of
 * per_cpu in case of harts with different pmu counters.
 */
static union sbi_pmu_ctr_info *pmu_ctr_list;
static bool riscv_pmu_use_irq;
static unsigned int riscv_pmu_irq_num;
static unsigned int riscv_pmu_irq;

/* Cache the available counters in a bitmask */
static unsigned long cmask;

struct sbi_pmu_event_data {
	union {
		union {
			struct hw_gen_event {
				uint32_t event_code:16;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_gen_event;
			struct hw_cache_event {
				uint32_t result_id:1;
				uint32_t op_id:2;
				uint32_t cache_id:13;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_cache_event;
		};
		uint32_t event_idx;
	};
};

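/*
 * Map the generic perf hardware event IDs onto their SBI event encodings;
 * the resulting event_idx is what the riscv_pmu core later hands to the
 * SBI counter config match call as the event to monitor.
 */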
static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= {.hw_gen_event = {
							SBI_PMU_HW_CPU_CYCLES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_INSTRUCTIONS]		= {.hw_gen_event = {
							SBI_PMU_HW_INSTRUCTIONS,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_CACHE_REFERENCES]	= {.hw_gen_event = {
							SBI_PMU_HW_CACHE_REFERENCES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_CACHE_MISSES]		= {.hw_gen_event = {
							SBI_PMU_HW_CACHE_MISSES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= {.hw_gen_event = {
							SBI_PMU_HW_BRANCH_INSTRUCTIONS,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BRANCH_MISSES]		= {.hw_gen_event = {
							SBI_PMU_HW_BRANCH_MISSES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BUS_CYCLES]		= {.hw_gen_event = {
							SBI_PMU_HW_BUS_CYCLES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= {.hw_gen_event = {
							SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= {.hw_gen_event = {
							SBI_PMU_HW_STALLED_CYCLES_BACKEND,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= {.hw_gen_event = {
							SBI_PMU_HW_REF_CPU_CYCLES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
};

#define C(x) PERF_COUNT_HW_CACHE_##x
static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
};

static int pmu_sbi_ctr_get_width(int idx)
{
	return pmu_ctr_list[idx].width;
}

static bool pmu_sbi_ctr_is_fw(int cidx)
{
	union sbi_pmu_ctr_info *info;

	info = &pmu_ctr_list[cidx];
	if (!info)
		return false;

	return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
}

/*
 * Returns the counter width of a programmable counter and number of hardware
 * counters. As we don't support heterogeneous CPUs yet, it is okay to just
 * return the counter width of the first programmable counter.
 */
int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
{
	int i;
	union sbi_pmu_ctr_info *info;
	u32 hpm_width = 0, hpm_count = 0;

	if (!cmask)
		return -EINVAL;

	for_each_set_bit(i, &cmask, RISCV_MAX_COUNTERS) {
		info = &pmu_ctr_list[i];
		if (!info)
			continue;
		if (!hpm_width && info->csr != CSR_CYCLE && info->csr != CSR_INSTRET)
			hpm_width = info->width;
		if (info->type == SBI_PMU_CTR_TYPE_HW)
			hpm_count++;
	}

	*hw_ctr_width = hpm_width;
	*num_hw_ctr = hpm_count;

	return 0;
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);

static uint8_t pmu_sbi_csr_index(struct perf_event *event)
{
	return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
}

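/*
 * Translate the perf exclusion attributes into SBI inhibit flags: events
 * counting on behalf of a guest use the VS/VU-mode inhibit flags, while
 * host events use the S/U-mode ones.
 */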
static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
	unsigned long cflags = 0;
	bool guest_events = false;

	if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS)
		guest_events = true;
	if (event->attr.exclude_kernel)
		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VSINH : SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_user)
		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VUINH : SBI_PMU_CFG_FLAG_SET_UINH;
	if (guest_events && event->attr.exclude_hv)
		cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_host)
		cflags |= SBI_PMU_CFG_FLAG_SET_UINH | SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_guest)
		cflags |= SBI_PMU_CFG_FLAG_SET_VSINH | SBI_PMU_CFG_FLAG_SET_VUINH;

	return cflags;
}

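/*
 * Ask the SBI firmware for a counter that can monitor this event: the
 * config match call both picks a suitable counter out of the candidate
 * set described by cbase/cmask and configures it, so all that is left to
 * do here is to reserve the returned index in the per-cpu bookkeeping.
 */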
static int pmu_sbi_ctr_get_idx(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct sbiret ret;
	int idx;
	uint64_t cbase = 0, cmask = rvpmu->cmask;
	unsigned long cflags = 0;

	cflags = pmu_sbi_get_filter_flags(event);

	/*
	 * In legacy mode, we have to force the fixed counters for those events
	 * but not in the user access mode as we want to use the other counters
	 * that support sampling/filtering.
	 */
	if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
			cmask = 1;
		} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
			cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
		}
	}

	/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
			cmask, cflags, hwc->event_base, hwc->config,
			hwc->config >> 32);
#else
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
			cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
	if (ret.error) {
		pr_debug("Not able to find a counter for event %lx config %llx\n",
			 hwc->event_base, hwc->config);
		return sbi_err_map_linux_errno(ret.error);
	}

	idx = ret.value;
	if (!test_bit(idx, &rvpmu->cmask) || !pmu_ctr_list[idx].value)
		return -ENOENT;

	/* Additional sanity check for the counter id */
	if (pmu_sbi_ctr_is_fw(idx)) {
		if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
			return idx;
	} else {
		if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
			return idx;
	}

	return -ENOENT;
}

static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	int idx = hwc->idx;

	if (pmu_sbi_ctr_is_fw(idx))
		clear_bit(idx, cpuc->used_fw_ctrs);
	else
		clear_bit(idx, cpuc->used_hw_ctrs);
}

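/*
 * A PERF_TYPE_HW_CACHE config packs the cache id, the operation and the
 * result as three bytes: config = id | (op << 8) | (result << 16).
 * Decode them and look up the corresponding SBI cache event encoding.
 */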
static int pmu_event_find_cache(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;

	return ret;
}

static bool pmu_sbi_is_fw_event(struct perf_event *event)
{
	u32 type = event->attr.type;
	u64 config = event->attr.config;

	if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
		return true;
	else
		return false;
}

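/*
 * Convert a perf event (hardware, cache or raw) into the SBI event index
 * expected by the counter config match call. For hardware raw events, the
 * raw config value itself is additionally returned through @econfig.
 */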
static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
{
	u32 type = event->attr.type;
	u64 config = event->attr.config;
	int bSoftware;
	u64 raw_config_val;
	int ret;

	switch (type) {
	case PERF_TYPE_HARDWARE:
		if (config >= PERF_COUNT_HW_MAX)
			return -EINVAL;
		ret = pmu_hw_event_map[event->attr.config].event_idx;
		break;
	case PERF_TYPE_HW_CACHE:
		ret = pmu_event_find_cache(config);
		break;
	case PERF_TYPE_RAW:
		/*
		 * As per the SBI specification, the upper 16 bits must be unused
		 * for a raw event. Use the MSB (bit 63) to distinguish between a
		 * hardware raw event and firmware events.
		 */
		bSoftware = config >> 63;
		raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
		if (bSoftware) {
			ret = (raw_config_val & 0xFFFF) |
			      (SBI_PMU_EVENT_TYPE_FW << 16);
		} else {
			ret = RISCV_PMU_RAW_EVENT_IDX;
			*econfig = raw_config_val;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	struct sbiret ret;
	union sbi_pmu_ctr_info info;
	u64 val = 0;

	if (pmu_sbi_is_fw_event(event)) {
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
				hwc->idx, 0, 0, 0, 0, 0);
		if (!ret.error)
			val = ret.value;
	} else {
		info = pmu_ctr_list[idx];
		val = riscv_pmu_ctr_read_csr(info.csr);
		if (IS_ENABLED(CONFIG_32BIT))
			val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32 | val;
	}

	return val;
}

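/*
 * SCOUNTEREN gates user-space access to the counter CSRs: setting the bit
 * corresponding to this event's counter lets user space read it directly
 * through the unprivileged counter CSRs (cycle, instret, hpmcounterN).
 */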
static void pmu_sbi_set_scounteren(void *arg)
{
	struct perf_event *event = (struct perf_event *)arg;

	if (event->hw.idx != -1)
		csr_write(CSR_SCOUNTEREN,
			  csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
}

static void pmu_sbi_reset_scounteren(void *arg)
{
	struct perf_event *event = (struct perf_event *)arg;

	if (event->hw.idx != -1)
		csr_write(CSR_SCOUNTEREN,
			  csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
}

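/*
 * Start a single counter through the SBI firmware, seeding it with the
 * requested initial value. On 32-bit, the 64-bit initial value has to be
 * split across two ecall arguments.
 */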
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
	struct sbiret ret;
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;

#if defined(CONFIG_32BIT)
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
			1, flag, ival, ival >> 32, 0);
#else
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
			1, flag, ival, 0, 0);
#endif
	if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
		pr_err("Starting counter idx %d failed with error %d\n",
		       hwc->idx, sbi_err_map_linux_errno(ret.error));

	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
		on_each_cpu_mask(mm_cpumask(event->owner->mm),
				 pmu_sbi_set_scounteren, (void *)event, 1);
}

static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
	struct sbiret ret;
	struct hw_perf_event *hwc = &event->hw;

	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
		on_each_cpu_mask(mm_cpumask(event->owner->mm),
				 pmu_sbi_reset_scounteren, (void *)event, 1);

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
	if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
	    flag != SBI_PMU_STOP_FLAG_RESET)
		pr_err("Stopping counter idx %d failed with error %d\n",
		       hwc->idx, sbi_err_map_linux_errno(ret.error));
}

static int pmu_sbi_find_num_ctrs(void)
{
	struct sbiret ret;

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
	if (!ret.error)
		return ret.value;
	else
		return sbi_err_map_linux_errno(ret.error);
}

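/*
 * Query the firmware once for the description of every advertised counter
 * and cache it in pmu_ctr_list, recording the valid (possibly
 * non-contiguous) counter indices in @mask.
 */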
static int pmu_sbi_get_ctrinfo(int nctr, unsigned long *mask)
{
	struct sbiret ret;
	int i, num_hw_ctr = 0, num_fw_ctr = 0;
	union sbi_pmu_ctr_info cinfo;

	pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
	if (!pmu_ctr_list)
		return -ENOMEM;

	for (i = 0; i < nctr; i++) {
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
		if (ret.error)
			/* The logical counter ids are not expected to be contiguous */
			continue;

		*mask |= BIT(i);

		cinfo.value = ret.value;
		if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
			num_fw_ctr++;
		else
			num_hw_ctr++;
		pmu_ctr_list[i].value = cinfo.value;
	}

	pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);

	return 0;
}

static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
{
	/*
	 * No need to check the error because we are disabling all the counters
	 * which may include counters that are not enabled yet.
	 */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
		  0, pmu->cmask, 0, 0, 0, 0);
}

static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

	/* No need to check the error here as we can't do anything about the error */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
		  cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
}

/*
 * This function starts all the used counters in a two-step approach.
 * Any counter that did not overflow can be started in a single step,
 * while the overflowed counters need to be restarted with an updated
 * initialization value.
 */
static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
					       unsigned long ctr_ovf_mask)
{
	int idx = 0;
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
	struct perf_event *event;
	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
	unsigned long ctr_start_mask = 0;
	uint64_t max_period;
	struct hw_perf_event *hwc;
	u64 init_val = 0;

	ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;

	/* Start all the counters that did not overflow in a single shot */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
		  0, 0, 0, 0);

	/* Reinitialize and start all the counters that overflowed */
	while (ctr_ovf_mask) {
		if (ctr_ovf_mask & 0x01) {
			event = cpu_hw_evt->events[idx];
			hwc = &event->hw;
			max_period = riscv_pmu_ctr_get_width_mask(event);
			init_val = local64_read(&hwc->prev_count) & max_period;
#if defined(CONFIG_32BIT)
			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
				  flag, init_val, init_val >> 32, 0);
#else
			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
				  flag, init_val, 0, 0);
#endif
			perf_event_update_userpage(event);
		}
		ctr_ovf_mask = ctr_ovf_mask >> 1;
		idx++;
	}
}

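/*
 * Sscofpmf overflow interrupt handler: stop the hardware counters, read
 * the overflow status register, clear the pending bit, then update and
 * re-arm every sampling event whose counter overflowed.
 */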
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
	struct perf_sample_data data;
	struct pt_regs *regs;
	struct hw_perf_event *hw_evt;
	union sbi_pmu_ctr_info *info;
	int lidx, hidx, fidx;
	struct riscv_pmu *pmu;
	struct perf_event *event;
	unsigned long overflow;
	unsigned long overflowed_ctrs = 0;
	struct cpu_hw_events *cpu_hw_evt = dev;
	u64 start_clock = sched_clock();

	if (WARN_ON_ONCE(!cpu_hw_evt))
		return IRQ_NONE;

	/* Firmware counters don't support overflow yet */
	fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
	event = cpu_hw_evt->events[fidx];
	if (!event) {
		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
		return IRQ_NONE;
	}

	pmu = to_riscv_pmu(event->pmu);
	pmu_sbi_stop_hw_ctrs(pmu);

	/* The overflow status register should only be read after the counters are stopped */
	ALT_SBI_PMU_OVERFLOW(overflow);

	/*
	 * The overflow interrupt pending bit should only be cleared after
	 * stopping all the counters to avoid any race condition.
	 */
	csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));

	/* No overflow bit is set */
	if (!overflow)
		return IRQ_NONE;

	regs = get_irq_regs();

	for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
		struct perf_event *event = cpu_hw_evt->events[lidx];

		/* Skip if the event is invalid or the user did not request sampling */
		if (!event || !is_sampling_event(event))
			continue;

		info = &pmu_ctr_list[lidx];
		/* Do a sanity check */
		if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
			continue;

		/* compute hardware counter index */
		hidx = info->csr - CSR_CYCLE;
		/* check if the corresponding bit is set in scountovf */
		if (!(overflow & (1 << hidx)))
			continue;

		/*
		 * Keep track of overflowed counters so that they can be started
		 * with an updated initial value.
		 */
		overflowed_ctrs |= 1 << lidx;
		hw_evt = &event->hw;
		riscv_pmu_event_update(event);
		perf_sample_data_init(&data, 0, hw_evt->last_period);
		if (riscv_pmu_event_set_period(event)) {
			/*
			 * Unlike other ISAs, RISC-V doesn't have to disable interrupts
			 * to avoid throttling here. As per the specification, the
			 * interrupt remains disabled until the OF bit is set.
			 * Interrupts are enabled again only during the start.
			 * TODO: We will need to stop the guest counters once
			 * virtualization support is added.
			 */
			perf_event_overflow(event, &data, regs);
		}
	}

	pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
	perf_sample_event_took(sched_clock() - start_clock);

	return IRQ_HANDLED;
}

static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

	/*
	 * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
	 * legacy option but that will be removed in the future.
	 */
	if (sysctl_perf_user_access == SYSCTL_LEGACY)
		csr_write(CSR_SCOUNTEREN, 0x7);
	else
		csr_write(CSR_SCOUNTEREN, 0x2);

	/* Stop all the counters so that they can be enabled from perf */
	pmu_sbi_stop_all(pmu);

	if (riscv_pmu_use_irq) {
		cpu_hw_evt->irq = riscv_pmu_irq;
		csr_clear(CSR_IP, BIT(riscv_pmu_irq_num));
		csr_set(CSR_IE, BIT(riscv_pmu_irq_num));
		enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
	}

	return 0;
}

static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
	if (riscv_pmu_use_irq) {
		disable_percpu_irq(riscv_pmu_irq);
		csr_clear(CSR_IE, BIT(riscv_pmu_irq_num));
	}

	/* Disable all counters access for user mode now */
	csr_write(CSR_SCOUNTEREN, 0x0);

	return 0;
}

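/*
 * Wire up the per-cpu overflow interrupt. The standard Sscofpmf extension
 * uses RV_IRQ_PMU; T-Head C9xx cores implement the same scheme as an
 * erratum with a custom interrupt number.
 */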
static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
{
	int ret;
	struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
	struct irq_domain *domain = NULL;

	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
		riscv_pmu_irq_num = RV_IRQ_PMU;
		riscv_pmu_use_irq = true;
	} else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) &&
		   riscv_cached_mvendorid(0) == THEAD_VENDOR_ID &&
		   riscv_cached_marchid(0) == 0 &&
		   riscv_cached_mimpid(0) == 0) {
		riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
		riscv_pmu_use_irq = true;
	}

	if (!riscv_pmu_use_irq)
		return -EOPNOTSUPP;

	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
					  DOMAIN_BUS_ANY);
	if (!domain) {
		pr_err("Failed to find INTC IRQ root domain\n");
		return -ENODEV;
	}

	riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num);
	if (!riscv_pmu_irq) {
		pr_err("Failed to map PMU interrupt for node\n");
		return -ENODEV;
	}

	ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
	if (ret) {
		pr_err("registering percpu irq failed [%d]\n", ret);
		return ret;
	}

	return 0;
}

#ifdef CONFIG_CPU_PM
static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
			       void *v)
{
	struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
	struct perf_event *event;
	int idx;

	if (!enabled)
		return NOTIFY_OK;

	for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
		event = cpuc->events[idx];
		if (!event)
			continue;

		switch (cmd) {
		case CPU_PM_ENTER:
			/*
			 * Stop and update the counter
			 */
			riscv_pmu_stop(event, PERF_EF_UPDATE);
			break;
		case CPU_PM_EXIT:
		case CPU_PM_ENTER_FAILED:
			/*
			 * Restore and enable the counter.
			 */
			riscv_pmu_start(event, PERF_EF_RELOAD);
			break;
		default:
			break;
		}
	}

	return NOTIFY_OK;
}

static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
{
	pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
	return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
}

static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
{
	cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
}
#else
static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; }
static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
#endif

static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
	riscv_pm_pmu_unregister(pmu);
	cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}

static void pmu_sbi_event_init(struct perf_event *event)
{
	/*
	 * The permissions are set at event_init so that we do not depend
	 * on the sysctl value that can change.
	 */
	if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
		event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
	else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
		event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
	else
		event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
}

static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
		return;

	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
			return;
		}
	}

	/*
	 * The user mmapped the event to directly access it: this is where
	 * we determine based on sysctl_perf_user_access if we grant userspace
	 * the direct access to this event. That means that within the same
	 * task, some events may be directly accessible and some others may not,
	 * if the user changes the value of sysctl_perf_user_access in the
	 * meantime.
	 */

	event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;

	/*
	 * We must enable userspace access *before* advertising in the user page
	 * that it is possible to do so to avoid any race.
	 * And we must notify all cpus here because threads that currently run
	 * on other cpus will try to directly access the counter too without
	 * calling pmu_sbi_ctr_start.
	 */
	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
		on_each_cpu_mask(mm_cpumask(mm),
				 pmu_sbi_set_scounteren, (void *)event, 1);
}

static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
		return;

	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
			return;
		}
	}

	/*
	 * Here we can directly remove user access since the user does not have
	 * access to the user page anymore so we avoid the racy window where the
	 * user could have read cap_user_rdpmc to true right before we disable
	 * it.
	 */
	event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;

	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
		on_each_cpu_mask(mm_cpumask(mm),
				 pmu_sbi_reset_scounteren, (void *)event, 1);
}

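/*
 * SCOUNTEREN bits: 0 = CY (cycle), 1 = TM (time), 2 = IR (instret). Legacy
 * mode opens all three to user space (0x7); otherwise only the time CSR
 * stays globally readable (0x2) and counters are enabled per event.
 */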
static void riscv_pmu_update_counter_access(void *info)
{
	if (sysctl_perf_user_access == SYSCTL_LEGACY)
		csr_write(CSR_SCOUNTEREN, 0x7);
	else
		csr_write(CSR_SCOUNTEREN, 0x2);
}

static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
					      int write, void *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int prev = sysctl_perf_user_access;
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/*
	 * Test against the previous value since we clear SCOUNTEREN when
	 * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
	 * not do that if that was already the case.
	 */
	if (ret || !write || prev == sysctl_perf_user_access)
		return ret;

	on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);

	return 0;
}

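/*
 * Expose the access policy as /proc/sys/kernel/perf_user_access. As a
 * sketch, switching to the legacy mode from a shell would look like:
 *
 *   echo 2 > /proc/sys/kernel/perf_user_access
 */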
static struct ctl_table sbi_pmu_sysctl_table[] = {
	{
		.procname	= "perf_user_access",
		.data		= &sysctl_perf_user_access,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= riscv_pmu_proc_user_access_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{ }
};

static int pmu_sbi_device_probe(struct platform_device *pdev)
{
	struct riscv_pmu *pmu = NULL;
	int ret = -ENODEV;
	int num_counters;

	pr_info("SBI PMU extension is available\n");
	pmu = riscv_pmu_alloc();
	if (!pmu)
		return -ENOMEM;

	num_counters = pmu_sbi_find_num_ctrs();
	if (num_counters < 0) {
		pr_err("SBI PMU extension doesn't provide any counters\n");
		goto out_free;
	}

	/* It is possible to get more than the maximum number of counters from the SBI */
	if (num_counters > RISCV_MAX_COUNTERS) {
		num_counters = RISCV_MAX_COUNTERS;
		pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters);
	}

	/* cache all the information about counters now */
	if (pmu_sbi_get_ctrinfo(num_counters, &cmask))
		goto out_free;

	ret = pmu_sbi_setup_irqs(pmu, pdev);
	if (ret < 0) {
		pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
	}

	pmu->pmu.attr_groups = riscv_pmu_attr_groups;
	pmu->cmask = cmask;
	pmu->ctr_start = pmu_sbi_ctr_start;
	pmu->ctr_stop = pmu_sbi_ctr_stop;
	pmu->event_map = pmu_sbi_event_map;
	pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
	pmu->ctr_get_width = pmu_sbi_ctr_get_width;
	pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
	pmu->ctr_read = pmu_sbi_ctr_read;
	pmu->event_init = pmu_sbi_event_init;
	pmu->event_mapped = pmu_sbi_event_mapped;
	pmu->event_unmapped = pmu_sbi_event_unmapped;
	pmu->csr_index = pmu_sbi_csr_index;

	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
	if (ret)
		return ret;

	ret = riscv_pm_pmu_register(pmu);
	if (ret)
		goto out_unregister;

	ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
	if (ret)
		goto out_unregister;

	register_sysctl("kernel", sbi_pmu_sysctl_table);

	return 0;

out_unregister:
	riscv_pmu_destroy(pmu);

out_free:
	kfree(pmu);
	return ret;
}

static struct platform_driver pmu_sbi_driver = {
	.probe		= pmu_sbi_device_probe,
	.driver		= {
		.name	= RISCV_PMU_SBI_PDEV_NAME,
	},
};

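/*
 * The SBI PMU extension requires at least SBI v0.3. When it is present,
 * register the driver and a matching platform device, and tell the legacy
 * (cycle/instret only) implementation to skip its own initialization.
 */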
static int __init pmu_sbi_devinit(void)
{
	int ret;
	struct platform_device *pdev;

	if (sbi_spec_version < sbi_mk_version(0, 3) ||
	    !sbi_probe_extension(SBI_EXT_PMU)) {
		return 0;
	}

	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
				      "perf/riscv/pmu:starting",
				      pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
	if (ret) {
		pr_err("CPU hotplug notifier could not be registered: %d\n",
		       ret);
		return ret;
	}

	ret = platform_driver_register(&pmu_sbi_driver);
	if (ret)
		return ret;

	pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
	if (IS_ERR(pdev)) {
		platform_driver_unregister(&pmu_sbi_driver);
		return PTR_ERR(pdev);
	}

	/* Notify the legacy implementation that the SBI PMU is available */
	riscv_pmu_legacy_skip_init();

	return ret;
}
device_initcall(pmu_sbi_devinit)
1133 | |