// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * nd_perf.c: NVDIMM Device Performance Monitoring Unit support
 *
 * Perf interface to expose nvdimm performance stats.
 *
 * Copyright (C) 2021 IBM Corporation
 */

#define pr_fmt(fmt) "nvdimm_pmu: " fmt

#include <linux/nd.h>
#include <linux/platform_device.h>

#define EVENT(_name, _code)	enum{_name = _code}
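/*
 * Each EVENT() use defines its event code as an anonymous-enum
 * constant, so the same name serves as both the perf config value
 * and the ID passed to NVDIMM_EVENT_ATTR() below.
 */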

/*
 * NVDIMM event codes.
 */

/* Controller Reset Count */
EVENT(CTL_RES_CNT, 0x1);
/* Controller Reset Elapsed Time */
EVENT(CTL_RES_TM, 0x2);
/* Power-on Seconds */
EVENT(POWERON_SECS, 0x3);
/* Life Remaining */
EVENT(MEM_LIFE, 0x4);
/* Critical Resource Utilization */
EVENT(CRI_RES_UTIL, 0x5);
/* Host Load Count */
EVENT(HOST_L_CNT, 0x6);
/* Host Store Count */
EVENT(HOST_S_CNT, 0x7);
/* Host Store Duration */
EVENT(HOST_S_DUR, 0x8);
/* Host Load Duration */
EVENT(HOST_L_DUR, 0x9);
/* Media Read Count */
EVENT(MED_R_CNT, 0xa);
/* Media Write Count */
EVENT(MED_W_CNT, 0xb);
/* Media Read Duration */
EVENT(MED_R_DUR, 0xc);
/* Media Write Duration */
EVENT(MED_W_DUR, 0xd);
/* Cache Read Hit Count */
EVENT(CACHE_RH_CNT, 0xe);
/* Cache Write Hit Count */
EVENT(CACHE_WH_CNT, 0xf);
/* Fast Write Count */
EVENT(FAST_W_CNT, 0x10);

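/*
 * NVDIMM_EVENT_ATTR() and NVDIMM_EVENT_PTR() come from <linux/nd.h>;
 * they wrap PMU_EVENT_ATTR_ID() so that every event attribute uses
 * nvdimm_events_sysfs_show() below as its show routine.
 */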
NVDIMM_EVENT_ATTR(ctl_res_cnt, CTL_RES_CNT);
NVDIMM_EVENT_ATTR(ctl_res_tm, CTL_RES_TM);
NVDIMM_EVENT_ATTR(poweron_secs, POWERON_SECS);
NVDIMM_EVENT_ATTR(mem_life, MEM_LIFE);
NVDIMM_EVENT_ATTR(cri_res_util, CRI_RES_UTIL);
NVDIMM_EVENT_ATTR(host_l_cnt, HOST_L_CNT);
NVDIMM_EVENT_ATTR(host_s_cnt, HOST_S_CNT);
NVDIMM_EVENT_ATTR(host_s_dur, HOST_S_DUR);
NVDIMM_EVENT_ATTR(host_l_dur, HOST_L_DUR);
NVDIMM_EVENT_ATTR(med_r_cnt, MED_R_CNT);
NVDIMM_EVENT_ATTR(med_w_cnt, MED_W_CNT);
NVDIMM_EVENT_ATTR(med_r_dur, MED_R_DUR);
NVDIMM_EVENT_ATTR(med_w_dur, MED_W_DUR);
NVDIMM_EVENT_ATTR(cache_rh_cnt, CACHE_RH_CNT);
NVDIMM_EVENT_ATTR(cache_wh_cnt, CACHE_WH_CNT);
NVDIMM_EVENT_ATTR(fast_w_cnt, FAST_W_CNT);

static struct attribute *nvdimm_events_attr[] = {
	NVDIMM_EVENT_PTR(CTL_RES_CNT),
	NVDIMM_EVENT_PTR(CTL_RES_TM),
	NVDIMM_EVENT_PTR(POWERON_SECS),
	NVDIMM_EVENT_PTR(MEM_LIFE),
	NVDIMM_EVENT_PTR(CRI_RES_UTIL),
	NVDIMM_EVENT_PTR(HOST_L_CNT),
	NVDIMM_EVENT_PTR(HOST_S_CNT),
	NVDIMM_EVENT_PTR(HOST_S_DUR),
	NVDIMM_EVENT_PTR(HOST_L_DUR),
	NVDIMM_EVENT_PTR(MED_R_CNT),
	NVDIMM_EVENT_PTR(MED_W_CNT),
	NVDIMM_EVENT_PTR(MED_R_DUR),
	NVDIMM_EVENT_PTR(MED_W_DUR),
	NVDIMM_EVENT_PTR(CACHE_RH_CNT),
	NVDIMM_EVENT_PTR(CACHE_WH_CNT),
	NVDIMM_EVENT_PTR(FAST_W_CNT),
	NULL
};

static struct attribute_group nvdimm_pmu_events_group = {
	.name = "events",
	.attrs = nvdimm_events_attr,
};

PMU_FORMAT_ATTR(event, "config:0-4");

static struct attribute *nvdimm_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group nvdimm_pmu_format_group = {
	.name = "format",
	.attrs = nvdimm_pmu_format_attr,
};

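/*
 * sysfs "show" callback shared by all the event attributes above:
 * prints the event encoding, e.g. "event=0x01" for ctl_res_cnt.
 */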
ssize_t nvdimm_events_sysfs_show(struct device *dev,
				 struct device_attribute *attr, char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);

	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}

static ssize_t nvdimm_pmu_cpumask_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);
	struct nvdimm_pmu *nd_pmu;

	nd_pmu = container_of(pmu, struct nvdimm_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, cpumask_of(nd_pmu->cpu));
}

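/*
 * Hotplug teardown callback: if the CPU going offline is the one
 * designated for counter access, pick a replacement and migrate any
 * active events to it.
 */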
static int nvdimm_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct nvdimm_pmu *nd_pmu;
	u32 target;
	int nodeid;
	const struct cpumask *cpumask;

	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);

	/* Clear it, in case the given cpu is set in nd_pmu->arch_cpumask */
	cpumask_test_and_clear_cpu(cpu, &nd_pmu->arch_cpumask);

	/*
	 * If the given cpu is not the current designated cpu for
	 * counter access, just return.
	 */
	if (cpu != nd_pmu->cpu)
		return 0;

	/* Check for any active cpu in nd_pmu->arch_cpumask */
	target = cpumask_any(&nd_pmu->arch_cpumask);

	/*
	 * In case we don't have any active cpu in nd_pmu->arch_cpumask,
	 * check the given cpu's numa node list.
	 */
	if (target >= nr_cpu_ids) {
		nodeid = cpu_to_node(cpu);
		cpumask = cpumask_of_node(nodeid);
		target = cpumask_any_but(cpumask, cpu);
	}
	nd_pmu->cpu = target;

	/* Migrate nvdimm pmu events to the new target cpu if valid */
	if (target < nr_cpu_ids)
		perf_pmu_migrate_context(&nd_pmu->pmu, cpu, target);

	return 0;
}

static int nvdimm_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct nvdimm_pmu *nd_pmu;

	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);

	if (nd_pmu->cpu >= nr_cpu_ids)
		nd_pmu->cpu = cpu;

	return 0;
}

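/*
 * Build the "cpumask" sysfs attribute group at runtime; unlike the
 * static event/format groups, one copy is needed per registered PMU
 * so that the show callback can resolve its own nvdimm_pmu.
 */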
static int create_cpumask_attr_group(struct nvdimm_pmu *nd_pmu)
{
	struct perf_pmu_events_attr *pmu_events_attr;
	struct attribute **attrs_group;
	struct attribute_group *nvdimm_pmu_cpumask_group;

	pmu_events_attr = kzalloc(sizeof(*pmu_events_attr), GFP_KERNEL);
	if (!pmu_events_attr)
		return -ENOMEM;

	attrs_group = kzalloc(2 * sizeof(struct attribute *), GFP_KERNEL);
	if (!attrs_group) {
		kfree(pmu_events_attr);
		return -ENOMEM;
	}

	/* Allocate memory for cpumask attribute group */
	nvdimm_pmu_cpumask_group = kzalloc(sizeof(*nvdimm_pmu_cpumask_group), GFP_KERNEL);
	if (!nvdimm_pmu_cpumask_group) {
		kfree(pmu_events_attr);
		kfree(attrs_group);
		return -ENOMEM;
	}

	sysfs_attr_init(&pmu_events_attr->attr.attr);
	pmu_events_attr->attr.attr.name = "cpumask";
	pmu_events_attr->attr.attr.mode = 0444;
	pmu_events_attr->attr.show = nvdimm_pmu_cpumask_show;
	attrs_group[0] = &pmu_events_attr->attr.attr;
	attrs_group[1] = NULL;

	nvdimm_pmu_cpumask_group->attrs = attrs_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR] = nvdimm_pmu_cpumask_group;
	return 0;
}

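/*
 * Pick the initial designated CPU, register the hotplug callbacks on a
 * dynamic cpuhp state, and expose that CPU through the "cpumask"
 * attribute.
 */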
static int nvdimm_pmu_cpu_hotplug_init(struct nvdimm_pmu *nd_pmu)
{
	int nodeid, rc;
	const struct cpumask *cpumask;

	/*
	 * In case of the cpu hotplug feature, arch specific code can
	 * provide the required cpumask which is used to get the
	 * designated cpu for counter access.
	 * Check for any active cpu in nd_pmu->arch_cpumask.
	 */
	if (!cpumask_empty(&nd_pmu->arch_cpumask)) {
		nd_pmu->cpu = cpumask_any(&nd_pmu->arch_cpumask);
	} else {
		/* Pick an active cpu from the cpumask of the device numa node. */
		nodeid = dev_to_node(nd_pmu->dev);
		cpumask = cpumask_of_node(nodeid);
		nd_pmu->cpu = cpumask_any(cpumask);
	}

	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/nvdimm:online",
				     nvdimm_pmu_cpu_online, nvdimm_pmu_cpu_offline);

	if (rc < 0)
		return rc;

	nd_pmu->cpuhp_state = rc;

	/* Register the pmu instance for cpu hotplug */
	rc = cpuhp_state_add_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
	if (rc) {
		cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
		return rc;
	}

	/* Create cpumask attribute group */
	rc = create_cpumask_attr_group(nd_pmu);
	if (rc) {
		cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
		cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
		return rc;
	}

	return 0;
}

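/*
 * Undo nvdimm_pmu_cpu_hotplug_init(): drop the hotplug instance and
 * state, then free the dynamically allocated cpumask attribute group.
 */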
static void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
{
	cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
	cpuhp_remove_multi_state(nd_pmu->cpuhp_state);

	if (nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR])
		kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]->attrs);
	kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]);
}

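/**
 * register_nvdimm_pmu() - register an nvdimm perf PMU
 * @nd_pmu: pmu context, with pmu.name and the event callbacks
 *          (event_init/add/del/read) already filled in by the caller
 * @pdev: platform device whose dev pointer is cached for the callbacks
 *
 * Return: 0 on success, negative errno otherwise.
 */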
int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev)
{
	int rc;

	if (!nd_pmu || !pdev)
		return -EINVAL;

	/* event functions like add/del/read/event_init and pmu name should not be NULL */
	if (WARN_ON_ONCE(!(nd_pmu->pmu.event_init && nd_pmu->pmu.add &&
			   nd_pmu->pmu.del && nd_pmu->pmu.read && nd_pmu->pmu.name)))
		return -EINVAL;

	nd_pmu->pmu.attr_groups = kzalloc((NVDIMM_PMU_NULL_ATTR + 1) *
					  sizeof(struct attribute_group *), GFP_KERNEL);
	if (!nd_pmu->pmu.attr_groups)
		return -ENOMEM;

	/*
	 * Add the platform_device->dev pointer to nvdimm_pmu to access
	 * device data in the event functions.
	 */
	nd_pmu->dev = &pdev->dev;

	/* Fill attribute groups for the nvdimm pmu device */
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_FORMAT_ATTR] = &nvdimm_pmu_format_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_EVENT_ATTR] = &nvdimm_pmu_events_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_NULL_ATTR] = NULL;

	/* Fill attribute group for cpumask */
	rc = nvdimm_pmu_cpu_hotplug_init(nd_pmu);
	if (rc) {
		pr_info("cpu hotplug feature failed for device: %s\n", nd_pmu->pmu.name);
		kfree(nd_pmu->pmu.attr_groups);
		return rc;
	}

	rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->pmu.name, -1);
	if (rc) {
		nvdimm_pmu_free_hotplug_memory(nd_pmu);
		kfree(nd_pmu->pmu.attr_groups);
		return rc;
	}

	pr_info("%s NVDIMM performance monitor support registered\n",
		nd_pmu->pmu.name);

	return 0;
}
EXPORT_SYMBOL_GPL(register_nvdimm_pmu);

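/**
 * unregister_nvdimm_pmu() - reverse of register_nvdimm_pmu()
 * @nd_pmu: pmu to tear down; it is freed here, so the caller must not
 *          touch it afterwards
 */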
void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu)
{
	perf_pmu_unregister(&nd_pmu->pmu);
	nvdimm_pmu_free_hotplug_memory(nd_pmu);
	kfree(nd_pmu->pmu.attr_groups);
	kfree(nd_pmu);
}
EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu);
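
/*
 * Minimal registration sketch for an arch/platform driver. Names such
 * as foo_probe() and the foo_pmu_* callbacks are hypothetical, not
 * part of this file; papr_scm on powerpc is the in-tree user of this
 * interface:
 *
 *	static int foo_probe(struct platform_device *pdev)
 *	{
 *		struct nvdimm_pmu *nd_pmu;
 *		int rc;
 *
 *		nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
 *		if (!nd_pmu)
 *			return -ENOMEM;
 *
 *		nd_pmu->pmu.name = "nmem0";
 *		nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
 *		nd_pmu->pmu.event_init = foo_pmu_event_init;
 *		nd_pmu->pmu.add = foo_pmu_add;
 *		nd_pmu->pmu.del = foo_pmu_del;
 *		nd_pmu->pmu.read = foo_pmu_read;
 *
 *		rc = register_nvdimm_pmu(nd_pmu, pdev);
 *		if (rc)
 *			kfree(nd_pmu);
 *		return rc;
 *	}
 *
 * Teardown then calls unregister_nvdimm_pmu(nd_pmu), which frees
 * nd_pmu itself.
 */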