// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Steven Kinney <Steven.Kinney@amd.com>
 * Author: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
 *
 * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
 */

#define pr_fmt(fmt)	"perf/amd_iommu: " fmt

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>

#include "../perf_event.h"
#include "iommu.h"

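/*
 * Layout of the raw event encoding, as decoded by the helpers below and
 * exported to userspace through the sysfs "format" attributes further down:
 *
 *   config:  csource [7:0], devid [23:8], domid [39:24], pasid [59:40]
 *   config1: devid_mask [15:0], domid_mask [31:16], pasid_mask [51:32]
 */
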
/* iommu pmu conf masks */
#define GET_CSOURCE(x)     ((x)->conf & 0xFFULL)
#define GET_DEVID(x)       (((x)->conf >> 8)  & 0xFFFFULL)
#define GET_DOMID(x)       (((x)->conf >> 24) & 0xFFFFULL)
#define GET_PASID(x)       (((x)->conf >> 40) & 0xFFFFFULL)

/* iommu pmu conf1 masks */
#define GET_DEVID_MASK(x)  ((x)->conf1        & 0xFFFFULL)
#define GET_DOMID_MASK(x)  (((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x)  (((x)->conf1 >> 32) & 0xFFFFFULL)

#define IOMMU_NAME_SIZE 16

struct perf_amd_iommu {
	struct list_head list;
	struct pmu pmu;
	struct amd_iommu *iommu;
	char name[IOMMU_NAME_SIZE];
	u8 max_banks;
	u8 max_counters;
	u64 cntr_assign_mask;
	raw_spinlock_t lock;
};

static LIST_HEAD(perf_amd_iommu_list);

/*---------------------------------------------
 * sysfs format attributes
 *---------------------------------------------*/
PMU_FORMAT_ATTR(csource,    "config:0-7");
PMU_FORMAT_ATTR(devid,      "config:8-23");
PMU_FORMAT_ATTR(domid,      "config:24-39");
PMU_FORMAT_ATTR(pasid,      "config:40-59");
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");

static struct attribute *iommu_format_attrs[] = {
	&format_attr_csource.attr,
	&format_attr_devid.attr,
	&format_attr_pasid.attr,
	&format_attr_domid.attr,
	&format_attr_devid_mask.attr,
	&format_attr_pasid_mask.attr,
	&format_attr_domid_mask.attr,
	NULL,
};

static struct attribute_group amd_iommu_format_group = {
	.name = "format",
	.attrs = iommu_format_attrs,
};

/*---------------------------------------------
 * sysfs events attributes
 *---------------------------------------------*/
static struct attribute_group amd_iommu_events_group = {
	.name = "events",
};

struct amd_iommu_event_desc {
	struct device_attribute attr;
	const char *event;
};

static ssize_t _iommu_event_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct amd_iommu_event_desc *event =
		container_of(attr, struct amd_iommu_event_desc, attr);
	return sprintf(buf, "%s\n", event->event);
}

#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
{								\
	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
	.event = _event,					\
}

static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,       "csource=0x01"),
	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,      "csource=0x02"),
	AMD_IOMMU_EVENT_DESC(mem_pass_excl,          "csource=0x03"),
	AMD_IOMMU_EVENT_DESC(mem_target_abort,       "csource=0x04"),
	AMD_IOMMU_EVENT_DESC(mem_trans_total,        "csource=0x05"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,  "csource=0x06"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,  "csource=0x07"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,  "csource=0x08"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,  "csource=0x09"),
	AMD_IOMMU_EVENT_DESC(mem_dte_hit,            "csource=0x0a"),
	AMD_IOMMU_EVENT_DESC(mem_dte_mis,            "csource=0x0b"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,      "csource=0x0c"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,      "csource=0x0d"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,      "csource=0x0e"),
	AMD_IOMMU_EVENT_DESC(int_dte_hit,            "csource=0x0f"),
	AMD_IOMMU_EVENT_DESC(int_dte_mis,            "csource=0x10"),
	AMD_IOMMU_EVENT_DESC(cmd_processed,          "csource=0x11"),
	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,      "csource=0x12"),
	AMD_IOMMU_EVENT_DESC(tlb_inv,                "csource=0x13"),
	AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h,   "csource=0x14"),
	AMD_IOMMU_EVENT_DESC(vapic_int_non_guest,    "csource=0x15"),
	AMD_IOMMU_EVENT_DESC(vapic_int_guest,        "csource=0x16"),
	AMD_IOMMU_EVENT_DESC(smi_recv,               "csource=0x17"),
	AMD_IOMMU_EVENT_DESC(smi_blk,                "csource=0x18"),
	{ /* end: all zeroes */ },
};
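
/*
 * The events above appear under
 * /sys/bus/event_source/devices/amd_iommu_<N>/events/, so counting-mode
 * usage from userspace looks roughly like (device index and filter values
 * here are purely illustrative):
 *
 *   perf stat -a -e amd_iommu_0/mem_trans_total/ -- sleep 1
 *
 * or, spelling out the raw format fields with a device-ID filter:
 *
 *   perf stat -a -e amd_iommu_0/csource=0x05,devid=0x100,devid_mask=0xffff/ -- sleep 1
 */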

/*---------------------------------------------
 * sysfs cpumask attributes
 *---------------------------------------------*/
static cpumask_t iommu_cpumask;

static ssize_t _iommu_cpumask_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);

static struct attribute *iommu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_iommu_cpumask_group = {
	.attrs = iommu_cpumask_attrs,
};

/*---------------------------------------------*/

static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
{
	struct perf_amd_iommu *piommu = container_of(event->pmu,
						     struct perf_amd_iommu,
						     pmu);
	int max_cntrs = piommu->max_counters;
	int max_banks = piommu->max_banks;
	u32 shift, bank, cntr;
	unsigned long flags;
	int retval;

	raw_spin_lock_irqsave(&piommu->lock, flags);

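	/*
	 * cntr_assign_mask is an allocation bitmap in which each bank owns a
	 * contiguous 4-bit slice (shift == bank * 4 + cntr); note this
	 * encoding assumes no more than four counters per bank.
	 */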
	for (bank = 0; bank < max_banks; bank++) {
		for (cntr = 0; cntr < max_cntrs; cntr++) {
			shift = bank + (bank * 3) + cntr;
			if (piommu->cntr_assign_mask & BIT_ULL(shift))
				continue;

			piommu->cntr_assign_mask |= BIT_ULL(shift);
			event->hw.iommu_bank = bank;
			event->hw.iommu_cntr = cntr;
			retval = 0;
			goto out;
		}
	}
	retval = -ENOSPC;
out:
	raw_spin_unlock_irqrestore(&piommu->lock, flags);
	return retval;
}

static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
				      u8 bank, u8 cntr)
{
	unsigned long flags;
	int max_banks, max_cntrs;
	int shift = 0;

	max_banks = perf_iommu->max_banks;
	max_cntrs = perf_iommu->max_counters;

	/* Valid indices run 0 .. max - 1, so reject anything at or past max. */
	if ((bank >= max_banks) || (cntr >= max_cntrs))
		return -EINVAL;

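	/* Same bank * 4 + cntr slice encoding as get_next_avail_iommu_bnk_cntr(). */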
	shift = bank + cntr + (bank * 3);

	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
	perf_iommu->cntr_assign_mask &= ~BIT_ULL(shift);
	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);

	return 0;
}

static int perf_iommu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* test the event attr type check for PMU enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * IOMMU counters are shared across all cores.
	 * Therefore, it does not support per-process mode.
	 * Also, it does not support event sampling mode.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	/* update the hw_perf_event struct with the iommu config data */
	hwc->conf  = event->attr.config;
	hwc->conf1 = event->attr.config1;

	return 0;
}

static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
{
	return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
}

static void perf_iommu_enable_event(struct perf_event *ev)
{
	struct amd_iommu *iommu = perf_event_2_iommu(ev);
	struct hw_perf_event *hwc = &ev->hw;
	u8 bank = hwc->iommu_bank;
	u8 cntr = hwc->iommu_cntr;
	u64 reg = 0ULL;

	reg = GET_CSOURCE(hwc);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);

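	/*
	 * For each match register, the match value sits in the low half and
	 * its mask in the high half; bit 31 is set whenever a filter is in
	 * use, which the driver relies on as the match-enable bit.
	 */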
	reg = GET_DEVID_MASK(hwc);
	reg = GET_DEVID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);

	reg = GET_PASID_MASK(hwc);
	reg = GET_PASID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);

	reg = GET_DOMID_MASK(hwc);
	reg = GET_DOMID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
}

static void perf_iommu_disable_event(struct perf_event *event)
{
	struct amd_iommu *iommu = perf_event_2_iommu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 reg = 0ULL;

	amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
			     IOMMU_PC_COUNTER_SRC_REG, &reg);
}

static void perf_iommu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	/*
	 * To account for power-gating, which prevents writes to
	 * the counter, we need to enable the counter
	 * before setting up the counter register.
	 */
	perf_iommu_enable_event(event);

	if (flags & PERF_EF_RELOAD) {
		u64 count = 0;
		struct amd_iommu *iommu = perf_event_2_iommu(event);

		/*
		 * Since the IOMMU PMU only supports counting mode,
		 * the counter always starts at zero.
		 */
		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				     IOMMU_PC_COUNTER_REG, &count);
	}

	perf_event_update_userpage(event);
}

static void perf_iommu_read(struct perf_event *event)
{
	u64 count;
	struct hw_perf_event *hwc = &event->hw;
	struct amd_iommu *iommu = perf_event_2_iommu(event);

	if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				 IOMMU_PC_COUNTER_REG, &count))
		return;

	/* IOMMU pc counter register is only 48 bits */
	count &= GENMASK_ULL(47, 0);

	/*
	 * Since the counter always starts at zero,
	 * simply accumulate the count for the event.
	 */
	local64_add(count, &event->count);
}

static void perf_iommu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * To account for power-gating, in which reading the counter would
	 * return zero, we need to read the register before disabling.
	 */
	perf_iommu_read(event);
	hwc->state |= PERF_HES_UPTODATE;

	perf_iommu_disable_event(event);
	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
	hwc->state |= PERF_HES_STOPPED;
}

static int perf_iommu_add(struct perf_event *event, int flags)
{
	int retval;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* request an iommu bank/counter */
	retval = get_next_avail_iommu_bnk_cntr(event);
	if (retval)
		return retval;

	if (flags & PERF_EF_START)
		perf_iommu_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_iommu_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_amd_iommu *perf_iommu =
		container_of(event->pmu, struct perf_amd_iommu, pmu);

	perf_iommu_stop(event, PERF_EF_UPDATE);

	/* clear the assigned iommu bank/counter */
	clear_avail_iommu_bnk_cntr(perf_iommu,
				   hwc->iommu_bank, hwc->iommu_cntr);

	perf_event_update_userpage(event);
}

static __init int _init_events_attrs(void)
{
	int i = 0, j;
	struct attribute **attrs;

	while (amd_iommu_v2_event_descs[i].attr.attr.name)
		i++;

	attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	for (j = 0; j < i; j++)
		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;

	amd_iommu_events_group.attrs = attrs;
	return 0;
}

static const struct attribute_group *amd_iommu_attr_groups[] = {
	&amd_iommu_format_group,
	&amd_iommu_cpumask_group,
	&amd_iommu_events_group,
	NULL,
};

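/*
 * Template PMU, copied into each per-IOMMU instance at init time.
 * task_ctx_nr == perf_invalid_context keeps these events out of per-task
 * contexts, matching the PERF_ATTACH_TASK rejection in
 * perf_iommu_event_init().
 */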
static const struct pmu iommu_pmu __initconst = {
	.event_init	= perf_iommu_event_init,
	.add		= perf_iommu_add,
	.del		= perf_iommu_del,
	.start		= perf_iommu_start,
	.stop		= perf_iommu_stop,
	.read		= perf_iommu_read,
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_iommu_attr_groups,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
};

static __init int init_one_iommu(unsigned int idx)
{
	struct perf_amd_iommu *perf_iommu;
	int ret;

	perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
	if (!perf_iommu)
		return -ENOMEM;

	raw_spin_lock_init(&perf_iommu->lock);

	perf_iommu->pmu          = iommu_pmu;
	perf_iommu->iommu        = get_amd_iommu(idx);
	perf_iommu->max_banks    = amd_iommu_pc_get_max_banks(idx);
	perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);

	if (!perf_iommu->iommu ||
	    !perf_iommu->max_banks ||
	    !perf_iommu->max_counters) {
		kfree(perf_iommu);
		return -EINVAL;
	}

	snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);

	ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
	if (!ret) {
		pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
			idx, perf_iommu->max_banks, perf_iommu->max_counters);
		list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
	} else {
		pr_warn("Error initializing IOMMU %d.\n", idx);
		kfree(perf_iommu);
	}
	return ret;
}

static __init int amd_iommu_pc_init(void)
{
	unsigned int i, cnt = 0;
	int ret;

	/* Make sure the IOMMU PC resource is available */
	if (!amd_iommu_pc_supported())
		return -ENODEV;

	ret = _init_events_attrs();
	if (ret)
		return ret;

	/*
	 * An IOMMU PMU is specific to an IOMMU and can function independently,
	 * so go through all IOMMUs and ignore the ones that fail init,
	 * unless every IOMMU fails.
	 */
	for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
		ret = init_one_iommu(i);
		if (!ret)
			cnt++;
	}

	if (!cnt) {
		kfree(amd_iommu_events_group.attrs);
		return -ENODEV;
	}

	/* Init the cpumask attribute to CPU 0 only */
	cpumask_set_cpu(0, &iommu_cpumask);
	return 0;
}

device_initcall(amd_iommu_pc_init);