// SPDX-License-Identifier: GPL-2.0
/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
 *
 * Copyright (C) 2021 Marvell.
 */

#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/hrtimer.h>
#include <linux/acpi.h>
#include <linux/platform_device.h>

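/*
 * The DSS PMU exposes eight generic event counters, programmed through
 * DDRC_PERF_CFG(n), plus two free-running counters dedicated to DDR read
 * and write operations. The counters are operated in manual mode: the
 * driver issues explicit start/end operations and, since no overflow
 * interrupt is requested, polls the 48-bit counters from an hrtimer to
 * fold overflows into the perf counts.
 */
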
/* Performance Counters Operating Mode Control Registers */
#define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
#define OP_MODE_CTRL_VAL_MANUAL		0x1

/* Performance Counters Start Operation Control Registers */
#define DDRC_PERF_CNT_START_OP_CTRL	0x8028
#define START_OP_CTRL_VAL_START		0x1ULL
#define START_OP_CTRL_VAL_ACTIVE	0x2

/* Performance Counters End Operation Control Registers */
#define DDRC_PERF_CNT_END_OP_CTRL	0x8030
#define END_OP_CTRL_VAL_END		0x1ULL

/* Performance Counters End Status Registers */
#define DDRC_PERF_CNT_END_STATUS	0x8038
#define END_STATUS_VAL_END_TIMER_MODE_END	0x1

/* Performance Counters Configuration Registers */
#define DDRC_PERF_CFG_BASE		0x8040

/* 8 generic event counters + 2 fixed event counters */
#define DDRC_PERF_NUM_GEN_COUNTERS	8
#define DDRC_PERF_NUM_FIX_COUNTERS	2
#define DDRC_PERF_READ_COUNTER_IDX	DDRC_PERF_NUM_GEN_COUNTERS
#define DDRC_PERF_WRITE_COUNTER_IDX	(DDRC_PERF_NUM_GEN_COUNTERS + 1)
#define DDRC_PERF_NUM_COUNTERS		(DDRC_PERF_NUM_GEN_COUNTERS + \
					 DDRC_PERF_NUM_FIX_COUNTERS)

/* Generic event counter registers */
#define DDRC_PERF_CFG(n)		(DDRC_PERF_CFG_BASE + 8 * (n))
#define EVENT_ENABLE			BIT_ULL(63)

/* Two dedicated event counters for DDR reads and writes */
#define EVENT_DDR_READS			101
#define EVENT_DDR_WRITES		100

/*
 * Programmable event IDs for the generic event counters.
 * DO NOT change these event-ID numbers; they are used to
 * program the event bitmap in h/w.
 */
#define EVENT_OP_IS_ZQLATCH			55
#define EVENT_OP_IS_ZQSTART			54
#define EVENT_OP_IS_TCR_MRR			53
#define EVENT_OP_IS_DQSOSC_MRR			52
#define EVENT_OP_IS_DQSOSC_MPC			51
#define EVENT_VISIBLE_WIN_LIMIT_REACHED_WR	50
#define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD	49
#define EVENT_BSM_STARVATION			48
#define EVENT_BSM_ALLOC				47
#define EVENT_LPR_REQ_WITH_NOCREDIT		46
#define EVENT_HPR_REQ_WITH_NOCREDIT		45
#define EVENT_OP_IS_ZQCS			44
#define EVENT_OP_IS_ZQCL			43
#define EVENT_OP_IS_LOAD_MODE			42
#define EVENT_OP_IS_SPEC_REF			41
#define EVENT_OP_IS_CRIT_REF			40
#define EVENT_OP_IS_REFRESH			39
#define EVENT_OP_IS_ENTER_MPSM			35
#define EVENT_OP_IS_ENTER_POWERDOWN		31
#define EVENT_OP_IS_ENTER_SELFREF		27
#define EVENT_WAW_HAZARD			26
#define EVENT_RAW_HAZARD			25
#define EVENT_WAR_HAZARD			24
#define EVENT_WRITE_COMBINE			23
#define EVENT_RDWR_TRANSITIONS			22
#define EVENT_PRECHARGE_FOR_OTHER		21
#define EVENT_PRECHARGE_FOR_RDWR		20
#define EVENT_OP_IS_PRECHARGE			19
#define EVENT_OP_IS_MWR				18
#define EVENT_OP_IS_WR				17
#define EVENT_OP_IS_RD				16
#define EVENT_OP_IS_RD_ACTIVATE			15
#define EVENT_OP_IS_RD_OR_WR			14
#define EVENT_OP_IS_ACTIVATE			13
#define EVENT_WR_XACT_WHEN_CRITICAL		12
#define EVENT_LPR_XACT_WHEN_CRITICAL		11
#define EVENT_HPR_XACT_WHEN_CRITICAL		10
#define EVENT_DFI_RD_DATA_CYCLES		9
#define EVENT_DFI_WR_DATA_CYCLES		8
#define EVENT_ACT_BYPASS			7
#define EVENT_READ_BYPASS			6
#define EVENT_HIF_HI_PRI_RD			5
#define EVENT_HIF_RMW				4
#define EVENT_HIF_RD				3
#define EVENT_HIF_WR				2
#define EVENT_HIF_RD_OR_WR			1

/* Event counter value registers */
#define DDRC_PERF_CNT_VALUE_BASE	0x8080
#define DDRC_PERF_CNT_VALUE(n)		(DDRC_PERF_CNT_VALUE_BASE + 8 * (n))

/* Fixed event counter enable/disable register */
#define DDRC_PERF_CNT_FREERUN_EN	0x80C0
#define DDRC_PERF_FREERUN_WRITE_EN	0x1
#define DDRC_PERF_FREERUN_READ_EN	0x2

/* Fixed event counter control register */
#define DDRC_PERF_CNT_FREERUN_CTRL	0x80C8
#define DDRC_FREERUN_WRITE_CNT_CLR	0x1
#define DDRC_FREERUN_READ_CNT_CLR	0x2

/* Fixed event counter value register */
#define DDRC_PERF_CNT_VALUE_WR_OP	0x80D0
#define DDRC_PERF_CNT_VALUE_RD_OP	0x80D8
#define DDRC_PERF_CNT_VALUE_OVERFLOW	BIT_ULL(48)
#define DDRC_PERF_CNT_MAX_VALUE		GENMASK_ULL(48, 0)

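/* Per-instance state for one DDR controller's PMU */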
struct cn10k_ddr_pmu {
	struct pmu pmu;
	void __iomem *base;
	unsigned int cpu;
	struct device *dev;
	int active_events;
	struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
	struct hrtimer hrtimer;
	struct hlist_node node;
};

#define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)

static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
					struct device_attribute *attr,
					char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
}

#define CN10K_DDR_PMU_EVENT_ATTR(_name, _id)				\
	PMU_EVENT_ATTR_ID(_name, cn10k_ddr_pmu_event_show, _id)

static struct attribute *cn10k_ddr_perf_events_attrs[] = {
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_wr_data_access, EVENT_DFI_WR_DATA_CYCLES),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_rd_data_access, EVENT_DFI_RD_DATA_CYCLES),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
					EVENT_HPR_XACT_WHEN_CRITICAL),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
					EVENT_LPR_XACT_WHEN_CRITICAL),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
					EVENT_WR_XACT_WHEN_CRITICAL),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, EVENT_OP_IS_RD_OR_WR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, EVENT_OP_IS_RD_ACTIVATE),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, EVENT_PRECHARGE_FOR_RDWR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
					EVENT_PRECHARGE_FOR_OTHER),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, EVENT_OP_IS_ENTER_POWERDOWN),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit,
					EVENT_HPR_REQ_WITH_NOCREDIT),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit,
					EVENT_LPR_REQ_WITH_NOCREDIT),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
					EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
					EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
	/* Free-running event counters */
	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
	NULL
};

static struct attribute_group cn10k_ddr_perf_events_attr_group = {
	.name = "events",
	.attrs = cn10k_ddr_perf_events_attrs,
};
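
/*
 * Example usage (the instance-name suffix is this controller's physical
 * base address and is platform specific; the value below is hypothetical,
 * see /sys/bus/event_source/devices/ for the real name):
 *
 *   perf stat -e mrvl_ddr_pmu_87e1c0000000/ddr_ddr_reads/ -a sleep 1
 */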

PMU_FORMAT_ATTR(event, "config:0-8");

static struct attribute *cn10k_ddr_perf_format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cn10k_ddr_perf_format_attr_group = {
	.name = "format",
	.attrs = cn10k_ddr_perf_format_attrs,
};

static ssize_t cn10k_ddr_perf_cpumask_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct cn10k_ddr_pmu *pmu = dev_get_drvdata(dev);

	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
}

static struct device_attribute cn10k_ddr_perf_cpumask_attr =
	__ATTR(cpumask, 0444, cn10k_ddr_perf_cpumask_show, NULL);

static struct attribute *cn10k_ddr_perf_cpumask_attrs[] = {
	&cn10k_ddr_perf_cpumask_attr.attr,
	NULL,
};

static struct attribute_group cn10k_ddr_perf_cpumask_attr_group = {
	.attrs = cn10k_ddr_perf_cpumask_attrs,
};

static const struct attribute_group *cn10k_attr_groups[] = {
	&cn10k_ddr_perf_events_attr_group,
	&cn10k_ddr_perf_format_attr_group,
	&cn10k_ddr_perf_cpumask_attr_group,
	NULL,
};

/* The default poll period is 100 seconds, which is more than sufficient:
 * a 48-bit counter incremented at most at 5.6 GT/s takes many hours to
 * overflow.
 */
static unsigned long cn10k_ddr_pmu_poll_period_sec = 100;
module_param_named(poll_period_sec, cn10k_ddr_pmu_poll_period_sec, ulong, 0644);

static ktime_t cn10k_ddr_pmu_timer_period(void)
{
	return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * MSEC_PER_SEC);
}

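/*
 * Map an event ID to the bitmap programmed into DDRC_PERF_CFG. Most events
 * occupy a single bit (event N maps to bit N - 1, e.g. EVENT_HIF_RD (3)
 * maps to BIT(2)); the mode-entry events (self-refresh, powerdown, MPSM)
 * each occupy a 4-bit field starting at bit N - 1, hence the 0xF mask.
 */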
static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
{
	switch (eventid) {
	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
		*event_bitmap = (1ULL << (eventid - 1));
		break;
	case EVENT_OP_IS_ENTER_SELFREF:
	case EVENT_OP_IS_ENTER_POWERDOWN:
	case EVENT_OP_IS_ENTER_MPSM:
		*event_bitmap = (0xFULL << (eventid - 1));
		break;
	default:
		pr_err("%s Invalid eventid %d\n", __func__, eventid);
		return -EINVAL;
	}

	return 0;
}

static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
					struct perf_event *event)
{
	u8 config = event->attr.config;
	int i;

	/* DDR read free-run counter index */
	if (config == EVENT_DDR_READS) {
		pmu->events[DDRC_PERF_READ_COUNTER_IDX] = event;
		return DDRC_PERF_READ_COUNTER_IDX;
	}

	/* DDR write free-run counter index */
	if (config == EVENT_DDR_WRITES) {
		pmu->events[DDRC_PERF_WRITE_COUNTER_IDX] = event;
		return DDRC_PERF_WRITE_COUNTER_IDX;
	}

	/* Allocate DDR generic counters */
	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
		if (pmu->events[i] == NULL) {
			pmu->events[i] = event;
			return i;
		}
	}

	return -ENOENT;
}

static void cn10k_ddr_perf_free_counter(struct cn10k_ddr_pmu *pmu, int counter)
{
	pmu->events[counter] = NULL;
}

static int cn10k_ddr_perf_event_init(struct perf_event *event)
{
	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (is_sampling_event(event)) {
		dev_info(pmu->dev, "Sampling not supported!\n");
		return -EOPNOTSUPP;
	}

	if (event->cpu < 0) {
		dev_warn(pmu->dev, "Can't provide per-task data!\n");
		return -EOPNOTSUPP;
	}

	/* We must NOT create groups containing mixed PMUs */
	if (event->group_leader->pmu != event->pmu &&
	    !is_software_event(event->group_leader))
		return -EINVAL;

	/* Bind the event to one CPU; the same event cannot be observed
	 * on multiple CPUs at the same time.
	 */
	event->cpu = pmu->cpu;
	hwc->idx = -1;
	return 0;
}

static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
					  int counter, bool enable)
{
	u32 reg;
	u64 val;

	if (counter >= DDRC_PERF_NUM_COUNTERS) {
		pr_err("Error: unsupported counter %d\n", counter);
		return;
	}

	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
		reg = DDRC_PERF_CFG(counter);
		val = readq_relaxed(pmu->base + reg);

		if (enable)
			val |= EVENT_ENABLE;
		else
			val &= ~EVENT_ENABLE;

		writeq_relaxed(val, pmu->base + reg);
	} else {
		val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
		if (enable) {
			if (counter == DDRC_PERF_READ_COUNTER_IDX)
				val |= DDRC_PERF_FREERUN_READ_EN;
			else
				val |= DDRC_PERF_FREERUN_WRITE_EN;
		} else {
			if (counter == DDRC_PERF_READ_COUNTER_IDX)
				val &= ~DDRC_PERF_FREERUN_READ_EN;
			else
				val &= ~DDRC_PERF_FREERUN_WRITE_EN;
		}
		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
	}
}

static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
{
	u64 val;

	if (counter == DDRC_PERF_READ_COUNTER_IDX)
		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);

	if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);

	val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
	return val;
}

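/*
 * Fold the hardware counter into the perf count: re-read until the
 * prev_count exchange is consistent with the snapshot taken, then
 * accumulate the delta masked to the counter width, so a single wrap
 * between two reads is accounted for correctly.
 */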
static void cn10k_ddr_perf_event_update(struct perf_event *event)
{
	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_count, new_count, mask;

	do {
		prev_count = local64_read(&hwc->prev_count);
		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
	} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);

	mask = DDRC_PERF_CNT_MAX_VALUE;

	local64_add((new_count - prev_count) & mask, &event->count);
}

static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
{
	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int counter = hwc->idx;

	local64_set(&hwc->prev_count, 0);

	cn10k_ddr_perf_counter_enable(pmu, counter, true);

	hwc->state = 0;
}

static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
{
	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u8 config = event->attr.config;
	int counter, ret;
	u32 reg_offset;
	u64 val;

	counter = cn10k_ddr_perf_alloc_counter(pmu, event);
	if (counter < 0)
		return -EAGAIN;

	pmu->active_events++;
	hwc->idx = counter;

	if (pmu->active_events == 1)
		hrtimer_start(&pmu->hrtimer, cn10k_ddr_pmu_timer_period(),
			      HRTIMER_MODE_REL_PINNED);

	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
		/* Generic counters: configure the event ID */
		reg_offset = DDRC_PERF_CFG(counter);
		ret = ddr_perf_get_event_bitmap(config, &val);
		if (ret) {
			cn10k_ddr_perf_free_counter(pmu, counter);
			pmu->active_events--;
			if (pmu->active_events == 0)
				hrtimer_cancel(&pmu->hrtimer);
			return ret;
		}

		writeq_relaxed(val, pmu->base + reg_offset);
	} else {
		/* Fixed event counter: clear the counter value */
		if (counter == DDRC_PERF_READ_COUNTER_IDX)
			val = DDRC_FREERUN_READ_CNT_CLR;
		else
			val = DDRC_FREERUN_WRITE_CNT_CLR;

		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
	}

	hwc->state |= PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		cn10k_ddr_perf_event_start(event, flags);

	return 0;
}

static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags)
{
	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int counter = hwc->idx;

	cn10k_ddr_perf_counter_enable(pmu, counter, false);

	if (flags & PERF_EF_UPDATE)
		cn10k_ddr_perf_event_update(event);

	hwc->state |= PERF_HES_STOPPED;
}

static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
{
	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int counter = hwc->idx;

	cn10k_ddr_perf_event_stop(event, PERF_EF_UPDATE);

	cn10k_ddr_perf_free_counter(pmu, counter);
	pmu->active_events--;
	hwc->idx = -1;

	/* Cancel timer when no events to capture */
	if (pmu->active_events == 0)
		hrtimer_cancel(&pmu->hrtimer);
}

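/*
 * The perf core's pmu_enable/pmu_disable callbacks map onto the DSS
 * block's explicit start/end counter operations (manual operating mode
 * is selected at probe time).
 */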
static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
{
	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);

	writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
		       DDRC_PERF_CNT_START_OP_CTRL);
}

static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
{
	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);

	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
		       DDRC_PERF_CNT_END_OP_CTRL);
}

static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
{
	struct hw_perf_event *hwc;
	int i;

	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
		if (pmu->events[i] == NULL)
			continue;

		cn10k_ddr_perf_event_update(pmu->events[i]);
	}

	/* Reset the previous count since the h/w counters are reset */
	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
		if (pmu->events[i] == NULL)
			continue;

		hwc = &pmu->events[i]->hw;
		local64_set(&hwc->prev_count, 0);
	}
}

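/*
 * Called from the hrtimer rather than from a real interrupt: the free-run
 * read/write counters wrap silently, so their deltas are folded in whenever
 * the raw value goes backwards; a generic counter that has saturated forces
 * an update of all events followed by a stop/start cycle, which resets the
 * hardware counters.
 */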
static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
{
	struct perf_event *event;
	struct hw_perf_event *hwc;
	u64 prev_count, new_count;
	u64 value;
	int i;

	event = pmu->events[DDRC_PERF_READ_COUNTER_IDX];
	if (event) {
		hwc = &event->hw;
		prev_count = local64_read(&hwc->prev_count);
		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);

		/* The counter has overflowed if the new count is less
		 * than the previous count
		 */
		if (new_count < prev_count)
			cn10k_ddr_perf_event_update(event);
	}

	event = pmu->events[DDRC_PERF_WRITE_COUNTER_IDX];
	if (event) {
		hwc = &event->hw;
		prev_count = local64_read(&hwc->prev_count);
		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);

		/* The counter has overflowed if the new count is less
		 * than the previous count
		 */
		if (new_count < prev_count)
			cn10k_ddr_perf_event_update(event);
	}

	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
		if (pmu->events[i] == NULL)
			continue;

		value = cn10k_ddr_perf_read_counter(pmu, i);
		if (value == DDRC_PERF_CNT_MAX_VALUE) {
			pr_info("Counter-(%d) reached max value\n", i);
			cn10k_ddr_perf_event_update_all(pmu);
			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
		}
	}

	return IRQ_HANDLED;
}

static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer)
{
	struct cn10k_ddr_pmu *pmu = container_of(hrtimer, struct cn10k_ddr_pmu,
						 hrtimer);
	unsigned long flags;

	local_irq_save(flags);
	cn10k_ddr_pmu_overflow_handler(pmu);
	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, cn10k_ddr_pmu_timer_period());
	return HRTIMER_RESTART;
}

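/*
 * CPU hotplug callback: if the CPU that owns this PMU goes offline, migrate
 * the perf context to any other online CPU and hand ownership over to it.
 */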
static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct cn10k_ddr_pmu *pmu = hlist_entry_safe(node, struct cn10k_ddr_pmu,
						     node);
	unsigned int target;

	if (cpu != pmu->cpu)
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;

	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
	pmu->cpu = target;
	return 0;
}

static int cn10k_ddr_perf_probe(struct platform_device *pdev)
{
	struct cn10k_ddr_pmu *ddr_pmu;
	struct resource *res;
	void __iomem *base;
	char *name;
	int ret;

	ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddr_pmu), GFP_KERNEL);
	if (!ddr_pmu)
		return -ENOMEM;

	ddr_pmu->dev = &pdev->dev;
	platform_set_drvdata(pdev, ddr_pmu);

	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	ddr_pmu->base = base;

	/* Set up the PMU counters to work in manual mode */
	writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
		       DDRC_PERF_CNT_OP_MODE_CTRL);

	ddr_pmu->pmu = (struct pmu) {
		.module	      = THIS_MODULE,
		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
		.task_ctx_nr = perf_invalid_context,
		.attr_groups = cn10k_attr_groups,
		.event_init  = cn10k_ddr_perf_event_init,
		.add	      = cn10k_ddr_perf_event_add,
		.del	      = cn10k_ddr_perf_event_del,
		.start	      = cn10k_ddr_perf_event_start,
		.stop	      = cn10k_ddr_perf_event_stop,
		.read	      = cn10k_ddr_perf_event_update,
		.pmu_enable   = cn10k_ddr_perf_pmu_enable,
		.pmu_disable  = cn10k_ddr_perf_pmu_disable,
	};

	/* Choose this CPU to collect perf data */
	ddr_pmu->cpu = raw_smp_processor_id();

	name = devm_kasprintf(ddr_pmu->dev, GFP_KERNEL, "mrvl_ddr_pmu_%llx",
			      res->start);
	if (!name)
		return -ENOMEM;

	hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;

	cpuhp_state_add_instance_nocalls(
				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
				&ddr_pmu->node);

	ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
	if (ret)
		goto error;

	pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
	return 0;
error:
	cpuhp_state_remove_instance_nocalls(
				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
				&ddr_pmu->node);
	return ret;
}


static int cn10k_ddr_perf_remove(struct platform_device *pdev)
{
	struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);

	cpuhp_state_remove_instance_nocalls(
				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
				&ddr_pmu->node);

	perf_pmu_unregister(&ddr_pmu->pmu);
	return 0;
}

#ifdef CONFIG_OF
static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
	{ .compatible = "marvell,cn10k-ddr-pmu", },
	{ },
};
MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
#endif

#ifdef CONFIG_ACPI
static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = {
	{"MRVL000A", 0},
	{},
};
MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match);
#endif

static struct platform_driver cn10k_ddr_pmu_driver = {
	.driver	= {
		.name   = "cn10k-ddr-pmu",
		.of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match),
		.acpi_match_table = ACPI_PTR(cn10k_ddr_pmu_acpi_match),
		.suppress_bind_attrs = true,
	},
	.probe		= cn10k_ddr_perf_probe,
	.remove		= cn10k_ddr_perf_remove,
};

static int __init cn10k_ddr_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(
				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
				"perf/marvell/cn10k/ddr:online", NULL,
				cn10k_ddr_pmu_offline_cpu);
	if (ret)
		return ret;

	ret = platform_driver_register(&cn10k_ddr_pmu_driver);
	if (ret)
		cpuhp_remove_multi_state(
				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
	return ret;
}

static void __exit cn10k_ddr_pmu_exit(void)
{
	platform_driver_unregister(&cn10k_ddr_pmu_driver);
	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
}

module_init(cn10k_ddr_pmu_init);
module_exit(cn10k_ddr_pmu_exit);

MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
MODULE_LICENSE("GPL v2");
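MODULE_DESCRIPTION("Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver");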