// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Common Performance counter support functions for PowerISA v2.07 processors.
 *
 * Copyright 2009 Paul Mackerras, IBM Corporation.
 * Copyright 2013 Michael Ellerman, IBM Corporation.
 * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
 */
#include "isa207-common.h"

PMU_FORMAT_ATTR(event,		"config:0-49");
PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
PMU_FORMAT_ATTR(mark,		"config:8");
PMU_FORMAT_ATTR(combine,	"config:11");
PMU_FORMAT_ATTR(unit,		"config:12-15");
PMU_FORMAT_ATTR(pmc,		"config:16-19");
PMU_FORMAT_ATTR(cache_sel,	"config:20-23");
PMU_FORMAT_ATTR(sample_mode,	"config:24-28");
PMU_FORMAT_ATTR(thresh_sel,	"config:29-31");
PMU_FORMAT_ATTR(thresh_stop,	"config:32-35");
PMU_FORMAT_ATTR(thresh_start,	"config:36-39");
PMU_FORMAT_ATTR(thresh_cmp,	"config:40-49");

static struct attribute *isa207_pmu_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_pmcxsel.attr,
	&format_attr_mark.attr,
	&format_attr_combine.attr,
	&format_attr_unit.attr,
	&format_attr_pmc.attr,
	&format_attr_cache_sel.attr,
	&format_attr_sample_mode.attr,
	&format_attr_thresh_sel.attr,
	&format_attr_thresh_stop.attr,
	&format_attr_thresh_start.attr,
	&format_attr_thresh_cmp.attr,
	NULL,
};

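/*
 * The format attributes above are exported to userspace through sysfs
 * (typically /sys/bus/event_source/devices/<pmu>/format/), which lets
 * tools such as perf decode raw event strings, e.g.
 * "cpu/event=0x12345,thresh_sel=1/" (an illustrative encoding, not an
 * event defined here).
 */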
const struct attribute_group isa207_pmu_format_group = {
	.name = "format",
	.attrs = isa207_pmu_format_attr,
};

static inline bool event_is_fab_match(u64 event)
{
	/* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */
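	/* i.e. keep bits 1-7 (pmcxsel sans edge) and 12-19 (unit, pmc) */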
	event &= 0xff0fe;

	/* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */
	return (event == 0x30056 || event == 0x4f052);
}

static bool is_event_valid(u64 event)
{
	u64 valid_mask = EVENT_VALID_MASK;

	if (cpu_has_feature(CPU_FTR_ARCH_31))
		valid_mask = p10_EVENT_VALID_MASK;
	else if (cpu_has_feature(CPU_FTR_ARCH_300))
		valid_mask = p9_EVENT_VALID_MASK;

	return !(event & ~valid_mask);
}

static inline bool is_event_marked(u64 event)
{
	if (event & EVENT_IS_MARKED)
		return true;

	return false;
}

static unsigned long sdar_mod_val(u64 event)
{
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		return p10_SDAR_MODE(event);

	return p9_SDAR_MODE(event);
}

static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
{
	/*
	 * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
	 * continuous sampling mode.
	 *
	 * In case of Power8:
	 * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
	 * mode and will be unchanged when setting MMCRA[63] (Marked events).
	 *
	 * In case of Power9/Power10:
	 * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
	 * as it will be if the group already has any marked events.
	 * For the rest:
	 * MMCRA[SDAR_MODE] will be set from the event code.
	 * If sdar_mode from the event is zero, default to 0b01. Hardware
	 * requires that we set a non-zero value.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
			*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
		else if (sdar_mod_val(event))
			*mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
		else
			*mmcra |= MMCRA_SDAR_MODE_DCACHE;
	} else
		*mmcra |= MMCRA_SDAR_MODE_TLB;
}

static int p10_thresh_cmp_val(u64 value)
{
	int exp = 0;
	u64 result = value;

	if (!value)
		return value;

	/*
	 * In case of P10, the thresh_cmp value is not part of the raw event
	 * code and is provided via the attr.config1 parameter. To program
	 * the threshold in MMCRA, take an 18 bit number N and shift it right
	 * 2 places, incrementing the exponent E by 1, until the upper 10 bits
	 * of N are zero. Write E to the threshold exponent and write the
	 * lower 8 bits of N to the threshold mantissa.
	 * The max threshold that can be written is 261120.
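	 *
	 * Worked example (illustrative, not from the ISA documentation):
	 * N = 1000 needs 10 bits, so the loop below shifts right by 2 once,
	 * giving E = 1, N = 250 (0xfa, 8 bits), and the encoded result is
	 * (1 << 8) | 0xfa = 0x1fa.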
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
		if (value > 261120)
			value = 261120;
		while ((64 - __builtin_clzl(value)) > 8) {
			exp++;
			value >>= 2;
		}

		/*
		 * Note that it is invalid to write a mantissa with the
		 * upper 2 bits of mantissa being zero, unless the
		 * exponent is also zero.
		 */
		if (!(value & 0xC0) && exp)
			result = -1;
		else
			result = (exp << 8) | value;
	}
	return result;
}

static u64 thresh_cmp_val(u64 value)
{
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		value = p10_thresh_cmp_val(value);

	/*
	 * Since the location of the threshold compare bits in MMCRA
	 * is different on p8, use a different shift value.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return value << p9_MMCRA_THR_CMP_SHIFT;
	else
		return value << MMCRA_THR_CMP_SHIFT;
}

static unsigned long combine_from_event(u64 event)
{
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return p9_EVENT_COMBINE(event);

	return EVENT_COMBINE(event);
}

static unsigned long combine_shift(unsigned long pmc)
{
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return p9_MMCR1_COMBINE_SHIFT(pmc);

	return MMCR1_COMBINE_SHIFT(pmc);
}

static inline bool event_is_threshold(u64 event)
{
	return (event >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
}

static bool is_thresh_cmp_valid(u64 event)
{
	unsigned int cmp, exp;

	if (cpu_has_feature(CPU_FTR_ARCH_31))
		return p10_thresh_cmp_val(event) >= 0;

	/*
	 * Check the mantissa upper two bits are not zero, unless the
	 * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
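	 *
	 * For example (illustrative): cmp = 0x90 decodes to exp = 1 with
	 * the 0x60 mantissa bits clear, so it is rejected below.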
	 */

	cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
	exp = cmp >> 7;

	if (exp && (cmp & 0x60) == 0)
		return false;

	return true;
}

static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
{
	unsigned int cache;

	cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK;
	return cache;
}

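/*
 * Translate the SIER load/store source index (idx) and data source
 * sub-index into the generic perf_mem_data_src encoding. The P(), PH(),
 * PM(), LEVEL() and REM helpers (from isa207-common.h) fill in the
 * level, snoop, remote and hop fields of the union.
 */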
static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
{
	u64 ret = PERF_MEM_NA;

	switch (idx) {
	case 0:
		/* Nothing to do */
		break;
	case 1:
		ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT);
		break;
	case 2:
		ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
		break;
	case 3:
		ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
		break;
	case 4:
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			ret = P(SNOOP, HIT);

			if (sub_idx == 1)
				ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
			else if (sub_idx == 2 || sub_idx == 3)
				ret |= P(LVL, HIT) | LEVEL(PMEM);
			else if (sub_idx == 4)
				ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
			else if (sub_idx == 5 || sub_idx == 7)
				ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
			else if (sub_idx == 6)
				ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
		} else {
			if (sub_idx <= 1)
				ret = PH(LVL, LOC_RAM);
			else if (sub_idx > 1 && sub_idx <= 2)
				ret = PH(LVL, REM_RAM1);
			else
				ret = PH(LVL, REM_RAM2);
			ret |= P(SNOOP, HIT);
		}
		break;
	case 5:
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			ret = REM | P(HOPS, 0);

			if (sub_idx == 0 || sub_idx == 4)
				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
			else if (sub_idx == 1 || sub_idx == 5)
				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
			else if (sub_idx == 2 || sub_idx == 6)
				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
			else if (sub_idx == 3 || sub_idx == 7)
				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
		} else {
			if (sub_idx == 0)
				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
			else if (sub_idx == 1)
				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
			else if (sub_idx == 2 || sub_idx == 4)
				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
			else if (sub_idx == 3 || sub_idx == 5)
				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
		}
		break;
	case 6:
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			if (sub_idx == 0)
				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HIT) | P(HOPS, 2);
			else if (sub_idx == 1)
				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HITM) | P(HOPS, 2);
			else if (sub_idx == 2)
				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HIT) | P(HOPS, 3);
			else if (sub_idx == 3)
				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HITM) | P(HOPS, 3);
		} else {
			ret = PH(LVL, REM_CCE2);
			if (sub_idx == 0 || sub_idx == 2)
				ret |= P(SNOOP, HIT);
			else if (sub_idx == 1 || sub_idx == 3)
				ret |= P(SNOOP, HITM);
		}
		break;
	case 7:
		ret = PM(LVL, L1);
		break;
	}

	return ret;
}

void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
			     struct pt_regs *regs)
{
	u64 idx;
	u32 sub_idx;
	u64 sier;
	u64 val;

	/* Skip if no SIER support */
	if (!(flags & PPMU_HAS_SIER)) {
		dsrc->val = 0;
		return;
	}

	sier = mfspr(SPRN_SIER);
	val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
	if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
		return;

	idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
	sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;

	dsrc->val = isa207_find_source(idx, sub_idx);
	if (val == 7) {
		u64 mmcra;
		u32 op_type;

		/*
		 * Type 0b111 denotes either a larx or a stcx instruction. Use
		 * the MMCRA sampling bits [57:59] along with the type value
		 * to determine the exact instruction type. If the sampling
		 * criterion is neither a load nor a store, default the type
		 * to NA.
		 */
		mmcra = mfspr(SPRN_MMCRA);

		op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
		switch (op_type) {
		case 5:
			dsrc->val |= P(OP, LOAD);
			break;
		case 7:
			dsrc->val |= P(OP, STORE);
			break;
		default:
			dsrc->val |= P(OP, NA);
			break;
		}
	} else {
		dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
	}
}

void isa207_get_mem_weight(u64 *weight, u64 type)
{
	union perf_sample_weight *weight_fields;
	u64 weight_lat;
	u64 mmcra = mfspr(SPRN_MMCRA);
	u64 exp = MMCRA_THR_CTR_EXP(mmcra);
	u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
	u64 sier = mfspr(SPRN_SIER);
	u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;

	if (cpu_has_feature(CPU_FTR_ARCH_31))
		mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);

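	/*
	 * The threshold counter latency decodes as mantissa << (2 * exp),
	 * i.e. mantissa * 4^exp; e.g. mantissa = 100 with exp = 2 yields
	 * 1600 cycles (illustrative values).
	 */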
	if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31)))
		weight_lat = 0;
	else
		weight_lat = mantissa << (2 * exp);

	/*
	 * Use the 64 bit weight field (full) if the sample type is
	 * WEIGHT.
	 *
	 * If the sample type is WEIGHT_STRUCT:
	 * - store the memory latency in the lower 32 bits.
	 * - for ISA v3.1, use the remaining two 16 bit fields of
	 *   perf_sample_weight to store the cycle counter values
	 *   from SIER2.
	 */
	weight_fields = (union perf_sample_weight *)weight;
	if (type & PERF_SAMPLE_WEIGHT)
		weight_fields->full = weight_lat;
	else {
		weight_fields->var1_dw = (u32)weight_lat;
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
			weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
		}
	}
}

int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
{
	unsigned int unit, pmc, cache, ebb;
	unsigned long mask, value;

	mask = value = 0;

	if (!is_event_valid(event))
		return -1;

	pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
	unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		cache = (event >> EVENT_CACHE_SEL_SHIFT) &
			p10_EVENT_CACHE_SEL_MASK;
	else
		cache = (event >> EVENT_CACHE_SEL_SHIFT) &
			EVENT_CACHE_SEL_MASK;
	ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;

	if (pmc) {
		u64 base_event;

		if (pmc > 6)
			return -1;

		/* Ignore Linux defined bits when checking event below */
		base_event = event & ~EVENT_LINUX_MASK;

		if (pmc >= 5 && base_event != 0x500fa &&
		    base_event != 0x600f4)
			return -1;

		mask |= CNST_PMC_MASK(pmc);
		value |= CNST_PMC_VAL(pmc);

		/*
		 * PMC5 and PMC6 are used to count cycles and instructions and
		 * they do not support most of the constraint bits. Add a check
		 * to exclude PMC5/6 from most of the constraints except for
		 * EBB/BHRB.
		 */
		if (pmc >= 5)
			goto ebb_bhrb;
	}

	if (pmc <= 4) {
		/*
		 * Add to number of counters in use. Note this includes events with
		 * a PMC of 0 - they still need a PMC, it's just assigned later.
		 * Don't count events on PMC 5 & 6, there is only one valid event
		 * on each of those counters, and they are handled above.
		 */
		mask |= CNST_NC_MASK;
		value |= CNST_NC_VAL;
	}

	if (unit >= 6 && unit <= 9) {
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			if (unit == 6) {
				mask |= CNST_L2L3_GROUP_MASK;
				value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
			}
		} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			mask |= CNST_CACHE_GROUP_MASK;
			value |= CNST_CACHE_GROUP_VAL(event & 0xff);

			mask |= CNST_CACHE_PMC4_MASK;
			if (pmc == 4)
				value |= CNST_CACHE_PMC4_VAL;
		} else if (cache & 0x7) {
			/*
			 * L2/L3 events contain a cache selector field, which is
			 * supposed to be programmed into MMCRC. However MMCRC is only
			 * HV writable, and there is no API for guest kernels to modify
			 * it. The solution is for the hypervisor to initialise the
			 * field to zeroes, and for us to only ever allow events that
			 * have a cache selector of zero. The bank selector (bit 3) is
			 * irrelevant, as long as the rest of the value is 0.
			 */
			return -1;
		}

	} else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
		mask |= CNST_L1_QUAL_MASK;
		value |= CNST_L1_QUAL_VAL(cache);
	}

	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
		mask |= CNST_RADIX_SCOPE_GROUP_MASK;
		value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
	}

	if (is_event_marked(event)) {
		mask |= CNST_SAMPLE_MASK;
		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
	}

	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
		if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
			mask |= CNST_THRESH_CTL_SEL_MASK;
			value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
			mask |= p10_CNST_THRESH_CMP_MASK;
			value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
		} else if (event_is_threshold(event))
			return -1;
	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
			mask |= CNST_THRESH_MASK;
			value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
		} else if (event_is_threshold(event))
			return -1;
	} else {
		/*
		 * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
		 * where the threshold control bits are used for the match value.
		 */
		if (event_is_fab_match(event)) {
			mask |= CNST_FAB_MATCH_MASK;
			value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
		} else {
			if (!is_thresh_cmp_valid(event))
				return -1;

			mask |= CNST_THRESH_MASK;
			value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
		}
	}

ebb_bhrb:
	if (!pmc && ebb)
		/* EBB events must specify the PMC */
		return -1;

	if (event & EVENT_WANTS_BHRB) {
		if (!ebb)
			/* Only EBB events can request BHRB */
			return -1;

		mask |= CNST_IFM_MASK;
		value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
	}

	/*
	 * All events must agree on EBB, either all request it or none.
	 * EBB events are pinned & exclusive, so this should never actually
	 * hit, but we leave it as a fallback in case.
	 */
	mask |= CNST_EBB_MASK;
	value |= CNST_EBB_VAL(ebb);

	*maskp = mask;
	*valp = value;

	return 0;
}

int isa207_compute_mmcr(u64 event[], int n_ev,
			unsigned int hwc[], struct mmcr_regs *mmcr,
			struct perf_event *pevents[], u32 flags)
{
	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
	unsigned long mmcr3;
	unsigned int pmc, pmc_inuse;
	int i;

	pmc_inuse = 0;

	/* First pass to count resource use */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
		if (pmc)
			pmc_inuse |= 1 << pmc;
	}

	mmcra = mmcr1 = mmcr2 = mmcr3 = 0;

	/*
	 * Disable BHRB unless explicitly requested, by setting the
	 * MMCRA[BHRBRD] bit (cleared again below if BHRB is wanted).
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		mmcra |= MMCRA_BHRB_DISABLE;

	/* Second pass: assign PMCs, set all MMCR1 fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
		unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
		combine = combine_from_event(event[i]);
		psel = event[i] & EVENT_PSEL_MASK;

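		/* No PMC was specified: assign the first free one in 1-4 */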
		if (!pmc) {
			for (pmc = 1; pmc <= 4; ++pmc) {
				if (!(pmc_inuse & (1 << pmc)))
					break;
			}

			pmc_inuse |= 1 << pmc;
		}

		if (pmc <= 4) {
			mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
			mmcr1 |= combine << combine_shift(pmc);
			mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
		}

		/* In continuous sampling mode, update SDAR on TLB miss */
		mmcra_sdar_mode(event[i], &mmcra);

		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			cache = dc_ic_rld_quad_l1_sel(event[i]);
			mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
		} else {
			if (event[i] & EVENT_IS_L1) {
				cache = dc_ic_rld_quad_l1_sel(event[i]);
				mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
			}
		}

		/* Set the RADIX_SCOPE_QUAL bit */
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
				p10_EVENT_RADIX_SCOPE_QUAL_MASK;
			mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
		}

		if (is_event_marked(event[i])) {
			mmcra |= MMCRA_SAMPLE_ENABLE;

			val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
			if (val) {
				mmcra |= (val & 3) << MMCRA_SAMP_MODE_SHIFT;
				mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
			}
		}

		/*
		 * For PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
		 * the threshold bits are used for the match value.
		 */
		if (!cpu_has_feature(CPU_FTR_ARCH_300) && event_is_fab_match(event[i])) {
			mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
				  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
		} else {
			val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
			mmcra |= val << MMCRA_THR_CTL_SHIFT;
			val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
			mmcra |= val << MMCRA_THR_SEL_SHIFT;
			if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
				val = (event[i] >> EVENT_THR_CMP_SHIFT) &
					EVENT_THR_CMP_MASK;
				mmcra |= thresh_cmp_val(val);
			} else if (flags & PPMU_HAS_ATTR_CONFIG1) {
				val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
					p10_EVENT_THR_CMP_MASK;
				mmcra |= thresh_cmp_val(val);
			}
		}

		if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
			val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
				p10_EVENT_L2L3_SEL_MASK;
			mmcr2 |= val << p10_L2L3_SEL_SHIFT;
		}

		if (event[i] & EVENT_WANTS_BHRB) {
			val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
			mmcra |= val << MMCRA_IFM_SHIFT;
		}

		/* Clear MMCRA[BHRBRD] if the user requested BHRB */
		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
		    (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB)))
			mmcra &= ~MMCRA_BHRB_DISABLE;

		if (pevents[i]->attr.exclude_user)
			mmcr2 |= MMCR2_FCP(pmc);

		if (pevents[i]->attr.exclude_hv)
			mmcr2 |= MMCR2_FCH(pmc);

		if (pevents[i]->attr.exclude_kernel) {
			if (cpu_has_feature(CPU_FTR_HVMODE))
				mmcr2 |= MMCR2_FCH(pmc);
			else
				mmcr2 |= MMCR2_FCS(pmc);
		}

		if (pevents[i]->attr.exclude_idle)
			mmcr2 |= MMCR2_FCWAIT(pmc);

		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			if (pmc <= 4) {
				val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
					p10_EVENT_MMCR3_MASK;
				mmcr3 |= val << MMCR3_SHIFT(pmc);
			}
		}

		hwc[i] = pmc - 1;
	}

	/* Return MMCRx values */
	mmcr->mmcr0 = 0;

	/* pmc_inuse is 1-based */
	if (pmc_inuse & 2)
		mmcr->mmcr0 = MMCR0_PMC1CE;

	if (pmc_inuse & 0x7c)
		mmcr->mmcr0 |= MMCR0_PMCjCE;

	/* If we're not using PMC 5 or 6, freeze them */
	if (!(pmc_inuse & 0x60))
		mmcr->mmcr0 |= MMCR0_FC56;

	/*
	 * Set MMCR0[PMCCEXT] for p10, which restricts access to group B
	 * registers when MMCR0[PMCC] = 0b00.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		mmcr->mmcr0 |= MMCR0_PMCCEXT;

	mmcr->mmcr1 = mmcr1;
	mmcr->mmcra = mmcra;
	mmcr->mmcr2 = mmcr2;
	mmcr->mmcr3 = mmcr3;

	return 0;
}

void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
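	/* pmc here is 0-based; MMCR1_PMCSEL_SHIFT() expects 1-based */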
	if (pmc <= 3)
		mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
}

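/*
 * Find the row of ev_alt[] that contains this event. Rows are assumed
 * to be sorted by their first entry, which is what allows the early
 * break below.
 */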
static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size)
{
	int i, j;

	for (i = 0; i < size; ++i) {
		if (event < ev_alt[i][0])
			break;

		for (j = 0; j < MAX_ALT && ev_alt[i][j]; ++j)
			if (event == ev_alt[i][j])
				return i;
	}

	return -1;
}

int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
			    const unsigned int ev_alt[][MAX_ALT])
{
	int i, j, num_alt = 0;
	u64 alt_event;

	alt[num_alt++] = event;
	i = find_alternative(event, ev_alt, size);
	if (i >= 0) {
		/* Filter out the original event, it's already in alt[0] */
		for (j = 0; j < MAX_ALT; ++j) {
			alt_event = ev_alt[i][j];
			if (alt_event && alt_event != event)
				alt[num_alt++] = alt_event;
		}
	}

	if (flags & PPMU_ONLY_COUNT_RUN) {
		/*
		 * We're only counting in RUN state, so PM_CYC is equivalent to
		 * PM_RUN_CYC and PM_INST_CMPL to PM_RUN_INST_CMPL.
		 */
		j = num_alt;
		for (i = 0; i < num_alt; ++i) {
			switch (alt[i]) {
			case 0x1e: /* PM_CYC */
				alt[j++] = 0x600f4; /* PM_RUN_CYC */
				break;
			case 0x600f4: /* PM_RUN_CYC */
				alt[j++] = 0x1e; /* PM_CYC */
				break;
			case 0x2: /* PM_INST_CMPL */
				alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
				break;
			case 0x500fa: /* PM_RUN_INST_CMPL */
				alt[j++] = 0x2; /* PM_INST_CMPL */
				break;
			}
		}
		num_alt = j;
	}

	return num_alt;
}

int isa3XX_check_attr_config(struct perf_event *ev)
{
	u64 val, sample_mode;
	u64 event = ev->attr.config;

	val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
	sample_mode = val & 0x3;

	/*
	 * MMCRA[61:62] is Random Sampling Mode (SM).
	 * A value of 0b11 is reserved.
	 */
	if (sample_mode == 0x3)
		return -EINVAL;

	/*
	 * Check for all reserved values.
	 * Source: Performance Monitoring Unit User Guide
	 */
	switch (val) {
	case 0x5:
	case 0x9:
	case 0xD:
	case 0x19:
	case 0x1D:
	case 0x1A:
	case 0x1E:
		return -EINVAL;
	}

	/*
	 * MMCRA[48:51]/[52:55] are the Threshold Start/Stop
	 * Event Selection bits; 0b11110000/0b00001111 is reserved.
	 */
	val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
	if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF))
		return -EINVAL;

	return 0;
}