1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Performance event support for s390x - CPU-measurement Counter Facility |
4 | * |
5 | * Copyright IBM Corp. 2012, 2023 |
6 | * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> |
7 | * Thomas Richter <tmricht@linux.ibm.com> |
8 | */ |
9 | #define KMSG_COMPONENT "cpum_cf" |
10 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
11 | |
12 | #include <linux/kernel.h> |
13 | #include <linux/kernel_stat.h> |
14 | #include <linux/percpu.h> |
15 | #include <linux/notifier.h> |
16 | #include <linux/init.h> |
17 | #include <linux/export.h> |
18 | #include <linux/miscdevice.h> |
19 | #include <linux/perf_event.h> |
20 | |
21 | #include <asm/cpu_mf.h> |
22 | #include <asm/hwctrset.h> |
23 | #include <asm/debug.h> |
24 | |
/*
 * Counter sets handled by this driver. The numeric value of an entry is
 * used as array index, for example into cpumf_ctr_ctl[] and
 * cpumf_ctr_setsizes[].
 */
enum cpumf_ctr_set {
	CPUMF_CTR_SET_BASIC	= 0,	/* Basic Counter Set */
	CPUMF_CTR_SET_USER	= 1,	/* Problem-State Counter Set */
	CPUMF_CTR_SET_CRYPTO	= 2,	/* Crypto-Activity Counter Set */
	CPUMF_CTR_SET_EXT	= 3,	/* Extended Counter Set */
	CPUMF_CTR_SET_MT_DIAG	= 4,	/* MT-diagnostic Counter Set */

	/* Number of counter sets, must stay the last entry */
	CPUMF_CTR_SET_MAX,
};
35 | |
/*
 * Bit positions inside the 64-bit control word maintained by the
 * ctr_set_*() helpers: the enable controls start at bit 16, the
 * activation controls start at bit 0.
 */
#define CPUMF_LCCTL_ENABLE_SHIFT	16
#define CPUMF_LCCTL_ACTCTL_SHIFT	0
38 | |
39 | static inline void ctr_set_enable(u64 *state, u64 ctrsets) |
40 | { |
41 | *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; |
42 | } |
43 | |
44 | static inline void ctr_set_disable(u64 *state, u64 ctrsets) |
45 | { |
46 | *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); |
47 | } |
48 | |
49 | static inline void ctr_set_start(u64 *state, u64 ctrsets) |
50 | { |
51 | *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; |
52 | } |
53 | |
54 | static inline void ctr_set_stop(u64 *state, u64 ctrsets) |
55 | { |
56 | *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); |
57 | } |
58 | |
59 | static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) |
60 | { |
61 | switch (set) { |
62 | case CPUMF_CTR_SET_BASIC: |
63 | return stcctm(BASIC, range, dest); |
64 | case CPUMF_CTR_SET_USER: |
65 | return stcctm(PROBLEM_STATE, range, dest); |
66 | case CPUMF_CTR_SET_CRYPTO: |
67 | return stcctm(CRYPTO_ACTIVITY, range, dest); |
68 | case CPUMF_CTR_SET_EXT: |
69 | return stcctm(EXTENDED, range, dest); |
70 | case CPUMF_CTR_SET_MT_DIAG: |
71 | return stcctm(MT_DIAG_CLEARING, range, dest); |
72 | case CPUMF_CTR_SET_MAX: |
73 | return 3; |
74 | } |
75 | return 3; |
76 | } |
77 | |
78 | struct cpu_cf_events { |
79 | refcount_t refcnt; /* Reference count */ |
80 | atomic_t ctr_set[CPUMF_CTR_SET_MAX]; |
81 | u64 state; /* For perf_event_open SVC */ |
82 | u64 dev_state; /* For /dev/hwctr */ |
83 | unsigned int flags; |
84 | size_t used; /* Bytes used in data */ |
85 | size_t usedss; /* Bytes used in start/stop */ |
86 | unsigned char start[PAGE_SIZE]; /* Counter set at event add */ |
87 | unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ |
88 | unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */ |
89 | unsigned int sets; /* # Counter set saved in memory */ |
90 | }; |
91 | |
92 | static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ |
93 | static debug_info_t *cf_dbg; |
94 | |
95 | /* |
96 | * The CPU Measurement query counter information instruction contains |
97 | * information which varies per machine generation, but is constant and |
98 | * does not change when running on a particular machine, such as counter |
99 | * first and second version number. This is needed to determine the size |
100 | * of counter sets. Extract this information at device driver initialization. |
101 | */ |
102 | static struct cpumf_ctr_info cpumf_ctr_info; |
103 | |
/* Per CPU slot holding the pointer to that CPU's counter facility state. */
struct cpu_cf_ptr {
	struct cpu_cf_events *cpucf;	/* NULL while nothing is installed */
};
107 | |
108 | static struct cpu_cf_root { /* Anchor to per CPU data */ |
109 | refcount_t refcnt; /* Overall active events */ |
110 | struct cpu_cf_ptr __percpu *cfptr; |
111 | } cpu_cf_root; |
112 | |
/*
 * Serialize event initialization and event removal. Both run in task
 * context and are triggered from user space by the perf_event_open() and
 * close() system calls.
 *
 * The mutex is taken by
 * - cpum_cf_alloc_cpu(), called at event initialization via
 *   cpumf_pmu_event_init(), and
 * - cpum_cf_free_cpu(), called at event removal via the call back function
 *   hw_perf_event_destroy() when the event is deleted.
 *
 * This keeps the bookkeeping anchored by struct cpu_cf_root consistent,
 * that is cpu_cf_root::refcnt and the per CPU pointers stored in
 * cpu_cf_root::cfptr.
 */
static DEFINE_MUTEX(pmc_reserve_mutex);
127 | |
128 | /* |
129 | * Get pointer to per-cpu structure. |
130 | * |
131 | * Function get_cpu_cfhw() is called from |
132 | * - cfset_copy_all(): This function is protected by cpus_read_lock(), so |
133 | * CPU hot plug remove can not happen. Event removal requires a close() |
134 | * first. |
135 | * |
136 | * Function this_cpu_cfhw() is called from perf common code functions: |
137 | * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): |
138 | * All functions execute with interrupts disabled on that particular CPU. |
139 | * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). |
140 | * |
141 | * Therefore it is safe to access the CPU specific pointer to the event. |
142 | */ |
143 | static struct cpu_cf_events *get_cpu_cfhw(int cpu) |
144 | { |
145 | struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; |
146 | |
147 | if (p) { |
148 | struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); |
149 | |
150 | return q->cpucf; |
151 | } |
152 | return NULL; |
153 | } |
154 | |
/* Same as get_cpu_cfhw(), but for the CPU this code is executing on. */
static struct cpu_cf_events *this_cpu_cfhw(void)
{
	int cpu = smp_processor_id();

	return get_cpu_cfhw(cpu);
}
159 | |
/*
 * Disable all counter sets on the executing CPU by loading a zero control
 * word. Used as on_each_cpu() call back, the argument @flags is not used.
 */
static void cpum_cf_reset_cpu(void *flags)
{
	lcctl(0);
}
165 | |
166 | /* Free per CPU data when the last event is removed. */ |
167 | static void cpum_cf_free_root(void) |
168 | { |
169 | if (!refcount_dec_and_test(r: &cpu_cf_root.refcnt)) |
170 | return; |
171 | free_percpu(pdata: cpu_cf_root.cfptr); |
172 | cpu_cf_root.cfptr = NULL; |
173 | irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
174 | on_each_cpu(func: cpum_cf_reset_cpu, NULL, wait: 1); |
175 | debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n" , |
176 | __func__, refcount_read(r: &cpu_cf_root.refcnt), |
177 | !cpu_cf_root.cfptr); |
178 | } |
179 | |
180 | /* |
181 | * On initialization of first event also allocate per CPU data dynamically. |
182 | * Start with an array of pointers, the array size is the maximum number of |
183 | * CPUs possible, which might be larger than the number of CPUs currently |
184 | * online. |
185 | */ |
186 | static int cpum_cf_alloc_root(void) |
187 | { |
188 | int rc = 0; |
189 | |
190 | if (refcount_inc_not_zero(r: &cpu_cf_root.refcnt)) |
191 | return rc; |
192 | |
193 | /* The memory is already zeroed. */ |
194 | cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr); |
195 | if (cpu_cf_root.cfptr) { |
196 | refcount_set(r: &cpu_cf_root.refcnt, n: 1); |
197 | on_each_cpu(func: cpum_cf_reset_cpu, NULL, wait: 1); |
198 | irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
199 | } else { |
200 | rc = -ENOMEM; |
201 | } |
202 | |
203 | return rc; |
204 | } |
205 | |
206 | /* Free CPU counter data structure for a PMU */ |
207 | static void cpum_cf_free_cpu(int cpu) |
208 | { |
209 | struct cpu_cf_events *cpuhw; |
210 | struct cpu_cf_ptr *p; |
211 | |
212 | mutex_lock(&pmc_reserve_mutex); |
213 | /* |
214 | * When invoked via CPU hotplug handler, there might be no events |
215 | * installed or that particular CPU might not have an |
216 | * event installed. This anchor pointer can be NULL! |
217 | */ |
218 | if (!cpu_cf_root.cfptr) |
219 | goto out; |
220 | p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); |
221 | cpuhw = p->cpucf; |
222 | /* |
223 | * Might be zero when called from CPU hotplug handler and no event |
224 | * installed on that CPU, but on different CPUs. |
225 | */ |
226 | if (!cpuhw) |
227 | goto out; |
228 | |
229 | if (refcount_dec_and_test(r: &cpuhw->refcnt)) { |
230 | kfree(objp: cpuhw); |
231 | p->cpucf = NULL; |
232 | } |
233 | cpum_cf_free_root(); |
234 | out: |
235 | mutex_unlock(lock: &pmc_reserve_mutex); |
236 | } |
237 | |
238 | /* Allocate CPU counter data structure for a PMU. Called under mutex lock. */ |
239 | static int cpum_cf_alloc_cpu(int cpu) |
240 | { |
241 | struct cpu_cf_events *cpuhw; |
242 | struct cpu_cf_ptr *p; |
243 | int rc; |
244 | |
245 | mutex_lock(&pmc_reserve_mutex); |
246 | rc = cpum_cf_alloc_root(); |
247 | if (rc) |
248 | goto unlock; |
249 | p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); |
250 | cpuhw = p->cpucf; |
251 | |
252 | if (!cpuhw) { |
253 | cpuhw = kzalloc(size: sizeof(*cpuhw), GFP_KERNEL); |
254 | if (cpuhw) { |
255 | p->cpucf = cpuhw; |
256 | refcount_set(r: &cpuhw->refcnt, n: 1); |
257 | } else { |
258 | rc = -ENOMEM; |
259 | } |
260 | } else { |
261 | refcount_inc(r: &cpuhw->refcnt); |
262 | } |
263 | if (rc) { |
264 | /* |
265 | * Error in allocation of event, decrement anchor. Since |
266 | * cpu_cf_event in not created, its destroy() function is not |
267 | * invoked. Adjust the reference counter for the anchor. |
268 | */ |
269 | cpum_cf_free_root(); |
270 | } |
271 | unlock: |
272 | mutex_unlock(lock: &pmc_reserve_mutex); |
273 | return rc; |
274 | } |
275 | |
276 | /* |
277 | * Create/delete per CPU data structures for /dev/hwctr interface and events |
278 | * created by perf_event_open(). |
279 | * If cpu is -1, track task on all available CPUs. This requires |
280 | * allocation of hardware data structures for all CPUs. This setup handles |
281 | * perf_event_open() with task context and /dev/hwctr interface. |
282 | * If cpu is non-zero install event on this CPU only. This setup handles |
283 | * perf_event_open() with CPU context. |
284 | */ |
285 | static int cpum_cf_alloc(int cpu) |
286 | { |
287 | cpumask_var_t mask; |
288 | int rc; |
289 | |
290 | if (cpu == -1) { |
291 | if (!zalloc_cpumask_var(mask: &mask, GFP_KERNEL)) |
292 | return -ENOMEM; |
293 | for_each_online_cpu(cpu) { |
294 | rc = cpum_cf_alloc_cpu(cpu); |
295 | if (rc) { |
296 | for_each_cpu(cpu, mask) |
297 | cpum_cf_free_cpu(cpu); |
298 | break; |
299 | } |
300 | cpumask_set_cpu(cpu, dstp: mask); |
301 | } |
302 | free_cpumask_var(mask); |
303 | } else { |
304 | rc = cpum_cf_alloc_cpu(cpu); |
305 | } |
306 | return rc; |
307 | } |
308 | |
309 | static void cpum_cf_free(int cpu) |
310 | { |
311 | if (cpu == -1) { |
312 | for_each_online_cpu(cpu) |
313 | cpum_cf_free_cpu(cpu); |
314 | } else { |
315 | cpum_cf_free_cpu(cpu); |
316 | } |
317 | } |
318 | |
/* Eye catcher stored in the 'def' field of each counter set header */
#define CF_DIAG_CTRSET_DEF		0xfeef
321 | |
/* Counter sets are stored as data stream in a page sized memory buffer and
 * exported to user space via raw data attached to the event sample data.
 * Each counter set starts with an eight byte header consisting of:
 * - a two byte eye catcher (0xfeef)
 * - a two byte counter set number
 * - a two byte counter set size (indicates the number of counters in this set)
 * - a two byte reserved value (must be zero) to make the header the same
 *   size as a counter value.
 * All counter values are eight byte in size.
 *
 * All counter sets are followed by a 64 byte trailer.
 * The trailer consists of a:
 * - flag field indicating valid fields when corresponding bit set
 * - the counter facility first and second version number
 * - the CPU speed if nonzero
 * - the time stamp the counter sets have been collected
 * - the time of day (TOD) base value
 * - the machine type.
 *
 * The counter sets are saved when the process is prepared to be executed on a
 * CPU and saved again when the process is going to be removed from a CPU.
 * The difference of both counter sets is calculated and stored in the event
 * sample data area.
 */
struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
	unsigned int def:16;	/* Bits  0-15: Data Entry Format (eye catcher) */
	unsigned int set:16;	/* Bits 16-31: Counter set identifier */
	unsigned int ctr:16;	/* Bits 32-47: Number of stored counters */
	unsigned int res1:16;	/* Bits 48-63: Reserved */
};
352 | |
/* Trailer stored behind the counter sets, filled in by cfdiag_trailer() */
struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
	/* Bytes 0 - 7: indicators */
	union {
		struct {
			unsigned int clock_base:1;	/* TOD clock base set */
			unsigned int speed:1;		/* CPU speed set */
			/* Measurement alerts */
			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
			unsigned int caca:1;	/* Counter auth. change alert */
			unsigned int lcda:1;	/* Loss of counter data alert */
		};
		unsigned long flags;		/* 0-63 All indicators */
	};
	/* Bytes 8 - 15: version numbers and CPU speed */
	unsigned int cfvn:16;			/* 64-79 Ctr First Version */
	unsigned int csvn:16;			/* 80-95 Ctr Second Version */
	unsigned int cpu_speed:32;		/* 96-127 CPU speed */
	/* Bytes 16 - 23 */
	unsigned long timestamp;		/* 128-191 Timestamp (TOD) */
	/* Bytes 24 - 55: program usage area, last slot holds the TOD base */
	union {
		struct {
			unsigned long progusage1;
			unsigned long progusage2;
			unsigned long progusage3;
			unsigned long tod_base;
		};
		unsigned long progusage[4];
	};
	/* Bytes 56 - 63 */
	unsigned int mach_type:16;		/* Machine type */
	unsigned int res1:16;			/* Reserved */
	unsigned int res2:32;			/* Reserved */
};
387 | |
388 | /* Create the trailer data at the end of a page. */ |
389 | static void cfdiag_trailer(struct cf_trailer_entry *te) |
390 | { |
391 | struct cpuid cpuid; |
392 | |
393 | te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */ |
394 | te->csvn = cpumf_ctr_info.csvn; |
395 | |
396 | get_cpu_id(&cpuid); /* Machine type */ |
397 | te->mach_type = cpuid.machine; |
398 | te->cpu_speed = cfdiag_cpu_speed; |
399 | if (te->cpu_speed) |
400 | te->speed = 1; |
401 | te->clock_base = 1; /* Save clock base */ |
402 | te->tod_base = tod_clock_base.tod; |
403 | te->timestamp = get_tod_clock_fast(); |
404 | } |
405 | |
406 | /* |
407 | * The number of counters per counter set varies between machine generations, |
408 | * but is constant when running on a particular machine generation. |
409 | * Determine each counter set size at device driver initialization and |
410 | * retrieve it later. |
411 | */ |
412 | static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX]; |
413 | static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) |
414 | { |
415 | size_t ctrset_size = 0; |
416 | |
417 | switch (ctrset) { |
418 | case CPUMF_CTR_SET_BASIC: |
419 | if (cpumf_ctr_info.cfvn >= 1) |
420 | ctrset_size = 6; |
421 | break; |
422 | case CPUMF_CTR_SET_USER: |
423 | if (cpumf_ctr_info.cfvn == 1) |
424 | ctrset_size = 6; |
425 | else if (cpumf_ctr_info.cfvn >= 3) |
426 | ctrset_size = 2; |
427 | break; |
428 | case CPUMF_CTR_SET_CRYPTO: |
429 | if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5) |
430 | ctrset_size = 16; |
431 | else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) |
432 | ctrset_size = 20; |
433 | break; |
434 | case CPUMF_CTR_SET_EXT: |
435 | if (cpumf_ctr_info.csvn == 1) |
436 | ctrset_size = 32; |
437 | else if (cpumf_ctr_info.csvn == 2) |
438 | ctrset_size = 48; |
439 | else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) |
440 | ctrset_size = 128; |
441 | else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) |
442 | ctrset_size = 160; |
443 | break; |
444 | case CPUMF_CTR_SET_MT_DIAG: |
445 | if (cpumf_ctr_info.csvn > 3) |
446 | ctrset_size = 48; |
447 | break; |
448 | case CPUMF_CTR_SET_MAX: |
449 | break; |
450 | } |
451 | cpumf_ctr_setsizes[ctrset] = ctrset_size; |
452 | } |
453 | |
454 | /* |
455 | * Return the maximum possible counter set size (in number of 8 byte counters) |
456 | * depending on type and model number. |
457 | */ |
458 | static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset) |
459 | { |
460 | return cpumf_ctr_setsizes[ctrset]; |
461 | } |
462 | |
463 | /* Read a counter set. The counter set number determines the counter set and |
464 | * the CPUM-CF first and second version number determine the number of |
465 | * available counters in each counter set. |
466 | * Each counter set starts with header containing the counter set number and |
467 | * the number of eight byte counters. |
468 | * |
469 | * The functions returns the number of bytes occupied by this counter set |
470 | * including the header. |
471 | * If there is no counter in the counter set, this counter set is useless and |
472 | * zero is returned on this case. |
473 | * |
474 | * Note that the counter sets may not be enabled or active and the stcctm |
475 | * instruction might return error 3. Depending on error_ok value this is ok, |
476 | * for example when called from cpumf_pmu_start() call back function. |
477 | */ |
478 | static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, |
479 | size_t room, bool error_ok) |
480 | { |
481 | size_t ctrset_size, need = 0; |
482 | int rc = 3; /* Assume write failure */ |
483 | |
484 | ctrdata->def = CF_DIAG_CTRSET_DEF; |
485 | ctrdata->set = ctrset; |
486 | ctrdata->res1 = 0; |
487 | ctrset_size = cpum_cf_read_setsize(ctrset); |
488 | |
489 | if (ctrset_size) { /* Save data */ |
490 | need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); |
491 | if (need <= room) { |
492 | rc = ctr_stcctm(set: ctrset, range: ctrset_size, |
493 | dest: (u64 *)(ctrdata + 1)); |
494 | } |
495 | if (rc != 3 || error_ok) |
496 | ctrdata->ctr = ctrset_size; |
497 | else |
498 | need = 0; |
499 | } |
500 | |
501 | return need; |
502 | } |
503 | |
504 | static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { |
505 | [CPUMF_CTR_SET_BASIC] = 0x02, |
506 | [CPUMF_CTR_SET_USER] = 0x04, |
507 | [CPUMF_CTR_SET_CRYPTO] = 0x08, |
508 | [CPUMF_CTR_SET_EXT] = 0x01, |
509 | [CPUMF_CTR_SET_MT_DIAG] = 0x20, |
510 | }; |
511 | |
512 | /* Read out all counter sets and save them in the provided data buffer. |
513 | * The last 64 byte host an artificial trailer entry. |
514 | */ |
515 | static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, |
516 | bool error_ok) |
517 | { |
518 | struct cf_trailer_entry *trailer; |
519 | size_t offset = 0, done; |
520 | int i; |
521 | |
522 | memset(data, 0, sz); |
523 | sz -= sizeof(*trailer); /* Always room for trailer */ |
524 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { |
525 | struct cf_ctrset_entry *ctrdata = data + offset; |
526 | |
527 | if (!(auth & cpumf_ctr_ctl[i])) |
528 | continue; /* Counter set not authorized */ |
529 | |
530 | done = cfdiag_getctrset(ctrdata, ctrset: i, room: sz - offset, error_ok); |
531 | offset += done; |
532 | } |
533 | trailer = data + offset; |
534 | cfdiag_trailer(te: trailer); |
535 | return offset + sizeof(*trailer); |
536 | } |
537 | |
/* Calculate the difference for each counter in a counter set.
 *
 * @pstart:   counter values saved at start, left unchanged
 * @pstop:    counter values saved at stop, replaced by the increments
 * @counters: number of counters in both arrays, nothing is done if <= 0
 *
 * The subtraction is done with unsigned 64 bit arithmetic, which is modulo
 * 2^64. So the increment is correct even when a counter wrapped around
 * between start and stop, same as the overflow handling in
 * hw_perf_event_update().
 */
static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
{
	int i;

	for (i = 0; i < counters; i++)
		pstop[i] -= pstart[i];
}
547 | |
548 | /* Scan the counter sets and calculate the difference of each counter |
549 | * in each set. The result is the increment of each counter during the |
550 | * period the counter set has been activated. |
551 | * |
552 | * Return true on success. |
553 | */ |
554 | static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) |
555 | { |
556 | struct cf_trailer_entry *trailer_start, *trailer_stop; |
557 | struct cf_ctrset_entry *ctrstart, *ctrstop; |
558 | size_t offset = 0; |
559 | |
560 | auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; |
561 | do { |
562 | ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); |
563 | ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); |
564 | |
565 | if (memcmp(p: ctrstop, q: ctrstart, size: sizeof(*ctrstop))) { |
566 | pr_err_once("cpum_cf_diag counter set compare error " |
567 | "in set %i\n" , ctrstart->set); |
568 | return 0; |
569 | } |
570 | auth &= ~cpumf_ctr_ctl[ctrstart->set]; |
571 | if (ctrstart->def == CF_DIAG_CTRSET_DEF) { |
572 | cfdiag_diffctrset(pstart: (u64 *)(ctrstart + 1), |
573 | pstop: (u64 *)(ctrstop + 1), counters: ctrstart->ctr); |
574 | offset += ctrstart->ctr * sizeof(u64) + |
575 | sizeof(*ctrstart); |
576 | } |
577 | } while (ctrstart->def && auth); |
578 | |
579 | /* Save time_stamp from start of event in stop's trailer */ |
580 | trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); |
581 | trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); |
582 | trailer_stop->progusage[0] = trailer_start->timestamp; |
583 | |
584 | return 1; |
585 | } |
586 | |
587 | static enum cpumf_ctr_set get_counter_set(u64 event) |
588 | { |
589 | int set = CPUMF_CTR_SET_MAX; |
590 | |
591 | if (event < 32) |
592 | set = CPUMF_CTR_SET_BASIC; |
593 | else if (event < 64) |
594 | set = CPUMF_CTR_SET_USER; |
595 | else if (event < 128) |
596 | set = CPUMF_CTR_SET_CRYPTO; |
597 | else if (event < 288) |
598 | set = CPUMF_CTR_SET_EXT; |
599 | else if (event >= 448 && event < 496) |
600 | set = CPUMF_CTR_SET_MT_DIAG; |
601 | |
602 | return set; |
603 | } |
604 | |
605 | static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) |
606 | { |
607 | u16 mtdiag_ctl; |
608 | int err = 0; |
609 | |
610 | /* check required version for counter sets */ |
611 | switch (set) { |
612 | case CPUMF_CTR_SET_BASIC: |
613 | case CPUMF_CTR_SET_USER: |
614 | if (cpumf_ctr_info.cfvn < 1) |
615 | err = -EOPNOTSUPP; |
616 | break; |
617 | case CPUMF_CTR_SET_CRYPTO: |
618 | if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 && |
619 | config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83)) |
620 | err = -EOPNOTSUPP; |
621 | break; |
622 | case CPUMF_CTR_SET_EXT: |
623 | if (cpumf_ctr_info.csvn < 1) |
624 | err = -EOPNOTSUPP; |
625 | if ((cpumf_ctr_info.csvn == 1 && config > 159) || |
626 | (cpumf_ctr_info.csvn == 2 && config > 175) || |
627 | (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 && |
628 | config > 255) || |
629 | (cpumf_ctr_info.csvn >= 6 && config > 287)) |
630 | err = -EOPNOTSUPP; |
631 | break; |
632 | case CPUMF_CTR_SET_MT_DIAG: |
633 | if (cpumf_ctr_info.csvn <= 3) |
634 | err = -EOPNOTSUPP; |
635 | /* |
636 | * MT-diagnostic counters are read-only. The counter set |
637 | * is automatically enabled and activated on all CPUs with |
638 | * multithreading (SMT). Deactivation of multithreading |
639 | * also disables the counter set. State changes are ignored |
640 | * by lcctl(). Because Linux controls SMT enablement through |
641 | * a kernel parameter only, the counter set is either disabled |
642 | * or enabled and active. |
643 | * |
644 | * Thus, the counters can only be used if SMT is on and the |
645 | * counter set is enabled and active. |
646 | */ |
647 | mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; |
648 | if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) && |
649 | (cpumf_ctr_info.enable_ctl & mtdiag_ctl) && |
650 | (cpumf_ctr_info.act_ctl & mtdiag_ctl))) |
651 | err = -EOPNOTSUPP; |
652 | break; |
653 | case CPUMF_CTR_SET_MAX: |
654 | err = -EOPNOTSUPP; |
655 | } |
656 | |
657 | return err; |
658 | } |
659 | |
660 | /* |
661 | * Change the CPUMF state to active. |
662 | * Enable and activate the CPU-counter sets according |
663 | * to the per-cpu control state. |
664 | */ |
665 | static void cpumf_pmu_enable(struct pmu *pmu) |
666 | { |
667 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
668 | int err; |
669 | |
670 | if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) |
671 | return; |
672 | |
673 | err = lcctl(cpuhw->state | cpuhw->dev_state); |
674 | if (err) |
675 | pr_err("Enabling the performance measuring unit failed with rc=%x\n" , err); |
676 | else |
677 | cpuhw->flags |= PMU_F_ENABLED; |
678 | } |
679 | |
680 | /* |
681 | * Change the CPUMF state to inactive. |
682 | * Disable and enable (inactive) the CPU-counter sets according |
683 | * to the per-cpu control state. |
684 | */ |
685 | static void cpumf_pmu_disable(struct pmu *pmu) |
686 | { |
687 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
688 | u64 inactive; |
689 | int err; |
690 | |
691 | if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) |
692 | return; |
693 | |
694 | inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); |
695 | inactive |= cpuhw->dev_state; |
696 | err = lcctl(inactive); |
697 | if (err) |
698 | pr_err("Disabling the performance measuring unit failed with rc=%x\n" , err); |
699 | else |
700 | cpuhw->flags &= ~PMU_F_ENABLED; |
701 | } |
702 | |
703 | /* Release the PMU if event is the last perf event */ |
704 | static void hw_perf_event_destroy(struct perf_event *event) |
705 | { |
706 | cpum_cf_free(cpu: event->cpu); |
707 | } |
708 | |
709 | /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ |
710 | static const int cpumf_generic_events_basic[] = { |
711 | [PERF_COUNT_HW_CPU_CYCLES] = 0, |
712 | [PERF_COUNT_HW_INSTRUCTIONS] = 1, |
713 | [PERF_COUNT_HW_CACHE_REFERENCES] = -1, |
714 | [PERF_COUNT_HW_CACHE_MISSES] = -1, |
715 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, |
716 | [PERF_COUNT_HW_BRANCH_MISSES] = -1, |
717 | [PERF_COUNT_HW_BUS_CYCLES] = -1, |
718 | }; |
719 | /* CPUMF <-> perf event mappings for userspace (problem-state set) */ |
720 | static const int cpumf_generic_events_user[] = { |
721 | [PERF_COUNT_HW_CPU_CYCLES] = 32, |
722 | [PERF_COUNT_HW_INSTRUCTIONS] = 33, |
723 | [PERF_COUNT_HW_CACHE_REFERENCES] = -1, |
724 | [PERF_COUNT_HW_CACHE_MISSES] = -1, |
725 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, |
726 | [PERF_COUNT_HW_BRANCH_MISSES] = -1, |
727 | [PERF_COUNT_HW_BUS_CYCLES] = -1, |
728 | }; |
729 | |
730 | static int is_userspace_event(u64 ev) |
731 | { |
732 | return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || |
733 | cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; |
734 | } |
735 | |
736 | static int __hw_perf_event_init(struct perf_event *event, unsigned int type) |
737 | { |
738 | struct perf_event_attr *attr = &event->attr; |
739 | struct hw_perf_event *hwc = &event->hw; |
740 | enum cpumf_ctr_set set; |
741 | u64 ev; |
742 | |
743 | switch (type) { |
744 | case PERF_TYPE_RAW: |
745 | /* Raw events are used to access counters directly, |
746 | * hence do not permit excludes */ |
747 | if (attr->exclude_kernel || attr->exclude_user || |
748 | attr->exclude_hv) |
749 | return -EOPNOTSUPP; |
750 | ev = attr->config; |
751 | break; |
752 | |
753 | case PERF_TYPE_HARDWARE: |
754 | if (is_sampling_event(event)) /* No sampling support */ |
755 | return -ENOENT; |
756 | ev = attr->config; |
757 | if (!attr->exclude_user && attr->exclude_kernel) { |
758 | /* |
759 | * Count user space (problem-state) only |
760 | * Handle events 32 and 33 as 0:u and 1:u |
761 | */ |
762 | if (!is_userspace_event(ev)) { |
763 | if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) |
764 | return -EOPNOTSUPP; |
765 | ev = cpumf_generic_events_user[ev]; |
766 | } |
767 | } else if (!attr->exclude_kernel && attr->exclude_user) { |
768 | /* No support for kernel space counters only */ |
769 | return -EOPNOTSUPP; |
770 | } else { |
771 | /* Count user and kernel space, incl. events 32 + 33 */ |
772 | if (!is_userspace_event(ev)) { |
773 | if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) |
774 | return -EOPNOTSUPP; |
775 | ev = cpumf_generic_events_basic[ev]; |
776 | } |
777 | } |
778 | break; |
779 | |
780 | default: |
781 | return -ENOENT; |
782 | } |
783 | |
784 | if (ev == -1) |
785 | return -ENOENT; |
786 | |
787 | if (ev > PERF_CPUM_CF_MAX_CTR) |
788 | return -ENOENT; |
789 | |
790 | /* Obtain the counter set to which the specified counter belongs */ |
791 | set = get_counter_set(event: ev); |
792 | switch (set) { |
793 | case CPUMF_CTR_SET_BASIC: |
794 | case CPUMF_CTR_SET_USER: |
795 | case CPUMF_CTR_SET_CRYPTO: |
796 | case CPUMF_CTR_SET_EXT: |
797 | case CPUMF_CTR_SET_MT_DIAG: |
798 | /* |
799 | * Use the hardware perf event structure to store the |
800 | * counter number in the 'config' member and the counter |
801 | * set number in the 'config_base' as bit mask. |
802 | * It is later used to enable/disable the counter(s). |
803 | */ |
804 | hwc->config = ev; |
805 | hwc->config_base = cpumf_ctr_ctl[set]; |
806 | break; |
807 | case CPUMF_CTR_SET_MAX: |
808 | /* The counter could not be associated to a counter set */ |
809 | return -EINVAL; |
810 | } |
811 | |
812 | /* Initialize for using the CPU-measurement counter facility */ |
813 | if (cpum_cf_alloc(cpu: event->cpu)) |
814 | return -ENOMEM; |
815 | event->destroy = hw_perf_event_destroy; |
816 | |
817 | /* |
818 | * Finally, validate version and authorization of the counter set. |
819 | * If the particular CPU counter set is not authorized, |
820 | * return with -ENOENT in order to fall back to other |
821 | * PMUs that might suffice the event request. |
822 | */ |
823 | if (!(hwc->config_base & cpumf_ctr_info.auth_ctl)) |
824 | return -ENOENT; |
825 | return validate_ctr_version(config: hwc->config, set); |
826 | } |
827 | |
828 | /* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different |
829 | * attribute::type values: |
830 | * - PERF_TYPE_HARDWARE: |
831 | * - pmu->type: |
832 | * Handle both type of invocations identical. They address the same hardware. |
833 | * The result is different when event modifiers exclude_kernel and/or |
834 | * exclude_user are also set. |
835 | */ |
836 | static int cpumf_pmu_event_type(struct perf_event *event) |
837 | { |
838 | u64 ev = event->attr.config; |
839 | |
840 | if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || |
841 | cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || |
842 | cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || |
843 | cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) |
844 | return PERF_TYPE_HARDWARE; |
845 | return PERF_TYPE_RAW; |
846 | } |
847 | |
848 | static int cpumf_pmu_event_init(struct perf_event *event) |
849 | { |
850 | unsigned int type = event->attr.type; |
851 | int err; |
852 | |
853 | if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) |
854 | err = __hw_perf_event_init(event, type); |
855 | else if (event->pmu->type == type) |
856 | /* Registered as unknown PMU */ |
857 | err = __hw_perf_event_init(event, type: cpumf_pmu_event_type(event)); |
858 | else |
859 | return -ENOENT; |
860 | |
861 | if (unlikely(err) && event->destroy) |
862 | event->destroy(event); |
863 | |
864 | return err; |
865 | } |
866 | |
867 | static int hw_perf_event_reset(struct perf_event *event) |
868 | { |
869 | u64 prev, new; |
870 | int err; |
871 | |
872 | do { |
873 | prev = local64_read(&event->hw.prev_count); |
874 | err = ecctr(event->hw.config, &new); |
875 | if (err) { |
876 | if (err != 3) |
877 | break; |
878 | /* The counter is not (yet) available. This |
879 | * might happen if the counter set to which |
880 | * this counter belongs is in the disabled |
881 | * state. |
882 | */ |
883 | new = 0; |
884 | } |
885 | } while (local64_cmpxchg(l: &event->hw.prev_count, old: prev, new) != prev); |
886 | |
887 | return err; |
888 | } |
889 | |
890 | static void hw_perf_event_update(struct perf_event *event) |
891 | { |
892 | u64 prev, new, delta; |
893 | int err; |
894 | |
895 | do { |
896 | prev = local64_read(&event->hw.prev_count); |
897 | err = ecctr(event->hw.config, &new); |
898 | if (err) |
899 | return; |
900 | } while (local64_cmpxchg(l: &event->hw.prev_count, old: prev, new) != prev); |
901 | |
902 | delta = (prev <= new) ? new - prev |
903 | : (-1ULL - prev) + new + 1; /* overflow */ |
904 | local64_add(delta, &event->count); |
905 | } |
906 | |
907 | static void cpumf_pmu_read(struct perf_event *event) |
908 | { |
909 | if (event->hw.state & PERF_HES_STOPPED) |
910 | return; |
911 | |
912 | hw_perf_event_update(event); |
913 | } |
914 | |
915 | static void cpumf_pmu_start(struct perf_event *event, int flags) |
916 | { |
917 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
918 | struct hw_perf_event *hwc = &event->hw; |
919 | int i; |
920 | |
921 | if (!(hwc->state & PERF_HES_STOPPED)) |
922 | return; |
923 | |
924 | hwc->state = 0; |
925 | |
926 | /* (Re-)enable and activate the counter set */ |
927 | ctr_set_enable(state: &cpuhw->state, ctrsets: hwc->config_base); |
928 | ctr_set_start(state: &cpuhw->state, ctrsets: hwc->config_base); |
929 | |
930 | /* The counter set to which this counter belongs can be already active. |
931 | * Because all counters in a set are active, the event->hw.prev_count |
932 | * needs to be synchronized. At this point, the counter set can be in |
933 | * the inactive or disabled state. |
934 | */ |
935 | if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { |
936 | cpuhw->usedss = cfdiag_getctr(data: cpuhw->start, |
937 | sz: sizeof(cpuhw->start), |
938 | auth: hwc->config_base, error_ok: true); |
939 | } else { |
940 | hw_perf_event_reset(event); |
941 | } |
942 | |
943 | /* Increment refcount for counter sets */ |
944 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) |
945 | if ((hwc->config_base & cpumf_ctr_ctl[i])) |
946 | atomic_inc(v: &cpuhw->ctr_set[i]); |
947 | } |
948 | |
949 | /* Create perf event sample with the counter sets as raw data. The sample |
950 | * is then pushed to the event subsystem and the function checks for |
951 | * possible event overflows. If an event overflow occurs, the PMU is |
952 | * stopped. |
953 | * |
954 | * Return non-zero if an event overflow occurred. |
955 | */ |
956 | static int cfdiag_push_sample(struct perf_event *event, |
957 | struct cpu_cf_events *cpuhw) |
958 | { |
959 | struct perf_sample_data data; |
960 | struct perf_raw_record raw; |
961 | struct pt_regs regs; |
962 | int overflow; |
963 | |
964 | /* Setup perf sample */ |
965 | perf_sample_data_init(data: &data, addr: 0, period: event->hw.last_period); |
966 | memset(®s, 0, sizeof(regs)); |
967 | memset(&raw, 0, sizeof(raw)); |
968 | |
969 | if (event->attr.sample_type & PERF_SAMPLE_CPU) |
970 | data.cpu_entry.cpu = event->cpu; |
971 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { |
972 | raw.frag.size = cpuhw->usedss; |
973 | raw.frag.data = cpuhw->stop; |
974 | perf_sample_save_raw_data(data: &data, raw: &raw); |
975 | } |
976 | |
977 | overflow = perf_event_overflow(event, data: &data, regs: ®s); |
978 | if (overflow) |
979 | event->pmu->stop(event, 0); |
980 | |
981 | perf_event_update_userpage(event); |
982 | return overflow; |
983 | } |
984 | |
985 | static void cpumf_pmu_stop(struct perf_event *event, int flags) |
986 | { |
987 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
988 | struct hw_perf_event *hwc = &event->hw; |
989 | int i; |
990 | |
991 | if (!(hwc->state & PERF_HES_STOPPED)) { |
992 | /* Decrement reference count for this counter set and if this |
993 | * is the last used counter in the set, clear activation |
994 | * control and set the counter set state to inactive. |
995 | */ |
996 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { |
997 | if (!(hwc->config_base & cpumf_ctr_ctl[i])) |
998 | continue; |
999 | if (!atomic_dec_return(v: &cpuhw->ctr_set[i])) |
1000 | ctr_set_stop(state: &cpuhw->state, ctrsets: cpumf_ctr_ctl[i]); |
1001 | } |
1002 | hwc->state |= PERF_HES_STOPPED; |
1003 | } |
1004 | |
1005 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { |
1006 | if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { |
1007 | local64_inc(&event->count); |
1008 | cpuhw->usedss = cfdiag_getctr(data: cpuhw->stop, |
1009 | sz: sizeof(cpuhw->stop), |
1010 | auth: event->hw.config_base, |
1011 | error_ok: false); |
1012 | if (cfdiag_diffctr(cpuhw, auth: event->hw.config_base)) |
1013 | cfdiag_push_sample(event, cpuhw); |
1014 | } else { |
1015 | hw_perf_event_update(event); |
1016 | } |
1017 | hwc->state |= PERF_HES_UPTODATE; |
1018 | } |
1019 | } |
1020 | |
1021 | static int cpumf_pmu_add(struct perf_event *event, int flags) |
1022 | { |
1023 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
1024 | |
1025 | ctr_set_enable(state: &cpuhw->state, ctrsets: event->hw.config_base); |
1026 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
1027 | |
1028 | if (flags & PERF_EF_START) |
1029 | cpumf_pmu_start(event, PERF_EF_RELOAD); |
1030 | |
1031 | return 0; |
1032 | } |
1033 | |
1034 | static void cpumf_pmu_del(struct perf_event *event, int flags) |
1035 | { |
1036 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
1037 | int i; |
1038 | |
1039 | cpumf_pmu_stop(event, PERF_EF_UPDATE); |
1040 | |
1041 | /* Check if any counter in the counter set is still used. If not used, |
1042 | * change the counter set to the disabled state. This also clears the |
1043 | * content of all counters in the set. |
1044 | * |
1045 | * When a new perf event has been added but not yet started, this can |
1046 | * clear enable control and resets all counters in a set. Therefore, |
1047 | * cpumf_pmu_start() always has to reenable a counter set. |
1048 | */ |
1049 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) |
1050 | if (!atomic_read(v: &cpuhw->ctr_set[i])) |
1051 | ctr_set_disable(state: &cpuhw->state, ctrsets: cpumf_ctr_ctl[i]); |
1052 | } |
1053 | |
1054 | /* Performance monitoring unit for s390x */ |
1055 | static struct pmu cpumf_pmu = { |
1056 | .task_ctx_nr = perf_sw_context, |
1057 | .capabilities = PERF_PMU_CAP_NO_INTERRUPT, |
1058 | .pmu_enable = cpumf_pmu_enable, |
1059 | .pmu_disable = cpumf_pmu_disable, |
1060 | .event_init = cpumf_pmu_event_init, |
1061 | .add = cpumf_pmu_add, |
1062 | .del = cpumf_pmu_del, |
1063 | .start = cpumf_pmu_start, |
1064 | .stop = cpumf_pmu_stop, |
1065 | .read = cpumf_pmu_read, |
1066 | }; |
1067 | |
1068 | static struct cfset_session { /* CPUs and counter set bit mask */ |
1069 | struct list_head head; /* Head of list of active processes */ |
1070 | } cfset_session = { |
1071 | .head = LIST_HEAD_INIT(cfset_session.head) |
1072 | }; |
1073 | |
1074 | static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ |
1075 | /* |
1076 | * Synchronize access to device /dev/hwc. This mutex protects against |
1077 | * concurrent access to functions cfset_open() and cfset_release(). |
1078 | * Same for CPU hotplug add and remove events triggering |
1079 | * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). |
1080 | * It also serializes concurrent device ioctl access from multiple |
1081 | * processes accessing /dev/hwc. |
1082 | * |
1083 | * The mutex protects concurrent access to the /dev/hwctr session management |
1084 | * struct cfset_session and reference counting variable cfset_opencnt. |
1085 | */ |
1086 | static DEFINE_MUTEX(cfset_ctrset_mutex); |
1087 | |
1088 | /* |
1089 | * CPU hotplug handles only /dev/hwctr device. |
1090 | * For perf_event_open() the CPU hotplug handling is done on kernel common |
1091 | * code: |
1092 | * - CPU add: Nothing is done since a file descriptor can not be created |
1093 | * and returned to the user. |
1094 | * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and |
1095 | * pmu_delete(). The event itself is removed when the file descriptor is |
1096 | * closed. |
1097 | */ |
1098 | static int cfset_online_cpu(unsigned int cpu); |
1099 | |
1100 | static int cpum_cf_online_cpu(unsigned int cpu) |
1101 | { |
1102 | int rc = 0; |
1103 | |
1104 | /* |
1105 | * Ignore notification for perf_event_open(). |
1106 | * Handle only /dev/hwctr device sessions. |
1107 | */ |
1108 | mutex_lock(&cfset_ctrset_mutex); |
1109 | if (refcount_read(r: &cfset_opencnt)) { |
1110 | rc = cpum_cf_alloc_cpu(cpu); |
1111 | if (!rc) |
1112 | cfset_online_cpu(cpu); |
1113 | } |
1114 | mutex_unlock(lock: &cfset_ctrset_mutex); |
1115 | return rc; |
1116 | } |
1117 | |
1118 | static int cfset_offline_cpu(unsigned int cpu); |
1119 | |
1120 | static int cpum_cf_offline_cpu(unsigned int cpu) |
1121 | { |
1122 | /* |
1123 | * During task exit processing of grouped perf events triggered by CPU |
1124 | * hotplug processing, pmu_disable() is called as part of perf context |
1125 | * removal process. Therefore do not trigger event removal now for |
1126 | * perf_event_open() created events. Perf common code triggers event |
1127 | * destruction when the event file descriptor is closed. |
1128 | * |
1129 | * Handle only /dev/hwctr device sessions. |
1130 | */ |
1131 | mutex_lock(&cfset_ctrset_mutex); |
1132 | if (refcount_read(r: &cfset_opencnt)) { |
1133 | cfset_offline_cpu(cpu); |
1134 | cpum_cf_free_cpu(cpu); |
1135 | } |
1136 | mutex_unlock(lock: &cfset_ctrset_mutex); |
1137 | return 0; |
1138 | } |
1139 | |
/* Report whether the store counter set multiple instruction is available,
 * which is indicated by facility bit 142.
 */
static inline int stccm_avail(void)
{
	int avail = test_facility(142);

	return avail;
}
1145 | |
1146 | /* CPU-measurement alerts for the counter facility */ |
1147 | static void cpumf_measurement_alert(struct ext_code ext_code, |
1148 | unsigned int alert, unsigned long unused) |
1149 | { |
1150 | struct cpu_cf_events *cpuhw; |
1151 | |
1152 | if (!(alert & CPU_MF_INT_CF_MASK)) |
1153 | return; |
1154 | |
1155 | inc_irq_stat(IRQEXT_CMC); |
1156 | |
1157 | /* |
1158 | * Measurement alerts are shared and might happen when the PMU |
1159 | * is not reserved. Ignore these alerts in this case. |
1160 | */ |
1161 | cpuhw = this_cpu_cfhw(); |
1162 | if (!cpuhw) |
1163 | return; |
1164 | |
1165 | /* counter authorization change alert */ |
1166 | if (alert & CPU_MF_INT_CF_CACA) |
1167 | qctri(&cpumf_ctr_info); |
1168 | |
1169 | /* loss of counter data alert */ |
1170 | if (alert & CPU_MF_INT_CF_LCDA) |
1171 | pr_err("CPU[%i] Counter data was lost\n" , smp_processor_id()); |
1172 | |
1173 | /* loss of MT counter data alert */ |
1174 | if (alert & CPU_MF_INT_CF_MTDA) |
1175 | pr_warn("CPU[%i] MT counter data was lost\n" , |
1176 | smp_processor_id()); |
1177 | } |
1178 | |
1179 | static int cfset_init(void); |
1180 | static int __init cpumf_pmu_init(void) |
1181 | { |
1182 | int rc; |
1183 | |
1184 | /* Extract counter measurement facility information */ |
1185 | if (!cpum_cf_avail() || qctri(&cpumf_ctr_info)) |
1186 | return -ENODEV; |
1187 | |
1188 | /* Determine and store counter set sizes for later reference */ |
1189 | for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) |
1190 | cpum_cf_make_setsize(ctrset: rc); |
1191 | |
1192 | /* |
1193 | * Clear bit 15 of cr0 to unauthorize problem-state to |
1194 | * extract measurement counters |
1195 | */ |
1196 | system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT); |
1197 | |
1198 | /* register handler for measurement-alert interruptions */ |
1199 | rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, |
1200 | cpumf_measurement_alert); |
1201 | if (rc) { |
1202 | pr_err("Registering for CPU-measurement alerts failed with rc=%i\n" , rc); |
1203 | return rc; |
1204 | } |
1205 | |
1206 | /* Setup s390dbf facility */ |
1207 | cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); |
1208 | if (!cf_dbg) { |
1209 | pr_err("Registration of s390dbf(cpum_cf) failed\n" ); |
1210 | rc = -ENOMEM; |
1211 | goto out1; |
1212 | } |
1213 | debug_register_view(cf_dbg, &debug_sprintf_view); |
1214 | |
1215 | cpumf_pmu.attr_groups = cpumf_cf_event_group(); |
1216 | rc = perf_pmu_register(pmu: &cpumf_pmu, name: "cpum_cf" , type: -1); |
1217 | if (rc) { |
1218 | pr_err("Registering the cpum_cf PMU failed with rc=%i\n" , rc); |
1219 | goto out2; |
1220 | } else if (stccm_avail()) { /* Setup counter set device */ |
1221 | cfset_init(); |
1222 | } |
1223 | |
1224 | rc = cpuhp_setup_state(state: CPUHP_AP_PERF_S390_CF_ONLINE, |
1225 | name: "perf/s390/cf:online" , |
1226 | startup: cpum_cf_online_cpu, teardown: cpum_cf_offline_cpu); |
1227 | return rc; |
1228 | |
1229 | out2: |
1230 | debug_unregister_view(cf_dbg, &debug_sprintf_view); |
1231 | debug_unregister(cf_dbg); |
1232 | out1: |
1233 | unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); |
1234 | return rc; |
1235 | } |
1236 | |
1237 | /* Support for the CPU Measurement Facility counter set extraction using |
1238 | * device /dev/hwctr. This allows user space programs to extract complete |
1239 | * counter set via normal file operations. |
1240 | */ |
1241 | |
1242 | struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ |
1243 | unsigned int sets; /* Counter set bit mask */ |
1244 | atomic_t cpus_ack; /* # CPUs successfully executed func */ |
1245 | }; |
1246 | |
1247 | struct cfset_request { /* CPUs and counter set bit mask */ |
1248 | unsigned long ctrset; /* Bit mask of counter set to read */ |
1249 | cpumask_t mask; /* CPU mask to read from */ |
1250 | struct list_head node; /* Chain to cfset_session.head */ |
1251 | }; |
1252 | |
1253 | static void cfset_session_init(void) |
1254 | { |
1255 | INIT_LIST_HEAD(list: &cfset_session.head); |
1256 | } |
1257 | |
1258 | /* Remove current request from global bookkeeping. Maintain a counter set bit |
1259 | * mask on a per CPU basis. |
1260 | * Done in process context under mutex protection. |
1261 | */ |
1262 | static void cfset_session_del(struct cfset_request *p) |
1263 | { |
1264 | list_del(entry: &p->node); |
1265 | } |
1266 | |
1267 | /* Add current request to global bookkeeping. Maintain a counter set bit mask |
1268 | * on a per CPU basis. |
1269 | * Done in process context under mutex protection. |
1270 | */ |
1271 | static void cfset_session_add(struct cfset_request *p) |
1272 | { |
1273 | list_add(new: &p->node, head: &cfset_session.head); |
1274 | } |
1275 | |
1276 | /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access |
1277 | * path is currently used. |
1278 | * The cpu_cf_events::dev_state is used to denote counter sets in use by this |
1279 | * interface. It is always or'ed in. If this interface is not active, its |
1280 | * value is zero and no additional counter sets will be included. |
1281 | * |
1282 | * The cpu_cf_events::state is used by the perf_event_open SVC and remains |
1283 | * unchanged. |
1284 | * |
 * perf_pmu_enable() and perf_pmu_disable() and their call backs
1286 | * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the |
1287 | * performance measurement subsystem to enable per process |
1288 | * CPU Measurement counter facility. |
1289 | * The XXX_enable() and XXX_disable functions are used to turn off |
1290 | * x86 performance monitoring interrupt (PMI) during scheduling. |
1291 | * s390 uses these calls to temporarily stop and resume the active CPU |
1292 | * counters sets during scheduling. |
1293 | * |
1294 | * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr |
1295 | * device access. The perf_event_open() SVC interface makes a lot of effort |
1296 | * to only run the counters while the calling process is actively scheduled |
1297 | * to run. |
1298 | * When /dev/hwctr interface is also used at the same time, the counter sets |
1299 | * will keep running, even when the process is scheduled off a CPU. |
1300 | * However this is not a problem and does not lead to wrong counter values |
1301 | * for the perf_event_open() SVC. The current counter value will be recorded |
1302 | * during schedule-in. At schedule-out time the current counter value is |
1303 | * extracted again and the delta is calculated and added to the event. |
1304 | */ |
1305 | /* Stop all counter sets via ioctl interface */ |
1306 | static void cfset_ioctl_off(void *parm) |
1307 | { |
1308 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
1309 | struct cfset_call_on_cpu_parm *p = parm; |
1310 | int rc; |
1311 | |
1312 | /* Check if any counter set used by /dev/hwctr */ |
1313 | for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) |
1314 | if ((p->sets & cpumf_ctr_ctl[rc])) { |
1315 | if (!atomic_dec_return(v: &cpuhw->ctr_set[rc])) { |
1316 | ctr_set_disable(state: &cpuhw->dev_state, |
1317 | ctrsets: cpumf_ctr_ctl[rc]); |
1318 | ctr_set_stop(state: &cpuhw->dev_state, |
1319 | ctrsets: cpumf_ctr_ctl[rc]); |
1320 | } |
1321 | } |
1322 | /* Keep perf_event_open counter sets */ |
1323 | rc = lcctl(cpuhw->dev_state | cpuhw->state); |
1324 | if (rc) |
1325 | pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n" , |
1326 | cpuhw->state, S390_HWCTR_DEVICE, rc); |
1327 | if (!cpuhw->dev_state) |
1328 | cpuhw->flags &= ~PMU_F_IN_USE; |
1329 | } |
1330 | |
1331 | /* Start counter sets on particular CPU */ |
1332 | static void cfset_ioctl_on(void *parm) |
1333 | { |
1334 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
1335 | struct cfset_call_on_cpu_parm *p = parm; |
1336 | int rc; |
1337 | |
1338 | cpuhw->flags |= PMU_F_IN_USE; |
1339 | ctr_set_enable(state: &cpuhw->dev_state, ctrsets: p->sets); |
1340 | ctr_set_start(state: &cpuhw->dev_state, ctrsets: p->sets); |
1341 | for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) |
1342 | if ((p->sets & cpumf_ctr_ctl[rc])) |
1343 | atomic_inc(v: &cpuhw->ctr_set[rc]); |
1344 | rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ |
1345 | if (!rc) |
1346 | atomic_inc(v: &p->cpus_ack); |
1347 | else |
1348 | pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n" , |
1349 | cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); |
1350 | } |
1351 | |
1352 | static void cfset_release_cpu(void *p) |
1353 | { |
1354 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
1355 | int rc; |
1356 | |
1357 | cpuhw->dev_state = 0; |
1358 | rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ |
1359 | if (rc) |
1360 | pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n" , |
1361 | cpuhw->state, S390_HWCTR_DEVICE, rc); |
1362 | } |
1363 | |
1364 | /* This modifies the process CPU mask to adopt it to the currently online |
1365 | * CPUs. Offline CPUs can not be addresses. This call terminates the access |
1366 | * and is usually followed by close() or a new iotcl(..., START, ...) which |
1367 | * creates a new request structure. |
1368 | */ |
1369 | static void cfset_all_stop(struct cfset_request *req) |
1370 | { |
1371 | struct cfset_call_on_cpu_parm p = { |
1372 | .sets = req->ctrset, |
1373 | }; |
1374 | |
1375 | cpumask_and(dstp: &req->mask, src1p: &req->mask, cpu_online_mask); |
1376 | on_each_cpu_mask(mask: &req->mask, func: cfset_ioctl_off, info: &p, wait: 1); |
1377 | } |
1378 | |
1379 | /* Release function is also called when application gets terminated without |
1380 | * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. |
1381 | */ |
1382 | static int cfset_release(struct inode *inode, struct file *file) |
1383 | { |
1384 | mutex_lock(&cfset_ctrset_mutex); |
1385 | /* Open followed by close/exit has no private_data */ |
1386 | if (file->private_data) { |
1387 | cfset_all_stop(req: file->private_data); |
1388 | cfset_session_del(p: file->private_data); |
1389 | kfree(objp: file->private_data); |
1390 | file->private_data = NULL; |
1391 | } |
1392 | if (refcount_dec_and_test(r: &cfset_opencnt)) { /* Last close */ |
1393 | on_each_cpu(func: cfset_release_cpu, NULL, wait: 1); |
1394 | cpum_cf_free(cpu: -1); |
1395 | } |
1396 | mutex_unlock(lock: &cfset_ctrset_mutex); |
1397 | return 0; |
1398 | } |
1399 | |
1400 | /* |
1401 | * Open via /dev/hwctr device. Allocate all per CPU resources on the first |
1402 | * open of the device. The last close releases all per CPU resources. |
1403 | * Parallel perf_event_open system calls also use per CPU resources. |
1404 | * These invocations are handled via reference counting on the per CPU data |
1405 | * structures. |
1406 | */ |
1407 | static int cfset_open(struct inode *inode, struct file *file) |
1408 | { |
1409 | int rc = 0; |
1410 | |
1411 | if (!perfmon_capable()) |
1412 | return -EPERM; |
1413 | file->private_data = NULL; |
1414 | |
1415 | mutex_lock(&cfset_ctrset_mutex); |
1416 | if (!refcount_inc_not_zero(r: &cfset_opencnt)) { /* First open */ |
1417 | rc = cpum_cf_alloc(cpu: -1); |
1418 | if (!rc) { |
1419 | cfset_session_init(); |
1420 | refcount_set(r: &cfset_opencnt, n: 1); |
1421 | } |
1422 | } |
1423 | mutex_unlock(lock: &cfset_ctrset_mutex); |
1424 | |
1425 | /* nonseekable_open() never fails */ |
1426 | return rc ?: nonseekable_open(inode, filp: file); |
1427 | } |
1428 | |
1429 | static int cfset_all_start(struct cfset_request *req) |
1430 | { |
1431 | struct cfset_call_on_cpu_parm p = { |
1432 | .sets = req->ctrset, |
1433 | .cpus_ack = ATOMIC_INIT(0), |
1434 | }; |
1435 | cpumask_var_t mask; |
1436 | int rc = 0; |
1437 | |
1438 | if (!alloc_cpumask_var(mask: &mask, GFP_KERNEL)) |
1439 | return -ENOMEM; |
1440 | cpumask_and(dstp: mask, src1p: &req->mask, cpu_online_mask); |
1441 | on_each_cpu_mask(mask, func: cfset_ioctl_on, info: &p, wait: 1); |
1442 | if (atomic_read(v: &p.cpus_ack) != cpumask_weight(srcp: mask)) { |
1443 | on_each_cpu_mask(mask, func: cfset_ioctl_off, info: &p, wait: 1); |
1444 | rc = -EIO; |
1445 | } |
1446 | free_cpumask_var(mask); |
1447 | return rc; |
1448 | } |
1449 | |
1450 | /* Return the maximum required space for all possible CPUs in case one |
1451 | * CPU will be onlined during the START, READ, STOP cycles. |
1452 | * To find out the size of the counter sets, any one CPU will do. They |
1453 | * all have the same counter sets. |
1454 | */ |
1455 | static size_t cfset_needspace(unsigned int sets) |
1456 | { |
1457 | size_t bytes = 0; |
1458 | int i; |
1459 | |
1460 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { |
1461 | if (!(sets & cpumf_ctr_ctl[i])) |
1462 | continue; |
1463 | bytes += cpum_cf_read_setsize(ctrset: i) * sizeof(u64) + |
1464 | sizeof(((struct s390_ctrset_setdata *)0)->set) + |
1465 | sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); |
1466 | } |
1467 | bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * |
1468 | (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + |
1469 | sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); |
1470 | return bytes; |
1471 | } |
1472 | |
1473 | static int cfset_all_copy(unsigned long arg, cpumask_t *mask) |
1474 | { |
1475 | struct s390_ctrset_read __user *ctrset_read; |
1476 | unsigned int cpu, cpus, rc = 0; |
1477 | void __user *uptr; |
1478 | |
1479 | ctrset_read = (struct s390_ctrset_read __user *)arg; |
1480 | uptr = ctrset_read->data; |
1481 | for_each_cpu(cpu, mask) { |
1482 | struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu); |
1483 | struct s390_ctrset_cpudata __user *ctrset_cpudata; |
1484 | |
1485 | ctrset_cpudata = uptr; |
1486 | rc = put_user(cpu, &ctrset_cpudata->cpu_nr); |
1487 | rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); |
1488 | rc |= copy_to_user(to: ctrset_cpudata->data, from: cpuhw->data, |
1489 | n: cpuhw->used); |
1490 | if (rc) { |
1491 | rc = -EFAULT; |
1492 | goto out; |
1493 | } |
1494 | uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; |
1495 | cond_resched(); |
1496 | } |
1497 | cpus = cpumask_weight(srcp: mask); |
1498 | if (put_user(cpus, &ctrset_read->no_cpus)) |
1499 | rc = -EFAULT; |
1500 | out: |
1501 | return rc; |
1502 | } |
1503 | |
1504 | static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, |
1505 | int ctrset_size, size_t room) |
1506 | { |
1507 | size_t need = 0; |
1508 | int rc = -1; |
1509 | |
1510 | need = sizeof(*p) + sizeof(u64) * ctrset_size; |
1511 | if (need <= room) { |
1512 | p->set = cpumf_ctr_ctl[ctrset]; |
1513 | p->no_cnts = ctrset_size; |
1514 | rc = ctr_stcctm(set: ctrset, range: ctrset_size, dest: (u64 *)p->cv); |
1515 | if (rc == 3) /* Nothing stored */ |
1516 | need = 0; |
1517 | } |
1518 | return need; |
1519 | } |
1520 | |
1521 | /* Read all counter sets. */ |
1522 | static void cfset_cpu_read(void *parm) |
1523 | { |
1524 | struct cpu_cf_events *cpuhw = this_cpu_cfhw(); |
1525 | struct cfset_call_on_cpu_parm *p = parm; |
1526 | int set, set_size; |
1527 | size_t space; |
1528 | |
1529 | /* No data saved yet */ |
1530 | cpuhw->used = 0; |
1531 | cpuhw->sets = 0; |
1532 | memset(cpuhw->data, 0, sizeof(cpuhw->data)); |
1533 | |
1534 | /* Scan the counter sets */ |
1535 | for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { |
1536 | struct s390_ctrset_setdata *sp = (void *)cpuhw->data + |
1537 | cpuhw->used; |
1538 | |
1539 | if (!(p->sets & cpumf_ctr_ctl[set])) |
1540 | continue; /* Counter set not in list */ |
1541 | set_size = cpum_cf_read_setsize(ctrset: set); |
1542 | space = sizeof(cpuhw->data) - cpuhw->used; |
1543 | space = cfset_cpuset_read(p: sp, ctrset: set, ctrset_size: set_size, room: space); |
1544 | if (space) { |
1545 | cpuhw->used += space; |
1546 | cpuhw->sets += 1; |
1547 | } |
1548 | } |
1549 | } |
1550 | |
1551 | static int cfset_all_read(unsigned long arg, struct cfset_request *req) |
1552 | { |
1553 | struct cfset_call_on_cpu_parm p; |
1554 | cpumask_var_t mask; |
1555 | int rc; |
1556 | |
1557 | if (!alloc_cpumask_var(mask: &mask, GFP_KERNEL)) |
1558 | return -ENOMEM; |
1559 | |
1560 | p.sets = req->ctrset; |
1561 | cpumask_and(dstp: mask, src1p: &req->mask, cpu_online_mask); |
1562 | on_each_cpu_mask(mask, func: cfset_cpu_read, info: &p, wait: 1); |
1563 | rc = cfset_all_copy(arg, mask); |
1564 | free_cpumask_var(mask); |
1565 | return rc; |
1566 | } |
1567 | |
1568 | static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) |
1569 | { |
1570 | int ret = -ENODATA; |
1571 | |
1572 | if (req && req->ctrset) |
1573 | ret = cfset_all_read(arg, req); |
1574 | return ret; |
1575 | } |
1576 | |
1577 | static long cfset_ioctl_stop(struct file *file) |
1578 | { |
1579 | struct cfset_request *req = file->private_data; |
1580 | int ret = -ENXIO; |
1581 | |
1582 | if (req) { |
1583 | cfset_all_stop(req); |
1584 | cfset_session_del(p: req); |
1585 | kfree(objp: req); |
1586 | file->private_data = NULL; |
1587 | ret = 0; |
1588 | } |
1589 | return ret; |
1590 | } |
1591 | |
1592 | static long cfset_ioctl_start(unsigned long arg, struct file *file) |
1593 | { |
1594 | struct s390_ctrset_start __user *ustart; |
1595 | struct s390_ctrset_start start; |
1596 | struct cfset_request *preq; |
1597 | void __user *umask; |
1598 | unsigned int len; |
1599 | int ret = 0; |
1600 | size_t need; |
1601 | |
1602 | if (file->private_data) |
1603 | return -EBUSY; |
1604 | ustart = (struct s390_ctrset_start __user *)arg; |
1605 | if (copy_from_user(to: &start, from: ustart, n: sizeof(start))) |
1606 | return -EFAULT; |
1607 | if (start.version != S390_HWCTR_START_VERSION) |
1608 | return -EINVAL; |
1609 | if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | |
1610 | cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | |
1611 | cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | |
1612 | cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | |
1613 | cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) |
1614 | return -EINVAL; /* Invalid counter set */ |
1615 | if (!start.counter_sets) |
1616 | return -EINVAL; /* No counter set at all? */ |
1617 | |
1618 | preq = kzalloc(size: sizeof(*preq), GFP_KERNEL); |
1619 | if (!preq) |
1620 | return -ENOMEM; |
1621 | cpumask_clear(dstp: &preq->mask); |
1622 | len = min_t(u64, start.cpumask_len, cpumask_size()); |
1623 | umask = (void __user *)start.cpumask; |
1624 | if (copy_from_user(to: &preq->mask, from: umask, n: len)) { |
1625 | kfree(objp: preq); |
1626 | return -EFAULT; |
1627 | } |
1628 | if (cpumask_empty(srcp: &preq->mask)) { |
1629 | kfree(objp: preq); |
1630 | return -EINVAL; |
1631 | } |
1632 | need = cfset_needspace(sets: start.counter_sets); |
1633 | if (put_user(need, &ustart->data_bytes)) { |
1634 | kfree(objp: preq); |
1635 | return -EFAULT; |
1636 | } |
1637 | preq->ctrset = start.counter_sets; |
1638 | ret = cfset_all_start(req: preq); |
1639 | if (!ret) { |
1640 | cfset_session_add(p: preq); |
1641 | file->private_data = preq; |
1642 | } else { |
1643 | kfree(objp: preq); |
1644 | } |
1645 | return ret; |
1646 | } |
1647 | |
1648 | /* Entry point to the /dev/hwctr device interface. |
1649 | * The ioctl system call supports three subcommands: |
1650 | * S390_HWCTR_START: Start the specified counter sets on a CPU list. The |
1651 | * counter set keeps running until explicitly stopped. Returns the number |
1652 | * of bytes needed to store the counter values. If another S390_HWCTR_START |
1653 | * ioctl subcommand is called without a previous S390_HWCTR_STOP stop |
1654 | * command on the same file descriptor, -EBUSY is returned. |
1655 | * S390_HWCTR_READ: Read the counter set values from specified CPU list given |
1656 | * with the S390_HWCTR_START command. |
1657 | * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the |
1658 | * previous S390_HWCTR_START subcommand. |
1659 | */ |
1660 | static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
1661 | { |
1662 | int ret; |
1663 | |
1664 | cpus_read_lock(); |
1665 | mutex_lock(&cfset_ctrset_mutex); |
1666 | switch (cmd) { |
1667 | case S390_HWCTR_START: |
1668 | ret = cfset_ioctl_start(arg, file); |
1669 | break; |
1670 | case S390_HWCTR_STOP: |
1671 | ret = cfset_ioctl_stop(file); |
1672 | break; |
1673 | case S390_HWCTR_READ: |
1674 | ret = cfset_ioctl_read(arg, req: file->private_data); |
1675 | break; |
1676 | default: |
1677 | ret = -ENOTTY; |
1678 | break; |
1679 | } |
1680 | mutex_unlock(lock: &cfset_ctrset_mutex); |
1681 | cpus_read_unlock(); |
1682 | return ret; |
1683 | } |
1684 | |
1685 | static const struct file_operations cfset_fops = { |
1686 | .owner = THIS_MODULE, |
1687 | .open = cfset_open, |
1688 | .release = cfset_release, |
1689 | .unlocked_ioctl = cfset_ioctl, |
1690 | .compat_ioctl = cfset_ioctl, |
1691 | .llseek = no_llseek |
1692 | }; |
1693 | |
1694 | static struct miscdevice cfset_dev = { |
1695 | .name = S390_HWCTR_DEVICE, |
1696 | .minor = MISC_DYNAMIC_MINOR, |
1697 | .fops = &cfset_fops, |
1698 | .mode = 0666, |
1699 | }; |
1700 | |
1701 | /* Hotplug add of a CPU. Scan through all active processes and add |
1702 | * that CPU to the list of CPUs supplied with ioctl(..., START, ...). |
1703 | */ |
1704 | static int cfset_online_cpu(unsigned int cpu) |
1705 | { |
1706 | struct cfset_call_on_cpu_parm p; |
1707 | struct cfset_request *rp; |
1708 | |
1709 | if (!list_empty(head: &cfset_session.head)) { |
1710 | list_for_each_entry(rp, &cfset_session.head, node) { |
1711 | p.sets = rp->ctrset; |
1712 | cfset_ioctl_on(parm: &p); |
1713 | cpumask_set_cpu(cpu, dstp: &rp->mask); |
1714 | } |
1715 | } |
1716 | return 0; |
1717 | } |
1718 | |
1719 | /* Hotplug remove of a CPU. Scan through all active processes and clear |
1720 | * that CPU from the list of CPUs supplied with ioctl(..., START, ...). |
1721 | * Adjust reference counts. |
1722 | */ |
1723 | static int cfset_offline_cpu(unsigned int cpu) |
1724 | { |
1725 | struct cfset_call_on_cpu_parm p; |
1726 | struct cfset_request *rp; |
1727 | |
1728 | if (!list_empty(head: &cfset_session.head)) { |
1729 | list_for_each_entry(rp, &cfset_session.head, node) { |
1730 | p.sets = rp->ctrset; |
1731 | cfset_ioctl_off(parm: &p); |
1732 | cpumask_clear_cpu(cpu, dstp: &rp->mask); |
1733 | } |
1734 | } |
1735 | return 0; |
1736 | } |
1737 | |
/* Read function for the diagnostic event: intentionally does nothing. */
static void cfdiag_read(struct perf_event *event)
{
	/* Nothing to do */
}
1741 | |
1742 | static int get_authctrsets(void) |
1743 | { |
1744 | unsigned long auth = 0; |
1745 | enum cpumf_ctr_set i; |
1746 | |
1747 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { |
1748 | if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i]) |
1749 | auth |= cpumf_ctr_ctl[i]; |
1750 | } |
1751 | return auth; |
1752 | } |
1753 | |
1754 | /* Setup the event. Test for authorized counter sets and only include counter |
1755 | * sets which are authorized at the time of the setup. Including unauthorized |
1756 | * counter sets result in specification exception (and panic). |
1757 | */ |
1758 | static int cfdiag_event_init2(struct perf_event *event) |
1759 | { |
1760 | struct perf_event_attr *attr = &event->attr; |
1761 | int err = 0; |
1762 | |
1763 | /* Set sample_period to indicate sampling */ |
1764 | event->hw.config = attr->config; |
1765 | event->hw.sample_period = attr->sample_period; |
1766 | local64_set(&event->hw.period_left, event->hw.sample_period); |
1767 | local64_set(&event->count, 0); |
1768 | event->hw.last_period = event->hw.sample_period; |
1769 | |
1770 | /* Add all authorized counter sets to config_base. The |
1771 | * the hardware init function is either called per-cpu or just once |
1772 | * for all CPUS (event->cpu == -1). This depends on the whether |
1773 | * counting is started for all CPUs or on a per workload base where |
1774 | * the perf event moves from one CPU to another CPU. |
1775 | * Checking the authorization on any CPU is fine as the hardware |
1776 | * applies the same authorization settings to all CPUs. |
1777 | */ |
1778 | event->hw.config_base = get_authctrsets(); |
1779 | |
1780 | /* No authorized counter sets, nothing to count/sample */ |
1781 | if (!event->hw.config_base) |
1782 | err = -EINVAL; |
1783 | |
1784 | return err; |
1785 | } |
1786 | |
1787 | static int cfdiag_event_init(struct perf_event *event) |
1788 | { |
1789 | struct perf_event_attr *attr = &event->attr; |
1790 | int err = -ENOENT; |
1791 | |
1792 | if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || |
1793 | event->attr.type != event->pmu->type) |
1794 | goto out; |
1795 | |
1796 | /* Raw events are used to access counters directly, |
1797 | * hence do not permit excludes. |
1798 | * This event is useless without PERF_SAMPLE_RAW to return counter set |
1799 | * values as raw data. |
1800 | */ |
1801 | if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || |
1802 | !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { |
1803 | err = -EOPNOTSUPP; |
1804 | goto out; |
1805 | } |
1806 | |
1807 | /* Initialize for using the CPU-measurement counter facility */ |
1808 | if (cpum_cf_alloc(cpu: event->cpu)) |
1809 | return -ENOMEM; |
1810 | event->destroy = hw_perf_event_destroy; |
1811 | |
1812 | err = cfdiag_event_init2(event); |
1813 | if (unlikely(err)) |
1814 | event->destroy(event); |
1815 | out: |
1816 | return err; |
1817 | } |
1818 | |
1819 | /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used |
1820 | * to collect the complete counter sets for a scheduled process. Target |
1821 | * are complete counter sets attached as raw data to the artificial event. |
1822 | * This results in complete counter sets available when a process is |
1823 | * scheduled. Contains the delta of every counter while the process was |
1824 | * running. |
1825 | */ |
1826 | CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); |
1827 | |
1828 | static struct attribute *cfdiag_events_attr[] = { |
1829 | CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), |
1830 | NULL, |
1831 | }; |
1832 | |
1833 | PMU_FORMAT_ATTR(event, "config:0-63" ); |
1834 | |
1835 | static struct attribute *cfdiag_format_attr[] = { |
1836 | &format_attr_event.attr, |
1837 | NULL, |
1838 | }; |
1839 | |
1840 | static struct attribute_group cfdiag_events_group = { |
1841 | .name = "events" , |
1842 | .attrs = cfdiag_events_attr, |
1843 | }; |
1844 | static struct attribute_group cfdiag_format_group = { |
1845 | .name = "format" , |
1846 | .attrs = cfdiag_format_attr, |
1847 | }; |
1848 | static const struct attribute_group *cfdiag_attr_groups[] = { |
1849 | &cfdiag_events_group, |
1850 | &cfdiag_format_group, |
1851 | NULL, |
1852 | }; |
1853 | |
1854 | /* Performance monitoring unit for event CF_DIAG. Since this event |
1855 | * is also started and stopped via the perf_event_open() system call, use |
1856 | * the same event enable/disable call back functions. They do not |
1857 | * have a pointer to the perf_event strcture as first parameter. |
1858 | * |
1859 | * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. |
1860 | * Reuse them and distinguish the event (always first parameter) via |
1861 | * 'config' member. |
1862 | */ |
1863 | static struct pmu cf_diag = { |
1864 | .task_ctx_nr = perf_sw_context, |
1865 | .event_init = cfdiag_event_init, |
1866 | .pmu_enable = cpumf_pmu_enable, |
1867 | .pmu_disable = cpumf_pmu_disable, |
1868 | .add = cpumf_pmu_add, |
1869 | .del = cpumf_pmu_del, |
1870 | .start = cpumf_pmu_start, |
1871 | .stop = cpumf_pmu_stop, |
1872 | .read = cfdiag_read, |
1873 | |
1874 | .attr_groups = cfdiag_attr_groups |
1875 | }; |
1876 | |
1877 | /* Calculate memory needed to store all counter sets together with header and |
1878 | * trailer data. This is independent of the counter set authorization which |
1879 | * can vary depending on the configuration. |
1880 | */ |
1881 | static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) |
1882 | { |
1883 | size_t max_size = sizeof(struct cf_trailer_entry); |
1884 | enum cpumf_ctr_set i; |
1885 | |
1886 | for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { |
1887 | size_t size = cpum_cf_read_setsize(ctrset: i); |
1888 | |
1889 | if (size) |
1890 | max_size += size * sizeof(u64) + |
1891 | sizeof(struct cf_ctrset_entry); |
1892 | } |
1893 | return max_size; |
1894 | } |
1895 | |
1896 | /* Get the CPU speed, try sampling facility first and CPU attributes second. */ |
1897 | static void cfdiag_get_cpu_speed(void) |
1898 | { |
1899 | unsigned long mhz; |
1900 | |
1901 | if (cpum_sf_avail()) { /* Sampling facility first */ |
1902 | struct hws_qsi_info_block si; |
1903 | |
1904 | memset(&si, 0, sizeof(si)); |
1905 | if (!qsi(&si)) { |
1906 | cfdiag_cpu_speed = si.cpu_speed; |
1907 | return; |
1908 | } |
1909 | } |
1910 | |
1911 | /* Fallback: CPU speed extract static part. Used in case |
1912 | * CPU Measurement Sampling Facility is turned off. |
1913 | */ |
1914 | mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); |
1915 | if (mhz != -1UL) |
1916 | cfdiag_cpu_speed = mhz & 0xffffffff; |
1917 | } |
1918 | |
1919 | static int cfset_init(void) |
1920 | { |
1921 | size_t need; |
1922 | int rc; |
1923 | |
1924 | cfdiag_get_cpu_speed(); |
1925 | /* Make sure the counter set data fits into predefined buffer. */ |
1926 | need = cfdiag_maxsize(info: &cpumf_ctr_info); |
1927 | if (need > sizeof(((struct cpu_cf_events *)0)->start)) { |
1928 | pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n" , |
1929 | need); |
1930 | return -ENOMEM; |
1931 | } |
1932 | |
1933 | rc = misc_register(misc: &cfset_dev); |
1934 | if (rc) { |
1935 | pr_err("Registration of /dev/%s failed rc=%i\n" , |
1936 | cfset_dev.name, rc); |
1937 | goto out; |
1938 | } |
1939 | |
1940 | rc = perf_pmu_register(pmu: &cf_diag, name: "cpum_cf_diag" , type: -1); |
1941 | if (rc) { |
1942 | misc_deregister(misc: &cfset_dev); |
1943 | pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n" , |
1944 | rc); |
1945 | } |
1946 | out: |
1947 | return rc; |
1948 | } |
1949 | |
/* Run cpumf_pmu_init() as a device initcall */
device_initcall(cpumf_pmu_init);
1951 | |