1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Performance event support - Processor Activity Instrumentation Extension |
4 | * Facility |
5 | * |
6 | * Copyright IBM Corp. 2022 |
7 | * Author(s): Thomas Richter <tmricht@linux.ibm.com> |
8 | */ |
/* Component name; pr_fmt() puts it in front of every message of this file. */
#define KMSG_COMPONENT	"pai_ext"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
11 | |
12 | #include <linux/kernel.h> |
13 | #include <linux/kernel_stat.h> |
14 | #include <linux/percpu.h> |
15 | #include <linux/notifier.h> |
16 | #include <linux/init.h> |
17 | #include <linux/export.h> |
18 | #include <linux/io.h> |
19 | #include <linux/perf_event.h> |
20 | #include <asm/ctlreg.h> |
21 | #include <asm/pai.h> |
22 | #include <asm/debug.h> |
23 | |
24 | #define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ |
25 | #define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ |
26 | |
27 | static debug_info_t *paiext_dbg; |
28 | static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ |
29 | |
30 | struct pai_userdata { |
31 | u16 num; |
32 | u64 value; |
33 | } __packed; |
34 | |
35 | /* Create the PAI extension 1 control block area. |
36 | * The PAI extension control block 1 is pointed to by lowcore |
37 | * address 0x1508 for each CPU. This control block is 512 bytes in size |
38 | * and requires a 512 byte boundary alignment. |
39 | */ |
40 | struct paiext_cb { /* PAI extension 1 control block */ |
41 | u64 ; /* Not used */ |
42 | u64 reserved1; |
43 | u64 acc; /* Addr to analytics counter control block */ |
44 | u8 reserved2[488]; |
45 | } __packed; |
46 | |
47 | struct paiext_map { |
48 | unsigned long *area; /* Area for CPU to store counters */ |
49 | struct pai_userdata *save; /* Area to store non-zero counters */ |
50 | enum paievt_mode mode; /* Type of event */ |
51 | unsigned int active_events; /* # of PAI Extension users */ |
52 | refcount_t refcnt; |
53 | struct perf_event *event; /* Perf event for sampling */ |
54 | struct paiext_cb *paiext_cb; /* PAI extension control block area */ |
55 | }; |
56 | |
/* Per CPU slot holding the pointer to that CPU's paiext_map (or NULL). */
struct paiext_mapptr {
	struct paiext_map *mapptr;
};
60 | |
61 | static struct paiext_root { /* Anchor to per CPU data */ |
62 | refcount_t refcnt; /* Overall active events */ |
63 | struct paiext_mapptr __percpu *mapptr; |
64 | } paiext_root; |
65 | |
66 | /* Free per CPU data when the last event is removed. */ |
67 | static void paiext_root_free(void) |
68 | { |
69 | if (refcount_dec_and_test(r: &paiext_root.refcnt)) { |
70 | free_percpu(pdata: paiext_root.mapptr); |
71 | paiext_root.mapptr = NULL; |
72 | } |
73 | } |
74 | |
75 | /* On initialization of first event also allocate per CPU data dynamically. |
76 | * Start with an array of pointers, the array size is the maximum number of |
77 | * CPUs possible, which might be larger than the number of CPUs currently |
78 | * online. |
79 | */ |
80 | static int paiext_root_alloc(void) |
81 | { |
82 | if (!refcount_inc_not_zero(r: &paiext_root.refcnt)) { |
83 | /* The memory is already zeroed. */ |
84 | paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); |
85 | if (!paiext_root.mapptr) { |
86 | /* Returning without refcnt adjustment is ok. The |
87 | * error code is handled by paiext_alloc() which |
88 | * decrements refcnt when an event can not be |
89 | * created. |
90 | */ |
91 | return -ENOMEM; |
92 | } |
93 | refcount_set(r: &paiext_root.refcnt, n: 1); |
94 | } |
95 | return 0; |
96 | } |
97 | |
/* Serializes updates of the anchor and per CPU reference counters and
 * prohibits concurrent execution of counting and sampling events.
 * Ensures that the analytics counter block is deallocated only when
 * neither sampling nor counting is in use on that CPU.
 * For details see paiext_alloc().
 */
static DEFINE_MUTEX(paiext_reserve_mutex);
106 | |
107 | /* Free all memory allocated for event counting/sampling setup */ |
108 | static void paiext_free(struct paiext_mapptr *mp) |
109 | { |
110 | kfree(objp: mp->mapptr->area); |
111 | kfree(objp: mp->mapptr->paiext_cb); |
112 | kvfree(addr: mp->mapptr->save); |
113 | kfree(objp: mp->mapptr); |
114 | mp->mapptr = NULL; |
115 | } |
116 | |
117 | /* Release the PMU if event is the last perf event */ |
118 | static void paiext_event_destroy(struct perf_event *event) |
119 | { |
120 | struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); |
121 | struct paiext_map *cpump = mp->mapptr; |
122 | |
123 | free_page(PAI_SAVE_AREA(event)); |
124 | mutex_lock(&paiext_reserve_mutex); |
125 | if (refcount_dec_and_test(r: &cpump->refcnt)) /* Last reference gone */ |
126 | paiext_free(mp); |
127 | paiext_root_free(); |
128 | mutex_unlock(lock: &paiext_reserve_mutex); |
129 | debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n" , __func__, |
130 | event->cpu, mp->mapptr); |
131 | |
132 | } |
133 | |
134 | /* Used to avoid races in checking concurrent access of counting and |
135 | * sampling for pai_extension events. |
136 | * |
137 | * Only one instance of event pai_ext/NNPA_ALL/ for sampling is |
138 | * allowed and when this event is running, no counting event is allowed. |
139 | * Several counting events are allowed in parallel, but no sampling event |
140 | * is allowed while one (or more) counting events are running. |
141 | * |
142 | * This function is called in process context and it is safe to block. |
143 | * When the event initialization functions fails, no other call back will |
144 | * be invoked. |
145 | * |
146 | * Allocate the memory for the event. |
147 | */ |
148 | static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) |
149 | { |
150 | struct paiext_mapptr *mp; |
151 | struct paiext_map *cpump; |
152 | int rc; |
153 | |
154 | mutex_lock(&paiext_reserve_mutex); |
155 | |
156 | rc = paiext_root_alloc(); |
157 | if (rc) |
158 | goto unlock; |
159 | |
160 | mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); |
161 | cpump = mp->mapptr; |
162 | if (!cpump) { /* Paiext_map allocated? */ |
163 | rc = -ENOMEM; |
164 | cpump = kzalloc(size: sizeof(*cpump), GFP_KERNEL); |
165 | if (!cpump) |
166 | goto undo; |
167 | |
168 | /* Allocate memory for counter area and counter extraction. |
169 | * These are |
170 | * - a 512 byte block and requires 512 byte boundary alignment. |
171 | * - a 1KB byte block and requires 1KB boundary alignment. |
172 | * Only the first counting event has to allocate the area. |
173 | * |
174 | * Note: This works with commit 59bb47985c1d by default. |
175 | * Backporting this to kernels without this commit might |
176 | * need adjustment. |
177 | */ |
178 | mp->mapptr = cpump; |
179 | cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); |
180 | cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); |
181 | cpump->save = kvmalloc_array(n: paiext_cnt + 1, |
182 | size: sizeof(struct pai_userdata), |
183 | GFP_KERNEL); |
184 | if (!cpump->save || !cpump->area || !cpump->paiext_cb) { |
185 | paiext_free(mp); |
186 | goto undo; |
187 | } |
188 | refcount_set(r: &cpump->refcnt, n: 1); |
189 | cpump->mode = a->sample_period ? PAI_MODE_SAMPLING |
190 | : PAI_MODE_COUNTING; |
191 | } else { |
192 | /* Multiple invocation, check what is active. |
193 | * Supported are multiple counter events or only one sampling |
194 | * event concurrently at any one time. |
195 | */ |
196 | if (cpump->mode == PAI_MODE_SAMPLING || |
197 | (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { |
198 | rc = -EBUSY; |
199 | goto undo; |
200 | } |
201 | refcount_inc(r: &cpump->refcnt); |
202 | } |
203 | |
204 | rc = 0; |
205 | |
206 | undo: |
207 | if (rc) { |
208 | /* Error in allocation of event, decrement anchor. Since |
209 | * the event in not created, its destroy() function is never |
210 | * invoked. Adjust the reference counter for the anchor. |
211 | */ |
212 | paiext_root_free(); |
213 | } |
214 | unlock: |
215 | mutex_unlock(lock: &paiext_reserve_mutex); |
216 | /* If rc is non-zero, no increment of counter/sampler was done. */ |
217 | return rc; |
218 | } |
219 | |
220 | /* The PAI extension 1 control block supports up to 128 entries. Return |
221 | * the index within PAIE1_CB given the event number. Also validate event |
222 | * number. |
223 | */ |
224 | static int paiext_event_valid(struct perf_event *event) |
225 | { |
226 | u64 cfg = event->attr.config; |
227 | |
228 | if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { |
229 | /* Offset NNPA in paiext_cb */ |
230 | event->hw.config_base = offsetof(struct paiext_cb, acc); |
231 | return 0; |
232 | } |
233 | return -EINVAL; |
234 | } |
235 | |
236 | /* Might be called on different CPU than the one the event is intended for. */ |
237 | static int paiext_event_init(struct perf_event *event) |
238 | { |
239 | struct perf_event_attr *a = &event->attr; |
240 | int rc; |
241 | |
242 | /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ |
243 | if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) |
244 | return -ENOENT; |
245 | /* PAI extension event must be valid and in supported range */ |
246 | rc = paiext_event_valid(event); |
247 | if (rc) |
248 | return rc; |
249 | /* Allow only CPU wide operation, no process context for now. */ |
250 | if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1) |
251 | return -ENOENT; |
252 | /* Allow only event NNPA_ALL for sampling. */ |
253 | if (a->sample_period && a->config != PAI_NNPA_BASE) |
254 | return -EINVAL; |
255 | /* Prohibit exclude_user event selection */ |
256 | if (a->exclude_user) |
257 | return -EINVAL; |
258 | /* Get a page to store last counter values for sampling */ |
259 | if (a->sample_period) { |
260 | PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL); |
261 | if (!PAI_SAVE_AREA(event)) |
262 | return -ENOMEM; |
263 | } |
264 | |
265 | rc = paiext_alloc(a, event); |
266 | if (rc) { |
267 | free_page(PAI_SAVE_AREA(event)); |
268 | return rc; |
269 | } |
270 | event->destroy = paiext_event_destroy; |
271 | |
272 | if (a->sample_period) { |
273 | a->sample_period = 1; |
274 | a->freq = 0; |
275 | /* Register for paicrypt_sched_task() to be called */ |
276 | event->attach_state |= PERF_ATTACH_SCHED_CB; |
277 | /* Add raw data which are the memory mapped counters */ |
278 | a->sample_type |= PERF_SAMPLE_RAW; |
279 | /* Turn off inheritance */ |
280 | a->inherit = 0; |
281 | } |
282 | |
283 | return 0; |
284 | } |
285 | |
286 | static u64 paiext_getctr(unsigned long *area, int nr) |
287 | { |
288 | return area[nr]; |
289 | } |
290 | |
291 | /* Read the counter values. Return value from location in buffer. For event |
292 | * NNPA_ALL sum up all events. |
293 | */ |
294 | static u64 paiext_getdata(struct perf_event *event) |
295 | { |
296 | struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); |
297 | struct paiext_map *cpump = mp->mapptr; |
298 | u64 sum = 0; |
299 | int i; |
300 | |
301 | if (event->attr.config != PAI_NNPA_BASE) |
302 | return paiext_getctr(area: cpump->area, |
303 | nr: event->attr.config - PAI_NNPA_BASE); |
304 | |
305 | for (i = 1; i <= paiext_cnt; i++) |
306 | sum += paiext_getctr(area: cpump->area, nr: i); |
307 | |
308 | return sum; |
309 | } |
310 | |
311 | static u64 paiext_getall(struct perf_event *event) |
312 | { |
313 | return paiext_getdata(event); |
314 | } |
315 | |
316 | static void paiext_read(struct perf_event *event) |
317 | { |
318 | u64 prev, new, delta; |
319 | |
320 | prev = local64_read(&event->hw.prev_count); |
321 | new = paiext_getall(event); |
322 | local64_set(&event->hw.prev_count, new); |
323 | delta = new - prev; |
324 | local64_add(delta, &event->count); |
325 | } |
326 | |
327 | static void paiext_start(struct perf_event *event, int flags) |
328 | { |
329 | struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); |
330 | struct paiext_map *cpump = mp->mapptr; |
331 | u64 sum; |
332 | |
333 | if (!event->attr.sample_period) { /* Counting */ |
334 | sum = paiext_getall(event); /* Get current value */ |
335 | local64_set(&event->hw.prev_count, sum); |
336 | } else { /* Sampling */ |
337 | cpump->event = event; |
338 | perf_sched_cb_inc(pmu: event->pmu); |
339 | } |
340 | } |
341 | |
342 | static int paiext_add(struct perf_event *event, int flags) |
343 | { |
344 | struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); |
345 | struct paiext_map *cpump = mp->mapptr; |
346 | struct paiext_cb *pcb = cpump->paiext_cb; |
347 | |
348 | if (++cpump->active_events == 1) { |
349 | S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); |
350 | pcb->acc = virt_to_phys(address: cpump->area) | 0x1; |
351 | /* Enable CPU instruction lookup for PAIE1 control block */ |
352 | local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); |
353 | debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n" , |
354 | __func__, S390_lowcore.aicd, pcb->acc); |
355 | } |
356 | if (flags & PERF_EF_START) |
357 | paiext_start(event, PERF_EF_RELOAD); |
358 | event->hw.state = 0; |
359 | return 0; |
360 | } |
361 | |
362 | static void paiext_stop(struct perf_event *event, int flags) |
363 | { |
364 | struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); |
365 | struct paiext_map *cpump = mp->mapptr; |
366 | |
367 | if (!event->attr.sample_period) { /* Counting */ |
368 | paiext_read(event); |
369 | } else { /* Sampling */ |
370 | perf_sched_cb_dec(pmu: event->pmu); |
371 | cpump->event = NULL; |
372 | } |
373 | event->hw.state = PERF_HES_STOPPED; |
374 | } |
375 | |
376 | static void paiext_del(struct perf_event *event, int flags) |
377 | { |
378 | struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); |
379 | struct paiext_map *cpump = mp->mapptr; |
380 | struct paiext_cb *pcb = cpump->paiext_cb; |
381 | |
382 | paiext_stop(event, PERF_EF_UPDATE); |
383 | if (--cpump->active_events == 0) { |
384 | /* Disable CPU instruction lookup for PAIE1 control block */ |
385 | local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); |
386 | pcb->acc = 0; |
387 | S390_lowcore.aicd = 0; |
388 | debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n" , |
389 | __func__, S390_lowcore.aicd, pcb->acc); |
390 | } |
391 | } |
392 | |
393 | /* Create raw data and save it in buffer. Returns number of bytes copied. |
394 | * Saves only positive counter entries of the form |
395 | * 2 bytes: Number of counter |
396 | * 8 bytes: Value of counter |
397 | */ |
398 | static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area, |
399 | unsigned long *area_old) |
400 | { |
401 | int i, outidx = 0; |
402 | |
403 | for (i = 1; i <= paiext_cnt; i++) { |
404 | u64 val = paiext_getctr(area, nr: i); |
405 | u64 val_old = paiext_getctr(area: area_old, nr: i); |
406 | |
407 | if (val >= val_old) |
408 | val -= val_old; |
409 | else |
410 | val = (~0ULL - val_old) + val + 1; |
411 | if (val) { |
412 | userdata[outidx].num = i; |
413 | userdata[outidx].value = val; |
414 | outidx++; |
415 | } |
416 | } |
417 | return outidx * sizeof(*userdata); |
418 | } |
419 | |
420 | /* Write sample when one or more counters values are nonzero. |
421 | * |
422 | * Note: The function paiext_sched_task() and paiext_push_sample() are not |
423 | * invoked after function paiext_del() has been called because of function |
424 | * perf_sched_cb_dec(). |
425 | * The function paiext_sched_task() and paiext_push_sample() are only |
426 | * called when sampling is active. Function perf_sched_cb_inc() |
427 | * has been invoked to install function paiext_sched_task() as call back |
428 | * to run at context switch time (see paiext_add()). |
429 | * |
430 | * This causes function perf_event_context_sched_out() and |
431 | * perf_event_context_sched_in() to check whether the PMU has installed an |
432 | * sched_task() callback. That callback is not active after paiext_del() |
433 | * returns and has deleted the event on that CPU. |
434 | */ |
435 | static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, |
436 | struct perf_event *event) |
437 | { |
438 | struct perf_sample_data data; |
439 | struct perf_raw_record raw; |
440 | struct pt_regs regs; |
441 | int overflow; |
442 | |
443 | /* Setup perf sample */ |
444 | memset(®s, 0, sizeof(regs)); |
445 | memset(&raw, 0, sizeof(raw)); |
446 | memset(&data, 0, sizeof(data)); |
447 | perf_sample_data_init(data: &data, addr: 0, period: event->hw.last_period); |
448 | if (event->attr.sample_type & PERF_SAMPLE_TID) { |
449 | data.tid_entry.pid = task_tgid_nr(current); |
450 | data.tid_entry.tid = task_pid_nr(current); |
451 | } |
452 | if (event->attr.sample_type & PERF_SAMPLE_TIME) |
453 | data.time = event->clock(); |
454 | if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) |
455 | data.id = event->id; |
456 | if (event->attr.sample_type & PERF_SAMPLE_CPU) |
457 | data.cpu_entry.cpu = smp_processor_id(); |
458 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { |
459 | raw.frag.size = rawsize; |
460 | raw.frag.data = cpump->save; |
461 | perf_sample_save_raw_data(data: &data, raw: &raw); |
462 | } |
463 | |
464 | overflow = perf_event_overflow(event, data: &data, regs: ®s); |
465 | perf_event_update_userpage(event); |
466 | /* Save NNPA lowcore area after read in event */ |
467 | memcpy((void *)PAI_SAVE_AREA(event), cpump->area, |
468 | PAIE1_CTRBLOCK_SZ); |
469 | return overflow; |
470 | } |
471 | |
472 | /* Check if there is data to be saved on schedule out of a task. */ |
473 | static int paiext_have_sample(void) |
474 | { |
475 | struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); |
476 | struct paiext_map *cpump = mp->mapptr; |
477 | struct perf_event *event = cpump->event; |
478 | size_t rawsize; |
479 | int rc = 0; |
480 | |
481 | if (!event) |
482 | return 0; |
483 | rawsize = paiext_copy(userdata: cpump->save, area: cpump->area, |
484 | area_old: (unsigned long *)PAI_SAVE_AREA(event)); |
485 | if (rawsize) /* Incremented counters */ |
486 | rc = paiext_push_sample(rawsize, cpump, event); |
487 | return rc; |
488 | } |
489 | |
490 | /* Called on schedule-in and schedule-out. No access to event structure, |
491 | * but for sampling only event NNPA_ALL is allowed. |
492 | */ |
493 | static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) |
494 | { |
495 | /* We started with a clean page on event installation. So read out |
496 | * results on schedule_out and if page was dirty, clear values. |
497 | */ |
498 | if (!sched_in) |
499 | paiext_have_sample(); |
500 | } |
501 | |
502 | /* Attribute definitions for pai extension1 interface. As with other CPU |
503 | * Measurement Facilities, there is one attribute per mapped counter. |
504 | * The number of mapped counters may vary per machine generation. Use |
505 | * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction |
506 | * to determine the number of mapped counters. The instructions returns |
507 | * a positive number, which is the highest number of supported counters. |
508 | * All counters less than this number are also supported, there are no |
509 | * holes. A returned number of zero means no support for mapped counters. |
510 | * |
511 | * The identification of the counter is a unique number. The chosen range |
512 | * is 0x1800 + offset in mapped kernel page. |
513 | * All CPU Measurement Facility counters identifiers must be unique and |
514 | * the numbers from 0 to 496 are already used for the CPU Measurement |
515 | * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography |
516 | * counters. |
517 | * Numbers 0xb0000, 0xbc000 and 0xbd000 are already |
518 | * used for the CPU Measurement Sampling facility. |
519 | */ |
520 | PMU_FORMAT_ATTR(event, "config:0-63" ); |
521 | |
522 | static struct attribute *paiext_format_attr[] = { |
523 | &format_attr_event.attr, |
524 | NULL, |
525 | }; |
526 | |
527 | static struct attribute_group paiext_events_group = { |
528 | .name = "events" , |
529 | .attrs = NULL, /* Filled in attr_event_init() */ |
530 | }; |
531 | |
532 | static struct attribute_group paiext_format_group = { |
533 | .name = "format" , |
534 | .attrs = paiext_format_attr, |
535 | }; |
536 | |
537 | static const struct attribute_group *paiext_attr_groups[] = { |
538 | &paiext_events_group, |
539 | &paiext_format_group, |
540 | NULL, |
541 | }; |
542 | |
543 | /* Performance monitoring unit for mapped counters */ |
544 | static struct pmu paiext = { |
545 | .task_ctx_nr = perf_invalid_context, |
546 | .event_init = paiext_event_init, |
547 | .add = paiext_add, |
548 | .del = paiext_del, |
549 | .start = paiext_start, |
550 | .stop = paiext_stop, |
551 | .read = paiext_read, |
552 | .sched_task = paiext_sched_task, |
553 | .attr_groups = paiext_attr_groups, |
554 | }; |
555 | |
/* List of symbolic PAI extension 1 NNPA counter names. The array index is
 * the offset of the event number from PAI_NNPA_BASE.
 */
static const char * const paiext_ctrnames[] = {
	[0]	= "NNPA_ALL",
	[1]	= "NNPA_ADD",
	[2]	= "NNPA_SUB",
	[3]	= "NNPA_MUL",
	[4]	= "NNPA_DIV",
	[5]	= "NNPA_MIN",
	[6]	= "NNPA_MAX",
	[7]	= "NNPA_LOG",
	[8]	= "NNPA_EXP",
	[9]	= "NNPA_IBM_RESERVED_9",
	[10]	= "NNPA_RELU",
	[11]	= "NNPA_TANH",
	[12]	= "NNPA_SIGMOID",
	[13]	= "NNPA_SOFTMAX",
	[14]	= "NNPA_BATCHNORM",
	[15]	= "NNPA_MAXPOOL2D",
	[16]	= "NNPA_AVGPOOL2D",
	[17]	= "NNPA_LSTMACT",
	[18]	= "NNPA_GRUACT",
	[19]	= "NNPA_CONVOLUTION",
	[20]	= "NNPA_MATMUL_OP",
	[21]	= "NNPA_MATMUL_OP_BCAST23",
	[22]	= "NNPA_SMALLBATCH",
	[23]	= "NNPA_LARGEDIM",
	[24]	= "NNPA_SMALLTENSOR",
	[25]	= "NNPA_1MFRAME",
	[26]	= "NNPA_2GFRAME",
	[27]	= "NNPA_ACCESSEXCEPT",
};
587 | |
588 | static void __init attr_event_free(struct attribute **attrs, int num) |
589 | { |
590 | struct perf_pmu_events_attr *pa; |
591 | struct device_attribute *dap; |
592 | int i; |
593 | |
594 | for (i = 0; i < num; i++) { |
595 | dap = container_of(attrs[i], struct device_attribute, attr); |
596 | pa = container_of(dap, struct perf_pmu_events_attr, attr); |
597 | kfree(objp: pa); |
598 | } |
599 | kfree(objp: attrs); |
600 | } |
601 | |
602 | static int __init attr_event_init_one(struct attribute **attrs, int num) |
603 | { |
604 | struct perf_pmu_events_attr *pa; |
605 | |
606 | /* Index larger than array_size, no counter name available */ |
607 | if (num >= ARRAY_SIZE(paiext_ctrnames)) { |
608 | attrs[num] = NULL; |
609 | return 0; |
610 | } |
611 | |
612 | pa = kzalloc(size: sizeof(*pa), GFP_KERNEL); |
613 | if (!pa) |
614 | return -ENOMEM; |
615 | |
616 | sysfs_attr_init(&pa->attr.attr); |
617 | pa->id = PAI_NNPA_BASE + num; |
618 | pa->attr.attr.name = paiext_ctrnames[num]; |
619 | pa->attr.attr.mode = 0444; |
620 | pa->attr.show = cpumf_events_sysfs_show; |
621 | pa->attr.store = NULL; |
622 | attrs[num] = &pa->attr.attr; |
623 | return 0; |
624 | } |
625 | |
626 | /* Create PMU sysfs event attributes on the fly. */ |
627 | static int __init attr_event_init(void) |
628 | { |
629 | struct attribute **attrs; |
630 | int ret, i; |
631 | |
632 | attrs = kmalloc_array(n: paiext_cnt + 2, size: sizeof(*attrs), GFP_KERNEL); |
633 | if (!attrs) |
634 | return -ENOMEM; |
635 | for (i = 0; i <= paiext_cnt; i++) { |
636 | ret = attr_event_init_one(attrs, num: i); |
637 | if (ret) { |
638 | attr_event_free(attrs, num: i); |
639 | return ret; |
640 | } |
641 | } |
642 | attrs[i] = NULL; |
643 | paiext_events_group.attrs = attrs; |
644 | return 0; |
645 | } |
646 | |
647 | static int __init paiext_init(void) |
648 | { |
649 | struct qpaci_info_block ib; |
650 | int rc = -ENOMEM; |
651 | |
652 | if (!test_facility(197)) |
653 | return 0; |
654 | |
655 | qpaci(&ib); |
656 | paiext_cnt = ib.num_nnpa; |
657 | if (paiext_cnt >= PAI_NNPA_MAXCTR) |
658 | paiext_cnt = PAI_NNPA_MAXCTR; |
659 | if (!paiext_cnt) |
660 | return 0; |
661 | |
662 | rc = attr_event_init(); |
663 | if (rc) { |
664 | pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n" ); |
665 | return rc; |
666 | } |
667 | |
668 | /* Setup s390dbf facility */ |
669 | paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); |
670 | if (!paiext_dbg) { |
671 | pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n" ); |
672 | rc = -ENOMEM; |
673 | goto out_init; |
674 | } |
675 | debug_register_view(paiext_dbg, &debug_sprintf_view); |
676 | |
677 | rc = perf_pmu_register(pmu: &paiext, KMSG_COMPONENT, type: -1); |
678 | if (rc) { |
679 | pr_err("Registration of " KMSG_COMPONENT " PMU failed with " |
680 | "rc=%i\n" , rc); |
681 | goto out_pmu; |
682 | } |
683 | |
684 | return 0; |
685 | |
686 | out_pmu: |
687 | debug_unregister_view(paiext_dbg, &debug_sprintf_view); |
688 | debug_unregister(paiext_dbg); |
689 | out_init: |
690 | attr_event_free(attrs: paiext_events_group.attrs, |
691 | ARRAY_SIZE(paiext_ctrnames) + 1); |
692 | return rc; |
693 | } |
694 | |
695 | device_initcall(paiext_init); |
696 | |