1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Hypervisor supplied "gpci" ("get performance counter info") performance
4 * counter support
5 *
6 * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
7 * Copyright 2014 IBM Corporation.
8 */
9
10#define pr_fmt(fmt) "hv-gpci: " fmt
11
12#include <linux/init.h>
13#include <linux/perf_event.h>
14#include <asm/firmware.h>
15#include <asm/hvcall.h>
16#include <asm/io.h>
17
18#include "hv-gpci.h"
19#include "hv-common.h"
20
21/*
22 * Example usage:
23 * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,
24 * secondary_index=0,starting_index=0xffffffff,request=0x10/' ...
25 */
26
27/* u32 */
28EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
29/* u32 */
30/*
31 * Note that starting_index, phys_processor_idx, sibling_part_id,
32 * hw_chip_id, partition_id all refer to the same bit range. They
33 * are basically aliases for the starting_index. The specific alias
34 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h
35 */
36EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
37EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
38EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
39EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
40EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);
41
42/* u16 */
43EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
44/* u8 */
45EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23);
46/* u8, bytes of data (1-8) */
47EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
48/* u32, byte offset */
49EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
50
51static cpumask_t hv_gpci_cpumask;
52
53static struct attribute *format_attrs[] = {
54 &format_attr_request.attr,
55 &format_attr_starting_index.attr,
56 &format_attr_phys_processor_idx.attr,
57 &format_attr_sibling_part_id.attr,
58 &format_attr_hw_chip_id.attr,
59 &format_attr_partition_id.attr,
60 &format_attr_secondary_index.attr,
61 &format_attr_counter_info_version.attr,
62
63 &format_attr_offset.attr,
64 &format_attr_length.attr,
65 NULL,
66};
67
68static const struct attribute_group format_group = {
69 .name = "format",
70 .attrs = format_attrs,
71};
72
73static struct attribute_group event_group = {
74 .name = "events",
75 /* .attrs is set in init */
76};
77
/*
 * HV_CAPS_ATTR - define a read-only device attribute named @_name.
 *
 * The generated <_name>_show() fetches the capabilities with
 * hv_perf_caps_get() and prints the member caps.<_name> using @_format.
 * It returns -EIO when the capabilities cannot be obtained.
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps caps;				\
								\
	if (hv_perf_caps_get(&caps))				\
		return -EIO;					\
								\
	return sprintf(page, _format, caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
91
92static ssize_t kernel_version_show(struct device *dev,
93 struct device_attribute *attr,
94 char *page)
95{
96 return sprintf(buf: page, fmt: "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
97}
98
99static ssize_t cpumask_show(struct device *dev,
100 struct device_attribute *attr, char *buf)
101{
102 return cpumap_print_to_pagebuf(list: true, buf, mask: &hv_gpci_cpumask);
103}
104
/*
 * Slots in the interface_attrs[] array that hold the system information
 * attributes. INTERFACE_NULL_ATTR is the slot of the terminating NULL.
 */
#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
#define INTERFACE_NULL_ATTR			11
112
/* Indexes into sysinfo_counter_request[], one per system information file */
enum {
	PROCESSOR_BUS_TOPOLOGY,
	PROCESSOR_CONFIG,
	AFFINITY_DOMAIN_VIA_VP,		/* affinity domain via virtual processor */
	AFFINITY_DOMAIN_VIA_DOM,	/* affinity domain via domain */
	AFFINITY_DOMAIN_VIA_PAR,	/* affinity domain via partition */
};

/* Counter request value used to retrieve each kind of system information */
static int sysinfo_counter_request[] = {
	[PROCESSOR_BUS_TOPOLOGY]	= 0xD0,
	[PROCESSOR_CONFIG]		= 0x90,
	[AFFINITY_DOMAIN_VIA_VP]	= 0xA0,
	[AFFINITY_DOMAIN_VIA_DOM]	= 0xB0,
	[AFFINITY_DOMAIN_VIA_PAR]	= 0xB1,
};
129
130static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
131
132static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
133 u16 secondary_index, char *buf,
134 size_t *n, struct hv_gpci_request_buffer *arg)
135{
136 unsigned long ret;
137 size_t i, j;
138
139 arg->params.counter_request = cpu_to_be32(req);
140 arg->params.starting_index = cpu_to_be32(starting_index);
141 arg->params.secondary_index = cpu_to_be16(secondary_index);
142
143 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
144 virt_to_phys(address: arg), HGPCI_REQ_BUFFER_SIZE);
145
146 /*
147 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
148 * which means that the current buffer size cannot accommodate
149 * all the information and a partial buffer returned.
150 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
151 *
152 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
153 * performance information, and required to set
154 * "Enable Performance Information Collection" option.
155 */
156 if (ret == H_AUTHORITY)
157 return -EPERM;
158
159 /*
160 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
161 * because of invalid buffer-length/address or due to some hardware
162 * error.
163 */
164 if (ret && (ret != H_PARAMETER))
165 return -EIO;
166
167 /*
168 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
169 * to show the total number of counter_value array elements
170 * returned via hcall.
171 * hcall also populates 'cv_element_size' corresponds to individual
172 * counter_value array element size. Below loop go through all
173 * counter_value array elements as per their size and add it to
174 * the output buffer.
175 */
176 for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) {
177 j = i * be16_to_cpu(arg->params.cv_element_size);
178
179 for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++)
180 *n += sprintf(buf: buf + *n, fmt: "%02x", (u8)arg->bytes[j]);
181 *n += sprintf(buf: buf + *n, fmt: "\n");
182 }
183
184 if (*n >= PAGE_SIZE) {
185 pr_info("System information exceeds PAGE_SIZE\n");
186 return -EFBIG;
187 }
188
189 return ret;
190}
191
192static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
193 char *buf)
194{
195 struct hv_gpci_request_buffer *arg;
196 unsigned long ret;
197 size_t n = 0;
198
199 arg = (void *)get_cpu_var(hv_gpci_reqb);
200 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
201
202 /*
203 * Pass the counter request value 0xD0 corresponds to request
204 * type 'Processor_bus_topology', to retrieve
205 * the system topology information.
206 * starting_index value implies the starting hardware
207 * chip id.
208 */
209 ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
210 starting_index: 0, secondary_index: 0, buf, n: &n, arg);
211
212 if (!ret)
213 return n;
214
215 if (ret != H_PARAMETER)
216 goto out;
217
218 /*
219 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
220 * implies that buffer can't accommodate all information, and a partial buffer
221 * returned. To handle that, we need to make subsequent requests
222 * with next starting index to retrieve additional (missing) data.
223 * Below loop do subsequent hcalls with next starting index and add it
224 * to buffer util we get all the information.
225 */
226 while (ret == H_PARAMETER) {
227 int returned_values = be16_to_cpu(arg->params.returned_values);
228 int elementsize = be16_to_cpu(arg->params.cv_element_size);
229 int last_element = (returned_values - 1) * elementsize;
230
231 /*
232 * Since the starting index value is part of counter_value
233 * buffer elements, use the starting index value in the last
234 * element and add 1 to make subsequent hcalls.
235 */
236 u32 starting_index = arg->bytes[last_element + 3] +
237 (arg->bytes[last_element + 2] << 8) +
238 (arg->bytes[last_element + 1] << 16) +
239 (arg->bytes[last_element] << 24) + 1;
240
241 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
242
243 ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
244 starting_index, secondary_index: 0, buf, n: &n, arg);
245
246 if (!ret)
247 return n;
248
249 if (ret != H_PARAMETER)
250 goto out;
251 }
252
253 return n;
254
255out:
256 put_cpu_var(hv_gpci_reqb);
257 return ret;
258}
259
260static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
261 char *buf)
262{
263 struct hv_gpci_request_buffer *arg;
264 unsigned long ret;
265 size_t n = 0;
266
267 arg = (void *)get_cpu_var(hv_gpci_reqb);
268 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
269
270 /*
271 * Pass the counter request value 0x90 corresponds to request
272 * type 'Processor_config', to retrieve
273 * the system processor information.
274 * starting_index value implies the starting hardware
275 * processor index.
276 */
277 ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_CONFIG],
278 starting_index: 0, secondary_index: 0, buf, n: &n, arg);
279
280 if (!ret)
281 return n;
282
283 if (ret != H_PARAMETER)
284 goto out;
285
286 /*
287 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
288 * implies that buffer can't accommodate all information, and a partial buffer
289 * returned. To handle that, we need to take subsequent requests
290 * with next starting index to retrieve additional (missing) data.
291 * Below loop do subsequent hcalls with next starting index and add it
292 * to buffer util we get all the information.
293 */
294 while (ret == H_PARAMETER) {
295 int returned_values = be16_to_cpu(arg->params.returned_values);
296 int elementsize = be16_to_cpu(arg->params.cv_element_size);
297 int last_element = (returned_values - 1) * elementsize;
298
299 /*
300 * Since the starting index is part of counter_value
301 * buffer elements, use the starting index value in the last
302 * element and add 1 to subsequent hcalls.
303 */
304 u32 starting_index = arg->bytes[last_element + 3] +
305 (arg->bytes[last_element + 2] << 8) +
306 (arg->bytes[last_element + 1] << 16) +
307 (arg->bytes[last_element] << 24) + 1;
308
309 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
310
311 ret = systeminfo_gpci_request(req: sysinfo_counter_request[PROCESSOR_CONFIG],
312 starting_index, secondary_index: 0, buf, n: &n, arg);
313
314 if (!ret)
315 return n;
316
317 if (ret != H_PARAMETER)
318 goto out;
319 }
320
321 return n;
322
323out:
324 put_cpu_var(hv_gpci_reqb);
325 return ret;
326}
327
328static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
329 struct device_attribute *attr, char *buf)
330{
331 struct hv_gpci_request_buffer *arg;
332 unsigned long ret;
333 size_t n = 0;
334
335 arg = (void *)get_cpu_var(hv_gpci_reqb);
336 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
337
338 /*
339 * Pass the counter request 0xA0 corresponds to request
340 * type 'Affinity_domain_information_by_virutal_processor',
341 * to retrieve the system affinity domain information.
342 * starting_index value refers to the starting hardware
343 * processor index.
344 */
345 ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
346 starting_index: 0, secondary_index: 0, buf, n: &n, arg);
347
348 if (!ret)
349 return n;
350
351 if (ret != H_PARAMETER)
352 goto out;
353
354 /*
355 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
356 * implies that buffer can't accommodate all information, and a partial buffer
357 * returned. To handle that, we need to take subsequent requests
358 * with next secondary index to retrieve additional (missing) data.
359 * Below loop do subsequent hcalls with next secondary index and add it
360 * to buffer util we get all the information.
361 */
362 while (ret == H_PARAMETER) {
363 int returned_values = be16_to_cpu(arg->params.returned_values);
364 int elementsize = be16_to_cpu(arg->params.cv_element_size);
365 int last_element = (returned_values - 1) * elementsize;
366
367 /*
368 * Since the starting index and secondary index type is part of the
369 * counter_value buffer elements, use the starting index value in the
370 * last array element as subsequent starting index, and use secondary index
371 * value in the last array element plus 1 as subsequent secondary index.
372 * For counter request '0xA0', starting index points to partition id
373 * and secondary index points to corresponding virtual processor index.
374 */
375 u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
376 u16 secondary_index = arg->bytes[last_element + 3] +
377 (arg->bytes[last_element + 2] << 8) + 1;
378
379 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
380
381 ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
382 starting_index, secondary_index, buf, n: &n, arg);
383
384 if (!ret)
385 return n;
386
387 if (ret != H_PARAMETER)
388 goto out;
389 }
390
391 return n;
392
393out:
394 put_cpu_var(hv_gpci_reqb);
395 return ret;
396}
397
398static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
399 char *buf)
400{
401 struct hv_gpci_request_buffer *arg;
402 unsigned long ret;
403 size_t n = 0;
404
405 arg = (void *)get_cpu_var(hv_gpci_reqb);
406 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
407
408 /*
409 * Pass the counter request 0xB0 corresponds to request
410 * type 'Affinity_domain_information_by_domain',
411 * to retrieve the system affinity domain information.
412 * starting_index value refers to the starting hardware
413 * processor index.
414 */
415 ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
416 starting_index: 0, secondary_index: 0, buf, n: &n, arg);
417
418 if (!ret)
419 return n;
420
421 if (ret != H_PARAMETER)
422 goto out;
423
424 /*
425 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
426 * implies that buffer can't accommodate all information, and a partial buffer
427 * returned. To handle that, we need to take subsequent requests
428 * with next starting index to retrieve additional (missing) data.
429 * Below loop do subsequent hcalls with next starting index and add it
430 * to buffer util we get all the information.
431 */
432 while (ret == H_PARAMETER) {
433 int returned_values = be16_to_cpu(arg->params.returned_values);
434 int elementsize = be16_to_cpu(arg->params.cv_element_size);
435 int last_element = (returned_values - 1) * elementsize;
436
437 /*
438 * Since the starting index value is part of counter_value
439 * buffer elements, use the starting index value in the last
440 * element and add 1 to make subsequent hcalls.
441 */
442 u32 starting_index = arg->bytes[last_element + 1] +
443 (arg->bytes[last_element] << 8) + 1;
444
445 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
446
447 ret = systeminfo_gpci_request(req: sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
448 starting_index, secondary_index: 0, buf, n: &n, arg);
449
450 if (!ret)
451 return n;
452
453 if (ret != H_PARAMETER)
454 goto out;
455 }
456
457 return n;
458
459out:
460 put_cpu_var(hv_gpci_reqb);
461 return ret;
462}
463
464static void affinity_domain_via_partition_result_parse(int returned_values,
465 int element_size, char *buf, size_t *last_element,
466 size_t *n, struct hv_gpci_request_buffer *arg)
467{
468 size_t i = 0, j = 0;
469 size_t k, l, m;
470 uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
471
472 /*
473 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
474 * to show the total number of counter_value array elements
475 * returned via hcall.
476 * Unlike other request types, the data structure returned by this
477 * request is variable-size. For this counter request type,
478 * hcall populates 'cv_element_size' corresponds to minimum size of
479 * the structure returned i.e; the size of the structure with no domain
480 * information. Below loop go through all counter_value array
481 * to determine the number and size of each domain array element and
482 * add it to the output buffer.
483 */
484 while (i < returned_values) {
485 k = j;
486 for (; k < j + element_size; k++)
487 *n += sprintf(buf: buf + *n, fmt: "%02x", (u8)arg->bytes[k]);
488 *n += sprintf(buf: buf + *n, fmt: "\n");
489
490 total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
491 size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
492
493 for (l = 0; l < total_affinity_domain_ele; l++) {
494 for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
495 *n += sprintf(buf: buf + *n, fmt: "%02x", (u8)arg->bytes[k]);
496 k++;
497 }
498 *n += sprintf(buf: buf + *n, fmt: "\n");
499 }
500
501 *n += sprintf(buf: buf + *n, fmt: "\n");
502 i++;
503 j = k;
504 }
505
506 *last_element = k;
507}
508
509static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
510 char *buf)
511{
512 struct hv_gpci_request_buffer *arg;
513 unsigned long ret;
514 size_t n = 0;
515 size_t last_element = 0;
516 u32 starting_index;
517
518 arg = (void *)get_cpu_var(hv_gpci_reqb);
519 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
520
521 /*
522 * Pass the counter request value 0xB1 corresponds to counter request
523 * type 'Affinity_domain_information_by_partition',
524 * to retrieve the system affinity domain by partition information.
525 * starting_index value refers to the starting hardware
526 * processor index.
527 */
528 arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
529 arg->params.starting_index = cpu_to_be32(0);
530
531 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
532 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
533
534 if (!ret)
535 goto parse_result;
536
537 if (ret && (ret != H_PARAMETER))
538 goto out;
539
540 /*
541 * ret value as 'H_PARAMETER' implies that the current buffer size
542 * can't accommodate all the information, and a partial buffer
543 * returned. To handle that, we need to make subsequent requests
544 * with next starting index to retrieve additional (missing) data.
545 * Below loop do subsequent hcalls with next starting index and add it
546 * to buffer util we get all the information.
547 */
548 while (ret == H_PARAMETER) {
549 affinity_domain_via_partition_result_parse(
550 be16_to_cpu(arg->params.returned_values) - 1,
551 be16_to_cpu(arg->params.cv_element_size), buf,
552 last_element: &last_element, n: &n, arg);
553
554 if (n >= PAGE_SIZE) {
555 put_cpu_var(hv_gpci_reqb);
556 pr_debug("System information exceeds PAGE_SIZE\n");
557 return -EFBIG;
558 }
559
560 /*
561 * Since the starting index value is part of counter_value
562 * buffer elements, use the starting_index value in the last
563 * element and add 1 to make subsequent hcalls.
564 */
565 starting_index = (u8)arg->bytes[last_element] << 8 |
566 (u8)arg->bytes[last_element + 1];
567
568 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
569 arg->params.counter_request = cpu_to_be32(
570 sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
571 arg->params.starting_index = cpu_to_be32(starting_index);
572
573 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
574 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
575
576 if (ret && (ret != H_PARAMETER))
577 goto out;
578 }
579
580parse_result:
581 affinity_domain_via_partition_result_parse(
582 be16_to_cpu(arg->params.returned_values),
583 be16_to_cpu(arg->params.cv_element_size),
584 buf, last_element: &last_element, n: &n, arg);
585
586 put_cpu_var(hv_gpci_reqb);
587 return n;
588
589out:
590 put_cpu_var(hv_gpci_reqb);
591
592 /*
593 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
594 * which means that the current buffer size cannot accommodate
595 * all the information and a partial buffer returned.
596 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
597 *
598 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
599 * performance information, and required to set
600 * "Enable Performance Information Collection" option.
601 */
602 if (ret == H_AUTHORITY)
603 return -EPERM;
604
605 /*
606 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
607 * because of invalid buffer-length/address or due to some hardware
608 * error.
609 */
610 return -EIO;
611}
612
613static DEVICE_ATTR_RO(kernel_version);
614static DEVICE_ATTR_RO(cpumask);
615
616HV_CAPS_ATTR(version, "0x%x\n");
617HV_CAPS_ATTR(ga, "%d\n");
618HV_CAPS_ATTR(expanded, "%d\n");
619HV_CAPS_ATTR(lab, "%d\n");
620HV_CAPS_ATTR(collect_privileged, "%d\n");
621
622static struct attribute *interface_attrs[] = {
623 &dev_attr_kernel_version.attr,
624 &hv_caps_attr_version.attr,
625 &hv_caps_attr_ga.attr,
626 &hv_caps_attr_expanded.attr,
627 &hv_caps_attr_lab.attr,
628 &hv_caps_attr_collect_privileged.attr,
629 /*
630 * This NULL is a placeholder for the processor_bus_topology
631 * attribute, set in init function if applicable.
632 */
633 NULL,
634 /*
635 * This NULL is a placeholder for the processor_config
636 * attribute, set in init function if applicable.
637 */
638 NULL,
639 /*
640 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
641 * attribute, set in init function if applicable.
642 */
643 NULL,
644 /*
645 * This NULL is a placeholder for the affinity_domain_via_domain
646 * attribute, set in init function if applicable.
647 */
648 NULL,
649 /*
650 * This NULL is a placeholder for the affinity_domain_via_partition
651 * attribute, set in init function if applicable.
652 */
653 NULL,
654 NULL,
655};
656
657static struct attribute *cpumask_attrs[] = {
658 &dev_attr_cpumask.attr,
659 NULL,
660};
661
662static const struct attribute_group cpumask_attr_group = {
663 .attrs = cpumask_attrs,
664};
665
666static const struct attribute_group interface_group = {
667 .name = "interface",
668 .attrs = interface_attrs,
669};
670
671static const struct attribute_group *attr_groups[] = {
672 &format_group,
673 &event_group,
674 &interface_group,
675 &cpumask_attr_group,
676 NULL,
677};
678
679static unsigned long single_gpci_request(u32 req, u32 starting_index,
680 u16 secondary_index, u8 version_in, u32 offset, u8 length,
681 u64 *value)
682{
683 unsigned long ret;
684 size_t i;
685 u64 count;
686 struct hv_gpci_request_buffer *arg;
687
688 arg = (void *)get_cpu_var(hv_gpci_reqb);
689 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
690
691 arg->params.counter_request = cpu_to_be32(req);
692 arg->params.starting_index = cpu_to_be32(starting_index);
693 arg->params.secondary_index = cpu_to_be16(secondary_index);
694 arg->params.counter_info_version_in = version_in;
695
696 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
697 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
698
699 /*
700 * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL',
701 * specifies that the current buffer size cannot accommodate
702 * all the information and a partial buffer returned.
703 * Since in this function we are only accessing data for a given starting index,
704 * we don't need to accommodate whole data and can get required count by
705 * accessing first entry data.
706 * Hence hcall fails only incase the ret value is other than H_SUCCESS or
707 * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B).
708 */
709 if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B)
710 ret = 0;
711
712 if (ret) {
713 pr_devel("hcall failed: 0x%lx\n", ret);
714 goto out;
715 }
716
717 /*
718 * we verify offset and length are within the zeroed buffer at event
719 * init.
720 */
721 count = 0;
722 for (i = offset; i < offset + length; i++)
723 count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8);
724
725 *value = count;
726out:
727 put_cpu_var(hv_gpci_reqb);
728 return ret;
729}
730
731static u64 h_gpci_get_value(struct perf_event *event)
732{
733 u64 count;
734 unsigned long ret = single_gpci_request(req: event_get_request(event),
735 starting_index: event_get_starting_index(event),
736 secondary_index: event_get_secondary_index(event),
737 version_in: event_get_counter_info_version(event),
738 offset: event_get_offset(event),
739 length: event_get_length(event),
740 value: &count);
741 if (ret)
742 return 0;
743 return count;
744}
745
746static void h_gpci_event_update(struct perf_event *event)
747{
748 s64 prev;
749 u64 now = h_gpci_get_value(event);
750 prev = local64_xchg(&event->hw.prev_count, now);
751 local64_add(now - prev, &event->count);
752}
753
754static void h_gpci_event_start(struct perf_event *event, int flags)
755{
756 local64_set(&event->hw.prev_count, h_gpci_get_value(event));
757}
758
/* pmu::stop and pmu::del: take a final reading of the counter. */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
763
764static int h_gpci_event_add(struct perf_event *event, int flags)
765{
766 if (flags & PERF_EF_START)
767 h_gpci_event_start(event, flags);
768
769 return 0;
770}
771
772static int h_gpci_event_init(struct perf_event *event)
773{
774 u64 count;
775 u8 length;
776 unsigned long ret;
777
778 /* Not our event */
779 if (event->attr.type != event->pmu->type)
780 return -ENOENT;
781
782 /* config2 is unused */
783 if (event->attr.config2) {
784 pr_devel("config2 set when reserved\n");
785 return -EINVAL;
786 }
787
788 /* no branch sampling */
789 if (has_branch_stack(event))
790 return -EOPNOTSUPP;
791
792 length = event_get_length(event);
793 if (length < 1 || length > 8) {
794 pr_devel("length invalid\n");
795 return -EINVAL;
796 }
797
798 /* last byte within the buffer? */
799 if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
800 pr_devel("request outside of buffer: %zu > %zu\n",
801 (size_t)event_get_offset(event) + length,
802 HGPCI_MAX_DATA_BYTES);
803 return -EINVAL;
804 }
805
806 /* check if the request works... */
807 ret = single_gpci_request(req: event_get_request(event),
808 starting_index: event_get_starting_index(event),
809 secondary_index: event_get_secondary_index(event),
810 version_in: event_get_counter_info_version(event),
811 offset: event_get_offset(event),
812 length,
813 value: &count);
814
815 /*
816 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
817 * performance information, and required to set
818 * "Enable Performance Information Collection" option.
819 */
820 if (ret == H_AUTHORITY)
821 return -EPERM;
822
823 if (ret) {
824 pr_devel("gpci hcall failed\n");
825 return -EINVAL;
826 }
827
828 return 0;
829}
830
831static struct pmu h_gpci_pmu = {
832 .task_ctx_nr = perf_invalid_context,
833
834 .name = "hv_gpci",
835 .attr_groups = attr_groups,
836 .event_init = h_gpci_event_init,
837 .add = h_gpci_event_add,
838 .del = h_gpci_event_stop,
839 .start = h_gpci_event_start,
840 .stop = h_gpci_event_stop,
841 .read = h_gpci_event_update,
842 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
843};
844
845static int ppc_hv_gpci_cpu_online(unsigned int cpu)
846{
847 if (cpumask_empty(srcp: &hv_gpci_cpumask))
848 cpumask_set_cpu(cpu, dstp: &hv_gpci_cpumask);
849
850 return 0;
851}
852
853static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
854{
855 int target;
856
857 /* Check if exiting cpu is used for collecting gpci events */
858 if (!cpumask_test_and_clear_cpu(cpu, cpumask: &hv_gpci_cpumask))
859 return 0;
860
861 /* Find a new cpu to collect gpci events */
862 target = cpumask_last(cpu_active_mask);
863
864 if (target < 0 || target >= nr_cpu_ids) {
865 pr_err("hv_gpci: CPU hotplug init failed\n");
866 return -1;
867 }
868
869 /* Migrate gpci events to the new target */
870 cpumask_set_cpu(cpu: target, dstp: &hv_gpci_cpumask);
871 perf_pmu_migrate_context(pmu: &h_gpci_pmu, src_cpu: cpu, dst_cpu: target);
872
873 return 0;
874}
875
876static int hv_gpci_cpu_hotplug_init(void)
877{
878 return cpuhp_setup_state(state: CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
879 name: "perf/powerpc/hv_gcpi:online",
880 startup: ppc_hv_gpci_cpu_online,
881 teardown: ppc_hv_gpci_cpu_offline);
882}
883
884static struct device_attribute *sysinfo_device_attr_create(int
885 sysinfo_interface_group_index, u32 req)
886{
887 struct device_attribute *attr = NULL;
888 unsigned long ret;
889 struct hv_gpci_request_buffer *arg;
890
891 if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
892 sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
893 pr_info("Wrong interface group index for system information\n");
894 return NULL;
895 }
896
897 /* Check for given counter request value support */
898 arg = (void *)get_cpu_var(hv_gpci_reqb);
899 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
900
901 arg->params.counter_request = cpu_to_be32(req);
902
903 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
904 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
905
906 put_cpu_var(hv_gpci_reqb);
907
908 /*
909 * Add given counter request value attribute in the interface_attrs
910 * attribute array, only for valid return types.
911 */
912 if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) {
913 attr = kzalloc(size: sizeof(*attr), GFP_KERNEL);
914 if (!attr)
915 return NULL;
916
917 sysfs_attr_init(&attr->attr);
918 attr->attr.mode = 0444;
919
920 switch (sysinfo_interface_group_index) {
921 case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
922 attr->attr.name = "processor_bus_topology";
923 attr->show = processor_bus_topology_show;
924 break;
925 case INTERFACE_PROCESSOR_CONFIG_ATTR:
926 attr->attr.name = "processor_config";
927 attr->show = processor_config_show;
928 break;
929 case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
930 attr->attr.name = "affinity_domain_via_virtual_processor";
931 attr->show = affinity_domain_via_virtual_processor_show;
932 break;
933 case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
934 attr->attr.name = "affinity_domain_via_domain";
935 attr->show = affinity_domain_via_domain_show;
936 break;
937 case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
938 attr->attr.name = "affinity_domain_via_partition";
939 attr->show = affinity_domain_via_partition_show;
940 break;
941 }
942 } else
943 pr_devel("hcall failed, with error: 0x%lx\n", ret);
944
945 return attr;
946}
947
948static void add_sysinfo_interface_files(void)
949{
950 int sysfs_count;
951 struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
952 int i;
953
954 sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
955
956 /* Get device attribute for a given counter request value */
957 for (i = 0; i < sysfs_count; i++) {
958 attr[i] = sysinfo_device_attr_create(sysinfo_interface_group_index: i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
959 req: sysinfo_counter_request[i]);
960
961 if (!attr[i])
962 goto out;
963 }
964
965 /* Add sysinfo interface attributes in the interface_attrs attribute array */
966 for (i = 0; i < sysfs_count; i++)
967 interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
968
969 return;
970
971out:
972 /*
973 * The sysinfo interface attributes will be added, only if hcall passed for
974 * all the counter request values. Free the device attribute array incase
975 * of any hcall failure.
976 */
977 if (i > 0) {
978 while (i >= 0) {
979 kfree(objp: attr[i]);
980 i--;
981 }
982 }
983}
984
985static int hv_gpci_init(void)
986{
987 int r;
988 unsigned long hret;
989 struct hv_perf_caps caps;
990 struct hv_gpci_request_buffer *arg;
991
992 hv_gpci_assert_offsets_correct();
993
994 if (!firmware_has_feature(FW_FEATURE_LPAR)) {
995 pr_debug("not a virtualized system, not enabling\n");
996 return -ENODEV;
997 }
998
999 hret = hv_perf_caps_get(caps: &caps);
1000 if (hret) {
1001 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1002 hret);
1003 return -ENODEV;
1004 }
1005
1006 /* init cpuhotplug */
1007 r = hv_gpci_cpu_hotplug_init();
1008 if (r)
1009 return r;
1010
1011 /* sampling not supported */
1012 h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1013
1014 arg = (void *)get_cpu_var(hv_gpci_reqb);
1015 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
1016
1017 /*
1018 * hcall H_GET_PERF_COUNTER_INFO populates the output
1019 * counter_info_version value based on the system hypervisor.
1020 * Pass the counter request 0x10 corresponds to request type
1021 * 'Dispatch_timebase_by_processor', to get the supported
1022 * counter_info_version.
1023 */
1024 arg->params.counter_request = cpu_to_be32(0x10);
1025
1026 r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
1027 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
1028 if (r) {
1029 pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
1030 arg->params.counter_info_version_out = 0x8;
1031 }
1032
1033 /*
1034 * Use counter_info_version_out value to assign
1035 * required hv-gpci event list.
1036 */
1037 if (arg->params.counter_info_version_out >= 0x8)
1038 event_group.attrs = hv_gpci_event_attrs;
1039 else
1040 event_group.attrs = hv_gpci_event_attrs_v6;
1041
1042 put_cpu_var(hv_gpci_reqb);
1043
1044 r = perf_pmu_register(pmu: &h_gpci_pmu, name: h_gpci_pmu.name, type: -1);
1045 if (r)
1046 return r;
1047
1048 /* sysinfo interface files are only available for power10 and above platforms */
1049 if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
1050 add_sysinfo_interface_files();
1051
1052 return 0;
1053}
1054
1055device_initcall(hv_gpci_init);
1056

source code of linux/arch/powerpc/perf/hv-gpci.c