// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Perf interface to expose Dispatch Trace Log counters.
 *
 * Copyright (C) 2024 Kajol Jain, IBM Corporation
 */

#ifdef CONFIG_PPC_SPLPAR
#define pr_fmt(fmt) "vpa_dtl: " fmt

#include <linux/perf_event.h>
#include <linux/vmalloc.h>
#include <asm/dtl.h>
#include <asm/plpar_wrappers.h>

#define EVENT(_name, _code) enum { _name = _code }
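/* For example, EVENT(DTL_CEDE, 0x1) expands to "enum { DTL_CEDE = 0x1 }" */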

/*
 * Based on the Power Architecture Platform Reference (PAPR) documentation,
 * Table 14.14 "Per Virtual Processor Area", below is the Dispatch Trace
 * Log (DTL) Enable Mask used to select which virtual processor dispatch
 * and preempt events are traced:
 * DTL_CEDE(0x1): Trace voluntary (OS initiated) virtual
 * processor waits
 * DTL_PREEMPT(0x2): Trace time slice preempts
 * DTL_FAULT(0x4): Trace virtual partition memory page
 * faults.
 * DTL_ALL(0x7): Trace all (DTL_CEDE | DTL_PREEMPT | DTL_FAULT)
 *
 * Event codes are based on the Dispatch Trace Log Enable Mask.
 */
EVENT(DTL_CEDE, 0x1);
EVENT(DTL_PREEMPT, 0x2);
EVENT(DTL_FAULT, 0x4);
EVENT(DTL_ALL, 0x7);
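
/*
 * Example usage (a sketch, assuming this PMU registers as "vpa_dtl" and
 * the events appear under /sys/bus/event_source/devices/vpa_dtl/events):
 *
 *	perf record -e vpa_dtl/dtl_all/ -C 0 -- sleep 1
 *	perf script
 */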

GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);

PMU_FORMAT_ATTR(event, "config:0-7");

static struct attribute *events_attr[] = {
	GENERIC_EVENT_PTR(DTL_CEDE),
	GENERIC_EVENT_PTR(DTL_PREEMPT),
	GENERIC_EVENT_PTR(DTL_FAULT),
	GENERIC_EVENT_PTR(DTL_ALL),
	NULL
};

static struct attribute_group event_group = {
	.name = "events",
	.attrs = events_attr,
};

static struct attribute *format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static const struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	NULL,
};

struct vpa_dtl {
	struct dtl_entry *buf;
	u64 last_idx;
};

struct vpa_pmu_ctx {
	struct perf_output_handle handle;
};

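/*
 * Per-event AUX buffer state: note that "head" is an offset in units
 * of struct dtl_entry from "base", while "head_size" counts the bytes
 * already committed via perf_aux_output_end().
 */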
struct vpa_pmu_buf {
	int nr_pages;
	bool snapshot;
	u64 *base;
	u64 size;
	u64 head;
	u64 head_size;
	/* boot timebase and frequency need to be saved only once */
	int boottb_freq_saved;
	u64 threshold;
	bool full;
};

/*
 * To correlate each DTL entry with other events across CPUs,
 * we need to map the timebase from "struct dtl_entry", which phyp
 * provides, to the boot timebase. This also needs the timebase frequency.
 * The formula is: ((timebase from DTL entry - boot timebase) / frequency)
 *
 * The structure is padded with 24 bytes to match the size of
 * "struct dtl_entry", which eases post processing.
 */
struct boottb_freq {
	u64 boot_tb;
	u64 tb_freq;
	u64 timebase;
	u64 padded[3];
};
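
/*
 * A minimal post-processing sketch (not part of the driver): with the
 * leading struct boottb_freq record from the AUX buffer, a raw DTL
 * timebase converts to seconds since boot roughly as
 *
 *	u64 ticks = be64_to_cpu(dte->timebase) - hdr->boot_tb;
 *	u64 secs  = ticks / hdr->tb_freq;
 *
 * where "dte" and "hdr" are hypothetical pointers into the captured data.
 */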

static DEFINE_PER_CPU(struct vpa_pmu_ctx, vpa_pmu_ctx);
static DEFINE_PER_CPU(struct vpa_dtl, vpa_dtl_cpu);

/* variable to capture reference count for the active dtl threads */
static int dtl_global_refc;
static DEFINE_SPINLOCK(dtl_global_lock);

/*
 * Capture DTL data in AUX buffer
 */
static void vpa_dtl_capture_aux(long *n_entries, struct vpa_pmu_buf *buf,
				struct vpa_dtl *dtl, int index)
{
	struct dtl_entry *aux_copy_buf = (struct dtl_entry *)buf->base;

	/*
	 * Check whether there is enough space to contain the new
	 * DTL data. If not, copy only as many entries as fit and
	 * mark the buffer full.
	 */
	if (buf->head + *n_entries >= buf->threshold) {
		*n_entries = buf->threshold - buf->head;
		buf->full = true;
	}

	/*
	 * Copy to AUX buffer from per-thread address
	 */
	memcpy(aux_copy_buf + buf->head, &dtl->buf[index], *n_entries * sizeof(struct dtl_entry));

	if (buf->full) {
		/*
		 * Set head of private aux to zero when buffer is full
		 * so that next data will be copied to beginning of the
		 * buffer
		 */
		buf->head = 0;
		return;
	}

	buf->head += *n_entries;
}

/*
 * Function to dump the dispatch trace log buffer data to the
 * perf data.
 *
 * perf_aux_output_begin: This function is called before writing
 * to the AUX area. It returns the pointer to the AUX area private
 * structure, i.e. "struct vpa_pmu_buf" here, which is set up in the
 * setup_aux() function. It also obtains the output handle (used by
 * perf_aux_output_end). When the capture completes in
 * vpa_dtl_capture_aux(), perf_aux_output_end() is called to commit
 * the recorded data.
 *
 * perf_aux_output_end: This function commits data by adjusting the
 * aux_head of "struct perf_buffer". The aux_tail is moved on the perf
 * tools side when writing the data from the AUX buffer to the
 * perf.data file on disk.
 *
 * In the private AUX structure, we maintain a head to know where to
 * copy data next time in the PMU driver. vpa_pmu_buf->head is moved to
 * maintain the AUX head for the PMU driver. It is the responsibility
 * of the PMU driver to make sure data is copied between
 * perf_aux_output_begin and perf_aux_output_end.
 *
 * After data is copied in vpa_dtl_capture_aux(), perf_aux_output_end()
 * is called to move the aux->head of "struct perf_buffer" to indicate
 * the size of data in the AUX buffer. This posts a PERF_RECORD_AUX
 * record into the perf buffer. Data is written to disk only when the
 * allocated buffer is full.
 *
 * With this approach, all the DTL data is present as-is in the
 * perf.data file. The data is pre-processed on the perf tools side
 * when running perf report/perf script, which avoids the time taken
 * to create samples in kernel space.
 */
static void vpa_dtl_dump_sample_data(struct perf_event *event)
{
	u64 cur_idx, last_idx, i;
	u64 boot_tb;
	struct boottb_freq boottb_freq;
	/* actual number of entries read */
	long n_read = 0, read_size = 0;
	/* number of entries added to the dtl buffer */
	long n_req;
	struct vpa_pmu_ctx *vpa_ctx = this_cpu_ptr(&vpa_pmu_ctx);
	struct vpa_pmu_buf *aux_buf;
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
	u64 size;

	cur_idx = be64_to_cpu(lppaca_of(event->cpu).dtl_idx);
	last_idx = dtl->last_idx;

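	/*
	 * The hypervisor-side ring holds N_DISPATCH_LOG entries; if it
	 * has wrapped past our last read position, the oldest entries
	 * are lost, so skip ahead to the oldest entry still available.
	 */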
	if (last_idx + N_DISPATCH_LOG <= cur_idx)
		last_idx = cur_idx - N_DISPATCH_LOG + 1;

	n_req = cur_idx - last_idx;

	/* no new entry added to the buffer, return */
	if (n_req <= 0)
		return;

	dtl->last_idx = last_idx + n_req;
	boot_tb = get_boot_tb();

	i = last_idx % N_DISPATCH_LOG;

	aux_buf = perf_aux_output_begin(&vpa_ctx->handle, event);
	if (!aux_buf) {
		pr_debug("returning, no aux buffer\n");
		return;
	}

	if (!aux_buf->boottb_freq_saved) {
		pr_debug("Copying boot tb to aux buffer: %lld\n", boot_tb);
		/* Save boot_tb to convert raw timebase values to time relative to boot */
		boottb_freq.boot_tb = boot_tb;
		/* Save tb_ticks_per_sec to convert timebase ticks to seconds */
		boottb_freq.tb_freq = tb_ticks_per_sec;
		boottb_freq.timebase = 0;
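		/*
		 * The header is padded to sizeof(struct dtl_entry), so it
		 * occupies exactly one entry slot; head and n_read below
		 * account for it.
		 */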
		memcpy(aux_buf->base, &boottb_freq, sizeof(boottb_freq));
		aux_buf->head += 1;
		aux_buf->boottb_freq_saved = 1;
		n_read += 1;
	}

	/* read the tail of the buffer if we've wrapped */
	if (i + n_req > N_DISPATCH_LOG) {
		read_size = N_DISPATCH_LOG - i;
		vpa_dtl_capture_aux(&read_size, aux_buf, dtl, i);
		n_req -= read_size;
		n_read += read_size;
		i = 0;
		if (aux_buf->full) {
			size = (n_read * sizeof(struct dtl_entry));
			if ((size + aux_buf->head_size) > aux_buf->size) {
				size = aux_buf->size - aux_buf->head_size;
				perf_aux_output_end(&vpa_ctx->handle, size);
				aux_buf->head = 0;
				aux_buf->head_size = 0;
			} else {
				aux_buf->head_size += (n_read * sizeof(struct dtl_entry));
				perf_aux_output_end(&vpa_ctx->handle, n_read * sizeof(struct dtl_entry));
			}
			goto out;
		}
	}

	/* .. and now the head */
	vpa_dtl_capture_aux(&n_req, aux_buf, dtl, i);

	size = ((n_req + n_read) * sizeof(struct dtl_entry));
	if ((size + aux_buf->head_size) > aux_buf->size) {
		size = aux_buf->size - aux_buf->head_size;
		perf_aux_output_end(&vpa_ctx->handle, size);
		aux_buf->head = 0;
		aux_buf->head_size = 0;
	} else {
		aux_buf->head_size += ((n_req + n_read) * sizeof(struct dtl_entry));
		/* Move the aux->head to indicate size of data in aux buffer */
		perf_aux_output_end(&vpa_ctx->handle, (n_req + n_read) * sizeof(struct dtl_entry));
	}
out:
	aux_buf->full = false;
}

/*
 * The VPA Dispatch Trace Log counters do not interrupt on overflow.
 * Therefore, the kernel polls the counters with an hrtimer to avoid
 * missing an overflow. The timer interval is based on the sample_period
 * count provided by the user, with a minimum interval of 1 millisecond.
 */
static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer)
{
	struct perf_event *event;
	u64 period;

	event = container_of(hrtimer, struct perf_event, hw.hrtimer);

	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return HRTIMER_NORESTART;

	vpa_dtl_dump_sample_data(event);
	period = max_t(u64, NSEC_PER_MSEC, event->hw.sample_period);
	hrtimer_forward_now(hrtimer, ns_to_ktime(period));

	return HRTIMER_RESTART;
}

static void vpa_dtl_start_hrtimer(struct perf_event *event)
{
	u64 period;
	struct hw_perf_event *hwc = &event->hw;

	period = max_t(u64, NSEC_PER_MSEC, hwc->sample_period);
	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED);
}

static void vpa_dtl_stop_hrtimer(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	hrtimer_cancel(&hwc->hrtimer);
}

static void vpa_dtl_reset_global_refc(struct perf_event *event)
{
	spin_lock(&dtl_global_lock);
	dtl_global_refc--;
	if (dtl_global_refc <= 0) {
		dtl_global_refc = 0;
		up_write(&dtl_access_lock);
	}
	spin_unlock(&dtl_global_lock);
}

static int vpa_dtl_mem_alloc(int cpu)
{
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, cpu);
	struct dtl_entry *buf = NULL;

	/* Check for dispatch trace log buffer cache */
	if (!dtl_cache)
		return -ENOMEM;

	/* called under dtl_global_lock, so the allocation must not sleep */
	buf = kmem_cache_alloc_node(dtl_cache, GFP_ATOMIC, cpu_to_node(cpu));
	if (!buf) {
		pr_warn("buffer allocation failed for cpu %d\n", cpu);
		return -ENOMEM;
	}
	dtl->buf = buf;
	return 0;
}

static int vpa_dtl_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* test the event attr type for PMU enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (!perfmon_capable())
		return -EACCES;

	/* Return if this is a counting event */
	if (!is_sampling_event(event))
		return -EOPNOTSUPP;

	/* no branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	/* Reject invalid event codes */
	switch (event->attr.config) {
	case DTL_LOG_CEDE:
	case DTL_LOG_PREEMPT:
	case DTL_LOG_FAULT:
	case DTL_LOG_ALL:
		break;
	default:
		return -EINVAL;
	}

	spin_lock(&dtl_global_lock);

	/*
	 * To ensure there are no other conflicting dtl users
	 * (example: /proc/powerpc/vcpudispatch_stats or debugfs dtl),
	 * the code below tries to take the dtl_access_lock.
	 * The dtl_access_lock is a rw semaphore declared in dtl.h, which
	 * is used to ensure there are no conflicting dtl users.
	 * The vpa_dtl pmu tries to take the write access lock and also
	 * checks dtl_global_refc, to make sure that the dtl_access_lock
	 * is held by the vpa_dtl pmu interface.
	 */
	if (dtl_global_refc == 0 && !down_write_trylock(&dtl_access_lock)) {
		spin_unlock(&dtl_global_lock);
		return -EBUSY;
	}

	/* Allocate dtl buffer memory */
	if (vpa_dtl_mem_alloc(event->cpu)) {
		spin_unlock(&dtl_global_lock);
		return -ENOMEM;
	}

	/*
	 * Increment the number of active vpa_dtl pmu threads. The
	 * dtl_global_refc keeps count of the cpu threads that are
	 * currently capturing dtl data using the vpa_dtl pmu interface.
	 */
	dtl_global_refc++;

	spin_unlock(&dtl_global_lock);

	hrtimer_setup(&hwc->hrtimer, vpa_dtl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

	/*
	 * Since hrtimers have a fixed rate, we can do a static freq->period
	 * mapping and avoid the whole period adjust feedback stuff.
	 */
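	/*
	 * For example, attr.sample_freq = 1000 maps to a fixed period of
	 * NSEC_PER_SEC / 1000 = 1000000 ns (1 ms).
	 */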
	if (event->attr.freq) {
		long freq = event->attr.sample_freq;

		event->attr.sample_period = NSEC_PER_SEC / freq;
		hwc->sample_period = event->attr.sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
		hwc->last_period = hwc->sample_period;
		event->attr.freq = 0;
	}

	event->destroy = vpa_dtl_reset_global_refc;
	return 0;
}

static int vpa_dtl_event_add(struct perf_event *event, int flags)
{
	int ret, hwcpu;
	unsigned long addr;
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);

	/*
	 * Register our dtl buffer with the hypervisor. The
	 * HV expects the buffer size to be passed in the second
	 * word of the buffer. Refer to section '14.11.3.2. H_REGISTER_VPA'
	 * of PAPR for more information.
	 */
	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
	dtl->last_idx = 0;

	hwcpu = get_hard_smp_processor_id(event->cpu);
	addr = __pa(dtl->buf);

	ret = register_dtl(hwcpu, addr);
	if (ret) {
		pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
			event->cpu, hwcpu, ret);
		return ret;
	}

	/* set our initial buffer indices */
	lppaca_of(event->cpu).dtl_idx = 0;

	/*
	 * Ensure that our updates to the lppaca fields have
	 * occurred before we actually enable the logging
	 */
	smp_wmb();

	/* enable event logging */
	lppaca_of(event->cpu).dtl_enable_mask = event->attr.config;

	vpa_dtl_start_hrtimer(event);

	return 0;
}

static void vpa_dtl_event_del(struct perf_event *event, int flags)
{
	int hwcpu = get_hard_smp_processor_id(event->cpu);
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);

	vpa_dtl_stop_hrtimer(event);
	unregister_dtl(hwcpu);
	kmem_cache_free(dtl_cache, dtl->buf);
	dtl->buf = NULL;
	lppaca_of(event->cpu).dtl_enable_mask = 0x0;
}

/*
 * This function definition is intentionally empty, as
 * vpa_dtl_dump_sample_data() is what parses and dumps the dispatch
 * trace log data to the perf data file.
 */
static void vpa_dtl_event_read(struct perf_event *event)
{
}

/*
 * Set up pmu-private data structures for an AUX area.
 * **pages contains the AUX buffer allocated for this event
 * on the corresponding cpu. rb_alloc_aux() uses "alloc_pages_node"
 * and returns the address of each page. Map these pages to
 * contiguous space using vmap and use that as the base address.
 *
 * The AUX private data structure, i.e. "struct vpa_pmu_buf",
 * mainly saves:
 * - buf->base: AUX buffer base address
 * - buf->head: offset from the base address where data will be written next
 * - buf->size: size of the allocated memory
 */
static void *vpa_dtl_setup_aux(struct perf_event *event, void **pages,
			       int nr_pages, bool snapshot)
{
	int i, cpu = event->cpu;
	struct vpa_pmu_buf *buf __free(kfree) = NULL;
	struct page **pglist __free(kfree) = NULL;

	/* We need at least one page for this to work. */
	if (!nr_pages)
		return NULL;

	if (cpu == -1)
		cpu = raw_smp_processor_id();

	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
	if (!buf)
		return NULL;

	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
	if (!pglist)
		return NULL;

	for (i = 0; i < nr_pages; ++i)
		pglist[i] = virt_to_page(pages[i]);

	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!buf->base)
		return NULL;

	buf->nr_pages = nr_pages;
	buf->snapshot = false;

	buf->size = nr_pages << PAGE_SHIFT;
	buf->head = 0;
	buf->head_size = 0;
	buf->boottb_freq_saved = 0;
	buf->threshold = ((buf->size - 32) / sizeof(struct dtl_entry));
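	/* e.g. 16 AUX pages (64 KiB) with 48-byte entries: (65536 - 32) / 48 = 1364 */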
	return no_free_ptr(buf);
}

/*
 * free pmu-private AUX data structures
 */
static void vpa_dtl_free_aux(void *aux)
{
	struct vpa_pmu_buf *buf = aux;

	vunmap(buf->base);
	kfree(buf);
}

static struct pmu vpa_dtl_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "vpa_dtl",
	.attr_groups = attr_groups,
	.event_init = vpa_dtl_event_init,
	.add = vpa_dtl_event_add,
	.del = vpa_dtl_event_del,
	.read = vpa_dtl_event_read,
	.setup_aux = vpa_dtl_setup_aux,
	.free_aux = vpa_dtl_free_aux,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
};

static int vpa_dtl_init(void)
{
	int r;

	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
		pr_debug("not a shared virtualized system, not enabling\n");
		return -ENODEV;
	}

	/* This driver is intended only for the L1 host. */
	if (is_kvm_guest()) {
		pr_debug("Only supported on an L1 host system\n");
		return -ENODEV;
	}

	r = perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1);
	if (r)
		return r;

	return 0;
}

device_initcall(vpa_dtl_init);
#endif /* CONFIG_PPC_SPLPAR */
| 597 | |