// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtual Processor Dispatch Trace Log
 *
 * (C) Copyright IBM Corporation 2009
 *
 * Author: Jeremy Kerr <jk@ozlabs.org>
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/smp.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <asm/firmware.h>
#include <asm/dtl.h>
#include <asm/lppaca.h>
#include <asm/plpar_wrappers.h>
#include <asm/machdep.h>

#ifdef CONFIG_DTL
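/*
 * Per-cpu state for the debugfs interface: the kernel-side buffer that
 * shadows this cpu's dispatch trace log, and the read cursor (last_idx)
 * that dtl_file_read() resumes from.
 */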
struct dtl {
        struct dtl_entry        *buf;
        int                     cpu;
        int                     buf_entries;
        u64                     last_idx;
        spinlock_t              lock;
};
static DEFINE_PER_CPU(struct dtl, cpu_dtl);

static u8 dtl_event_mask = DTL_LOG_ALL;


/*
 * Size of per-cpu log buffers. Firmware requires that the buffer does
 * not cross a 4k boundary.
 */
static int dtl_buf_entries = N_DISPATCH_LOG;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

/*
 * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
 * reading from the dispatch trace log. If other code wants to consume
 * DTL entries, it can set this pointer to a function that will get
 * called once for each DTL entry that gets processed.
 */
static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);

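/*
 * Lock-free, single-producer ring. consume_dtle() (called from the cpu
 * accounting code with interrupts off) copies entries into buf and
 * publishes each one by bumping write_index after a write barrier;
 * readers must only trust entries below write_index.
 */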
struct dtl_ring {
        u64                     write_index;
        struct dtl_entry        *write_ptr;
        struct dtl_entry        *buf;
        struct dtl_entry        *buf_end;
};

static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);

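/*
 * Number of cpus currently logging through this interface; dtl_consumer
 * is detached once the last of them stops.
 */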
static atomic_t dtl_count;

/*
 * The cpu accounting code controls the DTL ring buffer, and we get
 * given entries as they are processed.
 */
static void consume_dtle(struct dtl_entry *dtle, u64 index)
{
        struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings);
        struct dtl_entry *wp = dtlr->write_ptr;
        struct lppaca *vpa = local_paca->lppaca_ptr;

        if (!wp)
                return;

        *wp = *dtle;
        barrier();
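        /*
         * The copy above must happen before dtl_idx is re-read below:
         * if the hypervisor lapped us while we copied, the entry may be
         * torn and has to be dropped.
         */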

        /* check for hypervisor ring buffer overflow, ignore this entry if so */
        if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx))
                return;

        ++wp;
        if (wp == dtlr->buf_end)
                wp = dtlr->buf;
        dtlr->write_ptr = wp;

        /* incrementing write_index makes the new entry visible */
        smp_wmb();
        ++dtlr->write_index;
}

static int dtl_start(struct dtl *dtl)
{
        struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);

        dtlr->buf = dtl->buf;
        dtlr->buf_end = dtl->buf + dtl->buf_entries;
        dtlr->write_index = 0;

        /* setting write_ptr enables logging into our buffer */
        smp_wmb();
        dtlr->write_ptr = dtl->buf;

        /*
         * Enable event logging. OR the requested events into the mask:
         * the accounting code already logs preempt events, and
         * dtl_stop() restores the mask to DTL_LOG_PREEMPT afterwards.
         */
        lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;

        dtl_consumer = consume_dtle;
        atomic_inc(&dtl_count);
        return 0;
}

static void dtl_stop(struct dtl *dtl)
{
        struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);

        dtlr->write_ptr = NULL;
        smp_wmb();
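        /* the producer sees write_ptr == NULL before buf is cleared */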

        dtlr->buf = NULL;

        /* restore dtl_enable_mask */
        lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;

        if (atomic_dec_and_test(&dtl_count))
                dtl_consumer = NULL;
}

static u64 dtl_current_index(struct dtl *dtl)
{
        return per_cpu(dtl_rings, dtl->cpu).write_index;
}

#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

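/*
 * Without CONFIG_VIRT_CPU_ACCOUNTING_NATIVE there is no kernel-side
 * consumer: the buffer is registered with the hypervisor, which fills
 * it and advances dtl_idx in the lppaca itself.
 */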
static int dtl_start(struct dtl *dtl)
{
        unsigned long addr;
        int ret, hwcpu;

        /* Register our dtl buffer with the hypervisor. The HV expects the
         * buffer size to be passed in the second word of the buffer */
        ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);

        hwcpu = get_hard_smp_processor_id(dtl->cpu);
        addr = __pa(dtl->buf);
        ret = register_dtl(hwcpu, addr);
        if (ret) {
                printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) failed with %d\n",
                       __func__, dtl->cpu, hwcpu, ret);
                return -EIO;
        }

        /* set our initial buffer indices */
        lppaca_of(dtl->cpu).dtl_idx = 0;

        /* ensure that our updates to the lppaca fields have occurred before
         * we actually enable the logging */
        smp_wmb();

        /* enable event logging */
        lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask;

        return 0;
}

static void dtl_stop(struct dtl *dtl)
{
        int hwcpu = get_hard_smp_processor_id(dtl->cpu);

        lppaca_of(dtl->cpu).dtl_enable_mask = 0x0;

        unregister_dtl(hwcpu);
}

static u64 dtl_current_index(struct dtl *dtl)
{
        return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

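/*
 * dtl_cache is created during pseries setup (outside this file) with
 * DISPATCH_LOG_BYTES size and alignment, which is what guarantees that
 * an allocated buffer never crosses the 4k boundary firmware requires.
 */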
static int dtl_enable(struct dtl *dtl)
{
        long int n_entries;
        long int rc;
        struct dtl_entry *buf = NULL;

        if (!dtl_cache)
                return -ENOMEM;

        /* only allow one reader */
        if (dtl->buf)
                return -EBUSY;

        /* ensure there are no other conflicting dtl users */
        if (!read_trylock(&dtl_access_lock))
                return -EBUSY;

        n_entries = dtl_buf_entries;
        buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
        if (!buf) {
                printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
                       __func__, dtl->cpu);
                read_unlock(&dtl_access_lock);
                return -ENOMEM;
        }

        spin_lock(&dtl->lock);
        rc = -EBUSY;
        if (!dtl->buf) {
                /* store the original allocation size for use during read */
                dtl->buf_entries = n_entries;
                dtl->buf = buf;
                dtl->last_idx = 0;
                rc = dtl_start(dtl);
                if (rc)
                        dtl->buf = NULL;
        }
        spin_unlock(&dtl->lock);

        if (rc) {
                read_unlock(&dtl_access_lock);
                kmem_cache_free(dtl_cache, buf);
        }

        return rc;
}

static void dtl_disable(struct dtl *dtl)
{
        spin_lock(&dtl->lock);
        dtl_stop(dtl);
        kmem_cache_free(dtl_cache, dtl->buf);
        dtl->buf = NULL;
        dtl->buf_entries = 0;
        spin_unlock(&dtl->lock);
        read_unlock(&dtl_access_lock);
}

/* file interface */

static int dtl_file_open(struct inode *inode, struct file *filp)
{
        struct dtl *dtl = inode->i_private;
        int rc;

        rc = dtl_enable(dtl);
        if (rc)
                return rc;

        filp->private_data = dtl;
        return 0;
}

static int dtl_file_release(struct inode *inode, struct file *filp)
{
        struct dtl *dtl = inode->i_private;

        dtl_disable(dtl);
        return 0;
}

static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len,
                             loff_t *pos)
{
        long int rc, n_read, n_req, read_size;
        struct dtl *dtl;
        u64 cur_idx, last_idx, i;

        if ((len % sizeof(struct dtl_entry)) != 0)
                return -EINVAL;

        dtl = filp->private_data;

        /* requested number of entries to read */
        n_req = len / sizeof(struct dtl_entry);

        /* actual number of entries read */
        n_read = 0;

        spin_lock(&dtl->lock);

        cur_idx = dtl_current_index(dtl);
        last_idx = dtl->last_idx;

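        /*
         * If the log has lapped our cursor, the oldest entries are
         * already overwritten; move last_idx forward so that only
         * intact entries are copied out.
         */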
        if (last_idx + dtl->buf_entries <= cur_idx)
                last_idx = cur_idx - dtl->buf_entries + 1;

        if (last_idx + n_req > cur_idx)
                n_req = cur_idx - last_idx;

        if (n_req > 0)
                dtl->last_idx = last_idx + n_req;

        spin_unlock(&dtl->lock);

        if (n_req <= 0)
                return 0;

        i = last_idx % dtl->buf_entries;

        /* read the tail of the buffer if we've wrapped */
        if (i + n_req > dtl->buf_entries) {
                read_size = dtl->buf_entries - i;

                rc = copy_to_user(buf, &dtl->buf[i],
                                  read_size * sizeof(struct dtl_entry));
                if (rc)
                        return -EFAULT;

                i = 0;
                n_req -= read_size;
                n_read += read_size;
                buf += read_size * sizeof(struct dtl_entry);
        }

        /* .. and now the head */
        rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
        if (rc)
                return -EFAULT;

        n_read += n_req;

        return n_read * sizeof(struct dtl_entry);
}

static const struct file_operations dtl_fops = {
        .open           = dtl_file_open,
        .release        = dtl_file_release,
        .read           = dtl_file_read,
        .llseek         = no_llseek,
};

static struct dentry *dtl_dir;

static void dtl_setup_file(struct dtl *dtl)
{
        char name[10];

        sprintf(name, "cpu-%d", dtl->cpu);

        debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
}
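
/*
 * Illustration (assuming debugfs is mounted at the usual location): a
 * userspace reader must request a multiple of sizeof(struct dtl_entry)
 * bytes per read, or dtl_file_read() returns -EINVAL, e.g.:
 *
 *   dd if=/sys/kernel/debug/powerpc/dtl/cpu-0 of=dtl.bin bs=$((48 * 16)) count=1
 *
 * where 48 is sizeof(struct dtl_entry).
 */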

static int dtl_init(void)
{
        int i;

        if (!firmware_has_feature(FW_FEATURE_SPLPAR))
                return -ENODEV;

        /* set up common debugfs structure */

        dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);

        debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
        debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);

        /* set up the per-cpu log structures */
        for_each_possible_cpu(i) {
                struct dtl *dtl = &per_cpu(cpu_dtl, i);

                spin_lock_init(&dtl->lock);
                dtl->cpu = i;

                dtl_setup_file(dtl);
        }

        return 0;
}
machine_arch_initcall(pseries, dtl_init);
#endif /* CONFIG_DTL */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Scan the dispatch trace log and count up the stolen time.
 * Should be called with interrupts disabled.
 */
static notrace u64 scan_dispatch_log(u64 stop_tb)
{
        u64 i = local_paca->dtl_ridx;
        struct dtl_entry *dtl = local_paca->dtl_curr;
        struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
        struct lppaca *vpa = local_paca->lppaca_ptr;
        u64 tb_delta;
        u64 stolen = 0;
        u64 dtb;

        if (!dtl)
                return 0;

        if (i == be64_to_cpu(vpa->dtl_idx))
                return 0;
        while (i < be64_to_cpu(vpa->dtl_idx)) {
                dtb = be64_to_cpu(dtl->timebase);
                tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
                        be32_to_cpu(dtl->ready_to_enqueue_time);
                barrier();
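                /* the entry is read before re-checking dtl_idx below */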
                if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
                        /* buffer has overflowed */
                        i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
                        dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
                        continue;
                }
                if (dtb > stop_tb)
                        break;
#ifdef CONFIG_DTL
                if (dtl_consumer)
                        dtl_consumer(dtl, i);
#endif
                stolen += tb_delta;
                ++i;
                ++dtl;
                if (dtl == dtl_end)
                        dtl = local_paca->dispatch_log;
        }
        local_paca->dtl_ridx = i;
        local_paca->dtl_curr = dtl;
        return stolen;
}

/*
 * Accumulate stolen time by scanning the dispatch trace log.
 * Called on entry from user mode.
 */
void notrace pseries_accumulate_stolen_time(void)
{
        u64 sst, ust;
        struct cpu_accounting_data *acct = &local_paca->accounting;

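        /*
         * Entries up to starttime_user were stolen while this cpu was
         * in the kernel and have been charged to stime; entries between
         * then and starttime (this kernel entry) were stolen in user
         * mode and charged to utime. Move both shares into steal_time.
         */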
        sst = scan_dispatch_log(acct->starttime_user);
        ust = scan_dispatch_log(acct->starttime);
        acct->stime -= sst;
        acct->utime -= ust;
        acct->steal_time += ust + sst;
}

u64 pseries_calculate_stolen_time(u64 stop_tb)
{
        if (!firmware_has_feature(FW_FEATURE_SPLPAR))
                return 0;

        if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
                return scan_dispatch_log(stop_tb);

        return 0;
}

#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */