// SPDX-License-Identifier: GPL-2.0
/*
 * trace_hwlat.c - A simple Hardware Latency detector.
 *
 * Use this tracer to detect large system latencies induced by the behavior of
 * certain underlying system hardware or firmware, independent of Linux itself.
 * The code was developed originally to detect the presence of SMIs on Intel
 * and AMD systems, although there is no dependency upon x86 herein.
 *
 * The classical example usage of this tracer is in detecting the presence of
 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
 * somewhat special form of hardware interrupt spawned from earlier CPU debug
 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
 * LPC (or other device) to generate a special interrupt under certain
 * circumstances, for example, upon expiration of a special SMI timer device,
 * due to certain external thermal readings, on certain I/O address accesses,
 * and other situations. An SMI hits a special CPU pin, triggers a special
 * SMI mode (complete with special memory map), and the OS is unaware.
 *
 * Although certain hardware-inducing latencies are necessary (for example,
 * a modern system often requires an SMI handler for correct thermal control
 * and remote management) they can wreak havoc upon any OS-level performance
 * guarantees toward low-latency, especially when the OS is not even made
 * aware of the presence of these interrupts. For this reason, we need a
 * somewhat brute force mechanism to detect these interrupts. In this case,
 * we do it by hogging all of the CPU(s) for configurable timer intervals,
 * sampling the built-in CPU timer, looking for discontiguous readings.
 *
 * WARNING: This implementation necessarily introduces latencies. Therefore,
 *          you should NEVER use this tracer while running in a production
 *          environment requiring any kind of low-latency performance
 *          guarantee(s).
 *
 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *
 * Includes useful feedback from Clark Williams <williams@redhat.com>
 *
 */
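
/*
 * Typical usage, assuming tracefs is mounted at /sys/kernel/tracing (the
 * shell commands below are illustrative only):
 *
 *   # echo hwlat > /sys/kernel/tracing/current_tracer
 *   # echo 1 > /sys/kernel/tracing/tracing_on
 *   ... wait for a while ...
 *   # cat /sys/kernel/tracing/trace
 *
 * The reporting threshold comes from the global tracing_thresh, falling back
 * to DEFAULT_LAT_THRESHOLD (10 usecs) below when unset, and the sampling
 * width/window can be tuned through the files created by init_tracefs().
 */
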
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include "trace.h"

static struct trace_array *hwlat_trace;

#define U64STR_SIZE		22			/* 20 digits max */

#define BANNER			"hwlat_detector: "
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */

static struct dentry *hwlat_sample_width;	/* sample width us */
static struct dentry *hwlat_sample_window;	/* sample window us */
static struct dentry *hwlat_thread_mode;	/* hwlat thread mode */

enum {
	MODE_NONE = 0,
	MODE_ROUND_ROBIN,
	MODE_PER_CPU,
	MODE_MAX
};
static char *thread_mode_str[] = { "none", "round-robin", "per-cpu" };

/* Save the previous tracing_thresh value */
static unsigned long save_tracing_thresh;

/* runtime kthread data */
struct hwlat_kthread_data {
	struct task_struct *kthread;
	/* NMI timestamp counters */
	u64 nmi_ts_start;
	u64 nmi_total_ts;
	int nmi_count;
	int nmi_cpu;
};

static struct hwlat_kthread_data hwlat_single_cpu_data;
static DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data);

/* Tells NMIs to call back to the hwlat tracer to record timestamps */
bool trace_hwlat_callback_enabled;

/* If the user changed threshold, remember it */
static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;

/* Individual latency samples are stored here when detected. */
struct hwlat_sample {
	u64			seqnum;		/* unique sequence */
	u64			duration;	/* delta */
	u64			outer_duration;	/* delta (outer loop) */
	u64			nmi_total_ts;	/* Total time spent in NMIs */
	struct timespec64	timestamp;	/* wall time */
	int			nmi_count;	/* # NMIs during this sample */
	int			count;		/* # of iterations over thresh */
};

/* keep the global state somewhere. */
static struct hwlat_data {

	struct mutex lock;		/* protect changes */

	u64 count;			/* total since reset */

	u64 sample_window;		/* total sampling window (on+off) */
	u64 sample_width;		/* active sampling portion of window */

	int thread_mode;		/* thread mode */

} hwlat_data = {
	.sample_window = DEFAULT_SAMPLE_WINDOW,
	.sample_width = DEFAULT_SAMPLE_WIDTH,
	.thread_mode = MODE_ROUND_ROBIN
};

static struct hwlat_kthread_data *get_cpu_data(void)
{
	if (hwlat_data.thread_mode == MODE_PER_CPU)
		return this_cpu_ptr(&hwlat_per_cpu_data);
	else
		return &hwlat_single_cpu_data;
}

static bool hwlat_busy;

static void trace_hwlat_sample(struct hwlat_sample *sample)
{
	struct trace_array *tr = hwlat_trace;
	struct trace_event_call *call = &event_hwlat;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct hwlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->seqnum = sample->seqnum;
	entry->duration = sample->duration;
	entry->outer_duration = sample->outer_duration;
	entry->timestamp = sample->timestamp;
	entry->nmi_total_ts = sample->nmi_total_ts;
	entry->nmi_count = sample->nmi_count;
	entry->count = sample->count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

/* Macros to encapsulate the time capturing infrastructure */
#define time_type	u64
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
#define init_time(a, b)	(a = b)
#define time_u64(a)	a

void trace_hwlat_callback(bool enter)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();

	if (!kdata->kthread)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter)
			kdata->nmi_ts_start = time_get();
		else
			kdata->nmi_total_ts += time_get() - kdata->nmi_ts_start;
	}

	if (enter)
		kdata->nmi_count++;
}

/*
 * hwlat_err - report a hwlat error.
 */
#define hwlat_err(msg) ({							\
	struct trace_array *tr = hwlat_trace;					\
										\
	trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg);	\
})

/**
 * get_sample - sample the CPU TSC and look for likely hardware latencies
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
 * hardware-induced latency. Called with interrupts disabled and with
 * hwlat_data.lock held.
 */
static int get_sample(void)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct trace_array *tr = hwlat_trace;
	struct hwlat_sample s;
	time_type start, t1, t2, last_t2;
	s64 diff, outer_diff, total, last_total = 0;
	u64 sample = 0;
	u64 thresh = tracing_thresh;
	u64 outer_sample = 0;
	int ret = -1;
	unsigned int count = 0;

	do_div(thresh, NSEC_PER_USEC); /* modifies thresh value */

	kdata->nmi_total_ts = 0;
	kdata->nmi_count = 0;
	/* Make sure NMIs see this first */
	barrier();

	trace_hwlat_callback_enabled = true;

	init_time(last_t2, 0);
	start = time_get(); /* start timestamp */
	outer_diff = 0;

	do {

		t1 = time_get();	/* we'll look for a discontinuity */
		t2 = time_get();

		if (time_u64(last_t2)) {
			/* Check the delta from outer loop (t2 to next t1) */
			outer_diff = time_to_us(time_sub(t1, last_t2));
			/* This shouldn't happen */
			if (outer_diff < 0) {
				hwlat_err(BANNER "time running backwards\n");
				goto out;
			}
			if (outer_diff > outer_sample)
				outer_sample = outer_diff;
		}
		last_t2 = t2;

		total = time_to_us(time_sub(t2, start)); /* sample width */

		/* Check for possible overflows */
		if (total < last_total) {
			hwlat_err("Time total overflowed\n");
			break;
		}
		last_total = total;

		/* This checks the inner loop (t1 to t2) */
		diff = time_to_us(time_sub(t2, t1)); /* current diff */

		if (diff > thresh || outer_diff > thresh) {
			if (!count)
				ktime_get_real_ts64(&s.timestamp);
			count++;
		}

		/* This shouldn't happen */
		if (diff < 0) {
			hwlat_err(BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= hwlat_data.sample_width);

	barrier(); /* finish the above in the view for NMIs */
	trace_hwlat_callback_enabled = false;
	barrier(); /* Make sure nmi_total_ts is no longer updated */

	ret = 0;

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > thresh || outer_sample > thresh) {
		u64 latency;

		ret = 1;

		/* We read in microseconds */
		if (kdata->nmi_total_ts)
			do_div(kdata->nmi_total_ts, NSEC_PER_USEC);

		hwlat_data.count++;
		s.seqnum = hwlat_data.count;
		s.duration = sample;
		s.outer_duration = outer_sample;
		s.nmi_total_ts = kdata->nmi_total_ts;
		s.nmi_count = kdata->nmi_count;
		s.count = count;
		trace_hwlat_sample(&s);

		latency = max(sample, outer_sample);

		/* Keep a running maximum ever recorded hardware latency */
		if (latency > tr->max_latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}

out:
	return ret;
}
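
/*
 * A rough sketch of what each iteration of the loop above measures:
 *
 *     last_t2             t1          t2
 *        |<---- outer ---->|<--inner-->|
 *
 * "inner" is the delta between the two back-to-back time_get() reads (t1 to
 * t2), while "outer" is the delta between the previous iteration's second
 * read and the current iteration's first read (last_t2 to t1). A delta above
 * the threshold in either one is treated as a potential hardware latency.
 */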

static struct cpumask save_cpumask;

static void move_to_next_cpu(void)
{
	struct cpumask *current_mask = &save_cpumask;
	struct trace_array *tr = hwlat_trace;
	int next_cpu;

	/*
	 * If for some reason the user modifies the CPU affinity
	 * of this thread, then stop migrating for the duration
	 * of the current test.
	 */
	if (!cpumask_equal(current_mask, current->cpus_ptr))
		goto change_mode;

	cpus_read_lock();
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
	next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
	cpus_read_unlock();

	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(current_mask);

	if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
		goto change_mode;

	cpumask_clear(current_mask);
	cpumask_set_cpu(next_cpu, current_mask);

	set_cpus_allowed_ptr(current, current_mask);
	return;

change_mode:
	hwlat_data.thread_mode = MODE_NONE;
	pr_info(BANNER "cpumask changed while in round-robin mode, switching to mode none\n");
}

/*
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * disable interrupts, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus preempting).
 * Obviously this should never be used in production environments.
 *
 * Executes one loop iteration on each CPU in the tracing_cpumask sysfs file.
 */
static int kthread_fn(void *data)
{
	u64 interval;

	while (!kthread_should_stop()) {

		if (hwlat_data.thread_mode == MODE_ROUND_ROBIN)
			move_to_next_cpu();

		local_irq_disable();
		get_sample();
		local_irq_enable();

		mutex_lock(&hwlat_data.lock);
		interval = hwlat_data.sample_window - hwlat_data.sample_width;
		mutex_unlock(&hwlat_data.lock);

		do_div(interval, USEC_PER_MSEC); /* modifies interval value */

		/* Always sleep for at least 1ms */
		if (interval < 1)
			interval = 1;

		if (msleep_interruptible(interval))
			break;
	}

	return 0;
}
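
/*
 * For example, with the default sample_window of 1,000,000 usecs and
 * sample_width of 500,000 usecs, the loop above busy-samples for roughly
 * half a second, then computes interval = (1000000 - 500000) / 1000 = 500,
 * i.e. it sleeps for 500 ms before starting the next window.
 */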

/*
 * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop
 *
 * This kicks the running hardware latency sampling/detector kernel thread and
 * tells it to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_single_kthread(void)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct task_struct *kthread;

	cpus_read_lock();
	kthread = kdata->kthread;

	if (!kthread)
		goto out_put_cpus;

	kthread_stop(kthread);
	kdata->kthread = NULL;

out_put_cpus:
	cpus_read_unlock();
}

/*
 * start_single_kthread - Kick off the hardware latency sampling/detector kthread
 *
 * This starts the kernel thread that will sit and sample the CPU timestamp
 * counter (TSC or similar) and look for potential hardware latencies.
 */
static int start_single_kthread(struct trace_array *tr)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct cpumask *current_mask = &save_cpumask;
	struct task_struct *kthread;
	int next_cpu;

	cpus_read_lock();
	if (kdata->kthread)
		goto out_put_cpus;

	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		cpus_read_unlock();
		return -ENOMEM;
	}

	/* Just pick the first CPU on first iteration */
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);

	if (hwlat_data.thread_mode == MODE_ROUND_ROBIN) {
		next_cpu = cpumask_first(current_mask);
		cpumask_clear(current_mask);
		cpumask_set_cpu(next_cpu, current_mask);
	}

	set_cpus_allowed_ptr(kthread, current_mask);

	kdata->kthread = kthread;
	wake_up_process(kthread);

out_put_cpus:
	cpus_read_unlock();
	return 0;
}

/*
 * stop_cpu_kthread - Stop a hwlat cpu kthread
 */
static void stop_cpu_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(hwlat_per_cpu_data, cpu).kthread;
	if (kthread)
		kthread_stop(kthread);
	per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
}

/*
 * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthreads to stop
 *
 * This kicks the running hardware latency sampling/detector kernel threads and
 * tells them to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_per_cpu_kthreads(void)
{
	unsigned int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu)
		stop_cpu_kthread(cpu);
	cpus_read_unlock();
}

/*
 * start_cpu_kthread - Start a hwlat cpu kthread
 */
static int start_cpu_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	/* Do not start a new hwlatd thread if it is already running */
	if (per_cpu(hwlat_per_cpu_data, cpu).kthread)
		return 0;

	kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		return -ENOMEM;
	}

	per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;

	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void hwlat_hotplug_workfn(struct work_struct *dummy)
{
	struct trace_array *tr = hwlat_trace;
	unsigned int cpu = smp_processor_id();

	mutex_lock(&trace_types_lock);
	mutex_lock(&hwlat_data.lock);
	cpus_read_lock();

	if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
		goto out_unlock;

	if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
		goto out_unlock;

	start_cpu_kthread(cpu);

out_unlock:
	cpus_read_unlock();
	mutex_unlock(&hwlat_data.lock);
	mutex_unlock(&trace_types_lock);
}

static DECLARE_WORK(hwlat_hotplug_work, hwlat_hotplug_workfn);

/*
 * hwlat_cpu_init - CPU hotplug online callback function
 */
static int hwlat_cpu_init(unsigned int cpu)
{
	schedule_work_on(cpu, &hwlat_hotplug_work);
	return 0;
}

/*
 * hwlat_cpu_die - CPU hotplug offline callback function
 */
static int hwlat_cpu_die(unsigned int cpu)
{
	stop_cpu_kthread(cpu);
	return 0;
}

static void hwlat_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/hwlat:online",
				hwlat_cpu_init, hwlat_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Failed to init cpu hotplug support\n");

	return;
}
#else /* CONFIG_HOTPLUG_CPU */
static void hwlat_init_hotplug_support(void)
{
	return;
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads
 *
 * This starts the kernel threads that will sit on potentially all cpus and
 * sample the CPU timestamp counter (TSC or similar) and look for potential
 * hardware latencies.
 */
static int start_per_cpu_kthreads(struct trace_array *tr)
{
	struct cpumask *current_mask = &save_cpumask;
	unsigned int cpu;
	int retval;

	cpus_read_lock();
	/*
	 * Run only on CPUs in which hwlat is allowed to run.
	 */
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);

	for_each_cpu(cpu, current_mask) {
		retval = start_cpu_kthread(cpu);
		if (retval)
			goto out_error;
	}
	cpus_read_unlock();

	return 0;

out_error:
	cpus_read_unlock();
	stop_per_cpu_kthreads();
	return retval;
}

static void *s_mode_start(struct seq_file *s, loff_t *pos)
{
	int mode = *pos;

	mutex_lock(&hwlat_data.lock);

	if (mode >= MODE_MAX)
		return NULL;

	return pos;
}

static void *s_mode_next(struct seq_file *s, void *v, loff_t *pos)
{
	int mode = ++(*pos);

	if (mode >= MODE_MAX)
		return NULL;

	return pos;
}

static int s_mode_show(struct seq_file *s, void *v)
{
	loff_t *pos = v;
	int mode = *pos;

	if (mode == hwlat_data.thread_mode)
		seq_printf(s, "[%s]", thread_mode_str[mode]);
	else
		seq_printf(s, "%s", thread_mode_str[mode]);

	if (mode < MODE_MAX - 1) /* if mode is any but last */
		seq_puts(s, " ");

	return 0;
}

static void s_mode_stop(struct seq_file *s, void *v)
{
	seq_puts(s, "\n");
	mutex_unlock(&hwlat_data.lock);
}

static const struct seq_operations thread_mode_seq_ops = {
	.start = s_mode_start,
	.next = s_mode_next,
	.show = s_mode_show,
	.stop = s_mode_stop
};

static int hwlat_mode_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &thread_mode_seq_ops);
}

static void hwlat_tracer_start(struct trace_array *tr);
static void hwlat_tracer_stop(struct trace_array *tr);

/**
 * hwlat_mode_write - Write function for "mode" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @file
 *
 * This function provides a write implementation for the "mode" interface
 * to the hardware latency detector. hwlatd has different operation modes.
 * The "none" mode sets the allowed cpumask for a single hwlatd thread at
 * startup and lets the scheduler handle the migration. The default mode is
 * the "round-robin" one, in which a single hwlatd thread runs, migrating
 * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode
 * creates one hwlatd thread per allowed CPU.
 */
static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = hwlat_trace;
	const char *mode;
	char buf[64];
	int ret, i;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	mode = strstrip(buf);

	ret = -EINVAL;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop
	 * and hwlat_busy.
	 */
	mutex_lock(&trace_types_lock);
	if (hwlat_busy)
		hwlat_tracer_stop(tr);

	mutex_lock(&hwlat_data.lock);

	for (i = 0; i < MODE_MAX; i++) {
		if (strcmp(mode, thread_mode_str[i]) == 0) {
			hwlat_data.thread_mode = i;
			ret = cnt;
		}
	}

	mutex_unlock(&hwlat_data.lock);

	if (hwlat_busy)
		hwlat_tracer_start(tr);
	mutex_unlock(&trace_types_lock);

	*ppos += cnt;

	return ret;
}
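
/*
 * For example, to switch hwlatd to the per-cpu mode before enabling the
 * tracer (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo per-cpu > /sys/kernel/tracing/hwlat_detector/mode
 *   # echo hwlat > /sys/kernel/tracing/current_tracer
 */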

/*
 * The width parameter is read/write using the generic trace_min_max_param
 * method. The *val is protected by the hwlat_data lock and is upper
 * bounded by the window parameter.
 */
static struct trace_min_max_param hwlat_width = {
	.lock = &hwlat_data.lock,
	.val = &hwlat_data.sample_width,
	.max = &hwlat_data.sample_window,
	.min = NULL,
};

/*
 * The window parameter is read/write using the generic trace_min_max_param
 * method. The *val is protected by the hwlat_data lock and is lower
 * bounded by the width parameter.
 */
static struct trace_min_max_param hwlat_window = {
	.lock = &hwlat_data.lock,
	.val = &hwlat_data.sample_window,
	.max = NULL,
	.min = &hwlat_data.sample_width,
};

static const struct file_operations thread_mode_fops = {
	.open = hwlat_mode_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
	.write = hwlat_mode_write
};
/**
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "hwlat_detector".
 * It creates the hwlat_detector directory in the tracing directory,
 * and within that directory are the width, window and mode files to
 * change and view those values.
 */
static int init_tracefs(void)
{
	int ret;
	struct dentry *top_dir;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("hwlat_detector", NULL);
	if (!top_dir)
		return -ENOMEM;

	hwlat_sample_window = tracefs_create_file("window", TRACE_MODE_WRITE,
						  top_dir,
						  &hwlat_window,
						  &trace_min_max_fops);
	if (!hwlat_sample_window)
		goto err;

	hwlat_sample_width = tracefs_create_file("width", TRACE_MODE_WRITE,
						 top_dir,
						 &hwlat_width,
						 &trace_min_max_fops);
	if (!hwlat_sample_width)
		goto err;

	hwlat_thread_mode = trace_create_file("mode", TRACE_MODE_WRITE,
					      top_dir,
					      NULL,
					      &thread_mode_fops);
	if (!hwlat_thread_mode)
		goto err;

	return 0;

err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}
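
/*
 * With the default tracefs mount point, this results in:
 *
 *   /sys/kernel/tracing/hwlat_detector/width   - sample width, in usecs
 *   /sys/kernel/tracing/hwlat_detector/window  - sample window, in usecs
 *   /sys/kernel/tracing/hwlat_detector/mode    - none, round-robin or per-cpu
 */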

static void hwlat_tracer_start(struct trace_array *tr)
{
	int err;

	if (hwlat_data.thread_mode == MODE_PER_CPU)
		err = start_per_cpu_kthreads(tr);
	else
		err = start_single_kthread(tr);
	if (err)
		pr_err(BANNER "Cannot start hwlat kthread\n");
}

static void hwlat_tracer_stop(struct trace_array *tr)
{
	if (hwlat_data.thread_mode == MODE_PER_CPU)
		stop_per_cpu_kthreads();
	else
		stop_single_kthread();
}

static int hwlat_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this */
	if (hwlat_busy)
		return -EBUSY;

	hwlat_trace = tr;

	hwlat_data.count = 0;
	tr->max_latency = 0;
	save_tracing_thresh = tracing_thresh;

	/* tracing_thresh is in nsecs, we speak in usecs */
	if (!tracing_thresh)
		tracing_thresh = last_tracing_thresh;

	if (tracer_tracing_is_on(tr))
		hwlat_tracer_start(tr);

	hwlat_busy = true;

	return 0;
}

static void hwlat_tracer_reset(struct trace_array *tr)
{
	hwlat_tracer_stop(tr);

	/* the tracing threshold is static between runs */
	last_tracing_thresh = tracing_thresh;

	tracing_thresh = save_tracing_thresh;
	hwlat_busy = false;
}

static struct tracer hwlat_tracer __read_mostly =
{
	.name = "hwlat",
	.init = hwlat_tracer_init,
	.reset = hwlat_tracer_reset,
	.start = hwlat_tracer_start,
	.stop = hwlat_tracer_stop,
	.allow_instances = true,
};

__init static int init_hwlat_tracer(void)
{
	int ret;

	mutex_init(&hwlat_data.lock);

	ret = register_tracer(&hwlat_tracer);
	if (ret)
		return ret;

	hwlat_init_hotplug_support();

	init_tracefs();

	return 0;
}
late_initcall(init_hwlat_tracer);