| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #undef TRACE_SYSTEM |
| 3 | #define TRACE_SYSTEM sched |
| 4 | |
| 5 | #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) |
| 6 | #define _TRACE_SCHED_H |
| 7 | |
| 8 | #include <linux/kthread.h> |
| 9 | #include <linux/sched/numa_balancing.h> |
| 10 | #include <linux/tracepoint.h> |
| 11 | #include <linux/binfmts.h> |
| 12 | |
| 13 | /* |
| 14 | * Tracepoint for calling kthread_stop, performed to end a kthread: |
| 15 | */ |
| 16 | TRACE_EVENT(sched_kthread_stop, |
| 17 | |
| 18 | TP_PROTO(struct task_struct *t), |
| 19 | |
| 20 | TP_ARGS(t), |
| 21 | |
| 22 | TP_STRUCT__entry( |
| 23 | __string( comm, t->comm ) |
| 24 | __field( pid_t, pid ) |
| 25 | ), |
| 26 | |
| 27 | TP_fast_assign( |
| 28 | __assign_str(comm); |
| 29 | __entry->pid = t->pid; |
| 30 | ), |
| 31 | |
| 32 | TP_printk("comm=%s pid=%d" , __get_str(comm), __entry->pid) |
| 33 | ); |
| 34 | |
| 35 | /* |
| 36 | * Tracepoint for the return value of the kthread stopping: |
| 37 | */ |
| 38 | TRACE_EVENT(sched_kthread_stop_ret, |
| 39 | |
| 40 | TP_PROTO(int ret), |
| 41 | |
| 42 | TP_ARGS(ret), |
| 43 | |
| 44 | TP_STRUCT__entry( |
| 45 | __field( int, ret ) |
| 46 | ), |
| 47 | |
| 48 | TP_fast_assign( |
| 49 | __entry->ret = ret; |
| 50 | ), |
| 51 | |
| 52 | TP_printk("ret=%d" , __entry->ret) |
| 53 | ); |
| 54 | |
| 55 | /** |
| 56 | * sched_kthread_work_queue_work - called when a work gets queued |
| 57 | * @worker: pointer to the kthread_worker |
| 58 | * @work: pointer to struct kthread_work |
| 59 | * |
| 60 | * This event occurs when a work is queued immediately or once a |
| 61 | * delayed work is actually queued (ie: once the delay has been |
| 62 | * reached). |
| 63 | */ |
| 64 | TRACE_EVENT(sched_kthread_work_queue_work, |
| 65 | |
| 66 | TP_PROTO(struct kthread_worker *worker, |
| 67 | struct kthread_work *work), |
| 68 | |
| 69 | TP_ARGS(worker, work), |
| 70 | |
| 71 | TP_STRUCT__entry( |
| 72 | __field( void *, work ) |
| 73 | __field( void *, function) |
| 74 | __field( void *, worker) |
| 75 | ), |
| 76 | |
| 77 | TP_fast_assign( |
| 78 | __entry->work = work; |
| 79 | __entry->function = work->func; |
| 80 | __entry->worker = worker; |
| 81 | ), |
| 82 | |
| 83 | TP_printk("work struct=%p function=%ps worker=%p" , |
| 84 | __entry->work, __entry->function, __entry->worker) |
| 85 | ); |
| 86 | |
| 87 | /** |
| 88 | * sched_kthread_work_execute_start - called immediately before the work callback |
| 89 | * @work: pointer to struct kthread_work |
| 90 | * |
| 91 | * Allows to track kthread work execution. |
| 92 | */ |
| 93 | TRACE_EVENT(sched_kthread_work_execute_start, |
| 94 | |
| 95 | TP_PROTO(struct kthread_work *work), |
| 96 | |
| 97 | TP_ARGS(work), |
| 98 | |
| 99 | TP_STRUCT__entry( |
| 100 | __field( void *, work ) |
| 101 | __field( void *, function) |
| 102 | ), |
| 103 | |
| 104 | TP_fast_assign( |
| 105 | __entry->work = work; |
| 106 | __entry->function = work->func; |
| 107 | ), |
| 108 | |
| 109 | TP_printk("work struct %p: function %ps" , __entry->work, __entry->function) |
| 110 | ); |
| 111 | |
| 112 | /** |
| 113 | * sched_kthread_work_execute_end - called immediately after the work callback |
| 114 | * @work: pointer to struct work_struct |
| 115 | * @function: pointer to worker function |
| 116 | * |
| 117 | * Allows to track workqueue execution. |
| 118 | */ |
| 119 | TRACE_EVENT(sched_kthread_work_execute_end, |
| 120 | |
| 121 | TP_PROTO(struct kthread_work *work, kthread_work_func_t function), |
| 122 | |
| 123 | TP_ARGS(work, function), |
| 124 | |
| 125 | TP_STRUCT__entry( |
| 126 | __field( void *, work ) |
| 127 | __field( void *, function) |
| 128 | ), |
| 129 | |
| 130 | TP_fast_assign( |
| 131 | __entry->work = work; |
| 132 | __entry->function = function; |
| 133 | ), |
| 134 | |
| 135 | TP_printk("work struct %p: function %ps" , __entry->work, __entry->function) |
| 136 | ); |
| 137 | |
| 138 | /* |
| 139 | * Tracepoint for waking up a task: |
| 140 | */ |
| 141 | DECLARE_EVENT_CLASS(sched_wakeup_template, |
| 142 | |
| 143 | TP_PROTO(struct task_struct *p), |
| 144 | |
| 145 | TP_ARGS(__perf_task(p)), |
| 146 | |
| 147 | TP_STRUCT__entry( |
| 148 | __array( char, comm, TASK_COMM_LEN ) |
| 149 | __field( pid_t, pid ) |
| 150 | __field( int, prio ) |
| 151 | __field( int, target_cpu ) |
| 152 | ), |
| 153 | |
| 154 | TP_fast_assign( |
| 155 | memcpy(__entry->comm, p->comm, TASK_COMM_LEN); |
| 156 | __entry->pid = p->pid; |
| 157 | __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| 158 | __entry->target_cpu = task_cpu(p); |
| 159 | ), |
| 160 | |
| 161 | TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d" , |
| 162 | __entry->comm, __entry->pid, __entry->prio, |
| 163 | __entry->target_cpu) |
| 164 | ); |
| 165 | |
| 166 | /* |
| 167 | * Tracepoint called when waking a task; this tracepoint is guaranteed to be |
| 168 | * called from the waking context. |
| 169 | */ |
| 170 | DEFINE_EVENT(sched_wakeup_template, sched_waking, |
| 171 | TP_PROTO(struct task_struct *p), |
| 172 | TP_ARGS(p)); |
| 173 | |
| 174 | /* |
| 175 | * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. |
| 176 | * It is not always called from the waking context. |
| 177 | */ |
| 178 | DEFINE_EVENT(sched_wakeup_template, sched_wakeup, |
| 179 | TP_PROTO(struct task_struct *p), |
| 180 | TP_ARGS(p)); |
| 181 | |
| 182 | /* |
| 183 | * Tracepoint for waking up a new task: |
| 184 | */ |
| 185 | DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, |
| 186 | TP_PROTO(struct task_struct *p), |
| 187 | TP_ARGS(p)); |
| 188 | |
| 189 | #ifdef CREATE_TRACE_POINTS |
| 190 | static inline long __trace_sched_switch_state(bool preempt, |
| 191 | unsigned int prev_state, |
| 192 | struct task_struct *p) |
| 193 | { |
| 194 | unsigned int state; |
| 195 | |
| 196 | BUG_ON(p != current); |
| 197 | |
| 198 | /* |
| 199 | * Preemption ignores task state, therefore preempted tasks are always |
| 200 | * RUNNING (we will not have dequeued if state != RUNNING). |
| 201 | */ |
| 202 | if (preempt) |
| 203 | return TASK_REPORT_MAX; |
| 204 | |
| 205 | /* |
| 206 | * task_state_index() uses fls() and returns a value from 0-8 range. |
| 207 | * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using |
| 208 | * it for left shift operation to get the correct task->state |
| 209 | * mapping. |
| 210 | */ |
| 211 | state = __task_state_index(prev_state, p->exit_state); |
| 212 | |
| 213 | return state ? (1 << (state - 1)) : state; |
| 214 | } |
| 215 | #endif /* CREATE_TRACE_POINTS */ |
| 216 | |
| 217 | /* |
| 218 | * Tracepoint for task switches, performed by the scheduler: |
| 219 | */ |
| 220 | TRACE_EVENT(sched_switch, |
| 221 | |
| 222 | TP_PROTO(bool preempt, |
| 223 | struct task_struct *prev, |
| 224 | struct task_struct *next, |
| 225 | unsigned int prev_state), |
| 226 | |
| 227 | TP_ARGS(preempt, prev, next, prev_state), |
| 228 | |
| 229 | TP_STRUCT__entry( |
| 230 | __array( char, prev_comm, TASK_COMM_LEN ) |
| 231 | __field( pid_t, prev_pid ) |
| 232 | __field( int, prev_prio ) |
| 233 | __field( long, prev_state ) |
| 234 | __array( char, next_comm, TASK_COMM_LEN ) |
| 235 | __field( pid_t, next_pid ) |
| 236 | __field( int, next_prio ) |
| 237 | ), |
| 238 | |
| 239 | TP_fast_assign( |
| 240 | memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); |
| 241 | __entry->prev_pid = prev->pid; |
| 242 | __entry->prev_prio = prev->prio; |
| 243 | __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev); |
| 244 | memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); |
| 245 | __entry->next_pid = next->pid; |
| 246 | __entry->next_prio = next->prio; |
| 247 | /* XXX SCHED_DEADLINE */ |
| 248 | ), |
| 249 | |
| 250 | TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d" , |
| 251 | __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, |
| 252 | |
| 253 | (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? |
| 254 | __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|" , |
| 255 | { TASK_INTERRUPTIBLE, "S" }, |
| 256 | { TASK_UNINTERRUPTIBLE, "D" }, |
| 257 | { __TASK_STOPPED, "T" }, |
| 258 | { __TASK_TRACED, "t" }, |
| 259 | { EXIT_DEAD, "X" }, |
| 260 | { EXIT_ZOMBIE, "Z" }, |
| 261 | { TASK_PARKED, "P" }, |
| 262 | { TASK_DEAD, "I" }) : |
| 263 | "R" , |
| 264 | |
| 265 | __entry->prev_state & TASK_REPORT_MAX ? "+" : "" , |
| 266 | __entry->next_comm, __entry->next_pid, __entry->next_prio) |
| 267 | ); |
| 268 | |
| 269 | /* |
| 270 | * Tracepoint for a task being migrated: |
| 271 | */ |
| 272 | TRACE_EVENT(sched_migrate_task, |
| 273 | |
| 274 | TP_PROTO(struct task_struct *p, int dest_cpu), |
| 275 | |
| 276 | TP_ARGS(p, dest_cpu), |
| 277 | |
| 278 | TP_STRUCT__entry( |
| 279 | __string( comm, p->comm ) |
| 280 | __field( pid_t, pid ) |
| 281 | __field( int, prio ) |
| 282 | __field( int, orig_cpu ) |
| 283 | __field( int, dest_cpu ) |
| 284 | ), |
| 285 | |
| 286 | TP_fast_assign( |
| 287 | __assign_str(comm); |
| 288 | __entry->pid = p->pid; |
| 289 | __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| 290 | __entry->orig_cpu = task_cpu(p); |
| 291 | __entry->dest_cpu = dest_cpu; |
| 292 | ), |
| 293 | |
| 294 | TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d" , |
| 295 | __get_str(comm), __entry->pid, __entry->prio, |
| 296 | __entry->orig_cpu, __entry->dest_cpu) |
| 297 | ); |
| 298 | |
| 299 | DECLARE_EVENT_CLASS(sched_process_template, |
| 300 | |
| 301 | TP_PROTO(struct task_struct *p), |
| 302 | |
| 303 | TP_ARGS(p), |
| 304 | |
| 305 | TP_STRUCT__entry( |
| 306 | __string( comm, p->comm ) |
| 307 | __field( pid_t, pid ) |
| 308 | __field( int, prio ) |
| 309 | ), |
| 310 | |
| 311 | TP_fast_assign( |
| 312 | __assign_str(comm); |
| 313 | __entry->pid = p->pid; |
| 314 | __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| 315 | ), |
| 316 | |
| 317 | TP_printk("comm=%s pid=%d prio=%d" , |
| 318 | __get_str(comm), __entry->pid, __entry->prio) |
| 319 | ); |
| 320 | |
| 321 | /* |
| 322 | * Tracepoint for freeing a task: |
| 323 | */ |
| 324 | DEFINE_EVENT(sched_process_template, sched_process_free, |
| 325 | TP_PROTO(struct task_struct *p), |
| 326 | TP_ARGS(p)); |
| 327 | |
| 328 | /* |
| 329 | * Tracepoint for a task exiting. |
| 330 | * Note, it's a superset of sched_process_template and should be kept |
| 331 | * compatible as much as possible. sched_process_exits has an extra |
| 332 | * `group_dead` argument, so sched_process_template can't be used, |
| 333 | * unfortunately, just like sched_migrate_task above. |
| 334 | */ |
| 335 | TRACE_EVENT(sched_process_exit, |
| 336 | |
| 337 | TP_PROTO(struct task_struct *p, bool group_dead), |
| 338 | |
| 339 | TP_ARGS(p, group_dead), |
| 340 | |
| 341 | TP_STRUCT__entry( |
| 342 | __array( char, comm, TASK_COMM_LEN ) |
| 343 | __field( pid_t, pid ) |
| 344 | __field( int, prio ) |
| 345 | __field( bool, group_dead ) |
| 346 | ), |
| 347 | |
| 348 | TP_fast_assign( |
| 349 | memcpy(__entry->comm, p->comm, TASK_COMM_LEN); |
| 350 | __entry->pid = p->pid; |
| 351 | __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ |
| 352 | __entry->group_dead = group_dead; |
| 353 | ), |
| 354 | |
| 355 | TP_printk("comm=%s pid=%d prio=%d group_dead=%s" , |
| 356 | __entry->comm, __entry->pid, __entry->prio, |
| 357 | __entry->group_dead ? "true" : "false" |
| 358 | ) |
| 359 | ); |
| 360 | |
| 361 | /* |
| 362 | * Tracepoint for waiting on task to unschedule: |
| 363 | */ |
| 364 | DEFINE_EVENT(sched_process_template, sched_wait_task, |
| 365 | TP_PROTO(struct task_struct *p), |
| 366 | TP_ARGS(p)); |
| 367 | |
| 368 | /* |
| 369 | * Tracepoint for a waiting task: |
| 370 | */ |
| 371 | TRACE_EVENT(sched_process_wait, |
| 372 | |
| 373 | TP_PROTO(struct pid *pid), |
| 374 | |
| 375 | TP_ARGS(pid), |
| 376 | |
| 377 | TP_STRUCT__entry( |
| 378 | __string( comm, current->comm ) |
| 379 | __field( pid_t, pid ) |
| 380 | __field( int, prio ) |
| 381 | ), |
| 382 | |
| 383 | TP_fast_assign( |
| 384 | __assign_str(comm); |
| 385 | __entry->pid = pid_nr(pid); |
| 386 | __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ |
| 387 | ), |
| 388 | |
| 389 | TP_printk("comm=%s pid=%d prio=%d" , |
| 390 | __get_str(comm), __entry->pid, __entry->prio) |
| 391 | ); |
| 392 | |
| 393 | /* |
| 394 | * Tracepoint for kernel_clone: |
| 395 | */ |
| 396 | TRACE_EVENT(sched_process_fork, |
| 397 | |
| 398 | TP_PROTO(struct task_struct *parent, struct task_struct *child), |
| 399 | |
| 400 | TP_ARGS(parent, child), |
| 401 | |
| 402 | TP_STRUCT__entry( |
| 403 | __string( parent_comm, parent->comm ) |
| 404 | __field( pid_t, parent_pid ) |
| 405 | __string( child_comm, child->comm ) |
| 406 | __field( pid_t, child_pid ) |
| 407 | ), |
| 408 | |
| 409 | TP_fast_assign( |
| 410 | __assign_str(parent_comm); |
| 411 | __entry->parent_pid = parent->pid; |
| 412 | __assign_str(child_comm); |
| 413 | __entry->child_pid = child->pid; |
| 414 | ), |
| 415 | |
| 416 | TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d" , |
| 417 | __get_str(parent_comm), __entry->parent_pid, |
| 418 | __get_str(child_comm), __entry->child_pid) |
| 419 | ); |
| 420 | |
| 421 | /* |
| 422 | * Tracepoint for exec: |
| 423 | */ |
| 424 | TRACE_EVENT(sched_process_exec, |
| 425 | |
| 426 | TP_PROTO(struct task_struct *p, pid_t old_pid, |
| 427 | struct linux_binprm *bprm), |
| 428 | |
| 429 | TP_ARGS(p, old_pid, bprm), |
| 430 | |
| 431 | TP_STRUCT__entry( |
| 432 | __string( filename, bprm->filename ) |
| 433 | __field( pid_t, pid ) |
| 434 | __field( pid_t, old_pid ) |
| 435 | ), |
| 436 | |
| 437 | TP_fast_assign( |
| 438 | __assign_str(filename); |
| 439 | __entry->pid = p->pid; |
| 440 | __entry->old_pid = old_pid; |
| 441 | ), |
| 442 | |
| 443 | TP_printk("filename=%s pid=%d old_pid=%d" , __get_str(filename), |
| 444 | __entry->pid, __entry->old_pid) |
| 445 | ); |
| 446 | |
| 447 | /** |
| 448 | * sched_prepare_exec - called before setting up new exec |
| 449 | * @task: pointer to the current task |
| 450 | * @bprm: pointer to linux_binprm used for new exec |
| 451 | * |
| 452 | * Called before flushing the old exec, where @task is still unchanged, but at |
| 453 | * the point of no return during switching to the new exec. At the point it is |
| 454 | * called the exec will either succeed, or on failure terminate the task. Also |
| 455 | * see the "sched_process_exec" tracepoint, which is called right after @task |
| 456 | * has successfully switched to the new exec. |
| 457 | */ |
| 458 | TRACE_EVENT(sched_prepare_exec, |
| 459 | |
| 460 | TP_PROTO(struct task_struct *task, struct linux_binprm *bprm), |
| 461 | |
| 462 | TP_ARGS(task, bprm), |
| 463 | |
| 464 | TP_STRUCT__entry( |
| 465 | __string( interp, bprm->interp ) |
| 466 | __string( filename, bprm->filename ) |
| 467 | __field( pid_t, pid ) |
| 468 | __string( comm, task->comm ) |
| 469 | ), |
| 470 | |
| 471 | TP_fast_assign( |
| 472 | __assign_str(interp); |
| 473 | __assign_str(filename); |
| 474 | __entry->pid = task->pid; |
| 475 | __assign_str(comm); |
| 476 | ), |
| 477 | |
| 478 | TP_printk("interp=%s filename=%s pid=%d comm=%s" , |
| 479 | __get_str(interp), __get_str(filename), |
| 480 | __entry->pid, __get_str(comm)) |
| 481 | ); |
| 482 | |
/*
 * The *_SCHEDSTAT wrappers resolve to the regular event macros when
 * CONFIG_SCHEDSTATS is defined and to their _NOP counterparts otherwise.
 */
#ifndef CONFIG_SCHEDSTATS
#define DEFINE_EVENT_SCHEDSTAT		DEFINE_EVENT_NOP
#define DECLARE_EVENT_CLASS_SCHEDSTAT	DECLARE_EVENT_CLASS_NOP
#else
#define DEFINE_EVENT_SCHEDSTAT		DEFINE_EVENT
#define DECLARE_EVENT_CLASS_SCHEDSTAT	DECLARE_EVENT_CLASS
#endif
| 490 | |
| 491 | /* |
| 492 | * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE |
| 493 | * adding sched_stat support to SCHED_FIFO/RR would be welcome. |
| 494 | */ |
| 495 | DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template, |
| 496 | |
| 497 | TP_PROTO(struct task_struct *tsk, u64 delay), |
| 498 | |
| 499 | TP_ARGS(__perf_task(tsk), __perf_count(delay)), |
| 500 | |
| 501 | TP_STRUCT__entry( |
| 502 | __string( comm, tsk->comm ) |
| 503 | __field( pid_t, pid ) |
| 504 | __field( u64, delay ) |
| 505 | ), |
| 506 | |
| 507 | TP_fast_assign( |
| 508 | __assign_str(comm); |
| 509 | __entry->pid = tsk->pid; |
| 510 | __entry->delay = delay; |
| 511 | ), |
| 512 | |
| 513 | TP_printk("comm=%s pid=%d delay=%Lu [ns]" , |
| 514 | __get_str(comm), __entry->pid, |
| 515 | (unsigned long long)__entry->delay) |
| 516 | ); |
| 517 | |
| 518 | /* |
| 519 | * Tracepoint for accounting wait time (time the task is runnable |
| 520 | * but not actually running due to scheduler contention). |
| 521 | */ |
| 522 | DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait, |
| 523 | TP_PROTO(struct task_struct *tsk, u64 delay), |
| 524 | TP_ARGS(tsk, delay)); |
| 525 | |
| 526 | /* |
| 527 | * Tracepoint for accounting sleep time (time the task is not runnable, |
| 528 | * including iowait, see below). |
| 529 | */ |
| 530 | DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep, |
| 531 | TP_PROTO(struct task_struct *tsk, u64 delay), |
| 532 | TP_ARGS(tsk, delay)); |
| 533 | |
| 534 | /* |
| 535 | * Tracepoint for accounting iowait time (time the task is not runnable |
| 536 | * due to waiting on IO to complete). |
| 537 | */ |
| 538 | DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait, |
| 539 | TP_PROTO(struct task_struct *tsk, u64 delay), |
| 540 | TP_ARGS(tsk, delay)); |
| 541 | |
| 542 | /* |
| 543 | * Tracepoint for accounting blocked time (time the task is in uninterruptible). |
| 544 | */ |
| 545 | DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, |
| 546 | TP_PROTO(struct task_struct *tsk, u64 delay), |
| 547 | TP_ARGS(tsk, delay)); |
| 548 | |
| 549 | /* |
| 550 | * Tracepoint for accounting runtime (time the task is executing |
| 551 | * on a CPU). |
| 552 | */ |
| 553 | DECLARE_EVENT_CLASS(sched_stat_runtime, |
| 554 | |
| 555 | TP_PROTO(struct task_struct *tsk, u64 runtime), |
| 556 | |
| 557 | TP_ARGS(tsk, __perf_count(runtime)), |
| 558 | |
| 559 | TP_STRUCT__entry( |
| 560 | __string( comm, tsk->comm ) |
| 561 | __field( pid_t, pid ) |
| 562 | __field( u64, runtime ) |
| 563 | ), |
| 564 | |
| 565 | TP_fast_assign( |
| 566 | __assign_str(comm); |
| 567 | __entry->pid = tsk->pid; |
| 568 | __entry->runtime = runtime; |
| 569 | ), |
| 570 | |
| 571 | TP_printk("comm=%s pid=%d runtime=%Lu [ns]" , |
| 572 | __get_str(comm), __entry->pid, |
| 573 | (unsigned long long)__entry->runtime) |
| 574 | ); |
| 575 | |
| 576 | DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, |
| 577 | TP_PROTO(struct task_struct *tsk, u64 runtime), |
| 578 | TP_ARGS(tsk, runtime)); |
| 579 | |
| 580 | /* |
| 581 | * Tracepoint for showing priority inheritance modifying a tasks |
| 582 | * priority. |
| 583 | */ |
| 584 | TRACE_EVENT(sched_pi_setprio, |
| 585 | |
| 586 | TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), |
| 587 | |
| 588 | TP_ARGS(tsk, pi_task), |
| 589 | |
| 590 | TP_STRUCT__entry( |
| 591 | __string( comm, tsk->comm ) |
| 592 | __field( pid_t, pid ) |
| 593 | __field( int, oldprio ) |
| 594 | __field( int, newprio ) |
| 595 | ), |
| 596 | |
| 597 | TP_fast_assign( |
| 598 | __assign_str(comm); |
| 599 | __entry->pid = tsk->pid; |
| 600 | __entry->oldprio = tsk->prio; |
| 601 | __entry->newprio = pi_task ? |
| 602 | min(tsk->normal_prio, pi_task->prio) : |
| 603 | tsk->normal_prio; |
| 604 | /* XXX SCHED_DEADLINE bits missing */ |
| 605 | ), |
| 606 | |
| 607 | TP_printk("comm=%s pid=%d oldprio=%d newprio=%d" , |
| 608 | __get_str(comm), __entry->pid, |
| 609 | __entry->oldprio, __entry->newprio) |
| 610 | ); |
| 611 | |
| 612 | #ifdef CONFIG_DETECT_HUNG_TASK |
| 613 | TRACE_EVENT(sched_process_hang, |
| 614 | TP_PROTO(struct task_struct *tsk), |
| 615 | TP_ARGS(tsk), |
| 616 | |
| 617 | TP_STRUCT__entry( |
| 618 | __string( comm, tsk->comm ) |
| 619 | __field( pid_t, pid ) |
| 620 | ), |
| 621 | |
| 622 | TP_fast_assign( |
| 623 | __assign_str(comm); |
| 624 | __entry->pid = tsk->pid; |
| 625 | ), |
| 626 | |
| 627 | TP_printk("comm=%s pid=%d" , __get_str(comm), __entry->pid) |
| 628 | ); |
| 629 | #endif /* CONFIG_DETECT_HUNG_TASK */ |
| 630 | |
| 631 | /* |
| 632 | * Tracks migration of tasks from one runqueue to another. Can be used to |
| 633 | * detect if automatic NUMA balancing is bouncing between nodes. |
| 634 | */ |
| 635 | TRACE_EVENT(sched_move_numa, |
| 636 | |
| 637 | TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), |
| 638 | |
| 639 | TP_ARGS(tsk, src_cpu, dst_cpu), |
| 640 | |
| 641 | TP_STRUCT__entry( |
| 642 | __field( pid_t, pid ) |
| 643 | __field( pid_t, tgid ) |
| 644 | __field( pid_t, ngid ) |
| 645 | __field( int, src_cpu ) |
| 646 | __field( int, src_nid ) |
| 647 | __field( int, dst_cpu ) |
| 648 | __field( int, dst_nid ) |
| 649 | ), |
| 650 | |
| 651 | TP_fast_assign( |
| 652 | __entry->pid = task_pid_nr(tsk); |
| 653 | __entry->tgid = task_tgid_nr(tsk); |
| 654 | __entry->ngid = task_numa_group_id(tsk); |
| 655 | __entry->src_cpu = src_cpu; |
| 656 | __entry->src_nid = cpu_to_node(src_cpu); |
| 657 | __entry->dst_cpu = dst_cpu; |
| 658 | __entry->dst_nid = cpu_to_node(dst_cpu); |
| 659 | ), |
| 660 | |
| 661 | TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d" , |
| 662 | __entry->pid, __entry->tgid, __entry->ngid, |
| 663 | __entry->src_cpu, __entry->src_nid, |
| 664 | __entry->dst_cpu, __entry->dst_nid) |
| 665 | ); |
| 666 | |
| 667 | DECLARE_EVENT_CLASS(sched_numa_pair_template, |
| 668 | |
| 669 | TP_PROTO(struct task_struct *src_tsk, int src_cpu, |
| 670 | struct task_struct *dst_tsk, int dst_cpu), |
| 671 | |
| 672 | TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), |
| 673 | |
| 674 | TP_STRUCT__entry( |
| 675 | __field( pid_t, src_pid ) |
| 676 | __field( pid_t, src_tgid ) |
| 677 | __field( pid_t, src_ngid ) |
| 678 | __field( int, src_cpu ) |
| 679 | __field( int, src_nid ) |
| 680 | __field( pid_t, dst_pid ) |
| 681 | __field( pid_t, dst_tgid ) |
| 682 | __field( pid_t, dst_ngid ) |
| 683 | __field( int, dst_cpu ) |
| 684 | __field( int, dst_nid ) |
| 685 | ), |
| 686 | |
| 687 | TP_fast_assign( |
| 688 | __entry->src_pid = task_pid_nr(src_tsk); |
| 689 | __entry->src_tgid = task_tgid_nr(src_tsk); |
| 690 | __entry->src_ngid = task_numa_group_id(src_tsk); |
| 691 | __entry->src_cpu = src_cpu; |
| 692 | __entry->src_nid = cpu_to_node(src_cpu); |
| 693 | __entry->dst_pid = dst_tsk ? task_pid_nr(dst_tsk) : 0; |
| 694 | __entry->dst_tgid = dst_tsk ? task_tgid_nr(dst_tsk) : 0; |
| 695 | __entry->dst_ngid = dst_tsk ? task_numa_group_id(dst_tsk) : 0; |
| 696 | __entry->dst_cpu = dst_cpu; |
| 697 | __entry->dst_nid = dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1; |
| 698 | ), |
| 699 | |
| 700 | TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d" , |
| 701 | __entry->src_pid, __entry->src_tgid, __entry->src_ngid, |
| 702 | __entry->src_cpu, __entry->src_nid, |
| 703 | __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, |
| 704 | __entry->dst_cpu, __entry->dst_nid) |
| 705 | ); |
| 706 | |
| 707 | DEFINE_EVENT(sched_numa_pair_template, sched_stick_numa, |
| 708 | |
| 709 | TP_PROTO(struct task_struct *src_tsk, int src_cpu, |
| 710 | struct task_struct *dst_tsk, int dst_cpu), |
| 711 | |
| 712 | TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) |
| 713 | ); |
| 714 | |
| 715 | DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, |
| 716 | |
| 717 | TP_PROTO(struct task_struct *src_tsk, int src_cpu, |
| 718 | struct task_struct *dst_tsk, int dst_cpu), |
| 719 | |
| 720 | TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) |
| 721 | ); |
| 722 | |
| 723 | #ifdef CONFIG_NUMA_BALANCING |
/*
 * Table of NUMA-balancing VMA skip reasons and the strings they are
 * printed as. The list is expanded twice with different EM()/EMe()
 * definitions: once to export the enum values, once to build the
 * { value, "name" } pairs handed to __print_symbolic(). EMe() marks the
 * last entry, which must not be followed by a comma in the second form.
 */
#define NUMAB_SKIP_REASON					\
	EM( NUMAB_SKIP_UNSUITABLE,	"unsuitable" )		\
	EM( NUMAB_SKIP_SHARED_RO,	"shared_ro" )		\
	EM( NUMAB_SKIP_INACCESSIBLE,	"inaccessible" )	\
	EM( NUMAB_SKIP_SCAN_DELAY,	"scan_delay" )		\
	EM( NUMAB_SKIP_PID_INACTIVE,	"pid_inactive" )	\
	EM( NUMAB_SKIP_IGNORE_PID,	"ignore_pid_inactive" )	\
	EMe(NUMAB_SKIP_SEQ_COMPLETED,	"seq_completed" )

/* First expansion: export every enum value. */
#undef EM
#undef EMe
#define EM(a, b)	TRACE_DEFINE_ENUM(a);
#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

NUMAB_SKIP_REASON

/* Second expansion: { value, "name" } pairs for symbolic printing. */
#undef EM
#undef EMe
#define EM(a, b)	{ a, b },
#define EMe(a, b)	{ a, b }
| 746 | |
| 747 | TRACE_EVENT(sched_skip_vma_numa, |
| 748 | |
| 749 | TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma, |
| 750 | enum numa_vmaskip_reason reason), |
| 751 | |
| 752 | TP_ARGS(mm, vma, reason), |
| 753 | |
| 754 | TP_STRUCT__entry( |
| 755 | __field(unsigned long, numa_scan_offset) |
| 756 | __field(unsigned long, vm_start) |
| 757 | __field(unsigned long, vm_end) |
| 758 | __field(enum numa_vmaskip_reason, reason) |
| 759 | ), |
| 760 | |
| 761 | TP_fast_assign( |
| 762 | __entry->numa_scan_offset = mm->numa_scan_offset; |
| 763 | __entry->vm_start = vma->vm_start; |
| 764 | __entry->vm_end = vma->vm_end; |
| 765 | __entry->reason = reason; |
| 766 | ), |
| 767 | |
| 768 | TP_printk("numa_scan_offset=%lX vm_start=%lX vm_end=%lX reason=%s" , |
| 769 | __entry->numa_scan_offset, |
| 770 | __entry->vm_start, |
| 771 | __entry->vm_end, |
| 772 | __print_symbolic(__entry->reason, NUMAB_SKIP_REASON)) |
| 773 | ); |
| 774 | |
| 775 | TRACE_EVENT(sched_skip_cpuset_numa, |
| 776 | |
| 777 | TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr), |
| 778 | |
| 779 | TP_ARGS(tsk, mem_allowed_ptr), |
| 780 | |
| 781 | TP_STRUCT__entry( |
| 782 | __array( char, comm, TASK_COMM_LEN ) |
| 783 | __field( pid_t, pid ) |
| 784 | __field( pid_t, tgid ) |
| 785 | __field( pid_t, ngid ) |
| 786 | __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES)) |
| 787 | ), |
| 788 | |
| 789 | TP_fast_assign( |
| 790 | memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); |
| 791 | __entry->pid = task_pid_nr(tsk); |
| 792 | __entry->tgid = task_tgid_nr(tsk); |
| 793 | __entry->ngid = task_numa_group_id(tsk); |
| 794 | BUILD_BUG_ON(sizeof(nodemask_t) != \ |
| 795 | BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long)); |
| 796 | memcpy(__entry->mem_allowed, mem_allowed_ptr->bits, |
| 797 | sizeof(__entry->mem_allowed)); |
| 798 | ), |
| 799 | |
| 800 | TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl" , |
| 801 | __entry->comm, |
| 802 | __entry->pid, |
| 803 | __entry->tgid, |
| 804 | __entry->ngid, |
| 805 | MAX_NUMNODES, __entry->mem_allowed) |
| 806 | ); |
| 807 | #endif /* CONFIG_NUMA_BALANCING */ |
| 808 | |
| 809 | /* |
| 810 | * Tracepoint for waking a polling cpu without an IPI. |
| 811 | */ |
| 812 | TRACE_EVENT(sched_wake_idle_without_ipi, |
| 813 | |
| 814 | TP_PROTO(int cpu), |
| 815 | |
| 816 | TP_ARGS(cpu), |
| 817 | |
| 818 | TP_STRUCT__entry( |
| 819 | __field( int, cpu ) |
| 820 | ), |
| 821 | |
| 822 | TP_fast_assign( |
| 823 | __entry->cpu = cpu; |
| 824 | ), |
| 825 | |
| 826 | TP_printk("cpu=%d" , __entry->cpu) |
| 827 | ); |
| 828 | |
| 829 | /* |
| 830 | * Following tracepoints are not exported in tracefs and provide hooking |
| 831 | * mechanisms only for testing and debugging purposes. |
| 832 | * |
| 833 | * Postfixed with _tp to make them easily identifiable in the code. |
| 834 | */ |
| 835 | DECLARE_TRACE(pelt_cfs, |
| 836 | TP_PROTO(struct cfs_rq *cfs_rq), |
| 837 | TP_ARGS(cfs_rq)); |
| 838 | |
| 839 | DECLARE_TRACE(pelt_rt, |
| 840 | TP_PROTO(struct rq *rq), |
| 841 | TP_ARGS(rq)); |
| 842 | |
| 843 | DECLARE_TRACE(pelt_dl, |
| 844 | TP_PROTO(struct rq *rq), |
| 845 | TP_ARGS(rq)); |
| 846 | |
| 847 | DECLARE_TRACE(pelt_hw, |
| 848 | TP_PROTO(struct rq *rq), |
| 849 | TP_ARGS(rq)); |
| 850 | |
| 851 | DECLARE_TRACE(pelt_irq, |
| 852 | TP_PROTO(struct rq *rq), |
| 853 | TP_ARGS(rq)); |
| 854 | |
| 855 | DECLARE_TRACE(pelt_se, |
| 856 | TP_PROTO(struct sched_entity *se), |
| 857 | TP_ARGS(se)); |
| 858 | |
| 859 | DECLARE_TRACE(sched_cpu_capacity, |
| 860 | TP_PROTO(struct rq *rq), |
| 861 | TP_ARGS(rq)); |
| 862 | |
| 863 | DECLARE_TRACE(sched_overutilized, |
| 864 | TP_PROTO(struct root_domain *rd, bool overutilized), |
| 865 | TP_ARGS(rd, overutilized)); |
| 866 | |
| 867 | DECLARE_TRACE(sched_util_est_cfs, |
| 868 | TP_PROTO(struct cfs_rq *cfs_rq), |
| 869 | TP_ARGS(cfs_rq)); |
| 870 | |
| 871 | DECLARE_TRACE(sched_util_est_se, |
| 872 | TP_PROTO(struct sched_entity *se), |
| 873 | TP_ARGS(se)); |
| 874 | |
| 875 | DECLARE_TRACE(sched_update_nr_running, |
| 876 | TP_PROTO(struct rq *rq, int change), |
| 877 | TP_ARGS(rq, change)); |
| 878 | |
| 879 | DECLARE_TRACE(sched_compute_energy, |
| 880 | TP_PROTO(struct task_struct *p, int dst_cpu, unsigned long energy, |
| 881 | unsigned long max_util, unsigned long busy_time), |
| 882 | TP_ARGS(p, dst_cpu, energy, max_util, busy_time)); |
| 883 | |
| 884 | DECLARE_TRACE(sched_entry, |
| 885 | TP_PROTO(bool preempt, unsigned long ip), |
| 886 | TP_ARGS(preempt, ip)); |
| 887 | |
| 888 | DECLARE_TRACE(sched_exit, |
| 889 | TP_PROTO(bool is_switch, unsigned long ip), |
| 890 | TP_ARGS(is_switch, ip)); |
| 891 | |
| 892 | DECLARE_TRACE_CONDITION(sched_set_state, |
| 893 | TP_PROTO(struct task_struct *tsk, int state), |
| 894 | TP_ARGS(tsk, state), |
| 895 | TP_CONDITION(!!(tsk->__state) != !!state)); |
| 896 | |
| 897 | #endif /* _TRACE_SCHED_H */ |
| 898 | |
| 899 | /* This part must be outside protection */ |
| 900 | #include <trace/define_trace.h> |
| 901 | |