f977bb4937
This allows more precise tracking of how the scheduler accounts (and acts upon) a task having spent N nanoseconds of CPU time. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
478 lines
10 KiB
C
478 lines
10 KiB
C
#undef TRACE_SYSTEM
|
|
#define TRACE_SYSTEM sched
|
|
|
|
#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
|
|
#define _TRACE_SCHED_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/tracepoint.h>
|
|
|
|
/*
|
|
* Tracepoint for calling kthread_stop, performed to end a kthread:
|
|
*/
|
|
TRACE_EVENT(sched_kthread_stop,
|
|
|
|
TP_PROTO(struct task_struct *t),
|
|
|
|
TP_ARGS(t),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
|
|
__entry->pid = t->pid;
|
|
),
|
|
|
|
TP_printk("task %s:%d", __entry->comm, __entry->pid)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for the return value of the kthread stopping:
|
|
*/
|
|
TRACE_EVENT(sched_kthread_stop_ret,
|
|
|
|
TP_PROTO(int ret),
|
|
|
|
TP_ARGS(ret),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, ret )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->ret = ret;
|
|
),
|
|
|
|
TP_printk("ret %d", __entry->ret)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for waiting on task to unschedule:
|
|
*
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
* but used by the latency tracer plugin. )
|
|
*/
|
|
TRACE_EVENT(sched_wait_task,
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *p),
|
|
|
|
TP_ARGS(rq, p),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->prio = p->prio;
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for waking up a task:
|
|
*
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
* but used by the latency tracer plugin. )
|
|
*/
|
|
TRACE_EVENT(sched_wakeup,
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *p, int success),
|
|
|
|
TP_ARGS(rq, p, success),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
__field( int, success )
|
|
__field( int, cpu )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->prio = p->prio;
|
|
__entry->success = success;
|
|
__entry->cpu = task_cpu(p);
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d] success=%d [%03d]",
|
|
__entry->comm, __entry->pid, __entry->prio,
|
|
__entry->success, __entry->cpu)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for waking up a new task:
|
|
*
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
* but used by the latency tracer plugin. )
|
|
*/
|
|
TRACE_EVENT(sched_wakeup_new,
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *p, int success),
|
|
|
|
TP_ARGS(rq, p, success),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
__field( int, success )
|
|
__field( int, cpu )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->prio = p->prio;
|
|
__entry->success = success;
|
|
__entry->cpu = task_cpu(p);
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d] success=%d [%03d]",
|
|
__entry->comm, __entry->pid, __entry->prio,
|
|
__entry->success, __entry->cpu)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for task switches, performed by the scheduler:
|
|
*
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
* but used by the latency tracer plugin. )
|
|
*/
|
|
TRACE_EVENT(sched_switch,
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *prev,
|
|
struct task_struct *next),
|
|
|
|
TP_ARGS(rq, prev, next),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, prev_comm, TASK_COMM_LEN )
|
|
__field( pid_t, prev_pid )
|
|
__field( int, prev_prio )
|
|
__field( long, prev_state )
|
|
__array( char, next_comm, TASK_COMM_LEN )
|
|
__field( pid_t, next_pid )
|
|
__field( int, next_prio )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
|
|
__entry->prev_pid = prev->pid;
|
|
__entry->prev_prio = prev->prio;
|
|
__entry->prev_state = prev->state;
|
|
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
|
|
__entry->next_pid = next->pid;
|
|
__entry->next_prio = next->prio;
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
|
|
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
|
|
__entry->prev_state ?
|
|
__print_flags(__entry->prev_state, "|",
|
|
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
|
|
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
|
|
{ 128, "W" }) : "R",
|
|
__entry->next_comm, __entry->next_pid, __entry->next_prio)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for a task being migrated:
|
|
*/
|
|
TRACE_EVENT(sched_migrate_task,
|
|
|
|
TP_PROTO(struct task_struct *p, int dest_cpu),
|
|
|
|
TP_ARGS(p, dest_cpu),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
__field( int, orig_cpu )
|
|
__field( int, dest_cpu )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->prio = p->prio;
|
|
__entry->orig_cpu = task_cpu(p);
|
|
__entry->dest_cpu = dest_cpu;
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d] from: %d to: %d",
|
|
__entry->comm, __entry->pid, __entry->prio,
|
|
__entry->orig_cpu, __entry->dest_cpu)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for freeing a task:
|
|
*/
|
|
TRACE_EVENT(sched_process_free,
|
|
|
|
TP_PROTO(struct task_struct *p),
|
|
|
|
TP_ARGS(p),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->prio = p->prio;
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for a task exiting:
|
|
*/
|
|
TRACE_EVENT(sched_process_exit,
|
|
|
|
TP_PROTO(struct task_struct *p),
|
|
|
|
TP_ARGS(p),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->prio = p->prio;
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for a waiting task:
|
|
*/
|
|
TRACE_EVENT(sched_process_wait,
|
|
|
|
TP_PROTO(struct pid *pid),
|
|
|
|
TP_ARGS(pid),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( int, prio )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
|
|
__entry->pid = pid_nr(pid);
|
|
__entry->prio = current->prio;
|
|
),
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for do_fork:
|
|
*/
|
|
TRACE_EVENT(sched_process_fork,
|
|
|
|
TP_PROTO(struct task_struct *parent, struct task_struct *child),
|
|
|
|
TP_ARGS(parent, child),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, parent_comm, TASK_COMM_LEN )
|
|
__field( pid_t, parent_pid )
|
|
__array( char, child_comm, TASK_COMM_LEN )
|
|
__field( pid_t, child_pid )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
|
|
__entry->parent_pid = parent->pid;
|
|
memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
|
|
__entry->child_pid = child->pid;
|
|
),
|
|
|
|
TP_printk("parent %s:%d child %s:%d",
|
|
__entry->parent_comm, __entry->parent_pid,
|
|
__entry->child_comm, __entry->child_pid)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for sending a signal:
|
|
*/
|
|
TRACE_EVENT(sched_signal_send,
|
|
|
|
TP_PROTO(int sig, struct task_struct *p),
|
|
|
|
TP_ARGS(sig, p),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, sig )
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
__entry->pid = p->pid;
|
|
__entry->sig = sig;
|
|
),
|
|
|
|
TP_printk("sig: %d task %s:%d",
|
|
__entry->sig, __entry->comm, __entry->pid)
|
|
);
|
|
|
|
/*
|
|
* XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
|
|
* adding sched_stat support to SCHED_FIFO/RR would be welcome.
|
|
*/
|
|
|
|
/*
|
|
* Tracepoint for accounting wait time (time the task is runnable
|
|
* but not actually running due to scheduler contention).
|
|
*/
|
|
TRACE_EVENT(sched_stat_wait,
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 delay),
|
|
|
|
TP_ARGS(tsk, delay),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( u64, delay )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
__entry->pid = tsk->pid;
|
|
__entry->delay = delay;
|
|
)
|
|
TP_perf_assign(
|
|
__perf_count(delay);
|
|
),
|
|
|
|
TP_printk("task: %s:%d wait: %Lu [ns]",
|
|
__entry->comm, __entry->pid,
|
|
(unsigned long long)__entry->delay)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for accounting runtime (time the task is executing
|
|
* on a CPU).
|
|
*/
|
|
TRACE_EVENT(sched_stat_runtime,
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
|
|
|
|
TP_ARGS(tsk, runtime, vruntime),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( u64, runtime )
|
|
__field( u64, vruntime )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
__entry->pid = tsk->pid;
|
|
__entry->runtime = runtime;
|
|
__entry->vruntime = vruntime;
|
|
)
|
|
TP_perf_assign(
|
|
__perf_count(runtime);
|
|
),
|
|
|
|
TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]",
|
|
__entry->comm, __entry->pid,
|
|
(unsigned long long)__entry->runtime,
|
|
(unsigned long long)__entry->vruntime)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for accounting sleep time (time the task is not runnable,
|
|
* including iowait, see below).
|
|
*/
|
|
TRACE_EVENT(sched_stat_sleep,
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 delay),
|
|
|
|
TP_ARGS(tsk, delay),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( u64, delay )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
__entry->pid = tsk->pid;
|
|
__entry->delay = delay;
|
|
)
|
|
TP_perf_assign(
|
|
__perf_count(delay);
|
|
),
|
|
|
|
TP_printk("task: %s:%d sleep: %Lu [ns]",
|
|
__entry->comm, __entry->pid,
|
|
(unsigned long long)__entry->delay)
|
|
);
|
|
|
|
/*
|
|
* Tracepoint for accounting iowait time (time the task is not runnable
|
|
* due to waiting on IO to complete).
|
|
*/
|
|
TRACE_EVENT(sched_stat_iowait,
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 delay),
|
|
|
|
TP_ARGS(tsk, delay),
|
|
|
|
TP_STRUCT__entry(
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
__field( pid_t, pid )
|
|
__field( u64, delay )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
__entry->pid = tsk->pid;
|
|
__entry->delay = delay;
|
|
)
|
|
TP_perf_assign(
|
|
__perf_count(delay);
|
|
),
|
|
|
|
TP_printk("task: %s:%d iowait: %Lu [ns]",
|
|
__entry->comm, __entry->pid,
|
|
(unsigned long long)__entry->delay)
|
|
);
|
|
|
|
#endif /* _TRACE_SCHED_H */
|
|
|
|
/* This part must be outside protection */
|
|
#include <trace/define_trace.h>
|