summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKiran Prakesh <kiran@linux.vnet.ibm.com>2009-10-01 22:39:32 +0530
committerJosh Stone <jistone@redhat.com>2009-10-02 16:29:56 -0700
commitae3072f460693e85962556bf0529a729c7d97bf6 (patch)
treeaab6a3db669060012aea4a5c6bdabe597eac6c74
parenta8f5a3bf344f9b014c5adf8b5eada10d09f31219 (diff)
downloadsystemtap-steved-ae3072f460693e85962556bf0529a729c7d97bf6.tar.gz
systemtap-steved-ae3072f460693e85962556bf0529a729c7d97bf6.tar.xz
systemtap-steved-ae3072f460693e85962556bf0529a729c7d97bf6.zip
Scheduler Tapset based on kernel tracepoints
This patch adds kernel tracepoints based probes to the scheduler tapset along with the testcase, scheduler-test-tracepoints.stp and an example script, sched_switch.stp. Signed-off-by: Kiran Prakash <kiran@linux.vnet.ibm.com> Signed-off-by: Josh Stone <jistone@redhat.com>
-rw-r--r--tapset/scheduler.stp353
-rw-r--r--testsuite/buildok/scheduler-test-tracepoints.stp51
-rw-r--r--testsuite/systemtap.examples/profiling/sched_switch.meta14
-rw-r--r--testsuite/systemtap.examples/profiling/sched_switch.stp62
4 files changed, 429 insertions, 51 deletions
diff --git a/tapset/scheduler.stp b/tapset/scheduler.stp
index 3c3d504e..b1911ac2 100644
--- a/tapset/scheduler.stp
+++ b/tapset/scheduler.stp
@@ -20,7 +20,7 @@ function __is_idle:long()
%}
-/* probe scheduler.cpu_off
+/** probe scheduler.cpu_off
*
* Fires when a process is about to stop running on a cpu.
*
@@ -41,7 +41,7 @@ probe scheduler.cpu_off
}
-/* probe scheduler.cpu_on
+/** probe scheduler.cpu_on
*
* Fires when a process is beginning execution on a cpu.
*
@@ -76,26 +76,6 @@ probe scheduler.tick = kernel.function("scheduler_tick")
idle = __is_idle()
}
-
-/* probe scheduler.migrate
- *
- * Fires whenever a task is moved to a different cpu's runqueue.
- *
- * Context:
- * Unknown (sometimes migration thread, sometimes cpu_to)
- *
- * Arguments:
- * task - the process that is being migrated
- * cpu_from - the cpu that is losing the task
- * cpu_to - the cpu that is claiming the task
- */
-probe scheduler.migrate = kernel.function("pull_task")? {
- task = $p
- cpu_from = task_cpu($p) /*thread_info renamed to stack since 2.6.22*/
- cpu_to = $this_cpu
-}
-
-
/* probe scheduler.balance
*
* Fires when a cpu attempts to find more work.
@@ -107,43 +87,314 @@ probe scheduler.migrate = kernel.function("pull_task")? {
probe scheduler.balance = kernel.function("idle_balance")? {}
-/* probe scheduler.ctxswitch
- *
- * Fires when there is a context switch
+/**
+ * probe scheduler.ctxswitch - Fires when there is a context switch. Currently
+ * systemTap can't access arguments of inline
+ * functions. So we choose to probe __switch_to instead
+ * of context_switch()
+ * @prev_pid: The pid of the process to be switched out
+ * @next_pid: The pid of the process to be switched in
+ * @prev_tid: The tid of the process to be switched out
+ * @next_tid: The tid of the process to be switched in
+ * @prev_task_name: The name of the process to be switched out
+ * @next_task_name: The name of the process to be switched in
+ * @prev_priority: The priority of the process to be switched out
+ * @next_priority: The priority of the process to be switched in
+ * @prevtsk_state: the state of the process to be switched out
+ * @nexttsk_state: the state of the process to be switched in
+ */
- * Currently systemTap can't access arguments of inline
- * functions. So we choose to probe __switch_to instead
- * of context_switch()
+probe __scheduler.ctxswitch.tp = kernel.trace("sched_switch")
+{
+ next_pid = $next->tgid
+ next_tid = $next->pid
+ next_task = $next
+ next_task_name = task_execname($next)
+ nexttsk_state = $next->state
+ next_priority = $next->prio
+ prev_priority = $prev->prio
+ prev_pid = $prev->tgid
+ prev_tid = $prev->pid
+ prev_task = $prev
+ prev_task_name = task_execname($prev)
+ prevtsk_state = $prev->state
+}
- * Arguments:
- * prev_pid: The pid of the process to be switched out
- * next_pid: The pid of the process to be switched in
- * prevtsk_state: the state of the process to be switched out
- */
-probe scheduler.ctxswitch =
+probe __scheduler.ctxswitch.kp =
%( arch != "x86_64" && arch != "ia64" %?
- kernel.trace("sched_switch") !, kernel.function("__switch_to")
+ kernel.function("__switch_to")
%:
- kernel.trace("sched_switch") !, kernel.function("context_switch")
+ kernel.function("context_switch")
%)
{
%( arch == "powerpc" %?
- prev_pid = $prev->pid
- next_pid = $new->pid
- prev_task = $prev
- next_task = $new
- prevtsk_state = $prev->state
+ prev_pid = $prev->tgid
+ next_pid = $new->tgid
+ prev_tid = $prev->pid
+ next_tid = $new->pid
+ prev_task = $prev
+ next_task = $new
+ next_priority = $new->prio
+ prev_priority = $prev->prio
+ prev_task_name = task_execname($prev)
+ next_task_name = task_execname($new)
+ prevtsk_state = $prev->state
+ nexttsk_state = $new->state
+
%: %( arch == "x86_64" || arch == "ia64" %?
- prev_pid = $prev->pid
- next_pid = $next->pid
- prev_task = $prev
- next_task = $next
- prevtsk_state = $prev->state
+ prev_pid = $prev->tgid
+ next_pid = $next->tgid
+ prev_tid = $prev->pid
+ next_tid = $next->pid
+ prev_task = $prev
+ next_task = $next
+ next_priority = $next->prio
+ prev_priority = $prev->prio
+ prev_task_name = task_execname($prev)
+ next_task_name = task_execname($next)
+ prevtsk_state = $prev->state
+ nexttsk_state = $next->state
%:
- prev_pid = $prev_p->pid
- next_pid = $next_p->pid
- prev_task = $prev_p
- next_task = $next_p
- prevtsk_state = $prev_p->state
+ prev_pid = $prev_p->tgid
+ next_pid = $next_p->tgid
+ prev_tid = $prev_p->pid
+ next_tid = $next_p->pid
+ prev_task = $prev_p
+ next_task = $next_p
+ next_priority = $next_p->prio
+ prev_priority = $prev_p->prio
+ prev_task_name = task_execname($prev_p)
+ next_task_name = task_execname($next_p)
+ prevtsk_state = $prev_p->state
+ nexttsk_state = $next_p->state
%) %)
}
+
+probe scheduler.ctxswitch
+ = __scheduler.ctxswitch.tp !, __scheduler.ctxswitch.kp
+{}
+
+
+/**
+ * probe scheduler.kthread_stop - Fires when a thread created by kthread_create is stopped.
+ * @thread_pid: pid of the thread being stopped.
+ * @thread_priority: priority of the thread.
+ */
+probe __scheduler.kthread_stop.kp = kernel.function("kthread_stop")
+{
+	/* kprobe fallback for scheduler.kthread_stop.  The priority is read
+	 * from ->prio, the same field the tracepoint variant of this probe
+	 * and the other probes in this tapset use. */
+	thread_pid = $k->tgid
+	thread_priority = $k->prio
+}
+probe __scheduler.kthread_stop.tp = kernel.trace("sched_kthread_stop")
+{
+ thread_pid = $t->tgid
+ thread_priority = $t->prio
+}
+probe scheduler.kthread_stop
+ = __scheduler.kthread_stop.tp !,
+ __scheduler.kthread_stop.kp
+{}
+
+
+/**
+ * probe scheduler.kthread_stop.return - Fires once the kthread is stopped and gets the return value
+ * @return_value: return value after stopping the thread.
+ */
+
+probe __scheduler.kthread_stop.return.kp = kernel.function("kthread_stop").return
+{
+ return_value = $k->exit_code
+}
+probe __scheduler.kthread_stop.return.tp = kernel.trace("sched_kthread_stop_ret")
+{
+ return_value = $ret
+}
+
+probe scheduler.kthread_stop.return
+ = __scheduler.kthread_stop.return.tp !,
+ __scheduler.kthread_stop.return.kp
+{}
+
+/**
+ * probe scheduler.wait_task - Fires when waiting on a task to unschedule.
+ * It waits till the task becomes inactive.
+ * @task_pid: pid of the task the scheduler is waiting on.
+ * @task_priority: priority of the task
+ */
+
+probe scheduler.wait_task
+ = kernel.trace("sched_wait_task") !,
+ kernel.function("wait_task_inactive")
+{
+ task_pid = $p->tgid
+ task_priority = $p->prio
+}
+
+/**
+ * probe scheduler.wakeup - Fires when a task is woken up
+ * @task_pid: pid of the task being woken up
+ * @task_priority: priority of the task being woken up
+ * @task_cpu: cpu of the task being woken up
+ * @task_state: state of the task being woken up
+ * @task_tid: tid of the task being woken up
+ */
+
+probe scheduler.wakeup
+ = kernel.trace("sched_wakeup") !,
+ kernel.function("try_to_wake_up")
+{
+ task = $p
+ task_pid = $p->tgid
+ task_tid = $p->pid
+ task_priority = $p->prio
+ task_cpu = task_cpu($p)
+ task_state = task_state($p)
+}
+
+/**
+ * probe scheduler.wakeup_new - Fires when a newly created task is woken up for the first time
+ * @task_pid: pid of the new task woken up
+ * @task_priority: priority of the new task
+ * @task_tid: tid of the new task woken up
+ * @task_state: state of the task woken up
+ * @task_cpu: cpu of the task woken up
+ */
+probe scheduler.wakeup_new
+	= kernel.trace("sched_wakeup_new") !,
+	  kernel.function("wake_up_new_task")
+{
+	/* Export the woken task itself first, then the values derived from
+	 * it, in the same order as scheduler.wakeup. */
+	task = $p
+	task_pid = $p->tgid
+	task_tid = $p->pid
+	task_priority = $p->prio
+	task_cpu = task_cpu($p)
+	task_state = task_state($p)
+}
+
+/**
+ * probe scheduler.migrate - Traces the migration of the tasks across cpus by the scheduler.
+ * @task: the process that is being migrated.
+ * @pid: pid of the task being migrated.
+ * @priority: priority of the task being migrated.
+ * @cpu_from: the original cpu
+ * @cpu_to: the destination cpu
+ */
+probe __scheduler.migrate.kp1 = kernel.function("pull_task")
+{
+ cpu_to = $this_cpu
+}
+probe __scheduler.migrate.kp = kernel.function("set_task_cpu")
+{
+ cpu_to = $new_cpu
+}
+probe __scheduler.migrate.tp = kernel.trace("sched_migrate_task")
+{
+ cpu_to = $dest_cpu
+}
+probe scheduler.migrate
+ = __scheduler.migrate.tp !,
+ __scheduler.migrate.kp !,
+ __scheduler.migrate.kp1
+{
+ task = $p
+ pid = $p->tgid
+ priority = $p->prio
+ cpu_from = task_cpu($p)
+}
+/**
+ * probe scheduler.process_free - Traces the process of freeing up of a process
+ * @pid: PID of the process getting freed
+ * @priority: priority of the process getting freed
+ */
+probe __scheduler.process_free.kp = kernel.function("delayed_put_task_struct")
+{
+ pid = $tsk->tgid
+ priority = $tsk->prio
+}
+probe __scheduler.process_free.tp = kernel.trace("sched_process_free")
+{
+ pid = $p->tgid
+ priority = $p->prio
+}
+probe scheduler.process_free
+ = __scheduler.process_free.tp !,
+ __scheduler.process_free.kp
+{}
+
+/**
+ * probe scheduler.process_exit - Fires when a process exits
+ * @pid: pid of the process exiting
+ * @priority: priority of the process exiting
+ */
+probe __scheduler.process_exit.kp = kernel.function("do_exit")
+{
+	/* kprobe fallback for scheduler.process_exit.  The priority is read
+	 * from ->prio, matching the tracepoint variant of this probe. */
+	pid = $tsk->tgid
+	priority = $tsk->prio
+}
+probe __scheduler.process_exit.tp = kernel.trace("sched_process_exit")
+{
+ pid = $p->tgid
+ priority = $p->prio
+}
+
+probe scheduler.process_exit
+ = __scheduler.process_exit.tp !,
+ __scheduler.process_exit.kp
+{}
+
+/**
+ * probe scheduler.process_wait - Fires when scheduler waits on a process
+ * @pid: PID of the process scheduler is waiting on
+ */
+probe __scheduler.process_wait.kp = kernel.function("do_wait")
+{
+ pid = $wo->wo_pid
+}
+probe __scheduler.process_wait.tp = kernel.trace("sched_process_wait")
+{
+ pid = $pid
+}
+probe scheduler.process_wait
+ = __scheduler.process_wait.tp !,
+ __scheduler.process_wait.kp
+{}
+
+/**
+ * probe scheduler.process_fork - Probes the tracepoint for forking a process
+ * @parent_pid: PID of the parent process
+ * @child_pid: PID of the child process
+ */
+probe __scheduler.process_fork.kp = kernel.function("do_fork")
+{
+ parent_pid = $current->tgid
+ child_pid = $p->tgid
+}
+probe __scheduler.process_fork.tp = kernel.trace("sched_process_fork")
+{
+ parent_pid = $parent->tgid
+ child_pid = $child->tgid
+}
+
+probe scheduler.process_fork
+ = __scheduler.process_fork.tp !,
+ __scheduler.process_fork.kp
+{}
+/**
+ * probe scheduler.signal_send - Probes the tracepoint for sending a signal
+ * @pid: pid of the process the signal is being sent to
+ * @signal_number: signal number
+ */
+probe __scheduler.signal_send.kp = kernel.function("__send_signal")
+{
+ pid = $t->tgid
+}
+probe __scheduler.signal_send.tp = kernel.trace("sched_signal_send")
+{
+ pid = $p->tgid
+}
+probe scheduler.signal_send
+ = __scheduler.signal_send.tp !,
+ __scheduler.signal_send.kp
+{
+ signal_number = $sig
+}
diff --git a/testsuite/buildok/scheduler-test-tracepoints.stp b/testsuite/buildok/scheduler-test-tracepoints.stp
new file mode 100644
index 00000000..a660c367
--- /dev/null
+++ b/testsuite/buildok/scheduler-test-tracepoints.stp
@@ -0,0 +1,51 @@
+#! stap -up4
+
+//Tests if all probes in the scheduler tapset are resolvable.
+
+probe scheduler.kthread_stop {
+ printf("pid = %d, priority = %d\n", thread_pid, thread_priority);
+}
+
+probe scheduler.kthread_stop.return {
+ printf("return value = %d\n", return_value);
+}
+
+probe scheduler.wait_task {
+ printf("pid = %d, priority = %d\n", task_pid, task_priority);
+}
+
+probe scheduler.wakeup {
+	// Reference every variable exported by scheduler.wakeup on a single output line.
+	printf("pid = %d, priority = %d, state = %d, cpu = %d, tid = %d\n", task_pid, task_priority, task_state, task_cpu, task_tid);
+}
+
+probe scheduler.wakeup_new {
+ printf("pid = %d, priority = %d, state = %d, cpu = %d, tid = %d\n", task_pid, task_priority, task_state, task_cpu, task_tid);
+}
+
+probe scheduler.ctxswitch {
+ printf("prev_pid = %d, prev_priority = %d, prev_state = %d, prev_task_name = %s, prev_tid = %d, next_pid = %d, next_priority = %d, next_state = %d, next_task_name = %s, next_tid = %d\n", prev_pid, prev_priority, prevtsk_state, prev_task_name, prev_tid, next_pid, next_priority, nexttsk_state, next_task_name, next_tid);
+}
+
+probe scheduler.migrate {
+ printf("pid = %d, priority = %d, original cpu = %d destination cpu = %d\n", pid, priority, cpu_from, cpu_to);
+}
+
+probe scheduler.process_free {
+ printf("pid = %d, priority = %d\n", pid, priority);
+}
+
+probe scheduler.process_exit {
+ printf("pid = %d, priority = %d\n", pid, priority);
+}
+
+probe scheduler.process_wait {
+ printf("pid = %d\n", pid);
+}
+
+probe scheduler.process_fork {
+ printf("parent pid = %d, child pid = %d\n", parent_pid, child_pid);
+}
+
+probe scheduler.signal_send {
+ printf("pid = %d, signal = %d\n", pid, signal_number);
+}
diff --git a/testsuite/systemtap.examples/profiling/sched_switch.meta b/testsuite/systemtap.examples/profiling/sched_switch.meta
new file mode 100644
index 00000000..8f1a2858
--- /dev/null
+++ b/testsuite/systemtap.examples/profiling/sched_switch.meta
@@ -0,0 +1,14 @@
+title: Display the task switches happening in the scheduler
+name: sched_switch.stp
+version: 1.0
+author: kiran
+keywords: profiling functions
+subsystem: kernel
+status: production
+exit: user-controlled
+output: sorted-list on-exit
+scope: system-wide
+description: The sched_switch.stp script takes two arguments: the first argument can be "pid" or "name" to indicate what is being passed as the second argument. The script will trace the process based on pid/name and print the scheduler switches happening with the process. If no arguments are passed, it displays all the scheduler switches. This can be used to understand which tasks cause the process being traced to be scheduled out, and when it gets scheduled in again.
+test_check: stap -p4 sched_switch.stp
+test_installcheck: stap sched_switch.stp -c "sleep 1"
+
diff --git a/testsuite/systemtap.examples/profiling/sched_switch.stp b/testsuite/systemtap.examples/profiling/sched_switch.stp
new file mode 100644
index 00000000..24973526
--- /dev/null
+++ b/testsuite/systemtap.examples/profiling/sched_switch.stp
@@ -0,0 +1,62 @@
+/* This script works similarly to ftrace's sched_switch. It displays a list of
+ * processes which get switched in and out of the scheduler. Each line shows
+ * PROCESS_NAME CPU TIMESTAMP PID:PRIORITY:PROCESS_STATE ==>/+
+ * NEXT_PID:NEXT_PRIORITY:NEXT_STATE NEXT_PROCESS_NAME
+ * ==> indicates that the prev process is scheduled out and the next process is
+ * scheduled in.
+ * + indicates that the prev process has woken up the next process.
+ * The usage is sched_switch.stp <"pid"/"name"> pid/name
+ */
+
+function state_calc(state) {
+	/* Translate a numeric task state into the short label printed by the
+	 * probes in this script.  A value matching none of the cases leaves
+	 * the label unassigned. */
+	if (state == 0)
+		label = "R"
+	else if (state == 1)
+		label = "S"
+	else if (state == 2)
+		label = "D"
+	else if (state == 4 || state == 8)
+		label = "T"
+	else if (state == 16)
+		label = "Z"
+	else if (state == 32)
+		label = "EXIT_DEAD"
+	return label
+}
+probe scheduler.wakeup
+{
+	/* Optional filter, active only when exactly two script arguments are
+	 * given: skip the event unless the woken task or the current context
+	 * matches the requested pid / name. */
+	%( $# == 2 %?
+
+	if(@1 == "pid")
+		if (task_pid != $2 && pid() != $2)
+			next
+	if(@1 == "name")
+		if (task_execname(task) != @2 && execname() != @2)
+			next
+
+	%)
+
+	/* Fields are space-separated: without the separators the nanosecond
+	 * timestamp runs straight into the CPU number before it and the pid
+	 * after it, making the line unreadable. */
+	printf("%-16s %5d %d %d:%d:%s + %d:%d:%s %16s\n",
+		execname(), task_cpu(task), gettimeofday_ns(),
+		pid(), task_prio(task_current()), state_calc(task_state(task_current())),
+		task_pid(task), task_prio(task), state_calc(task_state(task)),
+		task_execname(task))
+}
+probe scheduler.ctxswitch
+{
+	/* Optional filter, active only when exactly two script arguments are
+	 * given: skip the switch unless the outgoing or incoming task matches
+	 * the requested pid / name. */
+	%( $# == 2 %?
+
+	if(@1 == "pid")
+		if (next_pid != $2 && prev_pid != $2)
+			next
+	if(@1 == "name")
+		if (prev_task_name != @2 && next_task_name != @2)
+			next
+	%)
+
+	/* Fields are space-separated: without the separators the nanosecond
+	 * timestamp runs straight into the CPU number before it and the pid
+	 * after it, making the line unreadable. */
+	printf("%-16s %5d %d %d:%d:%s ==> %d:%d:%s %16s\n",
+		prev_task_name, task_cpu(prev_task), gettimeofday_ns(),
+		prev_pid, prev_priority, state_calc(prevtsk_state),
+		next_pid, next_priority, state_calc(nexttsk_state),
+		next_task_name)
+}