summaryrefslogtreecommitdiffstats
path: root/runtime
diff options
context:
space:
mode:
authorDavid Smith <dsmith@redhat.com>2009-04-24 12:48:29 -0500
committerDavid Smith <dsmith@redhat.com>2009-04-24 12:48:29 -0500
commit3cb2045e46dcac5d2c33fd22622508891c66bee0 (patch)
tree624385fe4c5d2461f38dbbc417931485b44ad8cb /runtime
parent38e258727786a796835fb669f7413f52e207fd07 (diff)
downloadsystemtap-steved-3cb2045e46dcac5d2c33fd22622508891c66bee0.tar.gz
systemtap-steved-3cb2045e46dcac5d2c33fd22622508891c66bee0.tar.xz
systemtap-steved-3cb2045e46dcac5d2c33fd22622508891c66bee0.zip
Cache and retrieve syscall arguments when needed.
* runtime/task_finder.c (__stp_utrace_task_finder_target_syscall_entry): New function that caches syscall arguments for mmap(), munmap(), and mprotect() for use in __stp_utrace_task_finder_target_syscall_exit(). (__stp_utrace_task_finder_target_syscall_exit): Uses cached syscall argument info when making callbacks. (stap_start_task_finder): Initializes map subsystem for storing syscall argument data. * runtime/task_finder_map.c: New file containing functions to save and retrieve syscall arguments.
Diffstat (limited to 'runtime')
-rw-r--r--runtime/task_finder.c137
-rw-r--r--runtime/task_finder_map.c191
2 files changed, 293 insertions, 35 deletions
diff --git a/runtime/task_finder.c b/runtime/task_finder.c
index 93b89cb9..f5e059ca 100644
--- a/runtime/task_finder.c
+++ b/runtime/task_finder.c
@@ -19,6 +19,7 @@ struct stap_task_finder_target { };
#include "syscall.h"
#include "utrace_compatibility.h"
+#include "task_finder_map.c"
static LIST_HEAD(__stp_task_finder_list);
@@ -129,6 +130,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
#ifdef UTRACE_ORIG_VERSION
static u32
+__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs);
+#else
+static u32
+__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs);
+#endif
+
+#ifdef UTRACE_ORIG_VERSION
+static u32
__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine,
struct task_struct *tsk,
struct pt_regs *regs);
@@ -166,6 +180,8 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
memset(&new_tgt->ops, 0, sizeof(new_tgt->ops));
new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death;
new_tgt->ops.report_quiesce = &__stp_utrace_task_finder_target_quiesce;
+ new_tgt->ops.report_syscall_entry = \
+ &__stp_utrace_task_finder_target_syscall_entry;
new_tgt->ops.report_syscall_exit = \
&__stp_utrace_task_finder_target_syscall_exit;
@@ -394,6 +410,7 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen)
#define __STP_TASK_BASE_EVENTS (UTRACE_EVENT(DEATH))
#define __STP_TASK_VM_BASE_EVENTS (__STP_TASK_BASE_EVENTS \
+ | UTRACE_EVENT(SYSCALL_ENTRY)\
| UTRACE_EVENT(SYSCALL_EXIT))
/*
@@ -1053,24 +1070,21 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr)
#ifdef UTRACE_ORIG_VERSION
static u32
-__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine,
- struct task_struct *tsk,
- struct pt_regs *regs)
+__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
#else
static u32
-__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
- struct utrace_attached_engine *engine,
- struct task_struct *tsk,
- struct pt_regs *regs)
+__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
#endif
{
struct stap_task_finder_target *tgt = engine->data;
long syscall_no;
- unsigned long rv;
- unsigned long args[3];
+ unsigned long args[3] = { 0L };
int rc;
- struct mm_struct *mm;
- struct vm_area_struct *vma;
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
@@ -1100,37 +1114,92 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
&& tgt->munmap_events == 0))
return UTRACE_RESUME;
+ __stp_tf_handler_start();
+ if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
+ // We need 2 arguments
+ syscall_get_arguments(tsk, regs, 0, 2, args);
+ }
+ else if (syscall_no == MMAP_SYSCALL_NO(tsk)
+ || syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ // For mmap, we really just need the return value, so
+ // there is no need to save arguments
+ }
+ else { // mprotect()
+ // We need 3 arguments
+ syscall_get_arguments(tsk, regs, 0, 3, args);
+ }
+
+ // Remember the syscall information
+ rc = __stp_tf_add_map(tsk, syscall_no, args[0], args[1], args[2]);
+ if (rc != 0)
+ _stp_error("__stp_tf_add_map returned error %d on pid %d",
+ rc, tsk->pid);
+ __stp_tf_handler_end();
+ return UTRACE_RESUME;
+}
+
+#ifdef UTRACE_ORIG_VERSION
+static u32
+__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
+#else
+static u32
+__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
+#endif
+{
+ struct stap_task_finder_target *tgt = engine->data;
+ unsigned long rv;
+ struct __stp_tf_map_entry *entry;
+
+ if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ debug_task_finder_detach();
+ return UTRACE_DETACH;
+ }
+
+ if (tgt == NULL)
+ return UTRACE_RESUME;
+
+ // See if we can find saved syscall info. If we can, it must
+ // be one of the syscalls we are interested in (and we must
+ // have callbacks to call for it).
+ entry = __stp_tf_get_map_entry(tsk);
+ if (entry == NULL)
+ return UTRACE_RESUME;
+
// Get return value
+ __stp_tf_handler_start();
rv = syscall_get_return_value(tsk, regs);
- // We need the first syscall argument to see what address we
- // were operating on.
- syscall_get_arguments(tsk, regs, 0, 1, args);
-
#ifdef DEBUG_TASK_FINDER_VMA
_stp_dbug(__FUNCTION__, __LINE__,
"tsk %d found %s(0x%lx), returned 0x%lx\n",
tsk->pid,
- ((syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap"
- : ((syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2"
- : ((syscall_no == MPROTECT_SYSCALL_NO(tsk)) ? "mprotect"
- : ((syscall_no == MUNMAP_SYSCALL_NO(tsk)) ? "munmap"
+ ((entry->syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap"
+ : ((entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2"
+ : ((entry->syscall_no == MPROTECT_SYSCALL_NO(tsk))
+ ? "mprotect"
+ : ((entry->syscall_no == MUNMAP_SYSCALL_NO(tsk))
+ ? "munmap"
: "UNKNOWN")))),
- args[0], rv);
+ entry->arg0, rv);
#endif
- __stp_tf_handler_start();
- if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
- // We need the 2nd syscall argument for the length.
- syscall_get_arguments(tsk, regs, 1, 1, &args[1]);
+ if (entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
// Call the callbacks
- __stp_call_munmap_callbacks(tgt, tsk, args[0], args[1]);
+ __stp_call_munmap_callbacks(tgt, tsk, entry->arg0, entry->arg1);
}
- else if (syscall_no == MMAP_SYSCALL_NO(tsk)
- || syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ else if (entry->syscall_no == MMAP_SYSCALL_NO(tsk)
+ || entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ struct mm_struct *mm;
mm = get_task_mm(tsk);
if (mm) {
+ struct vm_area_struct *vma;
+
down_read(&mm->mmap_sem);
vma = __stp_find_file_based_vma(mm, rv);
@@ -1144,18 +1213,14 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
mmput(mm);
}
}
- else {
- // We need the 2nd syscall argument for the length and
- // the 3rd argument for the protection.
- syscall_get_arguments(tsk, regs, 1, 2, &args[1]);
-
+ else { // mprotect
// Call the callbacks
- __stp_call_mprotect_callbacks(tgt, tsk, args[0], args[1],
- args[2]);
+ __stp_call_mprotect_callbacks(tgt, tsk, entry->arg0,
+ entry->arg1, entry->arg2);
}
-syscall_exit_done:
__stp_tf_handler_end();
+ __stp_tf_remove_map_entry(entry);
return UTRACE_RESUME;
}
@@ -1179,6 +1244,8 @@ stap_start_task_finder(void)
return ENOMEM;
}
+ __stp_tf_map_initialize();
+
atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING);
rcu_read_lock();
diff --git a/runtime/task_finder_map.c b/runtime/task_finder_map.c
new file mode 100644
index 00000000..b770dd0e
--- /dev/null
+++ b/runtime/task_finder_map.c
@@ -0,0 +1,191 @@
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/spinlock.h>
+
+// When handling mmap()/munmap()/mprotect() syscall tracing to notice
+// memory map changes, we need to cache syscall entry parameter values
+// for processing at syscall exit.
+
+// __stp_tf_map_lock protects the hash table.
+// Documentation/spinlocks.txt suggests we can be a bit more clever
+// if we guarantee that in interrupt context we only read, not write,
+// the data structures. We should never change the hash table or the
+// contents in interrupt context (which should only ever call
+// stap_find_map_map_info for getting stored info). So we might
+// want to look into that if this seems a bottleneck.
+static DEFINE_RWLOCK(__stp_tf_map_lock);
+
+#define __STP_TF_HASH_BITS 4
+#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)
+
+#ifndef TASK_FINDER_MAP_ENTRY_ITEMS
+#define TASK_FINDER_MAP_ENTRY_ITEMS 100
+#endif
+
+struct __stp_tf_map_entry {
+/* private: */
+ struct hlist_node hlist;
+ int usage;
+
+/* public: */
+ pid_t pid;
+ long syscall_no;
+ unsigned long arg0;
+ unsigned long arg1;
+ unsigned long arg2;
+};
+
+static struct __stp_tf_map_entry
+__stp_tf_map_free_list_items[TASK_FINDER_MAP_ENTRY_ITEMS];
+
+static struct hlist_head __stp_tf_map_free_list[1];
+
+static struct hlist_head __stp_tf_map_table[__STP_TF_TABLE_SIZE];
+
+// __stp_tf_map_initialize(): Initialize the free list. Grabs the
+// lock.
+static void
+__stp_tf_map_initialize(void)
+{
+ int i;
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+ for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) {
+ hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head);
+ }
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+}
+
+
+// __stp_tf_map_get_free_entry(): Returns an entry from the free list
+// or NULL. The __stp_tf_map_lock must be write locked before calling this
+// function.
+static struct __stp_tf_map_entry *
+__stp_tf_map_get_free_entry(void)
+{
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry = NULL;
+
+ if (hlist_empty(head))
+ return NULL;
+ hlist_for_each_entry(entry, node, head, hlist) {
+ break;
+ }
+ if (entry != NULL)
+ hlist_del(&entry->hlist);
+ return entry;
+}
+
+
+// __stp_tf_map_put_free_entry(): Puts an entry back on the free
+// list. The __stp_tf_map_lock must be write locked before calling this
+// function.
+static void
+__stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry)
+{
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+ hlist_add_head(&entry->hlist, head);
+}
+
+
+// __stp_tf_map_hash(): Compute the map hash.
+static inline u32
+__stp_tf_map_hash(struct task_struct *tsk)
+{
+ return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1));
+}
+
+
+// Get map_entry if the map is present in the map hash table.
+// Returns NULL if not present. Takes a read lock on __stp_tf_map_lock.
+static struct __stp_tf_map_entry *
+__stp_tf_get_map_entry(struct task_struct *tsk)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry;
+
+ unsigned long flags;
+ read_lock_irqsave(&__stp_tf_map_lock, flags);
+ head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ if (tsk->pid == entry->pid) {
+ read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return entry;
+ }
+ }
+ read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return NULL;
+}
+
+
+// Add the map info to the map hash table. Takes a write lock on
+// __stp_tf_map_lock.
+static int
+__stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry;
+ unsigned long flags;
+
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+ head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ // If we find an existing entry, just increment the
+ // usage count.
+ if (tsk->pid == entry->pid) {
+ entry->usage++;
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return 0;
+ }
+ }
+
+ // Get an element from the free list.
+ entry = __stp_tf_map_get_free_entry();
+ if (!entry) {
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return -ENOMEM;
+ }
+ entry->usage = 1;
+ entry->pid = tsk->pid;
+ entry->syscall_no = syscall_no;
+ entry->arg0 = arg0;
+ entry->arg1 = arg1;
+ entry->arg2 = arg2;
+ hlist_add_head(&entry->hlist, head);
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return 0;
+}
+
+
+// Remove the map entry from the map hash table. Takes a write lock on
+// __stp_tf_map_lock.
+static int
+__stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ int found = 0;
+
+ if (entry != NULL) {
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+
+ // Decrement the usage count.
+ entry->usage--;
+
+ // If the entry is unused, put it back on the free
+ // list.
+ if (entry->usage == 0) {
+ hlist_del(&entry->hlist);
+ __stp_tf_map_put_free_entry(entry);
+ }
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ }
+ return 0;
+}