diff options
author | David Smith <dsmith@redhat.com> | 2009-04-24 12:48:29 -0500 |
---|---|---|
committer | David Smith <dsmith@redhat.com> | 2009-04-24 12:48:29 -0500 |
commit | 3cb2045e46dcac5d2c33fd22622508891c66bee0 (patch) | |
tree | 624385fe4c5d2461f38dbbc417931485b44ad8cb /runtime | |
parent | 38e258727786a796835fb669f7413f52e207fd07 (diff) | |
download | systemtap-steved-3cb2045e46dcac5d2c33fd22622508891c66bee0.tar.gz systemtap-steved-3cb2045e46dcac5d2c33fd22622508891c66bee0.tar.xz systemtap-steved-3cb2045e46dcac5d2c33fd22622508891c66bee0.zip |
Cache and retrieve syscall arguments when needed.
* runtime/task_finder.c (__stp_utrace_task_finder_target_syscall_entry):
New function that caches syscall arguments for mmap(), munmap(), and
mprotect() for use in __stp_utrace_task_finder_target_syscall_exit().
(__stp_utrace_task_finder_target_syscall_exit): Uses cached syscall
argument info when making callbacks.
(stap_start_task_finder): Initializes map subsystem for storing syscall
argument data.
* runtime/task_finder_map.c: New file containing functions to save and
retrieve syscall arguments.
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/task_finder.c | 137 | ||||
-rw-r--r-- | runtime/task_finder_map.c | 191 |
2 files changed, 293 insertions, 35 deletions
diff --git a/runtime/task_finder.c b/runtime/task_finder.c index 93b89cb9..f5e059ca 100644 --- a/runtime/task_finder.c +++ b/runtime/task_finder.c @@ -19,6 +19,7 @@ struct stap_task_finder_target { }; #include "syscall.h" #include "utrace_compatibility.h" +#include "task_finder_map.c" static LIST_HEAD(__stp_task_finder_list); @@ -129,6 +130,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action, #ifdef UTRACE_ORIG_VERSION static u32 +__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs); +#else +static u32 +__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, + struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs); +#endif + +#ifdef UTRACE_ORIG_VERSION +static u32 __stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine, struct task_struct *tsk, struct pt_regs *regs); @@ -166,6 +180,8 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) memset(&new_tgt->ops, 0, sizeof(new_tgt->ops)); new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death; new_tgt->ops.report_quiesce = &__stp_utrace_task_finder_target_quiesce; + new_tgt->ops.report_syscall_entry = \ + &__stp_utrace_task_finder_target_syscall_entry; new_tgt->ops.report_syscall_exit = \ &__stp_utrace_task_finder_target_syscall_exit; @@ -394,6 +410,7 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) #define __STP_TASK_BASE_EVENTS (UTRACE_EVENT(DEATH)) #define __STP_TASK_VM_BASE_EVENTS (__STP_TASK_BASE_EVENTS \ + | UTRACE_EVENT(SYSCALL_ENTRY)\ | UTRACE_EVENT(SYSCALL_EXIT)) /* @@ -1053,24 +1070,21 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr) #ifdef UTRACE_ORIG_VERSION static u32 -__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine, - struct task_struct *tsk, - struct pt_regs *regs) 
+__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) #else static u32 -__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, - struct utrace_attached_engine *engine, - struct task_struct *tsk, - struct pt_regs *regs) +__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, + struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) #endif { struct stap_task_finder_target *tgt = engine->data; long syscall_no; - unsigned long rv; - unsigned long args[3]; + unsigned long args[3] = { 0L }; int rc; - struct mm_struct *mm; - struct vm_area_struct *vma; if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { debug_task_finder_detach(); @@ -1100,37 +1114,92 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, && tgt->munmap_events == 0)) return UTRACE_RESUME; + __stp_tf_handler_start(); + if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) { + // We need 2 arguments + syscall_get_arguments(tsk, regs, 0, 2, args); + } + else if (syscall_no == MMAP_SYSCALL_NO(tsk) + || syscall_no == MMAP2_SYSCALL_NO(tsk)) { + // For mmap, we really just need the return value, so + // there is no need to save arguments + } + else { // mprotect() + // We need 3 arguments + syscall_get_arguments(tsk, regs, 0, 3, args); + } + + // Remember the syscall information + rc = __stp_tf_add_map(tsk, syscall_no, args[0], args[1], args[2]); + if (rc != 0) + _stp_error("__stp_tf_add_map returned error %d on pid %d", + rc, tsk->pid); + __stp_tf_handler_end(); + return UTRACE_RESUME; +} + +#ifdef UTRACE_ORIG_VERSION +static u32 +__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) +#else +static u32 +__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, + struct utrace_attached_engine *engine, + struct task_struct 
*tsk, + struct pt_regs *regs) +#endif +{ + struct stap_task_finder_target *tgt = engine->data; + unsigned long rv; + struct __stp_tf_map_entry *entry; + + if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { + debug_task_finder_detach(); + return UTRACE_DETACH; + } + + if (tgt == NULL) + return UTRACE_RESUME; + + // See if we can find saved syscall info. If we can, it must + // be one of the syscalls we are interested in (and we must + // have callbacks to call for it). + entry = __stp_tf_get_map_entry(tsk); + if (entry == NULL) + return UTRACE_RESUME; + // Get return value + __stp_tf_handler_start(); rv = syscall_get_return_value(tsk, regs); - // We need the first syscall argument to see what address we - // were operating on. - syscall_get_arguments(tsk, regs, 0, 1, args); - #ifdef DEBUG_TASK_FINDER_VMA _stp_dbug(__FUNCTION__, __LINE__, "tsk %d found %s(0x%lx), returned 0x%lx\n", tsk->pid, - ((syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap" - : ((syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2" - : ((syscall_no == MPROTECT_SYSCALL_NO(tsk)) ? "mprotect" - : ((syscall_no == MUNMAP_SYSCALL_NO(tsk)) ? "munmap" + ((entry->syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap" + : ((entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2" + : ((entry->syscall_no == MPROTECT_SYSCALL_NO(tsk)) + ? "mprotect" + : ((entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) + ? "munmap" : "UNKNOWN")))), - args[0], rv); + entry->arg0, rv); #endif - __stp_tf_handler_start(); - if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) { - // We need the 2nd syscall argument for the length. 
- syscall_get_arguments(tsk, regs, 1, 1, &args[1]); + if (entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) { // Call the callbacks - __stp_call_munmap_callbacks(tgt, tsk, args[0], args[1]); + __stp_call_munmap_callbacks(tgt, tsk, entry->arg0, entry->arg1); } - else if (syscall_no == MMAP_SYSCALL_NO(tsk) - || syscall_no == MMAP2_SYSCALL_NO(tsk)) { + else if (entry->syscall_no == MMAP_SYSCALL_NO(tsk) + || entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) { + struct mm_struct *mm; mm = get_task_mm(tsk); if (mm) { + struct vm_area_struct *vma; + down_read(&mm->mmap_sem); vma = __stp_find_file_based_vma(mm, rv); @@ -1144,18 +1213,14 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, mmput(mm); } } - else { - // We need the 2nd syscall argument for the length and - // the 3rd argument for the protection. - syscall_get_arguments(tsk, regs, 1, 2, &args[1]); - + else { // mprotect // Call the callbacks - __stp_call_mprotect_callbacks(tgt, tsk, args[0], args[1], - args[2]); + __stp_call_mprotect_callbacks(tgt, tsk, entry->arg0, + entry->arg1, entry->arg2); } -syscall_exit_done: __stp_tf_handler_end(); + __stp_tf_remove_map_entry(entry); return UTRACE_RESUME; } @@ -1179,6 +1244,8 @@ stap_start_task_finder(void) return ENOMEM; } + __stp_tf_map_initialize(); + atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING); rcu_read_lock(); diff --git a/runtime/task_finder_map.c b/runtime/task_finder_map.c new file mode 100644 index 00000000..b770dd0e --- /dev/null +++ b/runtime/task_finder_map.c @@ -0,0 +1,191 @@ +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> + +// When handling mmap()/munmap()/mprotect() syscall tracing to notice +// memory map changes, we need to cache syscall entry parameter values +// for processing at syscall exit. + +// __stp_tf_map_lock protects the hash table. 
+// Documentation/spinlocks.txt suggest we can be a bit more clever +// if we guarantee that in interrupt context we only read, not write +// the datastructures. We should never change the hash table or the +// contents in interrupt context (which should only ever call +// stap_find_map_map_info for getting stored info). So we might +// want to look into that if this seems a bottleneck. +static DEFINE_RWLOCK(__stp_tf_map_lock); + +#define __STP_TF_HASH_BITS 4 +#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS) + +#ifndef TASK_FINDER_MAP_ENTRY_ITEMS +#define TASK_FINDER_MAP_ENTRY_ITEMS 100 +#endif + +struct __stp_tf_map_entry { +/* private: */ + struct hlist_node hlist; + int usage; + +/* public: */ + pid_t pid; + long syscall_no; + unsigned long arg0; + unsigned long arg1; + unsigned long arg2; +}; + +static struct __stp_tf_map_entry +__stp_tf_map_free_list_items[TASK_FINDER_MAP_ENTRY_ITEMS]; + +static struct hlist_head __stp_tf_map_free_list[1]; + +static struct hlist_head __stp_tf_map_table[__STP_TF_TABLE_SIZE]; + +// __stp_tf_map_initialize(): Initialize the free list. Grabs the +// lock. +static void +__stp_tf_map_initialize(void) +{ + int i; + struct hlist_head *head = &__stp_tf_map_free_list[0]; + + unsigned long flags; + write_lock_irqsave(&__stp_tf_map_lock, flags); + for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) { + hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head); + } + write_unlock_irqrestore(&__stp_tf_map_lock, flags); +} + + +// __stp_tf_map_get_free_entry(): Returns an entry from the free list +// or NULL. The __stp_tf_map_lock must be write locked before calling this +// function. 
+static struct __stp_tf_map_entry * +__stp_tf_map_get_free_entry(void) +{ + struct hlist_head *head = &__stp_tf_map_free_list[0]; + struct hlist_node *node; + struct __stp_tf_map_entry *entry = NULL; + + if (hlist_empty(head)) + return NULL; + hlist_for_each_entry(entry, node, head, hlist) { + break; + } + if (entry != NULL) + hlist_del(&entry->hlist); + return entry; +} + + +// __stp_tf_map_put_free_entry(): Puts an entry back on the free +// list. The __stp_tf_map_lock must be write locked before calling this +// function. +static void +__stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry) +{ + struct hlist_head *head = &__stp_tf_map_free_list[0]; + hlist_add_head(&entry->hlist, head); +} + + +// __stp_tf_map_hash(): Compute the map hash. +static inline u32 +__stp_tf_map_hash(struct task_struct *tsk) +{ + return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1)); +} + + +// Get map_entry if the map is present in the map hash table. +// Returns NULL if not present. Takes a read lock on __stp_tf_map_lock. +static struct __stp_tf_map_entry * +__stp_tf_get_map_entry(struct task_struct *tsk) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_map_entry *entry; + + unsigned long flags; + read_lock_irqsave(&__stp_tf_map_lock, flags); + head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + if (tsk->pid == entry->pid) { + read_unlock_irqrestore(&__stp_tf_map_lock, flags); + return entry; + } + } + read_unlock_irqrestore(&__stp_tf_map_lock, flags); + return NULL; +} + + +// Add the map info to the map hash table. Takes a write lock on +// __stp_tf_map_lock. 
+static int +__stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_map_entry *entry; + unsigned long flags; + + write_lock_irqsave(&__stp_tf_map_lock, flags); + head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + // If we find an existing entry, just increment the + // usage count. + if (tsk->pid == entry->pid) { + entry->usage++; + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return 0; + } + } + + // Get an element from the free list. + entry = __stp_tf_map_get_free_entry(); + if (!entry) { + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return -ENOMEM; + } + entry->usage = 1; + entry->pid = tsk->pid; + entry->syscall_no = syscall_no; + entry->arg0 = arg0; + entry->arg1 = arg1; + entry->arg2 = arg2; + hlist_add_head(&entry->hlist, head); + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return 0; +} + + +// Remove the map entry from the map hash table. Takes a write lock on +// __stp_tf_map_lock. +static int +__stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry) +{ + struct hlist_head *head; + struct hlist_node *node; + int found = 0; + + if (entry != NULL) { + unsigned long flags; + write_lock_irqsave(&__stp_tf_map_lock, flags); + + // Decrement the usage count. + entry->usage--; + + // If the entry is unused, put it back on the free + // list. + if (entry->usage == 0) { + hlist_del(&entry->hlist); + __stp_tf_map_put_free_entry(entry); + } + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + } + return 0; +} |