diff options
author | Dave Brolley <brolley@redhat.com> | 2009-04-27 11:56:15 -0400 |
---|---|---|
committer | Dave Brolley <brolley@redhat.com> | 2009-04-27 11:56:15 -0400 |
commit | e84f20287ae21b31c3505dfd0a81176687ad1ec5 (patch) | |
tree | 956005556406fc93e4444ee0d425aafed8e241d0 /runtime | |
parent | fb6d28283bd7ea63364a008d32c53687a694642f (diff) | |
parent | 097e4a5b397b9e826453e01caa1f8169886128c5 (diff) | |
download | systemtap-steved-e84f20287ae21b31c3505dfd0a81176687ad1ec5.tar.gz systemtap-steved-e84f20287ae21b31c3505dfd0a81176687ad1ec5.tar.xz systemtap-steved-e84f20287ae21b31c3505dfd0a81176687ad1ec5.zip |
Merge branch 'master' of git://sources.redhat.com/git/systemtap
Conflicts:
configure
testsuite/configure
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/sym.c | 51 | ||||
-rw-r--r-- | runtime/task_finder.c | 137 | ||||
-rw-r--r-- | runtime/task_finder_map.c | 191 | ||||
-rw-r--r-- | runtime/uprobes/uprobes_i386.c | 24 | ||||
-rw-r--r-- | runtime/vsprintf.c | 31 |
5 files changed, 347 insertions, 87 deletions
diff --git a/runtime/sym.c b/runtime/sym.c index 013edd0c..63dad1af 100644 --- a/runtime/sym.c +++ b/runtime/sym.c @@ -271,34 +271,29 @@ static int _stp_module_check(void) dwfl_module_build_id was not intended to return the end address. */ notes_addr -= m->build_id_len; - if (notes_addr > base_addr) { - for (j = 0; j < m->build_id_len; j++) - { - unsigned char theory, practice; - theory = m->build_id_bits [j]; - practice = ((unsigned char*) notes_addr) [j]; - /* XXX: consider using kread() instead of above. */ - if (theory != practice) - { - #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) - _stp_error ("%s: inconsistent %s build-id byte #%d " - "(0x%x [actual] vs. 0x%x [debuginfo])\n", - THIS_MODULE->name, m->name, j, - practice, theory); - return 1; - #else - /* This branch is a surrogate for - kernels affected by Fedora bug - #465873. */ - printk(KERN_WARNING - "%s: inconsistent %s build-id byte #%d " - "(0x%x [actual] vs. 0x%x [debuginfo])\n", - THIS_MODULE->name, m->name, j, - practice, theory); - break; /* Note just the first mismatch. */ - #endif - } - } + if (notes_addr <= base_addr) /* shouldn't happen */ + continue; + if (memcmp(m->build_id_bits, (unsigned char*) notes_addr, m->build_id_len)) { + const char *basename; + + basename = strrchr(m->path, '/'); + if (basename) + basename++; + else + basename = m->path; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) + _stp_error ("Build-id mismatch: \"%s\" %.*M" + " vs. \"%s\" %.*M\n", + m->name, m->build_id_len, notes_addr, + basename, m->build_id_len, m->build_id_bits); + return 1; +#else + /* This branch is a surrogate for kernels + * affected by Fedora bug #465873. */ + printk(KERN_WARNING + "Build-id mismatch: \"%s\" vs. \"%s\"\n", + m->name, basename); +#endif } } /* end checking */ } /* end loop */ diff --git a/runtime/task_finder.c b/runtime/task_finder.c index 93b89cb9..f5e059ca 100644 --- a/runtime/task_finder.c +++ b/runtime/task_finder.c @@ -19,6 +19,7 @@ struct stap_task_finder_target { }; #include "syscall.h" #include "utrace_compatibility.h" +#include "task_finder_map.c" static LIST_HEAD(__stp_task_finder_list); @@ -129,6 +130,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action, #ifdef UTRACE_ORIG_VERSION static u32 +__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs); +#else +static u32 +__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, + struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs); +#endif + +#ifdef UTRACE_ORIG_VERSION +static u32 __stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine, struct task_struct *tsk, struct pt_regs *regs); @@ -166,6 +180,8 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) memset(&new_tgt->ops, 0, sizeof(new_tgt->ops)); new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death; new_tgt->ops.report_quiesce = &__stp_utrace_task_finder_target_quiesce; + new_tgt->ops.report_syscall_entry = \ + &__stp_utrace_task_finder_target_syscall_entry; new_tgt->ops.report_syscall_exit = \ &__stp_utrace_task_finder_target_syscall_exit; @@ -394,6 +410,7 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) #define __STP_TASK_BASE_EVENTS (UTRACE_EVENT(DEATH)) #define __STP_TASK_VM_BASE_EVENTS (__STP_TASK_BASE_EVENTS \ + | UTRACE_EVENT(SYSCALL_ENTRY)\ | UTRACE_EVENT(SYSCALL_EXIT)) /* @@ -1053,24 +1070,21 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr) #ifdef UTRACE_ORIG_VERSION static u32 -__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine, - struct task_struct *tsk, - struct pt_regs *regs) +__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) #else static u32 -__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, - struct utrace_attached_engine *engine, - struct task_struct *tsk, - struct pt_regs *regs) +__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, + struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) #endif { struct stap_task_finder_target *tgt = engine->data; long syscall_no; - unsigned long rv; - unsigned long args[3]; + unsigned long args[3] = { 0L }; int rc; - struct mm_struct *mm; - struct vm_area_struct *vma; if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { debug_task_finder_detach(); @@ -1100,37 +1114,92 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, && tgt->munmap_events == 0)) return UTRACE_RESUME; + __stp_tf_handler_start(); + if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) { + // We need 2 arguments + syscall_get_arguments(tsk, regs, 0, 2, args); + } + else if (syscall_no == MMAP_SYSCALL_NO(tsk) + || syscall_no == MMAP2_SYSCALL_NO(tsk)) { + // For mmap, we really just need the return value, so + // there is no need to save arguments + } + else { // mprotect() + // We need 3 arguments + syscall_get_arguments(tsk, regs, 0, 3, args); + } + + // Remember the syscall information + rc = __stp_tf_add_map(tsk, syscall_no, args[0], args[1], args[2]); + if (rc != 0) + _stp_error("__stp_tf_add_map returned error %d on pid %d", + rc, tsk->pid); + __stp_tf_handler_end(); + return UTRACE_RESUME; +} + +#ifdef UTRACE_ORIG_VERSION +static u32 +__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) +#else +static u32 +__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, + struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs) +#endif +{ + struct stap_task_finder_target *tgt = engine->data; + unsigned long rv; + struct __stp_tf_map_entry *entry; + + if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { + debug_task_finder_detach(); + return UTRACE_DETACH; + } + + if (tgt == NULL) + return UTRACE_RESUME; + + // See if we can find saved syscall info. If we can, it must + // be one of the syscalls we are interested in (and we must + // have callbacks to call for it). + entry = __stp_tf_get_map_entry(tsk); + if (entry == NULL) + return UTRACE_RESUME; + // Get return value + __stp_tf_handler_start(); rv = syscall_get_return_value(tsk, regs); - // We need the first syscall argument to see what address we - // were operating on. - syscall_get_arguments(tsk, regs, 0, 1, args); - #ifdef DEBUG_TASK_FINDER_VMA _stp_dbug(__FUNCTION__, __LINE__, "tsk %d found %s(0x%lx), returned 0x%lx\n", tsk->pid, - ((syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap" - : ((syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2" - : ((syscall_no == MPROTECT_SYSCALL_NO(tsk)) ? "mprotect" - : ((syscall_no == MUNMAP_SYSCALL_NO(tsk)) ? "munmap" + ((entry->syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap" + : ((entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2" + : ((entry->syscall_no == MPROTECT_SYSCALL_NO(tsk)) + ? "mprotect" + : ((entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) + ? "munmap" : "UNKNOWN")))), - args[0], rv); + entry->arg0, rv); #endif - __stp_tf_handler_start(); - if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) { - // We need the 2nd syscall argument for the length. - syscall_get_arguments(tsk, regs, 1, 1, &args[1]); + if (entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) { // Call the callbacks - __stp_call_munmap_callbacks(tgt, tsk, args[0], args[1]); + __stp_call_munmap_callbacks(tgt, tsk, entry->arg0, entry->arg1); } - else if (syscall_no == MMAP_SYSCALL_NO(tsk) - || syscall_no == MMAP2_SYSCALL_NO(tsk)) { + else if (entry->syscall_no == MMAP_SYSCALL_NO(tsk) + || entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) { + struct mm_struct *mm; mm = get_task_mm(tsk); if (mm) { + struct vm_area_struct *vma; + down_read(&mm->mmap_sem); vma = __stp_find_file_based_vma(mm, rv); @@ -1144,18 +1213,14 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, mmput(mm); } } - else { - // We need the 2nd syscall argument for the length and - // the 3rd argument for the protection. - syscall_get_arguments(tsk, regs, 1, 2, &args[1]); - + else { // mprotect // Call the callbacks - __stp_call_mprotect_callbacks(tgt, tsk, args[0], args[1], - args[2]); + __stp_call_mprotect_callbacks(tgt, tsk, entry->arg0, + entry->arg1, entry->arg2); } -syscall_exit_done: __stp_tf_handler_end(); + __stp_tf_remove_map_entry(entry); return UTRACE_RESUME; } @@ -1179,6 +1244,8 @@ stap_start_task_finder(void) return ENOMEM; } + __stp_tf_map_initialize(); + atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING); rcu_read_lock(); diff --git a/runtime/task_finder_map.c b/runtime/task_finder_map.c new file mode 100644 index 00000000..b770dd0e --- /dev/null +++ b/runtime/task_finder_map.c @@ -0,0 +1,191 @@ +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> + +// When handling mmap()/munmap()/mprotect() syscall tracing to notice +// memory map changes, we need to cache syscall entry parameter values +// for processing at syscall exit. + +// __stp_tf_map_lock protects the hash table. +// Documentation/spinlocks.txt suggest we can be a bit more clever +// if we guarantee that in interrupt context we only read, not write +// the datastructures. We should never change the hash table or the +// contents in interrupt context (which should only ever call +// stap_find_map_map_info for getting stored info). So we might +// want to look into that if this seems a bottleneck. +static DEFINE_RWLOCK(__stp_tf_map_lock); + +#define __STP_TF_HASH_BITS 4 +#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS) + +#ifndef TASK_FINDER_MAP_ENTRY_ITEMS +#define TASK_FINDER_MAP_ENTRY_ITEMS 100 +#endif + +struct __stp_tf_map_entry { +/* private: */ + struct hlist_node hlist; + int usage; + +/* public: */ + pid_t pid; + long syscall_no; + unsigned long arg0; + unsigned long arg1; + unsigned long arg2; +}; + +static struct __stp_tf_map_entry +__stp_tf_map_free_list_items[TASK_FINDER_MAP_ENTRY_ITEMS]; + +static struct hlist_head __stp_tf_map_free_list[1]; + +static struct hlist_head __stp_tf_map_table[__STP_TF_TABLE_SIZE]; + +// __stp_tf_map_initialize(): Initialize the free list. Grabs the +// lock. +static void +__stp_tf_map_initialize(void) +{ + int i; + struct hlist_head *head = &__stp_tf_map_free_list[0]; + + unsigned long flags; + write_lock_irqsave(&__stp_tf_map_lock, flags); + for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) { + hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head); + } + write_unlock_irqrestore(&__stp_tf_map_lock, flags); +} + + +// __stp_tf_map_get_free_entry(): Returns an entry from the free list +// or NULL. The __stp_tf_map_lock must be write locked before calling this +// function. +static struct __stp_tf_map_entry * +__stp_tf_map_get_free_entry(void) +{ + struct hlist_head *head = &__stp_tf_map_free_list[0]; + struct hlist_node *node; + struct __stp_tf_map_entry *entry = NULL; + + if (hlist_empty(head)) + return NULL; + hlist_for_each_entry(entry, node, head, hlist) { + break; + } + if (entry != NULL) + hlist_del(&entry->hlist); + return entry; +} + + +// __stp_tf_map_put_free_entry(): Puts an entry back on the free +// list. The __stp_tf_map_lock must be write locked before calling this +// function. +static void +__stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry) +{ + struct hlist_head *head = &__stp_tf_map_free_list[0]; + hlist_add_head(&entry->hlist, head); +} + + +// __stp_tf_map_hash(): Compute the map hash. +static inline u32 +__stp_tf_map_hash(struct task_struct *tsk) +{ + return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1)); +} + + +// Get map_entry if the map is present in the map hash table. +// Returns NULL if not present. Takes a read lock on __stp_tf_map_lock. +static struct __stp_tf_map_entry * +__stp_tf_get_map_entry(struct task_struct *tsk) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_map_entry *entry; + + unsigned long flags; + read_lock_irqsave(&__stp_tf_map_lock, flags); + head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + if (tsk->pid == entry->pid) { + read_unlock_irqrestore(&__stp_tf_map_lock, flags); + return entry; + } + } + read_unlock_irqrestore(&__stp_tf_map_lock, flags); + return NULL; +} + + +// Add the map info to the map hash table. Takes a write lock on +// __stp_tf_map_lock. +static int +__stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_map_entry *entry; + unsigned long flags; + + write_lock_irqsave(&__stp_tf_map_lock, flags); + head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + // If we find an existing entry, just increment the + // usage count. + if (tsk->pid == entry->pid) { + entry->usage++; + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return 0; + } + } + + // Get an element from the free list. + entry = __stp_tf_map_get_free_entry(); + if (!entry) { + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return -ENOMEM; + } + entry->usage = 1; + entry->pid = tsk->pid; + entry->syscall_no = syscall_no; + entry->arg0 = arg0; + entry->arg1 = arg1; + entry->arg2 = arg2; + hlist_add_head(&entry->hlist, head); + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return 0; +} + + +// Remove the map entry from the map hash table. Takes a write lock on +// __stp_tf_map_lock. +static int +__stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry) +{ + struct hlist_head *head; + struct hlist_node *node; + int found = 0; + + if (entry != NULL) { + unsigned long flags; + write_lock_irqsave(&__stp_tf_map_lock, flags); + + // Decrement the usage count. + entry->usage--; + + // If the entry is unused, put it back on the free + // list. + if (entry->usage == 0) { + hlist_del(&entry->hlist); + __stp_tf_map_put_free_entry(entry); + } + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + } + return 0; +} diff --git a/runtime/uprobes/uprobes_i386.c b/runtime/uprobes/uprobes_i386.c index 7743f400..008f32de 100644 --- a/runtime/uprobes/uprobes_i386.c +++ b/runtime/uprobes/uprobes_i386.c @@ -58,22 +58,22 @@ static const unsigned long good_2byte_insns[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ - W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 00 */ - W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ - W(0x20, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ - W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ - W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ - W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ - W(0x60, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 60 */ - W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */ + W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */ + W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */ + W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */ + W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ + W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ + W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */ + W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */ + W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */ - W(0xc0, 1,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1)| /* c0 */ - W(0xd0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* d0 */ - W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */ - W(0xf0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) /* f0 */ + W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */ + W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */ + W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */ + W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */ /* ------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c index 38ab0e2d..23810e75 100644 --- a/runtime/vsprintf.c +++ b/runtime/vsprintf.c @@ -361,18 +361,16 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) else len = 1; + if (*fmt_copy == 'M') + len = len * 2; /* hex dump print size */ + if (!(flags & STP_LEFT)) { while (len < field_width--) { num_bytes++; } } - if (*fmt_copy == 'M') { - num_bytes += number_size((unsigned long) *(uint64_t *) s, - 16, field_width, len, flags); - } - else { - num_bytes += len; - } + + num_bytes += len; while (len < field_width--) { num_bytes++; @@ -636,16 +634,25 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) len = 1; if (!(flags & STP_LEFT)) { - while (len < field_width--) { + int actlen = len; + if (*fmt == 'M') + actlen = len * 2; + while (actlen < field_width--) { if (str <= end) *str = ' '; ++str; } } - if (*fmt == 'M') { - str = number(str, str + len - 1 < end ? str + len - 1 : end, - (unsigned long) *(uint64_t *) s, - 16, field_width, len, flags); + if (*fmt == 'M') { /* stolen from kernel: trace_seq_putmem_hex() */ + const char _stp_hex_asc[] = "0123456789abcdef"; + int j; + for (i = 0, j = 0; i < len; i++) { + *str = _stp_hex_asc[((*s) & 0xf0) >> 4]; + str++; + *str = _stp_hex_asc[((*s) & 0x0f)]; + str++; s++; + } + len = len * 2; /* the actual length */ } else { for (i = 0; i < len; ++i) { |