Diffstat (limited to 'runtime')
-rw-r--r--  runtime/sym.c                    51
-rw-r--r--  runtime/task_finder.c           137
-rw-r--r--  runtime/task_finder_map.c       191
-rw-r--r--  runtime/uprobes/uprobes_i386.c   24
-rw-r--r--  runtime/vsprintf.c               31
5 files changed, 347 insertions, 87 deletions
diff --git a/runtime/sym.c b/runtime/sym.c
index 013edd0c..63dad1af 100644
--- a/runtime/sym.c
+++ b/runtime/sym.c
@@ -271,34 +271,29 @@ static int _stp_module_check(void)
dwfl_module_build_id was not intended to return the end address. */
notes_addr -= m->build_id_len;
- if (notes_addr > base_addr) {
- for (j = 0; j < m->build_id_len; j++)
- {
- unsigned char theory, practice;
- theory = m->build_id_bits [j];
- practice = ((unsigned char*) notes_addr) [j];
- /* XXX: consider using kread() instead of above. */
- if (theory != practice)
- {
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
- _stp_error ("%s: inconsistent %s build-id byte #%d "
- "(0x%x [actual] vs. 0x%x [debuginfo])\n",
- THIS_MODULE->name, m->name, j,
- practice, theory);
- return 1;
- #else
- /* This branch is a surrogate for
- kernels affected by Fedora bug
- #465873. */
- printk(KERN_WARNING
- "%s: inconsistent %s build-id byte #%d "
- "(0x%x [actual] vs. 0x%x [debuginfo])\n",
- THIS_MODULE->name, m->name, j,
- practice, theory);
- break; /* Note just the first mismatch. */
- #endif
- }
- }
+ if (notes_addr <= base_addr) /* shouldn't happen */
+ continue;
+ if (memcmp(m->build_id_bits, (unsigned char*) notes_addr, m->build_id_len)) {
+ const char *basename;
+
+ basename = strrchr(m->path, '/');
+ if (basename)
+ basename++;
+ else
+ basename = m->path;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
+ _stp_error ("Build-id mismatch: \"%s\" %.*M"
+ " vs. \"%s\" %.*M\n",
+ m->name, m->build_id_len, notes_addr,
+ basename, m->build_id_len, m->build_id_bits);
+ return 1;
+#else
+ /* This branch is a surrogate for kernels
+ * affected by Fedora bug #465873. */
+ printk(KERN_WARNING
+ "Build-id mismatch: \"%s\" vs. \"%s\"\n",
+ m->name, basename);
+#endif
}
} /* end checking */
} /* end loop */
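
The rewritten check above collapses the byte-by-byte loop into a single memcmp() over the recorded build-id and reports only the basename of the on-disk path. A minimal user-space sketch of that comparison shape (struct mod_info, check_build_id() and the sample byte arrays are illustrative stand-ins, not runtime code):

#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for the module fields _stp_module_check() reads. */
struct mod_info {
        const char *name;
        const char *path;
        const unsigned char *build_id_bits;
        int build_id_len;
};

/* Same shape as the patched check: one memcmp() over the whole id,
 * and only the basename of the on-disk path in the message. */
static int check_build_id(const struct mod_info *m,
                          const unsigned char *notes_addr)
{
        const char *basename;

        if (memcmp(m->build_id_bits, notes_addr, m->build_id_len) == 0)
                return 0;                       /* consistent */

        basename = strrchr(m->path, '/');
        basename = basename ? basename + 1 : m->path;
        fprintf(stderr, "Build-id mismatch: \"%s\" vs. \"%s\"\n",
                m->name, basename);
        return 1;
}

int main(void)
{
        const unsigned char recorded[]  = { 0x12, 0x34, 0x56 };
        const unsigned char in_memory[] = { 0x12, 0x34, 0x57 };
        struct mod_info m = { "demo", "/usr/lib/demo.so",
                              recorded, sizeof(recorded) };

        return check_build_id(&m, in_memory);   /* exits 1: mismatch demo */
}
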
diff --git a/runtime/task_finder.c b/runtime/task_finder.c
index 93b89cb9..f5e059ca 100644
--- a/runtime/task_finder.c
+++ b/runtime/task_finder.c
@@ -19,6 +19,7 @@ struct stap_task_finder_target { };
#include "syscall.h"
#include "utrace_compatibility.h"
+#include "task_finder_map.c"
static LIST_HEAD(__stp_task_finder_list);
@@ -129,6 +130,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
#ifdef UTRACE_ORIG_VERSION
static u32
+__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs);
+#else
+static u32
+__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs);
+#endif
+
+#ifdef UTRACE_ORIG_VERSION
+static u32
__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine,
struct task_struct *tsk,
struct pt_regs *regs);
@@ -166,6 +180,8 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
memset(&new_tgt->ops, 0, sizeof(new_tgt->ops));
new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death;
new_tgt->ops.report_quiesce = &__stp_utrace_task_finder_target_quiesce;
+ new_tgt->ops.report_syscall_entry = \
+ &__stp_utrace_task_finder_target_syscall_entry;
new_tgt->ops.report_syscall_exit = \
&__stp_utrace_task_finder_target_syscall_exit;
@@ -394,6 +410,7 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen)
#define __STP_TASK_BASE_EVENTS (UTRACE_EVENT(DEATH))
#define __STP_TASK_VM_BASE_EVENTS (__STP_TASK_BASE_EVENTS \
+ | UTRACE_EVENT(SYSCALL_ENTRY)\
| UTRACE_EVENT(SYSCALL_EXIT))
/*
@@ -1053,24 +1070,21 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr)
#ifdef UTRACE_ORIG_VERSION
static u32
-__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine,
- struct task_struct *tsk,
- struct pt_regs *regs)
+__stp_utrace_task_finder_target_syscall_entry(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
#else
static u32
-__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
- struct utrace_attached_engine *engine,
- struct task_struct *tsk,
- struct pt_regs *regs)
+__stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
#endif
{
struct stap_task_finder_target *tgt = engine->data;
long syscall_no;
- unsigned long rv;
- unsigned long args[3];
+ unsigned long args[3] = { 0L };
int rc;
- struct mm_struct *mm;
- struct vm_area_struct *vma;
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
@@ -1100,37 +1114,92 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
&& tgt->munmap_events == 0))
return UTRACE_RESUME;
+ __stp_tf_handler_start();
+ if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
+ // We need 2 arguments
+ syscall_get_arguments(tsk, regs, 0, 2, args);
+ }
+ else if (syscall_no == MMAP_SYSCALL_NO(tsk)
+ || syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ // For mmap, we really just need the return value, so
+ // there is no need to save arguments
+ }
+ else { // mprotect()
+ // We need 3 arguments
+ syscall_get_arguments(tsk, regs, 0, 3, args);
+ }
+
+ // Remember the syscall information
+ rc = __stp_tf_add_map(tsk, syscall_no, args[0], args[1], args[2]);
+ if (rc != 0)
+ _stp_error("__stp_tf_add_map returned error %d on pid %d",
+ rc, tsk->pid);
+ __stp_tf_handler_end();
+ return UTRACE_RESUME;
+}
+
+#ifdef UTRACE_ORIG_VERSION
+static u32
+__stp_utrace_task_finder_target_syscall_exit(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
+#else
+static u32
+__stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
+#endif
+{
+ struct stap_task_finder_target *tgt = engine->data;
+ unsigned long rv;
+ struct __stp_tf_map_entry *entry;
+
+ if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ debug_task_finder_detach();
+ return UTRACE_DETACH;
+ }
+
+ if (tgt == NULL)
+ return UTRACE_RESUME;
+
+ // See if we can find saved syscall info. If we can, it must
+ // be one of the syscalls we are interested in (and we must
+ // have callbacks to call for it).
+ entry = __stp_tf_get_map_entry(tsk);
+ if (entry == NULL)
+ return UTRACE_RESUME;
+
// Get return value
+ __stp_tf_handler_start();
rv = syscall_get_return_value(tsk, regs);
- // We need the first syscall argument to see what address we
- // were operating on.
- syscall_get_arguments(tsk, regs, 0, 1, args);
-
#ifdef DEBUG_TASK_FINDER_VMA
_stp_dbug(__FUNCTION__, __LINE__,
"tsk %d found %s(0x%lx), returned 0x%lx\n",
tsk->pid,
- ((syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap"
- : ((syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2"
- : ((syscall_no == MPROTECT_SYSCALL_NO(tsk)) ? "mprotect"
- : ((syscall_no == MUNMAP_SYSCALL_NO(tsk)) ? "munmap"
+ ((entry->syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap"
+ : ((entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2"
+ : ((entry->syscall_no == MPROTECT_SYSCALL_NO(tsk))
+ ? "mprotect"
+ : ((entry->syscall_no == MUNMAP_SYSCALL_NO(tsk))
+ ? "munmap"
: "UNKNOWN")))),
- args[0], rv);
+ entry->arg0, rv);
#endif
- __stp_tf_handler_start();
- if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
- // We need the 2nd syscall argument for the length.
- syscall_get_arguments(tsk, regs, 1, 1, &args[1]);
+ if (entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
// Call the callbacks
- __stp_call_munmap_callbacks(tgt, tsk, args[0], args[1]);
+ __stp_call_munmap_callbacks(tgt, tsk, entry->arg0, entry->arg1);
}
- else if (syscall_no == MMAP_SYSCALL_NO(tsk)
- || syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ else if (entry->syscall_no == MMAP_SYSCALL_NO(tsk)
+ || entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ struct mm_struct *mm;
mm = get_task_mm(tsk);
if (mm) {
+ struct vm_area_struct *vma;
+
down_read(&mm->mmap_sem);
vma = __stp_find_file_based_vma(mm, rv);
@@ -1144,18 +1213,14 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
mmput(mm);
}
}
- else {
- // We need the 2nd syscall argument for the length and
- // the 3rd argument for the protection.
- syscall_get_arguments(tsk, regs, 1, 2, &args[1]);
-
+ else { // mprotect
// Call the callbacks
- __stp_call_mprotect_callbacks(tgt, tsk, args[0], args[1],
- args[2]);
+ __stp_call_mprotect_callbacks(tgt, tsk, entry->arg0,
+ entry->arg1, entry->arg2);
}
-syscall_exit_done:
__stp_tf_handler_end();
+ __stp_tf_remove_map_entry(entry);
return UTRACE_RESUME;
}
@@ -1179,6 +1244,8 @@ stap_start_task_finder(void)
return ENOMEM;
}
+ __stp_tf_map_initialize();
+
atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING);
rcu_read_lock();
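
The split above implements the pattern described in task_finder_map.c below: stash the syscall arguments at entry, keyed by the task's pid, and look them up again at exit before calling the mmap/munmap/mprotect callbacks. A minimal user-space analogue of that entry/exit pairing (record_entry(), consume_exit() and the fixed-size cache[] table are illustrative; the runtime uses the hashed, lock-protected map below instead):

#include <stdio.h>

#define MAX_TRACKED 4           /* stand-in for TASK_FINDER_MAP_ENTRY_ITEMS */

struct cached_call {
        int in_use;
        int pid;
        long syscall_no;
        unsigned long arg0, arg1, arg2;
};

static struct cached_call cache[MAX_TRACKED];

/* Entry side: remember the arguments, keyed by pid. */
static int record_entry(int pid, long no, unsigned long a0,
                        unsigned long a1, unsigned long a2)
{
        for (int i = 0; i < MAX_TRACKED; i++) {
                if (!cache[i].in_use) {
                        cache[i] = (struct cached_call){ 1, pid, no, a0, a1, a2 };
                        return 0;
                }
        }
        return -1;              /* table full */
}

/* Exit side: look the arguments up again; caller releases the slot. */
static struct cached_call *consume_exit(int pid)
{
        for (int i = 0; i < MAX_TRACKED; i++)
                if (cache[i].in_use && cache[i].pid == pid)
                        return &cache[i];
        return NULL;
}

int main(void)
{
        record_entry(1234, 91 /* munmap on i386 */, 0x8048000UL, 4096, 0);

        struct cached_call *c = consume_exit(1234);
        if (c) {
                printf("pid %d: syscall %ld(0x%lx, %lu)\n",
                       c->pid, c->syscall_no, c->arg0, c->arg1);
                c->in_use = 0;  /* like __stp_tf_remove_map_entry() */
        }
        return 0;
}
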
diff --git a/runtime/task_finder_map.c b/runtime/task_finder_map.c
new file mode 100644
index 00000000..b770dd0e
--- /dev/null
+++ b/runtime/task_finder_map.c
@@ -0,0 +1,191 @@
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/spinlock.h>
+
+// When handling mmap()/munmap()/mprotect() syscall tracing to notice
+// memory map changes, we need to cache syscall entry parameter values
+// for processing at syscall exit.
+
+// __stp_tf_map_lock protects the hash table.
+// Documentation/spinlocks.txt suggests we can be a bit more clever
+// if we guarantee that in interrupt context we only read, not write,
+// the data structures. We should never change the hash table or the
+// contents in interrupt context (which should only ever call
+// stap_find_map_map_info for getting stored info). So we might
+// want to look into that if this seems a bottleneck.
+static DEFINE_RWLOCK(__stp_tf_map_lock);
+
+#define __STP_TF_HASH_BITS 4
+#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)
+
+#ifndef TASK_FINDER_MAP_ENTRY_ITEMS
+#define TASK_FINDER_MAP_ENTRY_ITEMS 100
+#endif
+
+struct __stp_tf_map_entry {
+/* private: */
+ struct hlist_node hlist;
+ int usage;
+
+/* public: */
+ pid_t pid;
+ long syscall_no;
+ unsigned long arg0;
+ unsigned long arg1;
+ unsigned long arg2;
+};
+
+static struct __stp_tf_map_entry
+__stp_tf_map_free_list_items[TASK_FINDER_MAP_ENTRY_ITEMS];
+
+static struct hlist_head __stp_tf_map_free_list[1];
+
+static struct hlist_head __stp_tf_map_table[__STP_TF_TABLE_SIZE];
+
+// __stp_tf_map_initialize(): Initialize the free list. Grabs the
+// lock.
+static void
+__stp_tf_map_initialize(void)
+{
+ int i;
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+ for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) {
+ hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head);
+ }
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+}
+
+
+// __stp_tf_map_get_free_entry(): Returns an entry from the free list
+// or NULL. The __stp_tf_map_lock must be write locked before calling this
+// function.
+static struct __stp_tf_map_entry *
+__stp_tf_map_get_free_entry(void)
+{
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry = NULL;
+
+ if (hlist_empty(head))
+ return NULL;
+ hlist_for_each_entry(entry, node, head, hlist) {
+ break;
+ }
+ if (entry != NULL)
+ hlist_del(&entry->hlist);
+ return entry;
+}
+
+
+// __stp_tf_map_put_free_entry(): Puts an entry back on the free
+// list. The __stp_tf_map_lock must be write locked before calling this
+// function.
+static void
+__stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry)
+{
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+ hlist_add_head(&entry->hlist, head);
+}
+
+
+// __stp_tf_map_hash(): Compute the map hash.
+static inline u32
+__stp_tf_map_hash(struct task_struct *tsk)
+{
+ return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1));
+}
+
+
+// Get map_entry if the map is present in the map hash table.
+// Returns NULL if not present. Takes a read lock on __stp_tf_map_lock.
+static struct __stp_tf_map_entry *
+__stp_tf_get_map_entry(struct task_struct *tsk)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry;
+
+ unsigned long flags;
+ read_lock_irqsave(&__stp_tf_map_lock, flags);
+ head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ if (tsk->pid == entry->pid) {
+ read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return entry;
+ }
+ }
+ read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return NULL;
+}
+
+
+// Add the map info to the map hash table. Takes a write lock on
+// __stp_tf_map_lock.
+static int
+__stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry;
+ unsigned long flags;
+
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+ head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ // If we find an existing entry, just increment the
+ // usage count.
+ if (tsk->pid == entry->pid) {
+ entry->usage++;
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return 0;
+ }
+ }
+
+ // Get an element from the free list.
+ entry = __stp_tf_map_get_free_entry();
+ if (!entry) {
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return -ENOMEM;
+ }
+ entry->usage = 1;
+ entry->pid = tsk->pid;
+ entry->syscall_no = syscall_no;
+ entry->arg0 = arg0;
+ entry->arg1 = arg1;
+ entry->arg2 = arg2;
+ hlist_add_head(&entry->hlist, head);
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return 0;
+}
+
+
+// Remove the map entry from the map hash table. Takes a write lock on
+// __stp_tf_map_lock.
+static int
+__stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ int found = 0;
+
+ if (entry != NULL) {
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+
+ // Decrement the usage count.
+ entry->usage--;
+
+ // If the entry is unused, put it back on the free
+ // list.
+ if (entry->usage == 0) {
+ hlist_del(&entry->hlist);
+ __stp_tf_map_put_free_entry(entry);
+ }
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ }
+ return 0;
+}
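
__stp_tf_map_hash() folds the pid through jhash_1word() and masks the result down to one of the 16 buckets; the mask is only a valid substitute for a modulo because __STP_TF_TABLE_SIZE is a power of two. A user-space sketch of that bucket selection (mix32() is a stand-in for the kernel-only jhash_1word(), and tf_map_hash() is illustrative):

#include <stdint.h>
#include <stdio.h>

#define TF_HASH_BITS  4
#define TF_TABLE_SIZE (1 << TF_HASH_BITS)   /* 16 buckets, power of two */

/* Stand-in for jhash_1word(): any reasonable 32-bit mixer works here. */
static uint32_t mix32(uint32_t x)
{
        x ^= x >> 16;
        x *= 0x7feb352dU;
        x ^= x >> 15;
        x *= 0x846ca68bU;
        x ^= x >> 16;
        return x;
}

static unsigned tf_map_hash(int pid)
{
        /* Masking with (size - 1) is equivalent to "% size" only when
         * size is a power of two, which TF_HASH_BITS guarantees. */
        return mix32((uint32_t)pid) & (TF_TABLE_SIZE - 1);
}

int main(void)
{
        int pids[] = { 1, 1234, 4321, 65535 };
        for (unsigned i = 0; i < sizeof(pids) / sizeof(pids[0]); i++)
                printf("pid %5d -> bucket %u\n", pids[i], tf_map_hash(pids[i]));
        return 0;
}
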
diff --git a/runtime/uprobes/uprobes_i386.c b/runtime/uprobes/uprobes_i386.c
index 7743f400..008f32de 100644
--- a/runtime/uprobes/uprobes_i386.c
+++ b/runtime/uprobes/uprobes_i386.c
@@ -58,22 +58,22 @@
static const unsigned long good_2byte_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ------------------------------- */
- W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 00 */
- W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
- W(0x20, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
- W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
- W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
- W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
- W(0x60, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 60 */
- W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
+ W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
+ W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
+ W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
+ W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
- W(0xc0, 1,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1)| /* c0 */
- W(0xd0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* d0 */
- W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
- W(0xf0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) /* f0 */
+ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
+ W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */
+ W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
+ W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */
/* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
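
good_2byte_insns[] packs one "safe to probe" bit per 0x0f-prefixed opcode, 32 opcodes per word, so the change above is purely a data change: it sets the bits for two-byte opcodes uprobes can now single-step. A self-contained sketch of how a table of this shape is consulted, assuming the W() macro (defined earlier in uprobes_i386.c, not shown in this hunk) places row r, column c at bit (r % 32) + c; opcode_is_good() and demo_table[] are illustrative:

#include <stdint.h>
#include <stdio.h>

/* One bit per opcode: bit (n % 32) of word (n / 32) says whether
 * the two-byte instruction 0x0f <n> may be probed. */
static int opcode_is_good(const uint32_t *table, uint8_t opcode)
{
        return (table[opcode / 32] >> (opcode % 32)) & 1;
}

int main(void)
{
        /* Tiny illustrative table: every opcode in the first word is
         * allowed except 0x05, just to show both outcomes. */
        uint32_t demo_table[256 / 32] = { 0 };
        demo_table[0] = ~(UINT32_C(1) << 0x05);

        printf("0x0f 0x05 probeable? %d\n", opcode_is_good(demo_table, 0x05));
        printf("0x0f 0x1f probeable? %d\n", opcode_is_good(demo_table, 0x1f));
        return 0;
}
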
diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c
index 38ab0e2d..23810e75 100644
--- a/runtime/vsprintf.c
+++ b/runtime/vsprintf.c
@@ -361,18 +361,16 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
else
len = 1;
+ if (*fmt_copy == 'M')
+ len = len * 2; /* hex dump print size */
+
if (!(flags & STP_LEFT)) {
while (len < field_width--) {
num_bytes++;
}
}
- if (*fmt_copy == 'M') {
- num_bytes += number_size((unsigned long) *(uint64_t *) s,
- 16, field_width, len, flags);
- }
- else {
- num_bytes += len;
- }
+
+ num_bytes += len;
while (len < field_width--) {
num_bytes++;
@@ -636,16 +634,25 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
len = 1;
if (!(flags & STP_LEFT)) {
- while (len < field_width--) {
+ int actlen = len;
+ if (*fmt == 'M')
+ actlen = len * 2;
+ while (actlen < field_width--) {
if (str <= end)
*str = ' ';
++str;
}
}
- if (*fmt == 'M') {
- str = number(str, str + len - 1 < end ? str + len - 1 : end,
- (unsigned long) *(uint64_t *) s,
- 16, field_width, len, flags);
+ if (*fmt == 'M') { /* stolen from kernel: trace_seq_putmem_hex() */
+ const char _stp_hex_asc[] = "0123456789abcdef";
+ int j;
+ for (i = 0, j = 0; i < len; i++) {
+ *str = _stp_hex_asc[((*s) & 0xf0) >> 4];
+ str++;
+ *str = _stp_hex_asc[((*s) & 0x0f)];
+ str++; s++;
+ }
+ len = len * 2; /* the actual length */
}
else {
for (i = 0; i < len; ++i) {
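
The new %M conversion emits two hex digits per input byte, which is why both the sizing pass and the output pass above scale len by two. A user-space sketch of the same conversion loop (hex_dump() and the fixed-size buffer in main() are illustrative):

#include <stdio.h>

/* Convert len bytes at s into 2*len hex characters in out (not
 * NUL-terminated here, mirroring the in-place loop in _stp_vsnprintf). */
static size_t hex_dump(char *out, const unsigned char *s, size_t len)
{
        static const char hex_asc[] = "0123456789abcdef";
        size_t i, j;

        for (i = 0, j = 0; i < len; i++) {
                out[j++] = hex_asc[(s[i] & 0xf0) >> 4];
                out[j++] = hex_asc[s[i] & 0x0f];
        }
        return j;               /* the "actual length": len * 2 */
}

int main(void)
{
        const unsigned char build_id[] = { 0xde, 0xad, 0xbe, 0xef };
        char buf[2 * sizeof(build_id) + 1];
        size_t n = hex_dump(buf, build_id, sizeof(build_id));

        buf[n] = '\0';
        printf("%s\n", buf);    /* prints "deadbeef" */
        return 0;
}
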