diff options
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | runtime/uprobes/uprobes.c | 176 | ||||
-rw-r--r-- | testsuite/systemtap.base/bz6850.c | 87 | ||||
-rw-r--r-- | testsuite/systemtap.base/bz6850.exp | 21 | ||||
-rw-r--r-- | testsuite/systemtap.base/bz6850.stp | 7 |
5 files changed, 272 insertions, 28 deletions
@@ -1,3 +1,12 @@ +2008-10-03 Jim Keniston <jkenisto@us.ibm.com> + + PR 6850 + * runtime/uprobes/uprobes.c: When a probed process forks with + uretprobe_instances outstanding, create a uprobe_process and + uprobe_task for the child, and clone the uretprobe_instances. + This requires us to allow the SSOL vma to be copied on fork. + * testsuite/systemtap.base/bz6850.{exp,c,stp}: new test case + 2008-09-30 Mark Wielaard <mjw@redhat.com> * tapsets.cxx (literal_stmt_for_local): Check if alternatives can be diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c index f7d90add..0f273e93 100644 --- a/runtime/uprobes/uprobes.c +++ b/runtime/uprobes/uprobes.c @@ -57,7 +57,8 @@ static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs, struct uprobe_task *utask); static void uretprobe_handle_return(struct pt_regs *regs, struct uprobe_task *utask); -static void uretprobe_set_trampoline(struct uprobe_process *uproc); +static void uretprobe_set_trampoline(struct uprobe_process *uproc, + struct task_struct *tsk); static void zap_uretprobe_instances(struct uprobe *u, struct uprobe_process *uproc); @@ -1167,7 +1168,7 @@ static unsigned long find_next_possible_ssol_vma(unsigned long ceiling) struct mm_struct *mm = current->mm; struct rb_node *rb_node; struct vm_area_struct *vma; - unsigned long good_flags = VM_EXEC | VM_DONTCOPY | VM_DONTEXPAND; + unsigned long good_flags = VM_EXEC | VM_DONTEXPAND; unsigned long bad_flags = VM_WRITE | VM_GROWSDOWN | VM_GROWSUP; unsigned long addr = 0; @@ -1238,20 +1239,29 @@ static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes) vma = find_vma(mm, addr); BUG_ON(!vma); - /* avoid vma copy on fork() and don't expand when mremap() */ - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + /* + * Don't expand vma on mremap(). Allow vma to be copied on + * fork() -- see uprobe_fork_uproc(). + */ + vma->vm_flags |= VM_DONTEXPAND; up_write(&mm->mmap_sem); return addr; } -/* - * Initialize per-process area for single stepping out-of-line. - * Must be run by a thread in the probed process. Returns with - * area->insn_area pointing to the initialized area, or set to a - * negative errno. +/** + * uprobe_init_ssol -- initialize per-process area for single stepping + * out-of-line. + * @uproc: probed process + * @tsk: probed task: must be current if @insn_area is %NULL + * @insn_area: virtual address of the already-established SSOL vma -- + * see uprobe_fork_uproc(). + * + * Returns with @uproc->ssol_area.insn_area pointing to the initialized + * area, or set to a negative errno. */ -static noinline void uprobe_init_ssol(struct uprobe_process *uproc) +static void uprobe_init_ssol(struct uprobe_process *uproc, + struct task_struct *tsk, __user uprobe_opcode_t *insn_area) { struct uprobe_ssol_area *area = &uproc->ssol_area; struct uprobe_ssol_slot *slot; @@ -1261,9 +1271,16 @@ static noinline void uprobe_init_ssol(struct uprobe_process *uproc) /* Trampoline setup will either fail or succeed here. */ uproc->uretprobe_trampoline_addr = ERR_PTR(-ENOMEM); - area->insn_area = (uprobe_opcode_t *) uprobe_setup_ssol_vma(PAGE_SIZE); - if (IS_ERR(area->insn_area)) - return; + if (insn_area) { + BUG_ON(IS_ERR(insn_area)); + area->insn_area = insn_area; + } else { + BUG_ON(tsk != current); + area->insn_area = + (uprobe_opcode_t *) uprobe_setup_ssol_vma(PAGE_SIZE); + if (IS_ERR(area->insn_area)) + return; + } area->nfree = area->nslots = PAGE_SIZE / MAX_UINSN_BYTES; if (area->nslots > MAX_SSOL_SLOTS) @@ -1288,7 +1305,7 @@ static noinline void uprobe_init_ssol(struct uprobe_process *uproc) slot->insn = (__user uprobe_opcode_t *) slot_addr; slot_addr += MAX_UINSN_BYTES; } - uretprobe_set_trampoline(uproc); + uretprobe_set_trampoline(uproc, tsk); } /* @@ -1305,7 +1322,7 @@ static __user uprobe_opcode_t mutex_lock(&uproc->ssol_area.setup_mutex); if (likely(!area->initialized)) { /* Nobody snuck in and set things up ahead of us. */ - uprobe_init_ssol(uproc); + uprobe_init_ssol(uproc, current, NULL); area->initialized = 1; } mutex_unlock(&uproc->ssol_area.setup_mutex); @@ -2035,6 +2052,106 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine, } /* + * Duplicate the FIFO of uretprobe_instances from parent_utask into + * child_utask. Zap the uretprobe pointer, since all we care about is + * vectoring to the proper return address. Where there are multiple + * uretprobe_instances for the same function instance, copy only the + * one that contains the real return address. + */ +static int uprobe_fork_uretprobe_instances(struct uprobe_task *parent_utask, + struct uprobe_task *child_utask) +{ + struct uprobe_process *parent_uproc = parent_utask->uproc; + struct uprobe_process *child_uproc = child_utask->uproc; + __user uprobe_opcode_t *trampoline_addr = + child_uproc->uretprobe_trampoline_addr; + struct hlist_node *tmp, *tail; + struct uretprobe_instance *pri, *cri; + + BUG_ON(trampoline_addr != parent_uproc->uretprobe_trampoline_addr); + + /* Since there's no hlist_add_tail()... */ + tail = NULL; + hlist_for_each_entry(pri, tmp, &parent_utask->uretprobe_instances, + hlist) { + if (pri->ret_addr == (unsigned long) trampoline_addr) + continue; + cri = kmalloc(sizeof(*cri), GFP_USER); + if (!cri) + return -ENOMEM; + cri->rp = NULL; + cri->ret_addr = pri->ret_addr; + INIT_HLIST_NODE(&cri->hlist); + if (tail) + hlist_add_after(tail, &cri->hlist); + else + hlist_add_head(&cri->hlist, + &child_utask->uretprobe_instances); + tail = &cri->hlist; + + /* Ref-count uretprobe_instances. */ + uprobe_get_process(child_uproc); + } + BUG_ON(hlist_empty(&child_utask->uretprobe_instances)); + return 0; +} + +/* + * A probed process is forking, and at least one function in the + * call stack has a uretprobe on it. Since the child inherits the + * call stack, it's possible that the child could attempt to return + * through the uretprobe trampoline. Create a uprobe_process for + * the child, initialize its SSOL vma (which has been cloned from + * the parent), and clone the parent's list of uretprobe_instances. + * + * Called with uproc_table locked and parent_uproc->rwsem write-locked. + * + * (On architectures where it's easy to keep track of where in the + * stack the return addresses are stored, we could just poke the real + * return addresses back into the child's stack. We use this more + * general solution.) + */ +static int uprobe_fork_uproc(struct uprobe_process *parent_uproc, + struct uprobe_task *parent_utask, + struct task_struct *child_tsk) +{ + int ret = 0; + struct uprobe_process *child_uproc; + struct uprobe_task *child_utask; + + BUG_ON(!parent_uproc->uretprobe_trampoline_addr || + IS_ERR(parent_uproc->uretprobe_trampoline_addr)); + + if (!try_module_get(THIS_MODULE)) + return -ENOSYS; + child_uproc = uprobe_mk_process(child_tsk); + if (IS_ERR(child_uproc)) { + ret = (int) PTR_ERR(child_uproc); + module_put(THIS_MODULE); + return ret; + } + /* child_uproc is write-locked and ref-counted at this point. */ + + mutex_lock(&child_uproc->ssol_area.setup_mutex); + uprobe_init_ssol(child_uproc, child_tsk, + parent_uproc->ssol_area.insn_area); + child_uproc->ssol_area.initialized = 1; + mutex_unlock(&child_uproc->ssol_area.setup_mutex); + + child_utask = uprobe_find_utask(child_tsk); + BUG_ON(!child_utask); + ret = uprobe_fork_uretprobe_instances(parent_utask, child_utask); + + hlist_add_head(&child_uproc->hlist, + &uproc_table[hash_long(child_uproc->tgid, + UPROBE_HASH_BITS)]); + + up_write(&child_uproc->rwsem); + uprobe_decref_process(child_uproc); + return ret; +} + +/* * Clone callback: The current task has spawned a thread/process. * * NOTE: For now, we don't pass on uprobes from the parent to the @@ -2057,8 +2174,10 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine, /* * Lock uproc so no new uprobes can be installed 'til all - * report_clone activities are completed + * report_clone activities are completed. Lock uproc_table + * in case we have to run uprobe_fork_uproc(). */ + lock_uproc_table(); down_write(&uproc->rwsem); get_task_struct(child); @@ -2066,13 +2185,9 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine, /* New thread in the same process */ ctask = uprobe_add_task(child, uproc); BUG_ON(!ctask); - if (IS_ERR(ctask)) { - put_task_struct(child); - up_write(&uproc->rwsem); - goto fail; - } - if (ctask) - uproc->nthreads++; + if (IS_ERR(ctask)) + goto done; + uproc->nthreads++; /* * FIXME: Handle the case where uproc is quiescing * (assuming it's possible to clone while quiescing). @@ -2108,12 +2223,15 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine, } } } + + if (!hlist_empty(&ptask->uretprobe_instances)) + (void) uprobe_fork_uproc(uproc, ptask, child); } +done: put_task_struct(child); up_write(&uproc->rwsem); - -fail: + unlock_uproc_table(); return UTRACE_ACTION_RESUME; } @@ -2316,13 +2434,14 @@ EXPORT_SYMBOL_GPL(unregister_uretprobe); * uproc->ssol_area has been successfully set up. Establish the * uretprobe trampoline in slot 0. */ -static void uretprobe_set_trampoline(struct uprobe_process *uproc) +static void uretprobe_set_trampoline(struct uprobe_process *uproc, + struct task_struct *tsk) { uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION; struct uprobe_ssol_area *area = &uproc->ssol_area; struct uprobe_ssol_slot *slot = &area->slots[0]; - if (access_process_vm(current, (unsigned long) slot->insn, + if (access_process_vm(tsk, (unsigned long) slot->insn, &bp_insn, BP_INSN_SIZE, 1) == BP_INSN_SIZE) { uproc->uretprobe_trampoline_addr = slot->insn; slot->state = SSOL_RESERVED; @@ -2345,7 +2464,8 @@ static void uretprobe_handle_return(struct pt_regs *regs, struct uprobe_task *utask) { } -static void uretprobe_set_trampoline(struct uprobe_process *uproc) +static void uretprobe_set_trampoline(struct uprobe_process *uproc, + struct task_struct *tsk) { } static void zap_uretprobe_instances(struct uprobe *u, diff --git a/testsuite/systemtap.base/bz6850.c b/testsuite/systemtap.base/bz6850.c new file mode 100644 index 00000000..a8b78110 --- /dev/null +++ b/testsuite/systemtap.base/bz6850.c @@ -0,0 +1,87 @@ +/* Regression test for bugzilla 6850 */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/wait.h> + +#define PASS_MARKER "./bz6850_pass" + +/* All this in an attempt to defeat gcc's over-aggressive inlining... */ +typedef pid_t (*forker)(int); +static forker call_chain[]; + +/* + * Both parent and child return from fork2() and fork1(). Both + * processes will hit the uretprobe trampolines. The handlers should + * run in the parent. With the bug fix in place, the child will return + * correctly and do the exec (but won't run the handlers). + */ +static pid_t fork2(int ignored) +{ + return fork(); +} + +static pid_t fork1(int func_index) +{ + ++func_index; + return call_chain[func_index](func_index); /* fork2() */ +} + +static pid_t fork_and_exec2(int func_index) +{ + pid_t child; + ++func_index; + child = call_chain[func_index](func_index); /* fork1() */ + if (child == 0) { + /* I'm the child. Create the marker file. */ + char *child_args[] = { "/bin/touch", PASS_MARKER, NULL }; + char *child_env[] = { NULL }; + execve(child_args[0], child_args, child_env); + perror("execve"); + fprintf(stderr, "FAIL: child couldn't exec.\n"); + exit(2); + } + return child; +} + +static pid_t fork_and_exec1(int func_index) +{ + ++func_index; + return call_chain[func_index](func_index); /* fork_and_exec2() */ +} + +static forker call_chain[] = { + fork_and_exec1, + fork_and_exec2, + fork1, + fork2, + NULL +}; + +main() +{ + pid_t child, wait_child; + int status = 0; + + (void) unlink(PASS_MARKER); + child = call_chain[0](0); /* fork_and_exec1() */ + if (child < 0) { + fprintf(stderr, "FAIL: fork_and_exec1() failed.\n"); + exit(1); + } + wait_child = wait(&status); + if (wait_child != child) { + fprintf(stderr, "FAIL: waited for %d but got %d\n", + child, wait_child); + exit(1); + } + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "FAIL: child died with status = %d\n", + WEXITSTATUS(status)); + exit(1); + } + exit(0); +} diff --git a/testsuite/systemtap.base/bz6850.exp b/testsuite/systemtap.base/bz6850.exp new file mode 100644 index 00000000..cd56ddce --- /dev/null +++ b/testsuite/systemtap.base/bz6850.exp @@ -0,0 +1,21 @@ +set test bz6850 + +catch {exec gcc -g -o bz6850 $srcdir/$subdir/bz6850.c} err +if {$err == "" && [file exists bz6850]} then { pass "$test compile" } else { fail "$test compile" } + +set rc [stap_run_batch $srcdir/$subdir/bz6850.stp] +if {$rc == 0} then { pass "$test -p4" } else { fail "$test -p4" } + +if {! [installtest_p]} { untested "$test -p5"; return } + +spawn sudo stap $srcdir/$subdir/bz6850.stp -c ./bz6850 +expect { + -timeout 60 + -re {[^\r\n]*called\r\n} { exp_continue } + -re {[^\r\n]*returns\r\n} { exp_continue } + timeout { fail "$test (timeout)" } + eof { } +} +wait +if {[file exists bz6850_pass]} then { pass "$test -p5" } else { fail "$test -p5" } +exec rm -f bz6850_pass bz6850 diff --git a/testsuite/systemtap.base/bz6850.stp b/testsuite/systemtap.base/bz6850.stp new file mode 100644 index 00000000..d6f41862 --- /dev/null +++ b/testsuite/systemtap.base/bz6850.stp @@ -0,0 +1,7 @@ +#! stap -p4 +probe process("./bz6850").function("*").call { + printf("%s called\n", probefunc()) +} +probe process("./bz6850").function("*").return { + printf("%s returns\n", probefunc()) +} |