author    | kenistoj <kenistoj> | 2007-09-28 23:41:26 +0000
committer | kenistoj <kenistoj> | 2007-09-28 23:41:26 +0000
commit    | 04d476c727a575872b088ec853b4402bd4bf5d83 (patch)
tree      | 63ffee8a824f71660b8c36a331883e640432b40e /runtime/uprobes
parent    | 9446a6d68e4cea3554ecbbacb673b1be9dc89919 (diff)
uprobes.c, uprobes.h: architecture-independent code
uprobes_i386.c, uprobes_i386.h: i386 code
uprobes_arch.c, uprobes_arch.h: map to architecture-specific files
Diffstat (limited to 'runtime/uprobes')

-rw-r--r-- | runtime/uprobes/uprobes.c      | 2297
-rw-r--r-- | runtime/uprobes/uprobes.h      |  385
-rw-r--r-- | runtime/uprobes/uprobes_arch.c |   11
-rw-r--r-- | runtime/uprobes/uprobes_arch.h |   11
-rw-r--r-- | runtime/uprobes/uprobes_i386.c |  302
-rw-r--r-- | runtime/uprobes/uprobes_i386.h |   67

6 files changed, 3073 insertions(+), 0 deletions(-)
diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c
new file mode 100644
index 00000000..41d0ef11
--- /dev/null
+++ b/runtime/uprobes/uprobes.c
@@ -0,0 +1,2297 @@
+/*
+ * Userspace Probes (UProbes)
+ * kernel/uprobes_core.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ */
+#include <linux/types.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/rcupdate.h>
+#include <linux/err.h>
+#include <linux/kref.h>
+#include <linux/utrace.h>
+#define UPROBES_IMPLEMENTATION 1
+#include "uprobes.h"
+#include <linux/tracehook.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <asm/tracehook.h>
+#include <asm/errno.h>
+#include <asm/mman.h>
+
+#define SET_ENGINE_FLAGS 1
+#define CLEAR_ENGINE_FLAGS 0
+
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, int write);
+static int utask_fake_quiesce(struct uprobe_task *utask);
+static void uprobe_release_ssol_vma(struct uprobe_process *uproc);
+
+static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
+		struct uprobe_task *utask);
+static void uretprobe_handle_return(struct pt_regs *regs,
+		struct uprobe_task *utask);
+static void uretprobe_set_trampoline(struct uprobe_process *uproc);
+static void zap_uretprobe_instances(struct uprobe *u,
+		struct uprobe_process *uproc);
+
+typedef void (*uprobe_handler_t)(struct uprobe*, struct pt_regs*);
+#define URETPROBE_HANDLE_ENTRY ((uprobe_handler_t)-1L)
+#define is_uretprobe(u) (u->handler == URETPROBE_HANDLE_ENTRY)
+/* Point utask->active_probe at this while running uretprobe handler. */
+static struct uprobe_probept uretprobe_trampoline_dummy_probe;
+
+/*
+ * These data structures are shared by all SystemTap-generated modules
+ * that use uprobes.
+ */
+struct uprobe_globals {
+	struct hlist_head uproc_table[UPROBE_TABLE_SIZE];
+	struct mutex uproc_mutex;
+	struct hlist_head utask_table[UPROBE_TABLE_SIZE];
+	spinlock_t utask_table_lock;
+};
+static struct uprobe_globals *globals;
+static struct hlist_head *uproc_table;	/* = globals->uproc_table */
+static struct hlist_head *utask_table;	/* = globals->utask_table */
+
+#define lock_uproc_table() mutex_lock(&globals->uproc_mutex)
+#define unlock_uproc_table() mutex_unlock(&globals->uproc_mutex)
+
+#define lock_utask_table(flags) \
+	spin_lock_irqsave(&globals->utask_table_lock, (flags))
+#define unlock_utask_table(flags) \
+	spin_unlock_irqrestore(&globals->utask_table_lock, (flags))
+
+/*
+ * uprobes_data and uprobes_mutex are the only uprobes hooks in the kernel.
+ * A pointer to the uprobe_globals area is stored in uprobes_data.
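+ *
+ * A minimal sketch of the handshake each module performs (this is
+ * what verify_uprobes() below implements; error handling elided):
+ * the first module to run allocates the shared area, later ones
+ * just adopt it.
+ *
+ *	mutex_lock(&uprobes_mutex);
+ *	if (!uprobes_data)
+ *		uprobes_data = init_uprobes();	// first module since boot
+ *	globals = uprobes_data;
+ *	mutex_unlock(&uprobes_mutex);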
+ */ +extern void *uprobes_data; +extern struct mutex uprobes_mutex; + +static int verify_uprobes(void); + +/* p_uprobe_utrace_ops = &uprobe_utrace_ops. Fwd refs are a pain w/o this. */ +static const struct utrace_engine_ops *p_uprobe_utrace_ops; + +struct deferred_registration { + struct list_head list; + struct uprobe *uprobe; + int regflag; /* 0 - unregister, 1 - register */ + enum uprobe_type type; +}; + +static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk) +{ + struct hlist_head *head; + struct hlist_node *node; + struct uprobe_task *utask; + unsigned long flags; + + head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)]; + lock_utask_table(flags); + hlist_for_each_entry(utask, node, head, hlist) { + if (utask->tsk == tsk) { + unlock_utask_table(flags); + return utask; + } + } + unlock_utask_table(flags); + return NULL; +} + +static void uprobe_hash_utask(struct uprobe_task *utask) +{ + struct hlist_head *head; + unsigned long flags; + + INIT_HLIST_NODE(&utask->hlist); + head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)]; + lock_utask_table(flags); + hlist_add_head(&utask->hlist, head); + unlock_utask_table(flags); +} + +static void uprobe_unhash_utask(struct uprobe_task *utask) +{ + unsigned long flags; + + lock_utask_table(flags); + hlist_del(&utask->hlist); + unlock_utask_table(flags); +} + +static inline void uprobe_get_process(struct uprobe_process *uproc) +{ + atomic_inc(&uproc->refcount); +} + +/* + * Decrement uproc's refcount in a situation where we "know" it can't + * reach zero. It's OK to call this with uproc locked. Compare with + * uprobe_put_process(). + */ +static inline void uprobe_decref_process(struct uprobe_process *uproc) +{ + if (atomic_dec_and_test(&uproc->refcount)) + BUG(); +} + +/* + * Runs with the uproc_mutex held. Returns with uproc ref-counted and + * write-locked. + * + * Around exec time, briefly, it's possible to have one (finished) uproc + * for the old image and one for the new image. We find the latter. + */ +static struct uprobe_process *uprobe_find_process(pid_t tgid) +{ + struct hlist_head *head; + struct hlist_node *node; + struct uprobe_process *uproc; + + head = &uproc_table[hash_long(tgid, UPROBE_HASH_BITS)]; + hlist_for_each_entry(uproc, node, head, hlist) { + if (uproc->tgid == tgid && !uproc->finished) { + uprobe_get_process(uproc); + down_write(&uproc->rwsem); + return uproc; + } + } + return NULL; +} + +/* + * In the given uproc's hash table of probepoints, find the one with the + * specified virtual address. Runs with uproc->rwsem locked. + */ +static struct uprobe_probept *uprobe_find_probept(struct uprobe_process *uproc, + unsigned long vaddr) +{ + struct uprobe_probept *ppt; + struct hlist_node *node; + struct hlist_head *head = &uproc->uprobe_table[hash_long(vaddr, + UPROBE_HASH_BITS)]; + + hlist_for_each_entry(ppt, node, head, ut_node) { + if (ppt->vaddr == vaddr && ppt->state != UPROBE_DISABLED) + return ppt; + } + return NULL; +} + +/* + * set_bp: Store a breakpoint instruction at ppt->vaddr. + * Returns BP_INSN_SIZE on success. + * + * NOTE: BREAKPOINT_INSTRUCTION on all archs is the same size as + * uprobe_opcode_t. + */ +static int set_bp(struct uprobe_probept *ppt, struct task_struct *tsk) +{ + uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION; + return access_process_vm(tsk, ppt->vaddr, &bp_insn, BP_INSN_SIZE, 1); +} + +/* + * set_orig_insn: For probepoint ppt, replace the breakpoint instruction + * with the original opcode. Returns BP_INSN_SIZE on success. 
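+ *
+ * For concreteness: on i386 (see uprobes_i386.h) both this and set_bp()
+ * boil down to poking a single byte, since uprobe_opcode_t is one byte
+ * there and the breakpoint instruction is int3 -- a sketch, assuming
+ * the usual 0xcc encoding; e.g., set_bp() is effectively:
+ *
+ *	uprobe_opcode_t bp = 0xcc;	// BREAKPOINT_INSTRUCTION
+ *	access_process_vm(tsk, ppt->vaddr, &bp, 1, 1);	// len 1, write 1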
+ */ +static int set_orig_insn(struct uprobe_probept *ppt, struct task_struct *tsk) +{ + return access_process_vm(tsk, ppt->vaddr, &ppt->opcode, BP_INSN_SIZE, + 1); +} + +static void bkpt_insertion_failed(struct uprobe_probept *ppt, const char *why) +{ + printk(KERN_ERR "Can't place uprobe at pid %d vaddr %#lx: %s\n", + ppt->uproc->tgid, ppt->vaddr, why); +} + +/* + * Save a copy of the original instruction (so it can be single-stepped + * out of line), insert the breakpoint instruction, and awake + * register_uprobe(). + */ +static void insert_bkpt(struct uprobe_probept *ppt, struct task_struct *tsk) +{ + struct uprobe_kimg *uk; + long result = 0; + int len; + + if (!tsk) { + /* No surviving tasks associated with ppt->uproc */ + result = -ESRCH; + goto out; + } + + /* + * If access_process_vm() transfers fewer bytes than the maximum + * instruction size, assume that the probed instruction is smaller + * than the max and near the end of the last page of instructions. + * But there must be room at least for a breakpoint-size instruction. + */ + len = access_process_vm(tsk, ppt->vaddr, ppt->insn, MAX_UINSN_BYTES, 0); + if (len < BP_INSN_SIZE) { + bkpt_insertion_failed(ppt, + "error reading original instruction"); + result = -EIO; + goto out; + } + memcpy(&ppt->opcode, ppt->insn, BP_INSN_SIZE); + if (ppt->opcode == BREAKPOINT_INSTRUCTION) { + bkpt_insertion_failed(ppt, "bkpt already exists at that addr"); + result = -EEXIST; + goto out; + } + + if ((result = arch_validate_probed_insn(ppt)) < 0) { + bkpt_insertion_failed(ppt, "instruction type cannot be probed"); + goto out; + } + + len = set_bp(ppt, tsk); + if (len < BP_INSN_SIZE) { + bkpt_insertion_failed(ppt, "failed to insert bkpt instruction"); + result = -EIO; + goto out; + } +out: + ppt->state = (result ? UPROBE_DISABLED : UPROBE_BP_SET); + list_for_each_entry(uk, &ppt->uprobe_list, list) + uk->status = result; + wake_up_all(&ppt->waitq); +} + +static void remove_bkpt(struct uprobe_probept *ppt, struct task_struct *tsk) +{ + int len; + + if (tsk) { + len = set_orig_insn(ppt, tsk); + if (len < BP_INSN_SIZE) { + printk(KERN_ERR + "Error removing uprobe at pid %d vaddr %#lx:" + " can't restore original instruction\n", + tsk->tgid, ppt->vaddr); + /* + * This shouldn't happen, since we were previously + * able to write the breakpoint at that address. + * There's not much we can do besides let the + * process die with a SIGTRAP the next time the + * breakpoint is hit. + */ + } + } + /* Wake up unregister_uprobe(). */ + ppt->state = UPROBE_DISABLED; + wake_up_all(&ppt->waitq); +} + +/* + * Runs with all of uproc's threads quiesced and uproc->rwsem write-locked. + * As specified, insert or remove the breakpoint instruction for each + * uprobe_probept on uproc's pending list. + * tsk = one of the tasks associated with uproc -- NULL if there are + * no surviving threads. + * It's OK for uproc->pending_uprobes to be empty here. It can happen + * if a register and an unregister are requested (by different probers) + * simultaneously for the same pid/vaddr. + * Note that the current task may be a thread in uproc, or it may be + * a task running [un]register_uprobe() (or both). 
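+ *
+ * For reference, the register-side sequence that funnels through here,
+ * sketched as state transitions:
+ *
+ *	uprobe_add_probept():		ppt->state = UPROBE_INSERTING,
+ *					ppt queued on uproc->pending_uprobes
+ *	quiesce_all_threads():		every surviving thread parks
+ *	handle_pending_uprobes():	insert_bkpt() -> UPROBE_BP_SET
+ *					(UPROBE_DISABLED on failure)
+ *	wake_up_all(&ppt->waitq):	register_uprobe() reads uk->status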
+ */ +static void handle_pending_uprobes(struct uprobe_process *uproc, + struct task_struct *tsk) +{ + struct uprobe_probept *ppt, *tmp; + + list_for_each_entry_safe(ppt, tmp, &uproc->pending_uprobes, pd_node) { + switch (ppt->state) { + case UPROBE_INSERTING: + insert_bkpt(ppt, tsk); + break; + case UPROBE_REMOVING: + remove_bkpt(ppt, tsk); + break; + default: + BUG(); + } + list_del(&ppt->pd_node); + } +} + +static void utask_adjust_flags(struct uprobe_task *utask, int set, + unsigned long flags) +{ + unsigned long newflags, oldflags; + + newflags = oldflags = utask->engine->flags; + + if (set) + newflags |= flags; + else + newflags &= ~flags; + + if (newflags != oldflags) + utrace_set_flags(utask->tsk, utask->engine, newflags); +} + +static inline void clear_utrace_quiesce(struct uprobe_task *utask) +{ + utask_adjust_flags(utask, CLEAR_ENGINE_FLAGS, + UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE)); +} + +/* Opposite of quiesce_all_threads(). Same locking applies. */ +static void rouse_all_threads(struct uprobe_process *uproc) +{ + struct uprobe_task *utask; + + list_for_each_entry(utask, &uproc->thread_list, list) { + if (utask->quiescing) { + utask->quiescing = 0; + if (utask->state == UPTASK_QUIESCENT) { + utask->state = UPTASK_RUNNING; + uproc->n_quiescent_threads--; + clear_utrace_quiesce(utask); + } + } + } + /* Wake any threads that decided to sleep rather than quiesce. */ + wake_up_all(&uproc->waitq); +} + +/* + * If all of uproc's surviving threads have quiesced, do the necessary + * breakpoint insertions or removals and then un-quiesce everybody. + * tsk is a surviving thread, or NULL if there is none. Runs with + * uproc->rwsem write-locked. + */ +static void check_uproc_quiesced(struct uprobe_process *uproc, + struct task_struct *tsk) +{ + if (uproc->n_quiescent_threads >= uproc->nthreads) { + handle_pending_uprobes(uproc, tsk); + rouse_all_threads(uproc); + } +} + +/* + * Quiesce all threads in the specified process -- e.g., prior to + * breakpoint insertion. Runs with uproc->rwsem write-locked. + * Returns the number of threads that haven't died yet. + */ +static int quiesce_all_threads(struct uprobe_process *uproc, + struct uprobe_task **cur_utask_quiescing) +{ + struct uprobe_task *utask; + struct task_struct *survivor = NULL; // any survivor + int survivors = 0; + + *cur_utask_quiescing = NULL; + list_for_each_entry(utask, &uproc->thread_list, list) { + survivor = utask->tsk; + survivors++; + if (!utask->quiescing) { + /* + * If utask is currently handling a probepoint, it'll + * check utask->quiescing and quiesce when it's done. + */ + utask->quiescing = 1; + if (utask->tsk == current) + *cur_utask_quiescing = utask; + else if (utask->state == UPTASK_RUNNING) { + utask->quiesce_master = current; + utask_adjust_flags(utask, SET_ENGINE_FLAGS, + UTRACE_ACTION_QUIESCE + | UTRACE_EVENT(QUIESCE)); + utask->quiesce_master = NULL; + } + } + } + /* + * If any task was already quiesced (in utrace's opinion) when we + * called utask_adjust_flags() on it, uprobe_report_quiesce() was + * called, but wasn't in a position to call check_uproc_quiesced(). + */ + check_uproc_quiesced(uproc, survivor); + return survivors; +} + +/* Called with utask->uproc write-locked. 
+ */
+static void uprobe_free_task(struct uprobe_task *utask)
+{
+	struct deferred_registration *dr, *d;
+	struct uretprobe_instance *ri;
+	struct hlist_node *r1, *r2;
+
+	uprobe_unhash_utask(utask);
+	list_del(&utask->list);
+	list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
+		list_del(&dr->list);
+		kfree(dr);
+	}
+	hlist_for_each_entry_safe(ri, r1, r2, &utask->uretprobe_instances,
+			hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+		uprobe_decref_process(utask->uproc);
+	}
+	kfree(utask);
+}
+
+/* Runs with uproc_mutex held and uproc->rwsem write-locked. */
+static void uprobe_free_process(struct uprobe_process *uproc)
+{
+	struct uprobe_task *utask, *tmp;
+	struct uprobe_ssol_area *area = &uproc->ssol_area;
+
+	if (!uproc->finished)
+		uprobe_release_ssol_vma(uproc);
+	if (area->slots)
+		kfree(area->slots);
+	if (!hlist_unhashed(&uproc->hlist))
+		hlist_del(&uproc->hlist);
+	list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list) {
+		/*
+		 * utrace_detach() is OK here (required, it seems) even if
+		 * utask->tsk == current and we're in a utrace callback.
+		 */
+		if (utask->engine)
+			utrace_detach(utask->tsk, utask->engine);
+		uprobe_free_task(utask);
+	}
+	up_write(&uproc->rwsem);	// So kfree doesn't complain
+	kfree(uproc);
+}
+
+/*
+ * Decrement uproc's ref count. If it's zero, free uproc and return 1.
+ * Else return 0. If uproc is locked, don't call this; use
+ * uprobe_decref_process().
+ */
+static int uprobe_put_process(struct uprobe_process *uproc)
+{
+	int ret = 0;
+	if (atomic_dec_and_test(&uproc->refcount)) {
+		lock_uproc_table();
+		down_write(&uproc->rwsem);
+		if (unlikely(atomic_read(&uproc->refcount) != 0)) {
+			/*
+			 * This works because uproc_mutex is held any
+			 * time the ref count can go from 0 to 1 -- e.g.,
+			 * register_uprobe() sneaks in with a new probe.
+			 */
+			up_write(&uproc->rwsem);
+		} else {
+			uprobe_free_process(uproc);
+			ret = 1;
+		}
+		unlock_uproc_table();
+	}
+	return ret;
+}
+
+static struct uprobe_kimg *uprobe_mk_kimg(struct uprobe *u)
+{
+	struct uprobe_kimg *uk = (struct uprobe_kimg*)kzalloc(sizeof *uk,
+			GFP_USER);
+	if (unlikely(!uk))
+		return ERR_PTR(-ENOMEM);
+	u->kdata = uk;
+	uk->uprobe = u;
+	uk->ppt = NULL;
+	INIT_LIST_HEAD(&uk->list);
+	uk->status = -EBUSY;
+	return uk;
+}
+
+/*
+ * Allocate a uprobe_task object for t and add it to uproc's list.
+ * Called with t "got" and uproc->rwsem write-locked.
Called in one of + * the following cases: + * - before setting the first uprobe in t's process + * - we're in uprobe_report_clone() and t is the newly added thread + * Returns: + * - pointer to new uprobe_task on success + * - NULL if t dies before we can utrace_attach it + * - negative errno otherwise + */ +static struct uprobe_task *uprobe_add_task(struct task_struct *t, + struct uprobe_process *uproc) +{ + struct uprobe_task *utask; + struct utrace_attached_engine *engine; + + utask = (struct uprobe_task *)kzalloc(sizeof *utask, GFP_USER); + if (unlikely(utask == NULL)) + return ERR_PTR(-ENOMEM); + + utask->tsk = t; + utask->state = UPTASK_RUNNING; + utask->quiescing = 0; + utask->uproc = uproc; + utask->active_probe = NULL; + utask->doomed = 0; + INIT_HLIST_HEAD(&utask->uretprobe_instances); + INIT_LIST_HEAD(&utask->deferred_registrations); + INIT_LIST_HEAD(&utask->list); + list_add_tail(&utask->list, &uproc->thread_list); + uprobe_hash_utask(utask); + + engine = utrace_attach(t, UTRACE_ATTACH_CREATE, p_uprobe_utrace_ops, + utask); + if (IS_ERR(engine)) { + long err = PTR_ERR(engine); + printk("uprobes: utrace_attach failed, returned %ld\n", err); + uprobe_free_task(utask); + if (err == -ESRCH) + return NULL; + return ERR_PTR(err); + } + utask->engine = engine; + /* + * Always watch for traps, clones, execs and exits. Caller must + * set any other engine flags. + */ + utask_adjust_flags(utask, SET_ENGINE_FLAGS, + UTRACE_EVENT(SIGNAL) | UTRACE_EVENT(SIGNAL_IGN) | + UTRACE_EVENT(SIGNAL_CORE) | UTRACE_EVENT(EXEC) | + UTRACE_EVENT(CLONE) | UTRACE_EVENT(EXIT)); + /* + * Note that it's OK if t dies just after utrace_attach, because + * with the engine in place, the appropriate report_* callback + * should handle it after we release uproc->rwsem. + */ + return utask; +} + +/* See comment in uprobe_mk_process(). */ +static struct task_struct *find_next_thread_to_add(struct uprobe_process *uproc, struct task_struct *start) +{ + struct task_struct *t; + struct uprobe_task *utask; + + read_lock(&tasklist_lock); + t = start; + do { + if (unlikely(t->flags & PF_EXITING)) + goto dont_add; + list_for_each_entry(utask, &uproc->thread_list, list) { + if (utask->tsk == t) + /* Already added */ + goto dont_add; + } + /* Found thread/task to add. */ + get_task_struct(t); + read_unlock(&tasklist_lock); + return t; +dont_add: + t = next_thread(t); + } while (t != start); + + read_unlock(&tasklist_lock); + return NULL; +} + +/* Runs with uproc_mutex held; returns with uproc->rwsem write-locked. 
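+ *
+ * The same lock order holds throughout this file: uproc_mutex is
+ * always taken before any uproc->rwsem, never the other way around --
+ * e.g., the lookup pattern in register_uprobe():
+ *
+ *	lock_uproc_table();
+ *	uproc = uprobe_find_process(p->tgid);	// returns rwsem write-locked
+ *	unlock_uproc_table();
+ *	...
+ *	up_write(&uproc->rwsem);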
*/ +static struct uprobe_process *uprobe_mk_process(struct task_struct *p) +{ + struct uprobe_process *uproc; + struct uprobe_task *utask; + struct task_struct *add_me; + int i; + long err; + + uproc = (struct uprobe_process *)kzalloc(sizeof *uproc, GFP_USER); + if (unlikely(uproc == NULL)) + return ERR_PTR(-ENOMEM); + + /* Initialize fields */ + atomic_set(&uproc->refcount, 1); + init_rwsem(&uproc->rwsem); + down_write(&uproc->rwsem); + init_waitqueue_head(&uproc->waitq); + for (i = 0; i < UPROBE_TABLE_SIZE; i++) + INIT_HLIST_HEAD(&uproc->uprobe_table[i]); + uproc->nppt = 0; + INIT_LIST_HEAD(&uproc->pending_uprobes); + INIT_LIST_HEAD(&uproc->thread_list); + uproc->nthreads = 0; + uproc->n_quiescent_threads = 0; + INIT_HLIST_NODE(&uproc->hlist); + uproc->tgid = p->tgid; + uproc->finished = 0; + uproc->uretprobe_trampoline_addr = NULL; + + uproc->ssol_area.insn_area = NULL; + uproc->ssol_area.initialized = 0; + mutex_init(&uproc->ssol_area.setup_mutex); +#ifdef CONFIG_UPROBES_SSOL + uproc->sstep_out_of_line = 1; +#else + uproc->sstep_out_of_line = 0; +#endif + + /* + * Create and populate one utask per thread in this process. We + * can't call uprobe_add_task() while holding tasklist_lock, so we: + * 1. Lock task list. + * 2. Find the next task, add_me, in this process that's not + * already on uproc's thread_list. (Start search at previous + * one found.) + * 3. Unlock task list. + * 4. uprobe_add_task(add_me, uproc) + * Repeat 1-4 'til we have utasks for all tasks. + */ + add_me = p; + while ((add_me = find_next_thread_to_add(uproc, add_me)) != NULL) { + utask = uprobe_add_task(add_me, uproc); + put_task_struct(add_me); + if (IS_ERR(utask)) { + err = PTR_ERR(utask); + goto fail; + } + if (utask) + uproc->nthreads++; + } + + if (uproc->nthreads == 0) { + /* All threads -- even p -- are dead. */ + err = -ESRCH; + goto fail; + } + return uproc; + +fail: + uprobe_free_process(uproc); + return ERR_PTR(err); +} + +/* + * Creates a uprobe_probept and connects it to uk and uproc. Runs with + * uproc->rwsem write-locked. + */ +static struct uprobe_probept *uprobe_add_probept(struct uprobe_kimg *uk, + struct uprobe_process *uproc) +{ + struct uprobe_probept *ppt; + + ppt = (struct uprobe_probept *)kzalloc(sizeof *ppt, GFP_USER); + if (unlikely(ppt == NULL)) + return ERR_PTR(-ENOMEM); + init_waitqueue_head(&ppt->waitq); + mutex_init(&ppt->ssil_mutex); + mutex_init(&ppt->slot_mutex); + ppt->slot = NULL; + + /* Connect to uk. */ + INIT_LIST_HEAD(&ppt->uprobe_list); + list_add_tail(&uk->list, &ppt->uprobe_list); + uk->ppt = ppt; + uk->status = -EBUSY; + ppt->vaddr = uk->uprobe->vaddr; + + /* Connect to uproc. */ + ppt->state = UPROBE_INSERTING; + ppt->uproc = uproc; + INIT_LIST_HEAD(&ppt->pd_node); + list_add_tail(&ppt->pd_node, &uproc->pending_uprobes); + INIT_HLIST_NODE(&ppt->ut_node); + hlist_add_head(&ppt->ut_node, + &uproc->uprobe_table[hash_long(ppt->vaddr, UPROBE_HASH_BITS)]); + uproc->nppt++; + uprobe_get_process(uproc); + return ppt; +} + +/* ppt is going away. Free its slot (if it owns one) in the SSOL area. 
*/ +static void uprobe_free_slot(struct uprobe_probept *ppt) +{ + struct uprobe_ssol_slot *slot = ppt->slot; + if (slot) { + down_write(&slot->rwsem); + if (slot->owner == ppt) { + unsigned long flags; + struct uprobe_ssol_area *area = &ppt->uproc->ssol_area; + spin_lock_irqsave(&area->lock, flags); + slot->state = SSOL_FREE; + slot->owner = NULL; + area->nfree++; + spin_unlock_irqrestore(&area->lock, flags); + } + up_write(&slot->rwsem); + } +} + +/* + * Runs with ppt->uproc write-locked. Frees ppt and decrements the ref count + * on ppt->uproc (but ref count shouldn't hit 0). + */ +static void uprobe_free_probept(struct uprobe_probept *ppt) +{ + struct uprobe_process *uproc = ppt->uproc; + uprobe_free_slot(ppt); + hlist_del(&ppt->ut_node); + uproc->nppt--; + kfree(ppt); + uprobe_decref_process(uproc); +} + +static void uprobe_free_kimg(struct uprobe_kimg *uk) +{ + uk->uprobe->kdata = NULL; + kfree(uk); +} + +/* + * Runs with uprobe_process write-locked. + * Note that we never free u, because the user owns that. + */ +static void purge_uprobe(struct uprobe_kimg *uk) +{ + struct uprobe_probept *ppt = uk->ppt; + list_del(&uk->list); + uprobe_free_kimg(uk); + if (list_empty(&ppt->uprobe_list)) + uprobe_free_probept(ppt); +} + +/* Probed address must be in an executable VM area, outside the SSOL area. */ +static int uprobe_validate_vaddr(struct task_struct *p, unsigned long vaddr, + struct uprobe_process *uproc) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = p->mm; + if (!mm) + return -EINVAL; + down_read(&mm->mmap_sem); + vma = find_vma(mm, vaddr); + if (!vma || vaddr < vma->vm_start || !(vma->vm_flags & VM_EXEC) + || vma->vm_start == (unsigned long) uproc->ssol_area.insn_area) { + up_read(&mm->mmap_sem); + return -EINVAL; + } + up_read(&mm->mmap_sem); + return 0; +} + +static struct task_struct *uprobe_get_task(pid_t pid) +{ + struct task_struct *p; + rcu_read_lock(); + p = find_task_by_pid(pid); + if (p) + get_task_struct(p); + rcu_read_unlock(); + return p; +} + +/* Runs with utask->uproc read-locked. Returns -EINPROGRESS on success. */ +static int defer_registration(struct uprobe *u, int regflag, + struct uprobe_task *utask) +{ + struct deferred_registration *dr = + kmalloc(sizeof(struct deferred_registration), GFP_USER); + if (!dr) + return -ENOMEM; + dr->type = (is_uretprobe(u) ? UPTY_URETPROBE : UPTY_UPROBE); + dr->uprobe = u; + dr->regflag = regflag; + INIT_LIST_HEAD(&dr->list); + list_add_tail(&dr->list, &utask->deferred_registrations); + return -EINPROGRESS; +} + +/* See Documentation/uprobes.txt. */ +static +int register_uprobe(struct uprobe *u) +{ + struct task_struct *p; + struct uprobe_process *uproc; + struct uprobe_kimg *uk; + struct uprobe_probept *ppt; + struct uprobe_task *cur_utask, *cur_utask_quiescing = NULL; + int survivors, ret = 0, uproc_is_new = 0; + if ((ret = verify_uprobes()) < 0) + return ret; + + if (!u || !u->handler) + return -EINVAL; + + p = uprobe_get_task(u->pid); + if (!p) + return -ESRCH; + + cur_utask = uprobe_find_utask(current); + if (cur_utask && cur_utask->active_probe) { + /* + * Called from handler; cur_utask->uproc is read-locked. + * Do this registration later. + */ + put_task_struct(p); + return defer_registration(u, 1, cur_utask); + } + + /* Get the uprobe_process for this pid, or make a new one. 
*/ + lock_uproc_table(); + uproc = uprobe_find_process(p->tgid); + + if (uproc) + unlock_uproc_table(); + else { + uproc = uprobe_mk_process(p); + if (IS_ERR(uproc)) { + ret = (int) PTR_ERR(uproc); + unlock_uproc_table(); + goto fail_tsk; + } + /* Hold uproc_mutex until we've added uproc to uproc_table. */ + uproc_is_new = 1; + } + + if (is_uretprobe(u) && IS_ERR(uproc->uretprobe_trampoline_addr)) { + /* Previously failed to set up trampoline. */ + ret = -ENOMEM; + goto fail_uproc; + } + + if ((ret = uprobe_validate_vaddr(p, u->vaddr, uproc)) < 0) + goto fail_uproc; + + if (u->kdata) { + /* + * Probe is already/still registered. This is the only + * place we return -EBUSY to the user. + */ + ret = -EBUSY; + goto fail_uproc; + } + + uk = uprobe_mk_kimg(u); + if (IS_ERR(uk)) { + ret = (int) PTR_ERR(uk); + goto fail_uproc; + } + + /* See if we already have a probepoint at the vaddr. */ + ppt = (uproc_is_new ? NULL : uprobe_find_probept(uproc, u->vaddr)); + if (ppt) { + /* Breakpoint is already in place, or soon will be. */ + uk->ppt = ppt; + list_add_tail(&uk->list, &ppt->uprobe_list); + switch (ppt->state) { + case UPROBE_INSERTING: + uk->status = -EBUSY; // in progress + if (uproc->tgid == current->tgid) { + cur_utask_quiescing = cur_utask; + BUG_ON(!cur_utask_quiescing); + } + break; + case UPROBE_REMOVING: + /* Wait! Don't remove that bkpt after all! */ + ppt->state = UPROBE_BP_SET; + list_del(&ppt->pd_node); // Remove from pending list. + wake_up_all(&ppt->waitq); // Wake unregister_uprobe(). + /*FALLTHROUGH*/ + case UPROBE_BP_SET: + uk->status = 0; + break; + default: + BUG(); + } + up_write(&uproc->rwsem); + put_task_struct(p); + if (uk->status == 0) { + uprobe_put_process(uproc); + return 0; + } + goto await_bkpt_insertion; + } else { + ppt = uprobe_add_probept(uk, uproc); + if (IS_ERR(ppt)) { + ret = (int) PTR_ERR(ppt); + goto fail_uk; + } + } + + if (uproc_is_new) { + hlist_add_head(&uproc->hlist, + &uproc_table[hash_long(uproc->tgid, UPROBE_HASH_BITS)]); + unlock_uproc_table(); + } + put_task_struct(p); + survivors = quiesce_all_threads(uproc, &cur_utask_quiescing); + + if (survivors == 0) { + purge_uprobe(uk); + up_write(&uproc->rwsem); + uprobe_put_process(uproc); + return -ESRCH; + } + up_write(&uproc->rwsem); + +await_bkpt_insertion: + if (cur_utask_quiescing) + /* Current task is probing its own process. */ + (void) utask_fake_quiesce(cur_utask_quiescing); + else + wait_event(ppt->waitq, ppt->state != UPROBE_INSERTING); + ret = uk->status; + if (ret != 0) { + down_write(&uproc->rwsem); + purge_uprobe(uk); + up_write(&uproc->rwsem); + } + uprobe_put_process(uproc); + return ret; + +fail_uk: + uprobe_free_kimg(uk); + +fail_uproc: + if (uproc_is_new) { + uprobe_free_process(uproc); + unlock_uproc_table(); + } else { + up_write(&uproc->rwsem); + uprobe_put_process(uproc); + } + +fail_tsk: + put_task_struct(p); + return ret; +} + +/* See Documentation/uprobes.txt. 
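+ *
+ * For orientation, the usual client pairing -- a sketch with a
+ * hypothetical handler and placeholder pid/address, error handling
+ * elided:
+ *
+ *	static void my_handler(struct uprobe *u, struct pt_regs *regs)
+ *	{
+ *		// runs in the probed task when it hits u->vaddr
+ *	}
+ *
+ *	static struct uprobe u = {
+ *		.pid = target_pid,	// probed process
+ *		.vaddr = target_addr,	// address of probed instruction
+ *		.handler = my_handler,
+ *	};
+ *
+ *	ret = register_uprobe(&u);	// may sleep; 0 on success
+ *	...
+ *	unregister_uprobe(&u);		// may sleep; waits for bkpt removal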
*/ +static +void unregister_uprobe(struct uprobe *u) +{ + struct task_struct *p; + struct uprobe_process *uproc; + struct uprobe_kimg *uk; + struct uprobe_probept *ppt; + struct uprobe_task *cur_utask, *cur_utask_quiescing = NULL; + + if (verify_uprobes() < 0) + return; + if (!u) + return; + p = uprobe_get_task(u->pid); + if (!p) + return; + + cur_utask = uprobe_find_utask(current); + if (cur_utask && cur_utask->active_probe) { + /* Called from handler; uproc is read-locked; do this later */ + put_task_struct(p); + (void) defer_registration(u, 0, cur_utask); + return; + } + + /* + * Lock uproc before walking the graph, in case the process we're + * probing is exiting. + */ + lock_uproc_table(); + uproc = uprobe_find_process(p->tgid); + unlock_uproc_table(); + put_task_struct(p); + if (!uproc) + return; + + uk = (struct uprobe_kimg *)u->kdata; + if (!uk) + /* + * This probe was never successfully registered, or + * has already been unregistered. + */ + goto done; + if (uk->status == -EBUSY) + /* Looks like register or unregister is already in progress. */ + goto done; + ppt = uk->ppt; + + list_del(&uk->list); + uprobe_free_kimg(uk); + + if (is_uretprobe(u)) + zap_uretprobe_instances(u, uproc); + + if (!list_empty(&ppt->uprobe_list)) + goto done; + + /* + * The last uprobe at ppt's probepoint is being unregistered. + * Queue the breakpoint for removal. + */ + ppt->state = UPROBE_REMOVING; + list_add_tail(&ppt->pd_node, &uproc->pending_uprobes); + + (void) quiesce_all_threads(uproc, &cur_utask_quiescing); + up_write(&uproc->rwsem); + if (cur_utask_quiescing) + /* Current task is probing its own process. */ + (void) utask_fake_quiesce(cur_utask_quiescing); + else + wait_event(ppt->waitq, ppt->state != UPROBE_REMOVING); + + if (likely(ppt->state == UPROBE_DISABLED)) { + down_write(&uproc->rwsem); + uprobe_free_probept(ppt); + /* else somebody else's register_uprobe() resurrected ppt. */ + up_write(&uproc->rwsem); + } + uprobe_put_process(uproc); + return; + +done: + up_write(&uproc->rwsem); + uprobe_put_process(uproc); +} + +/* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */ +static struct task_struct *find_surviving_thread(struct uprobe_process *uproc) +{ + struct uprobe_task *utask; + + list_for_each_entry(utask, &uproc->thread_list, list) + return utask->tsk; + return NULL; +} + +/* + * Run all the deferred_registrations previously queued by the current utask. + * Runs with no locks or mutexes held. The current utask could disappear + * as the result of unregister_u*probe() called here. + */ +static void uprobe_run_def_regs(struct list_head *drlist) +{ + struct deferred_registration *dr, *d; + + list_for_each_entry_safe(dr, d, drlist, list) { + int result = 0; + struct uprobe *u = dr->uprobe; + + if (dr->type == UPTY_URETPROBE) { + struct uretprobe *rp = + container_of(u, struct uretprobe, u); + if (dr->regflag) + result = register_uretprobe(rp); + else + unregister_uretprobe(rp); + } else { + if (dr->regflag) + result = register_uprobe(u); + else + unregister_uprobe(u); + } + if (u && u->registration_callback) + u->registration_callback(u, dr->regflag, dr->type, + result); + list_del(&dr->list); + kfree(dr); + } +} + +/* + * Functions for allocation of the SSOL area, and the instruction slots + * therein + */ + +/* + * We leave the SSOL vma in place even after all the probes are gone. + * We used to remember its address in current->mm->context.uprobes_ssol_area, + * but adding that field to mm_context broke KAPI compatibility. 
+ * Instead, when we shut down the uproc for lack of probes, we "tag" the vma + * for later identification. This is not particularly robust, but it's + * no more vulnerable to ptrace or mprotect mischief than any other part + * of the address space. + */ +#define UPROBES_SSOL_VMA_TAG \ + "This is the SSOL area for uprobes. Mess with it at your own risk." +#define UPROBES_SSOL_TAGSZ ((int)sizeof(UPROBES_SSOL_VMA_TAG)) + +/* + * Searching downward from ceiling address (0 signifies top of memory), + * find the next vma whose flags indicate it could be an SSOL area. + * Return its address, or 0 for no match. + */ +static unsigned long find_next_possible_ssol_vma(unsigned long ceiling) +{ + struct mm_struct *mm = current->mm; + struct rb_node *rb_node; + struct vm_area_struct *vma; + unsigned long good_flags = VM_EXEC | VM_DONTCOPY | VM_DONTEXPAND; + unsigned long bad_flags = VM_WRITE | VM_GROWSDOWN | VM_GROWSUP; + unsigned long addr = 0; + + down_read(&mm->mmap_sem); + for (rb_node=rb_last(&mm->mm_rb); rb_node; rb_node=rb_prev(rb_node)) { + vma = rb_entry(rb_node, struct vm_area_struct, vm_rb); + if (ceiling && vma->vm_start >= ceiling) + continue; + if ((vma->vm_flags & good_flags) != good_flags) + continue; + if ((vma->vm_flags & bad_flags) != 0) + continue; + addr = vma->vm_start; + break; + } + up_read(&mm->mmap_sem); + return addr; +} + +static noinline unsigned long find_old_ssol_vma(void) +{ + unsigned long addr; + unsigned long ceiling = 0; // top of memory + char buf[UPROBES_SSOL_TAGSZ]; + while ((addr = find_next_possible_ssol_vma(ceiling)) != 0) { + ceiling = addr; + if (copy_from_user(buf, (const void __user*)addr, + UPROBES_SSOL_TAGSZ)) + continue; + if (!strcmp(buf, UPROBES_SSOL_VMA_TAG)) + return addr; + } + return 0; +} + +/* + * Mmap nbytes bytes for the uprobes SSOL area for the current process. + * Returns the address of the page, or a negative errno. + * This approach was suggested by Roland McGrath. + */ +static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes) +{ + unsigned long addr; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + BUG_ON(nbytes & ~PAGE_MASK); + if ((addr = find_old_ssol_vma()) != 0) + return addr; + + down_write(&mm->mmap_sem); + /* + * Find the end of the top mapping and skip a page. + * If there is no space for PAGE_SIZE above + * that, mmap will ignore our address hint. + */ + vma = rb_entry(rb_last(&mm->mm_rb), struct vm_area_struct, vm_rb); + addr = vma->vm_end + PAGE_SIZE; + addr = do_mmap_pgoff(NULL, addr, nbytes, PROT_EXEC, + MAP_PRIVATE|MAP_ANONYMOUS, 0); + if (addr & ~PAGE_MASK) { + up_write(&mm->mmap_sem); + printk(KERN_ERR "Uprobes failed to allocate a vma for" + " pid/tgid %d/%d for single-stepping out of line.\n", + current->pid, current->tgid); + return addr; + } + + vma = find_vma(mm, addr); + BUG_ON(!vma); + /* avoid vma copy on fork() and don't expand when mremap() */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + + up_write(&mm->mmap_sem); + return addr; +} + +/* + * Initialize per-process area for single stepping out-of-line. + * Must be run by a thread in the probed process. Returns with + * area->insn_area pointing to the initialized area, or set to a + * negative errno. + */ +static noinline void uprobe_init_ssol(struct uprobe_process *uproc) +{ + struct uprobe_ssol_area *area = &uproc->ssol_area; + struct uprobe_ssol_slot *slot; + int i; + char *slot_addr; // Simplify pointer arithmetic + + /* Trampoline setup will either fail or succeed here. 
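+ *
+ * Once setup succeeds, the one-page area is carved into
+ * PAGE_SIZE / MAX_UINSN_BYTES fixed-size slots -- e.g., assuming a
+ * 4096-byte page and 16-byte MAX_UINSN_BYTES, 256 slots:
+ *
+ *	insn_area + 0		slot 0: uretprobe trampoline (SSOL_RESERVED)
+ *	insn_area + 16		slot 1: first stealable instruction slot
+ *	...
+ *	insn_area + 4080	slot 255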
+ */
+	uproc->uretprobe_trampoline_addr = ERR_PTR(-ENOMEM);
+
+	area->insn_area = (uprobe_opcode_t *) uprobe_setup_ssol_vma(PAGE_SIZE);
+	if (IS_ERR(area->insn_area))
+		return;
+
+	area->nfree = area->nslots = PAGE_SIZE / MAX_UINSN_BYTES;
+	area->slots = (struct uprobe_ssol_slot *)
+		kzalloc(sizeof(struct uprobe_ssol_slot) * area->nslots,
+			GFP_USER);
+	if (!area->slots) {
+		area->insn_area = ERR_PTR(-ENOMEM);
+		return;
+	}
+	spin_lock_init(&area->lock);
+	area->next_slot = 0;
+	slot_addr = (char*) area->insn_area;
+	for (i = 0; i < area->nslots; i++) {
+		slot = &area->slots[i];
+		init_rwsem(&slot->rwsem);
+		slot->state = SSOL_FREE;
+		slot->owner = NULL;
+		slot->last_used = 0;
+		slot->insn = (__user uprobe_opcode_t *) slot_addr;
+		slot_addr += MAX_UINSN_BYTES;
+	}
+	uretprobe_set_trampoline(uproc);
+}
+
+/*
+ * Verify that the SSOL area has been set up for uproc. Returns a
+ * pointer to the SSOL area, or a negative errno if we couldn't set it up.
+ */
+static __user uprobe_opcode_t
+	*uprobe_verify_ssol(struct uprobe_process *uproc)
+{
+	struct uprobe_ssol_area *area = &uproc->ssol_area;
+
+	if (unlikely(!area->initialized)) {
+		/* First time through for this probed process */
+		mutex_lock(&uproc->ssol_area.setup_mutex);
+		if (likely(!area->initialized)) {
+			/* Nobody snuck in and set things up ahead of us. */
+			uprobe_init_ssol(uproc);
+			area->initialized = 1;
+		}
+		mutex_unlock(&uproc->ssol_area.setup_mutex);
+	}
+	return area->insn_area;
+}
+
+/*
+ * uproc is going away, but the process lives on. Tag the SSOL vma so a new
+ * uproc can reuse it if more probes are requested.
+ */
+static void uprobe_release_ssol_vma(struct uprobe_process *uproc)
+{
+	unsigned long addr;
+	struct task_struct *tsk;
+	static const char *buf = UPROBES_SSOL_VMA_TAG;
+	int nb;
+
+	/* No need to muck with dying image's mm_struct. */
+	BUG_ON(uproc->finished);
+	addr = (unsigned long) uproc->ssol_area.insn_area;
+	if (!addr || IS_ERR_VALUE(addr))
+		return;
+	tsk = find_surviving_thread(uproc);
+	if (!tsk)
+		return;
+	nb = access_process_vm(tsk, addr, (void*)buf, UPROBES_SSOL_TAGSZ, 1);
+	if (nb != UPROBES_SSOL_TAGSZ)
+		printk(KERN_ERR "Failed to tag uprobes SSOL vma: "
+			"pid/tgid=%d/%d, vaddr=%#lx\n", tsk->pid, tsk->tgid,
+			addr);
+}
+
+static inline int advance_slot(int slot, struct uprobe_ssol_area *area)
+{
+	/* Slot 0 is reserved for uretprobe trampoline. */
+	slot++;
+	if (unlikely(slot >= area->nslots))
+		slot = 1;
+	return slot;
+}
+
+/*
+ * Return the slot number of the least-recently-used slot in the
+ * neighborhood of area->next_slot. Limit the number of slots we test
+ * to keep it fast. Nobody dies if this isn't the best choice.
+ */
+static int uprobe_lru_insn_slot(struct uprobe_ssol_area *area)
+{
+#define MAX_LRU_TESTS 10
+	struct uprobe_ssol_slot *s;
+	int lru_slot = -1;
+	unsigned long lru_time = ULONG_MAX;
+	int nr_lru_tests = 0;
+	int slot = area->next_slot;
+	do {
+		s = &area->slots[slot];
+		if (likely(s->state == SSOL_ASSIGNED)) {
+			if (lru_time > s->last_used) {
+				lru_time = s->last_used;
+				lru_slot = slot;
+			}
+			if (++nr_lru_tests >= MAX_LRU_TESTS)
+				break;
+		}
+		slot = advance_slot(slot, area);
+	} while (slot != area->next_slot);
+
+	if (unlikely(lru_slot < 0))
+		/* All slots are in the act of being stolen. Join the melee. */
+		return area->next_slot;
+	else
+		return lru_slot;
+}
+
+/*
+ * Choose an instruction slot and take it. Choose a free slot if there is one.
+ * Otherwise choose the least-recently-used slot. Returns with slot
+ * read-locked and containing the desired instruction.
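+ *
+ * Stealing is safe because every single-stepping thread holds the
+ * slot's rwsem for read; marking the victim SSOL_BEING_STOLEN and then
+ * taking the rwsem for write waits out any thread still in the slot:
+ *
+ *	stepping thread			stealing thread
+ *	down_read(&s->rwsem)
+ *	single-steps in s->insn		s->state = SSOL_BEING_STOLEN
+ *	up_read(&s->rwsem)		down_write(&s->rwsem)	// blocks
+ *					copy new insn, s->owner = new ppt
+ *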
Runs with + * ppt->slot_mutex locked. + */ +static struct uprobe_ssol_slot + *uprobe_take_insn_slot(struct uprobe_probept *ppt) +{ + struct uprobe_process *uproc = ppt->uproc; + struct uprobe_ssol_area *area = &uproc->ssol_area; + struct uprobe_ssol_slot *s; + int len, slot; + unsigned long flags; + + spin_lock_irqsave(&area->lock, flags); + + if (area->nfree) { + for (slot = 0; slot < area->nslots; slot++) { + if (area->slots[slot].state == SSOL_FREE) { + area->nfree--; + goto found_slot; + } + } + /* Shouldn't get here. Fix nfree and get on with life. */ + area->nfree = 0; + } + slot = uprobe_lru_insn_slot(area); + +found_slot: + area->next_slot = advance_slot(slot, area); + s = &area->slots[slot]; + s->state = SSOL_BEING_STOLEN; + + spin_unlock_irqrestore(&area->lock, flags); + + /* Wait for current users of slot to finish. */ + down_write(&s->rwsem); + ppt->slot = s; + s->owner = ppt; + s->last_used = jiffies; + s->state = SSOL_ASSIGNED; + /* Copy the original instruction to the chosen slot. */ + len = access_process_vm(current, (unsigned long)s->insn, + ppt->insn, MAX_UINSN_BYTES, 1); + if (unlikely(len < MAX_UINSN_BYTES)) { + up_write(&s->rwsem); + printk(KERN_ERR "Failed to copy instruction at %#lx" + " to SSOL area (%#lx)\n", ppt->vaddr, + (unsigned long) area->slots); + return NULL; + } + /* Let other threads single-step in this slot. */ + downgrade_write(&s->rwsem); + return s; +} + +/* ppt doesn't own a slot. Get one for ppt, and return it read-locked. */ +static struct uprobe_ssol_slot + *uprobe_find_insn_slot(struct uprobe_probept *ppt) +{ + struct uprobe_ssol_slot *slot; + + mutex_lock(&ppt->slot_mutex); + slot = ppt->slot; + if (unlikely(slot && slot->owner == ppt)) { + /* Looks like another thread snuck in and got a slot for us. */ + down_read(&slot->rwsem); + if (likely(slot->owner == ppt)) { + slot->last_used = jiffies; + mutex_unlock(&ppt->slot_mutex); + return slot; + } + /* ... but then somebody stole it. */ + up_read(&slot->rwsem); + } + slot = uprobe_take_insn_slot(ppt); + mutex_unlock(&ppt->slot_mutex); + return slot; +} + +/* + * Ensure that ppt owns an instruction slot for single-stepping. + * Returns with the slot read-locked and ppt->slot pointing at it. + */ +static +struct uprobe_ssol_slot *uprobe_get_insn_slot(struct uprobe_probept *ppt) +{ + struct uprobe_ssol_slot *slot = ppt->slot; + + if (unlikely(!slot)) + return uprobe_find_insn_slot(ppt); + + down_read(&slot->rwsem); + if (unlikely(slot->owner != ppt)) { + up_read(&slot->rwsem); + return uprobe_find_insn_slot(ppt); + } + slot->last_used = jiffies; + return slot; +} + +/* + * utrace engine report callbacks + */ + +/* + * We've been asked to quiesce, but aren't in a position to do so. + * This could happen in either of the following cases: + * + * 1) Our own thread is doing a register or unregister operation -- + * e.g., as called from a u[ret]probe handler or a non-uprobes utrace + * callback. We can't wait_event() for ourselves in [un]register_uprobe(). + * + * 2) We've been asked to quiesce, but we hit a probepoint first. Now + * we're in the report_signal callback, having handled the probepoint. + * We'd like to just set the UTRACE_ACTION_QUIESCE and + * UTRACE_EVENT(QUIESCE) flags and coast into quiescence. Unfortunately, + * it's possible to hit a probepoint again before we quiesce. When + * processing the SIGTRAP, utrace would call uprobe_report_quiesce(), + * which must decline to take any action so as to avoid removing the + * uprobe just hit. 
As a result, we could keep hitting breakpoints + * and never quiescing. + * + * So here we do essentially what we'd prefer to do in uprobe_report_quiesce(). + * If we're the last thread to quiesce, handle_pending_uprobes() and + * rouse_all_threads(). Otherwise, pretend we're quiescent and sleep until + * the last quiescent thread handles that stuff and then wakes us. + * + * Called and returns with no mutexes held. Returns 1 if we free utask->uproc, + * else 0. + */ +static int utask_fake_quiesce(struct uprobe_task *utask) +{ + struct uprobe_process *uproc = utask->uproc; + enum uprobe_task_state prev_state = utask->state; + + down_write(&uproc->rwsem); + + /* In case we're somehow set to quiesce for real... */ + clear_utrace_quiesce(utask); + + if (uproc->n_quiescent_threads == uproc->nthreads-1) { + /* We're the last thread to "quiesce." */ + handle_pending_uprobes(uproc, utask->tsk); + rouse_all_threads(uproc); + up_write(&uproc->rwsem); + return 0; + } else { + utask->state = UPTASK_SLEEPING; + uproc->n_quiescent_threads++; + up_write(&uproc->rwsem); + /* We ref-count sleepers. */ + uprobe_get_process(uproc); + + wait_event(uproc->waitq, !utask->quiescing); + + down_write(&uproc->rwsem); + utask->state = prev_state; + uproc->n_quiescent_threads--; + up_write(&uproc->rwsem); + + /* + * If uproc's last uprobe has been unregistered, and + * unregister_uprobe() woke up before we did, it's up + * to us to free uproc. + */ + return uprobe_put_process(uproc); + } +} + +/* Prepare to single-step ppt's probed instruction inline. */ +static inline void uprobe_pre_ssin(struct uprobe_task *utask, + struct uprobe_probept *ppt, struct pt_regs *regs) +{ + int len; + arch_reset_ip_for_sstep(regs); + mutex_lock(&ppt->ssil_mutex); + len = set_orig_insn(ppt, utask->tsk); + if (unlikely(len != BP_INSN_SIZE)) { + printk("Failed to temporarily restore original " + "instruction for single-stepping: " + "pid/tgid=%d/%d, vaddr=%#lx\n", + utask->tsk->pid, utask->tsk->tgid, ppt->vaddr); + utask->doomed = 1; + } +} + +/* Prepare to continue execution after single-stepping inline. */ +static inline void uprobe_post_ssin(struct uprobe_task *utask, + struct uprobe_probept *ppt) +{ + + int len = set_bp(ppt, utask->tsk); + if (unlikely(len != BP_INSN_SIZE)) { + printk("Couldn't restore bp: pid/tgid=%d/%d, addr=%#lx\n", + utask->tsk->pid, utask->tsk->tgid, ppt->vaddr); + ppt->state = UPROBE_DISABLED; + } + mutex_unlock(&ppt->ssil_mutex); +} + +/* uprobe_pre_ssout() and uprobe_post_ssout() are architecture-specific. */ + +/* + * Signal callback: + * + * We get called here with: + * state = UPTASK_RUNNING => we are here due to a breakpoint hit + * - Read-lock the process + * - Figure out which probepoint, based on regs->IP + * - Set state = UPTASK_BP_HIT + * - Reset regs->IP to beginning of the insn, if necessary + * - Invoke handler for each uprobe at this probepoint + * - Set singlestep in motion (UTRACE_ACTION_SINGLESTEP), + * with state = UPTASK_SSTEP + * + * state = UPTASK_SSTEP => here after single-stepping + * - Validate we are here per the state machine + * - Clean up after single-stepping + * - Set state = UPTASK_RUNNING + * - Read-unlock the process + * - If it's time to quiesce, take appropriate action. + * - If the handler(s) we ran called [un]register_uprobe(), + * complete those via uprobe_run_def_regs(). 
+ * + * state = ANY OTHER STATE + * - Not our signal, pass it on (UTRACE_ACTION_RESUME) + * Note: Intermediate states such as UPTASK_POST_SSTEP help + * uprobe_report_exit() decide what to unlock if we die. + */ +static u32 uprobe_report_signal(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs, u32 action, + siginfo_t *info, const struct k_sigaction *orig_ka, + struct k_sigaction *return_ka) +{ + struct uprobe_task *utask; + struct uprobe_probept *ppt; + struct uprobe_process *uproc; + struct uprobe_kimg *uk; + u32 ret; + unsigned long probept; + int hit_uretprobe_trampoline = 0; + LIST_HEAD(def_reg_list); + + utask = rcu_dereference((struct uprobe_task *)engine->data); + BUG_ON(!utask); + + if (info->si_signo != BREAKPOINT_SIGNAL && + info->si_signo != SSTEP_SIGNAL) + goto no_interest; + + /* + * Set up the SSOL area if it's not already there. We do this + * here because we have to do it before handling the first + * probepoint hit, the probed process has to do it, and this may + * be the first time our probed process runs uprobes code. + * + * We need the SSOL area for the uretprobe trampoline even if + * this architectures doesn't single-step out of line. + */ + uproc = utask->uproc; +#ifdef CONFIG_UPROBES_SSOL + if (uproc->sstep_out_of_line && + unlikely(IS_ERR(uprobe_verify_ssol(uproc)))) + uproc->sstep_out_of_line = 0; +#elif defined(CONFIG_URETPROBES) + (void) uprobe_verify_ssol(uproc); +#endif + + switch (utask->state) { + case UPTASK_RUNNING: + if (info->si_signo != BREAKPOINT_SIGNAL) + goto no_interest; + down_read(&uproc->rwsem); + clear_utrace_quiesce(utask); + probept = arch_get_probept(regs); + + hit_uretprobe_trampoline = (probept == (unsigned long) + uproc->uretprobe_trampoline_addr); + if (hit_uretprobe_trampoline) { + uretprobe_handle_return(regs, utask); + goto bkpt_done; + } + + ppt = uprobe_find_probept(uproc, probept); + if (!ppt) { + up_read(&uproc->rwsem); + goto no_interest; + } + utask->active_probe = ppt; + utask->state = UPTASK_BP_HIT; + + if (likely(ppt->state == UPROBE_BP_SET)) { + list_for_each_entry(uk, &ppt->uprobe_list, list) { + struct uprobe *u = uk->uprobe; + if (is_uretprobe(u)) + uretprobe_handle_entry(u, regs, utask); + else if (u->handler) + u->handler(u, regs); + } + } + + utask->state = UPTASK_PRE_SSTEP; +#ifdef CONFIG_UPROBES_SSOL + if (uproc->sstep_out_of_line) + uprobe_pre_ssout(utask, ppt, regs); + else +#endif + uprobe_pre_ssin(utask, ppt, regs); + if (unlikely(utask->doomed)) + do_exit(SIGSEGV); + utask->state = UPTASK_SSTEP; + /* + * No other engines must see this signal, and the + * signal shouldn't be passed on either. + */ + ret = UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN | + UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_NEWSTATE; + break; + case UPTASK_SSTEP: + if (info->si_signo != SSTEP_SIGNAL) + goto no_interest; + ppt = utask->active_probe; + BUG_ON(!ppt); + utask->state = UPTASK_POST_SSTEP; +#ifdef CONFIG_UPROBES_SSOL + if (uproc->sstep_out_of_line) + uprobe_post_ssout(utask, ppt, regs); + else +#endif + uprobe_post_ssin(utask, ppt); +bkpt_done: + /* Note: Can come here after running uretprobe handlers */ + if (unlikely(utask->doomed)) + do_exit(SIGSEGV); + + utask->active_probe = NULL; + + /* + * The deferred_registrations list accumulates in utask, + * but utask could go away when we uprobe_run_def_regs. + * So switch the list head to a local variable. 
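+ *
+ * This is also what lets a handler unregister its own probe: the
+ * request was queued by defer_registration() while the probe was
+ * active, and only runs below, once the single-step is done -- e.g.,
+ * a hypothetical one-shot probe:
+ *
+ *	static void fire_once(struct uprobe *u, struct pt_regs *regs)
+ *	{
+ *		record_hit(u, regs);	// hypothetical
+ *		unregister_uprobe(u);	// deferred; runs via this list
+ *	}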
+ */ + list_splice_init(&utask->deferred_registrations, &def_reg_list); + + ret = UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN + | UTRACE_ACTION_NEWSTATE; + utask->state = UPTASK_RUNNING; + if (utask->quiescing) { + up_read(&uproc->rwsem); + if (utask_fake_quiesce(utask) == 1) + ret |= UTRACE_ACTION_DETACH; + } else + up_read(&uproc->rwsem); + + if (hit_uretprobe_trampoline) + /* + * It's possible that the uretprobe_instance + * we just recycled was the last reason for + * keeping uproc around. + */ + uprobe_put_process(uproc); + + uprobe_run_def_regs(&def_reg_list); + break; + default: + goto no_interest; + } + return ret; + +no_interest: + return UTRACE_ACTION_RESUME; +} + +/* + * utask_quiesce_pending_sigtrap: The utask entered the quiesce callback + * through the signal delivery path, apparently. Check if the associated + * signal happened due to a uprobe hit. + * + * Called with utask->uproc write-locked. Returns 1 if quiesce was + * entered with SIGTRAP pending due to a uprobe hit. + */ +static int utask_quiesce_pending_sigtrap(struct uprobe_task *utask) +{ + const struct utrace_regset_view *view; + const struct utrace_regset *regset; + struct uprobe_probept *ppt; + unsigned long insn_ptr; + + view = utrace_native_view(utask->tsk); + regset = utrace_regset(utask->tsk, utask->engine, view, 0); + if (unlikely(regset == NULL)) + return -EIO; + + if ((*regset->get)(utask->tsk, regset, SLOT_IP * regset->size, + regset->size, &insn_ptr, NULL) != 0) + return -EIO; + + if (regset->size != sizeof(insn_ptr)) { + /* Assume 32-bit app and 64-bit kernel. */ + u32 *insn_ptr32 = (u32*) &insn_ptr; + BUG_ON(regset->size != sizeof(u32)); + insn_ptr = *insn_ptr32; + } + + ppt = uprobe_find_probept(utask->uproc, ARCH_BP_INST_PTR(insn_ptr)); + return (ppt != NULL); +} + +/* + * Quiesce callback: The associated process has one or more breakpoint + * insertions or removals pending. If we're the last thread in this + * process to quiesce, do the insertion(s) and/or removal(s). + */ +static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct uprobe_task *utask; + struct uprobe_process *uproc; + + utask = rcu_dereference((struct uprobe_task *)engine->data); + BUG_ON(!utask); + uproc = utask->uproc; + if (current == utask->quiesce_master) { + /* + * tsk was already quiescent when quiesce_all_threads() + * called utrace_set_flags(), which in turned called + * here. uproc is already locked. Do as little as possible + * and get out. + */ + utask->state = UPTASK_QUIESCENT; + uproc->n_quiescent_threads++; + return UTRACE_ACTION_RESUME; + } + + BUG_ON(utask->active_probe); + down_write(&uproc->rwsem); + + /* + * When a thread hits a breakpoint or single-steps, utrace calls + * this quiesce callback before our signal callback. We must + * let uprobe_report_signal() handle the uprobe hit and THEN + * quiesce, because (a) there's a chance that we're quiescing + * in order to remove that very uprobe, and (b) there's a tiny + * chance that even though that uprobe isn't marked for removal + * now, it may be before all threads manage to quiesce. + */ + if (!utask->quiescing || utask_quiesce_pending_sigtrap(utask) == 1) { + clear_utrace_quiesce(utask); + goto done; + } + + utask->state = UPTASK_QUIESCENT; + uproc->n_quiescent_threads++; + check_uproc_quiesced(uproc, tsk); +done: + up_write(&uproc->rwsem); + return UTRACE_ACTION_RESUME; +} + +/* + * uproc's process is exiting or exec-ing, so zap all the (now irrelevant) + * probepoints. Runs with uproc->rwsem write-locked. 
Caller must ref-count + * uproc before calling this function, to ensure that uproc doesn't get + * freed in the middle of this. + */ +static void uprobe_cleanup_process(struct uprobe_process *uproc) +{ + int i; + struct uprobe_probept *ppt; + struct hlist_node *pnode1, *pnode2; + struct hlist_head *head; + struct uprobe_kimg *uk, *unode; + + uproc->finished = 1; + + for (i = 0; i < UPROBE_TABLE_SIZE; i++) { + head = &uproc->uprobe_table[i]; + hlist_for_each_entry_safe(ppt, pnode1, pnode2, head, ut_node) { + if (ppt->state == UPROBE_INSERTING || + ppt->state == UPROBE_REMOVING) { + /* + * This task is (exec/exit)ing with + * a [un]register_uprobe pending. + * [un]register_uprobe will free ppt. + */ + ppt->state = UPROBE_DISABLED; + list_del(&ppt->pd_node); + list_for_each_entry_safe(uk, unode, + &ppt->uprobe_list, list) + uk->status = -ESRCH; + wake_up_all(&ppt->waitq); + } else if (ppt->state == UPROBE_BP_SET) { + list_for_each_entry_safe(uk, unode, + &ppt->uprobe_list, list) { + list_del(&uk->list); + uprobe_free_kimg(uk); + } + uprobe_free_probept(ppt); + /* else */ + /* + * If ppt is UPROBE_DISABLED, assume that + * [un]register_uprobe() has been notified + * and will free it soon. + */ + } + } + } +} + +/* + * Exit callback: The associated task/thread is exiting. + */ +static u32 uprobe_report_exit(struct utrace_attached_engine *engine, + struct task_struct *tsk, long orig_code, long *code) +{ + struct uprobe_task *utask; + struct uprobe_process *uproc; + struct uprobe_probept *ppt; + int utask_quiescing; + + utask = rcu_dereference((struct uprobe_task *)engine->data); + uproc = utask->uproc; + uprobe_get_process(uproc); + + ppt = utask->active_probe; + if (ppt) { + if (utask->state == UPTASK_TRAMPOLINE_HIT) + printk(KERN_WARNING "Task died during uretprobe return:" + " pid/tgid = %d/%d\n", tsk->pid, tsk->tgid); + else + printk(KERN_WARNING "Task died at uprobe probepoint:" + " pid/tgid = %d/%d, probepoint = %#lx\n", + tsk->pid, tsk->tgid, ppt->vaddr); + /* Mutex cleanup depends on where we died and SSOL vs. SSIL. */ + if (uproc->sstep_out_of_line) { + if (utask->state == UPTASK_SSTEP + && ppt->slot && ppt->slot->owner == ppt) + up_read(&ppt->slot->rwsem); + } else { + switch (utask->state) { + case UPTASK_PRE_SSTEP: + case UPTASK_SSTEP: + case UPTASK_POST_SSTEP: + mutex_unlock(&ppt->ssil_mutex); + break; + default: + break; + } + } + up_read(&uproc->rwsem); + if (utask->state == UPTASK_TRAMPOLINE_HIT) + uprobe_decref_process(uproc); + } + + down_write(&uproc->rwsem); + utask_quiescing = utask->quiescing; + uprobe_free_task(utask); + + uproc->nthreads--; + if (uproc->nthreads) { + if (utask_quiescing) + /* + * In case other threads are waiting for + * us to quiesce... + */ + check_uproc_quiesced(uproc, + find_surviving_thread(uproc)); + } else { + /* + * We were the last remaining thread - clean up the uprobe + * remnants a la unregister_uprobe(). We don't have to + * remove the breakpoints, though. + */ + uprobe_cleanup_process(uproc); + } + up_write(&uproc->rwsem); + uprobe_put_process(uproc); + + return UTRACE_ACTION_DETACH; +} + +/* + * Clone callback: The current task has spawned a thread/process. + * + * NOTE: For now, we don't pass on uprobes from the parent to the + * child. We now do the necessary clearing of breakpoints in the + * child's address space. + * + * TODO: + * - Provide option for child to inherit uprobes. 
+ */ +static u32 uprobe_report_clone(struct utrace_attached_engine *engine, + struct task_struct *parent, unsigned long clone_flags, + struct task_struct *child) +{ + int len; + struct uprobe_process *uproc; + struct uprobe_task *ptask, *ctask; + + ptask = rcu_dereference((struct uprobe_task *)engine->data); + uproc = ptask->uproc; + + /* + * Lock uproc so no new uprobes can be installed 'til all + * report_clone activities are completed + */ + down_write(&uproc->rwsem); + get_task_struct(child); + + if (clone_flags & CLONE_THREAD) { + /* New thread in the same process */ + ctask = uprobe_add_task(child, uproc); + BUG_ON(!ctask); + if (IS_ERR(ctask)) { + put_task_struct(child); + up_write(&uproc->rwsem); + goto fail; + } + if (ctask) + uproc->nthreads++; + /* + * FIXME: Handle the case where uproc is quiescing + * (assuming it's possible to clone while quiescing). + */ + } else { + /* + * New process spawned by parent. Remove the probepoints + * in the child's text. + * + * Its not necessary to quiesce the child as we are assured + * by utrace that this callback happens *before* the child + * gets to run userspace. + * + * We also hold the uproc->rwsem for the parent - so no + * new uprobes will be registered 'til we return. + */ + int i; + struct uprobe_probept *ppt; + struct hlist_node *node; + struct hlist_head *head; + + for (i = 0; i < UPROBE_TABLE_SIZE; i++) { + head = &uproc->uprobe_table[i]; + hlist_for_each_entry(ppt, node, head, ut_node) { + len = set_orig_insn(ppt, child); + if (len != BP_INSN_SIZE) { + /* Ratelimit this? */ + printk(KERN_ERR "Pid %d forked %d;" + " failed to remove probepoint" + " at %#lx in child\n", + parent->pid, child->pid, + ppt->vaddr); + } + } + } + } + + put_task_struct(child); + up_write(&uproc->rwsem); + +fail: + return UTRACE_ACTION_RESUME; +} + +/* + * Exec callback: The associated process called execve() or friends + * + * The new program is about to start running and so there is no + * possibility of a uprobe from the previous user address space + * to be hit. + * + * NOTE: + * Typically, this process would have passed through the clone + * callback, where the necessary action *should* have been + * taken. However, if we still end up at this callback: + * - We don't have to clear the uprobes - memory image + * will be overlaid. + * - We have to free up uprobe resources associated with + * this process. + */ +static u32 uprobe_report_exec(struct utrace_attached_engine *engine, + struct task_struct *tsk, const struct linux_binprm *bprm, + struct pt_regs *regs) +{ + struct uprobe_process *uproc; + struct uprobe_task *utask; + int uproc_freed; + + utask = rcu_dereference((struct uprobe_task *)engine->data); + uproc = utask->uproc; + uprobe_get_process(uproc); + + down_write(&uproc->rwsem); + uprobe_cleanup_process(uproc); + /* + * If [un]register_uprobe() is in progress, cancel the quiesce. + * Otherwise, utrace_report_exec() might call uprobe_report_exec() + * while the [un]register_uprobe thread is freeing the uproc. + */ + clear_utrace_quiesce(utask); + up_write(&uproc->rwsem); + + /* If any [un]register_uprobe is pending, it'll clean up. */ + uproc_freed = uprobe_put_process(uproc); + return (uproc_freed ? 
+}
+
+static const struct utrace_engine_ops uprobe_utrace_ops =
+{
+	.report_quiesce = uprobe_report_quiesce,
+	.report_signal = uprobe_report_signal,
+	.report_exit = uprobe_report_exit,
+	.report_clone = uprobe_report_clone,
+	.report_exec = uprobe_report_exec
+};
+
+/*
+ * Initialize the uprobes global data area, and return a pointer
+ * to it.  Caller will initialize the uprobes_data pointer afterward,
+ * to ensure that no other module sees a non-null uprobes_data until
+ * it's completely initialized.
+ */
+static struct uprobe_globals *init_uprobes(void)
+{
+	int i;
+	struct uprobe_globals *g = kmalloc(sizeof(*g), GFP_USER);
+	if (!g)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < UPROBE_TABLE_SIZE; i++) {
+		INIT_HLIST_HEAD(&g->uproc_table[i]);
+		INIT_HLIST_HEAD(&g->utask_table[i]);
+	}
+	mutex_init(&g->uproc_mutex);
+	spin_lock_init(&g->utask_table_lock);
+	return g;
+}
+
+/*
+ * Verify that the uprobes globals area has been set up, and that the
+ * current module's globals variable points at it.  Returns 0 if the
+ * area is successfully set up, or a negative errno otherwise.
+ */
+static int verify_uprobes(void)
+{
+	if (unlikely(!globals)) {
+		/*
+		 * First time through for this instrumentation module.
+		 * uprobes_mutex protects both the global uprobes
+		 * initialization and this module's local initialization.
+		 */
+		struct uprobe_globals *g;
+
+		mutex_lock(&uprobes_mutex);
+		if (!uprobes_data) {
+			/* First time through since boot time */
+			g = init_uprobes();
+			uprobes_data = g;
+		} else
+			g = uprobes_data;
+		if (!IS_ERR(g)) {
+			p_uprobe_utrace_ops = &uprobe_utrace_ops;
+			uproc_table = g->uproc_table;
+			utask_table = g->utask_table;
+		}
+
+		/* Set globals pointer to signify all is initialized. */
+		globals = g;
+		mutex_unlock(&uprobes_mutex);
+	}
+	if (unlikely(IS_ERR(globals)))
+		return (int) PTR_ERR(globals);
+	return 0;
+}
+
+#ifdef CONFIG_URETPROBES
+
+/* Called when the entry-point probe u is hit. */
+static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
+	struct uprobe_task *utask)
+{
+	struct uretprobe_instance *ri;
+	unsigned long trampoline_addr;
+
+	if (IS_ERR(utask->uproc->uretprobe_trampoline_addr))
+		return;
+	trampoline_addr = (unsigned long)
+		utask->uproc->uretprobe_trampoline_addr;
+	ri = (struct uretprobe_instance *)
+		kmalloc(sizeof(struct uretprobe_instance), GFP_USER);
+	if (!ri)
+		return;
+	ri->ret_addr = arch_hijack_uret_addr(trampoline_addr, regs, utask);
+	if (likely(ri->ret_addr)) {
+		ri->rp = container_of(u, struct uretprobe, u);
+		INIT_HLIST_NODE(&ri->hlist);
+		hlist_add_head(&ri->hlist, &utask->uretprobe_instances);
+		/* We ref-count outstanding uretprobe_instances. */
+		uprobe_get_process(utask->uproc);
+	} else
+		kfree(ri);
+}
+
+/*
+ * For each uretprobe_instance pushed onto the LIFO for the function
+ * instance that's now returning, call the handler, free the ri, and
+ * decrement the uproc's ref count.  Caller ref-counts uproc, so we
+ * should never hit zero in this function.
+ *
+ * Returns the original return address.
+ *
+ * TODO: Handle longjmp out of uretprobed function.
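+ *
+ * Worked example: if two uretprobes are registered at the same
+ * function entry, each call pushes two instances.  The first-pushed
+ * ri holds the real return address; the second-pushed ri holds the
+ * trampoline address, because arch_hijack_uret_addr() found the
+ * return address already hijacked.  At return time we pop and run
+ * the handler for each ri until we pop one whose ret_addr is not
+ * the trampoline -- that is the original return address.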
+ */ +static unsigned long uretprobe_run_handlers(struct uprobe_task *utask, + struct pt_regs *regs, unsigned long trampoline_addr) +{ + unsigned long ret_addr; + struct hlist_head *head = &utask->uretprobe_instances; + struct uretprobe_instance *ri; + struct hlist_node *r1, *r2; + + hlist_for_each_entry_safe(ri, r1, r2, head, hlist) { + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + ret_addr = ri->ret_addr; + hlist_del(&ri->hlist); + kfree(ri); + uprobe_decref_process(utask->uproc); + if (ret_addr != trampoline_addr) + /* + * This is the first ri (chronologically) pushed for + * this particular instance of the probed function. + */ + return ret_addr; + } + printk(KERN_ERR "No uretprobe instance with original return address!" + " pid/tgid=%d/%d", utask->tsk->pid, utask->tsk->tgid); + utask->doomed = 1; + return 0; +} + +/* Called when the uretprobe trampoline is hit. */ +static void uretprobe_handle_return(struct pt_regs *regs, + struct uprobe_task *utask) +{ + unsigned long orig_ret_addr; + /* Delay recycling of uproc until end of uprobe_report_signal() */ + uprobe_get_process(utask->uproc); + utask->state = UPTASK_TRAMPOLINE_HIT; + utask->active_probe = &uretprobe_trampoline_dummy_probe; + orig_ret_addr = uretprobe_run_handlers(utask, regs, + (unsigned long) utask->uproc->uretprobe_trampoline_addr); + arch_restore_uret_addr(orig_ret_addr, regs); +} + +static +int register_uretprobe(struct uretprobe *rp) +{ + if (!rp || !rp->handler) + return -EINVAL; + rp->u.handler = URETPROBE_HANDLE_ENTRY; + return register_uprobe(&rp->u); +} + +/* + * The uretprobe containing u is being unregistered. Its uretprobe_instances + * have to hang around 'til their associated instances return (but we can't + * run rp's handler). Zap ri->rp for each one to indicate unregistration. + * + * Runs with uproc write-locked. + */ +static void zap_uretprobe_instances(struct uprobe *u, + struct uprobe_process *uproc) +{ + struct uprobe_task *utask; + struct uretprobe *rp = container_of(u, struct uretprobe, u); + + if (!uproc) + return; + + list_for_each_entry(utask, &uproc->thread_list, list) { + struct hlist_node *r; + struct uretprobe_instance *ri; + + hlist_for_each_entry(ri, r, &utask->uretprobe_instances, hlist) + if (ri->rp == rp) + ri->rp = NULL; + } +} + +static +void unregister_uretprobe(struct uretprobe *rp) +{ + if (!rp) + return; + unregister_uprobe(&rp->u); +} + +/* + * uproc->ssol_area has been successfully set up. Establish the + * uretprobe trampoline in slot 0. + */ +static void uretprobe_set_trampoline(struct uprobe_process *uproc) +{ + uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION; + struct uprobe_ssol_area *area = &uproc->ssol_area; + struct uprobe_ssol_slot *slot = &area->slots[0]; + + if (access_process_vm(current, (unsigned long) slot->insn, + &bp_insn, BP_INSN_SIZE, 1) == BP_INSN_SIZE) { + uproc->uretprobe_trampoline_addr = slot->insn; + slot->state = SSOL_RESERVED; + area->next_slot = 1; + area->nfree--; + } else { + printk(KERN_ERR "uretprobes disabled for pid %d:" + " cannot set uretprobe trampoline at %p\n", + uproc->tgid, slot->insn); + } +} + +#else /* ! 
CONFIG_URETPROBES */ + +static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs, + struct uprobe_task *utask) +{ +} +static void uretprobe_handle_return(struct pt_regs *regs, + struct uprobe_task *utask) +{ +} +static void uretprobe_set_trampoline(struct uprobe_process *uproc) +{ +} +static void zap_uretprobe_instances(struct uprobe *u, + struct uprobe_process *uproc) +{ +} +#endif /* CONFIG_URETPROBES */ + +#include "uprobes_arch.c" diff --git a/runtime/uprobes/uprobes.h b/runtime/uprobes/uprobes.h new file mode 100644 index 00000000..574bee62 --- /dev/null +++ b/runtime/uprobes/uprobes.h @@ -0,0 +1,385 @@ +#ifndef _LINUX_UPROBES_H +#define _LINUX_UPROBES_H +/* + * Userspace Probes (UProbes) + * include/linux/uprobes.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2006 + */ +#include <linux/types.h> +#include <linux/list.h> + +struct pt_regs; + +enum uprobe_type { + UPTY_UPROBE, + UPTY_URETPROBE +}; + +/* This is what the user supplies us. */ +struct uprobe { + /* + * The pid of the probed process. Currently, this can be the + * thread ID (task->pid) of any active thread in the process. + */ + pid_t pid; + + /* Location of the probepoint */ + unsigned long vaddr; + + /* Handler to run when the probepoint is hit */ + void (*handler)(struct uprobe*, struct pt_regs*); + + /* + * This function, if non-NULL, will be called upon completion of + * an ASYNCHRONOUS registration (i.e., one initiated by a uprobe + * handler). reg = 1 for register, 0 for unregister. type + * specifies the type of [un]register call (uprobe or uretprobe). + */ + void (*registration_callback)(struct uprobe *u, int reg, + enum uprobe_type type, int result); + + /* Reserved for use by uprobes */ + void *kdata; +}; + +struct uretprobe_instance; + +struct uretprobe { + struct uprobe u; + void (*handler)(struct uretprobe_instance*, struct pt_regs*); +}; + +struct uretprobe_instance { + struct uretprobe *rp; + unsigned long ret_addr; + struct hlist_node hlist; + unsigned long reserved1; + unsigned long reserved2; +}; + +static int register_uprobe(struct uprobe *u); +static void unregister_uprobe(struct uprobe *u); +/* For runtime, assume uprobes support includes uretprobes. 
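+ *
+ * Minimal usage sketch (hypothetical pid, address, and handlers;
+ * error handling elided).  For a uretprobe, vaddr is the probed
+ * function's entry address, and rp->u.handler is set internally:
+ *
+ *	static void entry_hit(struct uprobe *u, struct pt_regs *regs)
+ *	{
+ *		printk(KERN_INFO "probe hit at %#lx\n", u->vaddr);
+ *	}
+ *
+ *	static void ret_hit(struct uretprobe_instance *ri,
+ *			struct pt_regs *regs)
+ *	{
+ *		printk(KERN_INFO "returning to %#lx\n", ri->ret_addr);
+ *	}
+ *
+ *	struct uprobe u = {
+ *		.pid = 1234,		// any thread in the probed process
+ *		.vaddr = 0x08048474,	// probed instruction's address
+ *		.handler = entry_hit,
+ *	};
+ *	struct uretprobe rp = {
+ *		.u = { .pid = 1234, .vaddr = 0x08048474 },
+ *		.handler = ret_hit,
+ *	};
+ *	...
+ *	ret = register_uprobe(&u);	// 0 on success
+ *	ret = register_uretprobe(&rp);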
*/ +static int register_uretprobe(struct uretprobe *rp); +static void unregister_uretprobe(struct uretprobe *rp); + +#ifdef UPROBES_IMPLEMENTATION + +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/wait.h> +#include <asm/atomic.h> +#include "uprobes_arch.h" + +struct task_struct; +struct utrace_attached_engine; + +enum uprobe_probept_state { + UPROBE_INSERTING, // process quiescing prior to insertion + UPROBE_BP_SET, // breakpoint in place + UPROBE_REMOVING, // process quiescing prior to removal + UPROBE_DISABLED // removal completed +}; + +enum uprobe_task_state { + UPTASK_QUIESCENT, + UPTASK_SLEEPING, // used when task may not be able to quiesce + UPTASK_RUNNING, + UPTASK_BP_HIT, + UPTASK_TRAMPOLINE_HIT, + UPTASK_PRE_SSTEP, + UPTASK_SSTEP, + UPTASK_POST_SSTEP +}; + +#define UPROBE_HASH_BITS 5 +#define UPROBE_TABLE_SIZE (1 << UPROBE_HASH_BITS) + +/* Used when deciding which instruction slot to steal. */ +enum uprobe_slot_state { + SSOL_FREE, + SSOL_ASSIGNED, + SSOL_BEING_STOLEN, + SSOL_RESERVED // e.g., for uretprobe trampoline +}; + +/* + * For a uprobe_process that uses an SSOL area, there's an array of these + * objects matching the array of instruction slots in the SSOL area. + */ +struct uprobe_ssol_slot { + /* The slot in the SSOL area that holds the instruction-copy */ + __user uprobe_opcode_t *insn; + + enum uprobe_slot_state state; + + /* The probepoint that currently owns this slot */ + struct uprobe_probept *owner; + + /* + * Read-locked when slot is in use during single-stepping. + * Write-locked by stealing task. + */ + struct rw_semaphore rwsem; + + /* Used for LRU heuristics. If this overflows, it's OK. */ + unsigned long last_used; +}; + +/* + * The per-process single-stepping out-of-line (SSOL) area + */ +struct uprobe_ssol_area { + /* Array of instruction slots in the vma we allocate */ + __user uprobe_opcode_t *insn_area; + + int nslots; + int nfree; + + /* Array of slot objects, one per instruction slot */ + struct uprobe_ssol_slot *slots; + + /* lock held while finding a free slot */ + spinlock_t lock; + + /* Next slot to steal */ + int next_slot; + + /* Ensures 2 threads don't try to set up the vma simultaneously. */ + struct mutex setup_mutex; + + /* 1 = we've at least tried. IS_ERR(insn_area) if we failed. */ + int initialized; +}; + +/* + * uprobe_process -- not a user-visible struct. + * A uprobe_process represents a probed process. A process can have + * multiple probepoints (each represented by a uprobe_probept) and + * one or more threads (each represented by a uprobe_task). + */ +struct uprobe_process { + /* + * rwsem is write-locked for any change to the uprobe_process's + * graph (including uprobe_tasks, uprobe_probepts, and uprobe_kimgs) -- + * e.g., due to probe [un]registration or special events like exit. + * It's read-locked during the whole time we process a probepoint hit. + */ + struct rw_semaphore rwsem; + + /* Table of uprobe_probepts registered for this process */ + /* TODO: Switch to list_head[] per Ingo. */ + struct hlist_head uprobe_table[UPROBE_TABLE_SIZE]; + int nppt; /* number of probepoints */ + + /* List of uprobe_probepts awaiting insertion or removal */ + struct list_head pending_uprobes; + + /* List of uprobe_tasks in this task group */ + struct list_head thread_list; + int nthreads; + int n_quiescent_threads; + + /* this goes on the uproc_table */ + struct hlist_node hlist; + + /* + * All threads (tasks) in a process share the same uprobe_process. 
+ */
+	pid_t tgid;
+
+	/* Threads in SLEEPING state wait here to be roused. */
+	wait_queue_head_t waitq;
+
+	/*
+	 * We won't free the uprobe_process while...
+	 * - any register/unregister operations on it are in progress; or
+	 * - uprobe_table[] is not empty; or
+	 * - any tasks are SLEEPING in the waitq; or
+	 * - any uretprobe_instances are outstanding.
+	 * refcount reflects this.  We do NOT ref-count tasks (threads),
+	 * since once the last thread has exited, the rest is academic.
+	 */
+	atomic_t refcount;
+
+	/* Return-probed functions return via this trampoline. */
+	__user uprobe_opcode_t *uretprobe_trampoline_addr;
+
+	/*
+	 * finished = 1 means the process is execing or the last thread
+	 * is exiting, and we're cleaning up the uproc.  If the execed
+	 * process is probed, a new uproc will be created.
+	 */
+	int finished;
+
+	/*
+	 * Manages slots for instruction-copies to be single-stepped
+	 * out of line.
+	 */
+	struct uprobe_ssol_area ssol_area;
+
+	/*
+	 * 1 to single-step out of line; 0 for inline.  This can drop to
+	 * 0 if we can't set up the SSOL area, but never goes from 0 to 1.
+	 */
+	int sstep_out_of_line;
+};
+
+/*
+ * uprobe_kimg -- not a user-visible struct.
+ * Holds implementation-only per-uprobe data.
+ * uprobe->kdata points to this.
+ */
+struct uprobe_kimg {
+	struct uprobe *uprobe;
+	struct uprobe_probept *ppt;
+
+	/*
+	 * -EBUSY while we're waiting for all threads to quiesce so the
+	 * associated breakpoint can be inserted or removed.
+	 * 0 if the insert/remove operation has succeeded, or -errno
+	 * otherwise.
+	 */
+	int status;
+
+	/* on ppt's list */
+	struct list_head list;
+};
+
+/*
+ * uprobe_probept -- not a user-visible struct.
+ * A probepoint, at which several uprobes can be registered.
+ * Guarded by uproc->rwsem.
+ */
+struct uprobe_probept {
+	/* vaddr copied from (first) uprobe */
+	unsigned long vaddr;
+
+	/* The uprobe_kimg(s) associated with this uprobe_probept */
+	struct list_head uprobe_list;
+
+	enum uprobe_probept_state state;
+
+	/* Saved opcode (which has been replaced with breakpoint) */
+	uprobe_opcode_t opcode;
+
+	/* Saved original instruction */
+	uprobe_opcode_t insn[MAX_UINSN_BYTES / sizeof(uprobe_opcode_t)];
+
+	/* The parent uprobe_process */
+	struct uprobe_process *uproc;
+
+	/*
+	 * ppt goes in the uprobe_process->uprobe_table when registered --
+	 * even before the breakpoint has been inserted.
+	 */
+	struct hlist_node ut_node;
+
+	/*
+	 * ppt sits in the uprobe_process->pending_uprobes queue while
+	 * awaiting insertion or removal of the breakpoint.
+	 */
+	struct list_head pd_node;
+
+	/* [un]register_uprobe() waits 'til bkpt inserted/removed. */
+	wait_queue_head_t waitq;
+
+	/*
+	 * Serialize single-stepping inline, so threads don't clobber
+	 * each other swapping the breakpoint instruction in and out.
+	 * This helps prevent crashing the probed app, but it does NOT
+	 * prevent probe misses while the breakpoint is swapped out.
+	 */
+	struct mutex ssil_mutex;
+
+	/*
+	 * We put the instruction-copy here to single-step it.
+	 * We don't own it unless slot->owner points back to us.
+	 */
+	struct uprobe_ssol_slot *slot;
+
+	/*
+	 * Hold this while stealing an insn slot to ensure that no
+	 * other thread, having also hit this probepoint, simultaneously
+	 * steals a slot for it.
+	 */
+	struct mutex slot_mutex;
+};
+
+/*
+ * uprobe_task -- not a user-visible struct.
+ * Corresponds to a thread in a probed process.
+ * Guarded by uproc->rwsem.
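+ *
+ * Rough sketch of a normal probe hit, inferred from the states
+ * above (the authoritative transitions live in the signal and
+ * quiesce callbacks): UPTASK_RUNNING -> UPTASK_BP_HIT at the
+ * breakpoint trap; UPTASK_PRE_SSTEP -> UPTASK_SSTEP ->
+ * UPTASK_POST_SSTEP around the single-step of the instruction
+ * copy; then back to UPTASK_RUNNING.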
+ */ +struct uprobe_task { + /* Lives in the global utask_table */ + struct hlist_node hlist; + + /* Lives on the thread_list for the uprobe_process */ + struct list_head list; + + /* This is a back pointer to the task_struct for this task */ + struct task_struct *tsk; + + /* The utrace engine for this task */ + struct utrace_attached_engine *engine; + + /* Back pointer to the associated uprobe_process */ + struct uprobe_process *uproc; + + enum uprobe_task_state state; + + /* + * quiescing = 1 means this task has been asked to quiesce. + * It may not be able to comply immediately if it's hit a bkpt. + */ + int quiescing; + + /* Task currently running quiesce_all_threads() */ + struct task_struct *quiesce_master; + + /* Set before running handlers; cleared after single-stepping. */ + struct uprobe_probept *active_probe; + + /* Saved address of copied original instruction */ + long singlestep_addr; + + /* + * Unexpected error in probepoint handling has left task's + * text or stack corrupted. Kill task ASAP. + */ + int doomed; + + /* LIFO -- active instances */ + struct hlist_head uretprobe_instances; + + /* [un]registrations initiated by handlers must be asynchronous. */ + struct list_head deferred_registrations; +}; + +#ifdef CONFIG_UPROBES_SSOL +static struct uprobe_ssol_slot *uprobe_get_insn_slot(struct uprobe_probept*); +static void uprobe_pre_ssout(struct uprobe_task*, struct uprobe_probept*, + struct pt_regs*); +static void uprobe_post_ssout(struct uprobe_task*, struct uprobe_probept*, + struct pt_regs*); +#endif + +#endif /* UPROBES_IMPLEMENTATION */ + +#endif /* _LINUX_UPROBES_H */ diff --git a/runtime/uprobes/uprobes_arch.c b/runtime/uprobes/uprobes_arch.c new file mode 100644 index 00000000..99ef54c8 --- /dev/null +++ b/runtime/uprobes/uprobes_arch.c @@ -0,0 +1,11 @@ +#if defined (__x86_64__) +#include "uprobes_x86_64.c" +#elif defined (__i386__) +#include "uprobes_i386.c" +#elif defined (__powerpc64__) +#include "uprobes_ppc64.c" +#elif defined (__s390__) || defined (__s390x__) +#include "uprobes_s390.c" +#else +#error "Unsupported architecture" +#endif diff --git a/runtime/uprobes/uprobes_arch.h b/runtime/uprobes/uprobes_arch.h new file mode 100644 index 00000000..0223e280 --- /dev/null +++ b/runtime/uprobes/uprobes_arch.h @@ -0,0 +1,11 @@ +#if defined (__x86_64__) +#include "uprobes_x86_64.h" +#elif defined (__i386__) +#include "uprobes_i386.h" +#elif defined (__powerpc64__) +#include "uprobes_ppc64.h" +#elif defined (__s390__) || defined (__s390x__) +#include "uprobes_s390.h" +#else +#error "Unsupported architecture" +#endif diff --git a/runtime/uprobes/uprobes_i386.c b/runtime/uprobes/uprobes_i386.c new file mode 100644 index 00000000..90d50ba0 --- /dev/null +++ b/runtime/uprobes/uprobes_i386.c @@ -0,0 +1,302 @@ +/* + * Userspace Probes (UProbes) + * arch/i386/kernel/uprobes_i386.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2006 + */ +#define UPROBES_IMPLEMENTATION 1 +#include "uprobes.h" +#include <linux/uaccess.h> + +/* Adapted from arch/x86_64/kprobes.c */ +#undef W +#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ + (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ + (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ + (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ + (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ + << (row % 32)) + static const unsigned long good_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ------------------------------- */ + W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */ + W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0), /* 10 */ + W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */ + W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */ + W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ + W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */ + W(0x60, 1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,0)| /* 60 */ + W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ + W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1), /* 90 */ + W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ + W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ + W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */ + W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1), /* d0 */ + W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */ + W(0xf0, 0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,1) /* f0 */ + /* ------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + }; + + static const unsigned long good_2byte_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ------------------------------- */ + W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 00 */ + W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ + W(0x20, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ + W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ + W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ + W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ + W(0x60, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 60 */ + W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */ + W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ + W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */ + W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ + W(0xc0, 1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1)| /* c0 */ + W(0xd0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* d0 */ + W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */ + W(0xf0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) /* f0 */ + /* ------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + }; + +/* + * TODO: + * - Allow valid 2-byte opcodes (first byte = 0x0f). + * - Where necessary, examine the modrm byte and allow valid instructions + * in the different Groups and fpu instructions. + * - Allow at least some instruction prefixes. + * - Note: If we go past the first byte, do we need to verify that + * subsequent bytes were actually there, rather than off the last page? + * Probably overkill. We don't verify that they specified the first byte + * of the instruction, either. + * - Be clearer about which instructions we'll never probe. 
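+ *
+ * How the table lookup works (worked example): on i386 each
+ * unsigned long in good_insns[] covers 32 opcodes, built by OR-ing
+ * two W() rows.  test_bit(0xc3, good_insns) -- "ret" -- reads bit
+ * 0xc3 % 32 = 3 of word 0xc3 / 32 = 6, which W(0xc0, 1,1,1,1, ...)
+ * sets, so ret is probeable; test_bit(0xf4, good_insns) -- "hlt" --
+ * finds bit 0xf4 % 32 = 20 of word 7 clear, so hlt is rejected.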
+ */
+
+/*
+ * opcodes we'll probably never support:
+ * 63 - arpl
+ * 6c-6d, e4-e5, ec-ed - in
+ * 6e-6f, e6-e7, ee-ef - out
+ * cc, cd - int3, int
+ * cf - iret
+ * d6 - illegal instruction
+ * f1 - int1/icebp
+ * f4 - hlt
+ * fa, fb - cli, sti
+ *
+ * opcodes we may need to refine support for:
+ * 0f - valid 2-byte opcodes
+ * 66 - data16 prefix
+ * 8f - Group 1 - only reg = 0 is OK
+ * c6-c7 - Group 11 - only reg = 0 is OK
+ * d9-df - fpu insns with some illegal encodings
+ * fe - Group 4 - only reg = 1 or 2 is OK
+ * ff - Group 5 - only reg = 0-6 is OK
+ *
+ * others -- Do we need to support these?
+ * 07, 17, 1f - pop es, pop ss, pop ds
+ * 26, 2e, 36, 3e, 64, 65 - es:, cs:, ss:, ds:, fs:, gs: segment prefixes
+ * 67 - addr16 prefix
+ * 9b - wait/fwait
+ * ce - into
+ * f0 - lock prefix
+ * f2, f3 - repnz, repz prefixes
+ */
+
+static
+int arch_validate_probed_insn(struct uprobe_probept *ppt)
+{
+	uprobe_opcode_t *insn = ppt->insn;
+
+	if (insn[0] == 0x66)
+		/* Skip operand-size prefix */
+		insn++;
+	if (test_bit(insn[0], good_insns))
+		return 0;
+	if (insn[0] == 0x0f) {
+		if (test_bit(insn[1], good_2byte_insns))
+			return 0;
+		printk(KERN_ERR "uprobes does not currently support probing "
+			"instructions with the 2-byte opcode 0x0f 0x%2.2x\n",
+			insn[1]);
+		return -EPERM;
+	}
+	printk(KERN_ERR "uprobes does not currently support probing "
+		"instructions whose first byte is 0x%2.2x\n", insn[0]);
+	return -EPERM;
+}
+
+/*
+ * Get an instruction slot from the process's SSOL area, containing the
+ * instruction at ppt's probepoint.  Point the eip at that slot, in
+ * preparation for single-stepping out of line.
+ */
+static
+void uprobe_pre_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
+	struct pt_regs *regs)
+{
+	struct uprobe_ssol_slot *slot;
+
+	slot = uprobe_get_insn_slot(ppt);
+	if (!slot) {
+		utask->doomed = 1;
+		return;
+	}
+	regs->eip = (long)slot->insn;
+	utask->singlestep_addr = regs->eip;
+}
+
+/*
+ * Called by uprobe_post_ssout() to adjust the return address
+ * pushed by a call instruction executed out-of-line.
+ */
+static void adjust_ret_addr(long esp, long correction,
+		struct uprobe_task *utask)
+{
+	int nleft;
+	long ra;
+
+	nleft = copy_from_user(&ra, (const void __user *) esp, 4);
+	if (unlikely(nleft != 0))
+		goto fail;
+	ra += correction;
+	nleft = copy_to_user((void __user *) esp, &ra, 4);
+	if (unlikely(nleft != 0))
+		goto fail;
+	return;
+
+fail:
+	printk(KERN_ERR
+		"uprobes: Failed to adjust return address after"
+		" single-stepping call instruction;"
+		" pid=%d, esp=%#lx\n", current->pid, esp);
+	utask->doomed = 1;
+}
+
+/*
+ * Called after single-stepping.  ppt->vaddr is the address of the
+ * instruction whose first byte has been replaced by the "int3"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is utask->singlestep_addr.
+ *
+ * This function prepares to return from the post-single-step
+ * interrupt.  We have to fix up the stack as follows:
+ *
+ * 0) Typically, the new eip is relative to the copied instruction.  We
+ * need to make it relative to the original instruction.  Exceptions are
+ * return instructions and absolute or indirect jump or call instructions.
+ *
+ * 1) If the single-stepped instruction was a call, the return address
+ * that is atop the stack is the address following the copied instruction.
+ * We need to make it the address following the original instruction.
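+ *
+ * Worked example with made-up addresses: suppose a 5-byte relative
+ * call (e8 xx xx xx xx) at vaddr 0x08048100 is copied to an SSOL
+ * slot at 0xb7f00000 and single-stepped there.  The cpu pushes the
+ * return address 0xb7f00005 (copy_eip + 5); adjust_ret_addr() adds
+ * the correction (orig_eip - copy_eip), making it 0x08048105, the
+ * address following the original call.  The same correction
+ * re-bases the new eip per item 0 above.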
+ */ +static +void uprobe_post_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt, + struct pt_regs *regs) +{ + long next_eip = 0; + long copy_eip = utask->singlestep_addr; + long orig_eip = ppt->vaddr; + uprobe_opcode_t *insn = ppt->insn; + + up_read(&ppt->slot->rwsem); + + if (insn[0] == 0x66) + /* Skip operand-size prefix */ + insn++; + + switch (insn[0]) { + case 0xc3: /* ret/lret */ + case 0xcb: + case 0xc2: + case 0xca: + next_eip = regs->eip; + /* eip is already adjusted, no more changes required*/ + break; + case 0xe8: /* call relative - Fix return addr */ + adjust_ret_addr(regs->esp, (orig_eip - copy_eip), utask); + break; + case 0xff: + if ((insn[1] & 0x30) == 0x10) { + /* call absolute, indirect */ + /* Fix return addr; eip is correct. */ + next_eip = regs->eip; + adjust_ret_addr(regs->esp, (orig_eip - copy_eip), + utask); + } else if ((insn[1] & 0x31) == 0x20 || + (insn[1] & 0x31) == 0x21) { + /* jmp near or jmp far absolute indirect */ + /* eip is correct. */ + next_eip = regs->eip; + } + break; + case 0xea: /* jmp absolute -- eip is correct */ + next_eip = regs->eip; + break; + default: + break; + } + + if (next_eip) + regs->eip = next_eip; + else + regs->eip = orig_eip + (regs->eip - copy_eip); +} + +/* + * Replace the return address with the trampoline address. Returns + * the original return address. + */ +static +unsigned long arch_hijack_uret_addr(unsigned long trampoline_address, + struct pt_regs *regs, struct uprobe_task *utask) +{ + int nleft; + unsigned long orig_ret_addr; +#define RASIZE (sizeof(unsigned long)) + + nleft = copy_from_user(&orig_ret_addr, + (const void __user *)regs->esp, RASIZE); + if (unlikely(nleft != 0)) + return 0; + + if (orig_ret_addr == trampoline_address) + /* + * There's another uretprobe on this function, and it was + * processed first, so the return address has already + * been hijacked. + */ + return orig_ret_addr; + + nleft = copy_to_user((void __user *)regs->esp, + &trampoline_address, RASIZE); + if (unlikely(nleft != 0)) { + if (nleft != RASIZE) { + printk(KERN_ERR "uretprobe_entry_handler: " + "return address partially clobbered -- " + "pid=%d, %%esp=%#lx, %%eip=%#lx\n", + current->pid, regs->esp, regs->eip); + utask->doomed = 1; + } /* else nothing written, so no harm */ + return 0; + } + return orig_ret_addr; +} diff --git a/runtime/uprobes/uprobes_i386.h b/runtime/uprobes/uprobes_i386.h new file mode 100644 index 00000000..6e2aae70 --- /dev/null +++ b/runtime/uprobes/uprobes_i386.h @@ -0,0 +1,67 @@ +#ifndef _ASM_UPROBES_H +#define _ASM_UPROBES_H +/* + * Userspace Probes (UProbes) + * include/asm-i386/uprobes.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ *
+ * Copyright (C) IBM Corporation, 2006
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/signal.h>
+
+/* Normally defined in Kconfig */
+#define CONFIG_URETPROBES 1
+#define CONFIG_UPROBES_SSOL 1
+
+typedef u8 uprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION 0xcc
+#define BP_INSN_SIZE 1
+#define MAX_UINSN_BYTES 16
+#define SLOT_IP 12	/* instruction pointer slot from include/asm/elf.h */
+
+#define BREAKPOINT_SIGNAL SIGTRAP
+#define SSTEP_SIGNAL SIGTRAP
+
+/* Architecture-specific switch for where the IP points after a bp hit */
+#define ARCH_BP_INST_PTR(inst_ptr)	(inst_ptr - BP_INSN_SIZE)
+
+struct uprobe_probept;
+struct uprobe_task;
+static int arch_validate_probed_insn(struct uprobe_probept *ppt);
+
+/* On i386, an int3 trap leaves eip pointing past the int3 instruction. */
+static inline unsigned long arch_get_probept(struct pt_regs *regs)
+{
+	return (unsigned long) (regs->eip - BP_INSN_SIZE);
+}
+
+static inline void arch_reset_ip_for_sstep(struct pt_regs *regs)
+{
+	regs->eip -= BP_INSN_SIZE;
+}
+
+static inline void arch_restore_uret_addr(unsigned long ret_addr,
+	struct pt_regs *regs)
+{
+	regs->eip = ret_addr;
+}
+
+static unsigned long arch_hijack_uret_addr(unsigned long trampoline_addr,
+	struct pt_regs *regs, struct uprobe_task *utask);
+
+#endif /* _ASM_UPROBES_H */