diff options
Diffstat (limited to 'runtime')
44 files changed, 3006 insertions, 1952 deletions
diff --git a/runtime/.gitignore b/runtime/.gitignore new file mode 100644 index 00000000..ceddd64c --- /dev/null +++ b/runtime/.gitignore @@ -0,0 +1 @@ +!staprun diff --git a/runtime/ChangeLog b/runtime/ChangeLog index ee191022..f0f65215 100644 --- a/runtime/ChangeLog +++ b/runtime/ChangeLog @@ -1,3 +1,117 @@ +2008-05-06 Masami Hiramatsu <mhiramat@redhat.com> + + PR 5648 + * print_old.c (stp_print_flush): Fix unaligned access warning on + ia64. + * print_new.c (stp_print_flush): Ditto. + +2008-05-06 Masami Hiramatsu <mhiramat@redhat.com> + + PR 5648 + * vsprintf.c (_stp_vsnprintf): Fix memcpy's endianess issue. + +2008-05-05 Frank Ch. Eigler <fche@elastic.org> + + PR 6481. + * time.c (__stp_time_timer_callback): Reenable irq's before + mod_timer. + +2008-05-05 David Smith <dsmith@redhat.com> + + * task_finder.c (stap_utrace_detach_ops): Make sure we ignore + /sbin/init. + (__stp_utrace_attach): Added function to handle details of + attaching a utrace engine. + (__stp_utrace_task_finder_report_clone): Calls + __stp_utrace_attach. + (__stp_utrace_task_finder_report_exec): Ditto. + (stap_start_task_finder): Ditto. + +2008-04-30 Masami Hiramatsu <mhiramat@redhat.com> + + PR 5648 + From Shaohua Li <shaohua.li@intel.com> + * vsprintf.c (_stp_vsnprintf): Fix unaligned access warning on ia64. + +2008-04-29 David Smith <dsmith@redhat.com> + + * task_finder.c: Made more robust by ensuring that all utrace + attaches have a corresponding utrace detach. + +2008-04-28 Frank Ch. Eigler <fche@elastic.org> + + * runtime.h (TEST_MODE): Remove. + +2008-04-25 David Smith <dsmith@redhat.com> + + From Srinivasa <srinivasa@in.ibm.com> + * task_finder.c (__stp_get_mm_path): Fixed kernel 2.6.25 change. + +2008-04-24 David Smith <dsmith@redhat.com> + + * task_finder.c (__stp_get_mm_path): Made kernel 2.6.25 changes. + +2008-04-16 David Smith <dsmith@redhat.com> + + * task_finder.c (__stp_get_mm_path): Made kernel 2.6.18 changes. 
+ +2008-04-15 David Smith <dsmith@redhat.com> + + PR 5961 (partial) + * task_finder.c (stap_start_task_finder): When an interesting + thread is found that is already running, make sure to set up + thread death notification. + +2008-04-15 hunt <hunt@redhat.com> + * print.c (_stp_pbuf_full): Delete. + +2008-04-15 hunt <hunt@redhat.com> + * stack-x86_64.c (_stp_stack_print_fallback): Add levels. + (__stp_stack_print): Count levels properly. + +2008-04-15 Martin Hunt <hunt@redhat.com> + + Finish support for limits on backtrace depth. + * runtime.h (MAXTRACE): Default to 20. + * stack.c (_stp_stack_print): Call __stp_stack_print + with levels set properly. + * sym.c (_stp_func_print): Return a value indicating + if something was printed. + + Support for i386 and x86_64 on 2.6.25 kernel + * unwind/i386.h: Support unified registers on 2.6.25. + Remove unused frame stuff, including STACK_*. + * stack-i386.c (__stp_stack_print): Support unified + registers on 2.6.25. + * regs.h (REG_FP): Define for i386. + +2008-04-15 Frank Ch. Eigler <fche@elastic.org> + + PR 6410. + * unwind.c, unwind.h: Make body conditional in STP_USE_DWARF_UNWINDER. + * stack-x86_64.c (__stp_stack_print): Tolerate !unwinder. + +2008-04-15 Frank Ch. Eigler <fche@elastic.org> + + PR 6405 + * autoconf-module-nsections.c: New file. + +2008-04-15 Frank Ch. Eigler <fche@elastic.org> + + * unwind/i386.h (STACK_BOTTOM, STACK_TOP): Comment out these + unused definitions, for they collide with some kernels + (2.6.25-0.121.rc5.git4 rawhide). + +2008-04-13 Frank Ch. Eigler <fche@elastic.org> + + * print.c (_stp_pbuf_full): New function to note full print buffer. + * stack-{i386,x86_64}.c: Use it in all stack-searching loops, to + impose another limit against unbounded iteration. + +2008-03-31 Martin Hunt <hunt@redhat.com> + + * runtime.h (STP_USE_DWARF_UNWINDER): Define. 
+ 2008-04-04 Masami Hiramatsu <mhiramat@redhat.com> PR 6028 @@ -6,11 +120,37 @@ * regs-ia64.c (ia64_fetch_register): Don't unwind stack if it has already unwound stack in same probe. +2008-03-30 Martin Hunt <hunt@redhat.com> + + * runtime.h (STP_USE_FRAME_POINTER): Define when frame pointers + are available in the kernel and can be used. + * stack-arm.c: Use STP_USE_FRAME_POINTER. + * stack-i386.c: Ditto. + * unwind/i386.h: Ditto. + * unwind/x86_64.h: Ditto. + 2008-04-04 David Smith <dsmith@redhat.com> PR 5961 (partial) * task_finder.c: New file. +2008-03-28 Martin Hunt <hunt@redhat.com> + + * copy.c (_stp_read_address): New function. Safely read + kernel or userspace. + +2008-03-26 Martin Hunt <hunt@redhat.com> + Fixes to get i386 working. + * unwind.c (unwind): Fix types in debug print. + * stack-i386.c (_stp_stack_print_fallback): New function. + (__stp_stack_print): Call _stp_stack_print_fallback() if unwinder + appears to fail. + +2008-03-25 Martin Hunt <hunt@redhat.com> + + * unwind.c (unwind): Return a positive number to indicate + that unwinding is done. + 2008-04-01 Frank Ch. Eigler <fche@elastic.org> * lket/*: Belatedly remove retired LKET code. diff --git a/runtime/autoconf-module-nsections.c b/runtime/autoconf-module-nsections.c new file mode 100644 index 00000000..c1ce58b7 --- /dev/null +++ b/runtime/autoconf-module-nsections.c @@ -0,0 +1,8 @@ +#include <linux/module.h> + +struct module_sect_attrs x; + +void foo (void) +{ + (void) x.nsections; +} diff --git a/runtime/copy.c b/runtime/copy.c index ef3fd223..6bb22762 100644 --- a/runtime/copy.c +++ b/runtime/copy.c @@ -1,6 +1,6 @@ /* -*- linux-c -*- * Copy from user space functions - * Copyright (C) 2005, 2006, 2007 Red Hat Inc. + * Copyright (C) 2005-2008 Red Hat Inc. * Copyright (C) 2005 Intel Corporation. * * This file is part of systemtap, and is free software. You can @@ -9,11 +9,10 @@ * later version. 
*/ -#ifndef _COPY_C_ /* -*- linux-c -*- */ +#ifndef _COPY_C_ /* -*- linux-c -*- */ #define _COPY_C_ #include "string.c" - /** @file copy.c * @brief Functions to copy from user space. */ @@ -26,6 +25,28 @@ * @{ */ +/** Safely read from userspace or kernelspace. + * On success, returns 0. Returns -EFAULT on error. + * + * This uses __get_user() to read from userspace or + * kernelspace. Will not sleep or cause pagefaults when + * called from within a kprobe context. + * + * @param segment . KERNEL_DS for kernel access + * USER_DS for userspace. + */ + +#define _stp_read_address(x, ptr, segment) \ + ({ \ + long ret; \ + mm_segment_t ofs = get_fs(); \ + set_fs(segment); \ + ret = __stp_get_user(x, ptr); \ + set_fs(ofs); \ + ret; \ + }) + + long _stp_strncpy_from_user(char *dst, const char __user *src, long count); //static long __stp_strncpy_from_user(char *dst, const char __user *src, long count); @@ -110,8 +131,7 @@ do { \ * <i>count</i> bytes and returns <i>count</i>. */ -long -_stp_strncpy_from_user(char *dst, const char __user *src, long count) +long _stp_strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, count)) @@ -119,7 +139,6 @@ _stp_strncpy_from_user(char *dst, const char __user *src, long count) return res; } - /** Copy a block of data from user space. * * If some data could not be copied, this function will pad the copied @@ -133,8 +152,7 @@ _stp_strncpy_from_user(char *dst, const char __user *src, long count) * */ -unsigned long -_stp_copy_from_user (char *dst, const char __user *src, unsigned long count) +unsigned long _stp_copy_from_user(char *dst, const char __user *src, unsigned long count) { if (count) { if (access_ok(VERIFY_READ, src, count)) diff --git a/runtime/debug.h b/runtime/debug.h index 8f877ede..9b2fe5c5 100644 --- a/runtime/debug.h +++ b/runtime/debug.h @@ -14,6 +14,8 @@ * _dbug() writes to systemtap stderr. * errk() writes to the system log. 
*/ +int _stp_transport_state = 0; + #define _dbug(args...) _stp_dbug(__FUNCTION__, __LINE__, args) #define errk(args...) do { \ @@ -21,46 +23,42 @@ printk(args); \ } while (0) -#ifdef DEBUG_TRANSPORT -#undef DEBUG_TRANSPORT -#define DEBUG_TRANSPORT 1 -#else -#define DEBUG_TRANSPORT 0 -#endif +/* + * To use these, enable them from the command line when compiling. + * For example, "stap -DDEBUG_UNWIND=3" + * will activate dbug_unwind() and print messages with level <= 3. + */ -#ifdef DEBUG_UNWIND -#undef DEBUG_UNWIND -#define DEBUG_UNWIND 2 -#else -#define DEBUG_UNWIND 0 -#endif +/* Note: DEBUG_MEM is implemented in alloc.c */ -#ifdef DEBUG_SYMBOLS -#undef DEBUG_SYMBOLS -#define DEBUG_SYMBOLS 4 +#ifdef DEBUG_TRANS /* transport */ +/* Note: transport is debugged using printk() */ +#define dbug_trans(level, args...) do { \ + if ((level) <= DEBUG_TRANS) { \ + printk("%s:%d ",__FUNCTION__, __LINE__); \ + printk(args); \ + } \ + } while (0) #else -#define DEBUG_SYMBOLS 0 +#define dbug_trans(level, args...) ; #endif -#define DEBUG_TYPE (DEBUG_TRANSPORT|DEBUG_UNWIND|DEBUG_SYMBOLS) - -#if DEBUG_TYPE > 0 - -#define dbug(type, args...) do { \ - if ((type) & DEBUG_TYPE) \ +#ifdef DEBUG_UNWIND /* stack unwinder */ +#define dbug_unwind(level, args...) do { \ + if ((level) <= DEBUG_UNWIND) \ _stp_dbug(__FUNCTION__, __LINE__, args); \ } while (0) +#else +#define dbug_unwind(level, args...) ; +#endif -#define kbug(type, args...) do { \ - if ((type) & DEBUG_TYPE) { \ - printk("%s:%d ",__FUNCTION__, __LINE__); \ - printk(args); \ - } \ +#ifdef DEBUG_SYMBOLS +#define dbug_sym(level, args...) do { \ + if ((level) <= DEBUG_SYMBOLS) \ + _stp_dbug(__FUNCTION__, __LINE__, args); \ } while (0) - #else -#define dbug(type, args...) ; -#define kbug(type, args...) ; -#endif /* DEBUG_TYPE > 0 */ +#define dbug_sym(level, args...) 
; +#endif #endif /* _STP_DEBUG_H_ */ diff --git a/runtime/map.c b/runtime/map.c index 513e27df..a436d7ed 100644 --- a/runtime/map.c +++ b/runtime/map.c @@ -15,7 +15,6 @@ * @brief Implements maps (associative arrays) and lists */ -#include "alloc.c" #include "sym.c" #include "stat-common.c" #include "map-stat.c" diff --git a/runtime/print.c b/runtime/print.c index 0442ba09..14a0820b 100644 --- a/runtime/print.c +++ b/runtime/print.c @@ -243,7 +243,6 @@ void _stp_print_char (const char c) pb->len ++; } - /* This function is used when printing maps or stats. */ /* Probably belongs elsewhere, but is here for now. */ /* It takes a format specification like those used for */ diff --git a/runtime/print_new.c b/runtime/print_new.c index 75bbd82b..07af2e33 100644 --- a/runtime/print_new.c +++ b/runtime/print_new.c @@ -40,11 +40,13 @@ void EXPORT_FN(stp_print_flush) (_stp_pbuf *pb) else atomic_inc (&_stp_transport_failures); #else - struct _stp_trace *t = relay_reserve(_stp_utt->rchan, sizeof(*t) + len); - if (likely(t)) { - t->sequence = _stp_seq_inc(); - t->pdu_len = len; - memcpy((void *) t + sizeof(*t), pb->buf, len); + void *buf = relay_reserve(_stp_utt->rchan, + sizeof(struct _stp_trace) + len); + if (likely(buf)) { + struct _stp_trace t = { .sequence = _stp_seq_inc(), + .pdu_len = len}; + memcpy(buf, &t, sizeof(t)); // prevent unaligned access + memcpy(buf + sizeof(t), pb->buf, len); } else atomic_inc (&_stp_transport_failures); #endif diff --git a/runtime/print_old.c b/runtime/print_old.c index 5ee050b5..5c117e5f 100644 --- a/runtime/print_old.c +++ b/runtime/print_old.c @@ -35,11 +35,13 @@ void EXPORT_FN(stp_print_flush) (_stp_pbuf *pb) else atomic_inc (&_stp_transport_failures); #else - struct _stp_trace *t = relay_reserve(_stp_utt->rchan, sizeof(*t) + len); - if (likely(t)) { - t->sequence = _stp_seq_inc(); - t->pdu_len = len; - memcpy((void *) t + sizeof(*t), pb->buf, len); + void *buf = relay_reserve(_stp_utt->rchan, + sizeof(struct _stp_trace) + len); + if 
(likely(buf)) { + struct _stp_trace t = { .sequence = _stp_seq_inc(), + .pdu_len = len}; + memcpy(buf, &t, sizeof(t)); // prevent unaligned access + memcpy(buf + sizeof(t), pb->buf, len); } else atomic_inc (&_stp_transport_failures); #endif diff --git a/runtime/probes.c b/runtime/probes.c index 19539044..6fe844fb 100644 --- a/runtime/probes.c +++ b/runtime/probes.c @@ -25,7 +25,7 @@ void _stp_unregister_jprobes (struct jprobe *probes, int num_probes) int i; for (i = 0; i < num_probes; i++) unregister_jprobe(&probes[i]); - dbug("All jprobes removed\n"); + // dbug("All jprobes removed\n"); } /** Register a group of jprobes. @@ -46,7 +46,7 @@ int _stp_register_jprobes (struct jprobe *probes, int num_probes) ret = -1; /* FIXME */ goto out; } - dbug("inserting jprobe at %s (%p)\n", probes[i].kp.addr, addr); + // dbug("inserting jprobe at %s (%p)\n", probes[i].kp.addr, addr); probes[i].kp.addr = (kprobe_opcode_t *)addr; ret = register_jprobe(&probes[i]); if (ret) @@ -69,7 +69,7 @@ void _stp_unregister_kprobes (struct kprobe *probes, int num_probes) int i; for (i = 0; i < num_probes; i++) unregister_kprobe(&probes[i]); - dbug("All kprobes removed\n"); + // dbug("All kprobes removed\n"); } @@ -83,7 +83,7 @@ void _stp_unregister_kretprobes (struct kretprobe *probes, int num_probes) int i; for (i = 0; i < num_probes; i++) unregister_kretprobe(&probes[i]); - dbug("All return probes removed\n"); + // dbug("All return probes removed\n"); } #endif @@ -104,7 +104,7 @@ int _stp_register_kprobes (struct kprobe *probes, int num_probes) ret = -1; goto out; } - dbug("inserting kprobe at %s (%p)\n", probes[i].addr, addr); + // dbug("inserting kprobe at %s (%p)\n", probes[i].addr, addr); probes[i].addr = (kprobe_opcode_t *)addr; ret = register_kprobe(&probes[i]); if (ret) @@ -136,7 +136,7 @@ int _stp_register_kretprobes (struct kretprobe *probes, int num_probes) ret = -1; /* FIXME */ goto out; } - dbug("inserting kretprobe at %s (%p)\n", probes[i].kp.addr, addr); + // dbug("inserting 
kretprobe at %s (%p)\n", probes[i].kp.addr, addr); probes[i].kp.addr = (kprobe_opcode_t *)addr; ret = register_kretprobe(&probes[i]); if (ret) diff --git a/runtime/regs.c b/runtime/regs.c index 2daeaa3c..5821f7e7 100644 --- a/runtime/regs.c +++ b/runtime/regs.c @@ -383,317 +383,60 @@ void _stp_print_regs(struct pt_regs * regs) #endif -/* - * (Theoretically) arch-independent scheme for binary lookup of register - * values (from pt_regs) by register name. A register may be called by - * more than one name. - */ -struct _stp_register_desc { - const char *name; - unsigned short size; // in bytes - unsigned short offset; // in bytes, from start of pt_regs -}; - -struct _stp_register_table { - struct _stp_register_desc *registers; - unsigned nr_registers; - unsigned nr_slots; // capacity -}; - -static DEFINE_SPINLOCK(_stp_register_table_lock); -static void _stp_populate_register_table(void); - -/* - * If the named register is in the list, return its slot number and *found=1. - * Else *found=0 and return the slot number where the name should be inserted. - */ -static int _stp_lookup_register(const char *name, - struct _stp_register_table *table, int *found) -{ - unsigned begin, mid, end; - - *found = 0; - end = table->nr_registers; - if (end == 0) - return 0; - begin = 0; - mid = -1; - for (;;) { - int cmp; - int prev_mid = mid; - mid = (begin + end) / 2; - if (mid == prev_mid) - break; - cmp = strcmp(name, table->registers[mid].name); - if (cmp == 0) { - *found = 1; - return mid; - } else if (cmp < 0) - end = mid; - else - begin = mid; - } - if (begin == 0 && strcmp(name, table->registers[0].name) < 0) - return 0; - return begin + 1; -} - -/* - * If found, return 1 and the size and/or offset in the pt_regs array. - * Else return 0. 
- */ -static int _stp_find_register(const char *name, - struct _stp_register_table *table, size_t *size, size_t *offset) -{ - int slot, found; - if (unlikely(table->nr_registers == 0)) { - unsigned long flags; - /* - * Should we do this at the beginning of time to avoid - * the possibility of spending too long in a handler? - */ - spin_lock_irqsave(&_stp_register_table_lock, flags); - if (table->nr_registers == 0) - _stp_populate_register_table(); - spin_unlock_irqrestore(&_stp_register_table_lock, flags); - } - slot = _stp_lookup_register(name, table, &found); - if (found) { - if (size) - *size = table->registers[slot].size; - if (offset) - *offset = table->registers[slot].offset; - return 1; - } - return 0; -} - -/* - * Add name to the register-lookup table. Note that the name pointer - * is merely copied, not strdup-ed. - */ -void _stp_add_register(const char *name, struct _stp_register_table *table, - size_t size, size_t offset) -{ - int idx, found; - struct _stp_register_desc *slot; - - idx = _stp_lookup_register(name, table, &found); - if (found) - _stp_error("stap runtime internal error: " - "register name %s used twice\n", name); - if (table->nr_registers >= table->nr_slots) - _stp_error("stap runtime internal error: " - "register table overflow\n"); - slot = &table->registers[idx]; - - // Move the slots later in the array out of the way. - if (idx < table->nr_registers) - memmove(slot+1, slot, - sizeof(*slot) * (table->nr_registers - idx)); - table->nr_registers++; - slot->name = name; - slot->size = size; - slot->offset = offset; -} - -#if defined(__i386__) || defined(__x86_64__) -/* - * This register set is used for i386 kernel and apps, and for 32-bit apps - * running on x86_64. For the latter case, this allows the user to use - * things like reg("eax") as well as the standard x86_64 pt_regs names. - */ - -/* - * x86_64 and i386 are especially ugly because the pt_reg member names - * changed as part of the x86 merge. 
We allow (and use, as needed) - * either the pre-merge name or the post-merge name. - */ - -// I count 32 different names, but add a fudge factor. -static struct _stp_register_desc i386_registers[32+8]; -static struct _stp_register_table i386_register_table = { - .registers = i386_registers, - .nr_slots = ARRAY_SIZE(i386_registers) -}; - -/* - * sizeof(long) is indeed what we want here, for both i386 and x86_64. - * Unlike function args, x86_64 pt_regs is the same even if the int3 - * was in an -m32 app. - */ -#define ADD_PT_REG(name, member) \ - _stp_add_register(name, &i386_register_table, \ - sizeof(long), offsetof(struct pt_regs, member)) -#define ADD2NAMES(nm1, nm2, member) \ - do { \ - ADD_PT_REG(nm1, member); \ - ADD_PT_REG(nm2, member); \ - } while (0) - -#ifdef STAPCONF_X86_UNIREGS -/* Map "ax" and "eax" to regs->ax, and "cs" and "xcs" to regs->cs */ -#define ADD_EREG(nm) ADD2NAMES(#nm, "e" #nm, nm) -#define ADD_XREG(nm) ADD2NAMES(#nm, "x" #nm, nm) -#define ADD_FLAGS_REG() ADD_EREG(flags) -#define EREG(nm, regs) ((regs)->nm) -#define RREG(nm, regs) ((regs)->nm) +/* Function arguments */ -#else /* ! STAPCONF_X86_UNIREGS */ +#define _STP_REGPARM 0x8000 +#define _STP_REGPARM_MASK ((_STP_REGPARM) - 1) -#ifdef __i386__ -#define ADD_EREG(nm) ADD2NAMES(#nm, "e" #nm, e##nm) -#define ADD_XREG(nm) ADD2NAMES(#nm, "x" #nm, x##nm) -#define ADD_FLAGS_REG() ADD_EREG(flags) -#define EREG(nm, regs) ((regs)->e##nm) -#else /* __x86_64__ */ /* - * Map "eax" to regs->rax and "xcs" to regs->cs. Other mappings are - * handled in x86_64_register_table. + * x86_64 and i386 are especially ugly because: + * 1) the pt_reg member names changed as part of the x86 merge. We use + * either the pre-merge name or the post-merge name, as needed. + * 2) -m32 apps on x86_64 look like i386 apps, so we need to support + * those semantics on both i386 and x86_64. 
*/ -#define ADD_EREG(nm) ADD_PT_REG("e" #nm, r##nm) -#define ADD_XREG(nm) ADD_PT_REG("x" #nm, nm) -#define ADD_FLAGS_REG() ADD2NAMES("flags", "eflags", eflags) -/* Note: After a store to %eax, %rax holds the ZERO-extended %eax. */ -#define EREG(nm, regs) ((regs)->r##nm) -#define RREG(nm, regs) ((regs)->r##nm) -#endif /* __x86_64__ */ - -#endif /* ! STAPCONF_X86_UNIREGS */ -static void _stp_populate_i386_register_table(void) -{ - /* - * The order here is the same as in i386 struct pt_regs. - * It's a different order from x86_64 pt_regs; but that doesn't - * matter -- even when compiling for x86_64 -- because the - * offsets are determined by offsetof(), not the calling order. - */ - ADD_EREG(bx); - ADD_EREG(cx); - ADD_EREG(dx); - ADD_EREG(si); - ADD_EREG(di); - ADD_EREG(bp); - ADD_EREG(ax); #ifdef __i386__ - ADD_XREG(ds); - ADD_XREG(es); - ADD_XREG(fs); - /* gs not saved */ -#endif #ifdef STAPCONF_X86_UNIREGS - ADD2NAMES("orig_ax", "orig_eax", orig_ax); +#define EREG(nm, regs) ((regs)->nm) #else -#ifdef __i386__ - ADD2NAMES("orig_ax", "orig_eax", orig_eax); -#else /* __x86_64__ */ - ADD2NAMES("orig_ax", "orig_eax", orig_rax); +#define EREG(nm, regs) ((regs)->e##nm) #endif -#endif /* STAPCONF_X86_UNIREGS */ - ADD_EREG(ip); - ADD_XREG(cs); - ADD_FLAGS_REG(); - ADD_EREG(sp); - ADD_XREG(ss); -} -/* - * For x86_64, this gets a copy of the saved 64-bit register (e.g., regs->rax). - * After a store to %eax, %rax holds the ZERO-extended %eax. - */ -static long -_stp_get_reg32_by_name(const char *name, struct pt_regs *regs) +static long _stp_get_sp(struct pt_regs *regs) { - size_t offset = 0; - long value; // works for i386 or x86_64 - BUG_ON(!name); - if (!regs) - _stp_error("Register values not available in this context.\n"); -#ifdef __i386__ - if (!user_mode(regs)) { - /* esp and ss aren't saved on trap from kernel mode. 
*/ - if (!strcmp(name,"esp") || !strcmp(name, "sp")) - return (long) &EREG(sp, regs); - if (!strcmp(name,"xss") || !strcmp(name, "ss")) { - /* - * Assume ss register hasn't changed since we took - * the trap. - */ - unsigned short ss; - asm volatile("movw %%ss, %0" : : "m" (ss)); - return ss; - } - } -#endif - if (!_stp_find_register(name, &i386_register_table, NULL, &offset)) - _stp_error("Unknown register name: %s\n", name); - (void) memcpy(&value, ((char*)regs) + offset, sizeof(value)); - return value; + if (!user_mode(regs)) + return (long) &EREG(sp, regs); + return EREG(sp, regs); } -#endif /* __i386__ || __x86_64__ */ - -#ifdef __i386__ -static void _stp_populate_register_table(void) +static int _stp_get_regparm(int regparm, struct pt_regs *regs) { - _stp_populate_i386_register_table(); + if (regparm == 0) { + /* Default */ + if (user_mode(regs)) + return 0; + else + // Kernel is built with -mregparm=3. + return 3; + } else + return (regparm & _STP_REGPARM_MASK); } #endif /* __i386__ */ #ifdef __x86_64__ -// I count 32 different names (not the same 32 as i386), but add a fudge factor. -static struct _stp_register_desc x86_64_registers[32+8]; -static struct _stp_register_table x86_64_register_table = { - .registers = x86_64_registers, - .nr_slots = ARRAY_SIZE(x86_64_registers) -}; - -/* NB: Redefining ADD_PT_REG here. ADD2NAMES and such change accordingly. 
*/ -#undef ADD_PT_REG -#define ADD_PT_REG(name, member) \ - _stp_add_register(name, &x86_64_register_table, \ - sizeof(unsigned long), offsetof(struct pt_regs, member)) - -#define ADD_NREG(nm) ADD_PT_REG(#nm, nm) - #ifdef STAPCONF_X86_UNIREGS -#define ADD_RREG(nm) ADD2NAMES(#nm, "r" #nm, nm) +#define EREG(nm, regs) ((regs)->nm) +#define RREG(nm, regs) ((regs)->nm) #else -#define ADD_RREG(nm) ADD2NAMES(#nm, "r" #nm, r##nm) +#define EREG(nm, regs) ((regs)->r##nm) +#define RREG(nm, regs) ((regs)->r##nm) #endif -static void _stp_populate_register_table(void) +static long _stp_get_sp(struct pt_regs *regs) { - /* Same order as in struct pt_regs */ - ADD_NREG(r15); - ADD_NREG(r14); - ADD_NREG(r13); - ADD_NREG(r12); - ADD_RREG(bp); - ADD_RREG(bx); - ADD_NREG(r11); - ADD_NREG(r10); - ADD_NREG(r9); - ADD_NREG(r8); - ADD_RREG(ax); - ADD_RREG(cx); - ADD_RREG(dx); - ADD_RREG(si); - ADD_RREG(di); -#ifdef STAPCONF_X86_UNIREGS - ADD2NAMES("orig_ax", "orig_rax", orig_ax); -#else - ADD2NAMES("orig_ax", "orig_rax", orig_rax); -#endif - ADD_RREG(ip); - ADD_NREG(cs); - ADD_FLAGS_REG(); - ADD_RREG(sp); - ADD_NREG(ss); - - _stp_populate_i386_register_table(); + return RREG(sp, regs); } static int _stp_probing_32bit_app(struct pt_regs *regs) @@ -704,54 +447,26 @@ static int _stp_probing_32bit_app(struct pt_regs *regs) } /* Ensure that the upper 32 bits of val are a sign-extension of the lower 32. */ -static long _stp_sign_extend32(long val) +static int64_t __stp_sign_extend32(int64_t val) { int32_t *val_ptr32 = (int32_t*) &val; return *val_ptr32; } -/* - * Get the value of the 64-bit register with the specified name. "rax", - * "ax", and "eax" all get you regs->[r]ax. Sets *reg32=1 if the name - * designates a 32-bit register (e.g., "eax"), 0 otherwise. 
- */ -static unsigned long -_stp_get_reg64_by_name(const char *name, struct pt_regs *regs, int *reg32) +static int _stp_get_regparm(int regparm, struct pt_regs *regs) { - size_t offset = 0; - unsigned long value; - BUG_ON(!name); - if (!regs) { - _stp_error("Register values not available in this context.\n"); - return 0; - } - if (_stp_find_register(name, &x86_64_register_table, NULL, &offset)) { - if (reg32) - *reg32 = 0; - (void) memcpy(&value, ((char*)regs) + offset, sizeof(value)); - return value; - } - if (reg32) - *reg32 = 1; - return _stp_get_reg32_by_name(name, regs); + if (regparm == 0) { + /* Default */ + if (_stp_probing_32bit_app(regs)) + return 0; + else + return 6; + } else + return (regparm & _STP_REGPARM_MASK); } -#endif /* __x86_64__ */ - -/* Function arguments */ - -#define _STP_REGPARM 0x8000 -#define _STP_REGPARM_MASK ((_STP_REGPARM) - 1) +#endif /* __x86_64__ */ #if defined(__i386__) || defined(__x86_64__) -static long _stp_get_sp(struct pt_regs *regs) -{ -#ifdef __i386__ - if (!user_mode(regs)) - return (long) &EREG(sp, regs); -#endif - return EREG(sp, regs); -} - /* * Use this for i386 kernel and apps, and for 32-bit apps running on x86_64. * Does arch-specific work for fetching function arg #argnum (1 = first arg). @@ -792,21 +507,6 @@ static int _stp_get_arg32_by_number(int n, int nr_regargs, } #endif /* __i386__ || __x86_64__ */ -#ifdef __i386__ -static int _stp_get_regparm(int regparm, struct pt_regs *regs) -{ - if (regparm == 0) { - /* Default */ - if (user_mode(regs)) - return 0; - else - // Kernel is built with -mregparm=3. - return 3; - } else - return (regparm & _STP_REGPARM_MASK); -} -#endif - #ifdef __x86_64__ /* See _stp_get_arg32_by_number(). 
*/ static int _stp_get_arg64_by_number(int n, int nr_regargs, @@ -835,18 +535,6 @@ static int _stp_get_arg64_by_number(int n, int nr_regargs, return 0; } } - -static int _stp_get_regparm(int regparm, struct pt_regs *regs) -{ - if (regparm == 0) { - /* Default */ - if (_stp_probing_32bit_app(regs)) - return 0; - else - return 6; - } else - return (regparm & _STP_REGPARM_MASK); -} #endif /* __x86_64__ */ /** @} */ diff --git a/runtime/regs.h b/runtime/regs.h index c1e2344b..4954020f 100644 --- a/runtime/regs.h +++ b/runtime/regs.h @@ -1,5 +1,5 @@ /* common register includes used in multiple modules - * Copyright (C) 2005 Red Hat Inc. + * Copyright (C) 2005-2008 Red Hat Inc. * Copyright (C) 2005 Intel Corporation. * * This file is part of systemtap, and is free software. You can @@ -14,6 +14,7 @@ #if defined (STAPCONF_X86_UNIREGS) && (defined (__x86_64__) || defined (__i386__)) #define REG_IP(regs) regs->ip #define REG_SP(regs) regs->sp +#define REG_FP(regs) regs->bp; #elif defined (__x86_64__) @@ -24,6 +25,7 @@ #define REG_IP(regs) regs->eip #define REG_SP(regs) regs->esp +#define REG_FP(regs) regs->ebp; #elif defined (__ia64__) #define REG_IP(regs) ((regs)->cr_iip +ia64_psr(regs)->ri) diff --git a/runtime/runtime.h b/runtime/runtime.h index 318d3038..2711f531 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -58,25 +58,32 @@ static struct #define _stp_seq_inc() (atomic_inc_return(&_stp_seq.seq)) -/* TEST_MODE is always defined by systemtap */ -#ifdef TEST_MODE -#define SYSTEMTAP 1 -#else -#define MAXTRYLOCK 1000 -#define TRYLOCKDELAY 100 -#endif - #ifndef MAXSTRINGLEN #define MAXSTRINGLEN 128 #endif +#ifndef MAXTRACE +#define MAXTRACE 20 +#endif + +#ifdef CONFIG_FRAME_POINTER +/* Just because frame pointers are available does not mean we can trust them. 
*/ +#if defined (__i386__) || defined (__arm__) +#define STP_USE_FRAME_POINTER +#endif +#endif + +/* dwarf unwinder only tested so far on i386 and x86_64 */ +#if !defined(STP_USE_FRAME_BUFFER) && (defined(__i386__) || defined(__x86_64__)) +#define STP_USE_DWARF_UNWINDER +#endif + #include "alloc.c" #include "print.c" #include "string.c" #include "io.c" #include "arith.c" #include "copy.c" -#include "sym.h" #include "sym.c" #ifdef STP_PERFMON #include "perf.c" diff --git a/runtime/stack-arm.c b/runtime/stack-arm.c index 0c8ce450..9b0b772d 100644 --- a/runtime/stack-arm.c +++ b/runtime/stack-arm.c @@ -33,7 +33,7 @@ static int __init find_str_pc_offset(void) static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) { -#if defined(CONFIG_FRAME_POINTER) +#ifdef STP_USE_FRAME_POINTER int pc_offset = find_str_pc_offset(); unsigned long *fp = (unsigned long *)regs->ARM_fp; unsigned long *next_fp, *pc; @@ -68,5 +68,5 @@ static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) fp = next_fp; } -#endif +#endif /* STP_USE_FRAME_POINTER */ } diff --git a/runtime/stack-i386.c b/runtime/stack-i386.c index b46ff06b..ad101889 100644 --- a/runtime/stack-i386.c +++ b/runtime/stack-i386.c @@ -8,40 +8,68 @@ * later version. */ -static inline int _stp_valid_stack_ptr(unsigned long context, unsigned long p) +static int _stp_valid_stack_ptr(unsigned long context, unsigned long p) { return p > context && p < context + THREAD_SIZE - 3; } +/* DWARF unwinder failed. Just dump intereting addresses on kernel stack. */ +static void _stp_stack_print_fallback(unsigned long context, unsigned long stack, int verbose, int levels) +{ + unsigned long addr; + while (levels && _stp_valid_stack_ptr(context, stack)) { + if (unlikely(_stp_read_address(addr, (unsigned long *)stack, KERNEL_DS))) { + /* cannot access stack. give up. 
*/ + return; + } + if (_stp_func_print(addr, verbose, 0)) + levels--; + stack++; + } +} + static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) { - unsigned long *stack = (unsigned long *)&REG_SP(regs); - unsigned long context = (unsigned long)stack & ~(THREAD_SIZE - 1); + unsigned long context = (unsigned long)&REG_SP(regs) & ~(THREAD_SIZE - 1); + +#ifdef STP_USE_FRAME_POINTER unsigned long addr; + unsigned long next_fp, fp = REG_FP(regs); -#ifdef CONFIG_FRAME_POINTER - { - #ifdef STAPCONF_X86_UNIREGS - unsigned long ebp = regs->bp; - #else - unsigned long ebp = regs->ebp; - #endif - - while (_stp_valid_stack_ptr(context, (unsigned long)ebp)) { - addr = *(unsigned long *)(ebp + 4); - if (verbose) { - _stp_print_char(' '); - _stp_symbol_print (addr); - _stp_print_char('\n'); - } else - _stp_printf ("0x%08lx ", addr); - ebp = *(unsigned long *)ebp; + while (levels && _stp_valid_stack_ptr(context, (unsigned long)fp)) { + if (unlikely(_stp_read_address(addr, (unsigned long *)(fp + 4), KERNEL_DS))) { + /* cannot access stack. give up. */ + return; + } + _stp_func_print(addr, verbose, 1); + if (unlikely(_stp_read_address(next_fp, (unsigned long *)fp, KERNEL_DS))) { + /* cannot access stack. give up. */ + return; } + levels--; + + /* frame pointers move upwards */ + if (next_fp <= fp) + break; + fp = next_fp; } #else - while (_stp_valid_stack_ptr(context, (unsigned long)stack)) { - addr = *stack++; - _stp_func_print(addr, verbose, 1); + struct unwind_frame_info info; + arch_unw_init_frame_info(&info, regs); + + while (levels && !arch_unw_user_mode(&info)) { + int ret = unwind(&info); + dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info)); + if (ret == 0) { + _stp_func_print(UNW_PC(&info), verbose, 1); + levels--; + continue; + } + /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */ + /* FIXME: is there a way to unwind across kretprobe trampolines? 
*/ + if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline)) + _stp_stack_print_fallback(context, UNW_SP(&info), verbose, levels); + break; } -#endif +#endif /* STP_USE_FRAME_POINTER */ } diff --git a/runtime/stack-x86_64.c b/runtime/stack-x86_64.c index 186b2ad4..783e72bd 100644 --- a/runtime/stack-x86_64.c +++ b/runtime/stack-x86_64.c @@ -1,6 +1,6 @@ /* -*- linux-c -*- * x86_64 stack tracing functions - * Copyright (C) 2005, 2006, 2007 Red Hat Inc. + * Copyright (C) 2005-2008 Red Hat Inc. * * This file is part of systemtap, and is free software. You can * redistribute it and/or modify it under the terms of the GNU General @@ -8,13 +8,43 @@ * later version. */ -static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) +/* DWARF unwinder failed. Just dump intereting addresses on kernel stack. */ +static void _stp_stack_print_fallback(unsigned long stack, int verbose, int levels) { - unsigned long *stack = (unsigned long *)REG_SP(regs); unsigned long addr; + while (levels && stack & (THREAD_SIZE - 1)) { + if (unlikely(_stp_read_address(addr, (unsigned long *)stack, KERNEL_DS))) { + /* cannot access stack. give up. */ + return; + } + if (_stp_func_print(addr, verbose, 0)) + levels--; + stack++; + } +} + +static void __stp_stack_print(struct pt_regs *regs, int verbose, int levels) +{ +#ifdef STP_USE_DWARF_UNWINDER + // FIXME: large stack allocation + struct unwind_frame_info info; + arch_unw_init_frame_info(&info, regs); - while ((long)stack & (THREAD_SIZE-1)) { - addr = *stack++; - _stp_func_print(addr, verbose, 1); + while (levels && !arch_unw_user_mode(&info)) { + int ret = unwind(&info); + dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info)); + if (ret == 0) { + _stp_func_print(UNW_PC(&info), verbose, 1); + levels--; + continue; + } + /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */ + /* FIXME: is there a way to unwind across kretprobe trampolines? 
*/ + if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline)) + _stp_stack_print_fallback(UNW_SP(&info), verbose, levels); + break; } +#else /* ! STP_USE_DWARF_UNWINDER */ + _stp_stack_print_fallback(REG_SP(regs), verbose); +#endif } diff --git a/runtime/stack.c b/runtime/stack.c index 9c01d65c..23ac2edc 100644 --- a/runtime/stack.c +++ b/runtime/stack.c @@ -1,6 +1,6 @@ /* -*- linux-c -*- * Stack tracing functions - * Copyright (C) 2005, 2006, 2007 Red Hat Inc. + * Copyright (C) 2005-2008 Red Hat Inc. * Copyright (C) 2005 Intel Corporation. * * This file is part of systemtap, and is free software. You can @@ -23,6 +23,7 @@ #include "sym.c" #include "regs.h" +#include "unwind.c" #define MAXBACKTRACE 20 @@ -46,7 +47,7 @@ * @param regs A pointer to the struct pt_regs. */ -void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi) +void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels) { if (verbose) { /* print the current address */ @@ -57,12 +58,15 @@ void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instan _stp_symbol_print((unsigned long)_stp_ret_addr_r(pi)); } else { _stp_print_char(' '); - _stp_symbol_print (REG_IP(regs)); + _stp_symbol_print(REG_IP(regs)); } _stp_print_char('\n'); - } else - _stp_printf ("%p ", (int64_t)REG_IP(regs)); - __stp_stack_print (regs, verbose, 0); + } else if (pi) + _stp_printf("%p %p ", (int64_t)(long)_stp_ret_addr_r(pi), (int64_t) REG_IP(regs)); + else + _stp_printf("%p ", (int64_t) REG_IP(regs)); + + __stp_stack_print(regs, verbose, levels); } /** Writes stack backtrace to a string @@ -71,31 +75,30 @@ void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instan * @param regs A pointer to the struct pt_regs. 
* @returns void */ -void _stp_stack_snprint (char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi) +void _stp_stack_snprint(char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels) { /* To get a string, we use a simple trick. First flush the print buffer, */ /* then call _stp_stack_print, then copy the result into the output string */ /* and clear the print buffer. */ _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); _stp_print_flush(); - _stp_stack_print(regs, verbose, pi); + _stp_stack_print(regs, verbose, pi, levels); strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len); pb->len = 0; } - /** Prints the user stack backtrace * @param str string * @returns Same string as was input with trace info appended, * @note Currently limited to a depth of two. Works from jprobes and kprobes. */ #if 0 -void _stp_ustack_print (char *str) +void _stp_ustack_print(char *str) { - struct pt_regs *nregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) current->thread_info)) - 1; - _stp_printf ("%p : [user]\n", (int64_t)REG_IP(nregs)); + struct pt_regs *nregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)current->thread_info)) - 1; + _stp_printf("%p : [user]\n", (int64_t) REG_IP(nregs)); if (REG_SP(nregs)) - _stp_printf ("%p : [user]\n", (int64_t)(*(unsigned long *)REG_SP(nregs))); + _stp_printf("%p : [user]\n", (int64_t) (*(unsigned long *)REG_SP(nregs))); } #endif /* 0 */ diff --git a/runtime/staprun/ChangeLog b/runtime/staprun/ChangeLog index e9ef2e2d..969c299d 100644 --- a/runtime/staprun/ChangeLog +++ b/runtime/staprun/ChangeLog @@ -1,3 +1,60 @@ +2008-05-05 Martin Hunt <hunt@redhat.com> + + * mainloop.c (child_proc): Handle sig_chld + in the proper thread. + (signal_thread): Don't call send_request() + because it isn't thread-safe. + +2008-05-05 Martin Hunt <hunt@redhat.com> + + * mainloop.c (signal_thread): New thread to handle signals + better. 
+ (setup_main_signals): Create signal thread. + +2008-04-30 Masami Hiramatsu <mhiramat@redhat.com> + + PR 6008 + * common.c (parse_args): Increase the limitation of the buffer size + to 4095MB. + * common.c (usage): Ditto. + +2008-04-30 Masami Hiramatsu <mhiramat@redhat.com> + + * stapio.c (main): Fix a typo in a debug message. + * staprun.c (main): Ditto. + +2008-04-24 Frank Ch. Eigler <fche@elastic.org> + + PR 6451. + * common.c (control_channel): Initialize to -1. + * ctl.c (close_ctl_channel): Tolerate fd=0. + +2008-04-22 Martin Hunt <hunt@redhat.com> + + * cap.c (init_cap): Detect capabilities failure and + run with them disabled. + +2008-04-22 Martin Hunt <hunt@redhat.com> + + * mainloop.c (send_request): Move here from common.c. + staprun no longer sends any messages. + +2008-04-22 hunt <hunt@redhat.com> + + * common.c (usage): Add -d option. + +2008-04-21 Martin Hunt <hunt@redhat.com> + + * staprun.c, stapio.c, staprun.h, mainloop.c, staprun_funcs.c, + ctl.c, common.c: Add "-d" option to have staprun remove + modules. Have staprun exec stapio and then have stapio + exec "staprun -d" to remove the module when finished. + +2008-04-16 Martin Hunt <hunt@redhat.com> + + * ctl.c (init_ctl_channel): Remove unused parameter. + Just opens one channel now. + 2008-02-21 David Smith <dsmith@redhat.com> * staprun_funcs.c (check_path): Small security fix. 
diff --git a/runtime/staprun/cap.c b/runtime/staprun/cap.c index 6f22dfc9..6ac6701f 100644 --- a/runtime/staprun/cap.c +++ b/runtime/staprun/cap.c @@ -23,6 +23,8 @@ #include "staprun.h" #include <sys/prctl.h> +static int _stp_no_caps = 0; + /* like perror, but exits */ #define ferror(msg) { \ _perr(msg); \ @@ -54,10 +56,10 @@ * CAP_CHOWN - allows chown */ -int init_cap(void) +void init_cap(void) { cap_t caps = cap_init(); - cap_value_t capv[] = {CAP_SYS_MODULE, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SETUID, CAP_SETGID, CAP_DAC_OVERRIDE}; + cap_value_t capv[] = { CAP_SYS_MODULE, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SETUID, CAP_SETGID, CAP_DAC_OVERRIDE }; const int numcaps = sizeof(capv) / sizeof(capv[0]); uid_t uid = getuid(); gid_t gid = getgid(); @@ -69,8 +71,11 @@ int init_cap(void) if (cap_set_flag(caps, CAP_PERMITTED, numcaps, capv, CAP_SET) < 0) ferror("cap_set_flag"); - if (cap_set_proc(caps) < 0) - ferror("cap_set_proc"); + if (cap_set_proc(caps) < 0) { + dbug(1, "Setting capabilities failed. Capabilities disabled.\n"); + _stp_no_caps = 1; + return; + } cap_free(caps); @@ -82,8 +87,6 @@ int init_cap(void) if (setresgid(gid, gid, gid) < 0) ferror("setresgid"); - - return 1; } void print_cap(char *text) @@ -97,19 +100,18 @@ void print_cap(char *text) perr("cap_get_proc"); return; } - + getresuid(&uid, &euid, &suid); getresgid(&gid, &egid, &sgid); printf("***** %s\n", text); - if ((p=prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0)) < 0) + if ((p = prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0)) < 0) perr("Couldn't get PR_SET_KEEPCAPS flag value"); - else + else printf("KEEPCAPS: %d\n", p); - printf("uid: %d, euid: %d, suid: %d\ngid: %d. egid: %d, sgid: %d\n", - uid, euid, suid, gid, egid, sgid ); + printf("uid: %d, euid: %d, suid: %d\ngid: %d. 
egid: %d, sgid: %d\n", uid, euid, suid, gid, egid, sgid); printf("Caps: %s\n", cap_to_text(caps, NULL)); cap_free(caps); printf("*****\n\n"); @@ -121,38 +123,44 @@ void print_cap(char *text) */ void drop_cap(cap_value_t cap) { - cap_t caps = cap_get_proc(); - if (caps == NULL) - ferror("cap_get_proc failed"); - if (cap_set_flag(caps, CAP_PERMITTED, 1, &cap, CAP_CLEAR) < 0) - ferror("Could not clear effective capabilities"); - if (cap_set_proc(caps) < 0) - ferror("Could not apply capability set"); - cap_free(caps); + if (_stp_no_caps == 0) { + cap_t caps = cap_get_proc(); + if (caps == NULL) + ferror("cap_get_proc failed"); + if (cap_set_flag(caps, CAP_PERMITTED, 1, &cap, CAP_CLEAR) < 0) + ferror("Could not clear effective capabilities"); + if (cap_set_proc(caps) < 0) + ferror("Could not apply capability set"); + cap_free(caps); + } } /* add_cap() adds a permitted capability to the effective set. */ void add_cap(cap_value_t cap) { - cap_t caps = cap_get_proc(); - if (caps == NULL) - ferror("cap_get_proc failed"); - if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_SET) < 0) - ferror("Could not set effective capabilities"); - if (cap_set_proc(caps) < 0) - ferror("Could not apply capability set"); - cap_free(caps); + if (_stp_no_caps == 0) { + cap_t caps = cap_get_proc(); + if (caps == NULL) + ferror("cap_get_proc failed"); + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_SET) < 0) + ferror("Could not set effective capabilities"); + if (cap_set_proc(caps) < 0) + ferror("Could not apply capability set"); + cap_free(caps); + } } /* del_cap() deletes a permitted capability from the effective set. 
*/ void del_cap(cap_value_t cap) { - cap_t caps = cap_get_proc(); - if (caps == NULL) - ferror("cap_get_proc failed"); - if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_CLEAR) < 0) - ferror("Could not clear effective capabilities"); - if (cap_set_proc(caps) < 0) - ferror("Could not apply capability set"); - cap_free(caps); + if (_stp_no_caps == 0) { + cap_t caps = cap_get_proc(); + if (caps == NULL) + ferror("cap_get_proc failed"); + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_CLEAR) < 0) + ferror("Could not clear effective capabilities"); + if (cap_set_proc(caps) < 0) + ferror("Could not apply capability set"); + cap_free(caps); + } } diff --git a/runtime/staprun/common.c b/runtime/staprun/common.c index 47778efd..93da51d8 100644 --- a/runtime/staprun/common.c +++ b/runtime/staprun/common.c @@ -22,6 +22,7 @@ unsigned int buffer_size; char *target_cmd; char *outfile_name; int attach_mod; +int delete_mod; int load_only; int need_uprobes; @@ -30,8 +31,7 @@ char *modname = NULL; char *modpath = ""; char *modoptions[MAXMODOPTIONS]; -int initialized = 0; -int control_channel = 0; +int control_channel = -1; /* NB: fd==0 possible */ void parse_args(int argc, char **argv) { @@ -44,10 +44,11 @@ void parse_args(int argc, char **argv) target_cmd = NULL; outfile_name = NULL; attach_mod = 0; + delete_mod = 0; load_only = 0; need_uprobes = 0; - while ((c = getopt(argc, argv, "ALuvb:t:d:c:o:x:")) != EOF) { + while ((c = getopt(argc, argv, "ALuvb:t:dc:o:x:")) != EOF) { switch (c) { case 'u': need_uprobes = 1; @@ -57,8 +58,8 @@ void parse_args(int argc, char **argv) break; case 'b': buffer_size = (unsigned)atoi(optarg); - if (buffer_size < 1 || buffer_size > 64) { - err("Invalid buffer size '%d' (should be 1-64).\n", buffer_size); + if (buffer_size < 1 || buffer_size > 4095) { + err("Invalid buffer size '%d' (should be 1-4095).\n", buffer_size); usage(argv[0]); } break; @@ -67,7 +68,8 @@ void parse_args(int argc, char **argv) target_pid = atoi(optarg); break; case 'd': - 
/* obsolete internal option used by stap */ + /* delete module */ + delete_mod = 1; break; case 'c': target_cmd = optarg; @@ -128,11 +130,14 @@ void usage(char *prog) err("-o FILE Send output to FILE.\n"); err("-b buffer size The systemtap module specifies a buffer size.\n"); err(" Setting one here will override that value. The\n"); - err(" value should be an integer between 1 and 64\n"); + err(" value should be an integer between 1 and 4095 \n"); err(" which be assumed to be the buffer size in MB.\n"); err(" That value will be per-cpu in bulk mode.\n"); err("-L Load module and start probes, then detach.\n"); err("-A Attach to loaded systemtap module.\n"); + err("-d Delete a module. Only detached or unused modules\n"); + err(" the user has permission to access will be deleted. Use \"*\"\n"); + err(" (quoted) to delete all unused modules.\n"); err("MODULE can be either a module name or a module path. If a\n"); err("module name is used, it is looked for in the following\n"); err("directory: /lib/modules/`uname -r`/systemtap\n"); @@ -250,10 +255,7 @@ static void fatal_handler (int signum) rc = write (STDERR_FILENO, ERR_MSG, sizeof(ERR_MSG)); rc = write (STDERR_FILENO, str, strlen(str)); rc = write (STDERR_FILENO, "\n", 1); - if (initialized) - _exit(3); - else - _exit(1); + _exit(1); } void setup_signals(void) @@ -294,28 +296,6 @@ void setup_signals(void) #endif } -/** - * send_request - send request to kernel over control channel - * @type: the relay-app command id - * @data: pointer to the data to be sent - * @len: length of the data to be sent - * - * Returns 0 on success, negative otherwise. - */ -int send_request(int type, void *data, int len) -{ - char buf[1024]; - - /* Before doing memcpy, make sure 'buf' is big enough. 
*/ - if ((len + 4) > (int)sizeof(buf)) { - _err("exceeded maximum send_request size.\n"); - return -1; - } - memcpy(buf, &type, 4); - memcpy(&buf[4], data, len); - return write(control_channel, buf, len+4); -} - /* * set FD_CLOEXEC for any file descriptor */ diff --git a/runtime/staprun/ctl.c b/runtime/staprun/ctl.c index af7e6c1a..4597bf72 100644 --- a/runtime/staprun/ctl.c +++ b/runtime/staprun/ctl.c @@ -12,45 +12,42 @@ #include "staprun.h" -int init_ctl_channel(int symbols) +int init_ctl_channel(const char *name, int verb) { - char *cname, buf[PATH_MAX]; + char buf[PATH_MAX]; struct statfs st; int old_transport = 0; - - if (symbols) - cname = ".symbols"; - else - cname = ".cmd"; - if (statfs("/sys/kernel/debug", &st) == 0 && (int) st.f_type == (int) DEBUGFS_MAGIC) { - if (sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s/%s", modname, cname)) + if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) { + if (sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s/.cmd", name)) return -1; } else { old_transport = 1; - if (sprintf_chk(buf, "/proc/systemtap/%s/%s", modname, cname)) + if (sprintf_chk(buf, "/proc/systemtap/%s/.cmd", name)) return -1; } - - dbug(2, "Opening %s\n", buf); + + dbug(2, "Opening %s\n", buf); control_channel = open(buf, O_RDWR); if (control_channel < 0) { - if (attach_mod && errno == ENOENT) - err("ERROR: Can not attach. Module %s not running.\n", modname); - else - perr("Couldn't open control channel '%s'", buf); + if (verb) { + if (attach_mod && errno == ENOENT) + err("ERROR: Can not attach. 
Module %s not running.\n", name); + else + perr("Couldn't open control channel '%s'", buf); + } return -1; } if (set_clexec(control_channel) < 0) return -1; - + return old_transport; } void close_ctl_channel(void) { - if (control_channel > 0) { + if (control_channel >= 0) { close(control_channel); - control_channel = 0; + control_channel = -1; } } diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c index 2e0c3c5c..61963743 100644 --- a/runtime/staprun/mainloop.c +++ b/runtime/staprun/mainloop.c @@ -15,39 +15,98 @@ /* globals */ int ncpus; -int use_old_transport = 0; +static int use_old_transport = 0; +//enum _stp_sig_type { sig_none, sig_done, sig_detach }; +//static enum _stp_sig_type got_signal = sig_none; -static void sigproc(int signum) +/** + * send_request - send request to kernel over control channel + * @type: the relay-app command id + * @data: pointer to the data to be sent + * @len: length of the data to be sent + * + * Returns 0 on success, negative otherwise. + */ +int send_request(int type, void *data, int len) { - dbug(2, "sigproc %d (%s)\n", signum, strsignal(signum)); - - if (signum == SIGCHLD) { - pid_t pid = waitpid(-1, NULL, WNOHANG); - if (pid != target_pid) - return; - send_request(STP_EXIT, NULL, 0); - } else if (signum == SIGQUIT) - cleanup_and_exit(2); - else if (signum == SIGINT || signum == SIGHUP || signum == SIGTERM) - send_request(STP_EXIT, NULL, 0); + char buf[1024]; + + /* Before doing memcpy, make sure 'buf' is big enough. 
*/ + if ((len + 4) > (int)sizeof(buf)) { + _err("exceeded maximum send_request size.\n"); + return -1; + } + memcpy(buf, &type, 4); + memcpy(&buf[4], data, len); + return write(control_channel, buf, len + 4); } -static void setup_main_signals(int cleanup) +static void *signal_thread(void *arg) { - struct sigaction a; - memset(&a, 0, sizeof(a)); - sigfillset(&a.sa_mask); - if (cleanup == 0) { - a.sa_handler = sigproc; - sigaction(SIGCHLD, &a, NULL); - } else - a.sa_handler = SIG_IGN; - sigaction(SIGINT, &a, NULL); - sigaction(SIGTERM, &a, NULL); - sigaction(SIGHUP, &a, NULL); - sigaction(SIGQUIT, &a, NULL); + sigset_t *s = (sigset_t *) arg; + int signum, rc, btype = STP_EXIT; + + while (1) { + if (sigwait(s, &signum) < 0) { + _perr("sigwait"); + continue; + } + dbug(2, "sigproc %d (%s)\n", signum, strsignal(signum)); + if (signum == SIGQUIT) + cleanup_and_exit(1); + else if (signum == SIGINT || signum == SIGHUP || signum == SIGTERM) { + // send STP_EXIT + rc = write(control_channel, &btype, sizeof(btype)); + break; + } + } + return NULL; +} + +static void chld_proc(int signum) +{ + int32_t rc, btype = STP_EXIT; + dbug(2, "chld_proc %d (%s)\n", signum, strsignal(signum)); + pid_t pid = waitpid(-1, NULL, WNOHANG); + if (pid != target_pid) + return; + // send STP_EXIT + rc = write(control_channel, &btype, sizeof(btype)); } +static void setup_main_signals(void) +{ + pthread_t tid; + struct sigaction sa; + sigset_t *s = malloc(sizeof(*s)); + if (!s) { + _perr("malloc failed"); + exit(1); + } + sigfillset(s); + pthread_sigmask(SIG_SETMASK, s, NULL); + memset(&sa, 0, sizeof(sa)); + sigfillset(&sa.sa_mask); + sa.sa_handler = SIG_IGN; + sigaction(SIGINT, &sa, NULL); + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGHUP, &sa, NULL); + sigaction(SIGQUIT, &sa, NULL); + + sa.sa_handler = chld_proc; + sigaction(SIGCHLD, &sa, NULL); + + sigemptyset(s); + sigaddset(s, SIGINT); + sigaddset(s, SIGTERM); + sigaddset(s, SIGHUP); + sigaddset(s, SIGQUIT); + pthread_sigmask(SIG_SETMASK, s, 
NULL); + if (pthread_create(&tid, NULL, signal_thread, s) < 0) { + _perr("failed to create thread"); + exit(1); + } +} /* * start_cmd forks the command given on the command line @@ -75,7 +134,7 @@ void start_cmd(void) a.sa_handler = SIG_IGN; sigaction(SIGINT, &a, NULL); - dbug (1, "execing target_cmd %s\n", target_cmd); + dbug(1, "execing target_cmd %s\n", target_cmd); if ((pid = fork()) < 0) { _perr("fork"); exit(1); @@ -86,8 +145,8 @@ void start_cmd(void) sigaction(SIGINT, &a, NULL); /* commands we fork need to run at normal priority */ - setpriority (PRIO_PROCESS, 0, 0); - + setpriority(PRIO_PROCESS, 0, 0); + /* wait here until signaled */ sigwait(&usrset, &signum); @@ -107,11 +166,11 @@ void system_cmd(char *cmd) { pid_t pid; - dbug (2, "system %s\n", cmd); + dbug(2, "system %s\n", cmd); if ((pid = fork()) < 0) { _perr("fork"); } else if (pid == 0) { - setpriority (PRIO_PROCESS, 0, 0); + setpriority(PRIO_PROCESS, 0, 0); if (execl("/bin/sh", "sh", "-c", cmd, NULL) < 0) perr("%s", cmd); _exit(1); @@ -128,7 +187,7 @@ static void read_buffer_info(void) if (!use_old_transport) return; - if (statfs("/sys/kernel/debug", &st) == 0 && (int) st.f_type == (int) DEBUGFS_MAGIC) + if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) return; if (sprintf_chk(buf, "/proc/systemtap/%s/bufsize", modname)) @@ -152,7 +211,6 @@ static void read_buffer_info(void) return; } - /** * init_stapio - initialize the app * @print_summary: boolean, print summary or not at end of run @@ -164,7 +222,7 @@ int init_stapio(void) dbug(2, "init_stapio\n"); /* create control channel */ - use_old_transport = init_ctl_channel(0); + use_old_transport = init_ctl_channel(modname, 1); if (use_old_transport < 0) { err("Failed to initialize control channel.\n"); return -1; @@ -177,7 +235,7 @@ int init_stapio(void) if (init_oldrelayfs() < 0) { close_ctl_channel(); return -1; - } + } } else { if (init_relayfs() < 0) { close_ctl_channel(); @@ -192,17 +250,12 @@ int 
init_stapio(void) if (target_cmd) start_cmd(); - return 0; } -/* cleanup_and_exit() closed channels and frees memory - * then exits with the following status codes: - * 1 - failed to initialize. - * 2 - disconnected - * 3 - initialized - */ -void cleanup_and_exit (int closed) +/* cleanup_and_exit() closed channels, frees memory, + * removes the module (if necessary) and exits. */ +void cleanup_and_exit(int detach) { pid_t err; static int exiting = 0; @@ -211,32 +264,34 @@ void cleanup_and_exit (int closed) return; exiting = 1; - setup_main_signals(1); + setup_main_signals(); - dbug(1, "CLEANUP AND EXIT closed=%d\n", closed); + dbug(1, "detach=%d\n", detach); /* what about child processes? we will wait for them here. */ err = waitpid(-1, NULL, WNOHANG); if (err >= 0) err("\nWaiting for processes to exit\n"); - while(wait(NULL) > 0) ; + while (wait(NULL) > 0) ; if (use_old_transport) - close_oldrelayfs(closed == 2); + close_oldrelayfs(detach); else close_relayfs(); dbug(1, "closing control channel\n"); close_ctl_channel(); - if (initialized == 2 && closed == 2) { - err("\nDisconnecting from systemtap module.\n" \ - "To reconnect, type \"staprun -A %s\"\n", modname); - } else if (initialized) - closed = 3; - else - closed = 1; - exit(closed); + if (detach) { + err("\nDisconnecting from systemtap module.\n" "To reconnect, type \"staprun -A %s\"\n", modname); + } else { + dbug(2, "removing %s\n", modname); + if (execl(BINDIR "/staprun", "staprun", "-d", modname, NULL) < 0) { + perror(modname); + _exit(1); + } + } + _exit(0); } /** @@ -247,90 +302,103 @@ int stp_main_loop(void) { ssize_t nb; void *data; - int type; + uint32_t type; FILE *ofp = stdout; char recvbuf[8196]; setvbuf(ofp, (char *)NULL, _IOLBF, 0); - setup_main_signals(0); + setup_main_signals(); dbug(2, "in main loop\n"); send_request(STP_READY, NULL, 0); - while (1) { /* handle messages from control channel */ + /* handle messages from control channel */ + while (1) { nb = read(control_channel, recvbuf, 
sizeof(recvbuf)); + dbug(2, "nb=%d\n", (int)nb); if (nb <= 0) { if (errno != EINTR) _perr("Unexpected EOF in read (nb=%ld)", (long)nb); continue; } - - type = *(int *)recvbuf; - data = (void *)(recvbuf + sizeof(int)); - switch (type) { + type = *(uint32_t *) recvbuf; + data = (void *)(recvbuf + sizeof(uint32_t)); + nb -= sizeof(uint32_t); + + switch (type) { #ifdef STP_OLD_TRANSPORT case STP_REALTIME_DATA: - { - ssize_t bw = write(out_fd[0], data, nb - sizeof(int)); - if (bw >= 0 && bw != (nb - (ssize_t)sizeof(int))) { - nb = nb - bw; - bw = write(out_fd[0], data, nb - sizeof(int)); + { + ssize_t bw = write(out_fd[0], data, nb); + if (bw >= 0 && bw != nb) { + nb = nb - bw; + bw = write(out_fd[0], data, nb); + } + if (bw != nb) { + _perr("write error (nb=%ld)", (long)nb); + cleanup_and_exit(0); + } + break; } - if (bw != (nb - (ssize_t)sizeof(int))) { - _perr("write error (nb=%ld)", (long)nb); - cleanup_and_exit(1); - } - break; - } #endif case STP_OOB_DATA: - fputs ((char *)data, stderr); - break; - case STP_EXIT: - { - /* module asks us to unload it and exit */ - int *closed = (int *)data; - dbug(2, "got STP_EXIT, closed=%d\n", *closed); - cleanup_and_exit(*closed); + fputs((char *)data, stderr); break; - } - case STP_START: - { - struct _stp_msg_start *t = (struct _stp_msg_start *)data; - dbug(2, "probe_start() returned %d\n", t->res); - if (t->res < 0) { - if (target_cmd) - kill (target_pid, SIGKILL); - cleanup_and_exit(1); - } else if (target_cmd) - kill (target_pid, SIGUSR1); - break; - } + case STP_EXIT: + { + /* module asks us to unload it and exit */ + dbug(2, "got STP_EXIT\n"); + cleanup_and_exit(0); + break; + } + case STP_START: + { + struct _stp_msg_start *t = (struct _stp_msg_start *)data; + dbug(2, "probe_start() returned %d\n", t->res); + if (t->res < 0) { + if (target_cmd) + kill(target_pid, SIGKILL); + cleanup_and_exit(0); + } else if (target_cmd) + kill(target_pid, SIGUSR1); + break; + } case STP_SYSTEM: - { - struct _stp_msg_cmd *c = (struct 
_stp_msg_cmd *)data; - dbug(2, "STP_SYSTEM: %s\n", c->cmd); - system_cmd(c->cmd); - break; - } + { + struct _stp_msg_cmd *c = (struct _stp_msg_cmd *)data; + dbug(2, "STP_SYSTEM: %s\n", c->cmd); + system_cmd(c->cmd); + break; + } case STP_TRANSPORT: - { - struct _stp_msg_start ts; - if (use_old_transport) { - if (init_oldrelayfs() < 0) - cleanup_and_exit(1); - } else { - if (init_relayfs() < 0) + { + struct _stp_msg_start ts; + if (use_old_transport) { + if (init_oldrelayfs() < 0) + cleanup_and_exit(0); + } else { + if (init_relayfs() < 0) + cleanup_and_exit(0); + } + ts.target = target_pid; + send_request(STP_START, &ts, sizeof(ts)); + if (load_only) cleanup_and_exit(1); + break; + } + case STP_UNWIND: + { + int len; + char *ptr = (char *)data; + while (nb > 0) { + send_unwind_data(ptr); + len = strlen(ptr) + 1; + ptr += len; + nb -= len; + } + break; } - ts.target = target_pid; - initialized = 2; - send_request(STP_START, &ts, sizeof(ts)); - if (load_only) - cleanup_and_exit(2); - break; - } default: err("WARNING: ignored message of type %d\n", (type)); } diff --git a/runtime/staprun/stapio.c b/runtime/staprun/stapio.c index ee30a1a1..3c8c4f7f 100644 --- a/runtime/staprun/stapio.c +++ b/runtime/staprun/stapio.c @@ -27,24 +27,23 @@ char *__name__ = "stapio"; int main(int argc, char **argv) { setup_signals(); - parse_args(argc, argv); if (buffer_size) - dbug(1, "Using a buffer of %u bytes.\n", buffer_size); + dbug(1, "Using a buffer of %u MB.\n", buffer_size); if (optind < argc) { parse_modpath(argv[optind++]); dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname); } - if (optind < argc) { + if (optind < argc) { if (attach_mod) { err("ERROR: Cannot have module options with attach (-A).\n"); usage(argv[0]); } else { - unsigned start_idx = 3; /* reserve three slots in modoptions[] */ - while (optind < argc && start_idx+1 < MAXMODOPTIONS) + unsigned start_idx = 3; /* reserve three slots in modoptions[] */ + while (optind < argc && start_idx + 1 < MAXMODOPTIONS) 
modoptions[start_idx++] = argv[optind++]; modoptions[start_idx] = NULL; } @@ -57,13 +56,7 @@ int main(int argc, char **argv) if (init_stapio()) exit(1); - - initialized = 1; - if (attach_mod) { - /* already started */ - initialized++; - } - + if (stp_main_loop()) { err("ERROR: Couldn't enter main loop. Exiting.\n"); exit(1); diff --git a/runtime/staprun/staprun.c b/runtime/staprun/staprun.c index f4e67fdb..0291d01f 100644 --- a/runtime/staprun/staprun.c +++ b/runtime/staprun/staprun.c @@ -16,21 +16,18 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) 2005-2007 Red Hat, Inc. + * Copyright (C) 2005-2008 Red Hat, Inc. * */ #include "staprun.h" -int inserted_module = 0; - /* used in dbug, _err and _perr */ char *__name__ = "staprun"; extern long delete_module(const char *, unsigned int); -static int -run_as(uid_t uid, gid_t gid, const char *path, char *const argv[]) +static int run_as(uid_t uid, gid_t gid, const char *path, char *const argv[]) { pid_t pid; int rstatus; @@ -42,14 +39,13 @@ run_as(uid_t uid, gid_t gid, const char *path, char *const argv[]) err("%s ", argv[i]); i++; } - err("\n"); + err("\n"); } if ((pid = fork()) < 0) { _perr("fork"); return -1; - } - else if (pid == 0) { + } else if (pid == 0) { /* Make sure we run as the full user. If we're * switching to a non-root user, this won't allow * that process to switch back to root (since the @@ -79,17 +75,6 @@ run_as(uid_t uid, gid_t gid, const char *path, char *const argv[]) return -1; } -/* Keep the uid and gid settings because we will likely */ -/* conditionally restore "-u" */ -static int run_stapio(char **argv) -{ - uid_t uid = getuid(); - gid_t gid = getgid(); - argv[0] = PKGLIBDIR "/stapio"; - - return run_as(uid, gid, argv[0], argv); -} - /* * Module to be inserted has one or more user-space probes. Make sure * uprobes is enabled. 
@@ -132,8 +117,7 @@ static int enable_uprobes(void) dbug(2, "Inserting uprobes module from SystemTap runtime.\n"); argv[0] = NULL; - return insert_module(PKGDATADIR "/runtime/uprobes/uprobes.ko", - NULL, argv); + return insert_module(PKGDATADIR "/runtime/uprobes/uprobes.ko", NULL, argv); } static int insert_stap_module(void) @@ -144,6 +128,66 @@ static int insert_stap_module(void) return insert_module(modpath, bufsize_option, modoptions); } +static int remove_module(const char *name, int verb); + +static void remove_all_modules(void) +{ + char *base; + struct statfs st; + struct dirent *d; + DIR *moddir; + + if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) + base = "/sys/kernel/debug/systemtap"; + else + base = "/proc/systemtap"; + + moddir = opendir(base); + if (moddir) { + while ((d = readdir(moddir))) + if (remove_module(d->d_name, 0) == 0) + printf("Module %s removed.\n", d->d_name); + closedir(moddir); + } +} + +static int remove_module(const char *name, int verb) +{ + int ret; + dbug(2, "%s\n", name); + + if (strcmp(name, "*") == 0) { + remove_all_modules(); + return 0; + } + + /* Call init_ctl_channel() which actually attempts an open() + * of the control channel. This is better than using access() because + * an open on an already open channel will fail, preventing us from attempting + * to remove an in-use module. + */ + if (init_ctl_channel(name, 0) < 0) { + if (verb) + err("Error accessing systemtap module %s: %s\n", name, strerror(errno)); + return 1; + } + close_ctl_channel(); + + dbug(2, "removing module %s\n", name); + + /* Don't remove module when priority is elevated. 
*/ + if (setpriority(PRIO_PROCESS, 0, 0) < 0) + _perr("setpriority"); + + ret = do_cap(CAP_SYS_MODULE, delete_module, name, 0); + if (ret != 0) { + err("Error removing module '%s': %s.\n", name, strerror(errno)); + return 1; + } + + dbug(1, "Module %s removed.\n", name); + return 0; +} int init_staprun(void) { @@ -154,71 +198,28 @@ int init_staprun(void) /* We're done with CAP_SYS_ADMIN. */ drop_cap(CAP_SYS_ADMIN); - - if (!attach_mod) { + + if (delete_mod) + exit(remove_module(modname, 1)); + else if (!attach_mod) { if (need_uprobes && enable_uprobes() != 0) return -1; if (insert_stap_module() < 0) return -1; - else - inserted_module = 1; } - return 0; } - -static void cleanup(int rc) -{ - /* Only cleanup once. */ - static int done = 0; - if (done == 0) - done = 1; - else - return; - - dbug(2, "rc=%d, inserted_module=%d\n", rc, inserted_module); - - if (setpriority (PRIO_PROCESS, 0, 0) < 0) - _perr("setpriority"); - - stop_symbol_thread(); - - /* rc == 2 means disconnected */ - if (rc == 2) - return; - - /* If we inserted the module and did not get rc==2, then */ - /* we really want to remove it. */ - if (inserted_module || rc == 3) { - long ret; - dbug(2, "removing module %s\n", modname); - ret = do_cap(CAP_SYS_MODULE, delete_module, modname, 0); - if (ret != 0) - err("Error removing module '%s': %s\n", modname, moderror(errno)); - } -} - -static void exit_cleanup(void) -{ - dbug(2, "something exited...\n"); - cleanup(1); -} int main(int argc, char **argv) { int rc; - if (atexit(exit_cleanup)) { - _perr("cannot set exit function"); - exit(1); - } - - /* NB: Don't do the geteuid()!=0 check here, since we want to - test command-line error-handling while running non-root. */ + /* NB: Don't do the geteuid()!=0 check here, since we want to + test command-line error-handling while running non-root. */ /* Get rid of a few standard environment variables (which */ /* might cause us to do unintended things). 
*/ rc = unsetenv("IFS") || unsetenv("CDPATH") || unsetenv("ENV") - || unsetenv("BASH_ENV"); + || unsetenv("BASH_ENV"); if (rc) { _perr("unsetenv failed"); exit(-1); @@ -229,20 +230,20 @@ int main(int argc, char **argv) parse_args(argc, argv); if (buffer_size) - dbug(2, "Using a buffer of %u bytes.\n", buffer_size); + dbug(2, "Using a buffer of %u MB.\n", buffer_size); if (optind < argc) { parse_modpath(argv[optind++]); dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname); } - if (optind < argc) { + if (optind < argc) { if (attach_mod) { err("ERROR: Cannot have module options with attach (-A).\n"); usage(argv[0]); } else { unsigned start_idx = 0; - while (optind < argc && start_idx+1 < MAXMODOPTIONS) + while (optind < argc && start_idx + 1 < MAXMODOPTIONS) modoptions[start_idx++] = argv[optind++]; modoptions[start_idx] = NULL; } @@ -254,14 +255,13 @@ int main(int argc, char **argv) } if (geteuid() != 0) { - err("ERROR: The effective user ID of staprun must be set to the root user.\n" - " Check permissions on staprun and ensure it is a setuid root program.\n"); + err("ERROR: The effective user ID of staprun must be set to the root user.\n" + " Check permissions on staprun and ensure it is a setuid root program.\n"); exit(1); } - if (!init_cap()) - exit(1); - + init_cap(); + if (check_permissions() != 1) usage(argv[0]); @@ -277,11 +277,14 @@ int main(int argc, char **argv) if (init_staprun()) exit(1); - setup_staprun_signals(); - start_symbol_thread(); - - rc = run_stapio(argv); - cleanup(rc); - + argv[0] = PKGLIBDIR "/stapio"; + if (execv(argv[0], argv) < 0) { + perror(argv[0]); + goto err; + } return 0; + +err: + remove_module(modname, 1); + return 1; } diff --git a/runtime/staprun/staprun.h b/runtime/staprun/staprun.h index 1128fb4c..60bab391 100644 --- a/runtime/staprun/staprun.h +++ b/runtime/staprun/staprun.h @@ -103,8 +103,6 @@ extern char *__name__; #define STP_OLD_TRANSPORT #include "../transport/transport_msgs.h" -extern int use_old_transport; - 
#define RELAYFS_MAGIC 0xF0B4A981 #define DEBUGFS_MAGIC 0x64626720 #define DEBUGFSDIR "/sys/kernel/debug" @@ -118,9 +116,8 @@ int init_stapio(void); int stp_main_loop(void); int send_request(int type, void *data, int len); void cleanup_and_exit (int); -int do_module(void *); -int do_kernel_symbols(void); -int init_ctl_channel(int); +void send_unwind_data(const char *name); +int init_ctl_channel(const char *name, int verb); void close_ctl_channel(void); int init_relayfs(void); void close_relayfs(void); @@ -129,7 +126,7 @@ void close_oldrelayfs(int); void setup_signals(void); /* cap.c */ void print_cap(char *text); -int init_cap(void); +void init_cap(void); void add_cap(cap_value_t cap); void del_cap(cap_value_t cap); void drop_cap(cap_value_t cap); @@ -169,6 +166,7 @@ extern int target_pid; extern char *target_cmd; extern char *outfile_name; extern int attach_mod; +extern int delete_mod; extern int load_only; extern int need_uprobes; diff --git a/runtime/staprun/staprun_funcs.c b/runtime/staprun/staprun_funcs.c index 34e12c25..c1cb92b7 100644 --- a/runtime/staprun/staprun_funcs.c +++ b/runtime/staprun/staprun_funcs.c @@ -16,18 +16,6 @@ #include <grp.h> #include <pwd.h> -void setup_staprun_signals(void) -{ - struct sigaction a; - memset(&a, 0, sizeof(a)); - sigfillset(&a.sa_mask); - a.sa_handler = SIG_IGN; - sigaction(SIGINT, &a, NULL); - sigaction(SIGTERM, &a, NULL); - sigaction(SIGHUP, &a, NULL); - sigaction(SIGQUIT, &a, NULL); -} - extern long init_module(void *, unsigned long, const char *); /* Module errors get translated. */ @@ -401,95 +389,3 @@ int check_permissions(void) * is in that directory. 
*/ return check_path(); } - -pthread_t symbol_thread_id = (pthread_t)0; -int kernel_ptr_size = 0; - -/* Symbol handling thread */ -void *handle_symbols(void __attribute__((unused)) *arg) -{ - ssize_t nb; - void *data; - int32_t type; - char recvbuf[8192]; - - dbug(2, "waiting for symbol requests\n"); - - /* handle messages from control channel */ - while (1) { - nb = read(control_channel, recvbuf, sizeof(recvbuf)); - if (nb <= 0) { - if (errno != EINTR) - _perr("Unexpected EOF in read (nb=%ld)", (long)nb); - continue; - } - - type = *(int32_t *)recvbuf; - data = (void *)(recvbuf + sizeof(int32_t)); - - switch (type) { - case STP_MODULE: - { - dbug(2, "STP_MODULES request received\n"); - if (do_module(data) < 0) - goto done; - break; - } - case STP_SYMBOLS: - { - struct _stp_msg_symbol *req = (struct _stp_msg_symbol *)data; - dbug(2, "STP_SYMBOLS request received\n"); - if (req->endian != 0x1234) { - err("ERROR: staprun is compiled with different endianess than the kernel!\n"); - goto done; - } - kernel_ptr_size = req->ptr_size; - if (kernel_ptr_size != 4 && kernel_ptr_size != 8) { - err("ERROR: invalid kernel pointer size %d\n", kernel_ptr_size); - goto done; - } - if (do_kernel_symbols() < 0) - goto done; - break; - } - default: - err("WARNING: ignored message of type %d\n", (type)); - } - } - -done: - /* signal stapio we're done */ - kill(0, SIGINT); - - return NULL; -} - -void start_symbol_thread(void) -{ - int status; - - /* create symbol control channel */ - status = do_cap(CAP_DAC_OVERRIDE, init_ctl_channel, 1); - drop_cap(CAP_DAC_OVERRIDE); - if (status < 0) { - err("Failed to initialize control channel.\n"); - exit(1); - } - status = pthread_create(&symbol_thread_id, NULL, handle_symbols, NULL); - if (status) { - perr("Failed to create symbol thread.\n"); - exit(1); - } -} - -void stop_symbol_thread(void) -{ - - if (symbol_thread_id) { - dbug(2, "Stopping symbol thread.\n"); - pthread_cancel(symbol_thread_id); - pthread_join(symbol_thread_id, NULL); - } - 
close_ctl_channel(); -} - diff --git a/runtime/staprun/symbols.c b/runtime/staprun/symbols.c deleted file mode 100644 index c7362d9e..00000000 --- a/runtime/staprun/symbols.c +++ /dev/null @@ -1,333 +0,0 @@ -/* -*- linux-c -*- - * Symbols and modules functions for staprun. - * - * Copyright (C) 2006-2008 Red Hat Inc. - * - * This file is part of systemtap, and is free software. You can - * redistribute it and/or modify it under the terms of the GNU General - * Public License (GPL); either version 2, or (at your option) any - * later version. - */ - -#include "staprun.h" - -/* send symbol data */ -static int send_data(int32_t type, void *data, int len) -{ - if (write(control_channel, &type, 4) <= 0) - return -1; - return write(control_channel, data, len); -} - - -/* Get the sections for a module. Put them in the supplied buffer */ -/* in the following order: */ -/* [struct _stp_msg_module][struct _stp_symbol sections ...][string data][unwind data] */ -/* Return the total length of all the data. */ - -#define SECDIR "/sys/module/%s/sections" -static int get_sections(char *name, char *data_start, int datalen) -{ - char dir[STP_MODULE_NAME_LEN + sizeof(SECDIR)]; - char filename[STP_MODULE_NAME_LEN + 256]; - char buf[32], strdata_start[32768]; - char *strdata=strdata_start, *data=data_start; - int fd, len, res, unwind_data_len=0; - struct _stp_msg_module *mod = (struct _stp_msg_module *)data_start; - - struct dirent *d; - DIR *secdir; - void *sec; - int struct_symbol_size = kernel_ptr_size == 8 ? sizeof(struct _stp_symbol64) : sizeof(struct _stp_symbol32); - uint64_t sec_addr; - - /* start of data is a struct _stp_msg_module */ - data += sizeof(struct _stp_msg_module); - - res = snprintf(dir, sizeof(dir), SECDIR, name); - if (res >= (int)sizeof(dir)) { - _err("Couldn't fit module \"%s\" into dir buffer.\n" \ - "This should never happen. Please file a bug report.\n", name); - return -1; - } - - if ((secdir = opendir(dir)) == NULL) - return 0; - - /* Initialize mod. 
*/ - memset(mod, 0, sizeof(struct _stp_msg_module)); - - /* Copy name in and check for overflow. */ - strncpy(mod->name, name, STP_MODULE_NAME_LEN); - if (mod->name[STP_MODULE_NAME_LEN - 1] != '\0') { - _err("Couldn't fit module \"%s\" into mod->name buffer.\n" \ - "This should never happen. Please file a bug report.\n", name); - return -1; - } - - /* FIXME: optionally fill in unwind data here */ - mod->unwind_len = unwind_data_len; - - while ((d = readdir(secdir))) { - char *secname = d->d_name; - - /* Copy filename in and check for overflow. */ - res = snprintf(filename, sizeof(filename), "/sys/module/%s/sections/%s", name, secname); - if (res >= (int)sizeof(filename)) { - _err("Couldn't fit secname \"%s\" into filename buffer.\n" \ - "This should never happen. Please file a bug report.\n", secname); - closedir(secdir); - return -1; - } - - /* filter out some non-useful stuff */ - if (!strncmp(secname,"__",2) - || !strcmp(secname,".") - || !strcmp(secname,"..") - || !strcmp(secname,".module_sig") - || !strcmp(secname,".modinfo") - || !strcmp(secname,".strtab") - || !strcmp(secname,".symtab") ) { - continue; - } - if (!strncmp(secname, ".gnu.linkonce", 13) - && strcmp(secname, ".gnu.linkonce.this_module")) - continue; - - if ((fd = open(filename,O_RDONLY)) >= 0) { - if (read(fd, buf, 32) > 0) { - /* create next section */ - sec = data; - if (data - data_start + struct_symbol_size > datalen) - goto err1; - data += struct_symbol_size; - - sec_addr = (uint64_t)strtoull(buf,NULL,16); - if (kernel_ptr_size == 8) { - ((struct _stp_symbol64 *)sec)->addr = sec_addr; - ((struct _stp_symbol64 *)sec)->symbol = (uint64_t)(strdata - strdata_start); - } else { - ((struct _stp_symbol32 *)sec)->addr = (uint32_t)sec_addr; - ((struct _stp_symbol32 *)sec)->symbol = (uint32_t)(strdata - strdata_start); - } - mod->num_sections++; - - /* now create string data for the - * section (checking for overflow) */ - if ((strdata - strdata_start + strlen(strdata)) - >= sizeof(strdata_start)) - 
goto err1; - strcpy(strdata, secname); - strdata += strlen(secname) + 1; - - /* These sections are used a lot so keep the values handy */ - if (!strcmp(secname, ".data") || !strncmp(secname, ".rodata", 7)) { - if (mod->data == 0 || sec_addr < mod->data) - mod->data = sec_addr; - } - if (!strcmp(secname, ".text")) - mod->text = sec_addr; - if (!strcmp(secname, ".gnu.linkonce.this_module")) - mod->module = sec_addr; - } - close(fd); - } - } - closedir(secdir); - - /* consolidate buffers */ - len = strdata - strdata_start; - if ((len + data - data_start) > datalen) - goto err0; - strdata = strdata_start; - while (len--) - *data++ = *strdata++; - -#if 0 - if (unwind_data_len) { - if ((unwind_data_len + data - data_start) > datalen) - goto err0; - memcpy(data, unwind_data, unwind_data_len); - data += unwind_data_len; - } -#endif - return data - data_start; - -err1: - close(fd); - closedir(secdir); -err0: - /* if this happens, something went seriously wrong. */ - _err("Unexpected error. Overflowed buffers.\n"); - return -1; -} -#undef SECDIR - -/* - * For modules, we send the name, section names, and offsets - */ -static int send_module (char *mname) -{ - char data[65536]; - int len; - len = get_sections(mname, data, sizeof(data)); - if (len > 0) { - if (send_data(STP_MODULE, data, len) < 0) { - _err("Loading of module %s failed. Exiting...\n", mname); - return -1; - } - } - return len; -} - -/* - * Send either all modules, or a specific one. 
- * Returns: - * >=0 : OK - * -1 : serious error (exit) - */ -int do_module (void *data) -{ - struct _stp_msg_module *mod = (struct _stp_msg_module *)data; - - if (mod->name[0] == 0) { - struct dirent *d; - DIR *moddir = opendir("/sys/module"); - if (moddir) { - while ((d = readdir(moddir))) - if (send_module(d->d_name) < 0) { - closedir(moddir); - return -1; - } - closedir(moddir); - } - send_request(STP_MODULE, data, 1); - return 0; - } - - return send_module(mod->name); -} - -#define MAX_SYMBOLS 32*1024 - -/* - * Read /proc/kallsyms and send all kernel symbols to the - * systemtap module. Ignore module symbols; the systemtap module - * can access them directly. - */ -int do_kernel_symbols(void) -{ - FILE *kallsyms=NULL; - char *name, *mod, *dataptr, *datamax, type, *data_base=NULL; - unsigned long long addr; - void *syms = NULL; - int ret, num_syms, i = 0, struct_symbol_size; - int max_syms= MAX_SYMBOLS, data_basesize = MAX_SYMBOLS*32; - - if (kernel_ptr_size == 8) - struct_symbol_size = sizeof(struct _stp_symbol64); - else - struct_symbol_size = sizeof(struct _stp_symbol32); - - syms = malloc(max_syms * struct_symbol_size); - data_base = malloc(data_basesize); - if (data_base == NULL || syms == NULL) { - _err("Failed to allocate memory for symbols\n"); - goto err; - } - dataptr = data_base; - datamax = data_base + data_basesize; - - kallsyms = fopen ("/proc/kallsyms", "r"); - if (!kallsyms) { - _perr("Fatal error: Unable to open /proc/kallsyms"); - goto err; - } - - /* put empty string in data */ - *dataptr++ = 0; - - while ((ret = fscanf(kallsyms, "%llx %c %as [%as", &addr, &type, &name, &mod))>0 - && dataptr < datamax) { - if (ret < 3) - continue; - if (ret > 3) { - /* ignore modules */ - free(name); - free(mod); - /* modules are loaded above the kernel, so if we */ - /* are getting modules, then we're done. 
*/ - break; - } - - if (type == 't' || type == 'T' || type == 'A') { - if (kernel_ptr_size == 8) { - ((struct _stp_symbol64 *)syms)[i].addr = (uint64_t)addr; - ((struct _stp_symbol64 *)syms)[i].symbol = (uint64_t)(dataptr - data_base); - } else { - ((struct _stp_symbol32 *)syms)[i].addr = (uint32_t)addr; - ((struct _stp_symbol32 *)syms)[i].symbol = (uint32_t)(dataptr - data_base); - } - if (dataptr >= datamax - strlen(name)) { - char *db; - data_basesize *= 2; - db = realloc(data_base, data_basesize); - if (db == NULL) { - _err("Could not allocate enough space for symbols.\n"); - goto err; - } - dataptr = db + (dataptr - data_base); - datamax = db + data_basesize; - data_base = db; - } - strcpy(dataptr, name); - dataptr += strlen(name) + 1; - free(name); - i++; - if (i >= max_syms) { - max_syms *= 2; - syms = realloc(syms, max_syms*struct_symbol_size); - if (syms == NULL) { - _err("Could not allocate enough space for symbols.\n"); - goto err; - } - } - } - } - num_syms = i; - if (num_syms <= 0) - goto err; - - - /* send header */ - struct _stp_msg_symbol_hdr smsh; - smsh.num_syms = num_syms; - smsh.sym_size = (uint32_t)(dataptr - data_base); - smsh.unwind_size = (uint32_t)0; - if (send_request(STP_SYMBOLS, &smsh, sizeof(smsh)) <= 0) - goto err; - - /* send syms */ - if (send_data(STP_SYMBOLS, syms, num_syms*struct_symbol_size) < 0) - goto err; - - /* send data */ - if (send_data(STP_SYMBOLS, data_base, dataptr-data_base) < 0) - goto err; - - free(data_base); - free(syms); - fclose(kallsyms); - return 0; - -err: - if (data_base) - free(data_base); - if (syms) - free(syms); - if (kallsyms) - fclose(kallsyms); - - _err("Loading of symbols failed. Exiting...\n"); - return -1; -} diff --git a/runtime/staprun/unwind_data.c b/runtime/staprun/unwind_data.c new file mode 100644 index 00000000..ed27cc20 --- /dev/null +++ b/runtime/staprun/unwind_data.c @@ -0,0 +1,97 @@ +/* -*- linux-c -*- + * Unwind data functions for staprun. + * + * Copyright (C) 2008 Red Hat Inc. 
+ * + * This file is part of systemtap, and is free software. You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. + */ + +#include "staprun.h" +#include <elfutils/libdwfl.h> +#include <dwarf.h> + +static char debuginfo_path_arr[] = "-:.debug:/usr/lib/debug"; +static char *debuginfo_path = debuginfo_path_arr; +static const Dwfl_Callbacks kernel_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + .find_elf = dwfl_linux_kernel_find_elf, + .section_address = dwfl_linux_kernel_module_section_address, +}; + +void *get_module_unwind_data(Dwfl * dwfl, const char *name, int *len) +{ + Dwarf_Addr bias = 0; + Dwarf *dw; + GElf_Ehdr *ehdr, ehdr_mem; + GElf_Shdr *shdr, shdr_mem; + Elf_Scn *scn = NULL; + Elf_Data *data = NULL; + + Dwfl_Module *mod = dwfl_report_module(dwfl, name, 0, 0); + dwfl_report_end(dwfl, NULL, NULL); + dw = dwfl_module_getdwarf(mod, &bias); + Elf *elf = dwarf_getelf(dw); + ehdr = gelf_getehdr(elf, &ehdr_mem); + while ((scn = elf_nextscn(elf, scn))) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (strcmp(elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name), ".debug_frame") == 0) { + data = elf_rawdata(scn, NULL); + break; + } + } + + if (data == NULL) { + *len = 0; + dbug(2, "module %s returns NULL\n", name); + return NULL; + } + dbug(2, "module %s returns %d\n", name, (int)data->d_size); + *len = data->d_size; + return data->d_buf; +} + +void send_unwind_data(const char *name) +{ + struct _stp_msg_unwind *un; + int unwind_data_len = 0; + void *unwind_data = NULL; + char *buf; + + dbug(2, "module %s\n", name); + if (strcmp(name, "*")) { + Dwfl *dwfl = dwfl_begin(&kernel_callbacks); + + if (name[0] == 0) + unwind_data = get_module_unwind_data(dwfl, "kernel", &unwind_data_len); + else + unwind_data = get_module_unwind_data(dwfl, name, &unwind_data_len); + + /* yuck */ + buf = (char 
*)malloc(unwind_data_len + sizeof(*un) + sizeof(uint32_t)); + if (!buf) { + err("malloc failed\n"); + return; + } + memcpy(buf + sizeof(*un) + sizeof(uint32_t), unwind_data, unwind_data_len); + dwfl_end(dwfl); + } else { + buf = (char *)malloc(sizeof(*un) + sizeof(uint32_t)); + if (!buf) { + err("malloc failed\n"); + return; + } + } + + un = (struct _stp_msg_unwind *)(buf + sizeof(uint32_t)); + strncpy(un->name, name, sizeof(un->name)); + un->unwind_len = unwind_data_len; + *(uint32_t *) buf = STP_UNWIND; + + /* send unwind data */ + if (write(control_channel, buf, unwind_data_len + sizeof(*un) + sizeof(uint32_t)) <= 0) + err("write failed\n"); +} diff --git a/runtime/sym.c b/runtime/sym.c index 3c2f859a..7163bf92 100644 --- a/runtime/sym.c +++ b/runtime/sym.c @@ -33,7 +33,7 @@ unsigned long _stp_module_relocate(const char *module, const char *section, unsi return 0; } - dbug(DEBUG_SYMBOLS, "%s, %s, %lx\n", module, section, offset); + dbug_sym(1, "%s, %s, %lx\n", module, section, offset); STP_RLOCK_MODULES; if (!module || !strcmp(section, "") /* absolute, unrelocated address */ @@ -47,7 +47,7 @@ unsigned long _stp_module_relocate(const char *module, const char *section, unsi if (!strcmp(module, last->name) && !strcmp(section, last_sec->symbol)) { offset += last_sec->addr; STP_RUNLOCK_MODULES; - dbug(DEBUG_SYMBOLS, "offset = %lx\n", offset); + dbug_sym(1, "offset = %lx\n", offset); return offset; } } @@ -72,7 +72,7 @@ unsigned long _stp_module_relocate(const char *module, const char *section, unsi if (!strcmp(section, last_sec->symbol)) { offset += last_sec->addr; STP_RUNLOCK_MODULES; - dbug(DEBUG_SYMBOLS, "offset = %lx\n", offset); + dbug_sym(1, "offset = %lx\n", offset); return offset; } } @@ -223,8 +223,7 @@ void _stp_symbol_print(unsigned long address) } /* Like _stp_symbol_print, except only print if the address is a valid function address */ - -void _stp_func_print(unsigned long address, int verbose, int exact) +int _stp_func_print(unsigned long address, int 
verbose, int exact) { char *modname; const char *name; @@ -247,7 +246,9 @@ void _stp_func_print(unsigned long address, int verbose, int exact) _stp_printf(" %p : %s+%#lx/%#lx%s\n", (int64_t) address, name, offset, size, exstr); } else _stp_printf("%p ", (int64_t) address); + return 1; } + return 0; } void _stp_symbol_snprint(char *str, size_t len, unsigned long address) diff --git a/runtime/sym.h b/runtime/sym.h index b124882a..0bb64c13 100644 --- a/runtime/sym.h +++ b/runtime/sym.h @@ -7,8 +7,8 @@ * later version. */ -#ifndef _STAP_SYMBOLS_H_ -#define _STAP_SYMBOLS_H_ +#ifndef _STP_SYM_H_ +#define _STP_SYM_H_ #define STP_MODULE_NAME_LEN 64 @@ -16,11 +16,6 @@ struct _stp_symbol { unsigned long addr; const char *symbol; }; -struct stap_symbol { - unsigned long addr; - const char *symbol; - const char *module; -}; DEFINE_RWLOCK(_stp_module_lock); #define STP_RLOCK_MODULES read_lock_irqsave(&_stp_module_lock, flags) @@ -50,8 +45,14 @@ struct _stp_module { /* how many sections this module has */ uint32_t num_sections; - /* how the symbol_data below was allocated */ - int32_t allocated; /* 0 = kmalloc, 1 = vmalloc */ + /* how the data below was allocated */ + /* 0 = kmalloc, 1 = vmalloc */ + struct { + unsigned symbols :1; + unsigned symbol_data :1; + unsigned unwind_data :1; + unsigned unwind_hdr :1; + } allocated; struct _stp_symbol *sections; @@ -63,7 +64,10 @@ struct _stp_module { /* the stack unwind data for this module */ void *unwind_data; + void *unwind_hdr; uint32_t unwind_data_len; + uint32_t unwind_hdr_len; + uint32_t unwind_is_ehframe; /* unwind data comes from .eh_frame */ rwlock_t lock; /* lock while unwinding is happening */ }; @@ -80,7 +84,8 @@ struct _stp_module *_stp_modules_by_addr[STP_MAX_MODULES]; /* the number of modules in the arrays */ int _stp_num_modules = 0; +static unsigned long _stp_kretprobe_trampoline = 0; unsigned long _stp_module_relocate (const char *module, const char *section, unsigned long offset); static struct _stp_module 
*_stp_get_unwind_info (unsigned long addr); -#endif /* _STAP_SYMBOLS_H_ */ +#endif /* _STP_SYM_H_ */ diff --git a/runtime/task_finder.c b/runtime/task_finder.c index d2e57a6b..6d79c98a 100644 --- a/runtime/task_finder.c +++ b/runtime/task_finder.c @@ -1,9 +1,16 @@ #include <linux/list.h> +#include <linux/binfmts.h> static LIST_HEAD(__stp_task_finder_list); struct stap_task_finder_target; +#define __STP_TF_STARTING 0 +#define __STP_TF_RUNNING 1 +#define __STP_TF_STOPPING 2 +#define __STP_TF_STOPPED 3 +atomic_t __stp_task_finder_state = ATOMIC_INIT(__STP_TF_STARTING); + typedef int (*stap_task_finder_callback)(struct task_struct *tsk, int register_p, struct stap_task_finder_target *tgt); @@ -23,6 +30,10 @@ struct stap_task_finder_target { stap_task_finder_callback callback; }; +static u32 +__stp_utrace_task_finder_target_death(struct utrace_attached_engine *engine, + struct task_struct *tsk); + static int stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) { @@ -38,6 +49,11 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) else new_tgt->pathlen = 0; + // Make sure everything is initialized properly. + new_tgt->engine_attached = 0; + memset(&new_tgt->ops, 0, sizeof(new_tgt->ops)); + new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death; + // Search the list for an existing entry for pathname/pid. list_for_each(node, &__stp_task_finder_list) { tgt = list_entry(node, struct stap_task_finder_target, list); @@ -62,7 +78,6 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) } // Add this target to the callback list for this task. 
- new_tgt->engine_attached = 0; list_add_tail(&new_tgt->callback_list, &tgt->callback_list_head); return 0; } @@ -78,6 +93,10 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops) rcu_read_lock(); for_each_process(tsk) { struct mm_struct *mm; + + if (tsk->pid <= 1) + continue; + mm = get_task_mm(tsk); if (mm) { mmput(mm); @@ -152,11 +171,12 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) vma = vma->vm_next; } if (vma) { - struct vfsmount *mnt = mntget(vma->vm_file->f_path.mnt); - struct dentry *dentry = dget(vma->vm_file->f_path.dentry); - rc = d_path(dentry, mnt, buf, buflen); - dput(dentry); - mntput(mnt); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + rc = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt, + buf, buflen); +#else + rc = d_path(&(vma->vm_file->f_path), buf, buflen); +#endif } else { *buf = '\0'; @@ -167,76 +187,82 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) } #define __STP_UTRACE_TASK_FINDER_EVENTS (UTRACE_EVENT(CLONE) \ - | UTRACE_EVENT(EXEC)) + | UTRACE_EVENT(EXEC) \ + | UTRACE_EVENT(DEATH)) #define __STP_UTRACE_ATTACHED_TASK_EVENTS (UTRACE_EVENT(DEATH)) -static u32 -__stp_utrace_task_finder_clone(struct utrace_attached_engine *engine, - struct task_struct *parent, - unsigned long clone_flags, - struct task_struct *child) +static int +__stp_utrace_attach(struct task_struct *tsk, + const struct utrace_engine_ops *ops, void *data, + unsigned long event_flags) { - struct utrace_attached_engine *child_engine; + struct utrace_attached_engine *engine; struct mm_struct *mm; + int rc = 0; - // On clone, attach to the child. Ignore threads with no mm - // (which are kernel threads). 
- mm = get_task_mm(child); - if (mm) { - mmput(mm); - child_engine = utrace_attach(child, UTRACE_ATTACH_CREATE, - engine->ops, 0); - if (IS_ERR(child_engine)) - _stp_error("attach to clone child %d failed: %ld", - (int)child->pid, PTR_ERR(child_engine)); - else { - utrace_set_flags(child, child_engine, - __STP_UTRACE_TASK_FINDER_EVENTS); + // Ignore init + if (tsk->pid <= 1) + return EPERM; + + // Ignore threads with no mm (which are kernel threads). + mm = get_task_mm(tsk); + if (! mm) + return EPERM; + mmput(mm); + + engine = utrace_attach(tsk, UTRACE_ATTACH_CREATE, ops, data); + if (IS_ERR(engine)) { + int error = -PTR_ERR(engine); + if (error != ENOENT) { + _stp_error("utrace_attach returned error %d on pid %d", + error, (int)tsk->pid); + rc = error; } } - return UTRACE_ACTION_RESUME; + else if (unlikely(engine == NULL)) { + _stp_error("utrace_attach returned NULL on pid %d", + (int)tsk->pid); + rc = EFAULT; + } + else { + utrace_set_flags(tsk, engine, event_flags); + } + return rc; } static u32 -__stp_utrace_task_finder_death(struct utrace_attached_engine *engine, - struct task_struct *tsk) +__stp_utrace_task_finder_report_clone(struct utrace_attached_engine *engine, + struct task_struct *parent, + unsigned long clone_flags, + struct task_struct *child) { - struct stap_task_finder_target *tgt = engine->data; - - // The first implementation of this added a - // UTRACE_EVENT(DEATH) handler to - // __stp_utrace_task_finder_ops. However, dead threads don't - // have a mm_struct, so we can't find the exe's path. So, we - // don't know which callback(s) to call. - // - // So, now when an "interesting" thread is found, we add a - // separate UTRACE_EVENT(DEATH) handler for every probe. 
+ struct utrace_attached_engine *child_engine; + struct mm_struct *mm; - if (tgt != NULL && tgt->callback != NULL) { - int rc; + if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) + return UTRACE_ACTION_RESUME; - // Call the callback - rc = tgt->callback(tsk, 0, tgt); - if (rc != 0) { - _stp_error("death callback for %d failed: %d", - (int)tsk->pid, rc); - } - } + // On clone, attach to the child. + (void) __stp_utrace_attach(child, engine->ops, 0, + __STP_UTRACE_TASK_FINDER_EVENTS); return UTRACE_ACTION_RESUME; } static u32 -__stp_utrace_task_finder_exec(struct utrace_attached_engine *engine, - struct task_struct *tsk, - const struct linux_binprm *bprm, - struct pt_regs *regs) +__stp_utrace_task_finder_report_exec(struct utrace_attached_engine *engine, + struct task_struct *tsk, + const struct linux_binprm *bprm, + struct pt_regs *regs) { size_t filelen; struct list_head *tgt_node; struct stap_task_finder_target *tgt; int found_node = 0; + if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) + return UTRACE_ACTION_RESUME; + // On exec, check bprm if (bprm->filename == NULL) return UTRACE_ACTION_RESUME; @@ -258,6 +284,8 @@ __stp_utrace_task_finder_exec(struct utrace_attached_engine *engine, struct list_head *cb_node; list_for_each(cb_node, &tgt->callback_list_head) { struct stap_task_finder_target *cb_tgt; + int rc; + cb_tgt = list_entry(cb_node, struct stap_task_finder_target, callback_list); @@ -274,31 +302,59 @@ __stp_utrace_task_finder_exec(struct utrace_attached_engine *engine, } // Set up thread death notification. 
- memset(&cb_tgt->ops, 0, sizeof(cb_tgt->ops)); - cb_tgt->ops.report_death - = &__stp_utrace_task_finder_death; - - engine = utrace_attach(tsk, - UTRACE_ATTACH_CREATE, - &cb_tgt->ops, cb_tgt); - if (IS_ERR(engine)) { - _stp_error("attach to exec'ed %d failed: %ld", - (int)tsk->pid, - PTR_ERR(engine)); - } - else { - utrace_set_flags(tsk, engine, + rc = __stp_utrace_attach(tsk, &cb_tgt->ops, cb_tgt, __STP_UTRACE_ATTACHED_TASK_EVENTS); - cb_tgt->engine_attached = 1; - } + if (rc != 0 && rc != EPERM) + break; + cb_tgt->engine_attached = 1; } } return UTRACE_ACTION_RESUME; } +static u32 +stap_utrace_task_finder_report_death(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + return UTRACE_ACTION_DETACH; +} + +static u32 +__stp_utrace_task_finder_target_death(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct stap_task_finder_target *tgt = engine->data; + + if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { + return UTRACE_ACTION_DETACH; + } + + // The first implementation of this added a + // UTRACE_EVENT(DEATH) handler to + // __stp_utrace_task_finder_ops. However, dead threads don't + // have a mm_struct, so we can't find the exe's path. So, we + // don't know which callback(s) to call. + // + // So, now when an "interesting" thread is found, we add a + // separate UTRACE_EVENT(DEATH) handler for every probe. 
+ + if (tgt != NULL && tgt->callback != NULL) { + int rc; + + // Call the callback + rc = tgt->callback(tsk, 0, tgt); + if (rc != 0) { + _stp_error("death callback for %d failed: %d", + (int)tsk->pid, rc); + } + } + return UTRACE_ACTION_DETACH; +} + struct utrace_engine_ops __stp_utrace_task_finder_ops = { - .report_clone = __stp_utrace_task_finder_clone, - .report_exec = __stp_utrace_task_finder_exec, + .report_clone = __stp_utrace_task_finder_report_clone, + .report_exec = __stp_utrace_task_finder_report_exec, + .report_death = stap_utrace_task_finder_report_death, }; int @@ -314,44 +370,36 @@ stap_start_task_finder(void) return ENOMEM; } + atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING); + rcu_read_lock(); for_each_process(tsk) { - struct utrace_attached_engine *engine; struct mm_struct *mm; char *mmpath; size_t mmpathlen; struct list_head *tgt_node; + /* Attach to the thread */ + rc = __stp_utrace_attach(tsk, &__stp_utrace_task_finder_ops, 0, + __STP_UTRACE_TASK_FINDER_EVENTS); + if (rc == EPERM) { + /* Ignore EPERM errors, which mean this wasn't + * a thread we can attach to. */ + rc = 0; + continue; + } + else if (rc != 0) { + /* If we get a real error, quit. */ + break; + } + + /* Grab the path associated with this task. */ mm = get_task_mm(tsk); if (! mm) { /* If the thread doesn't have a mm_struct, it is * a kernel thread which we need to skip. 
*/ continue; } - - /* Attach to the thread */ - engine = utrace_attach(tsk, UTRACE_ATTACH_CREATE, - &__stp_utrace_task_finder_ops, 0); - if (IS_ERR(engine)) { - int error = -PTR_ERR(engine); - if (error != ENOENT) { - mmput(mm); - _stp_error("utrace_attach returned error %d on pid %d", - error, (int)tsk->pid); - rc = error; - break; - } - } - else if (unlikely(engine == NULL)) { - mmput(mm); - _stp_error("utrace_attach returned NULL on pid %d", - (int)tsk->pid); - rc = EFAULT; - break; - } - utrace_set_flags(tsk, engine, __STP_UTRACE_TASK_FINDER_EVENTS); - - /* Check the thread's exe's path/pid against our list. */ mmpath = __stp_get_mm_path(mm, mmpath_buf, PATH_MAX); mmput(mm); /* We're done with mm */ if (IS_ERR(mmpath)) { @@ -361,6 +409,7 @@ stap_start_task_finder(void) break; } + /* Check the thread's exe's path/pid against our list. */ mmpathlen = strlen(mmpath); list_for_each(tgt_node, &__stp_task_finder_list) { struct stap_task_finder_target *tgt; @@ -394,10 +443,19 @@ stap_start_task_finder(void) (int)tsk->pid, rc); break; } + + // Set up thread death notification. + rc = __stp_utrace_attach(tsk, &cb_tgt->ops, + cb_tgt, + __STP_UTRACE_ATTACHED_TASK_EVENTS); + if (rc != 0 && rc != EPERM) + break; + cb_tgt->engine_attached = 1; } } } rcu_read_unlock(); + _stp_kfree(mmpath_buf); return rc; } @@ -405,6 +463,8 @@ stap_start_task_finder(void) static void stap_stop_task_finder(void) { + atomic_set(&__stp_task_finder_state, __STP_TF_STOPPING); stap_utrace_detach_ops(&__stp_utrace_task_finder_ops); __stp_task_finder_cleanup(); + atomic_set(&__stp_task_finder_state, __STP_TF_STOPPED); } diff --git a/runtime/time.c b/runtime/time.c index 52a2edbb..8a0b6fad 100644 --- a/runtime/time.c +++ b/runtime/time.c @@ -131,10 +131,13 @@ __stp_time_timer_callback(unsigned long val) time->base_cycles = cycles; write_sequnlock(&time->lock); + local_irq_restore(flags); + /* PR6481: reenable IRQs before resetting the timer. 
+ XXX: The worst that can probably happen is that we get + two consecutive timer resets. */ + if (likely(stp_timer_reregister)) mod_timer(&time->timer, jiffies + 1); - - local_irq_restore(flags); } /* This is called as an IPI, with interrupts disabled. */ diff --git a/runtime/transport/ChangeLog b/runtime/transport/ChangeLog index c3837f86..9d0ba162 100644 --- a/runtime/transport/ChangeLog +++ b/runtime/transport/ChangeLog @@ -1,3 +1,51 @@ +2008-04-30 Masami Hiramatsu <mhiramat@redhat.com> + + PR 5645 + * transport.c (_stp_transport_init): Fix subbuffer size calculation + overflow. + +2008-04-21 hunt <hunt@redhat.com> + + * control.c (_stp_ctl_write): Return len + sizeof(int) so + sending an empty command doesn't return 0 and look like a failure. + * transport.c: _stp_cleanup_and_exit(): Cleanup. + +2008-04-15 Frank Ch. Eigler <fche@elastic.org> + + PR 6410 + * symbols.c (_stp_do_unwind_data): Tolerate !STP_USE_DWARF_UNWINDER. + +2008-04-15 Frank Ch. Eigler <fche@elastic.org> + + PR 6405 + * symbols.c (_stp_load_module_symbols): Support older kernels + without module->sect_attrs->nsections. + +2008-04-09 Martin Hunt <hunt@dragon> + + * symbols.c (_stp_init_kernel_symbols): Print error + messages and exit if symbol lookups fail. + (_stp_init_modules): Lookup modules_op. + +2008-03-31 Martin Hunt <hunt@redhat.com> + + * symbols.c (_stp_init_modules): Use STP_USE_DWARF_UNWINDER. + + * transport.c (_stp_get_root_dir): Remove misleading error message. + +2008-03-30 Martin Hunt <hunt@redhat.com> + + * symbols.c (_stp_init_modules): If using frames, don't + request unwind info. + +2008-03-25 Martin Hunt <hunt@redhat.com> + + * control.c (_stp_ctl_write_dbug): Insert missing break. + + 32-bit systems can't do 64-bit get_user(), so + * symbols.c (_stp_do_unwind_data): Change unwind_len to a u32. + * transport_msgs.h (struct _stp_msg_unwind): Ditto. + 2008-02-27 Martin Hunt <hunt@redhat.com> * symbols.c: Use rwlocks. Use new dbug macros. 
Handle diff --git a/runtime/transport/control.c b/runtime/transport/control.c index 6a5b272d..ca7edf79 100644 --- a/runtime/transport/control.c +++ b/runtime/transport/control.c @@ -14,80 +14,31 @@ static int _stp_current_buffers = STP_DEFAULT_BUFFERS; static _stp_mempool_t *_stp_pool_q; static struct list_head _stp_ctl_ready_q; -static struct list_head _stp_sym_ready_q; DEFINE_SPINLOCK(_stp_ctl_ready_lock); -DEFINE_SPINLOCK(_stp_sym_ready_lock); -static ssize_t _stp_sym_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos) -{ - static int saved_type = 0; - int type; - - if (count < sizeof(int32_t)) - return 0; - - /* Allow sending of packet type followed by data in the next packet. */ - if (count == sizeof(int32_t)) { - if (get_user(saved_type, (int __user *)buf)) - return -EFAULT; - return count; - } else if (saved_type) { - type = saved_type; - saved_type = 0; - } else { - if (get_user(type, (int __user *)buf)) - return -EFAULT; - count -= sizeof(int); - buf += sizeof(int); - } - -#if DEBUG_TRANSPORT > 0 - if (type < STP_MAX_CMD) - _dbug("Got %s. 
len=%d\n", _stp_command_name[type], (int)count); -#endif - - switch (type) { - case STP_SYMBOLS: - count = _stp_do_symbols(buf, count); - break; - case STP_MODULE: - if (count > 1) - count = _stp_do_module(buf, count); - else { - /* count == 1 indicates end of initial modules list */ - _stp_ctl_send(STP_TRANSPORT, NULL, 0); - } - break; - case STP_EXIT: - _stp_exit_flag = 1; - break; - default: - errk("invalid symbol command type %d\n", type); - return -EINVAL; - } - - return count; -} static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int type; + u32 type; static int started = 0; - if (count < sizeof(int)) + if (count < sizeof(u32)) return 0; - if (get_user(type, (int __user *)buf)) + if (get_user(type, (u32 __user *)buf)) return -EFAULT; -#if DEBUG_TRANSPORT > 0 + count -= sizeof(u32); + buf += sizeof(u32); + +#ifdef DEBUG_TRANS if (type < STP_MAX_CMD) _dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count); #endif - count -= sizeof(int); - buf += sizeof(int); - switch (type) { + case STP_UNWIND: + _stp_do_unwind_data(buf, count); + break; case STP_START: if (started == 0) { struct _stp_msg_start st; @@ -110,7 +61,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz #endif case STP_READY: /* request symbolic information */ - _stp_ask_for_symbols(); + /* _stp_ask_for_symbols(); */ break; default: @@ -121,8 +72,6 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz return count; } -#define STP_CTL_BUFFER_SIZE 256 - struct _stp_buffer { struct list_head list; int len; @@ -131,9 +80,8 @@ struct _stp_buffer { }; static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq); -static DECLARE_WAIT_QUEUE_HEAD(_stp_sym_wq); -#if DEBUG_TRANSPORT > 0 +#ifdef DEBUG_TRANS static void _stp_ctl_write_dbug(int type, void *data, int len) { char buf[64]; @@ -155,19 +103,9 @@ static void _stp_ctl_write_dbug(int type, void *data, int len) case STP_TRANSPORT: _dbug("sending 
STP_TRANSPORT\n"); break; - default: - _dbug("ERROR: unknown message type: %d\n", type); - break; - } -} -static void _stp_sym_write_dbug(int type, void *data, int len) -{ - switch (type) { - case STP_SYMBOLS: - _dbug("sending STP_SYMBOLS\n"); - break; - case STP_MODULE: - _dbug("sending STP_MODULE\n"); + case STP_UNWIND: + snprintf(buf, sizeof(buf), "%s", (char *)data); + _dbug("sending STP_UNWIND %s [len=%d]\n", buf, len); break; default: _dbug("ERROR: unknown message type: %d\n", type); @@ -181,7 +119,7 @@ static int _stp_ctl_write(int type, void *data, unsigned len) struct _stp_buffer *bptr; unsigned long flags; -#if DEBUG_TRANSPORT > 0 +#ifdef DEBUG_TRANS _stp_ctl_write_dbug(type, data, len); #endif @@ -203,99 +141,22 @@ static int _stp_ctl_write(int type, void *data, unsigned len) list_add_tail(&bptr->list, &_stp_ctl_ready_q); spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags); - return len; -} - -static int _stp_sym_write(int type, void *data, unsigned len) -{ - struct _stp_buffer *bptr; - unsigned long flags; - -#if DEBUG_TRANSPORT > 0 - _stp_sym_write_dbug(type, data, len); -#endif - - /* make sure we won't overflow the buffer */ - if (unlikely(len > STP_CTL_BUFFER_SIZE)) - return 0; - - /* get a buffer from the free pool */ - bptr = _stp_mempool_alloc(_stp_pool_q); - if (unlikely(bptr == NULL)) - return -1; - - bptr->type = type; - memcpy(bptr->buf, data, len); - bptr->len = len; - - /* put it on the pool of ready buffers */ - spin_lock_irqsave(&_stp_sym_ready_lock, flags); - list_add_tail(&bptr->list, &_stp_sym_ready_q); - spin_unlock_irqrestore(&_stp_sym_ready_lock, flags); - - /* OK, it's queued. Now signal any waiters. 
*/ - wake_up_interruptible(&_stp_sym_wq); - - return len; + return len + sizeof(bptr->type); } /* send commands with timeout and retry */ static int _stp_ctl_send(int type, void *data, int len) { int err, trylimit = 50; - kbug(DEBUG_TRANSPORT, "ctl_send: type=%d len=%d\n", type, len); - if (unlikely(type == STP_SYMBOLS || type == STP_MODULE)) { - while ((err = _stp_sym_write(type, data, len)) < 0 && trylimit--) - msleep(5); - } else { - while ((err = _stp_ctl_write(type, data, len)) < 0 && trylimit--) - msleep(5); - if (err > 0) - wake_up_interruptible(&_stp_ctl_wq); - } - kbug(DEBUG_TRANSPORT, "returning %d\n", err); + dbug_trans(1, "ctl_send: type=%d len=%d\n", type, len); + while ((err = _stp_ctl_write(type, data, len)) < 0 && trylimit--) + msleep(5); + if (err > 0) + wake_up_interruptible(&_stp_ctl_wq); + dbug_trans(1, "returning %d\n", err); return err; } -static ssize_t _stp_sym_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - struct _stp_buffer *bptr; - int len; - unsigned long flags; - - /* wait for nonempty ready queue */ - spin_lock_irqsave(&_stp_sym_ready_lock, flags); - while (list_empty(&_stp_sym_ready_q)) { - spin_unlock_irqrestore(&_stp_sym_ready_lock, flags); - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - if (wait_event_interruptible(_stp_sym_wq, !list_empty(&_stp_sym_ready_q))) - return -ERESTARTSYS; - spin_lock_irqsave(&_stp_sym_ready_lock, flags); - } - - /* get the next buffer off the ready list */ - bptr = (struct _stp_buffer *)_stp_sym_ready_q.next; - list_del_init(&bptr->list); - spin_unlock_irqrestore(&_stp_sym_ready_lock, flags); - - /* write it out */ - len = bptr->len + 4; - if (len > count || copy_to_user(buf, &bptr->type, len)) { - /* now what? 
We took it off the queue then failed to send it */ - /* we can't put it back on the queue because it will likely be out-of-order */ - /* fortunately this should never happen */ - /* FIXME need to mark this as a transport failure */ - errk("Supplied buffer too small. count:%d len:%d\n", (int)count, len); - return -EFAULT; - } - - /* put it on the pool of free buffers */ - _stp_mempool_free(bptr); - - return len; -} - static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct _stp_buffer *bptr; @@ -335,29 +196,10 @@ static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, size_t cou return len; } -static int _stp_sym_opens = 0; -static int _stp_sym_open_cmd(struct inode *inode, struct file *file) -{ - /* only allow one reader */ - if (_stp_sym_opens) - return -1; - - _stp_sym_opens++; - return 0; -} - -static int _stp_sym_close_cmd(struct inode *inode, struct file *file) -{ - if (_stp_sym_opens) - _stp_sym_opens--; - return 0; -} - static int _stp_ctl_open_cmd(struct inode *inode, struct file *file) { if (_stp_attached) return -1; - _stp_attach(); return 0; } @@ -377,16 +219,7 @@ static struct file_operations _stp_ctl_fops_cmd = { .release = _stp_ctl_close_cmd, }; -static struct file_operations _stp_sym_fops_cmd = { - .owner = THIS_MODULE, - .read = _stp_sym_read_cmd, - .write = _stp_sym_write_cmd, - .open = _stp_sym_open_cmd, - .release = _stp_sym_close_cmd, -}; - static struct dentry *_stp_cmd_file = NULL; -static struct dentry *_stp_sym_file = NULL; static int _stp_register_ctl_channel(void) { @@ -400,7 +233,6 @@ static int _stp_register_ctl_channel(void) } INIT_LIST_HEAD(&_stp_ctl_ready_q); - INIT_LIST_HEAD(&_stp_sym_ready_q); /* allocate buffers */ _stp_pool_q = _stp_mempool_init(sizeof(struct _stp_buffer), STP_DEFAULT_BUFFERS); @@ -415,15 +247,9 @@ static int _stp_register_ctl_channel(void) _stp_cmd_file->d_inode->i_uid = _stp_uid; _stp_cmd_file->d_inode->i_gid = _stp_gid; - /* create 
[debugfs]/systemtap/module_name/.symbols */ - _stp_sym_file = debugfs_create_file(".symbols", 0600, _stp_utt->dir, NULL, &_stp_sym_fops_cmd); - if (_stp_sym_file == NULL) - goto err0; return 0; err0: - if (_stp_cmd_file) - debugfs_remove(_stp_cmd_file); _stp_mempool_destroy(_stp_pool_q); errk("Error creating systemtap debugfs entries.\n"); return -1; @@ -432,16 +258,10 @@ err0: static void _stp_unregister_ctl_channel(void) { struct list_head *p, *tmp; - if (_stp_sym_file) - debugfs_remove(_stp_sym_file); if (_stp_cmd_file) debugfs_remove(_stp_cmd_file); /* Return memory to pool and free it. */ - list_for_each_safe(p, tmp, &_stp_sym_ready_q) { - list_del(p); - _stp_mempool_free(p); - } list_for_each_safe(p, tmp, &_stp_ctl_ready_q) { list_del(p); _stp_mempool_free(p); diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c index 2afea1c9..750e1994 100644 --- a/runtime/transport/procfs.c +++ b/runtime/transport/procfs.c @@ -161,7 +161,7 @@ struct _stp_buffer { struct list_head list; int len; int type; - char buf[STP_BUFFER_SIZE]; + char buf[STP_CTL_BUFFER_SIZE]; }; static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq); diff --git a/runtime/transport/symbols.c b/runtime/transport/symbols.c index 8c453a55..087bf893 100644 --- a/runtime/transport/symbols.c +++ b/runtime/transport/symbols.c @@ -12,8 +12,8 @@ * lib/sort.c of kernel 2.6.22-rc5. It was written by Matt Mackall. 
*/ -#ifndef _SYMBOLS_C_ -#define _SYMBOLS_C_ +#ifndef _STP_SYMBOLS_C_ +#define _STP_SYMBOLS_C_ #include "../sym.h" static char *_stp_symbol_data = NULL; @@ -21,17 +21,12 @@ static int _stp_symbol_state = 0; static char *_stp_module_data = NULL; static int _stp_module_state = 0; - /* these are all the symbol types we are interested in */ static int _stp_sym_type_ok(int type) { - switch (type) { - case 'T': - case 't': + /* we only care about function symbols, which are in the text section */ + if (type == 'T' || type == 't') return 1; - default: - return 0; - } return 0; } @@ -41,10 +36,10 @@ static unsigned _stp_get_sym_sizes(struct module *m, unsigned *dsize) { unsigned int i; unsigned num = 0, datasize = 0; - for (i=0; i < m->num_symtab; i++) { + for (i = 0; i < m->num_symtab; i++) { char *str = (char *)(m->strtab + m->symtab[i].st_name); if (*str != '\0' && _stp_sym_type_ok(m->symtab[i].st_info)) { - datasize += strlen(str)+1; + datasize += strlen(str) + 1; num++; } } @@ -52,19 +47,23 @@ static unsigned _stp_get_sym_sizes(struct module *m, unsigned *dsize) return num; } -/* allocate space for a module and symbols */ -static struct _stp_module * _stp_alloc_module(unsigned num, unsigned datasize, unsigned unwindsize) +/* allocate space for a module, sections, and symbols */ +static struct _stp_module *_stp_alloc_module(unsigned sectsize, unsigned num, unsigned datasize) { struct _stp_module *mod = (struct _stp_module *)_stp_kzalloc(sizeof(struct _stp_module)); if (mod == NULL) goto bad; + mod->sections = (struct _stp_symbol *)_stp_kmalloc(sectsize); + if (mod->sections == NULL) + goto bad; + mod->symbols = (struct _stp_symbol *)_stp_kmalloc(num * sizeof(struct _stp_symbol)); if (mod->symbols == NULL) { mod->symbols = (struct _stp_symbol *)_stp_vmalloc(num * sizeof(struct _stp_symbol)); if (mod->symbols == NULL) goto bad; - mod->allocated = 1; + mod->allocated.symbols = 1; } mod->symbol_data = _stp_kmalloc(datasize); @@ -72,91 +71,63 @@ static struct _stp_module * 
_stp_alloc_module(unsigned num, unsigned datasize, u mod->symbol_data = _stp_vmalloc(datasize); if (mod->symbol_data == NULL) goto bad; - mod->allocated |= 2; + mod->allocated.symbol_data = 1; } - mod->unwind_data = _stp_kmalloc(unwindsize); - if (mod->unwind_data == NULL) { - mod->unwind_data = _stp_vmalloc(unwindsize); - if (mod->unwind_data == NULL) - goto bad; - mod->allocated |= 4; - } - mod->num_symbols = num; return mod; bad: if (mod) { + if (mod->sections) + _stp_kfree(mod->sections); if (mod->symbols) { - if (mod->allocated & 1) + if (mod->allocated.symbols) _stp_vfree(mod->symbols); else _stp_kfree(mod->symbols); - mod->symbols = NULL; } - if (mod->symbol_data) { - if (mod->allocated & 2) - _stp_vfree(mod->symbol_data); - else - _stp_kfree(mod->symbol_data); - mod->symbol_data = NULL; - } - _stp_kfree(mod); - if (mod->symbols) { - if (mod->allocated & 1) - _stp_vfree(mod->symbols); - else - _stp_kfree(mod->symbols); - mod->symbols = NULL; - } - _stp_kfree(mod); + _stp_kfree(mod); } return NULL; } -static struct _stp_module * _stp_alloc_module_from_module (struct module *m, uint32_t unwind_len) -{ - unsigned datasize, num = _stp_get_sym_sizes(m, &datasize); - return _stp_alloc_module(num, datasize, unwind_len); -} - static void _stp_free_module(struct _stp_module *mod) { /* The module write lock is held. Any prior readers of this */ /* module's data will have read locks and need to finish before */ /* the memory is freed. */ write_lock(&mod->lock); - write_unlock(&mod->lock); /* there will be no more readers */ + write_unlock(&mod->lock); /* there will be no more readers */ - /* free symbol memory */ - if (mod->symbols) { - if (mod->allocated & 1) - _stp_vfree(mod->symbols); - else - _stp_kfree(mod->symbols); - mod->symbols = NULL; - } + /* Free symbol memory */ + /* If symbol_data wasn't allocated, then symbols weren't either. 
*/ if (mod->symbol_data) { - if (mod->allocated & 2) + if (mod->symbols) { + if (mod->allocated.symbols) + _stp_vfree(mod->symbols); + else + _stp_kfree(mod->symbols); + } + if (mod->allocated.symbol_data) _stp_vfree(mod->symbol_data); else _stp_kfree(mod->symbol_data); - mod->symbol_data = NULL; - } if (mod->unwind_data) { - if (mod->allocated & 4) + if (mod->allocated.unwind_data) _stp_vfree(mod->unwind_data); else _stp_kfree(mod->unwind_data); - mod->unwind_data = NULL; - } - if (mod->sections) { - _stp_kfree(mod->sections); - mod->sections = NULL; + if (mod->unwind_hdr) { + if (mod->allocated.unwind_hdr) + _stp_vfree(mod->unwind_hdr); + else + _stp_kfree(mod->unwind_hdr); } + if (mod->sections) + _stp_kfree(mod->sections); /* free module memory */ _stp_kfree(mod); @@ -168,7 +139,7 @@ static void _stp_del_module(struct _stp_module *mod) { int i, num; - // kbug(DEBUG_SYMBOLS, "deleting %s\n", mod->name); + dbug_sym(1, "deleting module %s\n", mod->name); /* signal relocation code to clear its cache */ _stp_module_relocate((char *)-1, NULL, 0); @@ -181,15 +152,15 @@ static void _stp_del_module(struct _stp_module *mod) if (num >= _stp_num_modules) return; - for (i = num; i < _stp_num_modules-1; i++) - _stp_modules[i] = _stp_modules[i+1]; + for (i = num; i < _stp_num_modules - 1; i++) + _stp_modules[i] = _stp_modules[i + 1]; for (num = 0; num < _stp_num_modules; num++) { if (_stp_modules_by_addr[num] == mod) break; } - for (i = num; i < _stp_num_modules-1; i++) - _stp_modules_by_addr[i] = _stp_modules_by_addr[i+1]; + for (i = num; i < _stp_num_modules - 1; i++) + _stp_modules_by_addr[i] = _stp_modules_by_addr[i + 1]; _stp_num_modules--; @@ -197,10 +168,8 @@ static void _stp_del_module(struct _stp_module *mod) } static void _stp_free_modules(void) -{ +{ int i; - unsigned long flags; - /* This only happens when the systemtap module unloads */ /* so there is no need for locks. 
*/ for (i = _stp_num_modules - 1; i >= 0; i--) @@ -208,82 +177,134 @@ static void _stp_free_modules(void) } static unsigned long _stp_kallsyms_lookup_name(const char *name); +static void _stp_create_unwind_hdr(struct _stp_module *m); + +extern unsigned _stp_num_kernel_symbols; +extern struct _stp_symbol _stp_kernel_symbols[]; -/* process the KERNEL symbols */ -static int _stp_do_symbols(const char __user *buf, int count) +/* initialize the kernel symbols */ +static int _stp_init_kernel_symbols(void) { - struct _stp_symbol *s; - unsigned datasize, num, unwindsize; + _stp_modules[0] = (struct _stp_module *)_stp_kzalloc(sizeof(struct _stp_module)); + if (_stp_modules[0] == NULL) { + _dbug("cannot allocate memory\n"); + return -1; + } + _stp_modules[0]->symbols = _stp_kernel_symbols; + _stp_modules[0]->num_symbols = _stp_num_kernel_symbols; + rwlock_init(&_stp_modules[0]->lock); + _stp_num_modules = 1; + + /* Note: this mapping is used by kernel/_stext pseudo-relocations. */ + _stp_modules[0]->text = _stp_kallsyms_lookup_name("_stext"); + if (_stp_modules[0]->text == 0) { + _dbug("Lookup of _stext failed. Exiting.\n"); + return -1; + } + _stp_modules[0]->data = _stp_kallsyms_lookup_name("_etext"); + if (_stp_modules[0]->data == 0) { + _dbug("Lookup of _etext failed. 
Exiting.\n"); + return -1; + } + _stp_modules[0]->text_size = _stp_modules[0]->data - _stp_modules[0]->text; + _stp_modules_by_addr[0] = _stp_modules[0]; + + _stp_kretprobe_trampoline = _stp_kallsyms_lookup_name("kretprobe_trampoline"); + /* Lookup failure is not fatal */ + + return 0; +} + +static void _stp_do_unwind_data(const char __user *buf, size_t count) +{ + u32 unwind_len; + unsigned long flags; + char name[STP_MODULE_NAME_LEN]; int i; + struct _stp_module *m; + + dbug_unwind(1, "got unwind data, count=%d\n", count); - switch (_stp_symbol_state) { - case 0: - if (count != sizeof(struct _stp_msg_symbol_hdr)) { - errk("count=%d\n", count); - return -EFAULT; - } - if (get_user(num, (unsigned __user *)buf)) - return -EFAULT; - if (get_user(datasize, (unsigned __user *)(buf+4))) - return -EFAULT; - if (get_user(unwindsize, (unsigned __user *)(buf+8))) - return -EFAULT; - dbug(DEBUG_UNWIND, "num=%d datasize=%d unwindsize=%d\n", num, datasize, unwindsize); - - _stp_modules[0] = _stp_alloc_module(num, datasize, unwindsize); - if (_stp_modules[0] == NULL) { - errk("cannot allocate memory\n"); - return -EFAULT; + if (count < STP_MODULE_NAME_LEN + sizeof(unwind_len)) { + dbug_unwind(1, "unwind message too short\n"); + return; + } + if (strncpy_from_user(name, buf, STP_MODULE_NAME_LEN) < 0) { + errk("userspace copy failed\n"); + return; + } + dbug_unwind(1, "name=%s\n", name); + if (!strcmp(name,"*")) { + /* OK, all initial unwind data received. Ready to go. 
*/ + _stp_ctl_send(STP_TRANSPORT, NULL, 0); + return; + } + count -= STP_MODULE_NAME_LEN; + buf += STP_MODULE_NAME_LEN; + + if (get_user(unwind_len, (u32 __user *)buf)) { + errk("userspace copy failed\n"); + return; + } + count -= sizeof(unwind_len); + buf += sizeof(unwind_len); + if (count != unwind_len) { + dbug_unwind(1, "count=%d unwind_len=%d\n", (int)count, (int)unwind_len); + return; + } + + STP_RLOCK_MODULES; + for (i = 0; i < _stp_num_modules; i++) { + if (strcmp(name, _stp_modules[i]->name) == 0) + break; + } + if (unlikely(i == _stp_num_modules)) { + dbug_unwind(1, "module %s not found!\n", name); + STP_RUNLOCK_MODULES; + return; + } + m = _stp_modules[i]; + write_lock(&m->lock); + STP_RUNLOCK_MODULES; + + /* allocate space for unwind data */ + m->unwind_data = _stp_kmalloc(count); + if (unlikely(m->unwind_data == NULL)) { + m->unwind_data = _stp_vmalloc(count); + if (m->unwind_data == NULL) { + errk("kmalloc failed\n"); + goto done; } - rwlock_init(&_stp_modules[0]->lock); - _stp_symbol_state = 1; - break; - case 1: - dbug(DEBUG_SYMBOLS, "got stap_symbols, count=%d\n", count); - if (copy_from_user ((char *)_stp_modules[0]->symbols, buf, count)) - return -EFAULT; - _stp_symbol_state = 2; - break; - case 2: - dbug(DEBUG_SYMBOLS, "got symbol data, count=%d buf=%p\n", count, buf); - if (copy_from_user (_stp_modules[0]->symbol_data, buf, count)) - return -EFAULT; - _stp_num_modules = 1; - - s = _stp_modules[0]->symbols; - for (i = 0; i < _stp_modules[0]->num_symbols; i++) - s[i].symbol += (long)_stp_modules[0]->symbol_data; - - _stp_symbol_state = 3; - /* NB: this mapping is used by kernel/_stext pseudo-relocations. */ - _stp_modules[0]->text = _stp_kallsyms_lookup_name("_stext"); - _stp_modules[0]->data = _stp_kallsyms_lookup_name("_etext"); - _stp_modules[0]->text_size = _stp_modules[0]->data - _stp_modules[0]->text; - _stp_modules_by_addr[0] = _stp_modules[0]; - dbug(DEBUG_SYMBOLS, "Got kernel symbols. 
text=%p len=%u\n", - (int64_t)_stp_modules[0]->text, _stp_modules[0]->text_size); - break; - case 3: - dbug(DEBUG_UNWIND, "got unwind data, count=%d\n", count); - _stp_symbol_state = 4; - if (copy_from_user (_stp_modules[0]->unwind_data, buf, count)) { - _dbug("cfu failed\n"); - return -EFAULT; + m->allocated.unwind_data = 1; + } + + if (unlikely(copy_from_user(m->unwind_data, buf, count))) { + errk("userspace copy failed\n"); + if (m->unwind_data) { + if (m->allocated.unwind_data) + _stp_vfree(m->unwind_data); + else + _stp_kfree(m->unwind_data); + m->unwind_data = NULL; } - _stp_modules[0]->unwind_data_len = count; - break; - default: - errk("unexpected symbol data of size %d.\n", count); + goto done; } - return count; + m->unwind_data_len = count; +#ifdef STP_USE_DWARF_UNWINDER + _stp_create_unwind_hdr(m); +#endif +done: + write_unlock(&m->lock); } static int _stp_compare_addr(const void *p1, const void *p2) { struct _stp_symbol *s1 = (struct _stp_symbol *)p1; struct _stp_symbol *s2 = (struct _stp_symbol *)p2; - if (s1->addr == s2->addr) return 0; - if (s1->addr < s2->addr) return -1; + if (s1->addr == s2->addr) + return 0; + if (s1->addr < s2->addr) + return -1; return 1; } @@ -332,18 +353,17 @@ static void generic_swap(void *a, void *b, int size) * it less suitable for kernel use. */ void _stp_sort(void *base, size_t num, size_t size, - int (*cmp)(const void *, const void *), - void (*swap)(void *, void *, int size)) + int (*cmp) (const void *, const void *), void (*swap) (void *, void *, int size)) { /* pre-scale counters for performance */ - int i = (num/2 - 1) * size, n = num * size, c, r; + int i = (num / 2 - 1) * size, n = num * size, c, r; if (!swap) swap = (size == 4 ? 
u32_swap : generic_swap); /* heapify */ - for ( ; i >= 0; i -= size) { - for (r = i; r * 2 + size < n; r = c) { + for (; i >= 0; i -= size) { + for (r = i; r * 2 + size < n; r = c) { c = r * 2 + size; if (c < n - size && cmp(base + c, base + c + size) < 0) c += size; @@ -367,65 +387,125 @@ void _stp_sort(void *base, size_t num, size_t size, } } +/* filter out section names we don't care about */ +static int _stp_section_is_interesting(const char *name) +{ + int ret = 1; + if (!strncmp("__", name, 2) + || !strncmp(".note", name, 5) + || !strncmp(".gnu", name, 4) + || !strncmp(".mod", name, 4)) + ret = 0; + return ret; +} + /* Create a new _stp_module and load the symbols */ -static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod, uint32_t unwind_len) +static struct _stp_module *_stp_load_module_symbols(struct module *mod) { - unsigned i, num=0; - struct module *m = (struct module *)imod->module; - struct _stp_module *mod = NULL; - char *dataptr; + int i, num, overflow = 0; + struct module_sect_attrs *sa = mod->sect_attrs; + struct attribute_group *sag = & sa->grp; + unsigned sect_size = 0, sect_num = 0, sym_size, sym_num; + struct _stp_module *sm; + char *dataptr, *endptr; + unsigned nsections = 0; + +#ifdef STAPCONF_MODULE_NSECTIONS + nsections = sa->nsections; +#else + /* count section attributes on older kernel */ + struct attribute** gattr; + for (gattr = sag->attrs; *gattr; gattr++) + nsections++; + dbug_sym(2, "\tcount %d\n", nsections); +#endif + + /* calculate how much space to allocate for section strings */ + for (i = 0; i < nsections; i++) { + if (_stp_section_is_interesting(sa->attrs[i].name)) { + sect_num++; + sect_size += strlen(sa->attrs[i].name) + 1; + dbug_sym(2, "\t%s\t%lx\n", sa->attrs[i].name, sa->attrs[i].address); + } + } + sect_size += sect_num * sizeof(struct _stp_symbol); - if (m == NULL) { - kbug(DEBUG_SYMBOLS, "imod->module is NULL\n"); + /* and how much space for symbols */ + sym_num = _stp_get_sym_sizes(mod, 
&sym_size); + + sm = _stp_alloc_module(sect_size, sym_num, sym_size); + if (!sm) { + errk("failed to allocate memory for module.\n"); return NULL; } - if (try_module_get(m)) { - mod = _stp_alloc_module_from_module(m, unwind_len); - if (mod == NULL) { - module_put(m); - errk("failed to allocate memory for module.\n"); - return NULL; - } + strlcpy(sm->name, mod->name, STP_MODULE_NAME_LEN); + sm->module = (unsigned long)mod; + sm->text = (unsigned long)mod->module_core; + sm->text_size = mod->core_text_size; + sm->data = 0; /* fixme */ + sm->num_sections = sect_num; + rwlock_init(&sm->lock); - strlcpy(mod->name, imod->name, STP_MODULE_NAME_LEN); - mod->module = imod->module; - mod->text = imod->text; - mod->data = imod->data; - mod->num_sections = imod->num_sections; - mod->sections = imod->sections; - mod->text_size = m->core_text_size; - rwlock_init(&mod->lock); - - /* now copy all the symbols we are interested in */ - dataptr = mod->symbol_data; - for (i=0; i < m->num_symtab; i++) { - char *str = (char *)(m->strtab + m->symtab[i].st_name); - if (*str != '\0' && _stp_sym_type_ok(m->symtab[i].st_info)) { - mod->symbols[num].symbol = dataptr; - mod->symbols[num].addr = m->symtab[i].st_value; - while (*str) *dataptr++ = *str++; - *dataptr++ = 0; - num++; + /* copy in section data */ + dataptr = (char *)((long)sm->sections + sect_num * sizeof(struct _stp_symbol)); + endptr = (char *)((long)sm->sections + sect_size); + num = 0; + for (i = 0; i < nsections; i++) { + size_t len, maxlen; + if (_stp_section_is_interesting(sa->attrs[i].name)) { + sm->sections[num].addr = sa->attrs[i].address; + sm->sections[num].symbol = dataptr; + maxlen = (size_t) (endptr - dataptr); + len = strlcpy(dataptr, sa->attrs[i].name, maxlen); + if (unlikely(len >= maxlen)) { + _dbug("dataptr=%lx endptr=%lx len=%d maxlen=%d\n", dataptr, endptr, len, maxlen); + overflow = 1; } + dataptr += len + 1; + num++; } - module_put(m); + } + if (unlikely(overflow)) { + errk("Section names truncated!!! 
Should never happen!!\n"); + *endptr = 0; + overflow = 0; + } - /* sort symbols by address */ - _stp_sort (mod->symbols, num, sizeof(struct _stp_symbol), _stp_compare_addr, _stp_swap_symbol); + /* now copy all the symbols we are interested in */ + dataptr = sm->symbol_data; + endptr = dataptr + sym_size - 1; + num = 0; + for (i = 0; i < mod->num_symtab; i++) { + char *str = (char *)(mod->strtab + mod->symtab[i].st_name); + if (*str != '\0' && _stp_sym_type_ok(mod->symtab[i].st_info)) { + sm->symbols[num].symbol = dataptr; + sm->symbols[num].addr = mod->symtab[i].st_value; + while (*str && (dataptr < endptr)) + *dataptr++ = *str++; + if (unlikely(*str)) + overflow = 1; + *dataptr++ = 0; + num++; + } } - return mod; + if (unlikely(overflow)) + errk("Symbol names truncated!!! Should never happen!!\n"); + + /* sort symbols by address */ + _stp_sort(sm->symbols, num, sizeof(struct _stp_symbol), _stp_compare_addr, _stp_swap_symbol); + + return sm; } -/* Remove any old module info from our database */ -static void _stp_module_exists_delete (struct _stp_module *mod) +/* Remove any old module info from our database. */ +static void _stp_module_exists_delete(struct _stp_module *mod) { int i, num; - /* remove any old modules with the same name */ for (num = 1; num < _stp_num_modules; num++) { if (strcmp(_stp_modules[num]->name, mod->name) == 0) { - dbug(DEBUG_SYMBOLS, "found existing module with name %s. Deleting.\n", mod->name); + dbug_sym(1, "found existing module with name %s. Deleting.\n", mod->name); _stp_del_module(_stp_modules[num]); break; } @@ -435,143 +515,61 @@ static void _stp_module_exists_delete (struct _stp_module *mod) for (num = 1; num < _stp_num_modules; num++) { if (mod->text + mod->text_size < _stp_modules_by_addr[num]->text) continue; - if (mod->text < _stp_modules_by_addr[num]->text - + _stp_modules_by_addr[num]->text_size) { - dbug(DEBUG_SYMBOLS, "New module %s overlaps with old module %s. 
Deleting old.\n", - mod->name, _stp_modules_by_addr[num]->name); + if (mod->text < _stp_modules_by_addr[num]->text + _stp_modules_by_addr[num]->text_size) { + dbug_sym(1, "New module %s overlaps with old module %s. Deleting old.\n", + mod->name, _stp_modules_by_addr[num]->name); _stp_del_module(_stp_modules_by_addr[num]); } } } -static int _stp_ins_module(struct _stp_module *mod) +static void _stp_ins_module(struct module *mod) { - int i, num, res, ret = 0; + int i, num, res; unsigned long flags; - - // kbug(DEBUG_SYMBOLS, "insert %s\n", mod->name); + struct _stp_module *m; + dbug_sym(1, "insert %s\n", mod->name); + m = _stp_load_module_symbols(mod); + if (m == NULL) + return; STP_WLOCK_MODULES; - - _stp_module_exists_delete(mod); - + _stp_module_exists_delete(m); /* check for overflow */ if (_stp_num_modules == STP_MAX_MODULES) { errk("Exceeded the limit of %d modules\n", STP_MAX_MODULES); - ret = -ENOMEM; goto done; } - + /* insert alphabetically in _stp_modules[] */ for (num = 1; num < _stp_num_modules; num++) - if (strcmp(_stp_modules[num]->name, mod->name) > 0) + if (strcmp(_stp_modules[num]->name, m->name) > 0) break; for (i = _stp_num_modules; i > num; i--) - _stp_modules[i] = _stp_modules[i-1]; - _stp_modules[num] = mod; - + _stp_modules[i] = _stp_modules[i - 1]; + _stp_modules[num] = m; /* insert by text address in _stp_modules_by_addr[] */ for (num = 1; num < _stp_num_modules; num++) - if (mod->text < _stp_modules_by_addr[num]->text) + if (m->text < _stp_modules_by_addr[num]->text) break; for (i = _stp_num_modules; i > num; i--) - _stp_modules_by_addr[i] = _stp_modules_by_addr[i-1]; - _stp_modules_by_addr[num] = mod; - + _stp_modules_by_addr[i] = _stp_modules_by_addr[i - 1]; + _stp_modules_by_addr[num] = m; _stp_num_modules++; - done: STP_WUNLOCK_MODULES; - return ret; -} - - -/* Called from procfs.c when a STP_MODULE msg is received */ -static int _stp_do_module(const char __user *buf, int count) -{ - struct _stp_msg_module tmpmod; - struct _stp_module 
mod, *m; - unsigned i, section_len; - - if (count < (int)sizeof(tmpmod)) { - errk("expected %d and got %d\n", (int)sizeof(tmpmod), count); - return -EFAULT; - } - if (copy_from_user ((char *)&tmpmod, buf, sizeof(tmpmod))) - return -EFAULT; - - section_len = count - sizeof(tmpmod) - tmpmod.unwind_len; - if (section_len <= 0) { - errk("section_len = %d\n", section_len); - return -EFAULT; - } - dbug(DEBUG_SYMBOLS, "Got module %s, count=%d section_len=%d unwind_len=%d\n", - tmpmod.name, count, section_len, tmpmod.unwind_len); - - strcpy(mod.name, tmpmod.name); - mod.module = tmpmod.module; - mod.text = tmpmod.text; - mod.data = tmpmod.data; - mod.num_sections = tmpmod.num_sections; - - /* copy in section data */ - mod.sections = _stp_kmalloc(section_len); - if (mod.sections == NULL) { - errk("unable to allocate memory.\n"); - return -EFAULT; - } - if (copy_from_user ((char *)mod.sections, buf+sizeof(tmpmod), section_len)) { - _stp_kfree(mod.sections); - return -EFAULT; - } - for (i = 0; i < mod.num_sections; i++) { - mod.sections[i].symbol = - (char *)((long)mod.sections[i].symbol - + (long)((long)mod.sections + mod.num_sections * sizeof(struct _stp_symbol))); - } - - #if 0 - for (i = 0; i < mod.num_sections; i++) - _dbug("section %d (stored at %p): %s %lx\n", i, &mod.sections[i], mod.sections[i].symbol, mod.sections[i].addr); - #endif - - /* load symbols from tmpmod.module to mod */ - m = _stp_load_module_symbols(&mod, tmpmod.unwind_len); - if (m == NULL) { - _stp_kfree(mod.sections); - return 0; - } - - dbug(DEBUG_SYMBOLS, "module %s loaded. 
Text=%p text_size=%u\n", m->name, (int64_t)m->text, m->text_size); - /* finally copy unwind info */ - if (copy_from_user (m->unwind_data, buf+sizeof(tmpmod)+section_len, tmpmod.unwind_len)) { - _stp_free_module(m); - _stp_kfree(mod.sections); - return -EFAULT; - } - m->unwind_data_len = tmpmod.unwind_len; - - if (_stp_ins_module(m) < 0) { - _stp_free_module(m); - return -ENOMEM; - } - - return count; + return; } -static int _stp_ctl_send (int type, void *data, int len); - -static int _stp_module_load_notify(struct notifier_block * self, unsigned long val, void * data) +static int _stp_module_load_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = (struct module *)data; struct _stp_module rmod; - switch (val) { case MODULE_STATE_COMING: - dbug(DEBUG_SYMBOLS, "module %s load notify\n", mod->name); - strlcpy(rmod.name, mod->name, STP_MODULE_NAME_LEN); - _stp_ctl_send(STP_MODULE, &rmod, sizeof(struct _stp_module)); + dbug_sym(1, "module %s load notify\n", mod->name); + _stp_ins_module(mod); break; default: errk("module loaded? 
val=%ld\n", val); @@ -583,4 +581,72 @@ static struct notifier_block _stp_module_load_nb = { .notifier_call = _stp_module_load_notify, }; -#endif /* _SYMBOLS_C_ */ +#include <linux/seq_file.h> + +static int _stp_init_modules(void) +{ + loff_t pos = 0; + void *res; + struct module *mod; + const struct seq_operations *modules_op = (const struct seq_operations *)_stp_kallsyms_lookup_name("modules_op"); + + if (modules_op == NULL) { + _dbug("Lookup of modules_op failed.\n"); + return -1; + } + + /* Use the seq_file interface to safely get a list of installed modules */ + res = modules_op->start(NULL, &pos); + while (res) { + mod = list_entry(res, struct module, list); + _stp_ins_module(mod); + res = modules_op->next(NULL, res, &pos); + } + + if (register_module_notifier(&_stp_module_load_nb)) + errk("failed to load module notifier\n"); + + /* unlocks the list */ + modules_op->stop(NULL, NULL); + +#ifdef STP_USE_DWARF_UNWINDER + /* now that we have all the modules, ask for their unwind info */ + { + unsigned long flags; + int i, left = STP_CTL_BUFFER_SIZE; + char buf[STP_CTL_BUFFER_SIZE]; + char *ptr = buf; + *ptr = 0; + + STP_RLOCK_MODULES; + /* Loop through modules, sending module names packed into */ + /* messages of size STP_CTL_BUFFER. */ + for (i = 0; i < _stp_num_modules; i++) { + char *name = _stp_modules[i]->name; + int len = strlen(name); + if (len >= left) { + _stp_ctl_send(STP_UNWIND, buf, sizeof(buf) - left); + ptr = buf; + left = STP_CTL_BUFFER_SIZE; + } + strlcpy(ptr, name, left); + ptr += len + 1; + left -= len + 1; + } + STP_RUNLOCK_MODULES; + + /* Send terminator. When we get this back from stapio */ + /* that means all the unwind info has been sent. 
*/ + strlcpy(ptr, "*", left); + left -= 2; + _stp_ctl_send(STP_UNWIND, buf, sizeof(buf) - left); + } +#else + /* done with modules, now go */ + _stp_ctl_send(STP_TRANSPORT, NULL, 0); +#endif /* STP_USE_DWARF_UNWINDER */ + + return 0; +} + +#endif /* _STP_SYMBOLS_C_ */ diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c index 8335e44b..a4e4e652 100644 --- a/runtime/transport/transport.c +++ b/runtime/transport/transport.c @@ -23,83 +23,47 @@ #include "../procfs.c" static struct utt_trace *_stp_utt = NULL; - +static unsigned int utt_seq = 1; +static int _stp_probes_started = 0; +pid_t _stp_target = 0; +static int _stp_exit_called = 0; +int _stp_exit_flag = 0; #ifdef STP_OLD_TRANSPORT #include "relayfs.c" +#include "procfs.c" #else #include "utt.c" +#include "control.c" #endif -static unsigned int utt_seq = 1; - -static int _stp_probes_started = 0; - /* module parameters */ static int _stp_bufsize; module_param(_stp_bufsize, int, 0); MODULE_PARM_DESC(_stp_bufsize, "buffer size"); -pid_t _stp_target = 0; -static int _stp_exit_called = 0; -int _stp_exit_flag = 0; - /* forward declarations */ void probe_exit(void); int probe_start(void); void _stp_exit(void); -void _stp_handle_start (struct _stp_msg_start *st); -static void _stp_detach(void); -static void _stp_attach(void); /* check for new workqueue API */ -#ifdef DECLARE_DELAYED_WORK -static void _stp_work_queue (struct work_struct *data); +#ifdef DECLARE_DELAYED_WORK +static void _stp_work_queue(struct work_struct *data); static DECLARE_DELAYED_WORK(_stp_work, _stp_work_queue); #else -static void _stp_work_queue (void *data); +static void _stp_work_queue(void *data); static DECLARE_WORK(_stp_work, _stp_work_queue, NULL); #endif static struct workqueue_struct *_stp_wq; -static void _stp_ask_for_symbols(void); - -#ifdef STP_OLD_TRANSPORT -#include "procfs.c" -#else -#include "control.c" -#endif - -static void _stp_ask_for_symbols(void) -{ - struct _stp_msg_symbol req; - struct _stp_module mod; - 
static int sent_symbols = 0; - - if (sent_symbols == 0) { - /* ask for symbols and modules */ - kbug(DEBUG_SYMBOLS|DEBUG_TRANSPORT, "AFS\n"); - - req.endian = 0x1234; - req.ptr_size = sizeof(char *); - _stp_ctl_send(STP_SYMBOLS, &req, sizeof(req)); - - strcpy(mod.name, ""); - _stp_ctl_send(STP_MODULE, &mod, sizeof(mod)); - sent_symbols = 1; - } -} /* * _stp_handle_start - handle STP_START */ -void _stp_handle_start (struct _stp_msg_start *st) +void _stp_handle_start(struct _stp_msg_start *st) { - kbug (DEBUG_TRANSPORT, "stp_handle_start\n"); - - if (register_module_notifier(&_stp_module_load_nb)) - errk("failed to load module notifier\n"); - + dbug_trans(1, "stp_handle_start\n"); _stp_target = st->target; st->res = probe_start(); if (st->res >= 0) @@ -108,16 +72,14 @@ void _stp_handle_start (struct _stp_msg_start *st) _stp_ctl_send(STP_START, st, sizeof(*st)); } - /* common cleanup code. */ /* This is called from the kernel thread when an exit was requested */ -/* by staprun or the exit() function. It is also called by transport_close() */ -/* when the module is removed. In that case "dont_rmmod" is set to 1. */ +/* by staprun or the exit() function. */ /* We need to call it both times because we want to clean up properly */ /* when someone does /sbin/rmmod on a loaded systemtap module. 
*/ -static void _stp_cleanup_and_exit (int dont_rmmod) +static void _stp_cleanup_and_exit(int send_exit) { - kbug(DEBUG_TRANSPORT, "cleanup_and_exit (%d)\n", dont_rmmod); + dbug_trans(1, "cleanup_and_exit (%d)\n", send_exit); if (!_stp_exit_called) { int failures; @@ -128,23 +90,24 @@ static void _stp_cleanup_and_exit (int dont_rmmod) _stp_exit_called = 1; if (_stp_probes_started) { - kbug(DEBUG_TRANSPORT, "calling probe_exit\n"); + dbug_trans(1, "calling probe_exit\n"); /* tell the stap-generated code to unload its probes, etc */ probe_exit(); - kbug(DEBUG_TRANSPORT, "done with probe_exit\n"); + dbug_trans(1, "done with probe_exit\n"); } failures = atomic_read(&_stp_transport_failures); if (failures) - _stp_warn ("There were %d transport failures.\n", failures); + _stp_warn("There were %d transport failures.\n", failures); - kbug(DEBUG_TRANSPORT, "************** calling startstop 0 *************\n"); - if (_stp_utt) utt_trace_startstop(_stp_utt, 0, &utt_seq); + dbug_trans(1, "************** calling startstop 0 *************\n"); + if (_stp_utt) + utt_trace_startstop(_stp_utt, 0, &utt_seq); - kbug(DEBUG_TRANSPORT, "ctl_send STP_EXIT\n"); - /* tell staprun to exit (if it is still there) */ - _stp_ctl_send(STP_EXIT, &dont_rmmod, sizeof(int)); - kbug(DEBUG_TRANSPORT, "done with ctl_send STP_EXIT\n"); + dbug_trans(1, "ctl_send STP_EXIT\n"); + if (send_exit) + _stp_ctl_send(STP_EXIT, NULL, 0); + dbug_trans(1, "done with ctl_send STP_EXIT\n"); } } @@ -153,7 +116,7 @@ static void _stp_cleanup_and_exit (int dont_rmmod) */ static void _stp_detach(void) { - kbug(DEBUG_TRANSPORT, "detach\n"); + dbug_trans(1, "detach\n"); _stp_attached = 0; _stp_pid = 0; @@ -169,10 +132,10 @@ static void _stp_detach(void) */ static void _stp_attach(void) { - kbug(DEBUG_TRANSPORT, "attach\n"); + dbug_trans(1, "attach\n"); _stp_attached = 1; _stp_pid = current->pid; - utt_set_overwrite(0); + utt_set_overwrite(0); queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER); } @@ -180,10 +143,10 @@ 
static void _stp_attach(void) * _stp_work_queue - periodically check for IO or exit * This is run by a kernel thread and may sleep. */ -#ifdef DECLARE_DELAYED_WORK -static void _stp_work_queue (struct work_struct *data) +#ifdef DECLARE_DELAYED_WORK +static void _stp_work_queue(struct work_struct *data) #else -static void _stp_work_queue (void *data) +static void _stp_work_queue(void *data) #endif { int do_io = 0; @@ -198,7 +161,7 @@ static void _stp_work_queue (void *data) /* if exit flag is set AND we have finished with probe_start() */ if (unlikely(_stp_exit_flag)) - _stp_cleanup_and_exit(0); + _stp_cleanup_and_exit(1); else if (likely(_stp_attached)) queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER); } @@ -211,19 +174,19 @@ static void _stp_work_queue (void *data) */ void _stp_transport_close() { - kbug(DEBUG_TRANSPORT, "%d: ************** transport_close *************\n", current->pid); - _stp_cleanup_and_exit(1); + dbug_trans(1, "%d: ************** transport_close *************\n", current->pid); + _stp_cleanup_and_exit(0); destroy_workqueue(_stp_wq); _stp_unregister_ctl_channel(); - if (_stp_utt) utt_trace_remove(_stp_utt); + if (_stp_utt) + utt_trace_remove(_stp_utt); _stp_free_modules(); _stp_kill_time(); - _stp_print_cleanup(); /* free print buffers */ + _stp_print_cleanup(); /* free print buffers */ _stp_mem_debug_done(); - kbug(DEBUG_TRANSPORT, "---- CLOSED ----\n"); + dbug_trans(1, "---- CLOSED ----\n"); } - static struct utt_trace *_stp_utt_open(void) { struct utt_trace_setup utts; @@ -249,22 +212,26 @@ int _stp_transport_init(void) { int ret; - kbug(DEBUG_TRANSPORT, "transport_init\n"); + dbug_trans(1, "transport_init\n"); _stp_init_pid = current->pid; _stp_uid = current->uid; _stp_gid = current->gid; #ifdef RELAY_GUEST - /* Guest scripts use relay only for reporting warnings and errors */ - _stp_subbuf_size = 65536; - _stp_nsubbufs = 2; + /* Guest scripts use relay only for reporting warnings and errors */ + _stp_subbuf_size = 65536; + 
_stp_nsubbufs = 2; #endif if (_stp_bufsize) { unsigned size = _stp_bufsize * 1024 * 1024; - _stp_subbuf_size = ((size >> 2) + 1) * 65536; + _stp_subbuf_size = 65536; + while (size / _stp_subbuf_size > 64 && + _stp_subbuf_size < 1024 * 1024) { + _stp_subbuf_size <<= 1; + } _stp_nsubbufs = size / _stp_subbuf_size; - kbug(DEBUG_TRANSPORT, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size); + dbug_trans(1, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size); } /* initialize timer code */ @@ -286,41 +253,57 @@ int _stp_transport_init(void) if (_stp_print_init() < 0) goto err2; + /* start transport */ utt_trace_startstop(_stp_utt, 1, &utt_seq); /* create workqueue of kernel threads */ _stp_wq = create_workqueue("systemtap"); if (!_stp_wq) goto err3; + + _stp_transport_state = 1; + + dbug_trans(1, "calling init_kernel_symbols\n"); + if (_stp_init_kernel_symbols() < 0) + goto err4; + + dbug_trans(1, "calling init_modules\n"); + if (_stp_init_modules() < 0) + goto err4; + return 0; +err4: + errk("failed to initialize modules\n"); + _stp_free_modules(); + destroy_workqueue(_stp_wq); err3: _stp_print_cleanup(); err2: _stp_unregister_ctl_channel(); err1: - if (_stp_utt) utt_trace_remove(_stp_utt); + if (_stp_utt) + utt_trace_remove(_stp_utt); err0: _stp_kill_time(); return -1; } - static inline void _stp_lock_inode(struct inode *inode) { #ifdef DEFINE_MUTEX - mutex_lock(&inode->i_mutex); + mutex_lock(&inode->i_mutex); #else - down(&inode->i_sem); + down(&inode->i_sem); #endif } static inline void _stp_unlock_inode(struct inode *inode) { #ifdef DEFINE_MUTEX - mutex_unlock(&inode->i_mutex); + mutex_unlock(&inode->i_mutex); #else - up(&inode->i_sem); + up(&inode->i_sem); #endif } @@ -358,7 +341,8 @@ static void _stp_unlock_debugfs(void) /* utt.c and relayfs.c. Will not be necessary if utt is included */ /* in the kernel. 
*/ -static struct dentry *_stp_get_root_dir(const char *name) { +static struct dentry *_stp_get_root_dir(const char *name) +{ struct file_system_type *fs; struct dentry *root; struct super_block *sb; @@ -377,7 +361,6 @@ static struct dentry *_stp_get_root_dir(const char *name) { errk("Couldn't lock transport directory.\n"); return NULL; } - #ifdef STP_OLD_TRANSPORT root = relayfs_create_dir(name, NULL); #else @@ -389,12 +372,11 @@ static struct dentry *_stp_get_root_dir(const char *name) { _stp_lock_inode(sb->s_root->d_inode); root = lookup_one_len(name, sb->s_root, strlen(name)); _stp_unlock_inode(sb->s_root->d_inode); - kbug(DEBUG_TRANSPORT, "root=%p\n", root); if (!IS_ERR(root)) dput(root); else { root = NULL; - kbug(DEBUG_TRANSPORT, "Could not create or find transport directory.\n"); + errk("Could not create or find transport directory.\n"); } } _stp_unlock_debugfs(); diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h index 6dc00d2b..dc499961 100644 --- a/runtime/transport/transport.h +++ b/runtime/transport/transport.h @@ -7,21 +7,37 @@ #include "transport_msgs.h" -void _stp_warn (const char *fmt, ...); - +/* The size of print buffers. This limits the maximum */ +/* amount of data a print can send. */ #define STP_BUFFER_SIZE 8192 +/* STP_CTL_BUFFER_SIZE is the maximum size of a message */ +/* exchanged on the control channel. 
*/ +#ifdef STP_OLD_TRANSPORT +/* Old transport sends print output on control channel */ +#define STP_CTL_BUFFER_SIZE STP_BUFFER_SIZE +#else +#define STP_CTL_BUFFER_SIZE 256 +#endif + /* how often the work queue wakes up and checks buffers */ #define STP_WORK_TIMER (HZ/100) static unsigned _stp_nsubbufs = 8; static unsigned _stp_subbuf_size = 65536*4; + +void _stp_warn (const char *fmt, ...); extern void _stp_transport_close(void); extern int _stp_print_init(void); extern void _stp_print_cleanup(void); static struct dentry *_stp_get_root_dir(const char *name); static int _stp_lock_debugfs(void); static void _stp_unlock_debugfs(void); +static int _stp_ctl_send(int type, void *data, int len); +static void _stp_attach(void); +static void _stp_detach(void); +void _stp_handle_start(struct _stp_msg_start *st); + int _stp_pid = 0; uid_t _stp_uid = 0; gid_t _stp_gid = 0; diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h index 55de2d4a..5f385565 100644 --- a/runtime/transport/transport_msgs.h +++ b/runtime/transport/transport_msgs.h @@ -23,8 +23,7 @@ enum STP_EXIT, STP_OOB_DATA, STP_SYSTEM, - STP_SYMBOLS, - STP_MODULE, + STP_UNWIND, STP_TRANSPORT, STP_CONNECT, STP_DISCONNECT, @@ -36,18 +35,16 @@ enum STP_SUBBUFS_CONSUMED, STP_REALTIME_DATA, #endif - STP_MAX_CMD }; -#ifdef DEBUG_TRANSPORT +#ifdef DEBUG_TRANS static const char *_stp_command_name[] = { "STP_START", "STP_EXIT", "STP_OOB_DATA", "STP_SYSTEM", - "STP_SYMBOLS", - "STP_MODULE", + "STP_UNWIND", "STP_TRANSPORT", "STP_CONNECT", "STP_DISCONNECT", @@ -59,68 +56,34 @@ static const char *_stp_command_name[] = { "STP_REALTIME_DATA", #endif }; -#endif /* DEBUG_TRANSPORT */ +#endif /* DEBUG_TRANS */ /* control channel messages */ -/* command to execute: sent to staprun */ +/* command to execute: module->stapio */ struct _stp_msg_cmd { char cmd[128]; }; -/* request for symbol data. sent to staprun */ -struct _stp_msg_symbol +/* Unwind data. 
stapio->module */ +struct _stp_msg_unwind { - int32_t endian; - int32_t ptr_size; + /* the module name, or "*" for all */ + char name[STP_MODULE_NAME_LEN]; + /* length of unwind data */ + uint32_t unwind_len; + /* data ...*/ }; /* Request to start probes. */ -/* Sent from staprun. Then returned from module. */ +/* stapio->module->stapio */ struct _stp_msg_start { pid_t target; int32_t res; // for reply: result of probe_start() }; -struct _stp_symbol32 -{ - uint32_t addr; - uint32_t symbol; -}; - -struct _stp_symbol64 -{ - uint64_t addr; - uint64_t symbol; -}; - -struct _stp_msg_symbol_hdr -{ - uint32_t num_syms; - uint32_t sym_size; - uint32_t unwind_size; -}; - -struct _stp_msg_module { - /* the module name, or "" for kernel */ - char name[STP_MODULE_NAME_LEN]; - - /* A pointer to the struct module */ - uint64_t module; - - /* the start of the module's text and data sections */ - uint64_t text; - uint64_t data; - - /* how many sections this module has */ - uint32_t num_sections; - - /* length of unwind data */ - uint32_t unwind_len; -}; - #ifdef STP_OLD_TRANSPORT /**** for compatibility with old relayfs ****/ struct _stp_buf_info diff --git a/runtime/unwind.c b/runtime/unwind.c new file mode 100644 index 00000000..aa270cad --- /dev/null +++ b/runtime/unwind.c @@ -0,0 +1,964 @@ +/* -*- linux-c -*- + * kernel stack unwinding + * Copyright (C) 2008 Red Hat Inc. + * + * Based on old kernel code that is + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich <jbeulich@novell.com> + * + * This code is released under version 2 of the GNU GPL. + * + * This code currently does stack unwinding in the + * kernel and modules. It will need some extension to handle + * userspace unwinding. 
+ */ + +#include "unwind/unwind.h" + +#ifdef STP_USE_DWARF_UNWINDER + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + unsigned long v; + + v = e1->start; + e1->start = e2->start; + e2->start = v; + v = e1->fde; + e1->fde = e2->fde; + e2->fde = v; +} + +/* Build a binary-searchable unwind header. Also do some + * validity checks. In the future we might use */ +/* .eh_frame_hdr if it is already present. */ +static void _stp_create_unwind_hdr(struct _stp_module *m) +{ + const u8 *ptr; + unsigned long tableSize, hdrSize, last; + unsigned n = 0; + const u32 *fde; + int bad_order = 0; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__ ((__packed__)) * header = NULL; + + /* already did this or no data? 
*/ + if (m->unwind_hdr || m->unwind_data_len == 0) + return; + + tableSize = m->unwind_data_len; + if (tableSize & (sizeof(*fde) - 1)) { + dbug_unwind(1, "tableSize=0x%x not a multiple of 0x%x\n", (int)tableSize, (int)sizeof(*fde)); + goto bad; + } + + /* count the FDEs */ + for (fde = m->unwind_data; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + signed ptrType; + const u32 *cie; + + /* check for extended length */ + if ((*fde & 0xfffffff0) == 0xfffffff0) { + dbug_unwind(1, "Module %s has extended-length CIE or FDE."); + dbug_unwind(1, "This is not supported at this time."); + goto bad; + } + cie = cie_for_fde(fde, m); + if (cie == &not_fde) + continue; /* fde was a CIE. That's OK, just skip it. */ + if (cie == NULL || cie == &bad_cie || (ptrType = fde_pointer_type(cie)) < 0) + goto bad; + /* we have a real FDE */ + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType)) + goto bad; + ++n; + } + + if (tableSize || !n) { + dbug_unwind(1, "%s: tableSize=%ld, n=%d\n", m->name, tableSize, n); + goto bad; + } + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + 2 * n * sizeof(unsigned long); + header = _stp_kmalloc(hdrSize); + if (header == NULL) { + header = _stp_vmalloc(hdrSize); + if (header == NULL) + return; + m->allocated.unwind_hdr = 1; + } + + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_absptr; + header->fde_count_enc = DW_EH_PE_data4; + header->table_enc = DW_EH_PE_absptr; + _stp_put_unaligned((unsigned long)m->unwind_data, &header->eh_frame_ptr); + + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); + header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) % __alignof(typeof(*header->table))); + + n = 0; + last = 0; + tableSize = m->unwind_data_len; + for (fde = m->unwind_data; tableSize; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde /
sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, m); + if (cie == &not_fde) + continue; + if (cie == NULL || cie == &bad_cie) + goto bad; + /* we have a real FDE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, fde_pointer_type(cie)); + header->table[n].fde = (unsigned long)fde; + if (header->table[n].start < last) + bad_order++; + last = header->table[n].start; + ++n; + } + WARN_ON(n != header->fde_count); + + /* Is sort ever necessary? */ + if (bad_order) + _stp_sort(header->table, n, sizeof(*header->table), cmp_eh_frame_hdr_table_entries, + swap_eh_frame_hdr_table_entries); + + m->unwind_hdr_len = hdrSize; + m->unwind_hdr = header; + return; + + /* unwind data is not acceptable. free it and return */ +bad: + dbug_unwind(1, "unwind data for %s is unacceptable. Freeing.", m->name); + if (header) { + if (m->allocated.unwind_hdr) { + m->allocated.unwind_hdr = 0; + _stp_vfree(header); + } else + _stp_kfree(header); + } + if (m->unwind_data) { + if (m->allocated.unwind_data) + _stp_vfree(m->unwind_data); + else + _stp_kfree(m->unwind_data); + m->unwind_data = NULL; + m->unwind_data_len = 0; + } + return; +} + +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + uleb128_t value = 0; + unsigned shift; + + for (shift = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (uleb128_t)(*cur & 0x7f) << shift; + if (!(*cur++ & 0x80)) + break; + } + *pcur = cur; + + return value; +} + +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + sleb128_t value = 0; + unsigned shift; + + for (shift = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (sleb128_t)(*cur & 0x7f) << shift; + if (!(*cur & 0x80)) { + value |=
-(*cur++ & 0x40) << shift; + break; + } + } + *pcur = cur; + + return value; +} + +/* given an FDE, find its CIE */ +static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *m) +{ + const u32 *cie; + + /* check that length is proper */ + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + + /* CIE id for eh_frame is 0, otherwise 0xffffffff */ + if (m->unwind_is_ehframe && fde[1] == 0) + return &not_fde; + else if (fde[1] == 0xffffffff) + return &not_fde; + + /* OK, must be an FDE. Now find its CIE. */ + + /* CIE_pointer must be a proper offset */ + if ((fde[1] & (sizeof(*fde) - 1)) || fde[1] > (unsigned long)(fde + 1) - (unsigned long)m->unwind_data) { + dbug_unwind(1, "fde[1]=%lx fde+1=%lx, unwind_data=%lx %lx\n", + (unsigned long)fde[1], (unsigned long)(fde + 1), + (unsigned long)m->unwind_data, (unsigned long)(fde + 1) - (unsigned long)m->unwind_data); + return NULL; /* this is not a valid FDE */ + } + + /* cie pointer field is different in eh_frame vs debug_frame */ + if (m->unwind_is_ehframe) + cie = fde + 1 - fde[1] / sizeof(*fde); + else + cie = m->unwind_data + fde[1]; + + if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || (cie[1] != 0xffffffff && cie[1] != 0)) { + dbug_unwind(1, "cie is not valid %lx %x %x %x\n", cie, *cie, fde[1], cie[1]); + return NULL; /* this is not a (valid) CIE */ + } + + return cie; +} + +/* read an encoded pointer */ +static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType) +{ + unsigned long value = 0; + union { + const u8 *p8; + const u16 *p16u; + const s16 *p16s; + const u32 *p32u; + const s32 *p32s; + const unsigned long *pul; + } ptr; + + if (ptrType < 0 || ptrType == DW_EH_PE_omit) + return 0; + + ptr.p8 = *pLoc; + switch (ptrType & DW_EH_PE_FORM) { + case DW_EH_PE_data2: + if (end < (const void *)(ptr.p16u + 1)) + return 0; + if (ptrType & DW_EH_PE_signed) + value = _stp_get_unaligned(ptr.p16s++); + else + value =
_stp_get_unaligned(ptr.p16u++); + break; + case DW_EH_PE_data4: +#ifdef CONFIG_64BIT + if (end < (const void *)(ptr.p32u + 1)) + return 0; + if (ptrType & DW_EH_PE_signed) + value = _stp_get_unaligned(ptr.p32s++); + else + value = _stp_get_unaligned(ptr.p32u++); + break; + case DW_EH_PE_data8: + BUILD_BUG_ON(sizeof(u64) != sizeof(value)); +#else + BUILD_BUG_ON(sizeof(u32) != sizeof(value)); +#endif + case DW_EH_PE_absptr: + if (end < (const void *)(ptr.pul + 1)) + return 0; + value = _stp_get_unaligned(ptr.pul++); + break; + case DW_EH_PE_leb128: + BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); + value = ptrType & DW_EH_PE_signed ? get_sleb128(&ptr.p8, end) + : get_uleb128(&ptr.p8, end); + if ((const void *)ptr.p8 > end) + return 0; + break; + default: + return 0; + } + switch (ptrType & DW_EH_PE_ADJUST) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + value += (unsigned long)*pLoc; + break; + default: + return 0; + } + if ((ptrType & DW_EH_PE_indirect) + && _stp_read_address(value, (unsigned long *)value, KERNEL_DS)) + return 0; + *pLoc = ptr.p8; + + return value; +} + +static signed fde_pointer_type(const u32 *cie) +{ + const u8 *ptr = (const u8 *)(cie + 2); + unsigned version = *ptr; + + if (version != 1) + return -1; /* unsupported */ + if (*++ptr) { + const char *aug; + const u8 *end = (const u8 *)(cie + 1) + *cie; + uleb128_t len; + + /* check if augmentation size is first (and thus present) */ + if (*ptr != 'z') + return -1; + /* check if augmentation string is nul-terminated */ + if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) + return -1; + ++ptr; /* skip terminator */ + get_uleb128(&ptr, end); /* skip code alignment */ + get_sleb128(&ptr, end); /* skip data alignment */ + /* skip return address column */ + version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); + len = get_uleb128(&ptr, end); /* augmentation length */ + if (ptr + len < ptr || ptr + len > end) + return -1; + end = ptr + len; + while (*++aug) { + if (ptr >= end) + return -1; + switch (*aug) { + case 'L': + ++ptr; + break; + case 'P':{ + signed ptrType = *ptr++; + + if (!read_pointer(&ptr, end, ptrType) || ptr > end) + return -1; + } + break; + case 'R': + return *ptr; + default: + return -1; + } + } + } + return DW_EH_PE_absptr; +} + +static int advance_loc(unsigned long delta, struct unwind_state *state) +{ + state->loc += delta * state->codeAlign; + dbug_unwind(1, "state->loc=%lx\n", state->loc); + return delta > 0; +} + +static void set_rule(uleb128_t reg, enum item_location where, uleb128_t value, struct unwind_state *state) +{ + dbug_unwind(1, "reg=%d, where=%d, value=%lx\n", reg, where, value); + if (reg < ARRAY_SIZE(state->regs)) { + state->regs[reg].where = where; + state->regs[reg].value = value; + } +} + +static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, signed ptrType, struct unwind_state *state) +{ + union { + const u8 *p8; + const u16 *p16; + const u32 *p32; + } ptr; + int result = 1; + + dbug_unwind(1, "targetLoc=%lx state->loc=%lx\n", targetLoc, state->loc); + if (start != state->cieStart) { + state->loc = state->org; + result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); + if (targetLoc == 0 && state->label == NULL) + return result; + } + + for (ptr.p8 = start; result && ptr.p8 < end;) { + switch (*ptr.p8 >> 6) { + uleb128_t value; + case 0: + switch (*ptr.p8++) { + case DW_CFA_nop: + dbug_unwind(1, "DW_CFA_nop\n"); + break; + case DW_CFA_set_loc: + if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) + result = 0; + dbug_unwind(1, "DW_CFA_set_loc %lx (result=%d)\n", state->loc, result); + break; + case DW_CFA_advance_loc1: + result = ptr.p8 < end && advance_loc(*ptr.p8++, state); + dbug_unwind(1, "DW_CFA_advance_loc1 %d\n", result); + break; 
+ case DW_CFA_advance_loc2: + result = ptr.p8 <= end + 2 && advance_loc(*ptr.p16++, state); + dbug_unwind(1, "DW_CFA_advance_loc2 %d\n", result); + break; + case DW_CFA_advance_loc4: + result = ptr.p8 <= end + 4 && advance_loc(*ptr.p32++, state); + dbug_unwind(1, "DW_CFA_advance_loc4 %d\n", result); + break; + case DW_CFA_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + dbug_unwind(1, "DW_CFA_offset_extended\n"); + break; + case DW_CFA_val_offset: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_uleb128(&ptr.p8, end), state); + dbug_unwind(1, "DW_CFA_val_offset\n"); + break; + case DW_CFA_offset_extended_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); + dbug_unwind(1, "DW_CFA_offset_extended_sf\n"); + break; + case DW_CFA_val_offset_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_sleb128(&ptr.p8, end), state); + dbug_unwind(1, "DW_CFA_val_offset_sf\n"); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); + dbug_unwind(1, "DW_CFA_undefined\n"); + break; + case DW_CFA_register: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Register, get_uleb128(&ptr.p8, end), state); + dbug_unwind(1, "DW_CFA_register\n"); + break; + case DW_CFA_remember_state: + dbug_unwind(1, "DW_CFA_remember_state\n"); + if (ptr.p8 == state->label) { + state->label = NULL; + return 1; + } + if (state->stackDepth >= MAX_STACK_DEPTH) + return 0; + state->stack[state->stackDepth++] = ptr.p8; + break; + case DW_CFA_restore_state: + dbug_unwind(1, "DW_CFA_restore_state\n"); + if (state->stackDepth) { + const uleb128_t loc = state->loc; + const u8 *label = state->label; + + state->label = state->stack[state->stackDepth - 1]; + memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); + memset(state->regs, 0, sizeof(state->regs)); + state->stackDepth 
= 0; + result = processCFI(start, end, 0, ptrType, state); + state->loc = loc; + state->label = label; + } else + return 0; + break; + case DW_CFA_def_cfa: + state->cfa.reg = get_uleb128(&ptr.p8, end); + dbug_unwind(1, "DW_CFA_def_cfa reg=%ld\n", state->cfa.reg); + /*nobreak */ + case DW_CFA_def_cfa_offset: + state->cfa.offs = get_uleb128(&ptr.p8, end); + dbug_unwind(1, "DW_CFA_def_cfa_offset offs=%lx\n", state->cfa.offs); + break; + case DW_CFA_def_cfa_sf: + state->cfa.reg = get_uleb128(&ptr.p8, end); + dbug_unwind(1, "DW_CFA_def_cfa_sf reg=%ld\n", state->cfa.reg); + /*nobreak */ + case DW_CFA_def_cfa_offset_sf: + state->cfa.offs = get_sleb128(&ptr.p8, end) * state->dataAlign; + dbug_unwind(1, "DW_CFA_def_cfa_offset_sf offs=%lx\n", state->cfa.offs); + break; + case DW_CFA_def_cfa_register: + state->cfa.reg = get_uleb128(&ptr.p8, end); + dbug_unwind(1, "DW_CFA_def_cfa_register reg=%ld\n", state->cfa.reg); + break; + /*todo case DW_CFA_def_cfa_expression: */ + /*todo case DW_CFA_expression: */ + /*todo case DW_CFA_val_expression: */ + case DW_CFA_GNU_args_size: + get_uleb128(&ptr.p8, end); + dbug_unwind(1, "DW_CFA_GNU_args_size\n"); + break; + case DW_CFA_GNU_negative_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); + dbug_unwind(1, "DW_CFA_GNU_negative_offset_extended\n"); + break; + case DW_CFA_GNU_window_save: + default: + dbug_unwind(1, "unimplemented call frame instruction: 0x%x\n", *(ptr.p8 - 1)); + result = 0; + break; + } + break; + case 1: + result = advance_loc(*ptr.p8++ & 0x3f, state); + dbug_unwind(1, "case 1\n"); + break; + case 2: + value = *ptr.p8++ & 0x3f; + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + dbug_unwind(1, "case 2\n"); + break; + case 3: + set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); + dbug_unwind(1, "case 3\n"); + break; + } + dbug_unwind(1, "targetLoc=%lx state->loc=%lx\n", targetLoc, state->loc); + if (ptr.p8 > end) + result = 0; + if 
(result && targetLoc != 0 && targetLoc < state->loc) + return 1; + } + return result && ptr.p8 == end && (targetLoc == 0 || state->label == NULL); +} + +/* If we previously created an unwind header, then use it now to binary search */ +/* for the FDE corresponding to pc. */ + +static u32 *_stp_search_unwind_hdr(unsigned long pc, struct _stp_module *m) +{ + const u8 *ptr, *end, *hdr = m->unwind_hdr; + unsigned long startLoc; + u32 *fde = NULL; + unsigned num, tableSize, t2; + + if (hdr == NULL || hdr[0] != 1) + return NULL; + + dbug_unwind(1, "search for %lx", pc); + + /* table_enc */ + switch (hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_absptr: + tableSize = sizeof(unsigned long); + break; + case DW_EH_PE_data2: + tableSize = 2; + break; + case DW_EH_PE_data4: + tableSize = 4; + break; + case DW_EH_PE_data8: + tableSize = 8; + break; + default: + dbug_unwind(1, "bad table encoding"); + return NULL; + } + ptr = hdr + 4; + end = hdr + m->unwind_hdr_len; + + if (read_pointer(&ptr, end, hdr[1]) != (unsigned long)m->unwind_data) { + dbug_unwind(1, "eh_frame_ptr not valid"); + return NULL; + } + + num = read_pointer(&ptr, end, hdr[2]); + if (num == 0 || num != (end - ptr) / (2 * tableSize) || (end - ptr) % (2 * tableSize)) { + dbug_unwind(1, "Bad num=%d end-ptr=%ld 2*tableSize=%d", num, end - ptr, 2 * tableSize); + return NULL; + } + + do { + const u8 *cur = ptr + (num / 2) * (2 * tableSize); + startLoc = read_pointer(&cur, cur + tableSize, hdr[3]); + if (pc < startLoc) + num /= 2; + else { + ptr = cur - tableSize; + num = (num + 1) / 2; + } + } while (startLoc && num > 1); + + if (num == 1 && (startLoc = read_pointer(&ptr, ptr + tableSize, hdr[3])) != 0 && pc >= startLoc) + fde = (void *)read_pointer(&ptr, ptr + tableSize, hdr[3]); + + dbug_unwind(1, "returning fde=%lx startLoc=%lx", fde, startLoc); + return fde; +} + +#ifdef DEBUG_UNWIND +static const char *_stp_enc_hi_name[] = { + "", + "DW_EH_PE_pcrel", + "DW_EH_PE_textrel", + "DW_EH_PE_datarel", + "DW_EH_PE_funcrel", 
+ "DW_EH_PE_aligned" +}; +static const char *_stp_enc_lo_name[] = { + "DW_EH_PE_absptr", + "DW_EH_PE_uleb128", + "DW_EH_PE_udata2", + "DW_EH_PE_udata4", + "DW_EH_PE_udata8", + "DW_EH_PE_sleb128", + "DW_EH_PE_sdata2", + "DW_EH_PE_sdata4", + "DW_EH_PE_sdata8" +}; +char *_stp_eh_enc_name(signed type) +{ + static char buf[64]; + int hi, low; + if (type == DW_EH_PE_omit) + return "DW_EH_PE_omit"; + + hi = (type & DW_EH_PE_ADJUST) >> 4; + low = type & DW_EH_PE_FORM; + if (hi > 5 || low > 4 || (low == 0 && (type & DW_EH_PE_signed))) { + sprintf(buf, "ERROR:encoding=0x%x", type); + return buf; + } + + buf[0] = 0; + if (type & DW_EH_PE_indirect) + strlcpy(buf, "DW_EH_PE_indirect|", sizeof(buf)); + if (hi) + strlcat(buf, _stp_enc_hi_name[hi], sizeof(buf)); + + if (type & DW_EH_PE_signed) + low += 4; + strlcat(buf, _stp_enc_lo_name[low], sizeof(buf)); + return buf; +} +#endif /* DEBUG_UNWIND */ + +/* Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. A positive return means unwinding is finished; + * don't try to fallback to dumping addresses on the stack. 
*/ +int unwind(struct unwind_frame_info *frame) +{ +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) + const u32 *fde, *cie = NULL; + const u8 *ptr = NULL, *end = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame; + unsigned long tableSize, startLoc = 0, endLoc = 0, cfa; + unsigned i; + signed ptrType = -1; + uleb128_t retAddrReg = 0; + struct _stp_module *m; + struct unwind_state state; + + dbug_unwind(1, "pc=%lx, %lx", pc, UNW_PC(frame)); + + if (UNW_PC(frame) == 0) + return -EINVAL; + + m = _stp_get_unwind_info(pc); + if (unlikely(m == NULL)) { + dbug_unwind(1, "No module found for pc=%lx", pc); + return -EINVAL; + } + + if (unlikely(m->unwind_data_len == 0 || m->unwind_data_len & (sizeof(*fde) - 1))) { + dbug_unwind(1, "Module %s: unwind_data_len=%d", m->name, m->unwind_data_len); + goto err; + } + + fde = _stp_search_unwind_hdr(pc, m); + dbug_unwind(1, "%s: fde=%lx\n", m->name, fde); + + /* found the fde, now set startLoc and endLoc */ + if (fde != NULL) { + cie = cie_for_fde(fde, m); + if (likely(cie != NULL && cie != &bad_cie && cie != &not_fde)) { + ptr = (const u8 *)(fde + 2); + ptrType = fde_pointer_type(cie); + startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); + dbug_unwind(2, "startLoc=%lx, ptrType=%s", startLoc, _stp_eh_enc_name(ptrType)); + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM | DW_EH_PE_signed; + endLoc = startLoc + read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); + if (pc > endLoc) { + dbug_unwind(1, "pc (%lx) > endLoc(%lx)\n", pc, endLoc); + goto done; + } + } else { + dbug_unwind(1, "fde found in header, but cie is bad!\n"); + fde = NULL; + } + } + + /* did not find a good fde with binary search, so do slow linear search */ + if (fde == NULL) { + for (fde = m->unwind_data, tableSize = m->unwind_data_len; cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + dbug_unwind(3, "fde=%lx 
tableSize=%d\n", (long)*fde, (int)tableSize); + cie = cie_for_fde(fde, m); + if (cie == &bad_cie) { + cie = NULL; + break; + } + if (cie == NULL || cie == &not_fde || (ptrType = fde_pointer_type(cie)) < 0) + continue; + + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); + dbug_unwind(2, "startLoc=%lx, ptrType=%s", startLoc, _stp_eh_enc_name(ptrType)); + if (!startLoc) + continue; + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM | DW_EH_PE_signed; + endLoc = startLoc + read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); + dbug_unwind(3, "endLoc=%lx\n", endLoc); + if (pc >= startLoc && pc < endLoc) + break; + } + } + + dbug_unwind(1, "cie=%lx fde=%lx startLoc=%lx endLoc=%lx\n", cie, fde, startLoc, endLoc); + if (cie == NULL || fde == NULL) + goto err; + + /* found the CIE and FDE */ + + memset(&state, 0, sizeof(state)); + state.cieEnd = ptr; /* keep here temporarily */ + ptr = (const u8 *)(cie + 2); + end = (const u8 *)(cie + 1) + *cie; + frame->call_frame = 1; + if ((state.version = *ptr) != 1) { + dbug_unwind(1, "CIE version number is %d. 
1 is supported.\n", state.version); + goto err; /* unsupported version */ + } + if (*++ptr) { + /* check if augmentation size is first (and thus present) */ + if (*ptr == 'z') { + while (++ptr < end && *ptr) { + switch (*ptr) { + /* check for ignorable (or already handled) + * nul-terminated augmentation string */ + case 'L': + case 'P': + case 'R': + continue; + case 'S': + dbug_unwind(1, "This is a signal frame\n"); + frame->call_frame = 0; + continue; + default: + break; + } + break; + } + } + if (ptr >= end || *ptr) { + dbug_unwind(1, "Problem parsing the augmentation string.\n"); + goto err; + } + } + ++ptr; + + /* get code alignment factor */ + state.codeAlign = get_uleb128(&ptr, end); + /* get data alignment factor */ + state.dataAlign = get_sleb128(&ptr, end); + if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) + goto err;; + + retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); + + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + ptr += augSize; + } + if (ptr > end || retAddrReg >= ARRAY_SIZE(reg_info) + || REG_INVALID(retAddrReg) + || reg_info[retAddrReg].width != sizeof(unsigned long)) + goto err; + + state.cieStart = ptr; + ptr = state.cieEnd; + state.cieEnd = end; + end = (const u8 *)(fde + 1) + *fde; + + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + if ((ptr += augSize) > end) + goto err; + } + + state.org = startLoc; + memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); + /* process instructions */ + if (!processCFI(ptr, end, pc, ptrType, &state) + || state.loc > endLoc || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info) + || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) + goto err; + + /* update frame */ +#ifndef CONFIG_AS_CFI_SIGNAL_FRAME + if (frame->call_frame && !UNW_DEFAULT_RA(state.regs[retAddrReg], 
state.dataAlign)) + frame->call_frame = 0; +#endif + cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; + startLoc = min((unsigned long)UNW_SP(frame), cfa); + endLoc = max((unsigned long)UNW_SP(frame), cfa); + dbug_unwind(1, "cfa=%lx startLoc=%lx, endLoc=%lx\n", cfa, startLoc, endLoc); + if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { + startLoc = min(STACK_LIMIT(cfa), cfa); + endLoc = max(STACK_LIMIT(cfa), cfa); + dbug_unwind(1, "cfa startLoc=%p, endLoc=%p\n", (u64)startLoc, (u64)endLoc); + } +#ifndef CONFIG_64BIT +# define CASES CASE(8); CASE(16); CASE(32) +#else +# define CASES CASE(8); CASE(16); CASE(32); CASE(64) +#endif + dbug_unwind(1, "cie=%lx fde=%lx\n", cie, fde); + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) { + if (state.regs[i].where == Nowhere) + continue; + dbug_unwind(2, "REG_INVALID %d\n", i); + goto err; + } + dbug_unwind(2, "register %d. where=%d\n", i, state.regs[i].where); + switch (state.regs[i].where) { + default: + break; + case Register: + if (state.regs[i].value >= ARRAY_SIZE(reg_info) + || REG_INVALID(state.regs[i].value) + || reg_info[i].width > reg_info[state.regs[i].value].width) { + dbug_unwind(2, "case Register bad\n"); + goto err; + } + switch (reg_info[state.regs[i].value].width) { +#define CASE(n) \ + case sizeof(u##n): \ + state.regs[i].value = FRAME_REG(state.regs[i].value, \ + const u##n); \ + break + CASES; +#undef CASE + default: + dbug_unwind(2, "default\n"); + goto err; + } + break; + } + } + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + dbug_unwind(2, "register %d. invalid=%d\n", i, REG_INVALID(i)); + if (REG_INVALID(i)) + continue; + dbug_unwind(2, "register %d. 
where=%d\n", i, state.regs[i].where); + switch (state.regs[i].where) { + case Nowhere: + if (reg_info[i].width != sizeof(UNW_SP(frame)) + || &FRAME_REG(i, __typeof__(UNW_SP(frame))) + != &UNW_SP(frame)) + continue; + UNW_SP(frame) = cfa; + break; + case Register: + switch (reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + FRAME_REG(i, u##n) = state.regs[i].value; \ + break + CASES; +#undef CASE + default: + dbug_unwind(2, "default\n"); + goto err; + } + break; + case Value: + if (reg_info[i].width != sizeof(unsigned long)) { + dbug_unwind(2, "Value\n"); + goto err; + } + FRAME_REG(i, unsigned long) = cfa + state.regs[i].value * state.dataAlign; + break; + case Memory:{ + unsigned long addr = cfa + state.regs[i].value * state.dataAlign; + dbug_unwind(2, "addr=%lx width=%d\n", addr, reg_info[i].width); + switch (reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + if (unlikely(_stp_read_address(FRAME_REG(i, u##n), (u##n *)addr, KERNEL_DS))) \ + goto copy_failed;\ + dbug_unwind(1, "set register %d to %lx\n", i, (long)FRAME_REG(i,u##n));\ + break + CASES; +#undef CASE + default: + dbug_unwind(2, "default\n"); + goto err; + } + } + break; + } + } + read_unlock(&m->lock); + dbug_unwind(1, "returning 0 (%lx)\n", UNW_PC(frame)); + return 0; + +copy_failed: + dbug_unwind(1, "_stp_read_address failed to access memory\n"); +err: + read_unlock(&m->lock); + return -EIO; + +done: + /* PC was in a range covered by a module but no unwind info */ + /* found for the specific PC. This seems to happen only for kretprobe */ + /* trampolines and at the end of interrupt backtraces. */ + read_unlock(&m->lock); + return 1; +#undef CASES +#undef FRAME_REG +} + + +#endif /* STP_USE_DWARF_UNWINDER */ diff --git a/runtime/unwind/i386.h b/runtime/unwind/i386.h new file mode 100644 index 00000000..79e6ba73 --- /dev/null +++ b/runtime/unwind/i386.h @@ -0,0 +1,135 @@ +/* -*- linux-c -*- + * + * 32-bit x86 dwarf unwinder header file + * Copyright (C) 2008 Red Hat Inc. 
+ * Copyright (C) 2002-2006 Novell, Inc. + * + * This file is part of systemtap, and is free software. You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. + */ +#ifndef _STP_I386_UNWIND_H +#define _STP_I386_UNWIND_H + +#include <linux/sched.h> +#include <asm/fixmap.h> +#include <asm/ptrace.h> +#include <asm/uaccess.h> + +/* these are simple for i386 */ +#define _stp_get_unaligned(ptr) (*(ptr)) +#define _stp_put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#ifdef STAPCONF_X86_UNIREGS + +#define UNW_PC(frame) (frame)->regs.ip +#define UNW_SP(frame) (frame)->regs.sp + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(ax), \ + PTREGS_INFO(cx), \ + PTREGS_INFO(dx), \ + PTREGS_INFO(bx), \ + PTREGS_INFO(sp), \ + PTREGS_INFO(bp), \ + PTREGS_INFO(si), \ + PTREGS_INFO(di), \ + PTREGS_INFO(ip) + +#else /* !STAPCONF_X86_UNIREGS */ + +#define UNW_PC(frame) (frame)->regs.eip +#define UNW_SP(frame) (frame)->regs.esp + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(eax), \ + PTREGS_INFO(ecx), \ + PTREGS_INFO(edx), \ + PTREGS_INFO(ebx), \ + PTREGS_INFO(esp), \ + PTREGS_INFO(ebp), \ + PTREGS_INFO(esi), \ + PTREGS_INFO(edi), \ + PTREGS_INFO(eip) + +#endif /* STAPCONF_X86_UNIREGS */ + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 4)) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct pt_regs *regs) +{ + if (user_mode_vm(regs)) + info->regs = *regs; + else { +#ifdef STAPCONF_X86_UNIREGS + memcpy(&info->regs, regs, offsetof(struct pt_regs, sp)); + info->regs.sp = (unsigned long)&regs->sp; + info->regs.ss = __KERNEL_DS; +#else + memcpy(&info->regs, regs, offsetof(struct pt_regs, esp)); + 
info->regs.esp = (unsigned long)&regs->esp; + info->regs.xss = __KERNEL_DS; +#endif + + } + info->call_frame = 1; +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + memset(&info->regs, 0, sizeof(info->regs)); +#ifdef STAPCONF_X86_UNIREGS + info->regs.ip = info->task->thread.ip; + info->regs.cs = __KERNEL_CS; + __get_user(info->regs.bp, (long *)info->task->thread.sp); + info->regs.sp = info->task->thread.sp; + info->regs.ss = __KERNEL_DS; + info->regs.ds = __USER_DS; + info->regs.es = __USER_DS; +#else + info->regs.eip = info->task->thread.eip; + info->regs.xcs = __KERNEL_CS; + __get_user(info->regs.ebp, (long *)info->task->thread.esp); + info->regs.esp = info->task->thread.esp; + info->regs.xss = __KERNEL_DS; + info->regs.xds = __USER_DS; + info->regs.xes = __USER_DS; +#endif + +} + + +static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +{ +#if 0 /* This can only work when selector register and EFLAGS saves/restores + are properly annotated (and tracked in UNW_REGISTER_INFO). */ + return user_mode_vm(&info->regs); +#else +#ifdef STAPCONF_X86_UNIREGS + return info->regs.ip < PAGE_OFFSET + || (info->regs.ip >= __fix_to_virt(FIX_VDSO) + && info->regs.ip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + || info->regs.sp < PAGE_OFFSET; +#else + return info->regs.eip < PAGE_OFFSET + || (info->regs.eip >= __fix_to_virt(FIX_VDSO) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + || info->regs.esp < PAGE_OFFSET; +#endif +#endif +} + +#endif /* _STP_I386_UNWIND_H */ diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h new file mode 100644 index 00000000..ae5e75d3 --- /dev/null +++ b/runtime/unwind/unwind.h @@ -0,0 +1,146 @@ +/* -*- linux-c -*- + * + * dwarf unwinder header file + * Copyright (C) 2008 Red Hat Inc. + * Copyright (C) 2002-2006 Novell, Inc. + * + * This file is part of systemtap, and is free software. 
You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. + */ + +#ifndef _STP_UNWIND_H_ +#define _STP_UNWIND_H_ + +#ifdef STP_USE_DWARF_UNWINDER + +#if defined (__x86_64__) +#include "x86_64.h" +#elif defined (__i386__) +#include "i386.h" +#else +#error "Unsupported dwarf unwind architecture" +#endif + +#define MAX_STACK_DEPTH 8 + +#define EXTRA_INFO(f) { \ + BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ + % FIELD_SIZEOF(struct unwind_frame_info, f)) \ + + offsetof(struct unwind_frame_info, f) \ + / FIELD_SIZEOF(struct unwind_frame_info, f), \ + FIELD_SIZEOF(struct unwind_frame_info, f) \ + } +#define PTREGS_INFO(f) EXTRA_INFO(regs.f) + +static const struct { + unsigned offs:BITS_PER_LONG / 2; + unsigned width:BITS_PER_LONG / 2; +} reg_info[] = { + UNW_REGISTER_INFO +}; + +#undef PTREGS_INFO +#undef EXTRA_INFO + +#ifndef REG_INVALID +#define REG_INVALID(r) (reg_info[r].width == 0) +#endif + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + 
+#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_leb128 0x01 +#define DW_EH_PE_data2 0x02 +#define DW_EH_PE_data4 0x03 +#define DW_EH_PE_data8 0x04 +#define DW_EH_PE_FORM 0x07 /* mask */ +#define DW_EH_PE_signed 0x08 /* signed versions of above have this bit set */ + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_ADJUST 0x70 /* mask */ +#define DW_EH_PE_indirect 0x80 +#define DW_EH_PE_omit 0xff + +typedef unsigned long uleb128_t; +typedef signed long sleb128_t; + +static struct unwind_table { + unsigned long pc; /* text */ + unsigned long range; /* text_size */ + const void *address; /* unwind_data */ + unsigned long size; /* unwind_data_len */ + const unsigned char *header; /* unwind_header */ + unsigned long hdrsz; + struct unwind_table *link; + const char *name; /* module name */ +} root_table; + +struct unwind_item { + enum item_location { + Nowhere, + Memory, + Register, + Value + } where; + uleb128_t value; +}; + +struct unwind_state { + uleb128_t loc, org; + const u8 *cieStart, *cieEnd; + uleb128_t codeAlign; + sleb128_t dataAlign; + struct cfa { + uleb128_t reg, offs; + } cfa; + struct unwind_item regs[ARRAY_SIZE(reg_info)]; + unsigned stackDepth:8; + unsigned version:8; + const u8 *label; + const u8 *stack[MAX_STACK_DEPTH]; +}; + +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType); +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *); +static signed fde_pointer_type(const u32 *cie); + + +#endif /* STP_USE_DWARF_UNWINDER */ +#endif /*_STP_UNWIND_H_*/ diff --git a/runtime/unwind/x86_64.h b/runtime/unwind/x86_64.h new file mode 100644 index 00000000..5eb3a58f --- /dev/null +++ b/runtime/unwind/x86_64.h @@ -0,0 +1,150 @@ +/* -*- linux-c -*- + * + * x86_64 dwarf unwinder header file + 
* Copyright (C) 2008 Red Hat Inc. + * Copyright (C) 2002-2006 Novell, Inc. + * + * This file is part of systemtap, and is free software. You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. + */ +#ifndef _STP_X86_64_UNWIND_H +#define _STP_X86_64_UNWIND_H + +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich <jbeulich@novell.com> + * This code is released under version 2 of the GNU GPL. + */ + +#include <linux/sched.h> +#include <asm/ptrace.h> +#include <asm/vsyscall.h> + +/* these are simple for x86_64 */ +#define _stp_get_unaligned(ptr) (*(ptr)) +#define _stp_put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#ifdef STAPCONF_X86_UNIREGS +#define UNW_PC(frame) (frame)->regs.ip +#define UNW_SP(frame) (frame)->regs.sp +#else +#define UNW_PC(frame) (frame)->regs.rip +#define UNW_SP(frame) (frame)->regs.rsp +#endif /* STAPCONF_X86_UNIREGS */ + +#if 0 /* STP_USE_FRAME_POINTER */ +/* Frame pointers not implemented in x86_64 currently */ +#define UNW_FP(frame) (frame)->regs.rbp +#define FRAME_RETADDR_OFFSET 8 +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1)) +#define STACK_TOP(tsk) ((tsk)->thread.rsp0) +#endif + +/* Might need to account for the special exception and interrupt handling + stacks here, since normally + EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER, + but the construct is needed only for getting across the stack switch to + the interrupt stack - thus considering the IRQ stack itself is unnecessary, + and the overhead of comparing against all exception handling stacks seems + not desirable. 
*/ +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#ifdef STAPCONF_X86_UNIREGS +#define UNW_REGISTER_INFO \ + PTREGS_INFO(ax), \ + PTREGS_INFO(dx), \ + PTREGS_INFO(cx), \ + PTREGS_INFO(bx), \ + PTREGS_INFO(si), \ + PTREGS_INFO(di), \ + PTREGS_INFO(bp), \ + PTREGS_INFO(sp), \ + PTREGS_INFO(r8), \ + PTREGS_INFO(r9), \ + PTREGS_INFO(r10), \ + PTREGS_INFO(r11), \ + PTREGS_INFO(r12), \ + PTREGS_INFO(r13), \ + PTREGS_INFO(r14), \ + PTREGS_INFO(r15), \ + PTREGS_INFO(ip) +#else +#define UNW_REGISTER_INFO \ + PTREGS_INFO(rax), \ + PTREGS_INFO(rdx), \ + PTREGS_INFO(rcx), \ + PTREGS_INFO(rbx), \ + PTREGS_INFO(rsi), \ + PTREGS_INFO(rdi), \ + PTREGS_INFO(rbp), \ + PTREGS_INFO(rsp), \ + PTREGS_INFO(r8), \ + PTREGS_INFO(r9), \ + PTREGS_INFO(r10), \ + PTREGS_INFO(r11), \ + PTREGS_INFO(r12), \ + PTREGS_INFO(r13), \ + PTREGS_INFO(r14), \ + PTREGS_INFO(r15), \ + PTREGS_INFO(rip) +#endif /* STAPCONF_X86_UNIREGS */ + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 8)) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct pt_regs *regs) +{ + info->regs = *regs; + info->call_frame = 1; +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + extern const char thread_return[]; + + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.cs = __KERNEL_CS; + info->regs.ss = __KERNEL_DS; + +#ifdef STAPCONF_X86_UNIREGS + info->regs.ip = (unsigned long)thread_return; + __get_user(info->regs.bp, (unsigned long *)info->task->thread.sp); + info->regs.sp = info->task->thread.sp; +#else + info->regs.rip = (unsigned long)thread_return; + __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp); + info->regs.rsp = info->task->thread.rsp; +#endif +} + +static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +{ +#if 0 /* This can only work when selector register saves/restores + are properly annotated (and tracked in 
UNW_REGISTER_INFO). */ + return user_mode(&info->regs); +#else +#ifdef STAPCONF_X86_UNIREGS + return (long)info->regs.ip >= 0 + || (info->regs.ip >= VSYSCALL_START && info->regs.ip < VSYSCALL_END) + || (long)info->regs.sp >= 0; +#else + return (long)info->regs.rip >= 0 + || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) + || (long)info->regs.rsp >= 0; +#endif +#endif +} + +#endif /* _STP_X86_64_UNWIND_H */ diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c index 0bf625a5..4ffcf72e 100644 --- a/runtime/vsprintf.c +++ b/runtime/vsprintf.c @@ -248,6 +248,11 @@ int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) ++str; } } +#ifdef __ia64__ + if ((str + precision - 1) <= end) + memcpy(str, &num, precision); //to prevent unaligned access + str += precision; +#else switch(precision) { case 1: if(str <= end) @@ -271,6 +276,7 @@ int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) str+=8; break; } +#endif while (len < field_width--) { if (str <= end) *str = '\0'; |