summary | refs | log | tree | commit | diff | stats
path: root/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'runtime')
-rw-r--r-- runtime/.gitignore 1
-rw-r--r-- runtime/ChangeLog 140
-rw-r--r-- runtime/autoconf-module-nsections.c 8
-rw-r--r-- runtime/copy.c 34
-rw-r--r-- runtime/debug.h 60
-rw-r--r-- runtime/map.c 1
-rw-r--r-- runtime/print.c 1
-rw-r--r-- runtime/print_new.c 12
-rw-r--r-- runtime/print_old.c 12
-rw-r--r-- runtime/probes.c 12
-rw-r--r-- runtime/regs.c 394
-rw-r--r-- runtime/regs.h 4
-rw-r--r-- runtime/runtime.h 25
-rw-r--r-- runtime/stack-arm.c 4
-rw-r--r-- runtime/stack-i386.c 76
-rw-r--r-- runtime/stack-x86_64.c 42
-rw-r--r-- runtime/stack.c 29
-rw-r--r-- runtime/staprun/ChangeLog 57
-rw-r--r-- runtime/staprun/cap.c 78
-rw-r--r-- runtime/staprun/common.c 46
-rw-r--r-- runtime/staprun/ctl.c 35
-rw-r--r-- runtime/staprun/mainloop.c 296
-rw-r--r-- runtime/staprun/stapio.c 17
-rw-r--r-- runtime/staprun/staprun.c 173
-rw-r--r-- runtime/staprun/staprun.h 10
-rw-r--r-- runtime/staprun/staprun_funcs.c 104
-rw-r--r-- runtime/staprun/symbols.c 333
-rw-r--r-- runtime/staprun/unwind_data.c 97
-rw-r--r-- runtime/sym.c 11
-rw-r--r-- runtime/sym.h 25
-rw-r--r-- runtime/task_finder.c 250
-rw-r--r-- runtime/time.c 7
-rw-r--r-- runtime/transport/ChangeLog 48
-rw-r--r-- runtime/transport/control.c 226
-rw-r--r-- runtime/transport/procfs.c 2
-rw-r--r-- runtime/transport/symbols.c 640
-rw-r--r-- runtime/transport/transport.c 164
-rw-r--r-- runtime/transport/transport.h 20
-rw-r--r-- runtime/transport/transport_msgs.h 63
-rw-r--r-- runtime/unwind.c 964
-rw-r--r-- runtime/unwind/i386.h 135
-rw-r--r-- runtime/unwind/unwind.h 146
-rw-r--r-- runtime/unwind/x86_64.h 150
-rw-r--r-- runtime/vsprintf.c 6
44 files changed, 3006 insertions, 1952 deletions
diff --git a/runtime/.gitignore b/runtime/.gitignore
new file mode 100644
index 00000000..ceddd64c
--- /dev/null
+++ b/runtime/.gitignore
@@ -0,0 +1 @@
+!staprun
diff --git a/runtime/ChangeLog b/runtime/ChangeLog
index ee191022..f0f65215 100644
--- a/runtime/ChangeLog
+++ b/runtime/ChangeLog
@@ -1,3 +1,117 @@
+2008-05-06 Masami Hiramatsu <mhiramat@redhat.com>
+
+ PR 5648
+ * print_old.c (stp_print_flush): Fix unaligned access warning on
+ ia64.
+ * print_new.c (stp_print_flush): Ditto.
+
+2008-05-06 Masami Hiramatsu <mhiramat@redhat.com>
+
+ PR 5648
+ * vsprintf.c (_stp_vsnprintf): Fix memcpy's endianness issue.
+
+2008-05-05 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 6481.
+ * time.c (__stp_time_timer_callback): Reenable irq's before
+ mod_timer.
+
+2008-05-05 David Smith <dsmith@redhat.com>
+
+ * task_finder.c (stap_utrace_detach_ops): Make sure we ignore
+ /sbin/init.
+ (__stp_utrace_attach): Added function to handle details of
+ attaching a utrace engine.
+ (__stp_utrace_task_finder_report_clone): Calls
+ __stp_utrace_attach.
+ (__stp_utrace_task_finder_report_exec): Ditto.
+ (stap_start_task_finder): Ditto.
+
+2008-04-30 Masami Hiramatsu <mhiramat@redhat.com>
+
+ PR 5648
+ From Shaohua Li <shaohua.li@intel.com>
+ * vsprintf.c (_stp_vsnprintf): Fix unaligned access warning on ia64.
+
+2008-04-29 David Smith <dsmith@redhat.com>
+
+ * task_finder.c: Made more robust by ensuring that all utrace
+ attaches have a corresponding utrace detach.
+
+2008-04-28 Frank Ch. Eigler <fche@elastic.org>
+
+ * runtime.h (TEST_MODE): Remove.
+
+2008-04-25 David Smith <dsmith@redhat.com>
+
+ From Srinivasa <srinivasa@in.ibm.com>
+ * task_finder.c (__stp_get_mm_path): Fixed kernel 2.6.25 change.
+
+2008-04-24 David Smith <dsmith@redhat.com>
+
+ * task_finder.c (__stp_get_mm_path): Made kernel 2.6.25 changes.
+
+2008-04-16 David Smith <dsmith@redhat.com>
+
+ * task_finder.c (__stp_get_mm_path): Made kernel 2.6.18 changes.
+
+2008-04-15 David Smith <dsmith@redhat.com>
+
+ PR 5961 (partial)
+ * task_finder.c (stap_start_task_finder): When an interesting
+ thread is found that is already running, make sure to set up
+ thread death notification.
+
+2008-04-15 hunt <hunt@redhat.com>
+ * print.c (_stp_pbuf_full): Delete.
+
+2008-04-15 hunt <hunt@redhat.com>
+ * stack-x86_64.c (_stp_stack_print_fallback): Add levels.
+ (__stp_stack_print): Count levels properly.
+
+2008-04-15 Martin Hunt <hunt@redhat.com>
+
+ Finish support for limits on backtrace depth.
+ * runtime.h (MAXTRACE): Default to 20.
+ * stack.c (_stp_stack_print): Call __stp_stack_print
+ with levels set properly.
+ * sym.c (_stp_func_print): Return a value indicating
+ if something was printed.
+
+ Support for i386 and x86_64 on 2.6.25 kernel
+ * unwind/i386.h: Support unified registers on 2.6.25.
+ Remove unused frame stuff, including STACK_*.
+ * stack-i386.c (__stp_stack_print): Support unified
+ registers on 2.6.25.
+ * regs.h (REG_FP): Define for i386.
+
+2008-04-15 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 6410.
+ * unwind.c, unwind.h: Make body conditional in STP_USE_DWARF_UNWINDER.
+ * stack-x86_64.c (__stp_stack_print): Tolerate !unwinder.
+
+2008-04-15 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 6405
+ * autoconf-module-nsections.c: New file.
+
+2008-04-15 Frank Ch. Eigler <fche@elastic.org>
+
+ * unwind/i386.h (STACK_BOTTOM, STACK_TOP): Comment out these
+ unused definitions, for they collide with some kernels
+ (2.6.25-0.121.rc5.git4 rawhide).
+
+2008-04-13 Frank Ch. Eigler <fche@elastic.org>
+
+ * print.c (_stp_pbuf_full): New function to note full print buffer.
+ * stack-{i386,x86_64}.c: Use it in all stack-searching loops, to
+ impose another limit against unbounded iteration.
+
+2008-03-31 Martin Hunt <hunt@redhat.com>
+
+ * runtime.h (STP_USE_DWARF_UNWINDER): Define.
+
2008-04-04 Masami Hiramatsu <mhiramat@redhat.com>
PR 6028
@@ -6,11 +120,37 @@
* regs-ia64.c (ia64_fetch_register): Don't unwind stack if it has
already unwound stack in same probe.
+2008-03-30 Martin Hunt <hunt@redhat.com>
+
+ * runtime.h (STP_USE_FRAME_POINTER): Define when frame pointers
+ are available in the kernel and can be used.
+ * stack-arm.c: Use STP_USE_FRAME_POINTER.
+ * stack-i386.c: Ditto.
+ * unwind/i386.h: Ditto.
+ * unwind/x86_64.h: Ditto.
+
2008-04-04 David Smith <dsmith@redhat.com>
PR 5961 (partial)
* task_finder.c: New file.
+2008-03-28 Martin Hunt <hunt@redhat.com>
+
+ * copy.c (_stp_read_address): New function. Safely read
+ kernel or userspace.
+
+2008-03-26 Martin Hunt <hunt@redhat.com>
+ Fixes to get i386 working.
+ * unwind.c (unwind): Fix types in debug print.
+ * stack-i386.c (_stp_stack_print_fallback): New function.
+ (__stp_stack_print): Call _stp_stack_print_fallback() if unwinder
+ appears to fail.
+
+2008-03-25 Martin Hunt <hunt@redhat.com>
+
+ * unwind.c (unwind): Return a positive number to indicate
+ that unwinding is done.
+
2008-04-01 Frank Ch. Eigler <fche@elastic.org>
* lket/*: Belatedly remove retired LKET code.
diff --git a/runtime/autoconf-module-nsections.c b/runtime/autoconf-module-nsections.c
new file mode 100644
index 00000000..c1ce58b7
--- /dev/null
+++ b/runtime/autoconf-module-nsections.c
@@ -0,0 +1,8 @@
+#include <linux/module.h>
+
+struct module_sect_attrs x;
+
+void foo (void)
+{
+ (void) x.nsections;
+}
diff --git a/runtime/copy.c b/runtime/copy.c
index ef3fd223..6bb22762 100644
--- a/runtime/copy.c
+++ b/runtime/copy.c
@@ -1,6 +1,6 @@
/* -*- linux-c -*-
* Copy from user space functions
- * Copyright (C) 2005, 2006, 2007 Red Hat Inc.
+ * Copyright (C) 2005-2008 Red Hat Inc.
* Copyright (C) 2005 Intel Corporation.
*
* This file is part of systemtap, and is free software. You can
@@ -9,11 +9,10 @@
* later version.
*/
-#ifndef _COPY_C_ /* -*- linux-c -*- */
+#ifndef _COPY_C_ /* -*- linux-c -*- */
#define _COPY_C_
#include "string.c"
-
/** @file copy.c
* @brief Functions to copy from user space.
*/
@@ -26,6 +25,28 @@
* @{
*/
+/** Safely read from userspace or kernelspace.
+ * On success, returns 0. Returns -EFAULT on error.
+ *
+ * This uses __get_user() to read from userspace or
+ * kernelspace. Will not sleep or cause pagefaults when
+ * called from within a kprobe context.
+ *
+ * @param segment KERNEL_DS for kernel access, or
+ * USER_DS for userspace.
+ */
+
+#define _stp_read_address(x, ptr, segment) \
+ ({ \
+ long ret; \
+ mm_segment_t ofs = get_fs(); \
+ set_fs(segment); \
+ ret = __stp_get_user(x, ptr); \
+ set_fs(ofs); \
+ ret; \
+ })
+
+
long _stp_strncpy_from_user(char *dst, const char __user *src, long count);
//static long __stp_strncpy_from_user(char *dst, const char __user *src, long count);
@@ -110,8 +131,7 @@ do { \
* <i>count</i> bytes and returns <i>count</i>.
*/
-long
-_stp_strncpy_from_user(char *dst, const char __user *src, long count)
+long _stp_strncpy_from_user(char *dst, const char __user *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, count))
@@ -119,7 +139,6 @@ _stp_strncpy_from_user(char *dst, const char __user *src, long count)
return res;
}
-
/** Copy a block of data from user space.
*
* If some data could not be copied, this function will pad the copied
@@ -133,8 +152,7 @@ _stp_strncpy_from_user(char *dst, const char __user *src, long count)
*
*/
-unsigned long
-_stp_copy_from_user (char *dst, const char __user *src, unsigned long count)
+unsigned long _stp_copy_from_user(char *dst, const char __user *src, unsigned long count)
{
if (count) {
if (access_ok(VERIFY_READ, src, count))
diff --git a/runtime/debug.h b/runtime/debug.h
index 8f877ede..9b2fe5c5 100644
--- a/runtime/debug.h
+++ b/runtime/debug.h
@@ -14,6 +14,8 @@
* _dbug() writes to systemtap stderr.
* errk() writes to the system log.
*/
+int _stp_transport_state = 0;
+
#define _dbug(args...) _stp_dbug(__FUNCTION__, __LINE__, args)
#define errk(args...) do { \
@@ -21,46 +23,42 @@
printk(args); \
} while (0)
-#ifdef DEBUG_TRANSPORT
-#undef DEBUG_TRANSPORT
-#define DEBUG_TRANSPORT 1
-#else
-#define DEBUG_TRANSPORT 0
-#endif
+/*
+ * To use these, enable them from the command line when compiling.
+ * For example, "stap -DDEBUG_UNWIND=3"
+ * will activate dbug_unwind() and print messages with level <= 3.
+ */
-#ifdef DEBUG_UNWIND
-#undef DEBUG_UNWIND
-#define DEBUG_UNWIND 2
-#else
-#define DEBUG_UNWIND 0
-#endif
+/* Note: DEBUG_MEM is implemented in alloc.c */
-#ifdef DEBUG_SYMBOLS
-#undef DEBUG_SYMBOLS
-#define DEBUG_SYMBOLS 4
+#ifdef DEBUG_TRANS /* transport */
+/* Note: transport is debugged using printk() */
+#define dbug_trans(level, args...) do { \
+ if ((level) <= DEBUG_TRANS) { \
+ printk("%s:%d ",__FUNCTION__, __LINE__); \
+ printk(args); \
+ } \
+ } while (0)
#else
-#define DEBUG_SYMBOLS 0
+#define dbug_trans(level, args...) ;
#endif
-#define DEBUG_TYPE (DEBUG_TRANSPORT|DEBUG_UNWIND|DEBUG_SYMBOLS)
-
-#if DEBUG_TYPE > 0
-
-#define dbug(type, args...) do { \
- if ((type) & DEBUG_TYPE) \
+#ifdef DEBUG_UNWIND /* stack unwinder */
+#define dbug_unwind(level, args...) do { \
+ if ((level) <= DEBUG_UNWIND) \
_stp_dbug(__FUNCTION__, __LINE__, args); \
} while (0)
+#else
+#define dbug_unwind(level, args...) ;
+#endif
-#define kbug(type, args...) do { \
- if ((type) & DEBUG_TYPE) { \
- printk("%s:%d ",__FUNCTION__, __LINE__); \
- printk(args); \
- } \
+#ifdef DEBUG_SYMBOLS
+#define dbug_sym(level, args...) do { \
+ if ((level) <= DEBUG_SYMBOLS) \
+ _stp_dbug(__FUNCTION__, __LINE__, args); \
} while (0)
-
#else
-#define dbug(type, args...) ;
-#define kbug(type, args...) ;
-#endif /* DEBUG_TYPE > 0 */
+#define dbug_sym(level, args...) ;
+#endif
#endif /* _STP_DEBUG_H_ */
diff --git a/runtime/map.c b/runtime/map.c
index 513e27df..a436d7ed 100644
--- a/runtime/map.c
+++ b/runtime/map.c
@@ -15,7 +15,6 @@
* @brief Implements maps (associative arrays) and lists
*/
-#include "alloc.c"
#include "sym.c"
#include "stat-common.c"
#include "map-stat.c"
diff --git a/runtime/print.c b/runtime/print.c
index 0442ba09..14a0820b 100644
--- a/runtime/print.c
+++ b/runtime/print.c
@@ -243,7 +243,6 @@ void _stp_print_char (const char c)
pb->len ++;
}
-
/* This function is used when printing maps or stats. */
/* Probably belongs elsewhere, but is here for now. */
/* It takes a format specification like those used for */
diff --git a/runtime/print_new.c b/runtime/print_new.c
index 75bbd82b..07af2e33 100644
--- a/runtime/print_new.c
+++ b/runtime/print_new.c
@@ -40,11 +40,13 @@ void EXPORT_FN(stp_print_flush) (_stp_pbuf *pb)
else
atomic_inc (&_stp_transport_failures);
#else
- struct _stp_trace *t = relay_reserve(_stp_utt->rchan, sizeof(*t) + len);
- if (likely(t)) {
- t->sequence = _stp_seq_inc();
- t->pdu_len = len;
- memcpy((void *) t + sizeof(*t), pb->buf, len);
+ void *buf = relay_reserve(_stp_utt->rchan,
+ sizeof(struct _stp_trace) + len);
+ if (likely(buf)) {
+ struct _stp_trace t = { .sequence = _stp_seq_inc(),
+ .pdu_len = len};
+ memcpy(buf, &t, sizeof(t)); // prevent unaligned access
+ memcpy(buf + sizeof(t), pb->buf, len);
} else
atomic_inc (&_stp_transport_failures);
#endif
diff --git a/runtime/print_old.c b/runtime/print_old.c
index 5ee050b5..5c117e5f 100644
--- a/runtime/print_old.c
+++ b/runtime/print_old.c
@@ -35,11 +35,13 @@ void EXPORT_FN(stp_print_flush) (_stp_pbuf *pb)
else
atomic_inc (&_stp_transport_failures);
#else
- struct _stp_trace *t = relay_reserve(_stp_utt->rchan, sizeof(*t) + len);
- if (likely(t)) {
- t->sequence = _stp_seq_inc();
- t->pdu_len = len;
- memcpy((void *) t + sizeof(*t), pb->buf, len);
+ void *buf = relay_reserve(_stp_utt->rchan,
+ sizeof(struct _stp_trace) + len);
+ if (likely(buf)) {
+ struct _stp_trace t = { .sequence = _stp_seq_inc(),
+ .pdu_len = len};
+ memcpy(buf, &t, sizeof(t)); // prevent unaligned access
+ memcpy(buf + sizeof(t), pb->buf, len);
} else
atomic_inc (&_stp_transport_failures);
#endif
diff --git a/runtime/probes.c b/runtime/probes.c
index 19539044..6fe844fb 100644
--- a/runtime/probes.c
+++ b/runtime/probes.c
@@ -25,7 +25,7 @@ void _stp_unregister_jprobes (struct jprobe *probes, int num_probes)
int i;
for (i = 0; i < num_probes; i++)
unregister_jprobe(&probes[i]);
- dbug("All jprobes removed\n");
+ // dbug("All jprobes removed\n");
}
/** Register a group of jprobes.
@@ -46,7 +46,7 @@ int _stp_register_jprobes (struct jprobe *probes, int num_probes)
ret = -1; /* FIXME */
goto out;
}
- dbug("inserting jprobe at %s (%p)\n", probes[i].kp.addr, addr);
+ // dbug("inserting jprobe at %s (%p)\n", probes[i].kp.addr, addr);
probes[i].kp.addr = (kprobe_opcode_t *)addr;
ret = register_jprobe(&probes[i]);
if (ret)
@@ -69,7 +69,7 @@ void _stp_unregister_kprobes (struct kprobe *probes, int num_probes)
int i;
for (i = 0; i < num_probes; i++)
unregister_kprobe(&probes[i]);
- dbug("All kprobes removed\n");
+ // dbug("All kprobes removed\n");
}
@@ -83,7 +83,7 @@ void _stp_unregister_kretprobes (struct kretprobe *probes, int num_probes)
int i;
for (i = 0; i < num_probes; i++)
unregister_kretprobe(&probes[i]);
- dbug("All return probes removed\n");
+ // dbug("All return probes removed\n");
}
#endif
@@ -104,7 +104,7 @@ int _stp_register_kprobes (struct kprobe *probes, int num_probes)
ret = -1;
goto out;
}
- dbug("inserting kprobe at %s (%p)\n", probes[i].addr, addr);
+ // dbug("inserting kprobe at %s (%p)\n", probes[i].addr, addr);
probes[i].addr = (kprobe_opcode_t *)addr;
ret = register_kprobe(&probes[i]);
if (ret)
@@ -136,7 +136,7 @@ int _stp_register_kretprobes (struct kretprobe *probes, int num_probes)
ret = -1; /* FIXME */
goto out;
}
- dbug("inserting kretprobe at %s (%p)\n", probes[i].kp.addr, addr);
+ // dbug("inserting kretprobe at %s (%p)\n", probes[i].kp.addr, addr);
probes[i].kp.addr = (kprobe_opcode_t *)addr;
ret = register_kretprobe(&probes[i]);
if (ret)
diff --git a/runtime/regs.c b/runtime/regs.c
index 2daeaa3c..5821f7e7 100644
--- a/runtime/regs.c
+++ b/runtime/regs.c
@@ -383,317 +383,60 @@ void _stp_print_regs(struct pt_regs * regs)
#endif
-/*
- * (Theoretically) arch-independent scheme for binary lookup of register
- * values (from pt_regs) by register name. A register may be called by
- * more than one name.
- */
-struct _stp_register_desc {
- const char *name;
- unsigned short size; // in bytes
- unsigned short offset; // in bytes, from start of pt_regs
-};
-
-struct _stp_register_table {
- struct _stp_register_desc *registers;
- unsigned nr_registers;
- unsigned nr_slots; // capacity
-};
-
-static DEFINE_SPINLOCK(_stp_register_table_lock);
-static void _stp_populate_register_table(void);
-
-/*
- * If the named register is in the list, return its slot number and *found=1.
- * Else *found=0 and return the slot number where the name should be inserted.
- */
-static int _stp_lookup_register(const char *name,
- struct _stp_register_table *table, int *found)
-{
- unsigned begin, mid, end;
-
- *found = 0;
- end = table->nr_registers;
- if (end == 0)
- return 0;
- begin = 0;
- mid = -1;
- for (;;) {
- int cmp;
- int prev_mid = mid;
- mid = (begin + end) / 2;
- if (mid == prev_mid)
- break;
- cmp = strcmp(name, table->registers[mid].name);
- if (cmp == 0) {
- *found = 1;
- return mid;
- } else if (cmp < 0)
- end = mid;
- else
- begin = mid;
- }
- if (begin == 0 && strcmp(name, table->registers[0].name) < 0)
- return 0;
- return begin + 1;
-}
-
-/*
- * If found, return 1 and the size and/or offset in the pt_regs array.
- * Else return 0.
- */
-static int _stp_find_register(const char *name,
- struct _stp_register_table *table, size_t *size, size_t *offset)
-{
- int slot, found;
- if (unlikely(table->nr_registers == 0)) {
- unsigned long flags;
- /*
- * Should we do this at the beginning of time to avoid
- * the possibility of spending too long in a handler?
- */
- spin_lock_irqsave(&_stp_register_table_lock, flags);
- if (table->nr_registers == 0)
- _stp_populate_register_table();
- spin_unlock_irqrestore(&_stp_register_table_lock, flags);
- }
- slot = _stp_lookup_register(name, table, &found);
- if (found) {
- if (size)
- *size = table->registers[slot].size;
- if (offset)
- *offset = table->registers[slot].offset;
- return 1;
- }
- return 0;
-}
-
-/*
- * Add name to the register-lookup table. Note that the name pointer
- * is merely copied, not strdup-ed.
- */
-void _stp_add_register(const char *name, struct _stp_register_table *table,
- size_t size, size_t offset)
-{
- int idx, found;
- struct _stp_register_desc *slot;
-
- idx = _stp_lookup_register(name, table, &found);
- if (found)
- _stp_error("stap runtime internal error: "
- "register name %s used twice\n", name);
- if (table->nr_registers >= table->nr_slots)
- _stp_error("stap runtime internal error: "
- "register table overflow\n");
- slot = &table->registers[idx];
-
- // Move the slots later in the array out of the way.
- if (idx < table->nr_registers)
- memmove(slot+1, slot,
- sizeof(*slot) * (table->nr_registers - idx));
- table->nr_registers++;
- slot->name = name;
- slot->size = size;
- slot->offset = offset;
-}
-
-#if defined(__i386__) || defined(__x86_64__)
-/*
- * This register set is used for i386 kernel and apps, and for 32-bit apps
- * running on x86_64. For the latter case, this allows the user to use
- * things like reg("eax") as well as the standard x86_64 pt_regs names.
- */
-
-/*
- * x86_64 and i386 are especially ugly because the pt_reg member names
- * changed as part of the x86 merge. We allow (and use, as needed)
- * either the pre-merge name or the post-merge name.
- */
-
-// I count 32 different names, but add a fudge factor.
-static struct _stp_register_desc i386_registers[32+8];
-static struct _stp_register_table i386_register_table = {
- .registers = i386_registers,
- .nr_slots = ARRAY_SIZE(i386_registers)
-};
-
-/*
- * sizeof(long) is indeed what we want here, for both i386 and x86_64.
- * Unlike function args, x86_64 pt_regs is the same even if the int3
- * was in an -m32 app.
- */
-#define ADD_PT_REG(name, member) \
- _stp_add_register(name, &i386_register_table, \
- sizeof(long), offsetof(struct pt_regs, member))
-#define ADD2NAMES(nm1, nm2, member) \
- do { \
- ADD_PT_REG(nm1, member); \
- ADD_PT_REG(nm2, member); \
- } while (0)
-
-#ifdef STAPCONF_X86_UNIREGS
-/* Map "ax" and "eax" to regs->ax, and "cs" and "xcs" to regs->cs */
-#define ADD_EREG(nm) ADD2NAMES(#nm, "e" #nm, nm)
-#define ADD_XREG(nm) ADD2NAMES(#nm, "x" #nm, nm)
-#define ADD_FLAGS_REG() ADD_EREG(flags)
-#define EREG(nm, regs) ((regs)->nm)
-#define RREG(nm, regs) ((regs)->nm)
+/* Function arguments */
-#else /* ! STAPCONF_X86_UNIREGS */
+#define _STP_REGPARM 0x8000
+#define _STP_REGPARM_MASK ((_STP_REGPARM) - 1)
-#ifdef __i386__
-#define ADD_EREG(nm) ADD2NAMES(#nm, "e" #nm, e##nm)
-#define ADD_XREG(nm) ADD2NAMES(#nm, "x" #nm, x##nm)
-#define ADD_FLAGS_REG() ADD_EREG(flags)
-#define EREG(nm, regs) ((regs)->e##nm)
-#else /* __x86_64__ */
/*
- * Map "eax" to regs->rax and "xcs" to regs->cs. Other mappings are
- * handled in x86_64_register_table.
+ * x86_64 and i386 are especially ugly because:
+ * 1) the pt_reg member names changed as part of the x86 merge. We use
+ * either the pre-merge name or the post-merge name, as needed.
+ * 2) -m32 apps on x86_64 look like i386 apps, so we need to support
+ * those semantics on both i386 and x86_64.
*/
-#define ADD_EREG(nm) ADD_PT_REG("e" #nm, r##nm)
-#define ADD_XREG(nm) ADD_PT_REG("x" #nm, nm)
-#define ADD_FLAGS_REG() ADD2NAMES("flags", "eflags", eflags)
-/* Note: After a store to %eax, %rax holds the ZERO-extended %eax. */
-#define EREG(nm, regs) ((regs)->r##nm)
-#define RREG(nm, regs) ((regs)->r##nm)
-#endif /* __x86_64__ */
-
-#endif /* ! STAPCONF_X86_UNIREGS */
-static void _stp_populate_i386_register_table(void)
-{
- /*
- * The order here is the same as in i386 struct pt_regs.
- * It's a different order from x86_64 pt_regs; but that doesn't
- * matter -- even when compiling for x86_64 -- because the
- * offsets are determined by offsetof(), not the calling order.
- */
- ADD_EREG(bx);
- ADD_EREG(cx);
- ADD_EREG(dx);
- ADD_EREG(si);
- ADD_EREG(di);
- ADD_EREG(bp);
- ADD_EREG(ax);
#ifdef __i386__
- ADD_XREG(ds);
- ADD_XREG(es);
- ADD_XREG(fs);
- /* gs not saved */
-#endif
#ifdef STAPCONF_X86_UNIREGS
- ADD2NAMES("orig_ax", "orig_eax", orig_ax);
+#define EREG(nm, regs) ((regs)->nm)
#else
-#ifdef __i386__
- ADD2NAMES("orig_ax", "orig_eax", orig_eax);
-#else /* __x86_64__ */
- ADD2NAMES("orig_ax", "orig_eax", orig_rax);
+#define EREG(nm, regs) ((regs)->e##nm)
#endif
-#endif /* STAPCONF_X86_UNIREGS */
- ADD_EREG(ip);
- ADD_XREG(cs);
- ADD_FLAGS_REG();
- ADD_EREG(sp);
- ADD_XREG(ss);
-}
-/*
- * For x86_64, this gets a copy of the saved 64-bit register (e.g., regs->rax).
- * After a store to %eax, %rax holds the ZERO-extended %eax.
- */
-static long
-_stp_get_reg32_by_name(const char *name, struct pt_regs *regs)
+static long _stp_get_sp(struct pt_regs *regs)
{
- size_t offset = 0;
- long value; // works for i386 or x86_64
- BUG_ON(!name);
- if (!regs)
- _stp_error("Register values not available in this context.\n");
-#ifdef __i386__
- if (!user_mode(regs)) {
- /* esp and ss aren't saved on trap from kernel mode. */
- if (!strcmp(name,"esp") || !strcmp(name, "sp"))
- return (long) &EREG(sp, regs);
- if (!strcmp(name,"xss") || !strcmp(name, "ss")) {
- /*
- * Assume ss register hasn't changed since we took
- * the trap.
- */
- unsigned short ss;
- asm volatile("movw %%ss, %0" : : "m" (ss));
- return ss;
- }
- }
-#endif
- if (!_stp_find_register(name, &i386_register_table, NULL, &offset))
- _stp_error("Unknown register name: %s\n", name);
- (void) memcpy(&value, ((char*)regs) + offset, sizeof(value));
- return value;
+ if (!user_mode(regs))
+ return (long) &EREG(sp, regs);
+ return EREG(sp, regs);
}
-#endif /* __i386__ || __x86_64__ */
-
-#ifdef __i386__
-static void _stp_populate_register_table(void)
+static int _stp_get_regparm(int regparm, struct pt_regs *regs)
{
- _stp_populate_i386_register_table();
+ if (regparm == 0) {
+ /* Default */
+ if (user_mode(regs))
+ return 0;
+ else
+ // Kernel is built with -mregparm=3.
+ return 3;
+ } else
+ return (regparm & _STP_REGPARM_MASK);
}
#endif /* __i386__ */
#ifdef __x86_64__
-// I count 32 different names (not the same 32 as i386), but add a fudge factor.
-static struct _stp_register_desc x86_64_registers[32+8];
-static struct _stp_register_table x86_64_register_table = {
- .registers = x86_64_registers,
- .nr_slots = ARRAY_SIZE(x86_64_registers)
-};
-
-/* NB: Redefining ADD_PT_REG here. ADD2NAMES and such change accordingly. */
-#undef ADD_PT_REG
-#define ADD_PT_REG(name, member) \
- _stp_add_register(name, &x86_64_register_table, \
- sizeof(unsigned long), offsetof(struct pt_regs, member))
-
-#define ADD_NREG(nm) ADD_PT_REG(#nm, nm)
-
#ifdef STAPCONF_X86_UNIREGS
-#define ADD_RREG(nm) ADD2NAMES(#nm, "r" #nm, nm)
+#define EREG(nm, regs) ((regs)->nm)
+#define RREG(nm, regs) ((regs)->nm)
#else
-#define ADD_RREG(nm) ADD2NAMES(#nm, "r" #nm, r##nm)
+#define EREG(nm, regs) ((regs)->r##nm)
+#define RREG(nm, regs) ((regs)->r##nm)
#endif
-static void _stp_populate_register_table(void)
+static long _stp_get_sp(struct pt_regs *regs)
{
- /* Same order as in struct pt_regs */
- ADD_NREG(r15);
- ADD_NREG(r14);
- ADD_NREG(r13);
- ADD_NREG(r12);
- ADD_RREG(bp);
- ADD_RREG(bx);
- ADD_NREG(r11);
- ADD_NREG(r10);
- ADD_NREG(r9);
- ADD_NREG(r8);
- ADD_RREG(ax);
- ADD_RREG(cx);
- ADD_RREG(dx);
- ADD_RREG(si);
- ADD_RREG(di);
-#ifdef STAPCONF_X86_UNIREGS
- ADD2NAMES("orig_ax", "orig_rax", orig_ax);
-#else
- ADD2NAMES("orig_ax", "orig_rax", orig_rax);
-#endif
- ADD_RREG(ip);
- ADD_NREG(cs);
- ADD_FLAGS_REG();
- ADD_RREG(sp);
- ADD_NREG(ss);
-
- _stp_populate_i386_register_table();
+ return RREG(sp, regs);
}
static int _stp_probing_32bit_app(struct pt_regs *regs)
@@ -704,54 +447,26 @@ static int _stp_probing_32bit_app(struct pt_regs *regs)
}
/* Ensure that the upper 32 bits of val are a sign-extension of the lower 32. */
-static long _stp_sign_extend32(long val)
+static int64_t __stp_sign_extend32(int64_t val)
{
int32_t *val_ptr32 = (int32_t*) &val;
return *val_ptr32;
}
-/*
- * Get the value of the 64-bit register with the specified name. "rax",
- * "ax", and "eax" all get you regs->[r]ax. Sets *reg32=1 if the name
- * designates a 32-bit register (e.g., "eax"), 0 otherwise.
- */
-static unsigned long
-_stp_get_reg64_by_name(const char *name, struct pt_regs *regs, int *reg32)
+static int _stp_get_regparm(int regparm, struct pt_regs *regs)
{
- size_t offset = 0;
- unsigned long value;
- BUG_ON(!name);
- if (!regs) {
- _stp_error("Register values not available in this context.\n");
- return 0;
- }
- if (_stp_find_register(name, &x86_64_register_table, NULL, &offset)) {
- if (reg32)
- *reg32 = 0;
- (void) memcpy(&value, ((char*)regs) + offset, sizeof(value));
- return value;
- }
- if (reg32)
- *reg32 = 1;
- return _stp_get_reg32_by_name(name, regs);
+ if (regparm == 0) {
+ /* Default */
+ if (_stp_probing_32bit_app(regs))
+ return 0;
+ else
+ return 6;
+ } else
+ return (regparm & _STP_REGPARM_MASK);
}
-#endif /* __x86_64__ */
-
-/* Function arguments */
-
-#define _STP_REGPARM 0x8000
-#define _STP_REGPARM_MASK ((_STP_REGPARM) - 1)
+#endif /* __x86_64__ */
#if defined(__i386__) || defined(__x86_64__)
-static long _stp_get_sp(struct pt_regs *regs)
-{
-#ifdef __i386__
- if (!user_mode(regs))
- return (long) &EREG(sp, regs);
-#endif
- return EREG(sp, regs);
-}
-
/*
* Use this for i386 kernel and apps, and for 32-bit apps running on x86_64.
* Does arch-specific work for fetching function arg #argnum (1 = first arg).
@@ -792,21 +507,6 @@ static int _stp_get_arg32_by_number(int n, int nr_regargs,
}
#endif /* __i386__ || __x86_64__ */
-#ifdef __i386__
-static int _stp_get_regparm(int regparm, struct pt_regs *regs)
-{
- if (regparm == 0) {
- /* Default */
- if (user_mode(regs))
- return 0;
- else
- // Kernel is built with -mregparm=3.
- return 3;
- } else
- return (regparm & _STP_REGPARM_MASK);
-}
-#endif
-
#ifdef __x86_64__
/* See _stp_get_arg32_by_number(). */
static int _stp_get_arg64_by_number(int n, int nr_regargs,
@@ -835,18 +535,6 @@ static int _stp_get_arg64_by_number(int n, int nr_regargs,
return 0;
}
}
-
-static int _stp_get_regparm(int regparm, struct pt_regs *regs)
-{
- if (regparm == 0) {
- /* Default */
- if (_stp_probing_32bit_app(regs))
- return 0;
- else
- return 6;
- } else
- return (regparm & _STP_REGPARM_MASK);
-}
#endif /* __x86_64__ */
/** @} */
diff --git a/runtime/regs.h b/runtime/regs.h
index c1e2344b..4954020f 100644
--- a/runtime/regs.h
+++ b/runtime/regs.h
@@ -1,5 +1,5 @@
/* common register includes used in multiple modules
- * Copyright (C) 2005 Red Hat Inc.
+ * Copyright (C) 2005-2008 Red Hat Inc.
* Copyright (C) 2005 Intel Corporation.
*
* This file is part of systemtap, and is free software. You can
@@ -14,6 +14,7 @@
#if defined (STAPCONF_X86_UNIREGS) && (defined (__x86_64__) || defined (__i386__))
#define REG_IP(regs) regs->ip
#define REG_SP(regs) regs->sp
+#define REG_FP(regs) regs->bp;
#elif defined (__x86_64__)
@@ -24,6 +25,7 @@
#define REG_IP(regs) regs->eip
#define REG_SP(regs) regs->esp
+#define REG_FP(regs) regs->ebp;
#elif defined (__ia64__)
#define REG_IP(regs) ((regs)->cr_iip +ia64_psr(regs)->ri)
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 318d3038..2711f531 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -58,25 +58,32 @@ static struct
#define _stp_seq_inc() (atomic_inc_return(&_stp_seq.seq))
-/* TEST_MODE is always defined by systemtap */
-#ifdef TEST_MODE
-#define SYSTEMTAP 1
-#else
-#define MAXTRYLOCK 1000
-#define TRYLOCKDELAY 100
-#endif
-
#ifndef MAXSTRINGLEN
#define MAXSTRINGLEN 128
#endif
+#ifndef MAXTRACE
+#define MAXTRACE 20
+#endif
+
+#ifdef CONFIG_FRAME_POINTER
+/* Just because frame pointers are available does not mean we can trust them. */
+#if defined (__i386__) || defined (__arm__)
+#define STP_USE_FRAME_POINTER
+#endif
+#endif
+
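+/* dwarf unwinder only tested so far on i386 and x86_64. NOTE(review): the guard below tests STP_USE_FRAME_BUFFER, which nothing visible in this change defines; STP_USE_FRAME_POINTER was presumably intended -- confirm. */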
+#if !defined(STP_USE_FRAME_BUFFER) && (defined(__i386__) || defined(__x86_64__))
+#define STP_USE_DWARF_UNWINDER
+#endif
+
#include "alloc.c"
#include "print.c"
#include "string.c"
#include "io.c"
#include "arith.c"
#include "copy.c"
-#include "sym.h"
#include "sym.c"
#ifdef STP_PERFMON
#include "perf.c"
diff --git a/runtime/stack-arm.c b/runtime/stack-arm.c
index 0c8ce450..9b0b772d 100644
--- a/runtime/stack-arm.c
+++ b/runtime/stack-arm.c
@@ -33,7 +33,7 @@ static int __init find_str_pc_offset(void)
static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
{
-#if defined(CONFIG_FRAME_POINTER)
+#ifdef STP_USE_FRAME_POINTER
int pc_offset = find_str_pc_offset();
unsigned long *fp = (unsigned long *)regs->ARM_fp;
unsigned long *next_fp, *pc;
@@ -68,5 +68,5 @@ static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
fp = next_fp;
}
-#endif
+#endif /* STP_USE_FRAME_POINTER */
}
diff --git a/runtime/stack-i386.c b/runtime/stack-i386.c
index b46ff06b..ad101889 100644
--- a/runtime/stack-i386.c
+++ b/runtime/stack-i386.c
@@ -8,40 +8,68 @@
* later version.
*/
-static inline int _stp_valid_stack_ptr(unsigned long context, unsigned long p)
+static int _stp_valid_stack_ptr(unsigned long context, unsigned long p)
{
return p > context && p < context + THREAD_SIZE - 3;
}
+/* DWARF unwinder failed. Just dump interesting addresses on kernel stack. */
+static void _stp_stack_print_fallback(unsigned long context, unsigned long stack, int verbose, int levels)
+{
+ unsigned long addr;
+ while (levels && _stp_valid_stack_ptr(context, stack)) {
+ if (unlikely(_stp_read_address(addr, (unsigned long *)stack, KERNEL_DS))) {
+ /* cannot access stack. give up. */
+ return;
+ }
+ if (_stp_func_print(addr, verbose, 0))
+ levels--;
+ stack++;
+ }
+}
+
static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
{
- unsigned long *stack = (unsigned long *)&REG_SP(regs);
- unsigned long context = (unsigned long)stack & ~(THREAD_SIZE - 1);
+ unsigned long context = (unsigned long)&REG_SP(regs) & ~(THREAD_SIZE - 1);
+
+#ifdef STP_USE_FRAME_POINTER
unsigned long addr;
+ unsigned long next_fp, fp = REG_FP(regs);
-#ifdef CONFIG_FRAME_POINTER
- {
- #ifdef STAPCONF_X86_UNIREGS
- unsigned long ebp = regs->bp;
- #else
- unsigned long ebp = regs->ebp;
- #endif
-
- while (_stp_valid_stack_ptr(context, (unsigned long)ebp)) {
- addr = *(unsigned long *)(ebp + 4);
- if (verbose) {
- _stp_print_char(' ');
- _stp_symbol_print (addr);
- _stp_print_char('\n');
- } else
- _stp_printf ("0x%08lx ", addr);
- ebp = *(unsigned long *)ebp;
+ while (levels && _stp_valid_stack_ptr(context, (unsigned long)fp)) {
+ if (unlikely(_stp_read_address(addr, (unsigned long *)(fp + 4), KERNEL_DS))) {
+ /* cannot access stack. give up. */
+ return;
+ }
+ _stp_func_print(addr, verbose, 1);
+ if (unlikely(_stp_read_address(next_fp, (unsigned long *)fp, KERNEL_DS))) {
+ /* cannot access stack. give up. */
+ return;
}
+ levels--;
+
+ /* frame pointers move upwards */
+ if (next_fp <= fp)
+ break;
+ fp = next_fp;
}
#else
- while (_stp_valid_stack_ptr(context, (unsigned long)stack)) {
- addr = *stack++;
- _stp_func_print(addr, verbose, 1);
+ struct unwind_frame_info info;
+ arch_unw_init_frame_info(&info, regs);
+
+ while (levels && !arch_unw_user_mode(&info)) {
+ int ret = unwind(&info);
+ dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info));
+ if (ret == 0) {
+ _stp_func_print(UNW_PC(&info), verbose, 1);
+ levels--;
+ continue;
+ }
+ /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */
+ /* FIXME: is there a way to unwind across kretprobe trampolines? */
+ if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline))
+ _stp_stack_print_fallback(context, UNW_SP(&info), verbose, levels);
+ break;
}
-#endif
+#endif /* STP_USE_FRAME_POINTER */
}
diff --git a/runtime/stack-x86_64.c b/runtime/stack-x86_64.c
index 186b2ad4..783e72bd 100644
--- a/runtime/stack-x86_64.c
+++ b/runtime/stack-x86_64.c
@@ -1,6 +1,6 @@
/* -*- linux-c -*-
* x86_64 stack tracing functions
- * Copyright (C) 2005, 2006, 2007 Red Hat Inc.
+ * Copyright (C) 2005-2008 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
@@ -8,13 +8,43 @@
* later version.
*/
-static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
+/* DWARF unwinder failed. Just dump interesting addresses on kernel stack. */
+static void _stp_stack_print_fallback(unsigned long stack, int verbose, int levels)
{
- unsigned long *stack = (unsigned long *)REG_SP(regs);
unsigned long addr;
+ while (levels && stack & (THREAD_SIZE - 1)) { /* stop once stack reaches a THREAD_SIZE boundary */
+ if (unlikely(_stp_read_address(addr, (unsigned long *)stack, KERNEL_DS))) {
+ /* cannot access stack. give up. */
+ return;
+ }
+ if (_stp_func_print(addr, verbose, 0)) /* levels is only consumed when _stp_func_print() returns non-zero */
+ levels--;
+ stack += sizeof(unsigned long); /* stack is an integer address: step one word, not one byte */
+ }
+}
+
+static void __stp_stack_print(struct pt_regs *regs, int verbose, int levels)
+{
+#ifdef STP_USE_DWARF_UNWINDER
+ // FIXME: large stack allocation
+ struct unwind_frame_info info;
+ arch_unw_init_frame_info(&info, regs);
- while ((long)stack & (THREAD_SIZE-1)) {
- addr = *stack++;
- _stp_func_print(addr, verbose, 1);
+ while (levels && !arch_unw_user_mode(&info)) {
+ int ret = unwind(&info);
+ dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info));
+ if (ret == 0) {
+ _stp_func_print(UNW_PC(&info), verbose, 1);
+ levels--;
+ continue;
+ }
+ /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */
+ /* FIXME: is there a way to unwind across kretprobe trampolines? */
+ if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline))
+ _stp_stack_print_fallback(UNW_SP(&info), verbose, levels);
+ break; /* any non-zero unwind() result ends the walk */
}
+#else /* ! STP_USE_DWARF_UNWINDER */
+ _stp_stack_print_fallback(REG_SP(regs), verbose, levels); /* fallback takes (stack, verbose, levels) */
+#endif
}
diff --git a/runtime/stack.c b/runtime/stack.c
index 9c01d65c..23ac2edc 100644
--- a/runtime/stack.c
+++ b/runtime/stack.c
@@ -1,6 +1,6 @@
/* -*- linux-c -*-
* Stack tracing functions
- * Copyright (C) 2005, 2006, 2007 Red Hat Inc.
+ * Copyright (C) 2005-2008 Red Hat Inc.
* Copyright (C) 2005 Intel Corporation.
*
* This file is part of systemtap, and is free software. You can
@@ -23,6 +23,7 @@
#include "sym.c"
#include "regs.h"
+#include "unwind.c"
#define MAXBACKTRACE 20
@@ -46,7 +47,7 @@
* @param regs A pointer to the struct pt_regs.
*/
-void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi)
+void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels)
{
if (verbose) {
/* print the current address */
@@ -57,12 +58,15 @@ void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instan
_stp_symbol_print((unsigned long)_stp_ret_addr_r(pi));
} else {
_stp_print_char(' ');
- _stp_symbol_print (REG_IP(regs));
+ _stp_symbol_print(REG_IP(regs));
}
_stp_print_char('\n');
- } else
- _stp_printf ("%p ", (int64_t)REG_IP(regs));
- __stp_stack_print (regs, verbose, 0);
+ } else if (pi)
+ _stp_printf("%p %p ", (int64_t)(long)_stp_ret_addr_r(pi), (int64_t) REG_IP(regs));
+ else
+ _stp_printf("%p ", (int64_t) REG_IP(regs));
+
+ __stp_stack_print(regs, verbose, levels);
}
/** Writes stack backtrace to a string
@@ -71,31 +75,30 @@ void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instan
* @param regs A pointer to the struct pt_regs.
* @returns void
*/
-void _stp_stack_snprint (char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi)
+void _stp_stack_snprint(char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels)
{
/* To get a string, we use a simple trick. First flush the print buffer, */
/* then call _stp_stack_print, then copy the result into the output string */
/* and clear the print buffer. */
_stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id());
_stp_print_flush();
- _stp_stack_print(regs, verbose, pi);
+ _stp_stack_print(regs, verbose, pi, levels);
strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len);
pb->len = 0;
}
-
/** Prints the user stack backtrace
* @param str string
* @returns Same string as was input with trace info appended,
* @note Currently limited to a depth of two. Works from jprobes and kprobes.
*/
#if 0
-void _stp_ustack_print (char *str)
+void _stp_ustack_print(char *str)
{
- struct pt_regs *nregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) current->thread_info)) - 1;
- _stp_printf ("%p : [user]\n", (int64_t)REG_IP(nregs));
+ struct pt_regs *nregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)current->thread_info)) - 1;
+ _stp_printf("%p : [user]\n", (int64_t) REG_IP(nregs));
if (REG_SP(nregs))
- _stp_printf ("%p : [user]\n", (int64_t)(*(unsigned long *)REG_SP(nregs)));
+ _stp_printf("%p : [user]\n", (int64_t) (*(unsigned long *)REG_SP(nregs)));
}
#endif /* 0 */
diff --git a/runtime/staprun/ChangeLog b/runtime/staprun/ChangeLog
index e9ef2e2d..969c299d 100644
--- a/runtime/staprun/ChangeLog
+++ b/runtime/staprun/ChangeLog
@@ -1,3 +1,60 @@
+2008-05-05 Martin Hunt <hunt@redhat.com>
+
+ * mainloop.c (chld_proc): Handle sig_chld
+ in the proper thread.
+ (signal_thread): Don't call send_request()
+ because it isn't thread-safe.
+
+2008-05-05 Martin Hunt <hunt@redhat.com>
+
+ * mainloop.c (signal_thread): New thread to handle signals
+ better.
+ (setup_main_signals): Create signal thread.
+
+2008-04-30 Masami Hiramatsu <mhiramat@redhat.com>
+
+ PR 6008
+ * common.c (parse_args): Increase the limitation of the buffer size
+ to 4095MB.
+ * common.c (usage): Ditto.
+
+2008-04-30 Masami Hiramatsu <mhiramat@redhat.com>
+
+ * stapio.c (main): Fix a typo in a debug message.
+ * staprun.c (main): Ditto.
+
+2008-04-24 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 6451.
+ * common.c (control_channel): Initialize to -1.
+ * ctl.c (close_ctl_channel): Tolerate fd=0.
+
+2008-04-22 Martin Hunt <hunt@redhat.com>
+
+ * cap.c (init_cap): Detect capabilities failure and
+ run with them disabled.
+
+2008-04-22 Martin Hunt <hunt@redhat.com>
+
+ * mainloop.c (send_request): Move here from common.c
+ staprun no longer sends any messages.
+
+2008-04-22 hunt <hunt@redhat.com>
+
+ * common.c (usage): Add -d option.
+
+2008-04-21 Martin Hunt <hunt@redhat.com>
+
+ * staprun.c, stapio.c, staprun.h, mainloop.c, staprun_funcs.c,
+ ctl.c, common.c: Add "-d" option to have staprun remove
+ modules. Have staprun exec stapio and then have stapio
+ exec "staprun -d" to remove the module when finished.
+
+2008-04-16 Martin Hunt <hunt@redhat.com>
+
+ * ctl.c (init_ctl_channel): Remove unused parameter.
+ Just opens one channel now.
+
2008-02-21 David Smith <dsmith@redhat.com>
* staprun_funcs.c (check_path): Small security fix.
diff --git a/runtime/staprun/cap.c b/runtime/staprun/cap.c
index 6f22dfc9..6ac6701f 100644
--- a/runtime/staprun/cap.c
+++ b/runtime/staprun/cap.c
@@ -23,6 +23,8 @@
#include "staprun.h"
#include <sys/prctl.h>
+static int _stp_no_caps = 0;
+
/* like perror, but exits */
#define ferror(msg) { \
_perr(msg); \
@@ -54,10 +56,10 @@
* CAP_CHOWN - allows chown
*/
-int init_cap(void)
+void init_cap(void)
{
cap_t caps = cap_init();
- cap_value_t capv[] = {CAP_SYS_MODULE, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SETUID, CAP_SETGID, CAP_DAC_OVERRIDE};
+ cap_value_t capv[] = { CAP_SYS_MODULE, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SETUID, CAP_SETGID, CAP_DAC_OVERRIDE };
const int numcaps = sizeof(capv) / sizeof(capv[0]);
uid_t uid = getuid();
gid_t gid = getgid();
@@ -69,8 +71,11 @@ int init_cap(void)
if (cap_set_flag(caps, CAP_PERMITTED, numcaps, capv, CAP_SET) < 0)
ferror("cap_set_flag");
- if (cap_set_proc(caps) < 0)
- ferror("cap_set_proc");
+ if (cap_set_proc(caps) < 0) {
+ dbug(1, "Setting capabilities failed. Capabilities disabled.\n");
+ _stp_no_caps = 1;
+ return;
+ }
cap_free(caps);
@@ -82,8 +87,6 @@ int init_cap(void)
if (setresgid(gid, gid, gid) < 0)
ferror("setresgid");
-
- return 1;
}
void print_cap(char *text)
@@ -97,19 +100,18 @@ void print_cap(char *text)
perr("cap_get_proc");
return;
}
-
+
getresuid(&uid, &euid, &suid);
getresgid(&gid, &egid, &sgid);
printf("***** %s\n", text);
- if ((p=prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0)) < 0)
+ if ((p = prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0)) < 0)
perr("Couldn't get PR_SET_KEEPCAPS flag value");
- else
+ else
printf("KEEPCAPS: %d\n", p);
- printf("uid: %d, euid: %d, suid: %d\ngid: %d. egid: %d, sgid: %d\n",
- uid, euid, suid, gid, egid, sgid );
+ printf("uid: %d, euid: %d, suid: %d\ngid: %d. egid: %d, sgid: %d\n", uid, euid, suid, gid, egid, sgid);
printf("Caps: %s\n", cap_to_text(caps, NULL));
cap_free(caps);
printf("*****\n\n");
@@ -121,38 +123,44 @@ void print_cap(char *text)
*/
void drop_cap(cap_value_t cap)
{
- cap_t caps = cap_get_proc();
- if (caps == NULL)
- ferror("cap_get_proc failed");
- if (cap_set_flag(caps, CAP_PERMITTED, 1, &cap, CAP_CLEAR) < 0)
- ferror("Could not clear effective capabilities");
- if (cap_set_proc(caps) < 0)
- ferror("Could not apply capability set");
- cap_free(caps);
+ if (_stp_no_caps == 0) {
+ cap_t caps = cap_get_proc();
+ if (caps == NULL)
+ ferror("cap_get_proc failed");
+ if (cap_set_flag(caps, CAP_PERMITTED, 1, &cap, CAP_CLEAR) < 0)
+ ferror("Could not clear effective capabilities");
+ if (cap_set_proc(caps) < 0)
+ ferror("Could not apply capability set");
+ cap_free(caps);
+ }
}
/* add_cap() adds a permitted capability to the effective set. */
void add_cap(cap_value_t cap)
{
- cap_t caps = cap_get_proc();
- if (caps == NULL)
- ferror("cap_get_proc failed");
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_SET) < 0)
- ferror("Could not set effective capabilities");
- if (cap_set_proc(caps) < 0)
- ferror("Could not apply capability set");
- cap_free(caps);
+ if (_stp_no_caps == 0) {
+ cap_t caps = cap_get_proc();
+ if (caps == NULL)
+ ferror("cap_get_proc failed");
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_SET) < 0)
+ ferror("Could not set effective capabilities");
+ if (cap_set_proc(caps) < 0)
+ ferror("Could not apply capability set");
+ cap_free(caps);
+ }
}
/* del_cap() deletes a permitted capability from the effective set. */
void del_cap(cap_value_t cap)
{
- cap_t caps = cap_get_proc();
- if (caps == NULL)
- ferror("cap_get_proc failed");
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_CLEAR) < 0)
- ferror("Could not clear effective capabilities");
- if (cap_set_proc(caps) < 0)
- ferror("Could not apply capability set");
- cap_free(caps);
+ if (_stp_no_caps == 0) {
+ cap_t caps = cap_get_proc();
+ if (caps == NULL)
+ ferror("cap_get_proc failed");
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_CLEAR) < 0)
+ ferror("Could not clear effective capabilities");
+ if (cap_set_proc(caps) < 0)
+ ferror("Could not apply capability set");
+ cap_free(caps);
+ }
}
diff --git a/runtime/staprun/common.c b/runtime/staprun/common.c
index 47778efd..93da51d8 100644
--- a/runtime/staprun/common.c
+++ b/runtime/staprun/common.c
@@ -22,6 +22,7 @@ unsigned int buffer_size;
char *target_cmd;
char *outfile_name;
int attach_mod;
+int delete_mod;
int load_only;
int need_uprobes;
@@ -30,8 +31,7 @@ char *modname = NULL;
char *modpath = "";
char *modoptions[MAXMODOPTIONS];
-int initialized = 0;
-int control_channel = 0;
+int control_channel = -1; /* NB: fd==0 possible */
void parse_args(int argc, char **argv)
{
@@ -44,10 +44,11 @@ void parse_args(int argc, char **argv)
target_cmd = NULL;
outfile_name = NULL;
attach_mod = 0;
+ delete_mod = 0;
load_only = 0;
need_uprobes = 0;
- while ((c = getopt(argc, argv, "ALuvb:t:d:c:o:x:")) != EOF) {
+ while ((c = getopt(argc, argv, "ALuvb:t:dc:o:x:")) != EOF) {
switch (c) {
case 'u':
need_uprobes = 1;
@@ -57,8 +58,8 @@ void parse_args(int argc, char **argv)
break;
case 'b':
buffer_size = (unsigned)atoi(optarg);
- if (buffer_size < 1 || buffer_size > 64) {
- err("Invalid buffer size '%d' (should be 1-64).\n", buffer_size);
+ if (buffer_size < 1 || buffer_size > 4095) {
+ err("Invalid buffer size '%d' (should be 1-4095).\n", buffer_size);
usage(argv[0]);
}
break;
@@ -67,7 +68,8 @@ void parse_args(int argc, char **argv)
target_pid = atoi(optarg);
break;
case 'd':
- /* obsolete internal option used by stap */
+ /* delete module */
+ delete_mod = 1;
break;
case 'c':
target_cmd = optarg;
@@ -128,11 +130,14 @@ void usage(char *prog)
err("-o FILE Send output to FILE.\n");
err("-b buffer size The systemtap module specifies a buffer size.\n");
err(" Setting one here will override that value. The\n");
- err(" value should be an integer between 1 and 64\n");
+ err(" value should be an integer between 1 and 4095 \n");
err(" which be assumed to be the buffer size in MB.\n");
err(" That value will be per-cpu in bulk mode.\n");
err("-L Load module and start probes, then detach.\n");
err("-A Attach to loaded systemtap module.\n");
+ err("-d Delete a module. Only detached or unused modules\n");
+ err(" the user has permission to access will be deleted. Use \"*\"\n");
+ err(" (quoted) to delete all unused modules.\n");
err("MODULE can be either a module name or a module path. If a\n");
err("module name is used, it is looked for in the following\n");
err("directory: /lib/modules/`uname -r`/systemtap\n");
@@ -250,10 +255,7 @@ static void fatal_handler (int signum)
rc = write (STDERR_FILENO, ERR_MSG, sizeof(ERR_MSG));
rc = write (STDERR_FILENO, str, strlen(str));
rc = write (STDERR_FILENO, "\n", 1);
- if (initialized)
- _exit(3);
- else
- _exit(1);
+ _exit(1);
}
void setup_signals(void)
@@ -294,28 +296,6 @@ void setup_signals(void)
#endif
}
-/**
- * send_request - send request to kernel over control channel
- * @type: the relay-app command id
- * @data: pointer to the data to be sent
- * @len: length of the data to be sent
- *
- * Returns 0 on success, negative otherwise.
- */
-int send_request(int type, void *data, int len)
-{
- char buf[1024];
-
- /* Before doing memcpy, make sure 'buf' is big enough. */
- if ((len + 4) > (int)sizeof(buf)) {
- _err("exceeded maximum send_request size.\n");
- return -1;
- }
- memcpy(buf, &type, 4);
- memcpy(&buf[4], data, len);
- return write(control_channel, buf, len+4);
-}
-
/*
* set FD_CLOEXEC for any file descriptor
*/
diff --git a/runtime/staprun/ctl.c b/runtime/staprun/ctl.c
index af7e6c1a..4597bf72 100644
--- a/runtime/staprun/ctl.c
+++ b/runtime/staprun/ctl.c
@@ -12,45 +12,42 @@
#include "staprun.h"
-int init_ctl_channel(int symbols)
+int init_ctl_channel(const char *name, int verb)
{
- char *cname, buf[PATH_MAX];
+ char buf[PATH_MAX];
struct statfs st;
int old_transport = 0;
-
- if (symbols)
- cname = ".symbols";
- else
- cname = ".cmd";
- if (statfs("/sys/kernel/debug", &st) == 0 && (int) st.f_type == (int) DEBUGFS_MAGIC) {
- if (sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s/%s", modname, cname))
+ if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) {
+ if (sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s/.cmd", name))
return -1;
} else {
old_transport = 1;
- if (sprintf_chk(buf, "/proc/systemtap/%s/%s", modname, cname))
+ if (sprintf_chk(buf, "/proc/systemtap/%s/.cmd", name))
return -1;
}
-
- dbug(2, "Opening %s\n", buf);
+
+ dbug(2, "Opening %s\n", buf);
control_channel = open(buf, O_RDWR);
if (control_channel < 0) {
- if (attach_mod && errno == ENOENT)
- err("ERROR: Can not attach. Module %s not running.\n", modname);
- else
- perr("Couldn't open control channel '%s'", buf);
+ if (verb) {
+ if (attach_mod && errno == ENOENT)
+ err("ERROR: Can not attach. Module %s not running.\n", name);
+ else
+ perr("Couldn't open control channel '%s'", buf);
+ }
return -1;
}
if (set_clexec(control_channel) < 0)
return -1;
-
+
return old_transport;
}
void close_ctl_channel(void)
{
- if (control_channel > 0) {
+ if (control_channel >= 0) {
close(control_channel);
- control_channel = 0;
+ control_channel = -1;
}
}
diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c
index 2e0c3c5c..61963743 100644
--- a/runtime/staprun/mainloop.c
+++ b/runtime/staprun/mainloop.c
@@ -15,39 +15,98 @@
/* globals */
int ncpus;
-int use_old_transport = 0;
+static int use_old_transport = 0;
+//enum _stp_sig_type { sig_none, sig_done, sig_detach };
+//static enum _stp_sig_type got_signal = sig_none;
-static void sigproc(int signum)
+/**
+ * send_request - send request to kernel over control channel
+ * @type: the relay-app command id
+ * @data: pointer to the data to be sent
+ * @len: length of the data to be sent
+ *
+ * Returns the number of bytes written on success, negative otherwise.
+ */
+int send_request(int type, void *data, int len)
{
- dbug(2, "sigproc %d (%s)\n", signum, strsignal(signum));
-
- if (signum == SIGCHLD) {
- pid_t pid = waitpid(-1, NULL, WNOHANG);
- if (pid != target_pid)
- return;
- send_request(STP_EXIT, NULL, 0);
- } else if (signum == SIGQUIT)
- cleanup_and_exit(2);
- else if (signum == SIGINT || signum == SIGHUP || signum == SIGTERM)
- send_request(STP_EXIT, NULL, 0);
+ char buf[1024];
+
+ /* Before doing memcpy, make sure 'buf' is big enough. */
+ if ((len + 4) > (int)sizeof(buf)) {
+ _err("exceeded maximum send_request size.\n");
+ return -1;
+ }
+ memcpy(buf, &type, 4);
+ memcpy(&buf[4], data, len);
+ return write(control_channel, buf, len + 4);
}
-static void setup_main_signals(int cleanup)
+static void *signal_thread(void *arg)
{
- struct sigaction a;
- memset(&a, 0, sizeof(a));
- sigfillset(&a.sa_mask);
- if (cleanup == 0) {
- a.sa_handler = sigproc;
- sigaction(SIGCHLD, &a, NULL);
- } else
- a.sa_handler = SIG_IGN;
- sigaction(SIGINT, &a, NULL);
- sigaction(SIGTERM, &a, NULL);
- sigaction(SIGHUP, &a, NULL);
- sigaction(SIGQUIT, &a, NULL);
+ sigset_t *s = (sigset_t *) arg; /* set of signals to wait for, supplied by the thread creator */
+ int signum, rc, btype = STP_EXIT;
+
+ while (1) {
+ if ((errno = sigwait(s, &signum)) != 0) { /* sigwait() returns an error number, never a negative value */
+ _perr("sigwait");
+ continue;
+ }
+ dbug(2, "sigproc %d (%s)\n", signum, strsignal(signum));
+ if (signum == SIGQUIT)
+ cleanup_and_exit(1);
+ else if (signum == SIGINT || signum == SIGHUP || signum == SIGTERM) {
+ // send STP_EXIT
+ rc = write(control_channel, &btype, sizeof(btype)); /* written directly; the ChangeLog notes send_request() is not thread-safe */
+ break;
+ }
+ }
+ return NULL;
+}
+
+static void chld_proc(int signum)
+{
+ int32_t rc, btype = STP_EXIT;
+ dbug(2, "chld_proc %d (%s)\n", signum, strsignal(signum));
+ pid_t pid = waitpid(-1, NULL, WNOHANG);
+ if (pid != target_pid)
+ return;
+ // send STP_EXIT
+ rc = write(control_channel, &btype, sizeof(btype));
}
+static void setup_main_signals(void)
+{
+ pthread_t tid;
+ struct sigaction sa;
+ sigset_t *s = malloc(sizeof(*s)); /* heap-allocated because it is handed to signal_thread below; not freed here */
+ if (!s) {
+ _perr("malloc failed");
+ exit(1);
+ }
+ sigfillset(s);
+ pthread_sigmask(SIG_SETMASK, s, NULL); /* block every signal while the dispositions are changed */
+ memset(&sa, 0, sizeof(sa));
+ sigfillset(&sa.sa_mask);
+ sa.sa_handler = SIG_IGN;
+ sigaction(SIGINT, &sa, NULL);
+ sigaction(SIGTERM, &sa, NULL);
+ sigaction(SIGHUP, &sa, NULL);
+ sigaction(SIGQUIT, &sa, NULL);
+
+ sa.sa_handler = chld_proc;
+ sigaction(SIGCHLD, &sa, NULL);
+
+ sigemptyset(s);
+ sigaddset(s, SIGINT);
+ sigaddset(s, SIGTERM);
+ sigaddset(s, SIGHUP);
+ sigaddset(s, SIGQUIT);
+ pthread_sigmask(SIG_SETMASK, s, NULL); /* now only INT/TERM/HUP/QUIT stay blocked; signal_thread sigwait()s on them */
+ if ((errno = pthread_create(&tid, NULL, signal_thread, s)) != 0) { /* pthread_create() returns an error number, never a negative value */
+ _perr("failed to create thread");
+ exit(1);
+ }
+}
/*
* start_cmd forks the command given on the command line
@@ -75,7 +134,7 @@ void start_cmd(void)
a.sa_handler = SIG_IGN;
sigaction(SIGINT, &a, NULL);
- dbug (1, "execing target_cmd %s\n", target_cmd);
+ dbug(1, "execing target_cmd %s\n", target_cmd);
if ((pid = fork()) < 0) {
_perr("fork");
exit(1);
@@ -86,8 +145,8 @@ void start_cmd(void)
sigaction(SIGINT, &a, NULL);
/* commands we fork need to run at normal priority */
- setpriority (PRIO_PROCESS, 0, 0);
-
+ setpriority(PRIO_PROCESS, 0, 0);
+
/* wait here until signaled */
sigwait(&usrset, &signum);
@@ -107,11 +166,11 @@ void system_cmd(char *cmd)
{
pid_t pid;
- dbug (2, "system %s\n", cmd);
+ dbug(2, "system %s\n", cmd);
if ((pid = fork()) < 0) {
_perr("fork");
} else if (pid == 0) {
- setpriority (PRIO_PROCESS, 0, 0);
+ setpriority(PRIO_PROCESS, 0, 0);
if (execl("/bin/sh", "sh", "-c", cmd, NULL) < 0)
perr("%s", cmd);
_exit(1);
@@ -128,7 +187,7 @@ static void read_buffer_info(void)
if (!use_old_transport)
return;
- if (statfs("/sys/kernel/debug", &st) == 0 && (int) st.f_type == (int) DEBUGFS_MAGIC)
+ if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC)
return;
if (sprintf_chk(buf, "/proc/systemtap/%s/bufsize", modname))
@@ -152,7 +211,6 @@ static void read_buffer_info(void)
return;
}
-
/**
* init_stapio - initialize the app
* @print_summary: boolean, print summary or not at end of run
@@ -164,7 +222,7 @@ int init_stapio(void)
dbug(2, "init_stapio\n");
/* create control channel */
- use_old_transport = init_ctl_channel(0);
+ use_old_transport = init_ctl_channel(modname, 1);
if (use_old_transport < 0) {
err("Failed to initialize control channel.\n");
return -1;
@@ -177,7 +235,7 @@ int init_stapio(void)
if (init_oldrelayfs() < 0) {
close_ctl_channel();
return -1;
- }
+ }
} else {
if (init_relayfs() < 0) {
close_ctl_channel();
@@ -192,17 +250,12 @@ int init_stapio(void)
if (target_cmd)
start_cmd();
-
return 0;
}
-/* cleanup_and_exit() closed channels and frees memory
- * then exits with the following status codes:
- * 1 - failed to initialize.
- * 2 - disconnected
- * 3 - initialized
- */
-void cleanup_and_exit (int closed)
+/* cleanup_and_exit() closes channels, frees memory,
+ * removes the module (if necessary) and exits. */
+void cleanup_and_exit(int detach)
{
pid_t err;
static int exiting = 0;
@@ -211,32 +264,34 @@ void cleanup_and_exit (int closed)
return;
exiting = 1;
- setup_main_signals(1);
+ setup_main_signals();
- dbug(1, "CLEANUP AND EXIT closed=%d\n", closed);
+ dbug(1, "detach=%d\n", detach);
/* what about child processes? we will wait for them here. */
err = waitpid(-1, NULL, WNOHANG);
if (err >= 0)
err("\nWaiting for processes to exit\n");
- while(wait(NULL) > 0) ;
+ while (wait(NULL) > 0) ;
if (use_old_transport)
- close_oldrelayfs(closed == 2);
+ close_oldrelayfs(detach);
else
close_relayfs();
dbug(1, "closing control channel\n");
close_ctl_channel();
- if (initialized == 2 && closed == 2) {
- err("\nDisconnecting from systemtap module.\n" \
- "To reconnect, type \"staprun -A %s\"\n", modname);
- } else if (initialized)
- closed = 3;
- else
- closed = 1;
- exit(closed);
+ if (detach) {
+ err("\nDisconnecting from systemtap module.\n" "To reconnect, type \"staprun -A %s\"\n", modname);
+ } else {
+ dbug(2, "removing %s\n", modname);
+ if (execl(BINDIR "/staprun", "staprun", "-d", modname, NULL) < 0) {
+ perror(modname);
+ _exit(1);
+ }
+ }
+ _exit(0);
}
/**
@@ -247,90 +302,103 @@ int stp_main_loop(void)
{
ssize_t nb;
void *data;
- int type;
+ uint32_t type;
FILE *ofp = stdout;
char recvbuf[8196];
setvbuf(ofp, (char *)NULL, _IOLBF, 0);
- setup_main_signals(0);
+ setup_main_signals();
dbug(2, "in main loop\n");
send_request(STP_READY, NULL, 0);
- while (1) { /* handle messages from control channel */
+ /* handle messages from control channel */
+ while (1) {
nb = read(control_channel, recvbuf, sizeof(recvbuf));
+ dbug(2, "nb=%d\n", (int)nb);
if (nb <= 0) {
if (errno != EINTR)
_perr("Unexpected EOF in read (nb=%ld)", (long)nb);
continue;
}
-
- type = *(int *)recvbuf;
- data = (void *)(recvbuf + sizeof(int));
- switch (type) {
+ type = *(uint32_t *) recvbuf;
+ data = (void *)(recvbuf + sizeof(uint32_t));
+ nb -= sizeof(uint32_t);
+
+ switch (type) {
#ifdef STP_OLD_TRANSPORT
case STP_REALTIME_DATA:
- {
- ssize_t bw = write(out_fd[0], data, nb - sizeof(int));
- if (bw >= 0 && bw != (nb - (ssize_t)sizeof(int))) {
- nb = nb - bw;
- bw = write(out_fd[0], data, nb - sizeof(int));
+ {
+ ssize_t bw = write(out_fd[0], data, nb);
+ if (bw >= 0 && bw != nb) {
+ nb = nb - bw;
+ bw = write(out_fd[0], data, nb);
+ }
+ if (bw != nb) {
+ _perr("write error (nb=%ld)", (long)nb);
+ cleanup_and_exit(0);
+ }
+ break;
}
- if (bw != (nb - (ssize_t)sizeof(int))) {
- _perr("write error (nb=%ld)", (long)nb);
- cleanup_and_exit(1);
- }
- break;
- }
#endif
case STP_OOB_DATA:
- fputs ((char *)data, stderr);
- break;
- case STP_EXIT:
- {
- /* module asks us to unload it and exit */
- int *closed = (int *)data;
- dbug(2, "got STP_EXIT, closed=%d\n", *closed);
- cleanup_and_exit(*closed);
+ fputs((char *)data, stderr);
break;
- }
- case STP_START:
- {
- struct _stp_msg_start *t = (struct _stp_msg_start *)data;
- dbug(2, "probe_start() returned %d\n", t->res);
- if (t->res < 0) {
- if (target_cmd)
- kill (target_pid, SIGKILL);
- cleanup_and_exit(1);
- } else if (target_cmd)
- kill (target_pid, SIGUSR1);
- break;
- }
+ case STP_EXIT:
+ {
+ /* module asks us to unload it and exit */
+ dbug(2, "got STP_EXIT\n");
+ cleanup_and_exit(0);
+ break;
+ }
+ case STP_START:
+ {
+ struct _stp_msg_start *t = (struct _stp_msg_start *)data;
+ dbug(2, "probe_start() returned %d\n", t->res);
+ if (t->res < 0) {
+ if (target_cmd)
+ kill(target_pid, SIGKILL);
+ cleanup_and_exit(0);
+ } else if (target_cmd)
+ kill(target_pid, SIGUSR1);
+ break;
+ }
case STP_SYSTEM:
- {
- struct _stp_msg_cmd *c = (struct _stp_msg_cmd *)data;
- dbug(2, "STP_SYSTEM: %s\n", c->cmd);
- system_cmd(c->cmd);
- break;
- }
+ {
+ struct _stp_msg_cmd *c = (struct _stp_msg_cmd *)data;
+ dbug(2, "STP_SYSTEM: %s\n", c->cmd);
+ system_cmd(c->cmd);
+ break;
+ }
case STP_TRANSPORT:
- {
- struct _stp_msg_start ts;
- if (use_old_transport) {
- if (init_oldrelayfs() < 0)
- cleanup_and_exit(1);
- } else {
- if (init_relayfs() < 0)
+ {
+ struct _stp_msg_start ts;
+ if (use_old_transport) {
+ if (init_oldrelayfs() < 0)
+ cleanup_and_exit(0);
+ } else {
+ if (init_relayfs() < 0)
+ cleanup_and_exit(0);
+ }
+ ts.target = target_pid;
+ send_request(STP_START, &ts, sizeof(ts));
+ if (load_only)
cleanup_and_exit(1);
+ break;
+ }
+ case STP_UNWIND:
+ {
+ int len;
+ char *ptr = (char *)data;
+ while (nb > 0) {
+ send_unwind_data(ptr);
+ len = strlen(ptr) + 1;
+ ptr += len;
+ nb -= len;
+ }
+ break;
}
- ts.target = target_pid;
- initialized = 2;
- send_request(STP_START, &ts, sizeof(ts));
- if (load_only)
- cleanup_and_exit(2);
- break;
- }
default:
err("WARNING: ignored message of type %d\n", (type));
}
diff --git a/runtime/staprun/stapio.c b/runtime/staprun/stapio.c
index ee30a1a1..3c8c4f7f 100644
--- a/runtime/staprun/stapio.c
+++ b/runtime/staprun/stapio.c
@@ -27,24 +27,23 @@ char *__name__ = "stapio";
int main(int argc, char **argv)
{
setup_signals();
-
parse_args(argc, argv);
if (buffer_size)
- dbug(1, "Using a buffer of %u bytes.\n", buffer_size);
+ dbug(1, "Using a buffer of %u MB.\n", buffer_size);
if (optind < argc) {
parse_modpath(argv[optind++]);
dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname);
}
- if (optind < argc) {
+ if (optind < argc) {
if (attach_mod) {
err("ERROR: Cannot have module options with attach (-A).\n");
usage(argv[0]);
} else {
- unsigned start_idx = 3; /* reserve three slots in modoptions[] */
- while (optind < argc && start_idx+1 < MAXMODOPTIONS)
+ unsigned start_idx = 3; /* reserve three slots in modoptions[] */
+ while (optind < argc && start_idx + 1 < MAXMODOPTIONS)
modoptions[start_idx++] = argv[optind++];
modoptions[start_idx] = NULL;
}
@@ -57,13 +56,7 @@ int main(int argc, char **argv)
if (init_stapio())
exit(1);
-
- initialized = 1;
- if (attach_mod) {
- /* already started */
- initialized++;
- }
-
+
if (stp_main_loop()) {
err("ERROR: Couldn't enter main loop. Exiting.\n");
exit(1);
diff --git a/runtime/staprun/staprun.c b/runtime/staprun/staprun.c
index f4e67fdb..0291d01f 100644
--- a/runtime/staprun/staprun.c
+++ b/runtime/staprun/staprun.c
@@ -16,21 +16,18 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
- * Copyright (C) 2005-2007 Red Hat, Inc.
+ * Copyright (C) 2005-2008 Red Hat, Inc.
*
*/
#include "staprun.h"
-int inserted_module = 0;
-
/* used in dbug, _err and _perr */
char *__name__ = "staprun";
extern long delete_module(const char *, unsigned int);
-static int
-run_as(uid_t uid, gid_t gid, const char *path, char *const argv[])
+static int run_as(uid_t uid, gid_t gid, const char *path, char *const argv[])
{
pid_t pid;
int rstatus;
@@ -42,14 +39,13 @@ run_as(uid_t uid, gid_t gid, const char *path, char *const argv[])
err("%s ", argv[i]);
i++;
}
- err("\n");
+ err("\n");
}
if ((pid = fork()) < 0) {
_perr("fork");
return -1;
- }
- else if (pid == 0) {
+ } else if (pid == 0) {
/* Make sure we run as the full user. If we're
* switching to a non-root user, this won't allow
* that process to switch back to root (since the
@@ -79,17 +75,6 @@ run_as(uid_t uid, gid_t gid, const char *path, char *const argv[])
return -1;
}
-/* Keep the uid and gid settings because we will likely */
-/* conditionally restore "-u" */
-static int run_stapio(char **argv)
-{
- uid_t uid = getuid();
- gid_t gid = getgid();
- argv[0] = PKGLIBDIR "/stapio";
-
- return run_as(uid, gid, argv[0], argv);
-}
-
/*
* Module to be inserted has one or more user-space probes. Make sure
* uprobes is enabled.
@@ -132,8 +117,7 @@ static int enable_uprobes(void)
dbug(2, "Inserting uprobes module from SystemTap runtime.\n");
argv[0] = NULL;
- return insert_module(PKGDATADIR "/runtime/uprobes/uprobes.ko",
- NULL, argv);
+ return insert_module(PKGDATADIR "/runtime/uprobes/uprobes.ko", NULL, argv);
}
static int insert_stap_module(void)
@@ -144,6 +128,66 @@ static int insert_stap_module(void)
return insert_module(modpath, bufsize_option, modoptions);
}
+static int remove_module(const char *name, int verb);
+
+static void remove_all_modules(void)
+{
+ char *base;
+ struct statfs st;
+ struct dirent *d;
+ DIR *moddir;
+
+ if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC)
+ base = "/sys/kernel/debug/systemtap";
+ else
+ base = "/proc/systemtap";
+
+ moddir = opendir(base);
+ if (moddir) {
+ while ((d = readdir(moddir)))
+ if (remove_module(d->d_name, 0) == 0)
+ printf("Module %s removed.\n", d->d_name);
+ closedir(moddir);
+ }
+}
+
+static int remove_module(const char *name, int verb)
+{
+ int ret;
+ dbug(2, "%s\n", name);
+
+ if (strcmp(name, "*") == 0) {
+ remove_all_modules();
+ return 0;
+ }
+
+ /* Call init_ctl_channel() which actually attempts an open()
+ * of the control channel. This is better than using access() because
+ * an open on an already open channel will fail, preventing us from attempting
+ * to remove an in-use module.
+ */
+ if (init_ctl_channel(name, 0) < 0) {
+ if (verb)
+ err("Error accessing systemtap module %s: %s\n", name, strerror(errno));
+ return 1;
+ }
+ close_ctl_channel();
+
+ dbug(2, "removing module %s\n", name);
+
+ /* Don't remove module when priority is elevated. */
+ if (setpriority(PRIO_PROCESS, 0, 0) < 0)
+ _perr("setpriority");
+
+ ret = do_cap(CAP_SYS_MODULE, delete_module, name, 0);
+ if (ret != 0) {
+ err("Error removing module '%s': %s.\n", name, strerror(errno));
+ return 1;
+ }
+
+ dbug(1, "Module %s removed.\n", name);
+ return 0;
+}
int init_staprun(void)
{
@@ -154,71 +198,28 @@ int init_staprun(void)
/* We're done with CAP_SYS_ADMIN. */
drop_cap(CAP_SYS_ADMIN);
-
- if (!attach_mod) {
+
+ if (delete_mod)
+ exit(remove_module(modname, 1));
+ else if (!attach_mod) {
if (need_uprobes && enable_uprobes() != 0)
return -1;
if (insert_stap_module() < 0)
return -1;
- else
- inserted_module = 1;
}
-
return 0;
}
-
-static void cleanup(int rc)
-{
- /* Only cleanup once. */
- static int done = 0;
- if (done == 0)
- done = 1;
- else
- return;
-
- dbug(2, "rc=%d, inserted_module=%d\n", rc, inserted_module);
-
- if (setpriority (PRIO_PROCESS, 0, 0) < 0)
- _perr("setpriority");
-
- stop_symbol_thread();
-
- /* rc == 2 means disconnected */
- if (rc == 2)
- return;
-
- /* If we inserted the module and did not get rc==2, then */
- /* we really want to remove it. */
- if (inserted_module || rc == 3) {
- long ret;
- dbug(2, "removing module %s\n", modname);
- ret = do_cap(CAP_SYS_MODULE, delete_module, modname, 0);
- if (ret != 0)
- err("Error removing module '%s': %s\n", modname, moderror(errno));
- }
-}
-
-static void exit_cleanup(void)
-{
- dbug(2, "something exited...\n");
- cleanup(1);
-}
int main(int argc, char **argv)
{
int rc;
- if (atexit(exit_cleanup)) {
- _perr("cannot set exit function");
- exit(1);
- }
-
- /* NB: Don't do the geteuid()!=0 check here, since we want to
- test command-line error-handling while running non-root. */
+ /* NB: Don't do the geteuid()!=0 check here, since we want to
+ test command-line error-handling while running non-root. */
/* Get rid of a few standard environment variables (which */
/* might cause us to do unintended things). */
rc = unsetenv("IFS") || unsetenv("CDPATH") || unsetenv("ENV")
- || unsetenv("BASH_ENV");
+ || unsetenv("BASH_ENV");
if (rc) {
_perr("unsetenv failed");
exit(-1);
@@ -229,20 +230,20 @@ int main(int argc, char **argv)
parse_args(argc, argv);
if (buffer_size)
- dbug(2, "Using a buffer of %u bytes.\n", buffer_size);
+ dbug(2, "Using a buffer of %u MB.\n", buffer_size);
if (optind < argc) {
parse_modpath(argv[optind++]);
dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname);
}
- if (optind < argc) {
+ if (optind < argc) {
if (attach_mod) {
err("ERROR: Cannot have module options with attach (-A).\n");
usage(argv[0]);
} else {
unsigned start_idx = 0;
- while (optind < argc && start_idx+1 < MAXMODOPTIONS)
+ while (optind < argc && start_idx + 1 < MAXMODOPTIONS)
modoptions[start_idx++] = argv[optind++];
modoptions[start_idx] = NULL;
}
@@ -254,14 +255,13 @@ int main(int argc, char **argv)
}
if (geteuid() != 0) {
- err("ERROR: The effective user ID of staprun must be set to the root user.\n"
- " Check permissions on staprun and ensure it is a setuid root program.\n");
+ err("ERROR: The effective user ID of staprun must be set to the root user.\n"
+ " Check permissions on staprun and ensure it is a setuid root program.\n");
exit(1);
}
- if (!init_cap())
- exit(1);
-
+ init_cap();
+
if (check_permissions() != 1)
usage(argv[0]);
@@ -277,11 +277,14 @@ int main(int argc, char **argv)
if (init_staprun())
exit(1);
- setup_staprun_signals();
- start_symbol_thread();
-
- rc = run_stapio(argv);
- cleanup(rc);
-
+ argv[0] = PKGLIBDIR "/stapio";
+ if (execv(argv[0], argv) < 0) {
+ perror(argv[0]);
+ goto err;
+ }
return 0;
+
+err:
+ remove_module(modname, 1);
+ return 1;
}
diff --git a/runtime/staprun/staprun.h b/runtime/staprun/staprun.h
index 1128fb4c..60bab391 100644
--- a/runtime/staprun/staprun.h
+++ b/runtime/staprun/staprun.h
@@ -103,8 +103,6 @@ extern char *__name__;
#define STP_OLD_TRANSPORT
#include "../transport/transport_msgs.h"
-extern int use_old_transport;
-
#define RELAYFS_MAGIC 0xF0B4A981
#define DEBUGFS_MAGIC 0x64626720
#define DEBUGFSDIR "/sys/kernel/debug"
@@ -118,9 +116,8 @@ int init_stapio(void);
int stp_main_loop(void);
int send_request(int type, void *data, int len);
void cleanup_and_exit (int);
-int do_module(void *);
-int do_kernel_symbols(void);
-int init_ctl_channel(int);
+void send_unwind_data(const char *name);
+int init_ctl_channel(const char *name, int verb);
void close_ctl_channel(void);
int init_relayfs(void);
void close_relayfs(void);
@@ -129,7 +126,7 @@ void close_oldrelayfs(int);
void setup_signals(void);
/* cap.c */
void print_cap(char *text);
-int init_cap(void);
+void init_cap(void);
void add_cap(cap_value_t cap);
void del_cap(cap_value_t cap);
void drop_cap(cap_value_t cap);
@@ -169,6 +166,7 @@ extern int target_pid;
extern char *target_cmd;
extern char *outfile_name;
extern int attach_mod;
+extern int delete_mod;
extern int load_only;
extern int need_uprobes;
diff --git a/runtime/staprun/staprun_funcs.c b/runtime/staprun/staprun_funcs.c
index 34e12c25..c1cb92b7 100644
--- a/runtime/staprun/staprun_funcs.c
+++ b/runtime/staprun/staprun_funcs.c
@@ -16,18 +16,6 @@
#include <grp.h>
#include <pwd.h>
-void setup_staprun_signals(void)
-{
- struct sigaction a;
- memset(&a, 0, sizeof(a));
- sigfillset(&a.sa_mask);
- a.sa_handler = SIG_IGN;
- sigaction(SIGINT, &a, NULL);
- sigaction(SIGTERM, &a, NULL);
- sigaction(SIGHUP, &a, NULL);
- sigaction(SIGQUIT, &a, NULL);
-}
-
extern long init_module(void *, unsigned long, const char *);
/* Module errors get translated. */
@@ -401,95 +389,3 @@ int check_permissions(void)
* is in that directory. */
return check_path();
}
-
-pthread_t symbol_thread_id = (pthread_t)0;
-int kernel_ptr_size = 0;
-
-/* Symbol handling thread */
-void *handle_symbols(void __attribute__((unused)) *arg)
-{
- ssize_t nb;
- void *data;
- int32_t type;
- char recvbuf[8192];
-
- dbug(2, "waiting for symbol requests\n");
-
- /* handle messages from control channel */
- while (1) {
- nb = read(control_channel, recvbuf, sizeof(recvbuf));
- if (nb <= 0) {
- if (errno != EINTR)
- _perr("Unexpected EOF in read (nb=%ld)", (long)nb);
- continue;
- }
-
- type = *(int32_t *)recvbuf;
- data = (void *)(recvbuf + sizeof(int32_t));
-
- switch (type) {
- case STP_MODULE:
- {
- dbug(2, "STP_MODULES request received\n");
- if (do_module(data) < 0)
- goto done;
- break;
- }
- case STP_SYMBOLS:
- {
- struct _stp_msg_symbol *req = (struct _stp_msg_symbol *)data;
- dbug(2, "STP_SYMBOLS request received\n");
- if (req->endian != 0x1234) {
- err("ERROR: staprun is compiled with different endianess than the kernel!\n");
- goto done;
- }
- kernel_ptr_size = req->ptr_size;
- if (kernel_ptr_size != 4 && kernel_ptr_size != 8) {
- err("ERROR: invalid kernel pointer size %d\n", kernel_ptr_size);
- goto done;
- }
- if (do_kernel_symbols() < 0)
- goto done;
- break;
- }
- default:
- err("WARNING: ignored message of type %d\n", (type));
- }
- }
-
-done:
- /* signal stapio we're done */
- kill(0, SIGINT);
-
- return NULL;
-}
-
-void start_symbol_thread(void)
-{
- int status;
-
- /* create symbol control channel */
- status = do_cap(CAP_DAC_OVERRIDE, init_ctl_channel, 1);
- drop_cap(CAP_DAC_OVERRIDE);
- if (status < 0) {
- err("Failed to initialize control channel.\n");
- exit(1);
- }
- status = pthread_create(&symbol_thread_id, NULL, handle_symbols, NULL);
- if (status) {
- perr("Failed to create symbol thread.\n");
- exit(1);
- }
-}
-
-void stop_symbol_thread(void)
-{
-
- if (symbol_thread_id) {
- dbug(2, "Stopping symbol thread.\n");
- pthread_cancel(symbol_thread_id);
- pthread_join(symbol_thread_id, NULL);
- }
- close_ctl_channel();
-}
-
diff --git a/runtime/staprun/symbols.c b/runtime/staprun/symbols.c
deleted file mode 100644
index c7362d9e..00000000
--- a/runtime/staprun/symbols.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/* -*- linux-c -*-
- * Symbols and modules functions for staprun.
- *
- * Copyright (C) 2006-2008 Red Hat Inc.
- *
- * This file is part of systemtap, and is free software. You can
- * redistribute it and/or modify it under the terms of the GNU General
- * Public License (GPL); either version 2, or (at your option) any
- * later version.
- */
-
-#include "staprun.h"
-
-/* send symbol data */
-static int send_data(int32_t type, void *data, int len)
-{
- if (write(control_channel, &type, 4) <= 0)
- return -1;
- return write(control_channel, data, len);
-}
-
-
-/* Get the sections for a module. Put them in the supplied buffer */
-/* in the following order: */
-/* [struct _stp_msg_module][struct _stp_symbol sections ...][string data][unwind data] */
-/* Return the total length of all the data. */
-
-#define SECDIR "/sys/module/%s/sections"
-static int get_sections(char *name, char *data_start, int datalen)
-{
- char dir[STP_MODULE_NAME_LEN + sizeof(SECDIR)];
- char filename[STP_MODULE_NAME_LEN + 256];
- char buf[32], strdata_start[32768];
- char *strdata=strdata_start, *data=data_start;
- int fd, len, res, unwind_data_len=0;
- struct _stp_msg_module *mod = (struct _stp_msg_module *)data_start;
-
- struct dirent *d;
- DIR *secdir;
- void *sec;
- int struct_symbol_size = kernel_ptr_size == 8 ? sizeof(struct _stp_symbol64) : sizeof(struct _stp_symbol32);
- uint64_t sec_addr;
-
- /* start of data is a struct _stp_msg_module */
- data += sizeof(struct _stp_msg_module);
-
- res = snprintf(dir, sizeof(dir), SECDIR, name);
- if (res >= (int)sizeof(dir)) {
- _err("Couldn't fit module \"%s\" into dir buffer.\n" \
- "This should never happen. Please file a bug report.\n", name);
- return -1;
- }
-
- if ((secdir = opendir(dir)) == NULL)
- return 0;
-
- /* Initialize mod. */
- memset(mod, 0, sizeof(struct _stp_msg_module));
-
- /* Copy name in and check for overflow. */
- strncpy(mod->name, name, STP_MODULE_NAME_LEN);
- if (mod->name[STP_MODULE_NAME_LEN - 1] != '\0') {
- _err("Couldn't fit module \"%s\" into mod->name buffer.\n" \
- "This should never happen. Please file a bug report.\n", name);
- return -1;
- }
-
- /* FIXME: optionally fill in unwind data here */
- mod->unwind_len = unwind_data_len;
-
- while ((d = readdir(secdir))) {
- char *secname = d->d_name;
-
- /* Copy filename in and check for overflow. */
- res = snprintf(filename, sizeof(filename), "/sys/module/%s/sections/%s", name, secname);
- if (res >= (int)sizeof(filename)) {
- _err("Couldn't fit secname \"%s\" into filename buffer.\n" \
- "This should never happen. Please file a bug report.\n", secname);
- closedir(secdir);
- return -1;
- }
-
- /* filter out some non-useful stuff */
- if (!strncmp(secname,"__",2)
- || !strcmp(secname,".")
- || !strcmp(secname,"..")
- || !strcmp(secname,".module_sig")
- || !strcmp(secname,".modinfo")
- || !strcmp(secname,".strtab")
- || !strcmp(secname,".symtab") ) {
- continue;
- }
- if (!strncmp(secname, ".gnu.linkonce", 13)
- && strcmp(secname, ".gnu.linkonce.this_module"))
- continue;
-
- if ((fd = open(filename,O_RDONLY)) >= 0) {
- if (read(fd, buf, 32) > 0) {
- /* create next section */
- sec = data;
- if (data - data_start + struct_symbol_size > datalen)
- goto err1;
- data += struct_symbol_size;
-
- sec_addr = (uint64_t)strtoull(buf,NULL,16);
- if (kernel_ptr_size == 8) {
- ((struct _stp_symbol64 *)sec)->addr = sec_addr;
- ((struct _stp_symbol64 *)sec)->symbol = (uint64_t)(strdata - strdata_start);
- } else {
- ((struct _stp_symbol32 *)sec)->addr = (uint32_t)sec_addr;
- ((struct _stp_symbol32 *)sec)->symbol = (uint32_t)(strdata - strdata_start);
- }
- mod->num_sections++;
-
- /* now create string data for the
- * section (checking for overflow) */
- if ((strdata - strdata_start + strlen(strdata))
- >= sizeof(strdata_start))
- goto err1;
- strcpy(strdata, secname);
- strdata += strlen(secname) + 1;
-
- /* These sections are used a lot so keep the values handy */
- if (!strcmp(secname, ".data") || !strncmp(secname, ".rodata", 7)) {
- if (mod->data == 0 || sec_addr < mod->data)
- mod->data = sec_addr;
- }
- if (!strcmp(secname, ".text"))
- mod->text = sec_addr;
- if (!strcmp(secname, ".gnu.linkonce.this_module"))
- mod->module = sec_addr;
- }
- close(fd);
- }
- }
- closedir(secdir);
-
- /* consolidate buffers */
- len = strdata - strdata_start;
- if ((len + data - data_start) > datalen)
- goto err0;
- strdata = strdata_start;
- while (len--)
- *data++ = *strdata++;
-
-#if 0
- if (unwind_data_len) {
- if ((unwind_data_len + data - data_start) > datalen)
- goto err0;
- memcpy(data, unwind_data, unwind_data_len);
- data += unwind_data_len;
- }
-#endif
- return data - data_start;
-
-err1:
- close(fd);
- closedir(secdir);
-err0:
- /* if this happens, something went seriously wrong. */
- _err("Unexpected error. Overflowed buffers.\n");
- return -1;
-}
-#undef SECDIR
-
-/*
- * For modules, we send the name, section names, and offsets
- */
-static int send_module (char *mname)
-{
- char data[65536];
- int len;
- len = get_sections(mname, data, sizeof(data));
- if (len > 0) {
- if (send_data(STP_MODULE, data, len) < 0) {
- _err("Loading of module %s failed. Exiting...\n", mname);
- return -1;
- }
- }
- return len;
-}
-
-/*
- * Send either all modules, or a specific one.
- * Returns:
- * >=0 : OK
- * -1 : serious error (exit)
- */
-int do_module (void *data)
-{
- struct _stp_msg_module *mod = (struct _stp_msg_module *)data;
-
- if (mod->name[0] == 0) {
- struct dirent *d;
- DIR *moddir = opendir("/sys/module");
- if (moddir) {
- while ((d = readdir(moddir)))
- if (send_module(d->d_name) < 0) {
- closedir(moddir);
- return -1;
- }
- closedir(moddir);
- }
- send_request(STP_MODULE, data, 1);
- return 0;
- }
-
- return send_module(mod->name);
-}
-
-#define MAX_SYMBOLS 32*1024
-
-/*
- * Read /proc/kallsyms and send all kernel symbols to the
- * systemtap module. Ignore module symbols; the systemtap module
- * can access them directly.
- */
-int do_kernel_symbols(void)
-{
- FILE *kallsyms=NULL;
- char *name, *mod, *dataptr, *datamax, type, *data_base=NULL;
- unsigned long long addr;
- void *syms = NULL;
- int ret, num_syms, i = 0, struct_symbol_size;
- int max_syms= MAX_SYMBOLS, data_basesize = MAX_SYMBOLS*32;
-
- if (kernel_ptr_size == 8)
- struct_symbol_size = sizeof(struct _stp_symbol64);
- else
- struct_symbol_size = sizeof(struct _stp_symbol32);
-
- syms = malloc(max_syms * struct_symbol_size);
- data_base = malloc(data_basesize);
- if (data_base == NULL || syms == NULL) {
- _err("Failed to allocate memory for symbols\n");
- goto err;
- }
- dataptr = data_base;
- datamax = data_base + data_basesize;
-
- kallsyms = fopen ("/proc/kallsyms", "r");
- if (!kallsyms) {
- _perr("Fatal error: Unable to open /proc/kallsyms");
- goto err;
- }
-
- /* put empty string in data */
- *dataptr++ = 0;
-
- while ((ret = fscanf(kallsyms, "%llx %c %as [%as", &addr, &type, &name, &mod))>0
- && dataptr < datamax) {
- if (ret < 3)
- continue;
- if (ret > 3) {
- /* ignore modules */
- free(name);
- free(mod);
- /* modules are loaded above the kernel, so if we */
- /* are getting modules, then we're done. */
- break;
- }
-
- if (type == 't' || type == 'T' || type == 'A') {
- if (kernel_ptr_size == 8) {
- ((struct _stp_symbol64 *)syms)[i].addr = (uint64_t)addr;
- ((struct _stp_symbol64 *)syms)[i].symbol = (uint64_t)(dataptr - data_base);
- } else {
- ((struct _stp_symbol32 *)syms)[i].addr = (uint32_t)addr;
- ((struct _stp_symbol32 *)syms)[i].symbol = (uint32_t)(dataptr - data_base);
- }
- if (dataptr >= datamax - strlen(name)) {
- char *db;
- data_basesize *= 2;
- db = realloc(data_base, data_basesize);
- if (db == NULL) {
- _err("Could not allocate enough space for symbols.\n");
- goto err;
- }
- dataptr = db + (dataptr - data_base);
- datamax = db + data_basesize;
- data_base = db;
- }
- strcpy(dataptr, name);
- dataptr += strlen(name) + 1;
- free(name);
- i++;
- if (i >= max_syms) {
- max_syms *= 2;
- syms = realloc(syms, max_syms*struct_symbol_size);
- if (syms == NULL) {
- _err("Could not allocate enough space for symbols.\n");
- goto err;
- }
- }
- }
- }
- num_syms = i;
- if (num_syms <= 0)
- goto err;
-
-
- /* send header */
- struct _stp_msg_symbol_hdr smsh;
- smsh.num_syms = num_syms;
- smsh.sym_size = (uint32_t)(dataptr - data_base);
- smsh.unwind_size = (uint32_t)0;
- if (send_request(STP_SYMBOLS, &smsh, sizeof(smsh)) <= 0)
- goto err;
-
- /* send syms */
- if (send_data(STP_SYMBOLS, syms, num_syms*struct_symbol_size) < 0)
- goto err;
-
- /* send data */
- if (send_data(STP_SYMBOLS, data_base, dataptr-data_base) < 0)
- goto err;
-
- free(data_base);
- free(syms);
- fclose(kallsyms);
- return 0;
-
-err:
- if (data_base)
- free(data_base);
- if (syms)
- free(syms);
- if (kallsyms)
- fclose(kallsyms);
-
- _err("Loading of symbols failed. Exiting...\n");
- return -1;
-}
diff --git a/runtime/staprun/unwind_data.c b/runtime/staprun/unwind_data.c
new file mode 100644
index 00000000..ed27cc20
--- /dev/null
+++ b/runtime/staprun/unwind_data.c
@@ -0,0 +1,97 @@
+/* -*- linux-c -*-
+ * Unwind data functions for staprun.
+ *
+ * Copyright (C) 2008 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#include "staprun.h"
+#include <elfutils/libdwfl.h>
+#include <dwarf.h>
+
+/*
+ * libdwfl configuration used by send_unwind_data() when it calls
+ * dwfl_begin().  The callbacks are the stock elfutils helpers for the
+ * running Linux kernel and its modules.
+ * NOTE(review): debuginfo_path is the search path given to
+ * dwfl_standard_find_debuginfo; the leading "-" presumably disables the
+ * CRC check as described in libdwfl.h -- confirm.
+ */
+static char debuginfo_path_arr[] = "-:.debug:/usr/lib/debug";
+static char *debuginfo_path = debuginfo_path_arr;
+static const Dwfl_Callbacks kernel_callbacks = {
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+ .find_elf = dwfl_linux_kernel_find_elf,
+ .section_address = dwfl_linux_kernel_module_section_address,
+};
+
+/*
+ * Locate the raw ".debug_frame" section of module NAME.
+ *
+ * dwfl: libdwfl session the module is reported into.
+ * name: module name as understood by the dwfl callbacks.
+ * len:  out parameter; receives the section size in bytes, or 0 when no
+ *       unwind data is available.
+ *
+ * Returns a pointer to the section contents, or NULL when the module, its
+ * debuginfo, its ELF header or the section itself cannot be found.  The
+ * returned buffer comes from the Elf_Data of the module's ELF image, so
+ * it must be consumed before the caller calls dwfl_end().
+ */
+void *get_module_unwind_data(Dwfl * dwfl, const char *name, int *len)
+{
+	Dwarf_Addr bias = 0;
+	Dwarf *dw;
+	Elf *elf;
+	GElf_Ehdr *ehdr, ehdr_mem;
+	GElf_Shdr *shdr, shdr_mem;
+	Elf_Scn *scn = NULL;
+	Elf_Data *data = NULL;
+	Dwfl_Module *mod;
+
+	mod = dwfl_report_module(dwfl, name, 0, 0);
+	dwfl_report_end(dwfl, NULL, NULL);
+	if (mod == NULL)
+		goto none;
+
+	dw = dwfl_module_getdwarf(mod, &bias);
+	if (dw == NULL)
+		goto none;
+
+	elf = dwarf_getelf(dw);
+	if (elf == NULL)
+		goto none;
+
+	ehdr = gelf_getehdr(elf, &ehdr_mem);
+	if (ehdr == NULL)
+		goto none;
+
+	while ((scn = elf_nextscn(elf, scn))) {
+		const char *secname;
+
+		/* Both lookups can fail on a damaged image; skip such
+		 * sections instead of dereferencing NULL. */
+		shdr = gelf_getshdr(scn, &shdr_mem);
+		if (shdr == NULL)
+			continue;
+		secname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
+		if (secname == NULL)
+			continue;
+
+		if (strcmp(secname, ".debug_frame") == 0) {
+			data = elf_rawdata(scn, NULL);
+			break;
+		}
+	}
+
+	if (data != NULL) {
+		dbug(2, "module %s returns %d\n", name, (int)data->d_size);
+		*len = data->d_size;
+		return data->d_buf;
+	}
+
+none:
+	*len = 0;
+	dbug(2, "module %s returns NULL\n", name);
+	return NULL;
+}
+
+/*
+ * Send an STP_UNWIND message for module NAME over the control channel.
+ *
+ * Message layout:
+ *   [uint32_t STP_UNWIND][struct _stp_msg_unwind][unwind_len bytes of data]
+ *
+ * name: module name.  An empty string selects the "kernel" module for the
+ *       lookup; "*" sends a header-only message with unwind_len == 0.
+ *
+ * Failures (allocation, write) are reported with err() and are not
+ * propagated to the caller.  If no unwind data can be found the message is
+ * still sent, with unwind_len == 0.
+ */
+void send_unwind_data(const char *name)
+{
+	struct _stp_msg_unwind *un;
+	int unwind_data_len = 0;
+	void *unwind_data = NULL;
+	Dwfl *dwfl = NULL;
+	size_t msg_len;
+	char *buf;
+
+	dbug(2, "module %s\n", name);
+	if (strcmp(name, "*")) {
+		dwfl = dwfl_begin(&kernel_callbacks);
+		if (dwfl != NULL)
+			unwind_data = get_module_unwind_data(dwfl, name[0] ? name : "kernel",
+							     &unwind_data_len);
+	}
+
+	/* Never trust a length without a buffer behind it. */
+	if (unwind_data == NULL || unwind_data_len < 0)
+		unwind_data_len = 0;
+
+	msg_len = sizeof(uint32_t) + sizeof(*un) + unwind_data_len;
+	buf = malloc(msg_len);
+	if (!buf) {
+		err("malloc failed\n");
+		goto out;
+	}
+
+	*(uint32_t *) buf = STP_UNWIND;
+	un = (struct _stp_msg_unwind *)(buf + sizeof(uint32_t));
+	/* strncpy() zero-pads but does not terminate on truncation. */
+	strncpy(un->name, name, sizeof(un->name));
+	un->name[sizeof(un->name) - 1] = '\0';
+	un->unwind_len = unwind_data_len;
+
+	/* The data belongs to the dwfl session: copy it before dwfl_end(). */
+	if (unwind_data_len > 0)
+		memcpy(buf + sizeof(uint32_t) + sizeof(*un), unwind_data, unwind_data_len);
+
+	/* send unwind data */
+	if (write(control_channel, buf, msg_len) <= 0)
+		err("write failed\n");
+
+	free(buf);
+out:
+	if (dwfl != NULL)
+		dwfl_end(dwfl);
+}
diff --git a/runtime/sym.c b/runtime/sym.c
index 3c2f859a..7163bf92 100644
--- a/runtime/sym.c
+++ b/runtime/sym.c
@@ -33,7 +33,7 @@ unsigned long _stp_module_relocate(const char *module, const char *section, unsi
return 0;
}
- dbug(DEBUG_SYMBOLS, "%s, %s, %lx\n", module, section, offset);
+ dbug_sym(1, "%s, %s, %lx\n", module, section, offset);
STP_RLOCK_MODULES;
if (!module || !strcmp(section, "") /* absolute, unrelocated address */
@@ -47,7 +47,7 @@ unsigned long _stp_module_relocate(const char *module, const char *section, unsi
if (!strcmp(module, last->name) && !strcmp(section, last_sec->symbol)) {
offset += last_sec->addr;
STP_RUNLOCK_MODULES;
- dbug(DEBUG_SYMBOLS, "offset = %lx\n", offset);
+ dbug_sym(1, "offset = %lx\n", offset);
return offset;
}
}
@@ -72,7 +72,7 @@ unsigned long _stp_module_relocate(const char *module, const char *section, unsi
if (!strcmp(section, last_sec->symbol)) {
offset += last_sec->addr;
STP_RUNLOCK_MODULES;
- dbug(DEBUG_SYMBOLS, "offset = %lx\n", offset);
+ dbug_sym(1, "offset = %lx\n", offset);
return offset;
}
}
@@ -223,8 +223,7 @@ void _stp_symbol_print(unsigned long address)
}
/* Like _stp_symbol_print, except only print if the address is a valid function address */
-
-void _stp_func_print(unsigned long address, int verbose, int exact)
+int _stp_func_print(unsigned long address, int verbose, int exact)
{
char *modname;
const char *name;
@@ -247,7 +246,9 @@ void _stp_func_print(unsigned long address, int verbose, int exact)
_stp_printf(" %p : %s+%#lx/%#lx%s\n", (int64_t) address, name, offset, size, exstr);
} else
_stp_printf("%p ", (int64_t) address);
+ return 1;
}
+ return 0;
}
void _stp_symbol_snprint(char *str, size_t len, unsigned long address)
diff --git a/runtime/sym.h b/runtime/sym.h
index b124882a..0bb64c13 100644
--- a/runtime/sym.h
+++ b/runtime/sym.h
@@ -7,8 +7,8 @@
* later version.
*/
-#ifndef _STAP_SYMBOLS_H_
-#define _STAP_SYMBOLS_H_
+#ifndef _STP_SYM_H_
+#define _STP_SYM_H_
#define STP_MODULE_NAME_LEN 64
@@ -16,11 +16,6 @@ struct _stp_symbol {
unsigned long addr;
const char *symbol;
};
-struct stap_symbol {
- unsigned long addr;
- const char *symbol;
- const char *module;
-};
DEFINE_RWLOCK(_stp_module_lock);
#define STP_RLOCK_MODULES read_lock_irqsave(&_stp_module_lock, flags)
@@ -50,8 +45,14 @@ struct _stp_module {
/* how many sections this module has */
uint32_t num_sections;
- /* how the symbol_data below was allocated */
- int32_t allocated; /* 0 = kmalloc, 1 = vmalloc */
+ /* how the data below was allocated */
+ /* 0 = kmalloc, 1 = vmalloc */
+ struct {
+ unsigned symbols :1;
+ unsigned symbol_data :1;
+ unsigned unwind_data :1;
+ unsigned unwind_hdr :1;
+ } allocated;
struct _stp_symbol *sections;
@@ -63,7 +64,10 @@ struct _stp_module {
/* the stack unwind data for this module */
void *unwind_data;
+ void *unwind_hdr;
uint32_t unwind_data_len;
+ uint32_t unwind_hdr_len;
+ uint32_t unwind_is_ehframe; /* unwind data comes from .eh_frame */
rwlock_t lock; /* lock while unwinding is happening */
};
@@ -80,7 +84,8 @@ struct _stp_module *_stp_modules_by_addr[STP_MAX_MODULES];
/* the number of modules in the arrays */
int _stp_num_modules = 0;
+static unsigned long _stp_kretprobe_trampoline = 0;
unsigned long _stp_module_relocate (const char *module, const char *section, unsigned long offset);
static struct _stp_module *_stp_get_unwind_info (unsigned long addr);
-#endif /* _STAP_SYMBOLS_H_ */
+#endif /* _STP_SYM_H_ */
diff --git a/runtime/task_finder.c b/runtime/task_finder.c
index d2e57a6b..6d79c98a 100644
--- a/runtime/task_finder.c
+++ b/runtime/task_finder.c
@@ -1,9 +1,16 @@
#include <linux/list.h>
+#include <linux/binfmts.h>
static LIST_HEAD(__stp_task_finder_list);
struct stap_task_finder_target;
+#define __STP_TF_STARTING 0
+#define __STP_TF_RUNNING 1
+#define __STP_TF_STOPPING 2
+#define __STP_TF_STOPPED 3
+atomic_t __stp_task_finder_state = ATOMIC_INIT(__STP_TF_STARTING);
+
typedef int (*stap_task_finder_callback)(struct task_struct *tsk,
int register_p,
struct stap_task_finder_target *tgt);
@@ -23,6 +30,10 @@ struct stap_task_finder_target {
stap_task_finder_callback callback;
};
+static u32
+__stp_utrace_task_finder_target_death(struct utrace_attached_engine *engine,
+ struct task_struct *tsk);
+
static int
stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
{
@@ -38,6 +49,11 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
else
new_tgt->pathlen = 0;
+ // Make sure everything is initialized properly.
+ new_tgt->engine_attached = 0;
+ memset(&new_tgt->ops, 0, sizeof(new_tgt->ops));
+ new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death;
+
// Search the list for an existing entry for pathname/pid.
list_for_each(node, &__stp_task_finder_list) {
tgt = list_entry(node, struct stap_task_finder_target, list);
@@ -62,7 +78,6 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
}
// Add this target to the callback list for this task.
- new_tgt->engine_attached = 0;
list_add_tail(&new_tgt->callback_list, &tgt->callback_list_head);
return 0;
}
@@ -78,6 +93,10 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
rcu_read_lock();
for_each_process(tsk) {
struct mm_struct *mm;
+
+ if (tsk->pid <= 1)
+ continue;
+
mm = get_task_mm(tsk);
if (mm) {
mmput(mm);
@@ -152,11 +171,12 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen)
vma = vma->vm_next;
}
if (vma) {
- struct vfsmount *mnt = mntget(vma->vm_file->f_path.mnt);
- struct dentry *dentry = dget(vma->vm_file->f_path.dentry);
- rc = d_path(dentry, mnt, buf, buflen);
- dput(dentry);
- mntput(mnt);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+ rc = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt,
+ buf, buflen);
+#else
+ rc = d_path(&(vma->vm_file->f_path), buf, buflen);
+#endif
}
else {
*buf = '\0';
@@ -167,76 +187,82 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen)
}
#define __STP_UTRACE_TASK_FINDER_EVENTS (UTRACE_EVENT(CLONE) \
- | UTRACE_EVENT(EXEC))
+ | UTRACE_EVENT(EXEC) \
+ | UTRACE_EVENT(DEATH))
#define __STP_UTRACE_ATTACHED_TASK_EVENTS (UTRACE_EVENT(DEATH))
-static u32
-__stp_utrace_task_finder_clone(struct utrace_attached_engine *engine,
- struct task_struct *parent,
- unsigned long clone_flags,
- struct task_struct *child)
+static int
+__stp_utrace_attach(struct task_struct *tsk,
+ const struct utrace_engine_ops *ops, void *data,
+ unsigned long event_flags)
{
- struct utrace_attached_engine *child_engine;
+ struct utrace_attached_engine *engine;
struct mm_struct *mm;
+ int rc = 0;
- // On clone, attach to the child. Ignore threads with no mm
- // (which are kernel threads).
- mm = get_task_mm(child);
- if (mm) {
- mmput(mm);
- child_engine = utrace_attach(child, UTRACE_ATTACH_CREATE,
- engine->ops, 0);
- if (IS_ERR(child_engine))
- _stp_error("attach to clone child %d failed: %ld",
- (int)child->pid, PTR_ERR(child_engine));
- else {
- utrace_set_flags(child, child_engine,
- __STP_UTRACE_TASK_FINDER_EVENTS);
+ // Ignore init
+ if (tsk->pid <= 1)
+ return EPERM;
+
+ // Ignore threads with no mm (which are kernel threads).
+ mm = get_task_mm(tsk);
+ if (! mm)
+ return EPERM;
+ mmput(mm);
+
+ engine = utrace_attach(tsk, UTRACE_ATTACH_CREATE, ops, data);
+ if (IS_ERR(engine)) {
+ int error = -PTR_ERR(engine);
+ if (error != ENOENT) {
+ _stp_error("utrace_attach returned error %d on pid %d",
+ error, (int)tsk->pid);
+ rc = error;
}
}
- return UTRACE_ACTION_RESUME;
+ else if (unlikely(engine == NULL)) {
+ _stp_error("utrace_attach returned NULL on pid %d",
+ (int)tsk->pid);
+ rc = EFAULT;
+ }
+ else {
+ utrace_set_flags(tsk, engine, event_flags);
+ }
+ return rc;
}
static u32
-__stp_utrace_task_finder_death(struct utrace_attached_engine *engine,
- struct task_struct *tsk)
+__stp_utrace_task_finder_report_clone(struct utrace_attached_engine *engine,
+ struct task_struct *parent,
+ unsigned long clone_flags,
+ struct task_struct *child)
{
- struct stap_task_finder_target *tgt = engine->data;
-
- // The first implementation of this added a
- // UTRACE_EVENT(DEATH) handler to
- // __stp_utrace_task_finder_ops. However, dead threads don't
- // have a mm_struct, so we can't find the exe's path. So, we
- // don't know which callback(s) to call.
- //
- // So, now when an "interesting" thread is found, we add a
- // separate UTRACE_EVENT(DEATH) handler for every probe.
+ struct utrace_attached_engine *child_engine;
+ struct mm_struct *mm;
- if (tgt != NULL && tgt->callback != NULL) {
- int rc;
+ if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING)
+ return UTRACE_ACTION_RESUME;
- // Call the callback
- rc = tgt->callback(tsk, 0, tgt);
- if (rc != 0) {
- _stp_error("death callback for %d failed: %d",
- (int)tsk->pid, rc);
- }
- }
+ // On clone, attach to the child.
+ (void) __stp_utrace_attach(child, engine->ops, 0,
+ __STP_UTRACE_TASK_FINDER_EVENTS);
return UTRACE_ACTION_RESUME;
}
static u32
-__stp_utrace_task_finder_exec(struct utrace_attached_engine *engine,
- struct task_struct *tsk,
- const struct linux_binprm *bprm,
- struct pt_regs *regs)
+__stp_utrace_task_finder_report_exec(struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ const struct linux_binprm *bprm,
+ struct pt_regs *regs)
{
size_t filelen;
struct list_head *tgt_node;
struct stap_task_finder_target *tgt;
int found_node = 0;
+ if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING)
+ return UTRACE_ACTION_RESUME;
+
// On exec, check bprm
if (bprm->filename == NULL)
return UTRACE_ACTION_RESUME;
@@ -258,6 +284,8 @@ __stp_utrace_task_finder_exec(struct utrace_attached_engine *engine,
struct list_head *cb_node;
list_for_each(cb_node, &tgt->callback_list_head) {
struct stap_task_finder_target *cb_tgt;
+ int rc;
+
cb_tgt = list_entry(cb_node,
struct stap_task_finder_target,
callback_list);
@@ -274,31 +302,59 @@ __stp_utrace_task_finder_exec(struct utrace_attached_engine *engine,
}
// Set up thread death notification.
- memset(&cb_tgt->ops, 0, sizeof(cb_tgt->ops));
- cb_tgt->ops.report_death
- = &__stp_utrace_task_finder_death;
-
- engine = utrace_attach(tsk,
- UTRACE_ATTACH_CREATE,
- &cb_tgt->ops, cb_tgt);
- if (IS_ERR(engine)) {
- _stp_error("attach to exec'ed %d failed: %ld",
- (int)tsk->pid,
- PTR_ERR(engine));
- }
- else {
- utrace_set_flags(tsk, engine,
+ rc = __stp_utrace_attach(tsk, &cb_tgt->ops, cb_tgt,
__STP_UTRACE_ATTACHED_TASK_EVENTS);
- cb_tgt->engine_attached = 1;
- }
+ if (rc != 0 && rc != EPERM)
+ break;
+ cb_tgt->engine_attached = 1;
}
}
return UTRACE_ACTION_RESUME;
}
+// Death handler for the task finder's own engine (installed as
+// .report_death in __stp_utrace_task_finder_ops).  It runs no callbacks;
+// it only returns UTRACE_ACTION_DETACH.
+static u32
+stap_utrace_task_finder_report_death(struct utrace_attached_engine *engine,
+ struct task_struct *tsk)
+{
+ return UTRACE_ACTION_DETACH;
+}
+
+// Death handler for a per-target engine.  engine->data is read as the
+// struct stap_task_finder_target the engine was attached with.
+//
+// While the task finder is in the __STP_TF_RUNNING state, the target's
+// callback (if any) is invoked with register_p == 0 and a failing
+// callback is reported through _stp_error().  In any other state the
+// callback is skipped.  The return value is always UTRACE_ACTION_DETACH.
+static u32
+__stp_utrace_task_finder_target_death(struct utrace_attached_engine *engine,
+ struct task_struct *tsk)
+{
+ struct stap_task_finder_target *tgt = engine->data;
+
+ // Starting up or shutting down: detach without calling back.
+ if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ return UTRACE_ACTION_DETACH;
+ }
+
+ // The first implementation of this added a
+ // UTRACE_EVENT(DEATH) handler to
+ // __stp_utrace_task_finder_ops. However, dead threads don't
+ // have a mm_struct, so we can't find the exe's path. So, we
+ // don't know which callback(s) to call.
+ //
+ // So, now when an "interesting" thread is found, we add a
+ // separate UTRACE_EVENT(DEATH) handler for every probe.
+
+ if (tgt != NULL && tgt->callback != NULL) {
+ int rc;
+
+ // Call the callback
+ rc = tgt->callback(tsk, 0, tgt);
+ if (rc != 0) {
+ _stp_error("death callback for %d failed: %d",
+ (int)tsk->pid, rc);
+ }
+ }
+ return UTRACE_ACTION_DETACH;
+}
+
struct utrace_engine_ops __stp_utrace_task_finder_ops = {
- .report_clone = __stp_utrace_task_finder_clone,
- .report_exec = __stp_utrace_task_finder_exec,
+ .report_clone = __stp_utrace_task_finder_report_clone,
+ .report_exec = __stp_utrace_task_finder_report_exec,
+ .report_death = stap_utrace_task_finder_report_death,
};
int
@@ -314,44 +370,36 @@ stap_start_task_finder(void)
return ENOMEM;
}
+ atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING);
+
rcu_read_lock();
for_each_process(tsk) {
- struct utrace_attached_engine *engine;
struct mm_struct *mm;
char *mmpath;
size_t mmpathlen;
struct list_head *tgt_node;
+ /* Attach to the thread */
+ rc = __stp_utrace_attach(tsk, &__stp_utrace_task_finder_ops, 0,
+ __STP_UTRACE_TASK_FINDER_EVENTS);
+ if (rc == EPERM) {
+ /* Ignore EPERM errors, which mean this wasn't
+ * a thread we can attach to. */
+ rc = 0;
+ continue;
+ }
+ else if (rc != 0) {
+ /* If we get a real error, quit. */
+ break;
+ }
+
+ /* Grab the path associated with this task. */
mm = get_task_mm(tsk);
if (! mm) {
/* If the thread doesn't have a mm_struct, it is
* a kernel thread which we need to skip. */
continue;
}
-
- /* Attach to the thread */
- engine = utrace_attach(tsk, UTRACE_ATTACH_CREATE,
- &__stp_utrace_task_finder_ops, 0);
- if (IS_ERR(engine)) {
- int error = -PTR_ERR(engine);
- if (error != ENOENT) {
- mmput(mm);
- _stp_error("utrace_attach returned error %d on pid %d",
- error, (int)tsk->pid);
- rc = error;
- break;
- }
- }
- else if (unlikely(engine == NULL)) {
- mmput(mm);
- _stp_error("utrace_attach returned NULL on pid %d",
- (int)tsk->pid);
- rc = EFAULT;
- break;
- }
- utrace_set_flags(tsk, engine, __STP_UTRACE_TASK_FINDER_EVENTS);
-
- /* Check the thread's exe's path/pid against our list. */
mmpath = __stp_get_mm_path(mm, mmpath_buf, PATH_MAX);
mmput(mm); /* We're done with mm */
if (IS_ERR(mmpath)) {
@@ -361,6 +409,7 @@ stap_start_task_finder(void)
break;
}
+ /* Check the thread's exe's path/pid against our list. */
mmpathlen = strlen(mmpath);
list_for_each(tgt_node, &__stp_task_finder_list) {
struct stap_task_finder_target *tgt;
@@ -394,10 +443,19 @@ stap_start_task_finder(void)
(int)tsk->pid, rc);
break;
}
+
+ // Set up thread death notification.
+ rc = __stp_utrace_attach(tsk, &cb_tgt->ops,
+ cb_tgt,
+ __STP_UTRACE_ATTACHED_TASK_EVENTS);
+ if (rc != 0 && rc != EPERM)
+ break;
+ cb_tgt->engine_attached = 1;
}
}
}
rcu_read_unlock();
+
_stp_kfree(mmpath_buf);
return rc;
}
@@ -405,6 +463,8 @@ stap_start_task_finder(void)
static void
stap_stop_task_finder(void)
{
+ atomic_set(&__stp_task_finder_state, __STP_TF_STOPPING);
stap_utrace_detach_ops(&__stp_utrace_task_finder_ops);
__stp_task_finder_cleanup();
+ atomic_set(&__stp_task_finder_state, __STP_TF_STOPPED);
}
diff --git a/runtime/time.c b/runtime/time.c
index 52a2edbb..8a0b6fad 100644
--- a/runtime/time.c
+++ b/runtime/time.c
@@ -131,10 +131,13 @@ __stp_time_timer_callback(unsigned long val)
time->base_cycles = cycles;
write_sequnlock(&time->lock);
+ local_irq_restore(flags);
+ /* PR6481: reenable IRQs before resetting the timer.
+ XXX: The worst that can probably happen is that we get
+ two consecutive timer resets. */
+
if (likely(stp_timer_reregister))
mod_timer(&time->timer, jiffies + 1);
-
- local_irq_restore(flags);
}
/* This is called as an IPI, with interrupts disabled. */
diff --git a/runtime/transport/ChangeLog b/runtime/transport/ChangeLog
index c3837f86..9d0ba162 100644
--- a/runtime/transport/ChangeLog
+++ b/runtime/transport/ChangeLog
@@ -1,3 +1,51 @@
+2008-04-30 Masami Hiramatsu <mhiramat@redhat.com>
+
+ PR 5645
+ * transport.c (_stp_transport_init): Fix subbuffer size calculation
+ overflow.
+
+2008-04-21 hunt <hunt@redhat.com>
+
+ * control.c (_stp_ctl_write): Return len + sizeof(int) so
+ sending an empty command doesn't return 0 and look like a failure.
+ * transport.c (_stp_cleanup_and_exit): Clean up.
+
+2008-04-15 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 6410
+ * symbols.c (_stp_do_unwind_data): Tolerate !STP_USE_DWARF_UNWINDER.
+
+2008-04-15 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 6405
+ * symbols.c (_stp_load_module_symbols): Support older kernels
+ without module->sect_attrs->nsections.
+
+2008-04-09 Martin Hunt <hunt@dragon>
+
+ * symbols.c (_stp_init_kernel_symbols): Print error
+ messages and exit if symbol lookups fail.
+ (_stp_init_modules): Look up modules_op.
+
+2008-03-31 Martin Hunt <hunt@redhat.com>
+
+ * symbols.c (_stp_init_modules): Use STP_USE_DWARF_UNWINDER.
+
+ * transport.c (_stp_get_root_dir): Remove misleading error message.
+
+2008-03-30 Martin Hunt <hunt@redhat.com>
+
+ * symbols.c (_stp_init_modules): If using frames, don't
+ request unwind info.
+
+2008-03-25 Martin Hunt <hunt@redhat.com>
+
+ * control.c (_stp_ctl_write_dbug): Insert missing break.
+
+ 32-bit systems can't do 64-bit get_user(), so
+ * symbols.c (_stp_do_unwind_data): Change unwind_len to a u32.
+ * transport_msgs.h (struct _stp_msg_unwind): Ditto.
+
2008-02-27 Martin Hunt <hunt@redhat.com>
* symbols.c: Use rwlocks. Use new dbug macros. Handle
diff --git a/runtime/transport/control.c b/runtime/transport/control.c
index 6a5b272d..ca7edf79 100644
--- a/runtime/transport/control.c
+++ b/runtime/transport/control.c
@@ -14,80 +14,31 @@ static int _stp_current_buffers = STP_DEFAULT_BUFFERS;
static _stp_mempool_t *_stp_pool_q;
static struct list_head _stp_ctl_ready_q;
-static struct list_head _stp_sym_ready_q;
DEFINE_SPINLOCK(_stp_ctl_ready_lock);
-DEFINE_SPINLOCK(_stp_sym_ready_lock);
-static ssize_t _stp_sym_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
-{
- static int saved_type = 0;
- int type;
-
- if (count < sizeof(int32_t))
- return 0;
-
- /* Allow sending of packet type followed by data in the next packet. */
- if (count == sizeof(int32_t)) {
- if (get_user(saved_type, (int __user *)buf))
- return -EFAULT;
- return count;
- } else if (saved_type) {
- type = saved_type;
- saved_type = 0;
- } else {
- if (get_user(type, (int __user *)buf))
- return -EFAULT;
- count -= sizeof(int);
- buf += sizeof(int);
- }
-
-#if DEBUG_TRANSPORT > 0
- if (type < STP_MAX_CMD)
- _dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count);
-#endif
-
- switch (type) {
- case STP_SYMBOLS:
- count = _stp_do_symbols(buf, count);
- break;
- case STP_MODULE:
- if (count > 1)
- count = _stp_do_module(buf, count);
- else {
- /* count == 1 indicates end of initial modules list */
- _stp_ctl_send(STP_TRANSPORT, NULL, 0);
- }
- break;
- case STP_EXIT:
- _stp_exit_flag = 1;
- break;
- default:
- errk("invalid symbol command type %d\n", type);
- return -EINVAL;
- }
-
- return count;
-}
static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
- int type;
+ u32 type;
static int started = 0;
- if (count < sizeof(int))
+ if (count < sizeof(u32))
return 0;
- if (get_user(type, (int __user *)buf))
+ if (get_user(type, (u32 __user *)buf))
return -EFAULT;
-#if DEBUG_TRANSPORT > 0
+ count -= sizeof(u32);
+ buf += sizeof(u32);
+
+#ifdef DEBUG_TRANS
if (type < STP_MAX_CMD)
_dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count);
#endif
- count -= sizeof(int);
- buf += sizeof(int);
-
switch (type) {
+ case STP_UNWIND:
+ _stp_do_unwind_data(buf, count);
+ break;
case STP_START:
if (started == 0) {
struct _stp_msg_start st;
@@ -110,7 +61,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz
#endif
case STP_READY:
/* request symbolic information */
- _stp_ask_for_symbols();
+ /* _stp_ask_for_symbols(); */
break;
default:
@@ -121,8 +72,6 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz
return count;
}
-#define STP_CTL_BUFFER_SIZE 256
-
struct _stp_buffer {
struct list_head list;
int len;
@@ -131,9 +80,8 @@ struct _stp_buffer {
};
static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq);
-static DECLARE_WAIT_QUEUE_HEAD(_stp_sym_wq);
-#if DEBUG_TRANSPORT > 0
+#ifdef DEBUG_TRANS
static void _stp_ctl_write_dbug(int type, void *data, int len)
{
char buf[64];
@@ -155,19 +103,9 @@ static void _stp_ctl_write_dbug(int type, void *data, int len)
case STP_TRANSPORT:
_dbug("sending STP_TRANSPORT\n");
break;
- default:
- _dbug("ERROR: unknown message type: %d\n", type);
- break;
- }
-}
-static void _stp_sym_write_dbug(int type, void *data, int len)
-{
- switch (type) {
- case STP_SYMBOLS:
- _dbug("sending STP_SYMBOLS\n");
- break;
- case STP_MODULE:
- _dbug("sending STP_MODULE\n");
+ case STP_UNWIND:
+ snprintf(buf, sizeof(buf), "%s", (char *)data);
+ _dbug("sending STP_UNWIND %s [len=%d]\n", buf, len);
break;
default:
_dbug("ERROR: unknown message type: %d\n", type);
@@ -181,7 +119,7 @@ static int _stp_ctl_write(int type, void *data, unsigned len)
struct _stp_buffer *bptr;
unsigned long flags;
-#if DEBUG_TRANSPORT > 0
+#ifdef DEBUG_TRANS
_stp_ctl_write_dbug(type, data, len);
#endif
@@ -203,99 +141,22 @@ static int _stp_ctl_write(int type, void *data, unsigned len)
list_add_tail(&bptr->list, &_stp_ctl_ready_q);
spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);
- return len;
-}
-
-static int _stp_sym_write(int type, void *data, unsigned len)
-{
- struct _stp_buffer *bptr;
- unsigned long flags;
-
-#if DEBUG_TRANSPORT > 0
- _stp_sym_write_dbug(type, data, len);
-#endif
-
- /* make sure we won't overflow the buffer */
- if (unlikely(len > STP_CTL_BUFFER_SIZE))
- return 0;
-
- /* get a buffer from the free pool */
- bptr = _stp_mempool_alloc(_stp_pool_q);
- if (unlikely(bptr == NULL))
- return -1;
-
- bptr->type = type;
- memcpy(bptr->buf, data, len);
- bptr->len = len;
-
- /* put it on the pool of ready buffers */
- spin_lock_irqsave(&_stp_sym_ready_lock, flags);
- list_add_tail(&bptr->list, &_stp_sym_ready_q);
- spin_unlock_irqrestore(&_stp_sym_ready_lock, flags);
-
- /* OK, it's queued. Now signal any waiters. */
- wake_up_interruptible(&_stp_sym_wq);
-
- return len;
+ return len + sizeof(bptr->type);
}
/* send commands with timeout and retry */
static int _stp_ctl_send(int type, void *data, int len)
{
int err, trylimit = 50;
- kbug(DEBUG_TRANSPORT, "ctl_send: type=%d len=%d\n", type, len);
- if (unlikely(type == STP_SYMBOLS || type == STP_MODULE)) {
- while ((err = _stp_sym_write(type, data, len)) < 0 && trylimit--)
- msleep(5);
- } else {
- while ((err = _stp_ctl_write(type, data, len)) < 0 && trylimit--)
- msleep(5);
- if (err > 0)
- wake_up_interruptible(&_stp_ctl_wq);
- }
- kbug(DEBUG_TRANSPORT, "returning %d\n", err);
+ dbug_trans(1, "ctl_send: type=%d len=%d\n", type, len);
+ while ((err = _stp_ctl_write(type, data, len)) < 0 && trylimit--)
+ msleep(5);
+ if (err > 0)
+ wake_up_interruptible(&_stp_ctl_wq);
+ dbug_trans(1, "returning %d\n", err);
return err;
}
-static ssize_t _stp_sym_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos)
-{
- struct _stp_buffer *bptr;
- int len;
- unsigned long flags;
-
- /* wait for nonempty ready queue */
- spin_lock_irqsave(&_stp_sym_ready_lock, flags);
- while (list_empty(&_stp_sym_ready_q)) {
- spin_unlock_irqrestore(&_stp_sym_ready_lock, flags);
- if (file->f_flags & O_NONBLOCK)
- return -EAGAIN;
- if (wait_event_interruptible(_stp_sym_wq, !list_empty(&_stp_sym_ready_q)))
- return -ERESTARTSYS;
- spin_lock_irqsave(&_stp_sym_ready_lock, flags);
- }
-
- /* get the next buffer off the ready list */
- bptr = (struct _stp_buffer *)_stp_sym_ready_q.next;
- list_del_init(&bptr->list);
- spin_unlock_irqrestore(&_stp_sym_ready_lock, flags);
-
- /* write it out */
- len = bptr->len + 4;
- if (len > count || copy_to_user(buf, &bptr->type, len)) {
- /* now what? We took it off the queue then failed to send it */
- /* we can't put it back on the queue because it will likely be out-of-order */
- /* fortunately this should never happen */
- /* FIXME need to mark this as a transport failure */
- errk("Supplied buffer too small. count:%d len:%d\n", (int)count, len);
- return -EFAULT;
- }
-
- /* put it on the pool of free buffers */
- _stp_mempool_free(bptr);
-
- return len;
-}
-
static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
struct _stp_buffer *bptr;
@@ -335,29 +196,10 @@ static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, size_t cou
return len;
}
-static int _stp_sym_opens = 0;
-static int _stp_sym_open_cmd(struct inode *inode, struct file *file)
-{
- /* only allow one reader */
- if (_stp_sym_opens)
- return -1;
-
- _stp_sym_opens++;
- return 0;
-}
-
-static int _stp_sym_close_cmd(struct inode *inode, struct file *file)
-{
- if (_stp_sym_opens)
- _stp_sym_opens--;
- return 0;
-}
-
static int _stp_ctl_open_cmd(struct inode *inode, struct file *file)
{
if (_stp_attached)
return -1;
-
_stp_attach();
return 0;
}
@@ -377,16 +219,7 @@ static struct file_operations _stp_ctl_fops_cmd = {
.release = _stp_ctl_close_cmd,
};
-static struct file_operations _stp_sym_fops_cmd = {
- .owner = THIS_MODULE,
- .read = _stp_sym_read_cmd,
- .write = _stp_sym_write_cmd,
- .open = _stp_sym_open_cmd,
- .release = _stp_sym_close_cmd,
-};
-
static struct dentry *_stp_cmd_file = NULL;
-static struct dentry *_stp_sym_file = NULL;
static int _stp_register_ctl_channel(void)
{
@@ -400,7 +233,6 @@ static int _stp_register_ctl_channel(void)
}
INIT_LIST_HEAD(&_stp_ctl_ready_q);
- INIT_LIST_HEAD(&_stp_sym_ready_q);
/* allocate buffers */
_stp_pool_q = _stp_mempool_init(sizeof(struct _stp_buffer), STP_DEFAULT_BUFFERS);
@@ -415,15 +247,9 @@ static int _stp_register_ctl_channel(void)
_stp_cmd_file->d_inode->i_uid = _stp_uid;
_stp_cmd_file->d_inode->i_gid = _stp_gid;
- /* create [debugfs]/systemtap/module_name/.symbols */
- _stp_sym_file = debugfs_create_file(".symbols", 0600, _stp_utt->dir, NULL, &_stp_sym_fops_cmd);
- if (_stp_sym_file == NULL)
- goto err0;
return 0;
err0:
- if (_stp_cmd_file)
- debugfs_remove(_stp_cmd_file);
_stp_mempool_destroy(_stp_pool_q);
errk("Error creating systemtap debugfs entries.\n");
return -1;
@@ -432,16 +258,10 @@ err0:
static void _stp_unregister_ctl_channel(void)
{
struct list_head *p, *tmp;
- if (_stp_sym_file)
- debugfs_remove(_stp_sym_file);
if (_stp_cmd_file)
debugfs_remove(_stp_cmd_file);
/* Return memory to pool and free it. */
- list_for_each_safe(p, tmp, &_stp_sym_ready_q) {
- list_del(p);
- _stp_mempool_free(p);
- }
list_for_each_safe(p, tmp, &_stp_ctl_ready_q) {
list_del(p);
_stp_mempool_free(p);
diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c
index 2afea1c9..750e1994 100644
--- a/runtime/transport/procfs.c
+++ b/runtime/transport/procfs.c
@@ -161,7 +161,7 @@ struct _stp_buffer {
struct list_head list;
int len;
int type;
- char buf[STP_BUFFER_SIZE];
+ char buf[STP_CTL_BUFFER_SIZE];
};
static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq);
diff --git a/runtime/transport/symbols.c b/runtime/transport/symbols.c
index 8c453a55..087bf893 100644
--- a/runtime/transport/symbols.c
+++ b/runtime/transport/symbols.c
@@ -12,8 +12,8 @@
* lib/sort.c of kernel 2.6.22-rc5. It was written by Matt Mackall.
*/
-#ifndef _SYMBOLS_C_
-#define _SYMBOLS_C_
+#ifndef _STP_SYMBOLS_C_
+#define _STP_SYMBOLS_C_
#include "../sym.h"
static char *_stp_symbol_data = NULL;
@@ -21,17 +21,12 @@ static int _stp_symbol_state = 0;
static char *_stp_module_data = NULL;
static int _stp_module_state = 0;
-
/* these are all the symbol types we are interested in */
static int _stp_sym_type_ok(int type)
{
- switch (type) {
- case 'T':
- case 't':
+ /* we only care about function symbols, which are in the text section */
+ if (type == 'T' || type == 't')
return 1;
- default:
- return 0;
- }
return 0;
}
@@ -41,10 +36,10 @@ static unsigned _stp_get_sym_sizes(struct module *m, unsigned *dsize)
{
unsigned int i;
unsigned num = 0, datasize = 0;
- for (i=0; i < m->num_symtab; i++) {
+ for (i = 0; i < m->num_symtab; i++) {
char *str = (char *)(m->strtab + m->symtab[i].st_name);
if (*str != '\0' && _stp_sym_type_ok(m->symtab[i].st_info)) {
- datasize += strlen(str)+1;
+ datasize += strlen(str) + 1;
num++;
}
}
@@ -52,19 +47,23 @@ static unsigned _stp_get_sym_sizes(struct module *m, unsigned *dsize)
return num;
}
-/* allocate space for a module and symbols */
-static struct _stp_module * _stp_alloc_module(unsigned num, unsigned datasize, unsigned unwindsize)
+/* allocate space for a module, sections, and symbols */
+static struct _stp_module *_stp_alloc_module(unsigned sectsize, unsigned num, unsigned datasize)
{
struct _stp_module *mod = (struct _stp_module *)_stp_kzalloc(sizeof(struct _stp_module));
if (mod == NULL)
goto bad;
+ mod->sections = (struct _stp_symbol *)_stp_kmalloc(sectsize);
+ if (mod->sections == NULL)
+ goto bad;
+
mod->symbols = (struct _stp_symbol *)_stp_kmalloc(num * sizeof(struct _stp_symbol));
if (mod->symbols == NULL) {
mod->symbols = (struct _stp_symbol *)_stp_vmalloc(num * sizeof(struct _stp_symbol));
if (mod->symbols == NULL)
goto bad;
- mod->allocated = 1;
+ mod->allocated.symbols = 1;
}
mod->symbol_data = _stp_kmalloc(datasize);
@@ -72,91 +71,63 @@ static struct _stp_module * _stp_alloc_module(unsigned num, unsigned datasize, u
mod->symbol_data = _stp_vmalloc(datasize);
if (mod->symbol_data == NULL)
goto bad;
- mod->allocated |= 2;
+ mod->allocated.symbol_data = 1;
}
- mod->unwind_data = _stp_kmalloc(unwindsize);
- if (mod->unwind_data == NULL) {
- mod->unwind_data = _stp_vmalloc(unwindsize);
- if (mod->unwind_data == NULL)
- goto bad;
- mod->allocated |= 4;
- }
-
mod->num_symbols = num;
return mod;
bad:
if (mod) {
+ if (mod->sections)
+ _stp_kfree(mod->sections);
if (mod->symbols) {
- if (mod->allocated & 1)
+ if (mod->allocated.symbols)
_stp_vfree(mod->symbols);
else
_stp_kfree(mod->symbols);
- mod->symbols = NULL;
}
- if (mod->symbol_data) {
- if (mod->allocated & 2)
- _stp_vfree(mod->symbol_data);
- else
- _stp_kfree(mod->symbol_data);
- mod->symbol_data = NULL;
- }
- _stp_kfree(mod);
- if (mod->symbols) {
- if (mod->allocated & 1)
- _stp_vfree(mod->symbols);
- else
- _stp_kfree(mod->symbols);
- mod->symbols = NULL;
- }
- _stp_kfree(mod);
+ _stp_kfree(mod);
}
return NULL;
}
-static struct _stp_module * _stp_alloc_module_from_module (struct module *m, uint32_t unwind_len)
-{
- unsigned datasize, num = _stp_get_sym_sizes(m, &datasize);
- return _stp_alloc_module(num, datasize, unwind_len);
-}
-
static void _stp_free_module(struct _stp_module *mod)
{
/* The module write lock is held. Any prior readers of this */
/* module's data will have read locks and need to finish before */
/* the memory is freed. */
write_lock(&mod->lock);
- write_unlock(&mod->lock); /* there will be no more readers */
+ write_unlock(&mod->lock); /* there will be no more readers */
- /* free symbol memory */
- if (mod->symbols) {
- if (mod->allocated & 1)
- _stp_vfree(mod->symbols);
- else
- _stp_kfree(mod->symbols);
- mod->symbols = NULL;
- }
+ /* Free symbol memory */
+ /* If symbol_data wasn't allocated, then symbols weren't either. */
if (mod->symbol_data) {
- if (mod->allocated & 2)
+ if (mod->symbols) {
+ if (mod->allocated.symbols)
+ _stp_vfree(mod->symbols);
+ else
+ _stp_kfree(mod->symbols);
+ }
+ if (mod->allocated.symbol_data)
_stp_vfree(mod->symbol_data);
else
_stp_kfree(mod->symbol_data);
- mod->symbol_data = NULL;
-
}
if (mod->unwind_data) {
- if (mod->allocated & 4)
+ if (mod->allocated.unwind_data)
_stp_vfree(mod->unwind_data);
else
_stp_kfree(mod->unwind_data);
- mod->unwind_data = NULL;
-
}
- if (mod->sections) {
- _stp_kfree(mod->sections);
- mod->sections = NULL;
+ if (mod->unwind_hdr) {
+ if (mod->allocated.unwind_hdr)
+ _stp_vfree(mod->unwind_hdr);
+ else
+ _stp_kfree(mod->unwind_hdr);
}
+ if (mod->sections)
+ _stp_kfree(mod->sections);
/* free module memory */
_stp_kfree(mod);
@@ -168,7 +139,7 @@ static void _stp_del_module(struct _stp_module *mod)
{
int i, num;
- // kbug(DEBUG_SYMBOLS, "deleting %s\n", mod->name);
+ dbug_sym(1, "deleting module %s\n", mod->name);
/* signal relocation code to clear its cache */
_stp_module_relocate((char *)-1, NULL, 0);
@@ -181,15 +152,15 @@ static void _stp_del_module(struct _stp_module *mod)
if (num >= _stp_num_modules)
return;
- for (i = num; i < _stp_num_modules-1; i++)
- _stp_modules[i] = _stp_modules[i+1];
+ for (i = num; i < _stp_num_modules - 1; i++)
+ _stp_modules[i] = _stp_modules[i + 1];
for (num = 0; num < _stp_num_modules; num++) {
if (_stp_modules_by_addr[num] == mod)
break;
}
- for (i = num; i < _stp_num_modules-1; i++)
- _stp_modules_by_addr[i] = _stp_modules_by_addr[i+1];
+ for (i = num; i < _stp_num_modules - 1; i++)
+ _stp_modules_by_addr[i] = _stp_modules_by_addr[i + 1];
_stp_num_modules--;
@@ -197,10 +168,8 @@ static void _stp_del_module(struct _stp_module *mod)
}
static void _stp_free_modules(void)
-{
+{
int i;
- unsigned long flags;
-
/* This only happens when the systemtap module unloads */
/* so there is no need for locks. */
for (i = _stp_num_modules - 1; i >= 0; i--)
@@ -208,82 +177,134 @@ static void _stp_free_modules(void)
}
static unsigned long _stp_kallsyms_lookup_name(const char *name);
+static void _stp_create_unwind_hdr(struct _stp_module *m);
+
+extern unsigned _stp_num_kernel_symbols;
+extern struct _stp_symbol _stp_kernel_symbols[];
-/* process the KERNEL symbols */
-static int _stp_do_symbols(const char __user *buf, int count)
+/* initialize the kernel symbols */
+static int _stp_init_kernel_symbols(void)
{
- struct _stp_symbol *s;
- unsigned datasize, num, unwindsize;
+ _stp_modules[0] = (struct _stp_module *)_stp_kzalloc(sizeof(struct _stp_module));
+ if (_stp_modules[0] == NULL) {
+ _dbug("cannot allocate memory\n");
+ return -1;
+ }
+ _stp_modules[0]->symbols = _stp_kernel_symbols;
+ _stp_modules[0]->num_symbols = _stp_num_kernel_symbols;
+ rwlock_init(&_stp_modules[0]->lock);
+ _stp_num_modules = 1;
+
+ /* Note: this mapping is used by kernel/_stext pseudo-relocations. */
+ _stp_modules[0]->text = _stp_kallsyms_lookup_name("_stext");
+ if (_stp_modules[0]->text == 0) {
+ _dbug("Lookup of _stext failed. Exiting.\n");
+ return -1;
+ }
+ _stp_modules[0]->data = _stp_kallsyms_lookup_name("_etext");
+ if (_stp_modules[0]->data == 0) {
+ _dbug("Lookup of _etext failed. Exiting.\n");
+ return -1;
+ }
+ _stp_modules[0]->text_size = _stp_modules[0]->data - _stp_modules[0]->text;
+ _stp_modules_by_addr[0] = _stp_modules[0];
+
+ _stp_kretprobe_trampoline = _stp_kallsyms_lookup_name("kretprobe_trampoline");
+ /* Lookup failure is not fatal */
+
+ return 0;
+}
+
+static void _stp_do_unwind_data(const char __user *buf, size_t count)
+{
+ u32 unwind_len;
+ unsigned long flags;
+ char name[STP_MODULE_NAME_LEN];
int i;
+ struct _stp_module *m;
+
+ dbug_unwind(1, "got unwind data, count=%d\n", count);
- switch (_stp_symbol_state) {
- case 0:
- if (count != sizeof(struct _stp_msg_symbol_hdr)) {
- errk("count=%d\n", count);
- return -EFAULT;
- }
- if (get_user(num, (unsigned __user *)buf))
- return -EFAULT;
- if (get_user(datasize, (unsigned __user *)(buf+4)))
- return -EFAULT;
- if (get_user(unwindsize, (unsigned __user *)(buf+8)))
- return -EFAULT;
- dbug(DEBUG_UNWIND, "num=%d datasize=%d unwindsize=%d\n", num, datasize, unwindsize);
-
- _stp_modules[0] = _stp_alloc_module(num, datasize, unwindsize);
- if (_stp_modules[0] == NULL) {
- errk("cannot allocate memory\n");
- return -EFAULT;
+ if (count < STP_MODULE_NAME_LEN + sizeof(unwind_len)) {
+ dbug_unwind(1, "unwind message too short\n");
+ return;
+ }
+ if (strncpy_from_user(name, buf, STP_MODULE_NAME_LEN) < 0) {
+ errk("userspace copy failed\n");
+ return;
+ }
+ dbug_unwind(1, "name=%s\n", name);
+ if (!strcmp(name,"*")) {
+ /* OK, all initial unwind data received. Ready to go. */
+ _stp_ctl_send(STP_TRANSPORT, NULL, 0);
+ return;
+ }
+ count -= STP_MODULE_NAME_LEN;
+ buf += STP_MODULE_NAME_LEN;
+
+ if (get_user(unwind_len, (u32 __user *)buf)) {
+ errk("userspace copy failed\n");
+ return;
+ }
+ count -= sizeof(unwind_len);
+ buf += sizeof(unwind_len);
+ if (count != unwind_len) {
+ dbug_unwind(1, "count=%d unwind_len=%d\n", (int)count, (int)unwind_len);
+ return;
+ }
+
+ STP_RLOCK_MODULES;
+ for (i = 0; i < _stp_num_modules; i++) {
+ if (strcmp(name, _stp_modules[i]->name) == 0)
+ break;
+ }
+ if (unlikely(i == _stp_num_modules)) {
+ dbug_unwind(1, "module %s not found!\n", name);
+ STP_RUNLOCK_MODULES;
+ return;
+ }
+ m = _stp_modules[i];
+ write_lock(&m->lock);
+ STP_RUNLOCK_MODULES;
+
+ /* allocate space for unwind data */
+ m->unwind_data = _stp_kmalloc(count);
+ if (unlikely(m->unwind_data == NULL)) {
+ m->unwind_data = _stp_vmalloc(count);
+ if (m->unwind_data == NULL) {
+ errk("kmalloc failed\n");
+ goto done;
}
- rwlock_init(&_stp_modules[0]->lock);
- _stp_symbol_state = 1;
- break;
- case 1:
- dbug(DEBUG_SYMBOLS, "got stap_symbols, count=%d\n", count);
- if (copy_from_user ((char *)_stp_modules[0]->symbols, buf, count))
- return -EFAULT;
- _stp_symbol_state = 2;
- break;
- case 2:
- dbug(DEBUG_SYMBOLS, "got symbol data, count=%d buf=%p\n", count, buf);
- if (copy_from_user (_stp_modules[0]->symbol_data, buf, count))
- return -EFAULT;
- _stp_num_modules = 1;
-
- s = _stp_modules[0]->symbols;
- for (i = 0; i < _stp_modules[0]->num_symbols; i++)
- s[i].symbol += (long)_stp_modules[0]->symbol_data;
-
- _stp_symbol_state = 3;
- /* NB: this mapping is used by kernel/_stext pseudo-relocations. */
- _stp_modules[0]->text = _stp_kallsyms_lookup_name("_stext");
- _stp_modules[0]->data = _stp_kallsyms_lookup_name("_etext");
- _stp_modules[0]->text_size = _stp_modules[0]->data - _stp_modules[0]->text;
- _stp_modules_by_addr[0] = _stp_modules[0];
- dbug(DEBUG_SYMBOLS, "Got kernel symbols. text=%p len=%u\n",
- (int64_t)_stp_modules[0]->text, _stp_modules[0]->text_size);
- break;
- case 3:
- dbug(DEBUG_UNWIND, "got unwind data, count=%d\n", count);
- _stp_symbol_state = 4;
- if (copy_from_user (_stp_modules[0]->unwind_data, buf, count)) {
- _dbug("cfu failed\n");
- return -EFAULT;
+ m->allocated.unwind_data = 1;
+ }
+
+ if (unlikely(copy_from_user(m->unwind_data, buf, count))) {
+ errk("userspace copy failed\n");
+ if (m->unwind_data) {
+ if (m->allocated.unwind_data)
+ _stp_vfree(m->unwind_data);
+ else
+ _stp_kfree(m->unwind_data);
+ m->unwind_data = NULL;
}
- _stp_modules[0]->unwind_data_len = count;
- break;
- default:
- errk("unexpected symbol data of size %d.\n", count);
+ goto done;
}
- return count;
+ m->unwind_data_len = count;
+#ifdef STP_USE_DWARF_UNWINDER
+ _stp_create_unwind_hdr(m);
+#endif
+done:
+ write_unlock(&m->lock);
}
static int _stp_compare_addr(const void *p1, const void *p2)
{
struct _stp_symbol *s1 = (struct _stp_symbol *)p1;
struct _stp_symbol *s2 = (struct _stp_symbol *)p2;
- if (s1->addr == s2->addr) return 0;
- if (s1->addr < s2->addr) return -1;
+ if (s1->addr == s2->addr)
+ return 0;
+ if (s1->addr < s2->addr)
+ return -1;
return 1;
}
@@ -332,18 +353,17 @@ static void generic_swap(void *a, void *b, int size)
* it less suitable for kernel use.
*/
void _stp_sort(void *base, size_t num, size_t size,
- int (*cmp)(const void *, const void *),
- void (*swap)(void *, void *, int size))
+ int (*cmp) (const void *, const void *), void (*swap) (void *, void *, int size))
{
/* pre-scale counters for performance */
- int i = (num/2 - 1) * size, n = num * size, c, r;
+ int i = (num / 2 - 1) * size, n = num * size, c, r;
if (!swap)
swap = (size == 4 ? u32_swap : generic_swap);
/* heapify */
- for ( ; i >= 0; i -= size) {
- for (r = i; r * 2 + size < n; r = c) {
+ for (; i >= 0; i -= size) {
+ for (r = i; r * 2 + size < n; r = c) {
c = r * 2 + size;
if (c < n - size && cmp(base + c, base + c + size) < 0)
c += size;
@@ -367,65 +387,125 @@ void _stp_sort(void *base, size_t num, size_t size,
}
}
+/* filter out section names we don't care about */
+static int _stp_section_is_interesting(const char *name)
+{
+ int ret = 1;
+ if (!strncmp("__", name, 2)
+ || !strncmp(".note", name, 5)
+ || !strncmp(".gnu", name, 4)
+ || !strncmp(".mod", name, 4))
+ ret = 0;
+ return ret;
+}
+
/* Create a new _stp_module and load the symbols */
-static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod, uint32_t unwind_len)
+static struct _stp_module *_stp_load_module_symbols(struct module *mod)
{
- unsigned i, num=0;
- struct module *m = (struct module *)imod->module;
- struct _stp_module *mod = NULL;
- char *dataptr;
+ int i, num, overflow = 0;
+ struct module_sect_attrs *sa = mod->sect_attrs;
+ struct attribute_group *sag = & sa->grp;
+ unsigned sect_size = 0, sect_num = 0, sym_size, sym_num;
+ struct _stp_module *sm;
+ char *dataptr, *endptr;
+ unsigned nsections = 0;
+
+#ifdef STAPCONF_MODULE_NSECTIONS
+ nsections = sa->nsections;
+#else
+ /* count section attributes on older kernels */
+ struct attribute** gattr;
+ for (gattr = sag->attrs; *gattr; gattr++)
+ nsections++;
+ dbug_sym(2, "\tcount %d\n", nsections);
+#endif
+
+ /* calculate how much space to allocate for section strings */
+ for (i = 0; i < nsections; i++) {
+ if (_stp_section_is_interesting(sa->attrs[i].name)) {
+ sect_num++;
+ sect_size += strlen(sa->attrs[i].name) + 1;
+ dbug_sym(2, "\t%s\t%lx\n", sa->attrs[i].name, sa->attrs[i].address);
+ }
+ }
+ sect_size += sect_num * sizeof(struct _stp_symbol);
- if (m == NULL) {
- kbug(DEBUG_SYMBOLS, "imod->module is NULL\n");
+ /* and how much space for symbols */
+ sym_num = _stp_get_sym_sizes(mod, &sym_size);
+
+ sm = _stp_alloc_module(sect_size, sym_num, sym_size);
+ if (!sm) {
+ errk("failed to allocate memory for module.\n");
return NULL;
}
- if (try_module_get(m)) {
- mod = _stp_alloc_module_from_module(m, unwind_len);
- if (mod == NULL) {
- module_put(m);
- errk("failed to allocate memory for module.\n");
- return NULL;
- }
+ strlcpy(sm->name, mod->name, STP_MODULE_NAME_LEN);
+ sm->module = (unsigned long)mod;
+ sm->text = (unsigned long)mod->module_core;
+ sm->text_size = mod->core_text_size;
+ sm->data = 0; /* fixme */
+ sm->num_sections = sect_num;
+ rwlock_init(&sm->lock);
- strlcpy(mod->name, imod->name, STP_MODULE_NAME_LEN);
- mod->module = imod->module;
- mod->text = imod->text;
- mod->data = imod->data;
- mod->num_sections = imod->num_sections;
- mod->sections = imod->sections;
- mod->text_size = m->core_text_size;
- rwlock_init(&mod->lock);
-
- /* now copy all the symbols we are interested in */
- dataptr = mod->symbol_data;
- for (i=0; i < m->num_symtab; i++) {
- char *str = (char *)(m->strtab + m->symtab[i].st_name);
- if (*str != '\0' && _stp_sym_type_ok(m->symtab[i].st_info)) {
- mod->symbols[num].symbol = dataptr;
- mod->symbols[num].addr = m->symtab[i].st_value;
- while (*str) *dataptr++ = *str++;
- *dataptr++ = 0;
- num++;
+ /* copy in section data */
+ dataptr = (char *)((long)sm->sections + sect_num * sizeof(struct _stp_symbol));
+ endptr = (char *)((long)sm->sections + sect_size);
+ num = 0;
+ for (i = 0; i < nsections; i++) {
+ size_t len, maxlen;
+ if (_stp_section_is_interesting(sa->attrs[i].name)) {
+ sm->sections[num].addr = sa->attrs[i].address;
+ sm->sections[num].symbol = dataptr;
+ maxlen = (size_t) (endptr - dataptr);
+ len = strlcpy(dataptr, sa->attrs[i].name, maxlen);
+ if (unlikely(len >= maxlen)) {
+ _dbug("dataptr=%lx endptr=%lx len=%d maxlen=%d\n", dataptr, endptr, len, maxlen);
+ overflow = 1;
}
+ dataptr += len + 1;
+ num++;
}
- module_put(m);
+ }
+ if (unlikely(overflow)) {
+ errk("Section names truncated!!! Should never happen!!\n");
+ *endptr = 0;
+ overflow = 0;
+ }
- /* sort symbols by address */
- _stp_sort (mod->symbols, num, sizeof(struct _stp_symbol), _stp_compare_addr, _stp_swap_symbol);
+ /* now copy all the symbols we are interested in */
+ dataptr = sm->symbol_data;
+ endptr = dataptr + sym_size - 1;
+ num = 0;
+ for (i = 0; i < mod->num_symtab; i++) {
+ char *str = (char *)(mod->strtab + mod->symtab[i].st_name);
+ if (*str != '\0' && _stp_sym_type_ok(mod->symtab[i].st_info)) {
+ sm->symbols[num].symbol = dataptr;
+ sm->symbols[num].addr = mod->symtab[i].st_value;
+ while (*str && (dataptr < endptr))
+ *dataptr++ = *str++;
+ if (unlikely(*str))
+ overflow = 1;
+ *dataptr++ = 0;
+ num++;
+ }
}
- return mod;
+ if (unlikely(overflow))
+ errk("Symbol names truncated!!! Should never happen!!\n");
+
+ /* sort symbols by address */
+ _stp_sort(sm->symbols, num, sizeof(struct _stp_symbol), _stp_compare_addr, _stp_swap_symbol);
+
+ return sm;
}
-/* Remove any old module info from our database */
-static void _stp_module_exists_delete (struct _stp_module *mod)
+/* Remove any old module info from our database. */
+static void _stp_module_exists_delete(struct _stp_module *mod)
{
int i, num;
-
/* remove any old modules with the same name */
for (num = 1; num < _stp_num_modules; num++) {
if (strcmp(_stp_modules[num]->name, mod->name) == 0) {
- dbug(DEBUG_SYMBOLS, "found existing module with name %s. Deleting.\n", mod->name);
+ dbug_sym(1, "found existing module with name %s. Deleting.\n", mod->name);
_stp_del_module(_stp_modules[num]);
break;
}
@@ -435,143 +515,61 @@ static void _stp_module_exists_delete (struct _stp_module *mod)
for (num = 1; num < _stp_num_modules; num++) {
if (mod->text + mod->text_size < _stp_modules_by_addr[num]->text)
continue;
- if (mod->text < _stp_modules_by_addr[num]->text
- + _stp_modules_by_addr[num]->text_size) {
- dbug(DEBUG_SYMBOLS, "New module %s overlaps with old module %s. Deleting old.\n",
- mod->name, _stp_modules_by_addr[num]->name);
+ if (mod->text < _stp_modules_by_addr[num]->text + _stp_modules_by_addr[num]->text_size) {
+ dbug_sym(1, "New module %s overlaps with old module %s. Deleting old.\n",
+ mod->name, _stp_modules_by_addr[num]->name);
_stp_del_module(_stp_modules_by_addr[num]);
}
}
}
-static int _stp_ins_module(struct _stp_module *mod)
+static void _stp_ins_module(struct module *mod)
{
- int i, num, res, ret = 0;
+ int i, num, res;
unsigned long flags;
-
- // kbug(DEBUG_SYMBOLS, "insert %s\n", mod->name);
+ struct _stp_module *m;
+ dbug_sym(1, "insert %s\n", mod->name);
+ m = _stp_load_module_symbols(mod);
+ if (m == NULL)
+ return;
STP_WLOCK_MODULES;
-
- _stp_module_exists_delete(mod);
-
+ _stp_module_exists_delete(m);
/* check for overflow */
if (_stp_num_modules == STP_MAX_MODULES) {
errk("Exceeded the limit of %d modules\n", STP_MAX_MODULES);
- ret = -ENOMEM;
goto done;
}
-
+
/* insert alphabetically in _stp_modules[] */
for (num = 1; num < _stp_num_modules; num++)
- if (strcmp(_stp_modules[num]->name, mod->name) > 0)
+ if (strcmp(_stp_modules[num]->name, m->name) > 0)
break;
for (i = _stp_num_modules; i > num; i--)
- _stp_modules[i] = _stp_modules[i-1];
- _stp_modules[num] = mod;
-
+ _stp_modules[i] = _stp_modules[i - 1];
+ _stp_modules[num] = m;
/* insert by text address in _stp_modules_by_addr[] */
for (num = 1; num < _stp_num_modules; num++)
- if (mod->text < _stp_modules_by_addr[num]->text)
+ if (m->text < _stp_modules_by_addr[num]->text)
break;
for (i = _stp_num_modules; i > num; i--)
- _stp_modules_by_addr[i] = _stp_modules_by_addr[i-1];
- _stp_modules_by_addr[num] = mod;
-
+ _stp_modules_by_addr[i] = _stp_modules_by_addr[i - 1];
+ _stp_modules_by_addr[num] = m;
_stp_num_modules++;
-
done:
STP_WUNLOCK_MODULES;
- return ret;
-}
-
-
-/* Called from procfs.c when a STP_MODULE msg is received */
-static int _stp_do_module(const char __user *buf, int count)
-{
- struct _stp_msg_module tmpmod;
- struct _stp_module mod, *m;
- unsigned i, section_len;
-
- if (count < (int)sizeof(tmpmod)) {
- errk("expected %d and got %d\n", (int)sizeof(tmpmod), count);
- return -EFAULT;
- }
- if (copy_from_user ((char *)&tmpmod, buf, sizeof(tmpmod)))
- return -EFAULT;
-
- section_len = count - sizeof(tmpmod) - tmpmod.unwind_len;
- if (section_len <= 0) {
- errk("section_len = %d\n", section_len);
- return -EFAULT;
- }
- dbug(DEBUG_SYMBOLS, "Got module %s, count=%d section_len=%d unwind_len=%d\n",
- tmpmod.name, count, section_len, tmpmod.unwind_len);
-
- strcpy(mod.name, tmpmod.name);
- mod.module = tmpmod.module;
- mod.text = tmpmod.text;
- mod.data = tmpmod.data;
- mod.num_sections = tmpmod.num_sections;
-
- /* copy in section data */
- mod.sections = _stp_kmalloc(section_len);
- if (mod.sections == NULL) {
- errk("unable to allocate memory.\n");
- return -EFAULT;
- }
- if (copy_from_user ((char *)mod.sections, buf+sizeof(tmpmod), section_len)) {
- _stp_kfree(mod.sections);
- return -EFAULT;
- }
- for (i = 0; i < mod.num_sections; i++) {
- mod.sections[i].symbol =
- (char *)((long)mod.sections[i].symbol
- + (long)((long)mod.sections + mod.num_sections * sizeof(struct _stp_symbol)));
- }
-
- #if 0
- for (i = 0; i < mod.num_sections; i++)
- _dbug("section %d (stored at %p): %s %lx\n", i, &mod.sections[i], mod.sections[i].symbol, mod.sections[i].addr);
- #endif
-
- /* load symbols from tmpmod.module to mod */
- m = _stp_load_module_symbols(&mod, tmpmod.unwind_len);
- if (m == NULL) {
- _stp_kfree(mod.sections);
- return 0;
- }
-
- dbug(DEBUG_SYMBOLS, "module %s loaded. Text=%p text_size=%u\n", m->name, (int64_t)m->text, m->text_size);
- /* finally copy unwind info */
- if (copy_from_user (m->unwind_data, buf+sizeof(tmpmod)+section_len, tmpmod.unwind_len)) {
- _stp_free_module(m);
- _stp_kfree(mod.sections);
- return -EFAULT;
- }
- m->unwind_data_len = tmpmod.unwind_len;
-
- if (_stp_ins_module(m) < 0) {
- _stp_free_module(m);
- return -ENOMEM;
- }
-
- return count;
+ return;
}
-static int _stp_ctl_send (int type, void *data, int len);
-
-static int _stp_module_load_notify(struct notifier_block * self, unsigned long val, void * data)
+static int _stp_module_load_notify(struct notifier_block *self, unsigned long val, void *data)
{
struct module *mod = (struct module *)data;
struct _stp_module rmod;
-
switch (val) {
case MODULE_STATE_COMING:
- dbug(DEBUG_SYMBOLS, "module %s load notify\n", mod->name);
- strlcpy(rmod.name, mod->name, STP_MODULE_NAME_LEN);
- _stp_ctl_send(STP_MODULE, &rmod, sizeof(struct _stp_module));
+ dbug_sym(1, "module %s load notify\n", mod->name);
+ _stp_ins_module(mod);
break;
default:
errk("module loaded? val=%ld\n", val);
@@ -583,4 +581,72 @@ static struct notifier_block _stp_module_load_nb = {
.notifier_call = _stp_module_load_notify,
};
-#endif /* _SYMBOLS_C_ */
+#include <linux/seq_file.h>
+
+/*
+ * Load symbol data for every module currently installed, register the
+ * module-load notifier, and start the next phase of initialization.
+ *
+ * With STP_USE_DWARF_UNWINDER the names in _stp_modules[] are sent as
+ * NUL-terminated strings packed into STP_UNWIND messages of at most
+ * STP_CTL_BUFFER_SIZE bytes; the last message ends with the terminator
+ * name "*".  Otherwise STP_TRANSPORT is sent immediately.
+ *
+ * Returns 0 on success, or -1 if the "modules_op" lookup fails.
+ */
+static int _stp_init_modules(void)
+{
+	loff_t pos = 0;
+	void *res;
+	struct module *mod;
+	const struct seq_operations *modules_op = (const struct seq_operations *)_stp_kallsyms_lookup_name("modules_op");
+
+	if (modules_op == NULL) {
+		_dbug("Lookup of modules_op failed.\n");
+		return -1;
+	}
+
+	/* Use the seq_file interface to safely get a list of installed modules */
+	res = modules_op->start(NULL, &pos);
+	while (res) {
+		mod = list_entry(res, struct module, list);
+		_stp_ins_module(mod);
+		res = modules_op->next(NULL, res, &pos);
+	}
+
+	/* a failure to register is only logged; initialization continues */
+	if (register_module_notifier(&_stp_module_load_nb))
+		errk("failed to load module notifier\n");
+
+	/* unlocks the list */
+	modules_op->stop(NULL, NULL);
+
+#ifdef STP_USE_DWARF_UNWINDER
+	/* now that we have all the modules, ask for their unwind info */
+	{
+		unsigned long flags;
+		int i, left = STP_CTL_BUFFER_SIZE;
+		char buf[STP_CTL_BUFFER_SIZE];
+		char *ptr = buf;
+		*ptr = 0;
+
+		STP_RLOCK_MODULES;
+		/* Loop through modules, sending module names packed into */
+		/* messages of at most STP_CTL_BUFFER_SIZE bytes. */
+		for (i = 0; i < _stp_num_modules; i++) {
+			char *name = _stp_modules[i]->name;
+			int len = strlen(name);
+			/* name plus its NUL does not fit: flush and start over */
+			if (len >= left) {
+				_stp_ctl_send(STP_UNWIND, buf, sizeof(buf) - left);
+				ptr = buf;
+				left = STP_CTL_BUFFER_SIZE;
+			}
+			strlcpy(ptr, name, left);
+			ptr += len + 1;
+			left -= len + 1;
+		}
+		STP_RUNLOCK_MODULES;
+
+		/* The terminator needs two bytes ("*" and its NUL).  If the */
+		/* names left less than that, flush first; otherwise the "*" */
+		/* would be truncated away and the length passed to */
+		/* _stp_ctl_send() below would exceed sizeof(buf). */
+		if (left < 2) {
+			_stp_ctl_send(STP_UNWIND, buf, sizeof(buf) - left);
+			ptr = buf;
+			left = STP_CTL_BUFFER_SIZE;
+		}
+
+		/* Send terminator. When we get this back from stapio */
+		/* that means all the unwind info has been sent. */
+		strlcpy(ptr, "*", left);
+		left -= 2;
+		_stp_ctl_send(STP_UNWIND, buf, sizeof(buf) - left);
+	}
+#else
+	/* done with modules, now go */
+	_stp_ctl_send(STP_TRANSPORT, NULL, 0);
+#endif /* STP_USE_DWARF_UNWINDER */
+
+	return 0;
+}
+
+#endif /* _STP_SYMBOLS_C_ */
diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c
index 8335e44b..a4e4e652 100644
--- a/runtime/transport/transport.c
+++ b/runtime/transport/transport.c
@@ -23,83 +23,47 @@
#include "../procfs.c"
static struct utt_trace *_stp_utt = NULL;
-
+static unsigned int utt_seq = 1;
+static int _stp_probes_started = 0;
+pid_t _stp_target = 0;
+static int _stp_exit_called = 0;
+int _stp_exit_flag = 0;
#ifdef STP_OLD_TRANSPORT
#include "relayfs.c"
+#include "procfs.c"
#else
#include "utt.c"
+#include "control.c"
#endif
-static unsigned int utt_seq = 1;
-
-static int _stp_probes_started = 0;
-
/* module parameters */
static int _stp_bufsize;
module_param(_stp_bufsize, int, 0);
MODULE_PARM_DESC(_stp_bufsize, "buffer size");
-pid_t _stp_target = 0;
-static int _stp_exit_called = 0;
-int _stp_exit_flag = 0;
-
/* forward declarations */
void probe_exit(void);
int probe_start(void);
void _stp_exit(void);
-void _stp_handle_start (struct _stp_msg_start *st);
-static void _stp_detach(void);
-static void _stp_attach(void);
/* check for new workqueue API */
-#ifdef DECLARE_DELAYED_WORK
-static void _stp_work_queue (struct work_struct *data);
+#ifdef DECLARE_DELAYED_WORK
+static void _stp_work_queue(struct work_struct *data);
static DECLARE_DELAYED_WORK(_stp_work, _stp_work_queue);
#else
-static void _stp_work_queue (void *data);
+static void _stp_work_queue(void *data);
static DECLARE_WORK(_stp_work, _stp_work_queue, NULL);
#endif
static struct workqueue_struct *_stp_wq;
-static void _stp_ask_for_symbols(void);
-
-#ifdef STP_OLD_TRANSPORT
-#include "procfs.c"
-#else
-#include "control.c"
-#endif
-
-static void _stp_ask_for_symbols(void)
-{
- struct _stp_msg_symbol req;
- struct _stp_module mod;
- static int sent_symbols = 0;
-
- if (sent_symbols == 0) {
- /* ask for symbols and modules */
- kbug(DEBUG_SYMBOLS|DEBUG_TRANSPORT, "AFS\n");
-
- req.endian = 0x1234;
- req.ptr_size = sizeof(char *);
- _stp_ctl_send(STP_SYMBOLS, &req, sizeof(req));
-
- strcpy(mod.name, "");
- _stp_ctl_send(STP_MODULE, &mod, sizeof(mod));
- sent_symbols = 1;
- }
-}
/*
* _stp_handle_start - handle STP_START
*/
-void _stp_handle_start (struct _stp_msg_start *st)
+void _stp_handle_start(struct _stp_msg_start *st)
{
- kbug (DEBUG_TRANSPORT, "stp_handle_start\n");
-
- if (register_module_notifier(&_stp_module_load_nb))
- errk("failed to load module notifier\n");
-
+ dbug_trans(1, "stp_handle_start\n");
_stp_target = st->target;
st->res = probe_start();
if (st->res >= 0)
@@ -108,16 +72,14 @@ void _stp_handle_start (struct _stp_msg_start *st)
_stp_ctl_send(STP_START, st, sizeof(*st));
}
-
/* common cleanup code. */
/* This is called from the kernel thread when an exit was requested */
-/* by staprun or the exit() function. It is also called by transport_close() */
-/* when the module is removed. In that case "dont_rmmod" is set to 1. */
+/* by staprun or the exit() function. */
/* We need to call it both times because we want to clean up properly */
/* when someone does /sbin/rmmod on a loaded systemtap module. */
-static void _stp_cleanup_and_exit (int dont_rmmod)
+static void _stp_cleanup_and_exit(int send_exit)
{
- kbug(DEBUG_TRANSPORT, "cleanup_and_exit (%d)\n", dont_rmmod);
+ dbug_trans(1, "cleanup_and_exit (%d)\n", send_exit);
if (!_stp_exit_called) {
int failures;
@@ -128,23 +90,24 @@ static void _stp_cleanup_and_exit (int dont_rmmod)
_stp_exit_called = 1;
if (_stp_probes_started) {
- kbug(DEBUG_TRANSPORT, "calling probe_exit\n");
+ dbug_trans(1, "calling probe_exit\n");
/* tell the stap-generated code to unload its probes, etc */
probe_exit();
- kbug(DEBUG_TRANSPORT, "done with probe_exit\n");
+ dbug_trans(1, "done with probe_exit\n");
}
failures = atomic_read(&_stp_transport_failures);
if (failures)
- _stp_warn ("There were %d transport failures.\n", failures);
+ _stp_warn("There were %d transport failures.\n", failures);
- kbug(DEBUG_TRANSPORT, "************** calling startstop 0 *************\n");
- if (_stp_utt) utt_trace_startstop(_stp_utt, 0, &utt_seq);
+ dbug_trans(1, "************** calling startstop 0 *************\n");
+ if (_stp_utt)
+ utt_trace_startstop(_stp_utt, 0, &utt_seq);
- kbug(DEBUG_TRANSPORT, "ctl_send STP_EXIT\n");
- /* tell staprun to exit (if it is still there) */
- _stp_ctl_send(STP_EXIT, &dont_rmmod, sizeof(int));
- kbug(DEBUG_TRANSPORT, "done with ctl_send STP_EXIT\n");
+ dbug_trans(1, "ctl_send STP_EXIT\n");
+ if (send_exit)
+ _stp_ctl_send(STP_EXIT, NULL, 0);
+ dbug_trans(1, "done with ctl_send STP_EXIT\n");
}
}
@@ -153,7 +116,7 @@ static void _stp_cleanup_and_exit (int dont_rmmod)
*/
static void _stp_detach(void)
{
- kbug(DEBUG_TRANSPORT, "detach\n");
+ dbug_trans(1, "detach\n");
_stp_attached = 0;
_stp_pid = 0;
@@ -169,10 +132,10 @@ static void _stp_detach(void)
*/
static void _stp_attach(void)
{
- kbug(DEBUG_TRANSPORT, "attach\n");
+ dbug_trans(1, "attach\n");
_stp_attached = 1;
_stp_pid = current->pid;
- utt_set_overwrite(0);
+ utt_set_overwrite(0);
queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER);
}
@@ -180,10 +143,10 @@ static void _stp_attach(void)
* _stp_work_queue - periodically check for IO or exit
* This is run by a kernel thread and may sleep.
*/
-#ifdef DECLARE_DELAYED_WORK
-static void _stp_work_queue (struct work_struct *data)
+#ifdef DECLARE_DELAYED_WORK
+static void _stp_work_queue(struct work_struct *data)
#else
-static void _stp_work_queue (void *data)
+static void _stp_work_queue(void *data)
#endif
{
int do_io = 0;
@@ -198,7 +161,7 @@ static void _stp_work_queue (void *data)
/* if exit flag is set AND we have finished with probe_start() */
if (unlikely(_stp_exit_flag))
- _stp_cleanup_and_exit(0);
+ _stp_cleanup_and_exit(1);
else if (likely(_stp_attached))
queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER);
}
@@ -211,19 +174,19 @@ static void _stp_work_queue (void *data)
*/
void _stp_transport_close()
{
- kbug(DEBUG_TRANSPORT, "%d: ************** transport_close *************\n", current->pid);
- _stp_cleanup_and_exit(1);
+ dbug_trans(1, "%d: ************** transport_close *************\n", current->pid);
+ _stp_cleanup_and_exit(0);
destroy_workqueue(_stp_wq);
_stp_unregister_ctl_channel();
- if (_stp_utt) utt_trace_remove(_stp_utt);
+ if (_stp_utt)
+ utt_trace_remove(_stp_utt);
_stp_free_modules();
_stp_kill_time();
- _stp_print_cleanup(); /* free print buffers */
+ _stp_print_cleanup(); /* free print buffers */
_stp_mem_debug_done();
- kbug(DEBUG_TRANSPORT, "---- CLOSED ----\n");
+ dbug_trans(1, "---- CLOSED ----\n");
}
-
static struct utt_trace *_stp_utt_open(void)
{
struct utt_trace_setup utts;
@@ -249,22 +212,26 @@ int _stp_transport_init(void)
{
int ret;
- kbug(DEBUG_TRANSPORT, "transport_init\n");
+ dbug_trans(1, "transport_init\n");
_stp_init_pid = current->pid;
_stp_uid = current->uid;
_stp_gid = current->gid;
#ifdef RELAY_GUEST
- /* Guest scripts use relay only for reporting warnings and errors */
- _stp_subbuf_size = 65536;
- _stp_nsubbufs = 2;
+ /* Guest scripts use relay only for reporting warnings and errors */
+ _stp_subbuf_size = 65536;
+ _stp_nsubbufs = 2;
#endif
if (_stp_bufsize) {
unsigned size = _stp_bufsize * 1024 * 1024;
- _stp_subbuf_size = ((size >> 2) + 1) * 65536;
+ _stp_subbuf_size = 65536;
+ while (size / _stp_subbuf_size > 64 &&
+ _stp_subbuf_size < 1024 * 1024) {
+ _stp_subbuf_size <<= 1;
+ }
_stp_nsubbufs = size / _stp_subbuf_size;
- kbug(DEBUG_TRANSPORT, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size);
+ dbug_trans(1, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size);
}
/* initialize timer code */
@@ -286,41 +253,57 @@ int _stp_transport_init(void)
if (_stp_print_init() < 0)
goto err2;
+ /* start transport */
utt_trace_startstop(_stp_utt, 1, &utt_seq);
/* create workqueue of kernel threads */
_stp_wq = create_workqueue("systemtap");
if (!_stp_wq)
goto err3;
+
+ _stp_transport_state = 1;
+
+ dbug_trans(1, "calling init_kernel_symbols\n");
+ if (_stp_init_kernel_symbols() < 0)
+ goto err4;
+
+ dbug_trans(1, "calling init_modules\n");
+ if (_stp_init_modules() < 0)
+ goto err4;
+
return 0;
+err4:
+ errk("failed to initialize modules\n");
+ _stp_free_modules();
+ destroy_workqueue(_stp_wq);
err3:
_stp_print_cleanup();
err2:
_stp_unregister_ctl_channel();
err1:
- if (_stp_utt) utt_trace_remove(_stp_utt);
+ if (_stp_utt)
+ utt_trace_remove(_stp_utt);
err0:
_stp_kill_time();
return -1;
}
-
static inline void _stp_lock_inode(struct inode *inode)
{
#ifdef DEFINE_MUTEX
- mutex_lock(&inode->i_mutex);
+ mutex_lock(&inode->i_mutex);
#else
- down(&inode->i_sem);
+ down(&inode->i_sem);
#endif
}
static inline void _stp_unlock_inode(struct inode *inode)
{
#ifdef DEFINE_MUTEX
- mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
#else
- up(&inode->i_sem);
+ up(&inode->i_sem);
#endif
}
@@ -358,7 +341,8 @@ static void _stp_unlock_debugfs(void)
/* utt.c and relayfs.c. Will not be necessary if utt is included */
/* in the kernel. */
-static struct dentry *_stp_get_root_dir(const char *name) {
+static struct dentry *_stp_get_root_dir(const char *name)
+{
struct file_system_type *fs;
struct dentry *root;
struct super_block *sb;
@@ -377,7 +361,6 @@ static struct dentry *_stp_get_root_dir(const char *name) {
errk("Couldn't lock transport directory.\n");
return NULL;
}
-
#ifdef STP_OLD_TRANSPORT
root = relayfs_create_dir(name, NULL);
#else
@@ -389,12 +372,11 @@ static struct dentry *_stp_get_root_dir(const char *name) {
_stp_lock_inode(sb->s_root->d_inode);
root = lookup_one_len(name, sb->s_root, strlen(name));
_stp_unlock_inode(sb->s_root->d_inode);
- kbug(DEBUG_TRANSPORT, "root=%p\n", root);
if (!IS_ERR(root))
dput(root);
else {
root = NULL;
- kbug(DEBUG_TRANSPORT, "Could not create or find transport directory.\n");
+ errk("Could not create or find transport directory.\n");
}
}
_stp_unlock_debugfs();
diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h
index 6dc00d2b..dc499961 100644
--- a/runtime/transport/transport.h
+++ b/runtime/transport/transport.h
@@ -7,21 +7,37 @@
#include "transport_msgs.h"
-void _stp_warn (const char *fmt, ...);
-
+/* The size of print buffers. This limits the maximum */
+/* amount of data a print can send. */
#define STP_BUFFER_SIZE 8192
+/* STP_CTL_BUFFER_SIZE is the maximum size of a message */
+/* exchanged on the control channel. */
+#ifdef STP_OLD_TRANSPORT
+/* Old transport sends print output on control channel */
+#define STP_CTL_BUFFER_SIZE STP_BUFFER_SIZE
+#else
+#define STP_CTL_BUFFER_SIZE 256
+#endif
+
/* how often the work queue wakes up and checks buffers */
#define STP_WORK_TIMER (HZ/100)
static unsigned _stp_nsubbufs = 8;
static unsigned _stp_subbuf_size = 65536*4;
+
+void _stp_warn (const char *fmt, ...);
extern void _stp_transport_close(void);
extern int _stp_print_init(void);
extern void _stp_print_cleanup(void);
static struct dentry *_stp_get_root_dir(const char *name);
static int _stp_lock_debugfs(void);
static void _stp_unlock_debugfs(void);
+static int _stp_ctl_send(int type, void *data, int len);
+static void _stp_attach(void);
+static void _stp_detach(void);
+void _stp_handle_start(struct _stp_msg_start *st);
+
int _stp_pid = 0;
uid_t _stp_uid = 0;
gid_t _stp_gid = 0;
diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h
index 55de2d4a..5f385565 100644
--- a/runtime/transport/transport_msgs.h
+++ b/runtime/transport/transport_msgs.h
@@ -23,8 +23,7 @@ enum
STP_EXIT,
STP_OOB_DATA,
STP_SYSTEM,
- STP_SYMBOLS,
- STP_MODULE,
+ STP_UNWIND,
STP_TRANSPORT,
STP_CONNECT,
STP_DISCONNECT,
@@ -36,18 +35,16 @@ enum
STP_SUBBUFS_CONSUMED,
STP_REALTIME_DATA,
#endif
-
STP_MAX_CMD
};
-#ifdef DEBUG_TRANSPORT
+#ifdef DEBUG_TRANS
static const char *_stp_command_name[] = {
"STP_START",
"STP_EXIT",
"STP_OOB_DATA",
"STP_SYSTEM",
- "STP_SYMBOLS",
- "STP_MODULE",
+ "STP_UNWIND",
"STP_TRANSPORT",
"STP_CONNECT",
"STP_DISCONNECT",
@@ -59,68 +56,34 @@ static const char *_stp_command_name[] = {
"STP_REALTIME_DATA",
#endif
};
-#endif /* DEBUG_TRANSPORT */
+#endif /* DEBUG_TRANS */
/* control channel messages */
-/* command to execute: sent to staprun */
+/* command to execute: module->stapio */
struct _stp_msg_cmd
{
char cmd[128];
};
-/* request for symbol data. sent to staprun */
-struct _stp_msg_symbol
+/* Unwind data. stapio->module */
+struct _stp_msg_unwind
{
- int32_t endian;
- int32_t ptr_size;
+ /* the module name, or "*" for all */
+ char name[STP_MODULE_NAME_LEN];
+ /* length of unwind data */
+ uint32_t unwind_len;
+ /* data ...*/
};
/* Request to start probes. */
-/* Sent from staprun. Then returned from module. */
+/* stapio->module->stapio */
struct _stp_msg_start
{
pid_t target;
int32_t res; // for reply: result of probe_start()
};
-struct _stp_symbol32
-{
- uint32_t addr;
- uint32_t symbol;
-};
-
-struct _stp_symbol64
-{
- uint64_t addr;
- uint64_t symbol;
-};
-
-struct _stp_msg_symbol_hdr
-{
- uint32_t num_syms;
- uint32_t sym_size;
- uint32_t unwind_size;
-};
-
-struct _stp_msg_module {
- /* the module name, or "" for kernel */
- char name[STP_MODULE_NAME_LEN];
-
- /* A pointer to the struct module */
- uint64_t module;
-
- /* the start of the module's text and data sections */
- uint64_t text;
- uint64_t data;
-
- /* how many sections this module has */
- uint32_t num_sections;
-
- /* length of unwind data */
- uint32_t unwind_len;
-};
-
#ifdef STP_OLD_TRANSPORT
/**** for compatibility with old relayfs ****/
struct _stp_buf_info
diff --git a/runtime/unwind.c b/runtime/unwind.c
new file mode 100644
index 00000000..aa270cad
--- /dev/null
+++ b/runtime/unwind.c
@@ -0,0 +1,964 @@
+/* -*- linux-c -*-
+ * kernel stack unwinding
+ * Copyright (C) 2008 Red Hat Inc.
+ *
+ * Based on old kernel code that is
+ * Copyright (C) 2002-2006 Novell, Inc.
+ * Jan Beulich <jbeulich@novell.com>
+ *
+ * This code is released under version 2 of the GNU GPL.
+ *
+ * This code currently does stack unwinding in the
+ * kernel and modules. It will need some extension to handle
+ * userspace unwinding.
+ */
+
+#include "unwind/unwind.h"
+
+#ifdef STP_USE_DWARF_UNWINDER
+
+/* One row of the lookup table that _stp_create_unwind_hdr() builds. */
+struct eh_frame_hdr_table_entry {
+ unsigned long start, fde; /* start: value decoded from the FDE's first pointer field; fde: address of that FDE */
+};
+
+/* Sort callback: orders two table entries by ascending start value. */
+/* Returns -1, 0 or 1. */
+static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
+{
+	const struct eh_frame_hdr_table_entry *lhs = p1;
+	const struct eh_frame_hdr_table_entry *rhs = p2;
+
+	if (lhs->start < rhs->start)
+		return -1;
+	if (lhs->start > rhs->start)
+		return 1;
+	return 0;
+}
+
+/* Sort callback: exchanges the contents of two table entries. */
+/* The size argument is not used. */
+static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
+{
+	struct eh_frame_hdr_table_entry *a = p1;
+	struct eh_frame_hdr_table_entry *b = p2;
+	const unsigned long saved_start = a->start;
+	const unsigned long saved_fde = a->fde;
+
+	a->start = b->start;
+	b->start = saved_start;
+	a->fde = b->fde;
+	b->fde = saved_fde;
+}
+
+/*
+ * Build a binary-searchable unwind header for module m, doing some
+ * validity checks on its unwind data along the way.  In the future we
+ * might use .eh_frame_hdr if it is already present.
+ *
+ * On success m->unwind_hdr and m->unwind_hdr_len describe the new header.
+ * If the unwind data is unacceptable it is freed and m->unwind_data and
+ * m->unwind_data_len are cleared.  If the header cannot be allocated the
+ * function returns and leaves the unwind data alone.
+ */
+static void _stp_create_unwind_hdr(struct _stp_module *m)
+{
+	const u8 *ptr;
+	unsigned long tableSize, hdrSize, last;
+	unsigned n = 0;
+	const u32 *fde;
+	int bad_order = 0;
+	struct {
+		u8 version;
+		u8 eh_frame_ptr_enc;
+		u8 fde_count_enc;
+		u8 table_enc;
+		unsigned long eh_frame_ptr;
+		unsigned int fde_count;
+		struct eh_frame_hdr_table_entry table[];
+	} __attribute__ ((__packed__)) * header = NULL;
+
+	/* already did this or no data? */
+	if (m->unwind_hdr || m->unwind_data_len == 0)
+		return;
+
+	tableSize = m->unwind_data_len;
+	if (tableSize & (sizeof(*fde) - 1)) {
+		dbug_unwind(1, "tableSize=0x%x not a multiple of 0x%x\n", (int)tableSize, (int)sizeof(*fde));
+		goto bad;
+	}
+
+	/* First pass: count the FDEs and validate every entry. */
+	/* Each entry is a 32-bit length word followed by that many bytes. */
+	for (fde = m->unwind_data;
+	     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+		signed ptrType;
+		const u32 *cie;
+
+		/* check for extended length */
+		if ((*fde & 0xfffffff0) == 0xfffffff0) {
+			dbug_unwind(1, "Module %s has extended-length CIE or FDE.\n", m->name);
+			dbug_unwind(1, "This is not supported at this time.\n");
+			goto bad;
+		}
+		cie = cie_for_fde(fde, m);
+		if (cie == &not_fde)
+			continue;	/* fde was a CIE. That's OK, just skip it. */
+		if (cie == NULL || cie == &bad_cie || (ptrType = fde_pointer_type(cie)) < 0)
+			goto bad;
+		/* we have a real FDE */
+		ptr = (const u8 *)(fde + 2);
+		if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType))
+			goto bad;
+		++n;
+	}
+
+	/* leftover bytes or no FDE at all means the data is unusable */
+	if (tableSize || !n) {
+		dbug_unwind(1, "%s: tableSize=%ld, n=%d\n", m->name, tableSize, n);
+		goto bad;
+	}
+
+	/* four u8 fields + eh_frame_ptr + fde_count + n table entries */
+	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + 2 * n * sizeof(unsigned long);
+	header = _stp_kmalloc(hdrSize);
+	if (header == NULL) {
+		/* fall back to vmalloc and remember which free routine to use */
+		header = _stp_vmalloc(hdrSize);
+		if (header == NULL)
+			return;
+		m->allocated.unwind_hdr = 1;
+	}
+
+	header->version = 1;
+	header->eh_frame_ptr_enc = DW_EH_PE_absptr;
+	header->fde_count_enc = DW_EH_PE_data4;
+	header->table_enc = DW_EH_PE_absptr;
+	_stp_put_unaligned((unsigned long)m->unwind_data, &header->eh_frame_ptr);
+
+	BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
+		     % __alignof(typeof(header->fde_count)));
+	header->fde_count = n;
+
+	BUILD_BUG_ON(offsetof(typeof(*header), table) % __alignof(typeof(*header->table)));
+
+	/* Second pass: fill in the table, noting whether it is already sorted. */
+	n = 0;
+	last = 0;
+	tableSize = m->unwind_data_len;
+	for (fde = m->unwind_data; tableSize; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+		const u32 *cie = cie_for_fde(fde, m);
+		if (cie == &not_fde)
+			continue;
+		if (cie == NULL || cie == &bad_cie)
+			goto bad;
+		/* we have a real FDE */
+		ptr = (const u8 *)(fde + 2);
+		header->table[n].start = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, fde_pointer_type(cie));
+		header->table[n].fde = (unsigned long)fde;
+		if (header->table[n].start < last)
+			bad_order++;
+		last = header->table[n].start;
+		++n;
+	}
+	WARN_ON(n != header->fde_count);
+
+	/* Is sort ever necessary? */
+	if (bad_order)
+		_stp_sort(header->table, n, sizeof(*header->table), cmp_eh_frame_hdr_table_entries,
+			  swap_eh_frame_hdr_table_entries);
+
+	m->unwind_hdr_len = hdrSize;
+	m->unwind_hdr = header;
+	return;
+
+	/* unwind data is not acceptable. free it and return */
+bad:
+	dbug_unwind(1, "unwind data for %s is unacceptable. Freeing.\n", m->name);
+	if (header) {
+		if (m->allocated.unwind_hdr) {
+			m->allocated.unwind_hdr = 0;
+			_stp_vfree(header);
+		} else
+			_stp_kfree(header);
+	}
+	if (m->unwind_data) {
+		if (m->allocated.unwind_data)
+			_stp_vfree(m->unwind_data);
+		else
+			_stp_kfree(m->unwind_data);
+		m->unwind_data = NULL;
+		m->unwind_data_len = 0;
+	}
+	return;
+}
+
+/*
+ * Decode an unsigned LEB128 number starting at *pcur, reading no byte at
+ * or beyond end.  *pcur is advanced past the bytes consumed.  If the
+ * number does not fit in uleb128_t, *pcur is set to end + 1 so the caller
+ * can detect the overflow.
+ */
+static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
+{
+	const u8 *p = *pcur;
+	uleb128_t result = 0;
+	unsigned shift = 0;
+
+	while (p < end) {
+		const u8 byte = *p;
+
+		/* would the payload bits of this byte run off the top? */
+		if (shift + 7 > 8 * sizeof(result)
+		    && (byte & 0x7fU) >= (1U << (8 * sizeof(result) - shift))) {
+			p = end + 1;
+			break;
+		}
+		result |= (uleb128_t)(byte & 0x7f) << shift;
+		p++;
+		/* a clear top bit marks the last byte */
+		if (!(byte & 0x80))
+			break;
+		shift += 7;
+	}
+	*pcur = p;
+
+	return result;
+}
+
+/*
+ * Decode a signed LEB128 number starting at *pcur, reading no byte at or
+ * beyond end.  *pcur is advanced past the bytes consumed.  If the encoding
+ * is too long for sleb128_t, *pcur is set to end + 1 so the caller can
+ * detect the overflow.
+ */
+static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
+{
+	const u8 *cur = *pcur;
+	sleb128_t value = 0;
+	unsigned shift;
+
+	for (shift = 0; cur < end; shift += 7) {
+		const u8 byte = *cur;
+
+		/* Reject encodings longer than the value can hold.  The */
+		/* first test also keeps the shift counts below in range. */
+		if (shift >= 8 * sizeof(value)
+		    || (shift + 7 > 8 * sizeof(value)
+			&& (byte & 0x7fU) >= (1U << (8 * sizeof(value) - shift)))) {
+			cur = end + 1;
+			break;
+		}
+		value |= (sleb128_t)((uleb128_t)(byte & 0x7f) << shift);
+		/* Consume the byte whether or not more follow; without this a */
+		/* continuation byte would be decoded again on every pass. */
+		cur++;
+		if (!(byte & 0x80)) {
+			/* last byte: bit 6 is the sign, extend it upwards */
+			/* in the full width of the result */
+			value |= -((sleb128_t)(byte & 0x40) << shift);
+			break;
+		}
+	}
+	*pcur = cur;
+
+	return value;
+}
+
+/*
+ * Given an entry of m's unwind data, find the CIE it refers to.
+ *
+ * Returns:
+ *   &bad_cie  if the entry's length word is zero or not a multiple of 4,
+ *   &not_fde  if the entry is itself a CIE,
+ *   NULL      if the CIE pointer or the CIE it leads to fails validation,
+ *   otherwise a pointer to the CIE.
+ */
+static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *m)
+{
+	const u32 *cie;
+
+	/* check that length is proper */
+	if (!*fde || (*fde & (sizeof(*fde) - 1)))
+		return &bad_cie;
+
+	/* CIE id for eh_frame is 0, otherwise 0xffffffff */
+	if (m->unwind_is_ehframe && fde[1] == 0)
+		return &not_fde;
+	else if (fde[1] == 0xffffffff)
+		return &not_fde;
+
+	/* OK, must be an FDE.  Now find its CIE. */
+
+	/* CIE_pointer must be a proper offset */
+	if ((fde[1] & (sizeof(*fde) - 1)) || fde[1] > (unsigned long)(fde + 1) - (unsigned long)m->unwind_data) {
+		dbug_unwind(1, "fde[1]=%lx fde+1=%lx, unwind_data=%lx %lx\n",
+			    (unsigned long)fde[1], (unsigned long)(fde + 1),
+			    (unsigned long)m->unwind_data, (unsigned long)(fde + 1) - (unsigned long)m->unwind_data);
+		return NULL;	/* this is not a valid FDE */
+	}
+
+	/* cie pointer field is different in eh_frame vs debug_frame: */
+	/* eh_frame counts backwards from the field itself, debug_frame */
+	/* counts from the start of the unwind data */
+	if (m->unwind_is_ehframe)
+		cie = fde + 1 - fde[1] / sizeof(*fde);
+	else
+		cie = m->unwind_data + fde[1];
+
+	if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde)
+	    || (*cie & (sizeof(*cie) - 1))
+	    || (cie[1] != 0xffffffff && cie[1] != 0)) {
+		/* %lx expects an unsigned long, so the pointer is cast */
+		dbug_unwind(1, "cie is not valid %lx %x %x %x\n", (unsigned long)cie, *cie, fde[1], cie[1]);
+		return NULL;	/* this is not a (valid) CIE */
+	}
+
+	return cie;
+}
+
+/*
+ * Read one encoded pointer from *pLoc without reading past end.
+ * ptrType is a DW_EH_PE_* encoding byte: the DW_EH_PE_FORM bits select the
+ * stored format, the DW_EH_PE_ADJUST bits select how the value is adjusted,
+ * and DW_EH_PE_indirect asks for an extra read through _stp_read_address().
+ * On success returns the decoded value and advances *pLoc past it.
+ * Returns 0 and leaves *pLoc unchanged on any failure: negative or
+ * DW_EH_PE_omit ptrType, truncated data, unsupported form or adjustment,
+ * or a failed indirect read.  A decoded value of 0 is therefore
+ * indistinguishable from failure.
+ */
+static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType)
+{
+ unsigned long value = 0;
+ /* one cursor viewed at each of the widths we may need to read */
+ union {
+ const u8 *p8;
+ const u16 *p16u;
+ const s16 *p16s;
+ const u32 *p32u;
+ const s32 *p32s;
+ const unsigned long *pul;
+ } ptr;
+
+ if (ptrType < 0 || ptrType == DW_EH_PE_omit)
+ return 0;
+
+ ptr.p8 = *pLoc;
+ switch (ptrType & DW_EH_PE_FORM) {
+ case DW_EH_PE_data2:
+ if (end < (const void *)(ptr.p16u + 1))
+ return 0;
+ if (ptrType & DW_EH_PE_signed)
+ value = _stp_get_unaligned(ptr.p16s++);
+ else
+ value = _stp_get_unaligned(ptr.p16u++);
+ break;
+ case DW_EH_PE_data4:
+ /* With CONFIG_64BIT, data4 is read as 32 bits here and data8 falls */
+ /* through to the native-word read below.  Without it, data4 falls */
+ /* through to the native-word read and data8 has no case at all. */
+#ifdef CONFIG_64BIT
+ if (end < (const void *)(ptr.p32u + 1))
+ return 0;
+ if (ptrType & DW_EH_PE_signed)
+ value = _stp_get_unaligned(ptr.p32s++);
+ else
+ value = _stp_get_unaligned(ptr.p32u++);
+ break;
+ case DW_EH_PE_data8:
+ BUILD_BUG_ON(sizeof(u64) != sizeof(value));
+#else
+ BUILD_BUG_ON(sizeof(u32) != sizeof(value));
+#endif
+ /* fall through */
+ case DW_EH_PE_absptr:
+ if (end < (const void *)(ptr.pul + 1))
+ return 0;
+ value = _stp_get_unaligned(ptr.pul++);
+ break;
+ case DW_EH_PE_leb128:
+ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
+ value = ptrType & DW_EH_PE_signed ? get_sleb128(&ptr.p8, end)
+ : get_uleb128(&ptr.p8, end);
+ /* the LEB128 readers move the cursor beyond end on overflow */
+ if ((const void *)ptr.p8 > end)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ switch (ptrType & DW_EH_PE_ADJUST) {
+ case DW_EH_PE_absptr:
+ break;
+ case DW_EH_PE_pcrel:
+ /* relative to the address the encoded value was read from */
+ value += (unsigned long)*pLoc;
+ break;
+ default:
+ return 0;
+ }
+ /* NOTE(review): presumably replaces value with the word stored at */
+ /* address value; _stp_read_address() is defined elsewhere - confirm. */
+ if ((ptrType & DW_EH_PE_indirect)
+ && _stp_read_address(value, (unsigned long *)value, KERNEL_DS))
+ return 0;
+ /* commit the advanced cursor only once everything succeeded */
+ *pLoc = ptr.p8;
+
+ return value;
+}
+
+/*
+ * Determine the pointer encoding used by FDEs that refer to this CIE.
+ * Returns the encoding byte that follows an 'R' augmentation character,
+ * DW_EH_PE_absptr when the CIE has no augmentation string or the string
+ * has no 'R', or -1 if the CIE version or augmentation data is not
+ * supported or is malformed.
+ */
+static signed fde_pointer_type(const u32 *cie)
+{
+ /* the version byte follows the two 32-bit words at the start of the CIE */
+ const u8 *ptr = (const u8 *)(cie + 2);
+ unsigned version = *ptr;
+
+ if (version != 1)
+ return -1; /* unsupported */
+ /* a non-zero byte here means there is an augmentation string */
+ if (*++ptr) {
+ const char *aug;
+ const u8 *end = (const u8 *)(cie + 1) + *cie;
+ uleb128_t len;
+
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr != 'z')
+ return -1;
+ /* check if augmentation string is nul-terminated */
+ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
+ return -1;
+ ++ptr; /* skip terminator */
+ get_uleb128(&ptr, end); /* skip code alignment */
+ get_sleb128(&ptr, end); /* skip data alignment */
+ /* skip return address column */
+ /* (version is always 1 at this point, so this skips a single byte) */
+ version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
+ len = get_uleb128(&ptr, end); /* augmentation length */
+ if (ptr + len < ptr || ptr + len > end)
+ return -1;
+ /* from here on, end marks the end of the augmentation data */
+ end = ptr + len;
+ /* walk the characters after the leading 'z' */
+ while (*++aug) {
+ if (ptr >= end)
+ return -1;
+ switch (*aug) {
+ case 'L':
+ /* one byte of data, skipped */
+ ++ptr;
+ break;
+ case 'P':{
+ /* an encoding byte followed by a pointer in that encoding */
+ signed ptrType = *ptr++;
+
+ if (!read_pointer(&ptr, end, ptrType) || ptr > end)
+ return -1;
+ }
+ break;
+ case 'R':
+ /* this byte is the FDE pointer encoding we are after */
+ return *ptr;
+ default:
+ return -1;
+ }
+ }
+ }
+ return DW_EH_PE_absptr;
+}
+
+/*
+ * Move state->loc forward by delta units of state->codeAlign.
+ * Returns 1 when delta is non-zero and 0 when it is zero.
+ */
+static int advance_loc(unsigned long delta, struct unwind_state *state)
+{
+	const unsigned long step = delta * state->codeAlign;
+
+	state->loc += step;
+	dbug_unwind(1, "state->loc=%lx\n", state->loc);
+	if (delta == 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * Record the rule (where, value) for register reg in state->regs[].
+ * A register number outside state->regs[] is ignored.
+ */
+static void set_rule(uleb128_t reg, enum item_location where, uleb128_t value, struct unwind_state *state)
+{
+	dbug_unwind(1, "reg=%d, where=%d, value=%lx\n", reg, where, value);
+	if (reg >= ARRAY_SIZE(state->regs))
+		return;
+
+	state->regs[reg].value = value;
+	state->regs[reg].where = where;
+}
+
+/*
+ * processCFI - execute a sequence of DWARF call frame instructions.
+ * @start:     first instruction byte
+ * @end:       one past the last instruction byte
+ * @targetLoc: code location being unwound; 0 means "run every instruction"
+ * @ptrType:   pointer encoding used to decode DW_CFA_set_loc operands
+ * @state:     register rules, CFA rule and current location, updated in place
+ *
+ * When @start is not the CIE's own instruction stream, the location is reset
+ * to state->org and the CIE's initial instructions (state->cieStart ..
+ * state->cieEnd) are executed first.  Execution stops successfully as soon
+ * as the current location passes a non-zero @targetLoc.
+ *
+ * DW_CFA_restore_state is implemented by replaying the instructions from
+ * @start with state->label set to the remembered position; the replay
+ * returns when DW_CFA_remember_state is reached at that label.
+ *
+ * Returns non-zero on success, 0 on malformed or unsupported input.
+ */
+static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, signed ptrType, struct unwind_state *state)
+{
+	union {
+		const u8 *p8;
+		const u16 *p16;
+		const u32 *p32;
+	} ptr;
+	int result = 1;
+
+	dbug_unwind(1, "targetLoc=%lx state->loc=%lx\n", targetLoc, state->loc);
+	if (start != state->cieStart) {
+		/* FDE instructions: establish the CIE's initial rules first. */
+		state->loc = state->org;
+		result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
+		if (targetLoc == 0 && state->label == NULL)
+			return result;
+	}
+
+	for (ptr.p8 = start; result && ptr.p8 < end;) {
+		/* The top two bits select the primary opcode; 0 means extended. */
+		switch (*ptr.p8 >> 6) {
+			uleb128_t value;
+		case 0:
+			switch (*ptr.p8++) {
+			case DW_CFA_nop:
+				dbug_unwind(1, "DW_CFA_nop\n");
+				break;
+			case DW_CFA_set_loc:
+				if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0)
+					result = 0;
+				dbug_unwind(1, "DW_CFA_set_loc %lx (result=%d)\n", state->loc, result);
+				break;
+			case DW_CFA_advance_loc1:
+				result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
+				dbug_unwind(1, "DW_CFA_advance_loc1 %d\n", result);
+				break;
+			case DW_CFA_advance_loc2:
+				/* The whole 2-byte operand must lie inside [ptr, end). */
+				result = end - ptr.p8 >= 2 && advance_loc(*ptr.p16++, state);
+				dbug_unwind(1, "DW_CFA_advance_loc2 %d\n", result);
+				break;
+			case DW_CFA_advance_loc4:
+				/* The whole 4-byte operand must lie inside [ptr, end). */
+				result = end - ptr.p8 >= 4 && advance_loc(*ptr.p32++, state);
+				dbug_unwind(1, "DW_CFA_advance_loc4 %d\n", result);
+				break;
+			case DW_CFA_offset_extended:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+				dbug_unwind(1, "DW_CFA_offset_extended\n");
+				break;
+			case DW_CFA_val_offset:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
+				dbug_unwind(1, "DW_CFA_val_offset\n");
+				break;
+			case DW_CFA_offset_extended_sf:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
+				dbug_unwind(1, "DW_CFA_offset_extended_sf\n");
+				break;
+			case DW_CFA_val_offset_sf:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
+				dbug_unwind(1, "DW_CFA_val_offset_sf\n");
+				break;
+			case DW_CFA_restore_extended:
+			case DW_CFA_undefined:
+			case DW_CFA_same_value:
+				/* All three are treated alike: the register gets no rule. */
+				set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
+				dbug_unwind(1, "DW_CFA_undefined\n");
+				break;
+			case DW_CFA_register:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Register, get_uleb128(&ptr.p8, end), state);
+				dbug_unwind(1, "DW_CFA_register\n");
+				break;
+			case DW_CFA_remember_state:
+				dbug_unwind(1, "DW_CFA_remember_state\n");
+				if (ptr.p8 == state->label) {
+					/* Replay for DW_CFA_restore_state reached its target. */
+					state->label = NULL;
+					return 1;
+				}
+				if (state->stackDepth >= MAX_STACK_DEPTH)
+					return 0;
+				state->stack[state->stackDepth++] = ptr.p8;
+				break;
+			case DW_CFA_restore_state:
+				dbug_unwind(1, "DW_CFA_restore_state\n");
+				if (state->stackDepth) {
+					const uleb128_t loc = state->loc;
+					const u8 *label = state->label;
+
+					/*
+					 * Rebuild the remembered rules by replaying from
+					 * the start up to the remembered position, then
+					 * put back the current location and label.
+					 */
+					state->label = state->stack[state->stackDepth - 1];
+					memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
+					memset(state->regs, 0, sizeof(state->regs));
+					state->stackDepth = 0;
+					result = processCFI(start, end, 0, ptrType, state);
+					state->loc = loc;
+					state->label = label;
+				} else
+					return 0;
+				break;
+			case DW_CFA_def_cfa:
+				state->cfa.reg = get_uleb128(&ptr.p8, end);
+				dbug_unwind(1, "DW_CFA_def_cfa reg=%ld\n", state->cfa.reg);
+				/*nobreak */
+			case DW_CFA_def_cfa_offset:
+				state->cfa.offs = get_uleb128(&ptr.p8, end);
+				dbug_unwind(1, "DW_CFA_def_cfa_offset offs=%lx\n", state->cfa.offs);
+				break;
+			case DW_CFA_def_cfa_sf:
+				state->cfa.reg = get_uleb128(&ptr.p8, end);
+				dbug_unwind(1, "DW_CFA_def_cfa_sf reg=%ld\n", state->cfa.reg);
+				/*nobreak */
+			case DW_CFA_def_cfa_offset_sf:
+				state->cfa.offs = get_sleb128(&ptr.p8, end) * state->dataAlign;
+				dbug_unwind(1, "DW_CFA_def_cfa_offset_sf offs=%lx\n", state->cfa.offs);
+				break;
+			case DW_CFA_def_cfa_register:
+				state->cfa.reg = get_uleb128(&ptr.p8, end);
+				dbug_unwind(1, "DW_CFA_def_cfa_register reg=%ld\n", state->cfa.reg);
+				break;
+				/*todo case DW_CFA_def_cfa_expression: */
+				/*todo case DW_CFA_expression: */
+				/*todo case DW_CFA_val_expression: */
+			case DW_CFA_GNU_args_size:
+				/* Operand is decoded only to skip over it. */
+				get_uleb128(&ptr.p8, end);
+				dbug_unwind(1, "DW_CFA_GNU_args_size\n");
+				break;
+			case DW_CFA_GNU_negative_offset_extended:
+				value = get_uleb128(&ptr.p8, end);
+				set_rule(value, Memory, (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
+				dbug_unwind(1, "DW_CFA_GNU_negative_offset_extended\n");
+				break;
+			case DW_CFA_GNU_window_save:
+			default:
+				dbug_unwind(1, "unimplemented call frame instruction: 0x%x\n", *(ptr.p8 - 1));
+				result = 0;
+				break;
+			}
+			break;
+		case 1:
+			/* Primary opcode 1: advance location by the low 6 bits. */
+			result = advance_loc(*ptr.p8++ & 0x3f, state);
+			dbug_unwind(1, "case 1\n");
+			break;
+		case 2:
+			/* Primary opcode 2: register (low 6 bits) saved in memory. */
+			value = *ptr.p8++ & 0x3f;
+			set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+			dbug_unwind(1, "case 2\n");
+			break;
+		case 3:
+			/* Primary opcode 3: register (low 6 bits) gets no rule. */
+			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
+			dbug_unwind(1, "case 3\n");
+			break;
+		}
+		dbug_unwind(1, "targetLoc=%lx state->loc=%lx\n", targetLoc, state->loc);
+		/* An operand decoder ran past the end of the instruction stream. */
+		if (ptr.p8 > end)
+			result = 0;
+		if (result && targetLoc != 0 && targetLoc < state->loc)
+			return 1;
+	}
+	return result && ptr.p8 == end && (targetLoc == 0 || state->label == NULL);
+}
+
+/*
+ * If we previously created an unwind header, then use it now to binary
+ * search for the FDE corresponding to pc.
+ *
+ * Header bytes as used below: hdr[0] must be 1, hdr[1] is the encoding of
+ * the eh_frame pointer, hdr[2] the encoding of the entry count, and hdr[3]
+ * the encoding of the table entries.  Each table entry is a pair of
+ * tableSize-byte values: a start location followed by the FDE address.
+ *
+ * Returns the FDE address read from the selected entry, or NULL when the
+ * header is missing/invalid or no entry with a start location <= pc exists.
+ */
+
+static u32 *_stp_search_unwind_hdr(unsigned long pc, struct _stp_module *m)
+{
+ const u8 *ptr, *end, *hdr = m->unwind_hdr;
+ unsigned long startLoc;
+ u32 *fde = NULL;
+ unsigned num, tableSize, t2;
+
+ if (hdr == NULL || hdr[0] != 1)
+ return NULL;
+
+ dbug_unwind(1, "search for %lx", pc);
+
+ /* table_enc */
+ /* Only fixed-size entry encodings can be indexed by the binary search. */
+ switch (hdr[3] & DW_EH_PE_FORM) {
+ case DW_EH_PE_absptr:
+ tableSize = sizeof(unsigned long);
+ break;
+ case DW_EH_PE_data2:
+ tableSize = 2;
+ break;
+ case DW_EH_PE_data4:
+ tableSize = 4;
+ break;
+ case DW_EH_PE_data8:
+ tableSize = 8;
+ break;
+ default:
+ dbug_unwind(1, "bad table encoding");
+ return NULL;
+ }
+ ptr = hdr + 4;
+ end = hdr + m->unwind_hdr_len;
+
+ /* The header must point at this module's unwind data. */
+ if (read_pointer(&ptr, end, hdr[1]) != (unsigned long)m->unwind_data) {
+ dbug_unwind(1, "eh_frame_ptr not valid");
+ return NULL;
+ }
+
+ /* The entry count must exactly account for the rest of the header. */
+ num = read_pointer(&ptr, end, hdr[2]);
+ if (num == 0 || num != (end - ptr) / (2 * tableSize) || (end - ptr) % (2 * tableSize)) {
+ dbug_unwind(1, "Bad num=%d end-ptr=%ld 2*tableSize=%d", num, end - ptr, 2 * tableSize);
+ return NULL;
+ }
+
+ /*
+ * Halve the candidate range [ptr, ptr + num entries) each iteration:
+ * keep the lower half when pc is below the middle entry's start
+ * location, otherwise restart from the middle entry.
+ */
+ do {
+ const u8 *cur = ptr + (num / 2) * (2 * tableSize);
+ startLoc = read_pointer(&cur, cur + tableSize, hdr[3]);
+ if (pc < startLoc)
+ num /= 2;
+ else {
+ /* cur - tableSize is used as the start of the middle entry */
+ ptr = cur - tableSize;
+ num = (num + 1) / 2;
+ }
+ } while (startLoc && num > 1);
+
+ /* One candidate left: accept it if its start location is valid and <= pc. */
+ if (num == 1 && (startLoc = read_pointer(&ptr, ptr + tableSize, hdr[3])) != 0 && pc >= startLoc)
+ fde = (void *)read_pointer(&ptr, ptr + tableSize, hdr[3]);
+
+ dbug_unwind(1, "returning fde=%lx startLoc=%lx", fde, startLoc);
+ return fde;
+}
+
+#ifdef DEBUG_UNWIND
+/* Names of the "adjust" part of a pointer encoding, indexed by
+ * (encoding & DW_EH_PE_ADJUST) >> 4. */
+static const char *_stp_enc_hi_name[] = {
+	"",
+	"DW_EH_PE_pcrel",
+	"DW_EH_PE_textrel",
+	"DW_EH_PE_datarel",
+	"DW_EH_PE_funcrel",
+	"DW_EH_PE_aligned"
+};
+/* Names of the "form" part of a pointer encoding: index 0-4 are the
+ * unsigned forms, 5-8 the signed variants of forms 1-4. */
+static const char *_stp_enc_lo_name[] = {
+	"DW_EH_PE_absptr",
+	"DW_EH_PE_uleb128",
+	"DW_EH_PE_udata2",
+	"DW_EH_PE_udata4",
+	"DW_EH_PE_udata8",
+	"DW_EH_PE_sleb128",
+	"DW_EH_PE_sdata2",
+	"DW_EH_PE_sdata4",
+	"DW_EH_PE_sdata8"
+};
+
+/*
+ * Debug helper: render a DW_EH_PE_* pointer encoding as a '|'-separated
+ * list of symbolic names, e.g. "DW_EH_PE_pcrel|DW_EH_PE_sdata4".
+ *
+ * Returns a string literal for DW_EH_PE_omit, otherwise a pointer to a
+ * function-static buffer that is overwritten by the next call.  Encodings
+ * that cannot be decoded yield "ERROR:encoding=0x..".
+ */
+char *_stp_eh_enc_name(signed type)
+{
+	static char buf[64];
+	int hi, low;
+
+	if (type == DW_EH_PE_omit)
+		return "DW_EH_PE_omit";
+
+	hi = (type & DW_EH_PE_ADJUST) >> 4;
+	low = type & DW_EH_PE_FORM;
+	/* Reject values that would index outside the name tables. */
+	if (hi > 5 || low > 4 || (low == 0 && (type & DW_EH_PE_signed))) {
+		sprintf(buf, "ERROR:encoding=0x%x", type);
+		return buf;
+	}
+
+	buf[0] = 0;
+	if (type & DW_EH_PE_indirect)
+		strlcpy(buf, "DW_EH_PE_indirect|", sizeof(buf));
+	if (hi) {
+		strlcat(buf, _stp_enc_hi_name[hi], sizeof(buf));
+		/* separate the adjust name from the form name that follows */
+		strlcat(buf, "|", sizeof(buf));
+	}
+
+	/* Signed forms 1-4 occupy slots 5-8 of the form name table. */
+	if (type & DW_EH_PE_signed)
+		low += 4;
+	strlcat(buf, _stp_enc_lo_name[low], sizeof(buf));
+	return buf;
+}
+#endif /* DEBUG_UNWIND */
+
+/* Unwind to the previous frame. Returns 0 if successful, negative
+ * number in case of an error. A positive return means unwinding is finished;
+ * don't try to fallback to dumping addresses on the stack.
+ *
+ * Outline: look up the module covering the pc, find the FDE for the pc
+ * (header binary search first, linear scan of the unwind data as fallback),
+ * parse the CIE, run the call frame instructions up to the pc, and finally
+ * rewrite the registers stored in *frame according to the resulting rules.
+ *
+ * NOTE(review): every exit taken after _stp_get_unwind_info() succeeds calls
+ * read_unlock(&m->lock); presumably the lookup returns with that lock
+ * read-held -- confirm against _stp_get_unwind_info(). */
+int unwind(struct unwind_frame_info *frame)
+{
+/* Access register r of the frame, viewing the frame as an array of t. */
+#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
+ const u32 *fde, *cie = NULL;
+ const u8 *ptr = NULL, *end = NULL;
+ /* For call frames, look up the address just before the stored pc. */
+ unsigned long pc = UNW_PC(frame) - frame->call_frame;
+ unsigned long tableSize, startLoc = 0, endLoc = 0, cfa;
+ unsigned i;
+ signed ptrType = -1;
+ uleb128_t retAddrReg = 0;
+ struct _stp_module *m;
+ struct unwind_state state;
+
+ dbug_unwind(1, "pc=%lx, %lx", pc, UNW_PC(frame));
+
+ if (UNW_PC(frame) == 0)
+ return -EINVAL;
+
+ m = _stp_get_unwind_info(pc);
+ if (unlikely(m == NULL)) {
+ dbug_unwind(1, "No module found for pc=%lx", pc);
+ return -EINVAL;
+ }
+
+ /* The unwind data must be non-empty and a whole number of u32 words. */
+ if (unlikely(m->unwind_data_len == 0 || m->unwind_data_len & (sizeof(*fde) - 1))) {
+ dbug_unwind(1, "Module %s: unwind_data_len=%d", m->name, m->unwind_data_len);
+ goto err;
+ }
+
+ fde = _stp_search_unwind_hdr(pc, m);
+ dbug_unwind(1, "%s: fde=%lx\n", m->name, fde);
+
+ /* found the fde, now set startLoc and endLoc */
+ if (fde != NULL) {
+ cie = cie_for_fde(fde, m);
+ if (likely(cie != NULL && cie != &bad_cie && cie != &not_fde)) {
+ ptr = (const u8 *)(fde + 2);
+ /* NOTE(review): unlike the linear search below, a negative
+ * ptrType is not rejected on this path -- confirm intended. */
+ ptrType = fde_pointer_type(cie);
+ startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType);
+ dbug_unwind(2, "startLoc=%lx, ptrType=%s", startLoc, _stp_eh_enc_name(ptrType));
+ /* The range length is read as a plain value: drop the adjust bits. */
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM | DW_EH_PE_signed;
+ endLoc = startLoc + read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType);
+ /* NOTE(review): the linear search below accepts pc < endLoc,
+ * so pc == endLoc is treated differently here -- confirm. */
+ if (pc > endLoc) {
+ dbug_unwind(1, "pc (%lx) > endLoc(%lx)\n", pc, endLoc);
+ goto done;
+ }
+ } else {
+ dbug_unwind(1, "fde found in header, but cie is bad!\n");
+ fde = NULL;
+ }
+ }
+
+ /* did not find a good fde with binary search, so do a slow linear search */
+ if (fde == NULL) {
+ /* Each record is a u32 length followed by that many bytes. The loop
+ * condition resets cie to NULL on every evaluation, so cie is only
+ * non-NULL afterwards if the loop was left via the matching break. */
+ for (fde = m->unwind_data, tableSize = m->unwind_data_len; cie = NULL, tableSize > sizeof(*fde)
+ && tableSize - sizeof(*fde) >= *fde; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ dbug_unwind(3, "fde=%lx tableSize=%d\n", (long)*fde, (int)tableSize);
+ cie = cie_for_fde(fde, m);
+ if (cie == &bad_cie) {
+ cie = NULL;
+ break;
+ }
+ if (cie == NULL || cie == &not_fde || (ptrType = fde_pointer_type(cie)) < 0)
+ continue;
+
+ ptr = (const u8 *)(fde + 2);
+ startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType);
+ dbug_unwind(2, "startLoc=%lx, ptrType=%s", startLoc, _stp_eh_enc_name(ptrType));
+ if (!startLoc)
+ continue;
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM | DW_EH_PE_signed;
+ endLoc = startLoc + read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType);
+ dbug_unwind(3, "endLoc=%lx\n", endLoc);
+ if (pc >= startLoc && pc < endLoc)
+ break;
+ }
+ }
+
+ dbug_unwind(1, "cie=%lx fde=%lx startLoc=%lx endLoc=%lx\n", cie, fde, startLoc, endLoc);
+ if (cie == NULL || fde == NULL)
+ goto err;
+
+ /* found the CIE and FDE */
+
+ memset(&state, 0, sizeof(state));
+ /* ptr currently points just past the FDE's address range fields */
+ state.cieEnd = ptr; /* keep here temporarily */
+ ptr = (const u8 *)(cie + 2);
+ end = (const u8 *)(cie + 1) + *cie;
+ frame->call_frame = 1;
+ if ((state.version = *ptr) != 1) {
+ dbug_unwind(1, "CIE version number is %d. 1 is supported.\n", state.version);
+ goto err; /* unsupported version */
+ }
+ if (*++ptr) {
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr == 'z') {
+ while (++ptr < end && *ptr) {
+ switch (*ptr) {
+ /* check for ignorable (or already handled)
+ * nul-terminated augmentation string */
+ case 'L':
+ case 'P':
+ case 'R':
+ continue;
+ case 'S':
+ dbug_unwind(1, "This is a signal frame\n");
+ frame->call_frame = 0;
+ continue;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ /* Anything but the terminating NUL here means an unknown augmentation. */
+ if (ptr >= end || *ptr) {
+ dbug_unwind(1, "Problem parsing the augmentation string.\n");
+ goto err;
+ }
+ }
+ ++ptr;
+
+ /* get code alignment factor */
+ state.codeAlign = get_uleb128(&ptr, end);
+ /* get data alignment factor */
+ state.dataAlign = get_sleb128(&ptr, end);
+ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
+ goto err;;
+
+ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
+
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+ ptr += augSize;
+ }
+ /* The return address register must be a known, long-sized register. */
+ if (ptr > end || retAddrReg >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(retAddrReg)
+ || reg_info[retAddrReg].width != sizeof(unsigned long))
+ goto err;
+
+ /* CIE instructions are [cieStart, cieEnd); then switch over to the FDE,
+ * resuming at the position stashed in state.cieEnd above. */
+ state.cieStart = ptr;
+ ptr = state.cieEnd;
+ state.cieEnd = end;
+ end = (const u8 *)(fde + 1) + *fde;
+
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+ if ((ptr += augSize) > end)
+ goto err;
+ }
+
+ state.org = startLoc;
+ /* Start from an invalid CFA rule so that a missing definition is caught. */
+ memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
+ /* process instructions */
+ if (!processCFI(ptr, end, pc, ptrType, &state)
+ || state.loc > endLoc || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info)
+ || reg_info[state.cfa.reg].width != sizeof(unsigned long)
+ || state.cfa.offs % sizeof(unsigned long))
+ goto err;
+
+ /* update frame */
+#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
+ if (frame->call_frame && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
+ frame->call_frame = 0;
+#endif
+ /* CFA = value of the CFA register in the current frame + CFA offset */
+ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
+ startLoc = min((unsigned long)UNW_SP(frame), cfa);
+ endLoc = max((unsigned long)UNW_SP(frame), cfa);
+ dbug_unwind(1, "cfa=%lx startLoc=%lx, endLoc=%lx\n", cfa, startLoc, endLoc);
+ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
+ startLoc = min(STACK_LIMIT(cfa), cfa);
+ endLoc = max(STACK_LIMIT(cfa), cfa);
+ dbug_unwind(1, "cfa startLoc=%p, endLoc=%p\n", (u64)startLoc, (u64)endLoc);
+ }
+ /* NOTE(review): startLoc/endLoc are only logged after this point. */
+#ifndef CONFIG_64BIT
+# define CASES CASE(8); CASE(16); CASE(32)
+#else
+# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
+#endif
+ dbug_unwind(1, "cie=%lx fde=%lx\n", cie, fde);
+ /* Pass 1: validate the rules and, for Register rules, replace the source
+ * register number with that register's current value, so that pass 2
+ * cannot read a register it has already overwritten. */
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i)) {
+ if (state.regs[i].where == Nowhere)
+ continue;
+ dbug_unwind(2, "REG_INVALID %d\n", i);
+ goto err;
+ }
+ dbug_unwind(2, "register %d. where=%d\n", i, state.regs[i].where);
+ switch (state.regs[i].where) {
+ default:
+ break;
+ case Register:
+ if (state.regs[i].value >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(state.regs[i].value)
+ || reg_info[i].width > reg_info[state.regs[i].value].width) {
+ dbug_unwind(2, "case Register bad\n");
+ goto err;
+ }
+ switch (reg_info[state.regs[i].value].width) {
+#define CASE(n) \
+ case sizeof(u##n): \
+ state.regs[i].value = FRAME_REG(state.regs[i].value, \
+ const u##n); \
+ break
+ CASES;
+#undef CASE
+ default:
+ dbug_unwind(2, "default\n");
+ goto err;
+ }
+ break;
+ }
+ }
+ /* Pass 2: store the caller's register values into the frame. */
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ dbug_unwind(2, "register %d. invalid=%d\n", i, REG_INVALID(i));
+ if (REG_INVALID(i))
+ continue;
+ dbug_unwind(2, "register %d. where=%d\n", i, state.regs[i].where);
+ switch (state.regs[i].where) {
+ case Nowhere:
+ /* Registers without a rule keep their value, except the
+ * stack pointer, which becomes the CFA. */
+ if (reg_info[i].width != sizeof(UNW_SP(frame))
+ || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
+ != &UNW_SP(frame))
+ continue;
+ UNW_SP(frame) = cfa;
+ break;
+ case Register:
+ switch (reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ FRAME_REG(i, u##n) = state.regs[i].value; \
+ break
+ CASES;
+#undef CASE
+ default:
+ dbug_unwind(2, "default\n");
+ goto err;
+ }
+ break;
+ case Value:
+ if (reg_info[i].width != sizeof(unsigned long)) {
+ dbug_unwind(2, "Value\n");
+ goto err;
+ }
+ /* the register's value is the address CFA + offset itself */
+ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value * state.dataAlign;
+ break;
+ case Memory:{
+ /* the register was saved at CFA + offset: load it from there */
+ unsigned long addr = cfa + state.regs[i].value * state.dataAlign;
+ dbug_unwind(2, "addr=%lx width=%d\n", addr, reg_info[i].width);
+ switch (reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ if (unlikely(_stp_read_address(FRAME_REG(i, u##n), (u##n *)addr, KERNEL_DS))) \
+ goto copy_failed;\
+ dbug_unwind(1, "set register %d to %lx\n", i, (long)FRAME_REG(i,u##n));\
+ break
+ CASES;
+#undef CASE
+ default:
+ dbug_unwind(2, "default\n");
+ goto err;
+ }
+ }
+ break;
+ }
+ }
+ read_unlock(&m->lock);
+ dbug_unwind(1, "returning 0 (%lx)\n", UNW_PC(frame));
+ return 0;
+
+copy_failed:
+ dbug_unwind(1, "_stp_read_address failed to access memory\n");
+err:
+ read_unlock(&m->lock);
+ return -EIO;
+
+done:
+ /* PC was in a range covered by a module but no unwind info */
+ /* found for the specific PC. This seems to happen only for kretprobe */
+ /* trampolines and at the end of interrupt backtraces. */
+ read_unlock(&m->lock);
+ return 1;
+#undef CASES
+#undef FRAME_REG
+}
+
+
+#endif /* STP_USE_DWARF_UNWINDER */
diff --git a/runtime/unwind/i386.h b/runtime/unwind/i386.h
new file mode 100644
index 00000000..79e6ba73
--- /dev/null
+++ b/runtime/unwind/i386.h
@@ -0,0 +1,135 @@
+/* -*- linux-c -*-
+ *
+ * 32-bit x86 dwarf unwinder header file
+ * Copyright (C) 2008 Red Hat Inc.
+ * Copyright (C) 2002-2006 Novell, Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+#ifndef _STP_I386_UNWIND_H
+#define _STP_I386_UNWIND_H
+
+#include <linux/sched.h>
+#include <asm/fixmap.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+/* these are simple for i386: both helpers are a plain dereference of ptr,
+ * with no special handling for unaligned addresses */
+#define _stp_get_unaligned(ptr) (*(ptr))
+#define _stp_put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
+
+/* State of one stack frame while unwinding. */
+struct unwind_frame_info
+{
+ /* register values of the frame; UNW_PC()/UNW_SP() and reg_info index into this */
+ struct pt_regs regs;
+ /* task being unwound; read by arch_unw_init_blocked() */
+ struct task_struct *task;
+ /* 1 for an ordinary call frame; unwind() clears it for signal frames */
+ unsigned call_frame:1;
+};
+
+/* Round (ptr - 1) down to a multiple of THREAD_SIZE. unwind() compares the
+ * results for two addresses to see whether they fall in the same
+ * THREAD_SIZE-aligned block. */
+#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
+
+/*
+ * Accessors for the program counter and stack pointer of a frame, and the
+ * list of pt_regs fields that initialises reg_info[].  reg_info[] is indexed
+ * by the register numbers found in the call frame instructions, so the order
+ * of the entries below is significant.  The two variants differ only in the
+ * pt_regs field names (with or without the 'e' prefix).
+ */
+#ifdef STAPCONF_X86_UNIREGS
+
+#define UNW_PC(frame) (frame)->regs.ip
+#define UNW_SP(frame) (frame)->regs.sp
+
+#define UNW_REGISTER_INFO \
+ PTREGS_INFO(ax), \
+ PTREGS_INFO(cx), \
+ PTREGS_INFO(dx), \
+ PTREGS_INFO(bx), \
+ PTREGS_INFO(sp), \
+ PTREGS_INFO(bp), \
+ PTREGS_INFO(si), \
+ PTREGS_INFO(di), \
+ PTREGS_INFO(ip)
+
+#else /* !STAPCONF_X86_UNIREGS */
+
+#define UNW_PC(frame) (frame)->regs.eip
+#define UNW_SP(frame) (frame)->regs.esp
+
+#define UNW_REGISTER_INFO \
+ PTREGS_INFO(eax), \
+ PTREGS_INFO(ecx), \
+ PTREGS_INFO(edx), \
+ PTREGS_INFO(ebx), \
+ PTREGS_INFO(esp), \
+ PTREGS_INFO(ebp), \
+ PTREGS_INFO(esi), \
+ PTREGS_INFO(edi), \
+ PTREGS_INFO(eip)
+
+#endif /* STAPCONF_X86_UNIREGS */
+
+/* True when the return address rule is "saved in memory" at the offset
+ * for which value * dataAlign equals -4, i.e. 4 bytes below the CFA. */
+#define UNW_DEFAULT_RA(raItem, dataAlign) \
+ ((raItem).where == Memory && \
+ !((raItem).value * (dataAlign) + 4))
+
+/*
+ * Seed an unwind frame from a saved register set.
+ *
+ * For a user-mode register set the whole pt_regs is copied.  Otherwise only
+ * the fields that precede the stack pointer are copied; the stack pointer is
+ * set to the address of the sp slot inside *regs and the stack segment to
+ * __KERNEL_DS (presumably because sp/ss are not saved for traps taken in
+ * kernel mode -- confirm).  The frame always starts out as a call frame.
+ */
+static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
+					    /*const*/ struct pt_regs *regs)
+{
+	info->call_frame = 1;
+
+	if (user_mode_vm(regs)) {
+		info->regs = *regs;
+		return;
+	}
+
+#ifdef STAPCONF_X86_UNIREGS
+	memcpy(&info->regs, regs, offsetof(struct pt_regs, sp));
+	info->regs.ss = __KERNEL_DS;
+	info->regs.sp = (unsigned long)&regs->sp;
+#else
+	memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
+	info->regs.xss = __KERNEL_DS;
+	info->regs.esp = (unsigned long)&regs->esp;
+#endif
+}
+
+/*
+ * Seed an unwind frame for info->task from the state saved in task->thread:
+ * the instruction and stack pointers come from the thread struct, and the
+ * frame pointer is loaded from the word at the saved stack pointer.  All
+ * other registers are zeroed; code/stack segments are set to the kernel
+ * selectors and ds/es to __USER_DS.
+ *
+ * NOTE(review): the result of __get_user() is ignored, so bp silently stays
+ * 0 if the read fails.
+ */
+static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
+{
+ memset(&info->regs, 0, sizeof(info->regs));
+#ifdef STAPCONF_X86_UNIREGS
+ info->regs.ip = info->task->thread.ip;
+ info->regs.cs = __KERNEL_CS;
+ __get_user(info->regs.bp, (long *)info->task->thread.sp);
+ info->regs.sp = info->task->thread.sp;
+ info->regs.ss = __KERNEL_DS;
+ info->regs.ds = __USER_DS;
+ info->regs.es = __USER_DS;
+#else
+ info->regs.eip = info->task->thread.eip;
+ info->regs.xcs = __KERNEL_CS;
+ __get_user(info->regs.ebp, (long *)info->task->thread.esp);
+ info->regs.esp = info->task->thread.esp;
+ info->regs.xss = __KERNEL_DS;
+ info->regs.xds = __USER_DS;
+ info->regs.xes = __USER_DS;
+#endif
+
+}
+
+
+/*
+ * Address-based test for whether a frame belongs to user mode.  Returns
+ * non-zero when the instruction pointer is below PAGE_OFFSET or inside the
+ * FIX_VDSO fixmap page, or when the stack pointer is below PAGE_OFFSET.
+ * The selector-based user_mode_vm() test is compiled out (see below).
+ */
+static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
+{
+#if 0 /* This can only work when selector register and EFLAGS saves/restores
+ are properly annotated (and tracked in UNW_REGISTER_INFO). */
+ return user_mode_vm(&info->regs);
+#else
+#ifdef STAPCONF_X86_UNIREGS
+ return info->regs.ip < PAGE_OFFSET
+ || (info->regs.ip >= __fix_to_virt(FIX_VDSO)
+ && info->regs.ip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
+ || info->regs.sp < PAGE_OFFSET;
+#else
+ return info->regs.eip < PAGE_OFFSET
+ || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
+ && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
+ || info->regs.esp < PAGE_OFFSET;
+#endif
+#endif
+}
+
+#endif /* _STP_I386_UNWIND_H */
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
new file mode 100644
index 00000000..ae5e75d3
--- /dev/null
+++ b/runtime/unwind/unwind.h
@@ -0,0 +1,146 @@
+/* -*- linux-c -*-
+ *
+ * dwarf unwinder header file
+ * Copyright (C) 2008 Red Hat Inc.
+ * Copyright (C) 2002-2006 Novell, Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _STP_UNWIND_H_
+#define _STP_UNWIND_H_
+
+#ifdef STP_USE_DWARF_UNWINDER
+
+#if defined (__x86_64__)
+#include "x86_64.h"
+#elif defined (__i386__)
+#include "i386.h"
+#else
+#error "Unsupported dwarf unwind architecture"
+#endif
+
+/* Capacity of unwind_state.stack, i.e. the maximum number of
+ * DW_CFA_remember_state positions that can be pending at once. */
+#define MAX_STACK_DEPTH 8
+
+/*
+ * EXTRA_INFO(f) expands to one reg_info[] initialiser for field f of
+ * struct unwind_frame_info: the field's offset expressed in units of its own
+ * size (so the frame can be indexed as an array of that type) and the
+ * field's size in bytes.  BUILD_BUG_ON_ZERO() breaks the build if the offset
+ * is not a multiple of the size.
+ */
+#define EXTRA_INFO(f) { \
+ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
+ % FIELD_SIZEOF(struct unwind_frame_info, f)) \
+ + offsetof(struct unwind_frame_info, f) \
+ / FIELD_SIZEOF(struct unwind_frame_info, f), \
+ FIELD_SIZEOF(struct unwind_frame_info, f) \
+ }
+/* Same, for a field of the embedded pt_regs. */
+#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
+
+/* Per-register location table, filled from the architecture header's
+ * UNW_REGISTER_INFO list and indexed by register number. */
+static const struct {
+ unsigned offs:BITS_PER_LONG / 2;
+ unsigned width:BITS_PER_LONG / 2;
+} reg_info[] = {
+ UNW_REGISTER_INFO
+};
+
+#undef PTREGS_INFO
+#undef EXTRA_INFO
+
+/* A register with width 0 has no slot in the frame; an architecture header
+ * may supply its own definition. */
+#ifndef REG_INVALID
+#define REG_INVALID(r) (reg_info[r].width == 0)
+#endif
+
+/* DWARF call frame instruction opcodes (the values seen when the top two
+ * bits of the instruction byte are zero). */
+#define DW_CFA_nop 0x00
+#define DW_CFA_set_loc 0x01
+#define DW_CFA_advance_loc1 0x02
+#define DW_CFA_advance_loc2 0x03
+#define DW_CFA_advance_loc4 0x04
+#define DW_CFA_offset_extended 0x05
+#define DW_CFA_restore_extended 0x06
+#define DW_CFA_undefined 0x07
+#define DW_CFA_same_value 0x08
+#define DW_CFA_register 0x09
+#define DW_CFA_remember_state 0x0a
+#define DW_CFA_restore_state 0x0b
+#define DW_CFA_def_cfa 0x0c
+#define DW_CFA_def_cfa_register 0x0d
+#define DW_CFA_def_cfa_offset 0x0e
+#define DW_CFA_def_cfa_expression 0x0f
+#define DW_CFA_expression 0x10
+#define DW_CFA_offset_extended_sf 0x11
+#define DW_CFA_def_cfa_sf 0x12
+#define DW_CFA_def_cfa_offset_sf 0x13
+#define DW_CFA_val_offset 0x14
+#define DW_CFA_val_offset_sf 0x15
+#define DW_CFA_val_expression 0x16
+#define DW_CFA_lo_user 0x1c
+#define DW_CFA_GNU_window_save 0x2d
+#define DW_CFA_GNU_args_size 0x2e
+#define DW_CFA_GNU_negative_offset_extended 0x2f
+#define DW_CFA_hi_user 0x3f
+
+/* Pointer encoding byte: the low bits (DW_EH_PE_FORM mask) give the data
+ * form, optionally combined with the DW_EH_PE_signed bit. */
+#define DW_EH_PE_absptr 0x00
+#define DW_EH_PE_leb128 0x01
+#define DW_EH_PE_data2 0x02
+#define DW_EH_PE_data4 0x03
+#define DW_EH_PE_data8 0x04
+#define DW_EH_PE_FORM 0x07 /* mask */
+#define DW_EH_PE_signed 0x08 /* signed versions of above have this bit set */
+
+/* Bits 4-6 (DW_EH_PE_ADJUST mask) select the adjustment; bit 7 marks an
+ * indirect pointer and 0xff means the value is omitted. */
+#define DW_EH_PE_pcrel 0x10
+#define DW_EH_PE_textrel 0x20
+#define DW_EH_PE_datarel 0x30
+#define DW_EH_PE_funcrel 0x40
+#define DW_EH_PE_aligned 0x50
+#define DW_EH_PE_ADJUST 0x70 /* mask */
+#define DW_EH_PE_indirect 0x80
+#define DW_EH_PE_omit 0xff
+
+/* Storage types for decoded unsigned/signed LEB128 values; both are
+ * long-sized. */
+typedef unsigned long uleb128_t;
+typedef signed long sleb128_t;
+
+/* Description of one unwind table; entries chain through 'link' and
+ * root_table is the file-static instance.
+ * NOTE(review): no use of root_table is visible in this part of the patch. */
+static struct unwind_table {
+ unsigned long pc; /* text */
+ unsigned long range; /* text_size */
+ const void *address; /* unwind_data */
+ unsigned long size; /* unwind_data_len */
+ const unsigned char *header; /* unwind_header */
+ unsigned long hdrsz;
+ struct unwind_table *link;
+ const char *name; /* module name */
+} root_table;
+
+/* Rule describing how to recover one register of the calling frame. */
+struct unwind_item {
+ enum item_location {
+ Nowhere, /* no rule recorded for the register */
+ Memory, /* saved at CFA + value * dataAlign */
+ Register, /* held in the register numbered 'value' */
+ Value /* equals CFA + value * dataAlign */
+ } where;
+ /* offset or register number, depending on 'where' */
+ uleb128_t value;
+};
+
+/* Working state while call frame instructions are executed. */
+struct unwind_state {
+ /* loc: current code location; org: start location of the FDE */
+ uleb128_t loc, org;
+ /* bounds of the CIE's initial instructions */
+ const u8 *cieStart, *cieEnd;
+ /* alignment factors read from the CIE */
+ uleb128_t codeAlign;
+ sleb128_t dataAlign;
+ /* CFA rule: CFA = value of register 'reg' + 'offs' */
+ struct cfa {
+ uleb128_t reg, offs;
+ } cfa;
+ /* one rule per register, indexed like reg_info[] */
+ struct unwind_item regs[ARRAY_SIZE(reg_info)];
+ /* number of entries in use in stack[] */
+ unsigned stackDepth:8;
+ /* CIE version byte */
+ unsigned version:8;
+ /* position at which a replay for DW_CFA_restore_state stops, or NULL */
+ const u8 *label;
+ /* instruction positions saved by DW_CFA_remember_state */
+ const u8 *stack[MAX_STACK_DEPTH];
+};
+
+/* Deliberately invalid CFA rule: the register number is one past the end of
+ * reg_info[] and the offset is not a multiple of sizeof(long), so unwind()
+ * rejects a CFA that was never defined by the instructions. */
+static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
+/* Forward declarations of helpers used by the unwinder. */
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType);
+/* Sentinels: unwind() compares the result of cie_for_fde() against their
+ * addresses. */
+static const u32 bad_cie, not_fde;
+static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *);
+static signed fde_pointer_type(const u32 *cie);
+
+
+#endif /* STP_USE_DWARF_UNWINDER */
+#endif /*_STP_UNWIND_H_*/
diff --git a/runtime/unwind/x86_64.h b/runtime/unwind/x86_64.h
new file mode 100644
index 00000000..5eb3a58f
--- /dev/null
+++ b/runtime/unwind/x86_64.h
@@ -0,0 +1,150 @@
+/* -*- linux-c -*-
+ *
+ * x86_64 dwarf unwinder header file
+ * Copyright (C) 2008 Red Hat Inc.
+ * Copyright (C) 2002-2006 Novell, Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+#ifndef _STP_X86_64_UNWIND_H
+#define _STP_X86_64_UNWIND_H
+
+/*
+ * Copyright (C) 2002-2006 Novell, Inc.
+ * Jan Beulich <jbeulich@novell.com>
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/vsyscall.h>
+
+/* these are simple for x86_64: both helpers are a plain dereference of ptr,
+ * with no special handling for unaligned addresses */
+#define _stp_get_unaligned(ptr) (*(ptr))
+#define _stp_put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
+
+/* State of one stack frame while unwinding. */
+struct unwind_frame_info
+{
+ /* register values of the frame; UNW_PC()/UNW_SP() and reg_info index into this */
+ struct pt_regs regs;
+ /* task being unwound; read by arch_unw_init_blocked() */
+ struct task_struct *task;
+ /* 1 for an ordinary call frame; unwind() clears it for signal frames */
+ unsigned call_frame:1;
+};
+
+/* Accessors for the program counter and stack pointer of a frame; the two
+ * variants differ only in the pt_regs field names. */
+#ifdef STAPCONF_X86_UNIREGS
+#define UNW_PC(frame) (frame)->regs.ip
+#define UNW_SP(frame) (frame)->regs.sp
+#else
+#define UNW_PC(frame) (frame)->regs.rip
+#define UNW_SP(frame) (frame)->regs.rsp
+#endif /* STAPCONF_X86_UNIREGS */
+
+/* Compiled out: frame-pointer based helpers. */
+#if 0 /* STP_USE_FRAME_POINTER */
+/* Frame pointers not implemented in x86_64 currently */
+#define UNW_FP(frame) (frame)->regs.rbp
+#define FRAME_RETADDR_OFFSET 8
+#define FRAME_LINK_OFFSET 0
+#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
+#define STACK_TOP(tsk) ((tsk)->thread.rsp0)
+#endif
+
+/* Round (ptr - 1) down to a multiple of THREAD_SIZE.
+
+ Might need to account for the special exception and interrupt handling
+ stacks here, since normally
+ EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
+ but the construct is needed only for getting across the stack switch to
+ the interrupt stack - thus considering the IRQ stack itself is unnecessary,
+ and the overhead of comparing against all exception handling stacks seems
+ not desirable. */
+#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
+
+/*
+ * List of pt_regs fields that initialises reg_info[].  reg_info[] is indexed
+ * by the register numbers found in the call frame instructions, so the order
+ * of the entries below is significant (note dx before cx, and si/di before
+ * bp/sp).  The two variants differ only in the pt_regs field names.
+ */
+#ifdef STAPCONF_X86_UNIREGS
+#define UNW_REGISTER_INFO \
+ PTREGS_INFO(ax), \
+ PTREGS_INFO(dx), \
+ PTREGS_INFO(cx), \
+ PTREGS_INFO(bx), \
+ PTREGS_INFO(si), \
+ PTREGS_INFO(di), \
+ PTREGS_INFO(bp), \
+ PTREGS_INFO(sp), \
+ PTREGS_INFO(r8), \
+ PTREGS_INFO(r9), \
+ PTREGS_INFO(r10), \
+ PTREGS_INFO(r11), \
+ PTREGS_INFO(r12), \
+ PTREGS_INFO(r13), \
+ PTREGS_INFO(r14), \
+ PTREGS_INFO(r15), \
+ PTREGS_INFO(ip)
+#else
+#define UNW_REGISTER_INFO \
+ PTREGS_INFO(rax), \
+ PTREGS_INFO(rdx), \
+ PTREGS_INFO(rcx), \
+ PTREGS_INFO(rbx), \
+ PTREGS_INFO(rsi), \
+ PTREGS_INFO(rdi), \
+ PTREGS_INFO(rbp), \
+ PTREGS_INFO(rsp), \
+ PTREGS_INFO(r8), \
+ PTREGS_INFO(r9), \
+ PTREGS_INFO(r10), \
+ PTREGS_INFO(r11), \
+ PTREGS_INFO(r12), \
+ PTREGS_INFO(r13), \
+ PTREGS_INFO(r14), \
+ PTREGS_INFO(r15), \
+ PTREGS_INFO(rip)
+#endif /* STAPCONF_X86_UNIREGS */
+
+/* True when the return address rule is "saved in memory" at the offset
+ * for which value * dataAlign equals -8, i.e. 8 bytes below the CFA. */
+#define UNW_DEFAULT_RA(raItem, dataAlign) \
+ ((raItem).where == Memory && \
+ !((raItem).value * (dataAlign) + 8))
+
+/*
+ * Seed an unwind frame from a saved register set: the frame starts out as
+ * an ordinary call frame holding a full copy of *regs.
+ */
+static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
+					    /*const*/ struct pt_regs *regs)
+{
+	info->call_frame = 1;
+	info->regs = *regs;
+}
+
+/*
+ * Seed an unwind frame for info->task from the state saved in task->thread:
+ * the instruction pointer is set to the address of the thread_return
+ * symbol, the stack pointer comes from the thread struct and the frame
+ * pointer is loaded from the word at that stack pointer.  All other
+ * registers are zeroed and cs/ss are set to the kernel selectors.
+ *
+ * NOTE(review): the result of __get_user() is ignored, so bp silently stays
+ * 0 if the read fails.
+ */
+static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
+{
+ extern const char thread_return[];
+
+ memset(&info->regs, 0, sizeof(info->regs));
+ info->regs.cs = __KERNEL_CS;
+ info->regs.ss = __KERNEL_DS;
+
+#ifdef STAPCONF_X86_UNIREGS
+ info->regs.ip = (unsigned long)thread_return;
+ __get_user(info->regs.bp, (unsigned long *)info->task->thread.sp);
+ info->regs.sp = info->task->thread.sp;
+#else
+ info->regs.rip = (unsigned long)thread_return;
+ __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
+ info->regs.rsp = info->task->thread.rsp;
+#endif
+}
+
+/*
+ * Address-based test for whether a frame belongs to user mode.  Returns
+ * non-zero when the instruction pointer or the stack pointer is
+ * non-negative when read as a signed long, or when the instruction pointer
+ * lies in [VSYSCALL_START, VSYSCALL_END).  The selector-based user_mode()
+ * test is compiled out (see below).
+ */
+static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
+{
+#if 0 /* This can only work when selector register saves/restores
+ are properly annotated (and tracked in UNW_REGISTER_INFO). */
+ return user_mode(&info->regs);
+#else
+#ifdef STAPCONF_X86_UNIREGS
+ return (long)info->regs.ip >= 0
+ || (info->regs.ip >= VSYSCALL_START && info->regs.ip < VSYSCALL_END)
+ || (long)info->regs.sp >= 0;
+#else
+ return (long)info->regs.rip >= 0
+ || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
+ || (long)info->regs.rsp >= 0;
+#endif
+#endif
+}
+
+#endif /* _STP_X86_64_UNWIND_H */
diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c
index 0bf625a5..4ffcf72e 100644
--- a/runtime/vsprintf.c
+++ b/runtime/vsprintf.c
@@ -248,6 +248,11 @@ int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
++str;
}
}
+#ifdef __ia64__
+ if ((str + precision - 1) <= end)
+ memcpy(str, &num, precision); //to prevent unaligned access
+ str += precision;
+#else
switch(precision) {
case 1:
if(str <= end)
@@ -271,6 +276,7 @@ int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
str+=8;
break;
}
+#endif
while (len < field_width--) {
if (str <= end)
*str = '\0';