summary | refs | log | tree | commit | diff | stats
path: root/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'runtime')
-rw-r--r-- runtime/autoconf-asm-syscall.c 2
-rw-r--r-- runtime/autoconf-find-task-pid.c 6
-rw-r--r-- runtime/autoconf-x86-gs.c 5
-rw-r--r-- runtime/itrace.c 60
-rw-r--r-- runtime/loc2c-runtime.h 42
-rw-r--r-- runtime/map-gen.c 291
-rw-r--r-- runtime/map.c 23
-rw-r--r-- runtime/pmap-gen.c 343
-rw-r--r-- runtime/print.c 27
-rw-r--r-- runtime/ptrace_compatibility.h 50
-rw-r--r-- runtime/runtime.h 19
-rw-r--r-- runtime/stack-arm.c 3
-rw-r--r-- runtime/stack-i386.c 25
-rw-r--r-- runtime/stack-ia64.c 3
-rw-r--r-- runtime/stack-ppc64.c 3
-rw-r--r-- runtime/stack-s390.c 3
-rw-r--r-- runtime/stack-x86_64.c 23
-rw-r--r-- runtime/stack.c 40
-rw-r--r-- runtime/staprun/common.c 123
-rw-r--r-- runtime/staprun/mainloop.c 70
-rw-r--r-- runtime/staprun/modverify.c 391
-rw-r--r-- runtime/staprun/modverify.h 9
-rw-r--r-- runtime/staprun/relay.c 149
-rw-r--r-- runtime/staprun/relay_old.c 138
-rw-r--r-- runtime/staprun/staprun.h 29
-rw-r--r-- runtime/staprun/staprun_funcs.c 190
-rw-r--r-- runtime/sym.c 220
-rw-r--r-- runtime/sym.h 2
-rw-r--r-- runtime/syscall.h 407
-rw-r--r-- runtime/task_finder.c 539
-rw-r--r-- runtime/task_finder_map.c 191
-rw-r--r-- runtime/task_finder_vma.c 87
-rw-r--r-- runtime/transport/control.c 7
-rw-r--r-- runtime/transport/transport.c 14
-rw-r--r-- runtime/transport/transport_msgs.h 6
-rw-r--r-- runtime/unwind.c 24
-rw-r--r-- runtime/unwind/unwind.h 4
-rw-r--r-- runtime/uprobes/.gitignore 7
-rw-r--r-- runtime/uprobes/uprobes.c 34
-rw-r--r-- runtime/uprobes/uprobes.h 6
-rw-r--r-- runtime/uprobes/uprobes_i386.c 40
-rw-r--r-- runtime/uprobes/uprobes_x86.c 22
-rw-r--r-- runtime/uprobes/uprobes_x86_64.c 7
-rw-r--r-- runtime/uprobes2/uprobes.c 52
-rw-r--r-- runtime/uprobes2/uprobes.h 11
-rw-r--r-- runtime/uprobes2/uprobes_x86.c 9
-rw-r--r-- runtime/uprobes2/uprobes_x86.h 13
-rw-r--r-- runtime/utrace_compatibility.h 14
-rw-r--r-- runtime/vsprintf.c 372
49 files changed, 3395 insertions, 760 deletions
diff --git a/runtime/autoconf-asm-syscall.c b/runtime/autoconf-asm-syscall.c
new file mode 100644
index 00000000..bf7a273f
--- /dev/null
+++ b/runtime/autoconf-asm-syscall.c
@@ -0,0 +1,2 @@
+#include <asm/syscall.h>
+
diff --git a/runtime/autoconf-find-task-pid.c b/runtime/autoconf-find-task-pid.c
new file mode 100644
index 00000000..549d5ac3
--- /dev/null
+++ b/runtime/autoconf-find-task-pid.c
@@ -0,0 +1,6 @@
+#include <linux/sched.h>
+
+void foo (pid_t k) { /* presumably a compile-only configure probe (cf. STAPCONF_FIND_TASK_PID in itrace.c) -- confirm */
+ struct task_struct *tsk = find_task_by_pid (k); /* builds cleanly only where find_task_by_pid() is declared */
+ (void) tsk; /* mark the result as deliberately unused */
+}
diff --git a/runtime/autoconf-x86-gs.c b/runtime/autoconf-x86-gs.c
new file mode 100644
index 00000000..f4dda795
--- /dev/null
+++ b/runtime/autoconf-x86-gs.c
@@ -0,0 +1,5 @@
+#include <asm/ptrace.h>
+
+#if defined (__i386__) /* the check is only made for i386 builds */
+struct pt_regs regs = {.gs = 0x0}; /* compiles only if struct pt_regs has a member named gs */
+#endif
diff --git a/runtime/itrace.c b/runtime/itrace.c
index ed32b0bc..68f85301 100644
--- a/runtime/itrace.c
+++ b/runtime/itrace.c
@@ -1,6 +1,7 @@
/*
* user space instruction tracing
* Copyright (C) 2005, 2006, 2007, 2008, 2009 IBM Corp.
+ * Copyright (C) 2009 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
@@ -17,8 +18,16 @@
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/utrace.h>
+#include "ptrace_compatibility.h"
+
+/* PR9974: Adapt to struct renaming. */
+#ifdef UTRACE_API_VERSION
+#define utrace_attached_engine utrace_engine
+#endif
+
#include <asm/string.h>
#include "uprobes/uprobes.h"
+#include "utrace_compatibility.h"
#ifndef put_task_struct
#define put_task_struct(t) \
@@ -55,7 +64,7 @@ struct itrace_info {
struct list_head link;
};
-static u32 debug = 1;
+static u32 debug = 0 /* 1 */;
static LIST_HEAD(usr_itrace_info);
static spinlock_t itrace_lock;
@@ -118,10 +127,15 @@ static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void
return buf - old_buf;
}
+#ifdef UTRACE_ORIG_VERSION
+static u32 usr_itrace_report_quiesce(struct utrace_attached_engine *engine,
+ struct task_struct *tsk)
+#else
static u32 usr_itrace_report_quiesce(enum utrace_resume_action action,
struct utrace_attached_engine *engine,
struct task_struct *tsk,
unsigned long event)
+#endif
{
int status;
struct itrace_info *ui;
@@ -129,10 +143,23 @@ static u32 usr_itrace_report_quiesce(enum utrace_resume_action action,
ui = rcu_dereference(engine->data);
WARN_ON(!ui);
+#ifdef UTRACE_ORIG_VERSION
+ return (ui->step_flag | UTRACE_ACTION_NEWSTATE);
+#else
return (event == 0 ? ui->step_flag : UTRACE_RESUME);
+#endif
}
+#ifdef UTRACE_ORIG_VERSION
+static u32 usr_itrace_report_signal(
+ struct utrace_attached_engine *engine,
+ struct task_struct *tsk,
+ struct pt_regs *regs,
+ u32 action, siginfo_t *info,
+ const struct k_sigaction *orig_ka,
+ struct k_sigaction *return_ka)
+#else
static u32 usr_itrace_report_signal(u32 action,
struct utrace_attached_engine *engine,
struct task_struct *tsk,
@@ -140,6 +167,7 @@ static u32 usr_itrace_report_signal(u32 action,
siginfo_t *info,
const struct k_sigaction *orig_ka,
struct k_sigaction *return_ka)
+#endif
{
struct itrace_info *ui;
u32 return_flags;
@@ -174,16 +202,31 @@ static u32 usr_itrace_report_signal(u32 action,
return return_flags;
}
+
+
+#ifdef UTRACE_ORIG_VERSION /* this signature has no leading utrace_resume_action argument */
+static u32 usr_itrace_report_clone(
+ struct utrace_attached_engine *engine,
+ struct task_struct *parent,
+ unsigned long clone_flags,
+ struct task_struct *child)
+#else /* signature that receives the resume action as its first argument */
static u32 usr_itrace_report_clone(enum utrace_resume_action action,
struct utrace_attached_engine *engine,
struct task_struct *parent, unsigned long clone_flags,
struct task_struct *child)
+#endif /* UTRACE_ORIG_VERSION; both signatures share the body below */
{
return UTRACE_RESUME;
}
+#ifdef UTRACE_ORIG_VERSION
+static u32 usr_itrace_report_death(struct utrace_attached_engine *e,
+ struct task_struct *tsk)
+#else
static u32 usr_itrace_report_death(struct utrace_attached_engine *e,
struct task_struct *tsk, bool group_dead, int signal)
+#endif
{
struct itrace_info *ui = rcu_dereference(e->data);
WARN_ON(!ui);
@@ -275,8 +318,13 @@ static int usr_itrace_init(int single_step, pid_t tid, struct stap_itrace_probe
struct itrace_info *ui;
struct task_struct *tsk;
+ spin_lock_init(&itrace_lock);
rcu_read_lock();
+#ifdef STAPCONF_FIND_TASK_PID
+ tsk = find_task_by_pid(tid);
+#else
tsk = find_task_by_vpid(tid);
+#endif
if (!tsk) {
printk(KERN_ERR "usr_itrace_init: Cannot find process %d\n", tid);
rcu_read_unlock();
@@ -293,12 +341,8 @@ static int usr_itrace_init(int single_step, pid_t tid, struct stap_itrace_probe
put_task_struct(tsk);
rcu_read_unlock();
- spin_lock_init(&itrace_lock);
-
- /* set initial state */
- spin_lock(&itrace_lock);
- spin_unlock(&itrace_lock);
- printk(KERN_INFO "usr_itrace_init: completed for tid = %d\n", tid);
+ if (debug)
+ printk(KERN_INFO "usr_itrace_init: completed for tid = %d\n", tid);
return 0;
}
@@ -314,7 +358,6 @@ void static remove_usr_itrace_info(struct itrace_info *ui)
if (debug)
printk(KERN_INFO "remove_usr_itrace_info: tid=%d\n", ui->tid);
- spin_lock(&itrace_lock);
if (ui->tsk && ui->engine) {
status = utrace_control(ui->tsk, ui->engine, UTRACE_DETACH);
if (status < 0 && status != -ESRCH && status != -EALREADY)
@@ -322,6 +365,7 @@ void static remove_usr_itrace_info(struct itrace_info *ui)
"utrace_control(UTRACE_DETACH) returns %d\n",
status);
}
+ spin_lock(&itrace_lock);
list_del(&ui->link);
spin_unlock(&itrace_lock);
kfree(ui);
diff --git a/runtime/loc2c-runtime.h b/runtime/loc2c-runtime.h
index 0af19edc..620e1615 100644
--- a/runtime/loc2c-runtime.h
+++ b/runtime/loc2c-runtime.h
@@ -29,11 +29,12 @@
& (((__typeof (base)) 1 << (nbits)) - 1))
#define store_bitfield(target, base, higherbits, nbits) \
- target = (target \
- &~ ((((__typeof (base)) 1 << (nbits)) - 1) \
- << (sizeof (base) * 8 - (higherbits) - (nbits))) \
- | ((__typeof (base)) (base) \
- << (sizeof (base) * 8 - (higherbits) - (nbits))))
+ target = ((target \
+ &~ ((((__typeof (target)) 1 << (nbits)) - 1) /* nbits-wide mask in target's type */ \
+ << (sizeof (target) * 8 - (higherbits) - (nbits)))) /* mask moved to the field's position, then cleared in target */ \
+ | ((((__typeof (target)) (base)) /* new value, converted to target's type */ \
+ & (((__typeof (target)) 1 << (nbits)) - 1)) /* only its low nbits bits are kept */ \
+ << (sizeof (target) * 8 - (higherbits) - (nbits)))) /* shifted into the same position and OR-ed in */
/* Given a DWARF register number, fetch its intptr_t (long) value from the
@@ -62,6 +63,10 @@
must work right for kernel addresses, and can use whatever existing
machine-specific kernel macros are convenient. */
+#if STP_SKIP_BADVARS
+#define DEREF_FAULT(addr) ({0; })
+#define STORE_DEREF_FAULT(addr) ({0; })
+#else
#define DEREF_FAULT(addr) ({ \
snprintf(c->error_buffer, sizeof(c->error_buffer), \
"kernel read fault at 0x%p (%s)", (void *)(intptr_t)(addr), #addr); \
@@ -75,7 +80,7 @@
c->last_error = c->error_buffer; \
goto deref_fault; \
})
-
+#endif
#if defined (STAPCONF_X86_UNIREGS) && defined (__i386__)
@@ -186,37 +191,34 @@
*/
#define kread(ptr) ({ \
- typeof(*(ptr)) _v; \
- if (probe_kernel_read((void *)&_v, (void *)(ptr), sizeof(*(ptr)))) \
- DEREF_FAULT(ptr); \
+ typeof(*(ptr)) _v = 0; /* result stays 0 when DEREF_FAULT does not jump away (the STP_SKIP_BADVARS form) */ \
+ if (lookup_bad_addr((unsigned long)(ptr)) || /* evaluated first; a nonzero result skips the read */ \
+ probe_kernel_read((void *)&_v, (void *)(ptr), sizeof(*(ptr)))) /* nonzero return is treated as a fault */ \
+ DEREF_FAULT(ptr); \
_v; \
})
#define kwrite(ptr, value) ({ \
typeof(*(ptr)) _v; \
_v = (typeof(*(ptr)))(value); \
- if (probe_kernel_write((void *)(ptr), (void *)&_v, sizeof(*(ptr)))) \
- STORE_DEREF_FAULT(ptr); \
+ if (lookup_bad_addr((unsigned long)(ptr)) || /* screen this macro's own ptr argument; a nonzero result skips the write */ \
+ probe_kernel_write((void *)(ptr), (void *)&_v, sizeof(*(ptr)))) /* nonzero return is treated as a fault */ \
+ STORE_DEREF_FAULT(ptr); \
})
#define deref(size, addr) ({ \
- intptr_t _i; \
- if (lookup_bad_addr((unsigned long)addr)) \
- __deref_bad(); \
+ intptr_t _i = 0; /* initialised up front; the bad-address check removed above is now made inside kread */ \
switch (size) { \
case 1: _i = kread((u8 *)(addr)); break; \
case 2: _i = kread((u16 *)(addr)); break; \
case 4: _i = kread((u32 *)(addr)); break; \
case 8: _i = kread((u64 *)(addr)); break; \
default: __deref_bad(); \
- /* uninitialized _i should also be caught by -Werror */ \
} \
_i; \
})
#define store_deref(size, addr, value) ({ \
- if (lookup_bad_addr((unsigned long)addr)) \
- __store_deref_bad(); \
switch (size) { \
case 1: kwrite((u8 *)(addr), (value)); break; \
case 2: kwrite((u16 *)(addr), (value)); break; \
@@ -237,7 +239,7 @@ extern void __store_deref_bad(void);
({ \
int _bad = 0; \
u8 _b; u16 _w; u32 _l; \
- intptr_t _v; \
+ intptr_t _v = 0; \
if (lookup_bad_addr((unsigned long)addr)) \
_bad = 1; \
else \
@@ -277,7 +279,7 @@ extern void __store_deref_bad(void);
({ \
int _bad = 0; \
u8 _b; u16 _w; u32 _l; u64 _q; \
- intptr_t _v; \
+ intptr_t _v = 0; \
if (lookup_bad_addr((unsigned long)addr)) \
_bad = 1; \
else \
@@ -394,7 +396,7 @@ extern void __store_deref_bad(void);
#define deref(size, addr) \
({ \
int _bad = 0; \
- intptr_t _v; \
+ intptr_t _v = 0; \
if (lookup_bad_addr((unsigned long)addr)) \
_bad = 1; \
else \
diff --git a/runtime/map-gen.c b/runtime/map-gen.c
index c4bdf2c7..fdb75089 100644
--- a/runtime/map-gen.c
+++ b/runtime/map-gen.c
@@ -26,6 +26,14 @@
#define JOIN5x(a,b,c,d,e,f) a##_##b##c##d##e##f
#define JOIN6(a,b,c,d,e,f,g) JOIN6x(a,b,c,d,e,f,g)
#define JOIN6x(a,b,c,d,e,f,g) a##_##b##c##d##e##f##g
+#define JOIN7(a,b,c,d,e,f,g,h) JOIN7x(a,b,c,d,e,f,g,h)
+#define JOIN7x(a,b,c,d,e,f,g,h) a##_##b##c##d##e##f##g##h
+#define JOIN8(a,b,c,d,e,f,g,h,i) JOIN8x(a,b,c,d,e,f,g,h,i)
+#define JOIN8x(a,b,c,d,e,f,g,h,i) a##_##b##c##d##e##f##g##h##i
+#define JOIN9(a,b,c,d,e,f,g,h,i,j) JOIN9x(a,b,c,d,e,f,g,h,i,j)
+#define JOIN9x(a,b,c,d,e,f,g,h,i,j) a##_##b##c##d##e##f##g##h##i##j
+#define JOIN10(a,b,c,d,e,f,g,h,i,j,k) JOIN10x(a,b,c,d,e,f,g,h,i,j,k)
+#define JOIN10x(a,b,c,d,e,f,g,h,i,j,k) a##_##b##c##d##e##f##g##h##i##j##k
#include "map.h"
@@ -162,6 +170,113 @@
#define KEY5_HASH JOIN(KEY5NAME,hash)
#endif /* defined(KEY5_TYPE) */
+#if defined (KEY6_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 6
+#if KEY6_TYPE == STRING
+#define KEY6TYPE char*
+#define KEY6NAME str
+#define KEY6N s
+#define KEY6STOR char key6[MAP_STRING_LENGTH]
+#define KEY6CPY(m) str_copy(m->key6, key6)
+#else
+#define KEY6TYPE int64_t
+#define KEY6NAME int64
+#define KEY6N i
+#define KEY6STOR int64_t key6
+#define KEY6CPY(m) m->key6=key6
+#endif
+#define KEY6_EQ_P JOIN(KEY6NAME,eq_p)
+#define KEY6_HASH JOIN(KEY6NAME,hash)
+#endif /* defined(KEY6_TYPE) */
+
+#if defined (KEY7_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 7
+#if KEY7_TYPE == STRING
+#define KEY7TYPE char*
+#define KEY7NAME str
+#define KEY7N s
+#define KEY7STOR char key7[MAP_STRING_LENGTH]
+#define KEY7CPY(m) str_copy(m->key7, key7)
+#else
+#define KEY7TYPE int64_t
+#define KEY7NAME int64
+#define KEY7N i
+#define KEY7STOR int64_t key7
+#define KEY7CPY(m) m->key7=key7
+#endif
+#define KEY7_EQ_P JOIN(KEY7NAME,eq_p)
+#define KEY7_HASH JOIN(KEY7NAME,hash)
+#endif /* defined(KEY7_TYPE) */
+
+#if defined (KEY7_TYPE) /* NOTE(review): this whole block repeats the KEY7 block directly above, token for token */
+#undef KEY_ARITY
+#define KEY_ARITY 7
+#if KEY7_TYPE == STRING
+#define KEY7TYPE char*
+#define KEY7NAME str
+#define KEY7N s
+#define KEY7STOR char key7[MAP_STRING_LENGTH]
+#define KEY7CPY(m) str_copy(m->key7, key7)
+#else
+#define KEY7TYPE int64_t
+#define KEY7NAME int64
+#define KEY7N i
+#define KEY7STOR int64_t key7
+#define KEY7CPY(m) m->key7=key7
+#endif
+#define KEY7_EQ_P JOIN(KEY7NAME,eq_p)
+#define KEY7_HASH JOIN(KEY7NAME,hash)
+#endif /* defined(KEY7_TYPE) */
+
+#if defined (KEY8_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 8
+#if KEY8_TYPE == STRING
+#define KEY8TYPE char*
+#define KEY8NAME str
+#define KEY8N s
+#define KEY8STOR char key8[MAP_STRING_LENGTH]
+#define KEY8CPY(m) str_copy(m->key8, key8)
+#else
+#define KEY8TYPE int64_t
+#define KEY8NAME int64
+#define KEY8N i
+#define KEY8STOR int64_t key8
+#define KEY8CPY(m) m->key8=key8
+#endif
+#define KEY8_EQ_P JOIN(KEY8NAME,eq_p)
+#define KEY8_HASH JOIN(KEY8NAME,hash)
+#endif /* defined(KEY8_TYPE) */
+
+#if defined (KEY9_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 9
+#if KEY9_TYPE == STRING
+#define KEY9TYPE char*
+#define KEY9NAME str
+#define KEY9N s
+#define KEY9STOR char key9[MAP_STRING_LENGTH]
+#define KEY9CPY(m) str_copy(m->key9, key9)
+#else
+#define KEY9TYPE int64_t
+#define KEY9NAME int64
+#define KEY9N i
+#define KEY9STOR int64_t key9
+#define KEY9CPY(m) m->key9=key9
+#endif
+#define KEY9_EQ_P JOIN(KEY9NAME,eq_p)
+#define KEY9_HASH JOIN(KEY9NAME,hash)
+#endif /* defined(KEY9_TYPE) */
+
+/* Not so many, cowboy! */
+#if defined (KEY10_TYPE)
+#error "excessive key arity == too many array indexes"
+#endif
+
+
+
#if KEY_ARITY == 1
#define KEYSYM(x) JOIN2(x,KEY1N,VALN)
#define ALLKEYS(x) x##1
@@ -187,6 +302,26 @@
#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5
#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5
#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);}
+#elif KEY_ARITY == 6
+#define KEYSYM(x) JOIN7(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);}
+#elif KEY_ARITY == 7
+#define KEYSYM(x) JOIN8(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);}
+#elif KEY_ARITY == 8
+#define KEYSYM(x) JOIN9(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);}
+#elif KEY_ARITY == 9
+#define KEYSYM(x) JOIN10(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,KEY9N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8, KEY9TYPE x##9
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);KEY9CPY(m);}
#endif
/* */
@@ -208,6 +343,18 @@ struct KEYSYM(map_node) {
KEY4STOR;
#if KEY_ARITY > 4
KEY5STOR;
+#if KEY_ARITY > 5
+ KEY6STOR;
+#if KEY_ARITY > 6
+ KEY7STOR;
+#if KEY_ARITY > 7
+ KEY8STOR;
+#if KEY_ARITY > 8
+ KEY9STOR;
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -266,6 +413,34 @@ static key_data KEYSYM(map_get_key) (struct map_node *mn, int n, int *type)
if (type)
*type = type_to_enum(KEY5TYPE);
break;
+#if KEY_ARITY > 5
+ case 6:
+ ptr = (key_data)m->key6;
+ if (type)
+ *type = type_to_enum(KEY6TYPE);
+ break;
+#if KEY_ARITY > 6
+ case 7:
+ ptr = (key_data)m->key7;
+ if (type)
+ *type = type_to_enum(KEY7TYPE);
+ break;
+#if KEY_ARITY > 7
+ case 8:
+ ptr = (key_data)m->key8;
+ if (type)
+ *type = type_to_enum(KEY8TYPE);
+ break;
+#if KEY_ARITY > 8
+ case 9:
+ ptr = (key_data)m->key9;
+ if (type)
+ *type = type_to_enum(KEY9TYPE);
+ break;
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -309,6 +484,34 @@ static unsigned int KEYSYM(keycheck) (ALLKEYSD(key))
if (key5 == NULL)
return 0;
#endif
+
+#if KEY_ARITY > 5
+#if KEY6_TYPE == STRING
+ if (key6 == NULL)
+ return 0;
+#endif
+
+#if KEY_ARITY > 6
+#if KEY7_TYPE == STRING
+ if (key7 == NULL)
+ return 0;
+#endif
+
+#if KEY_ARITY > 7
+#if KEY8_TYPE == STRING
+ if (key8 == NULL)
+ return 0;
+#endif
+
+#if KEY_ARITY > 8
+#if KEY9_TYPE == STRING
+ if (key9 == NULL)
+ return 0;
+#endif
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -327,6 +530,18 @@ static unsigned int KEYSYM(hash) (ALLKEYSD(key))
hash ^= KEY4_HASH(key4);
#if KEY_ARITY > 4
hash ^= KEY5_HASH(key5);
+#if KEY_ARITY > 5
+ hash ^= KEY6_HASH(key6);
+#if KEY_ARITY > 6
+ hash ^= KEY7_HASH(key7);
+#if KEY_ARITY > 7
+ hash ^= KEY8_HASH(key8);
+#if KEY_ARITY > 8
+ hash ^= KEY9_HASH(key9);
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -411,6 +626,18 @@ static int KEYSYM(__stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add)
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -462,6 +689,18 @@ static VALTYPE KEYSYM(_stp_map_get) (MAP map, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -498,6 +737,18 @@ static int KEYSYM(_stp_map_del) (MAP map, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -535,6 +786,18 @@ static int KEYSYM(_stp_map_exists) (MAP map, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -582,6 +845,34 @@ static int KEYSYM(_stp_map_exists) (MAP map, ALLKEYSD(key))
#undef KEY5STOR
#undef KEY5CPY
+#undef KEY6NAME
+#undef KEY6N
+#undef KEY6TYPE
+#undef KEY6_TYPE
+#undef KEY6STOR
+#undef KEY6CPY
+
+#undef KEY7NAME
+#undef KEY7N
+#undef KEY7TYPE
+#undef KEY7_TYPE
+#undef KEY7STOR
+#undef KEY7CPY
+
+#undef KEY8NAME
+#undef KEY8N
+#undef KEY8TYPE
+#undef KEY8_TYPE
+#undef KEY8STOR
+#undef KEY8CPY
+
+#undef KEY9NAME
+#undef KEY9N
+#undef KEY9TYPE
+#undef KEY9_TYPE
+#undef KEY9STOR
+#undef KEY9CPY
+
#undef KEY_ARITY
#undef ALLKEYS
#undef ALLKEYSD
diff --git a/runtime/map.c b/runtime/map.c
index de25d6f3..74467f30 100644
--- a/runtime/map.c
+++ b/runtime/map.c
@@ -38,27 +38,16 @@ static int int64_eq_p (int64_t key1, int64_t key2)
static void str_copy(char *dest, char *src)
{
- int len = 0;
- if (src) {
- len = strlen(src);
- if (len > MAP_STRING_LENGTH - 1)
- len = MAP_STRING_LENGTH - 1;
- memcpy (dest, src, len);
- }
- dest[len] = 0;
+ if (src) /* a NULL src is allowed and handled in the else branch */
+ strlcpy(dest, src, MAP_STRING_LENGTH); /* bounded copy into dest with MAP_STRING_LENGTH as the buffer size */
+ else
+ *dest = 0; /* NULL src yields an empty string */
}
static void str_add(void *dest, char *val)
{
char *dst = (char *)dest;
- int len = strlen(val);
- int len1 = strlen(dst);
- int num = MAP_STRING_LENGTH - 1 - len1;
-
- if (len > num)
- len = num;
- memcpy (&dst[len1], val, len);
- dst[len + len1] = 0;
+ strlcat(dst, val, MAP_STRING_LENGTH); /* bounded append of val to dst with MAP_STRING_LENGTH as the buffer size; replaces the manual length arithmetic above */
}
static int str_eq_p (char *key1, char *key2)
@@ -730,7 +719,7 @@ static MAP _stp_pmap_agg (PMAP pmap)
{
int i, hash;
MAP m, agg;
- struct map_node *ptr, *aptr;
+ struct map_node *ptr, *aptr = NULL;
struct hlist_head *head, *ahead;
struct hlist_node *e, *f;
diff --git a/runtime/pmap-gen.c b/runtime/pmap-gen.c
index 86c3dc42..c95adc6b 100644
--- a/runtime/pmap-gen.c
+++ b/runtime/pmap-gen.c
@@ -26,6 +26,14 @@
#define JOIN5x(a,b,c,d,e,f) a##_##b##c##d##e##f
#define JOIN6(a,b,c,d,e,f,g) JOIN6x(a,b,c,d,e,f,g)
#define JOIN6x(a,b,c,d,e,f,g) a##_##b##c##d##e##f##g
+#define JOIN7(a,b,c,d,e,f,g,h) JOIN7x(a,b,c,d,e,f,g,h)
+#define JOIN7x(a,b,c,d,e,f,g,h) a##_##b##c##d##e##f##g##h
+#define JOIN8(a,b,c,d,e,f,g,h,i) JOIN8x(a,b,c,d,e,f,g,h,i)
+#define JOIN8x(a,b,c,d,e,f,g,h,i) a##_##b##c##d##e##f##g##h##i
+#define JOIN9(a,b,c,d,e,f,g,h,i,j) JOIN9x(a,b,c,d,e,f,g,h,i,j)
+#define JOIN9x(a,b,c,d,e,f,g,h,i,j) a##_##b##c##d##e##f##g##h##i##j
+#define JOIN10(a,b,c,d,e,f,g,h,i,j,k) JOIN10x(a,b,c,d,e,f,g,h,i,j,k)
+#define JOIN10x(a,b,c,d,e,f,g,h,i,j,k) a##_##b##c##d##e##f##g##h##i##j##k
#include "map.h"
@@ -162,6 +170,113 @@
#define KEY5_HASH JOIN(KEY5NAME,hash)
#endif /* defined(KEY5_TYPE) */
+#if defined (KEY6_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 6
+#if KEY6_TYPE == STRING
+#define KEY6TYPE char*
+#define KEY6NAME str
+#define KEY6N s
+#define KEY6STOR char key6[MAP_STRING_LENGTH]
+#define KEY6CPY(m) str_copy(m->key6, key6)
+#else
+#define KEY6TYPE int64_t
+#define KEY6NAME int64
+#define KEY6N i
+#define KEY6STOR int64_t key6
+#define KEY6CPY(m) m->key6=key6
+#endif
+#define KEY6_EQ_P JOIN(KEY6NAME,eq_p)
+#define KEY6_HASH JOIN(KEY6NAME,hash)
+#endif /* defined(KEY6_TYPE) */
+
+#if defined (KEY7_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 7
+#if KEY7_TYPE == STRING
+#define KEY7TYPE char*
+#define KEY7NAME str
+#define KEY7N s
+#define KEY7STOR char key7[MAP_STRING_LENGTH]
+#define KEY7CPY(m) str_copy(m->key7, key7)
+#else
+#define KEY7TYPE int64_t
+#define KEY7NAME int64
+#define KEY7N i
+#define KEY7STOR int64_t key7
+#define KEY7CPY(m) m->key7=key7
+#endif
+#define KEY7_EQ_P JOIN(KEY7NAME,eq_p)
+#define KEY7_HASH JOIN(KEY7NAME,hash)
+#endif /* defined(KEY7_TYPE) */
+
+#if defined (KEY7_TYPE) /* NOTE(review): this whole block repeats the KEY7 block directly above, token for token */
+#undef KEY_ARITY
+#define KEY_ARITY 7
+#if KEY7_TYPE == STRING
+#define KEY7TYPE char*
+#define KEY7NAME str
+#define KEY7N s
+#define KEY7STOR char key7[MAP_STRING_LENGTH]
+#define KEY7CPY(m) str_copy(m->key7, key7)
+#else
+#define KEY7TYPE int64_t
+#define KEY7NAME int64
+#define KEY7N i
+#define KEY7STOR int64_t key7
+#define KEY7CPY(m) m->key7=key7
+#endif
+#define KEY7_EQ_P JOIN(KEY7NAME,eq_p)
+#define KEY7_HASH JOIN(KEY7NAME,hash)
+#endif /* defined(KEY7_TYPE) */
+
+#if defined (KEY8_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 8
+#if KEY8_TYPE == STRING
+#define KEY8TYPE char*
+#define KEY8NAME str
+#define KEY8N s
+#define KEY8STOR char key8[MAP_STRING_LENGTH]
+#define KEY8CPY(m) str_copy(m->key8, key8)
+#else
+#define KEY8TYPE int64_t
+#define KEY8NAME int64
+#define KEY8N i
+#define KEY8STOR int64_t key8
+#define KEY8CPY(m) m->key8=key8
+#endif
+#define KEY8_EQ_P JOIN(KEY8NAME,eq_p)
+#define KEY8_HASH JOIN(KEY8NAME,hash)
+#endif /* defined(KEY8_TYPE) */
+
+#if defined (KEY9_TYPE)
+#undef KEY_ARITY
+#define KEY_ARITY 9
+#if KEY9_TYPE == STRING
+#define KEY9TYPE char*
+#define KEY9NAME str
+#define KEY9N s
+#define KEY9STOR char key9[MAP_STRING_LENGTH]
+#define KEY9CPY(m) str_copy(m->key9, key9)
+#else
+#define KEY9TYPE int64_t
+#define KEY9NAME int64
+#define KEY9N i
+#define KEY9STOR int64_t key9
+#define KEY9CPY(m) m->key9=key9
+#endif
+#define KEY9_EQ_P JOIN(KEY9NAME,eq_p)
+#define KEY9_HASH JOIN(KEY9NAME,hash)
+#endif /* defined(KEY9_TYPE) */
+
+/* Not so many, cowboy! */
+#if defined (KEY10_TYPE)
+#error "excessive key arity == too many array indexes"
+#endif
+
+
+
#if KEY_ARITY == 1
#define KEYSYM(x) JOIN2(x,KEY1N,VALN)
#define ALLKEYS(x) x##1
@@ -187,6 +302,26 @@
#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5
#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5
#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);}
+#elif KEY_ARITY == 6
+#define KEYSYM(x) JOIN7(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);}
+#elif KEY_ARITY == 7
+#define KEYSYM(x) JOIN8(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);}
+#elif KEY_ARITY == 8
+#define KEYSYM(x) JOIN9(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);}
+#elif KEY_ARITY == 9
+#define KEYSYM(x) JOIN10(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,KEY9N,VALN)
+#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9
+#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8, KEY9TYPE x##9
+#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);KEY9CPY(m);}
#endif
/* */
@@ -208,6 +343,18 @@ struct KEYSYM(pmap_node) {
KEY4STOR;
#if KEY_ARITY > 4
KEY5STOR;
+#if KEY_ARITY > 5
+ KEY6STOR;
+#if KEY_ARITY > 6
+ KEY7STOR;
+#if KEY_ARITY > 7
+ KEY8STOR;
+#if KEY_ARITY > 8
+ KEY9STOR;
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -238,6 +385,18 @@ static int KEYSYM(pmap_key_cmp) (struct map_node *m1, struct map_node *m2)
&& KEY4_EQ_P(n1->key4, n2->key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n1->key5, n2->key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n1->key6, n2->key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n1->key7, n2->key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n1->key8, n2->key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n1->key9, n2->key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -282,6 +441,34 @@ static void KEYSYM(pmap_copy_keys) (struct map_node *m1, struct map_node *m2)
#else
dst->key5 = src->key5;
#endif
+#if KEY_ARITY > 5
+#if KEY6_TYPE == STRING
+ str_copy (dst->key6, src->key6);
+#else
+ dst->key6 = src->key6;
+#endif
+#if KEY_ARITY > 6
+#if KEY7_TYPE == STRING
+ str_copy (dst->key7, src->key7);
+#else
+ dst->key7 = src->key7;
+#endif
+#if KEY_ARITY > 7
+#if KEY8_TYPE == STRING
+ str_copy (dst->key8, src->key8);
+#else
+ dst->key8 = src->key8;
+#endif
+#if KEY_ARITY > 8
+#if KEY9_TYPE == STRING
+ str_copy (dst->key9, src->key9);
+#else
+ dst->key9 = src->key9;
+#endif
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -330,6 +517,34 @@ static key_data KEYSYM(pmap_get_key) (struct map_node *mn, int n, int *type)
if (type)
*type = type_to_enum(KEY5TYPE);
break;
+#if KEY_ARITY > 5
+ case 6:
+ ptr = (key_data)m->key6;
+ if (type)
+ *type = type_to_enum(KEY6TYPE);
+ break;
+#if KEY_ARITY > 6
+ case 7:
+ ptr = (key_data)m->key7;
+ if (type)
+ *type = type_to_enum(KEY7TYPE);
+ break;
+#if KEY_ARITY > 7
+ case 8:
+ ptr = (key_data)m->key8;
+ if (type)
+ *type = type_to_enum(KEY8TYPE);
+ break;
+#if KEY_ARITY > 8
+ case 9:
+ ptr = (key_data)m->key9;
+ if (type)
+ *type = type_to_enum(KEY9TYPE);
+ break;
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -373,6 +588,34 @@ static unsigned int KEYSYM(pkeycheck) (ALLKEYSD(key))
if (key5 == NULL)
return 0;
#endif
+
+#if KEY_ARITY > 5
+#if KEY6_TYPE == STRING
+ if (key6 == NULL)
+ return 0;
+#endif
+
+#if KEY_ARITY > 6
+#if KEY7_TYPE == STRING
+ if (key7 == NULL)
+ return 0;
+#endif
+
+#if KEY_ARITY > 7
+#if KEY8_TYPE == STRING
+ if (key8 == NULL)
+ return 0;
+#endif
+
+#if KEY_ARITY > 8
+#if KEY9_TYPE == STRING
+ if (key9 == NULL)
+ return 0;
+#endif
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -391,6 +634,18 @@ static unsigned int KEYSYM(phash) (ALLKEYSD(key))
hash ^= KEY4_HASH(key4);
#if KEY_ARITY > 4
hash ^= KEY5_HASH(key5);
+#if KEY_ARITY > 5
+ hash ^= KEY6_HASH(key6);
+#if KEY_ARITY > 6
+ hash ^= KEY7_HASH(key7);
+#if KEY_ARITY > 7
+ hash ^= KEY8_HASH(key8);
+#if KEY_ARITY > 8
+ hash ^= KEY9_HASH(key9);
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -504,6 +759,18 @@ static int KEYSYM(__stp_pmap_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add)
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -587,6 +854,18 @@ static VALTYPE KEYSYM(_stp_pmap_get_cpu) (PMAP pmap, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -637,6 +916,18 @@ static VALTYPE KEYSYM(_stp_pmap_get) (PMAP pmap, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -669,6 +960,18 @@ static VALTYPE KEYSYM(_stp_pmap_get) (PMAP pmap, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -723,6 +1026,18 @@ static int KEYSYM(__stp_pmap_del) (MAP map, ALLKEYSD(key))
&& KEY4_EQ_P(n->key4, key4)
#if KEY_ARITY > 4
&& KEY5_EQ_P(n->key5, key5)
+#if KEY_ARITY > 5
+ && KEY6_EQ_P(n->key6, key6)
+#if KEY_ARITY > 6
+ && KEY7_EQ_P(n->key7, key7)
+#if KEY_ARITY > 7
+ && KEY8_EQ_P(n->key8, key8)
+#if KEY_ARITY > 8
+ && KEY9_EQ_P(n->key9, key9)
+#endif
+#endif
+#endif
+#endif
#endif
#endif
#endif
@@ -788,6 +1103,34 @@ static int KEYSYM(_stp_pmap_del) (PMAP pmap, ALLKEYSD(key))
#undef KEY5STOR
#undef KEY5CPY
+#undef KEY6NAME
+#undef KEY6N
+#undef KEY6TYPE
+#undef KEY6_TYPE
+#undef KEY6STOR
+#undef KEY6CPY
+
+#undef KEY7NAME
+#undef KEY7N
+#undef KEY7TYPE
+#undef KEY7_TYPE
+#undef KEY7STOR
+#undef KEY7CPY
+
+#undef KEY8NAME
+#undef KEY8N
+#undef KEY8TYPE
+#undef KEY8_TYPE
+#undef KEY8STOR
+#undef KEY8CPY
+
+#undef KEY9NAME
+#undef KEY9N
+#undef KEY9TYPE
+#undef KEY9_TYPE
+#undef KEY9STOR
+#undef KEY9CPY
+
#undef KEY_ARITY
#undef ALLKEYS
#undef ALLKEYSD
diff --git a/runtime/print.c b/runtime/print.c
index c1fff306..d51c8108 100644
--- a/runtime/print.c
+++ b/runtime/print.c
@@ -16,6 +16,7 @@
#include "vsprintf.c"
#include "print.h"
#include "transport/transport.c"
+#include "vsprintf.c"
/** @file print.c
* Printing Functions.
@@ -168,34 +169,10 @@ static void _stp_print_binary (int num, ...)
*/
static void _stp_printf (const char *fmt, ...)
{
- int num;
va_list args;
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id());
- char *buf = pb->buf + pb->len;
- int size = STP_BUFFER_SIZE - pb->len;
-
va_start(args, fmt);
- num = _stp_vsnprintf(buf, size, fmt, args);
+ _stp_vsnprintf(NULL, 0, fmt, args);
va_end(args);
- if (unlikely(num >= size)) {
- /* overflowed the buffer */
- if (pb->len == 0) {
- /* A single print request exceeded the buffer size. */
- /* Should not be possible with Systemtap-generated code. */
- pb->len = STP_BUFFER_SIZE;
- _stp_print_flush();
- num = 0;
- } else {
- /* Need more space. Flush the previous contents */
- _stp_print_flush();
-
- /* try again */
- va_start(args, fmt);
- num = _stp_vsnprintf(pb->buf, STP_BUFFER_SIZE, fmt, args);
- va_end(args);
- }
- }
- pb->len += num;
}
/** Write a string into the print buffer.
diff --git a/runtime/ptrace_compatibility.h b/runtime/ptrace_compatibility.h
new file mode 100644
index 00000000..939c3b56
--- /dev/null
+++ b/runtime/ptrace_compatibility.h
@@ -0,0 +1,50 @@
+#ifndef _PTRACE_COMPATIBILITY_H_
+#define _PTRACE_COMPATIBILITY_H_
+
+#include <linux/ptrace.h>
+
+/* Older kernels' linux/ptrace.h doesn't define
+ * arch_has_single_step()/arch_has_block_step(). */
+
+#ifndef arch_has_single_step
+
+#include <linux/tracehook.h>
+
+/**
+ * arch_has_single_step - does this CPU support user-mode single-step?
+ *
+ * If this is defined, then there must be function declarations or
+ * inlines for user_enable_single_step() and user_disable_single_step().
+ * arch_has_single_step() should evaluate to nonzero iff the machine
+ * supports instruction single-step for user mode.
+ * It can be a constant or it can test a CPU feature bit.
+ */
+
+#ifdef ARCH_HAS_SINGLE_STEP
+#define arch_has_single_step() (ARCH_HAS_SINGLE_STEP)
+#else
+#define arch_has_single_step() (0)
+#endif /* ARCH_HAS_SINGLE_STEP */
+
+#endif /* arch_has_single_step */
+
+#ifndef arch_has_block_step
+/**
+ * arch_has_block_step - does this CPU support user-mode block-step?
+ *
+ * If this is defined, then there must be a function declaration or inline
+ * for user_enable_block_step(), and arch_has_single_step() must be defined
+ * too. arch_has_block_step() should evaluate to nonzero iff the machine
+ * supports step-until-branch for user mode. It can be a constant or it
+ * can test a CPU feature bit.
+ */
+
+#ifdef ARCH_HAS_BLOCK_STEP
+#define arch_has_block_step() (ARCH_HAS_BLOCK_STEP)
+#else
+#define arch_has_block_step() (0)
+#endif /* ARCH_HAS_BLOCK_STEP */
+
+#endif /* arch_has_block_step */
+
+#endif /* _PTRACE_COMPATIBILITY_H_ */
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 78c27a84..7418d13b 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -74,6 +74,13 @@ static struct
#define MAXTRACE 20
#endif
+/* dwarf unwinder only tested so far on i386 and x86_64. */
+#if (defined(__i386__) || defined(__x86_64__))
+#ifndef STP_USE_DWARF_UNWINDER
+#define STP_USE_DWARF_UNWINDER
+#endif
+#endif
+
#ifdef CONFIG_FRAME_POINTER
/* Just because frame pointers are available does not mean we can trust them. */
#ifndef STP_USE_DWARF_UNWINDER
@@ -81,19 +88,17 @@ static struct
#endif
#endif
-/* dwarf unwinder only tested so far on i386 and x86_64,
- but globally disabled for now */
-#if 0
-// !defined(STP_USE_FRAME_BUFFER) && (defined(__i386__) || defined(__x86_64__))
-#define STP_USE_DWARF_UNWINDER
-#endif
-
#include "alloc.c"
#include "print.c"
#include "string.c"
#include "io.c"
#include "arith.c"
#include "copy.c"
+#include "regs.c"
+#include "regs-ia64.c"
+
+#include "task_finder.c"
+
#include "sym.c"
#ifdef STP_PERFMON
#include "perf.c"
diff --git a/runtime/stack-arm.c b/runtime/stack-arm.c
index 9b0b772d..fcff0a3b 100644
--- a/runtime/stack-arm.c
+++ b/runtime/stack-arm.c
@@ -31,7 +31,8 @@ static int __init find_str_pc_offset(void)
}
-static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
+static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels,
+ struct task_struct *tsk)
{
#ifdef STP_USE_FRAME_POINTER
int pc_offset = find_str_pc_offset();
diff --git a/runtime/stack-i386.c b/runtime/stack-i386.c
index 5a18c9d8..69623765 100644
--- a/runtime/stack-i386.c
+++ b/runtime/stack-i386.c
@@ -23,14 +23,15 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve
/* cannot access stack. give up. */
return;
}
- if (_stp_func_print(addr, verbose, 0))
+ if (_stp_func_print(addr, verbose, 0, NULL))
levels--;
stack++;
}
}
#endif
-static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
+static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels,
+ struct task_struct *tsk)
{
unsigned long context = (unsigned long)&REG_SP(regs) & ~(THREAD_SIZE - 1);
@@ -43,7 +44,7 @@ static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
/* cannot access stack. give up. */
return;
}
- _stp_func_print(addr, verbose, 1);
+ _stp_func_print(addr, verbose, 1, NULL);
if (unlikely(_stp_read_address(next_fp, (unsigned long *)fp, KERNEL_DS))) {
/* cannot access stack. give up. */
return;
@@ -60,19 +61,23 @@ static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
struct unwind_frame_info info;
arch_unw_init_frame_info(&info, regs);
- while (levels && !arch_unw_user_mode(&info)) {
- int ret = unwind(&info);
+ while (levels && (tsk || !arch_unw_user_mode(&info))) {
+ int ret = unwind(&info, tsk);
dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info));
if (ret == 0) {
- _stp_func_print(UNW_PC(&info), verbose, 1);
+ _stp_func_print(UNW_PC(&info), verbose, 1, tsk);
levels--;
continue;
}
- /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */
- /* FIXME: is there a way to unwind across kretprobe trampolines? */
- if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline))
+ /* If an error happened or we hit a kretprobe trampoline,
+ * use fallback backtrace, unless user task backtrace.
+ * FIXME: is there a way to unwind across kretprobe
+ * trampolines? */
+ if ((ret < 0
+ || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline))
+ && ! (tsk || arch_unw_user_mode(&info)))
_stp_stack_print_fallback(UNW_SP(&info), verbose, levels);
- break;
+ return;
}
#else /* ! STP_USE_DWARF_UNWINDER */
_stp_stack_print_fallback((unsigned long)&REG_SP(regs), verbose, levels);
diff --git a/runtime/stack-ia64.c b/runtime/stack-ia64.c
index ca9d25a6..a04355fa 100644
--- a/runtime/stack-ia64.c
+++ b/runtime/stack-ia64.c
@@ -48,7 +48,8 @@ static void __stp_show_stack_addr(struct unw_frame_info *info, void *arg)
} while (unw_unwind(info) >= 0);
}
-static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
+static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels,
+ struct task_struct *tsk)
{
unsigned long *stack = (unsigned long *)&REG_SP(regs);
struct dump_para para;
diff --git a/runtime/stack-ppc64.c b/runtime/stack-ppc64.c
index 3dc38526..3267194e 100644
--- a/runtime/stack-ppc64.c
+++ b/runtime/stack-ppc64.c
@@ -7,7 +7,8 @@
* later version.
*/
-static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels)
+static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels,
+ struct task_struct *tsk)
{
unsigned long ip, newsp, lr = 0;
int count = 0;
diff --git a/runtime/stack-s390.c b/runtime/stack-s390.c
index c9654102..14e9b7d8 100644
--- a/runtime/stack-s390.c
+++ b/runtime/stack-s390.c
@@ -66,7 +66,8 @@ __stp_show_stack (unsigned long sp, unsigned long low,
}
static void __stp_stack_print (struct pt_regs *regs,
- int verbose, int levels)
+ int verbose, int levels,
+ struct task_struct *tsk)
{
unsigned long *_sp = (unsigned long *)&REG_SP(regs);
unsigned long sp = (unsigned long)_sp;
diff --git a/runtime/stack-x86_64.c b/runtime/stack-x86_64.c
index 03d88ef0..9afdf38a 100644
--- a/runtime/stack-x86_64.c
+++ b/runtime/stack-x86_64.c
@@ -19,7 +19,7 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve
/* cannot access stack. give up. */
return;
}
- if (_stp_func_print(addr, verbose, 0))
+ if (_stp_func_print(addr, verbose, 0, NULL))
levels--;
stack++;
}
@@ -27,26 +27,31 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve
#endif
-static void __stp_stack_print(struct pt_regs *regs, int verbose, int levels)
+static void __stp_stack_print(struct pt_regs *regs, int verbose, int levels,
+ struct task_struct *tsk)
{
#ifdef STP_USE_DWARF_UNWINDER
// FIXME: large stack allocation
struct unwind_frame_info info;
arch_unw_init_frame_info(&info, regs);
- while (levels && !arch_unw_user_mode(&info)) {
- int ret = unwind(&info);
+ while (levels && (tsk || !arch_unw_user_mode(&info))) {
+ int ret = unwind(&info, tsk);
dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info));
if (ret == 0) {
- _stp_func_print(UNW_PC(&info), verbose, 1);
+ _stp_func_print(UNW_PC(&info), verbose, 1, tsk);
levels--;
continue;
}
- /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */
- /* FIXME: is there a way to unwind across kretprobe trampolines? */
- if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline))
+ /* If an error happened or we hit a kretprobe trampoline,
+ * use fallback backtrace, unless user task backtrace.
+ * FIXME: is there a way to unwind across kretprobe
+ * trampolines? */
+ if ((ret < 0
+ || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline))
+ && ! (tsk || arch_unw_user_mode(&info)))
_stp_stack_print_fallback(UNW_SP(&info), verbose, levels);
- break;
+ return;
}
#else /* ! STP_USE_DWARF_UNWINDER */
_stp_stack_print_fallback(REG_SP(regs), verbose, levels);
diff --git a/runtime/stack.c b/runtime/stack.c
index f6b1cd08..042f44c7 100644
--- a/runtime/stack.c
+++ b/runtime/stack.c
@@ -1,6 +1,6 @@
/* -*- linux-c -*-
* Stack tracing functions
- * Copyright (C) 2005-2008 Red Hat Inc.
+ * Copyright (C) 2005-2009 Red Hat Inc.
* Copyright (C) 2005 Intel Corporation.
*
* This file is part of systemtap, and is free software. You can
@@ -77,7 +77,7 @@ static void print_stack_address(void *data, unsigned long addr, int reliable)
{
struct print_stack_data *sdata = data;
if (sdata->level++ < sdata->max_level)
- _stp_func_print(addr,sdata->verbose, 0);
+ _stp_func_print(addr, sdata->verbose, 0, NULL);
}
static const struct stacktrace_ops print_stack_ops = {
@@ -97,11 +97,17 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve
&print_data);
}
#endif
+
+// Without KPROBES very little works atm.
+// But this file is unconditionally imported, while these two functions are only
+// used through context-unwind.stp.
+#if defined (CONFIG_KPROBES)
+
/** Prints the stack backtrace
* @param regs A pointer to the struct pt_regs.
*/
-static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels)
+static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels, struct task_struct *tsk)
{
if (verbose) {
/* print the current address */
@@ -112,7 +118,10 @@ static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe
_stp_symbol_print((unsigned long)_stp_ret_addr_r(pi));
} else {
_stp_print_char(' ');
- _stp_symbol_print(REG_IP(regs));
+ if (tsk)
+ _stp_usymbol_print(REG_IP(regs), tsk);
+ else
+ _stp_symbol_print(REG_IP(regs));
}
_stp_print_char('\n');
} else if (pi)
@@ -120,7 +129,7 @@ static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe
else
_stp_printf("%p ", (int64_t) REG_IP(regs));
- __stp_stack_print(regs, verbose, levels);
+ __stp_stack_print(regs, verbose, levels, tsk);
}
/** Writes stack backtrace to a string
@@ -129,34 +138,19 @@ static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe
* @param regs A pointer to the struct pt_regs.
* @returns void
*/
-static void _stp_stack_snprint(char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels)
+static void _stp_stack_snprint(char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels, struct task_struct *tsk)
{
/* To get a string, we use a simple trick. First flush the print buffer, */
/* then call _stp_stack_print, then copy the result into the output string */
/* and clear the print buffer. */
_stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id());
_stp_print_flush();
- _stp_stack_print(regs, verbose, pi, levels);
+ _stp_stack_print(regs, verbose, pi, levels, tsk);
strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len);
pb->len = 0;
}
-/** Prints the user stack backtrace
- * @param str string
- * @returns Same string as was input with trace info appended,
- * @note Currently limited to a depth of two. Works from jprobes and kprobes.
- */
-#if 0
-static void _stp_ustack_print(char *str)
-{
- struct pt_regs *nregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)current->thread_info)) - 1;
- _stp_printf("%p : [user]\n", (int64_t) REG_IP(nregs));
- if (REG_SP(nregs))
- _stp_printf("%p : [user]\n", (int64_t) (*(unsigned long *)REG_SP(nregs)));
-}
-#endif /* 0 */
-
-/** @} */
+#endif /* CONFIG_KPROBES */
void _stp_stack_print_tsk(struct task_struct *tsk, int verbose, int levels)
{
diff --git a/runtime/staprun/common.c b/runtime/staprun/common.c
index fd16b4b8..c67ce340 100644
--- a/runtime/staprun/common.c
+++ b/runtime/staprun/common.c
@@ -27,6 +27,9 @@ int attach_mod;
int delete_mod;
int load_only;
int need_uprobes;
+int daemon_mode;
+off_t fsize_max;
+int fnum_max;
/* module variables */
char *modname = NULL;
@@ -35,9 +38,38 @@ char *modoptions[MAXMODOPTIONS];
int control_channel = -1; /* NB: fd==0 possible */
+/* Scratch buffer holding the absolute path built by get_abspath(). */
+static char path_buf[PATH_MAX];
+
+/*
+ * Return an absolute form of 'path'.
+ *
+ * A path that already starts with '/' is returned unchanged.  Otherwise
+ * the current working directory is prepended and a pointer to the static
+ * path_buf is returned, so the result is overwritten by the next call.
+ *
+ * Returns NULL when the current directory cannot be determined or when
+ * the combined name does not fit in PATH_MAX bytes.
+ */
+static char *get_abspath(char *path)
+{
+	size_t len;
+
+	if (path[0] == '/')
+		return path;
+
+	/* getcwd() returns NULL on failure; handing that to strlen()
+	 * would be undefined behaviour, so bail out first. */
+	if (getcwd(path_buf, PATH_MAX) == NULL)
+		return NULL;
+
+	len = strlen(path_buf);
+	if (len + 2 + strlen(path) >= PATH_MAX)
+		return NULL;
+	path_buf[len] = '/';
+	strcpy(&path_buf[len + 1], path);
+	return path_buf;
+}
+
+/*
+ * Format the time 't', converted to local time, into 'buf' using the
+ * strftime(3) format string 'fmt'.
+ *
+ * buf: destination buffer (must not be NULL)
+ * max: capacity of 'buf' in bytes (must be greater than 1)
+ * fmt: strftime(3) format string (must not be NULL)
+ * t:   time to format
+ *
+ * Returns the number of bytes placed in 'buf' (excluding the trailing
+ * NUL), or -EINVAL if an argument is invalid, the time cannot be
+ * converted, or strftime() produced nothing (empty result or overflow).
+ */
+int stap_strfloctime(char *buf, size_t max, const char *fmt, time_t t)
+{
+	struct tm tm;
+	size_t ret;
+
+	if (buf == NULL || fmt == NULL || max <= 1)
+		return -EINVAL;
+	/* localtime_r() returns NULL on failure and leaves 'tm' unset;
+	 * do not feed an uninitialised struct to strftime(). */
+	if (localtime_r(&t, &tm) == NULL)
+		return -EINVAL;
+	ret = strftime(buf, max, fmt, &tm);
+	if (ret == 0)
+		return -EINVAL;
+	return (int)ret;
+}
+
void parse_args(int argc, char **argv)
{
int c;
+ char *s;
/* Initialize option variables. */
verbose = 0;
@@ -49,8 +81,11 @@ void parse_args(int argc, char **argv)
delete_mod = 0;
load_only = 0;
need_uprobes = 0;
+ daemon_mode = 0;
+ fsize_max = 0;
+ fnum_max = 0;
- while ((c = getopt(argc, argv, "ALuvb:t:dc:o:x:")) != EOF) {
+ while ((c = getopt(argc, argv, "ALuvb:t:dc:o:x:S:D")) != EOF) {
switch (c) {
case 'u':
need_uprobes = 1;
@@ -85,11 +120,38 @@ void parse_args(int argc, char **argv)
case 'L':
load_only = 1;
break;
+ case 'D':
+ daemon_mode = 1;
+ break;
+ case 'S':
+ fsize_max = strtoul(optarg, &s, 10);
+ fsize_max <<= 20;
+ if (s[0] == ',')
+ fnum_max = (int)strtoul(&s[1], &s, 10);
+ if (s[0] != '\0') {
+ err("Invalid file size option '%s'.\n", optarg);
+ usage(argv[0]);
+ }
+ break;
default:
usage(argv[0]);
}
}
-
+ if (outfile_name) {
+ char tmp[PATH_MAX];
+ int ret;
+ outfile_name = get_abspath(outfile_name);
+ if (outfile_name == NULL) {
+ err("File name is too long.\n");
+ usage(argv[0]);
+ }
+ ret = stap_strfloctime(tmp, PATH_MAX - 18, /* = _cpuNNN.SSSSSSSSSS */
+ outfile_name, time(NULL));
+ if (ret < 0) {
+ err("Filename format is invalid or too long.\n");
+ usage(argv[0]);
+ }
+ }
if (attach_mod && load_only) {
err("You can't specify the '-A' and '-L' options together.\n");
usage(argv[0]);
@@ -118,18 +180,40 @@ void parse_args(int argc, char **argv)
err("You can't specify the '-c' and '-x' options together.\n");
usage(argv[0]);
}
+
+ if (daemon_mode && load_only) {
+ err("You can't specify the '-D' and '-L' options together.\n");
+ usage(argv[0]);
+ }
+ if (daemon_mode && delete_mod) {
+ err("You can't specify the '-D' and '-d' options together.\n");
+ usage(argv[0]);
+ }
+ if (daemon_mode && target_cmd) {
+ err("You can't specify the '-D' and '-c' options together.\n");
+ usage(argv[0]);
+ }
+ if (daemon_mode && outfile_name == NULL) {
+ err("You have to specify output FILE with '-D' option.\n");
+ usage(argv[0]);
+ }
+ if (outfile_name == NULL && fsize_max != 0) {
+ err("You have to specify output FILE with '-S' option.\n");
+ usage(argv[0]);
+ }
}
void usage(char *prog)
{
- err("\n%s [-v] [-c cmd ] [-x pid] [-u user]\n"
- "\t[-A|-L] [-b bufsize] [-o FILE] MODULE [module-options]\n", prog);
+ err("\n%s [-v] [-c cmd ] [-x pid] [-u user] [-A|-L|-d]\n"
+ "\t[-b bufsize] [-o FILE [-D] [-S size[,N]]] MODULE [module-options]\n", prog);
err("-v Increase verbosity.\n");
err("-c cmd Command \'cmd\' will be run and staprun will\n");
err(" exit when it does. The '_stp_target' variable\n");
err(" will contain the pid for the command.\n");
err("-x pid Sets the '_stp_target' variable to pid.\n");
- err("-o FILE Send output to FILE.\n");
+ err("-o FILE Send output to FILE. This supports strftime(3)\n");
+ err(" formats for FILE.\n");
err("-b buffer size The systemtap module specifies a buffer size.\n");
err(" Setting one here will override that value. The\n");
err(" value should be an integer between 1 and 4095 \n");
@@ -140,6 +224,14 @@ void usage(char *prog)
err("-d Delete a module. Only detached or unused modules\n");
err(" the user has permission to access will be deleted. Use \"*\"\n");
err(" (quoted) to delete all unused modules.\n");
+ err("-D Run in background. This requires '-o' option.\n");
+ err("-S size[,N] Switches output file to next file when the size\n");
+ err(" of file reaches the specified size. The value\n");
+ err(" should be an integer greater than 1 which is\n");
+ err(" assumed to be the maximum file size in MB.\n");
+ err(" When the number of output files reaches N, it\n");
+ err(" switches to the first output file. You can omit\n");
+ err(" the second argument.\n");
err("MODULE can be either a module name or a module path. If a\n");
err("module name is used, it is looked for in the following\n");
err("directory: /lib/modules/`uname -r`/systemtap\n");
@@ -344,3 +436,24 @@ int send_request(int type, void *data, int len)
if (rc < 0) return rc;
return (rc != len+4);
}
+
+#include <stdarg.h>
+
+static int use_syslog = 0;
+
+/*
+ * printf-style error reporter.  The message goes to syslog at LOG_ERR
+ * priority once use_syslog has been set, and to stderr otherwise.
+ */
+void eprintf(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	if (!use_syslog) {
+		vfprintf(stderr, fmt, ap);
+	} else {
+		vsyslog(LOG_ERR, fmt, ap);
+	}
+	va_end(ap);
+}
+
+/*
+ * Open a syslog connection identified by 'name' (messages tagged with
+ * the pid, LOG_DAEMON facility) and set use_syslog so that later
+ * eprintf() calls are sent to syslog instead of stderr.
+ */
+void switch_syslog(const char *name)
+{
+ openlog(name, LOG_PID, LOG_DAEMON);
+ use_syslog = 1;
+}
diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c
index 0745f611..7125a7bb 100644
--- a/runtime/staprun/mainloop.c
+++ b/runtime/staprun/mainloop.c
@@ -7,7 +7,7 @@
* Public License (GPL); either version 2, or (at your option) any
* later version.
*
- * Copyright (C) 2005-2008 Red Hat Inc.
+ * Copyright (C) 2005-2009 Red Hat Inc.
*/
#include "staprun.h"
@@ -318,6 +318,41 @@ int init_stapio(void)
if (target_cmd)
start_cmd();
+ /* Run in background */
+ if (daemon_mode) {
+ pid_t pid;
+ int ret;
+ dbug(2, "daemonizing stapio\n");
+
+ /* daemonize */
+ ret = daemon(0, 1); /* don't close stdout at this time. */
+ if (ret) {
+ err("Failed to daemonize stapio\n");
+ return -1;
+ }
+
+ /* change error messages to syslog. */
+ switch_syslog("stapio");
+
+ /* show new pid */
+ pid = getpid();
+ fprintf(stdout, "%d\n", pid);
+ fflush(stdout);
+
+ /* redirect all outputs to /dev/null */
+ ret = open("/dev/null", O_RDWR);
+ if (ret < 0) {
+ err("Failed to open /dev/null\n");
+ return -1;
+ }
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+ dup2(ret, STDOUT_FILENO);
+ dup2(ret, STDERR_FILENO);
+ close(ret);
+ }
+
return 0;
}
@@ -360,10 +395,10 @@ void cleanup_and_exit(int detach)
#define BUG9788_WORKAROUND
#ifndef BUG9788_WORKAROUND
dbug(2, "removing %s\n", modname);
- if (execlp(staprun, basename (staprun), "-d", modname, NULL) < 0) {
+ if (execlp(staprun, basename (staprun), "-d", modpath, NULL) < 0) {
if (errno == ENOEXEC) {
char *cmd;
- if (asprintf(&cmd, "%s -d '%s'", staprun, modname) > 0)
+ if (asprintf(&cmd, "%s -d '%s'", staprun, modpath) > 0)
execl("/bin/sh", "sh", "-c", cmd, NULL);
free(cmd);
}
@@ -392,10 +427,10 @@ void cleanup_and_exit(int detach)
if (pid == 0) { /* child process */
/* Run the command. */
- if (execlp(staprun, basename (staprun), "-d", modname, NULL) < 0) {
+ if (execlp(staprun, basename (staprun), "-d", modpath, NULL) < 0) {
if (errno == ENOEXEC) {
char *cmd;
- if (asprintf(&cmd, "%s -d '%s'", staprun, modname) > 0)
+ if (asprintf(&cmd, "%s -d '%s'", staprun, modpath) > 0)
execl("/bin/sh", "sh", "-c", cmd, NULL);
free(cmd);
}
@@ -454,21 +489,14 @@ int stp_main_loop(void)
switch (type) {
#if STP_TRANSPORT_VERSION == 1
case STP_REALTIME_DATA:
- {
- ssize_t bw = write(out_fd[0], data, nb);
- if (bw >= 0 && bw != nb) {
- nb = nb - bw;
- bw = write(out_fd[0], data, nb);
- }
- if (bw != nb) {
- _perr("write error (nb=%ld)", (long)nb);
- cleanup_and_exit(0);
- }
- break;
+ if (write_realtime_data(data, nb)) {
+ _perr("write error (nb=%ld)", (long)nb);
+ cleanup_and_exit(0);
}
+ break;
#endif
case STP_OOB_DATA:
- fputs((char *)data, stderr);
+ eprintf("%s", (char *)data);
break;
case STP_EXIT:
{
@@ -477,6 +505,14 @@ int stp_main_loop(void)
cleanup_and_exit(0);
break;
}
+ case STP_REQUEST_EXIT:
+ {
+ /* module asks us to start exiting, so send STP_EXIT */
+ dbug(2, "got STP_REQUEST_EXIT\n");
+ int32_t rc, btype = STP_EXIT;
+ rc = write(control_channel, &btype, sizeof(btype));
+ break;
+ }
case STP_START:
{
struct _stp_msg_start *t = (struct _stp_msg_start *)data;
diff --git a/runtime/staprun/modverify.c b/runtime/staprun/modverify.c
new file mode 100644
index 00000000..b50a69f4
--- /dev/null
+++ b/runtime/staprun/modverify.c
@@ -0,0 +1,391 @@
+/*
+ This program verifies the given file using the given signature, the named
+ certificate and public key in the given certificate database.
+
+ Copyright (C) 2009 Red Hat Inc.
+
+ This file is part of systemtap, and is free software. You can
+ redistribute it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+
+#include <nspr.h>
+#include <nss.h>
+#include <pk11pub.h>
+#include <cryptohi.h>
+#include <keyhi.h>
+#include <cert.h>
+#include <certt.h>
+
+#include "nsscommon.h"
+#include "modverify.h"
+
+#include <sys/stat.h>
+
+/* Function: int check_cert_db_permissions (const char *cert_db_path);
+ *
+ * Check that the given certificate directory and its contents have
+ * the correct permissions.
+ *
+ * Returns 0 if there is an error, 1 otherwise.
+ */
+static int
+check_db_file_permissions (const char *cert_db_file) {
+ struct stat info;
+ int rc;
+
+ rc = stat (cert_db_file, & info);
+ if (rc)
+ {
+ fprintf (stderr, "Could not obtain information on certificate database file %s.\n",
+ cert_db_file);
+ perror ("");
+ return 0;
+ }
+
+ rc = 1; /* ok */
+
+ /* The owner of the file must be root. */
+ if (info.st_uid != 0)
+ {
+ fprintf (stderr, "Certificate database file %s must be owned by root.\n",
+ cert_db_file);
+ rc = 0;
+ }
+
+ /* Check the access permissions of the file. */
+ if ((info.st_mode & S_IRUSR) == 0)
+ fprintf (stderr, "Certificate database file %s should be readable by the owner.\n", cert_db_file);
+ if ((info.st_mode & S_IWUSR) == 0)
+ fprintf (stderr, "Certificate database file %s should be writeable by the owner.\n", cert_db_file);
+ if ((info.st_mode & S_IXUSR) != 0)
+ {
+ fprintf (stderr, "Certificate database file %s must not be executable by the owner.\n", cert_db_file);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IRGRP) == 0)
+ {
+ fprintf (stderr, "Certificate database file %s should be readable by the group.\n", cert_db_file);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IWGRP) != 0)
+ {
+ fprintf (stderr, "Certificate database file %s must not be writable by the group.\n", cert_db_file);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IXGRP) != 0)
+ {
+ fprintf (stderr, "Certificate database file %s must not be executable by the group.\n", cert_db_file);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IROTH) == 0)
+ {
+ fprintf (stderr, "Certificate database file %s should be readable by others.\n", cert_db_file);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IWOTH) != 0)
+ {
+ fprintf (stderr, "Certificate database file %s must not be writable by others.\n", cert_db_file);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IXOTH) != 0)
+ {
+ fprintf (stderr, "Certificate database file %s must not be executable by others.\n", cert_db_file);
+ rc = 0;
+ }
+
+ return rc;
+}
+
+/* Function: int check_cert_db_permissions (const char *cert_db_path);
+ *
+ * Check that the given certificate directory and its contents have
+ * the correct permissions.
+ *
+ * The directory must be owned by root and must not be writable by the
+ * group or by others; the remaining directory permission problems are
+ * reported on stderr as warnings only.  The files cert8.db, key3.db and
+ * secmod.db inside the directory are then checked with
+ * check_db_file_permissions().
+ *
+ * Returns 0 if there is an error, 1 otherwise.
+ */
+static int
+check_cert_db_permissions (const char *cert_db_path) {
+ struct stat info;
+ char *fileName;
+ int rc;
+
+ rc = stat (cert_db_path, & info);
+ if (rc)
+ {
+ fprintf (stderr, "Could not obtain information on certificate database directory %s.\n",
+ cert_db_path);
+ perror ("");
+ return 0;
+ }
+
+ /* From here on rc is the result flag: it stays 1 unless a check fails. */
+ rc = 1; /* ok */
+
+ /* The owner of the database must be root. */
+ if (info.st_uid != 0)
+ {
+ fprintf (stderr, "Certificate database directory %s must be owned by root.\n", cert_db_path);
+ rc = 0;
+ }
+
+ /* Check the database directory access permissions */
+ /* Only the two "must not be writable" cases clear rc; the "should be"
+ cases below just print a warning. */
+ if ((info.st_mode & S_IRUSR) == 0)
+ fprintf (stderr, "Certificate database %s should be readable by the owner.\n", cert_db_path);
+ if ((info.st_mode & S_IWUSR) == 0)
+ fprintf (stderr, "Certificate database %s should be writeable by the owner.\n", cert_db_path);
+ if ((info.st_mode & S_IXUSR) == 0)
+ fprintf (stderr, "Certificate database %s should be searchable by the owner.\n", cert_db_path);
+ if ((info.st_mode & S_IRGRP) == 0)
+ fprintf (stderr, "Certificate database %s should be readable by the group.\n", cert_db_path);
+ if ((info.st_mode & S_IWGRP) != 0)
+ {
+ fprintf (stderr, "Certificate database %s must not be writable by the group.\n", cert_db_path);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IXGRP) == 0)
+ fprintf (stderr, "Certificate database %s should be searchable by the group.\n", cert_db_path);
+ if ((info.st_mode & S_IROTH) == 0)
+ fprintf (stderr, "Certificate database %s should be readable by others.\n", cert_db_path);
+ if ((info.st_mode & S_IWOTH) != 0)
+ {
+ fprintf (stderr, "Certificate database %s must not be writable by others.\n", cert_db_path);
+ rc = 0;
+ }
+ if ((info.st_mode & S_IXOTH) == 0)
+ fprintf (stderr, "Certificate database %s should be searchable by others.\n", cert_db_path);
+
+ /* Now check the permissions of the critical files. */
+ /* 11 extra bytes: room for the longest suffix, "/secmod.db" (10
+ characters), plus the terminating NUL. */
+ fileName = PORT_Alloc (strlen (cert_db_path) + 11);
+ if (! fileName)
+ {
+ fprintf (stderr, "Unable to allocate memory for certificate database file names\n");
+ return 0;
+ }
+
+ /* Each file is checked even if an earlier one failed, so that all
+ problems get reported; rc ends up 0 if any check returned 0. */
+ sprintf (fileName, "%s/cert8.db", cert_db_path);
+ rc &= check_db_file_permissions (fileName);
+ sprintf (fileName, "%s/key3.db", cert_db_path);
+ rc &= check_db_file_permissions (fileName);
+ sprintf (fileName, "%s/secmod.db", cert_db_path);
+ rc &= check_db_file_permissions (fileName);
+
+ PORT_Free (fileName);
+
+ if (rc == 0)
+ fprintf (stderr, "Unable to use certificate database %s due to errors.\n", cert_db_path);
+
+ return rc;
+}
+
+/* Function: int verify_it (const char *inputName, const char *signatureName,
+ *                          SECKEYPublicKey *pubKey);
+ *
+ * Verify the file inputName against the signature stored in the file
+ * signatureName, using the public key pubKey.
+ *
+ * Returns MODULE_OK if the signature matches, MODULE_UNTRUSTED if the
+ * signature file is unusable or the key does not match the signature,
+ * MODULE_ALTERED if the final verification step fails, and
+ * MODULE_CHECK_ERROR on any other error.
+ *
+ * All resources acquired here (file descriptors, the signature buffer
+ * and the verification context) are released on every return path.
+ */
+static int
+verify_it (const char *inputName, const char *signatureName, SECKEYPublicKey *pubKey)
+{
+  unsigned char buffer[4096];
+  PRFileInfo info;
+  PRStatus prStatus;
+  PRInt32 numBytes;
+  PRFileDesc *local_file_fd = NULL;
+  VFYContext *vfy = NULL;
+  SECItem signature;
+  SECStatus secStatus;
+  int rc = MODULE_CHECK_ERROR;
+
+  signature.data = NULL;
+  signature.len = 0;
+
+  /* Get the size of the signature file. */
+  prStatus = PR_GetFileInfo (signatureName, &info);
+  if (prStatus != PR_SUCCESS || info.type != PR_FILE_FILE || info.size < 0)
+    {
+      fprintf (stderr, "Unable to obtain information on the signature file %s.\n", signatureName);
+      nssError ();
+      return MODULE_UNTRUSTED; /* Not signed */
+    }
+
+  /* Open the signature file. */
+  local_file_fd = PR_Open (signatureName, PR_RDONLY, 0);
+  if (local_file_fd == NULL)
+    {
+      fprintf (stderr, "Could not open the signature file %s.\n", signatureName);
+      nssError ();
+      goto done;
+    }
+
+  /* Allocate space to read the signature file. */
+  signature.data = PORT_Alloc (info.size);
+  if (! signature.data)
+    {
+      fprintf (stderr, "Unable to allocate memory for the signature in %s.\n", signatureName);
+      nssError ();
+      goto done;
+    }
+
+  /* Read the signature. */
+  numBytes = PR_Read (local_file_fd, signature.data, info.size);
+  if (numBytes == 0) /* EOF */
+    {
+      fprintf (stderr, "EOF reading signature file %s.\n", signatureName);
+      goto done;
+    }
+  if (numBytes < 0)
+    {
+      fprintf (stderr, "Error reading signature file %s.\n", signatureName);
+      nssError ();
+      goto done;
+    }
+  if (numBytes != info.size)
+    {
+      fprintf (stderr, "Incomplete data while reading signature file %s.\n", signatureName);
+      goto done;
+    }
+  signature.len = info.size;
+
+  /* Done with the signature file. */
+  PR_Close (local_file_fd);
+  local_file_fd = NULL;
+
+  /* Create a verification context. */
+  vfy = VFY_CreateContextDirect (pubKey, & signature, SEC_OID_PKCS1_RSA_ENCRYPTION,
+                                 SEC_OID_UNKNOWN, NULL, NULL);
+  if (! vfy)
+    {
+      /* The key does not match the signature. This is not an error. It just means
+         we are currently trying the wrong certificate/key. i.e. the module
+         remains untrusted for now. */
+      rc = MODULE_UNTRUSTED;
+      goto done;
+    }
+
+  /* Begin the verification process. */
+  secStatus = VFY_Begin (vfy);
+  if (secStatus != SECSuccess)
+    {
+      fprintf (stderr, "Unable to initialize verification context while verifying %s using the signature in %s.\n",
+               inputName, signatureName);
+      nssError ();
+      goto done;
+    }
+
+  /* Now read the data and add it to the signature. */
+  local_file_fd = PR_Open (inputName, PR_RDONLY, 0);
+  if (local_file_fd == NULL)
+    {
+      fprintf (stderr, "Could not open module file %s.\n", inputName);
+      nssError ();
+      goto done;
+    }
+
+  for (;;)
+    {
+      numBytes = PR_Read (local_file_fd, buffer, sizeof (buffer));
+      if (numBytes == 0)
+        break; /* EOF */
+
+      if (numBytes < 0)
+        {
+          fprintf (stderr, "Error reading module file %s.\n", inputName);
+          nssError ();
+          goto done;
+        }
+
+      /* Add the data to the signature. */
+      secStatus = VFY_Update (vfy, buffer, numBytes);
+      if (secStatus != SECSuccess)
+        {
+          fprintf (stderr, "Error while verifying module file %s.\n", inputName);
+          nssError ();
+          goto done;
+        }
+    }
+
+  PR_Close (local_file_fd);
+  local_file_fd = NULL;
+
+  /* Complete the verification. */
+  secStatus = VFY_End (vfy);
+  if (secStatus != SECSuccess)
+    {
+      fprintf (stderr, "Unable to verify signed module %s. It may have been altered since it was created.\n", inputName);
+      nssError ();
+      rc = MODULE_ALTERED;
+      goto done;
+    }
+
+  rc = MODULE_OK;
+
+ done:
+  /* Single exit point: release whatever is still held.  Errors are
+     reported via nssError() before jumping here so that the cleanup
+     calls cannot overwrite the NSPR/NSS error code. */
+  if (local_file_fd)
+    PR_Close (local_file_fd);
+  if (vfy)
+    VFY_DestroyContext (vfy, PR_TRUE /* also free the context itself */);
+  if (signature.data)
+    PORT_Free (signature.data);
+
+  return rc;
+}
+
+/* Function: int verify_module (const char *module_name,
+ *                              const char *signature_name);
+ *
+ * Try to verify module_name against the signature in signature_name
+ * using each certificate found in the certificate database under
+ * SYSCONFDIR "/systemtap/staprun".
+ *
+ * Returns MODULE_OK when some certificate verifies the module,
+ * MODULE_ALTERED when verification fails with a matching key,
+ * MODULE_UNTRUSTED when the database is missing, has bad permissions or
+ * contains no matching certificate, and MODULE_CHECK_ERROR on other
+ * errors.
+ */
+int verify_module (const char *module_name, const char *signature_name)
+{
+  const char *dbdir = SYSCONFDIR "/systemtap/staprun";
+  SECKEYPublicKey *pubKey;
+  SECStatus secStatus;
+  CERTCertList *certList;
+  CERTCertListNode *certListNode;
+  CERTCertificate *cert;
+  PRStatus prStatus;
+  PRFileInfo info;
+  int rc = MODULE_UNTRUSTED;
+
+  /* Look for the certificate database. If it's not there, it's not an error, it
+     just means that the module can't be verified. */
+  prStatus = PR_GetFileInfo (dbdir, &info);
+  if (prStatus != PR_SUCCESS || info.type != PR_FILE_DIRECTORY)
+    return MODULE_UNTRUSTED;
+
+  /* Verify the permissions of the certificate database and its files. */
+  if (! check_cert_db_permissions (dbdir))
+    return MODULE_UNTRUSTED;
+
+  /* Call the NSPR initialization routines. */
+  PR_Init (PR_SYSTEM_THREAD, PR_PRIORITY_NORMAL, 1);
+
+  /* Initialize NSS. */
+  secStatus = NSS_Init (dbdir);
+  if (secStatus != SECSuccess)
+    {
+      fprintf (stderr, "Unable to initialize nss library using the database in %s.\n",
+               dbdir);
+      nssError ();
+      /* NOTE(review): NSS never came up, so nssCleanup () is not called
+         here -- confirm whether NSPR should still be shut down.  */
+      return MODULE_CHECK_ERROR;
+    }
+
+  certList = PK11_ListCerts (PK11CertListAll, NULL);
+  if (certList == NULL)
+    {
+      fprintf (stderr, "Unable to find certificates in the certificate database in %s.\n",
+               dbdir);
+      nssError ();
+      rc = MODULE_UNTRUSTED;
+      goto shutdown;
+    }
+
+  /* We need to look at each certificate in the database. */
+  for (certListNode = CERT_LIST_HEAD (certList);
+       ! CERT_LIST_END (certListNode, certList);
+       certListNode = CERT_LIST_NEXT (certListNode))
+    {
+      cert = certListNode->cert;
+
+      pubKey = CERT_ExtractPublicKey (cert);
+      if (pubKey == NULL)
+        {
+          fprintf (stderr, "Unable to extract public key from the certificate with nickname %s from the certificate database in %s.\n",
+                   cert->nickname, dbdir);
+          nssError ();
+          rc = MODULE_CHECK_ERROR;
+          break;
+        }
+
+      /* Verify the file. */
+      rc = verify_it (module_name, signature_name, pubKey);
+
+      /* The extracted key is owned by us; release it before moving on.  */
+      SECKEY_DestroyPublicKey (pubKey);
+
+      if (rc == MODULE_OK || rc == MODULE_ALTERED || rc == MODULE_CHECK_ERROR)
+        break; /* resolved or error */
+    }
+
+  /* Drop our references to the certificates before shutting NSS down.  */
+  CERT_DestroyCertList (certList);
+
+ shutdown:
+  /* Shutdown NSS and exit NSPR gracefully. */
+  nssCleanup ();
+
+  return rc;
+}
+
+/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
diff --git a/runtime/staprun/modverify.h b/runtime/staprun/modverify.h
new file mode 100644
index 00000000..49b90bfe
--- /dev/null
+++ b/runtime/staprun/modverify.h
@@ -0,0 +1,9 @@
+#ifndef MODVERIFY_H
+#define MODVERIFY_H
+
+/* Check the module file module_name against the signature file
+   signature_name.  Returns one of the MODULE_* codes below. */
+int verify_module (const char *module_name, const char *signature_name);
+
+/* Return codes for verify_module.  The values are parenthesized so the
+   negative ones stay a single operand wherever the macro is expanded.
+     MODULE_OK          - verification resolved successfully
+     MODULE_UNTRUSTED   - the module could not be verified (e.g. no usable
+                          certificate database)
+     MODULE_CHECK_ERROR - an error occurred while checking
+     MODULE_ALTERED     - the module has been tampered with (altered)  */
+#define MODULE_OK (1)
+#define MODULE_UNTRUSTED (0)
+#define MODULE_CHECK_ERROR (-1)
+#define MODULE_ALTERED (-2)
+
+#endif /* MODVERIFY_H */
+
+/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
diff --git a/runtime/staprun/relay.c b/runtime/staprun/relay.c
index 19621933..b9796241 100644
--- a/runtime/staprun/relay.c
+++ b/runtime/staprun/relay.c
@@ -17,6 +17,9 @@ static pthread_t reader[NR_CPUS];
static int relay_fd[NR_CPUS];
static int bulkmode = 0;
static volatile int stop_threads = 0;
+static time_t *time_backlog[NR_CPUS];
+static int backlog_order=0;
+#define BACKLOG_MASK ((1 << backlog_order) - 1)
/*
* ppoll exists in glibc >= 2.4
@@ -44,6 +47,90 @@ static int ppoll(struct pollfd *fds, nfds_t nfds,
}
#endif
+/*
+ * init_backlog - allocate the timestamp ring for one cpu.
+ *
+ * The ring holds the creation time of each live output file so that the
+ * oldest file name can be rebuilt when it has to be removed.  Its size is
+ * the smallest power of two that is >= fnum_max, which lets BACKLOG_MASK
+ * turn a file number into a slot index.  Nothing is allocated when
+ * fnum_max is 0.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+int init_backlog(int cpu)
+{
+	int bits;
+	time_t *slots;
+
+	if (fnum_max == 0)
+		return 0;
+
+	/* Count the significant bits of fnum_max. */
+	for (bits = 0; fnum_max >> bits; bits++)
+		;
+	/* An exact power of two needs one bit less. */
+	if ((1 << (bits - 1)) == fnum_max)
+		bits--;
+
+	slots = (time_t *)calloc(1 << bits, sizeof(time_t));
+	time_backlog[cpu] = slots;
+	if (slots == NULL) {
+		_err("Memory allocation failed\n");
+		return -1;
+	}
+
+	backlog_order = bits;
+	return 0;
+}
+
+/* Record t as the creation time of file number fnum in cpu's ring. */
+void write_backlog(int cpu, int fnum, time_t t)
+{
+	time_t *ring = time_backlog[cpu];
+	int slot = fnum & BACKLOG_MASK;
+
+	ring[slot] = t;
+}
+
+/* Fetch the creation time stored for file number fnum in cpu's ring. */
+time_t read_backlog(int cpu, int fnum)
+{
+	const time_t *ring = time_backlog[cpu];
+	int slot = fnum & BACKLOG_MASK;
+
+	return ring[slot];
+}
+
+/*
+ * make_outfile_name - build the name of a numbered output file.
+ * @buf:  destination buffer
+ * @max:  size of @buf in bytes
+ * @fnum: file sequence number appended to the name
+ * @cpu:  cpu number (only used in bulk mode)
+ * @t:    timestamp used to expand the outfile_name format
+ *
+ * The outfile_name format is expanded with stap_strfloctime() and then
+ * "_cpu<cpu>.<fnum>" (bulk mode) or ".<fnum>" (stream mode) is appended.
+ * In bulk mode an outfile_name of "/dev/null" is kept as is.
+ *
+ * Returns 0 on success, -1 if the format is invalid or the name does
+ * not fit into @buf.
+ */
+int make_outfile_name(char *buf, int max, int fnum, int cpu, time_t t)
+{
+	int len;
+
+	len = stap_strfloctime(buf, max, outfile_name, t);
+	if (len < 0) {
+		err("Invalid FILE name format\n");
+		return -1;
+	}
+	if (bulkmode) {
+		/* special case: for testing we sometimes want to write to /dev/null */
+		if (strcmp(outfile_name, "/dev/null") == 0) {
+			strcpy(buf, "/dev/null");
+		} else {
+			/* Bound the suffix by the caller's buffer size, not PATH_MAX. */
+			if (snprintf_chk(&buf[len], max - len,
+					 "_cpu%d.%d", cpu, fnum))
+				return -1;
+		}
+	} else {
+		/* stream mode */
+		if (snprintf_chk(&buf[len], max - len, ".%d", fnum))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * open_outfile - open output file number fnum for a cpu.
+ *
+ * When fnum_max is set, the creation time of the new file is stored in
+ * the backlog, and if remove_file is non-zero the file fnum_max steps
+ * back is deleted first.  The new descriptor is stored in out_fd[cpu].
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int open_outfile(int fnum, int cpu, int remove_file)
+{
+	char path[PATH_MAX];
+	time_t now;
+	int fd;
+
+	if (outfile_name == NULL) {
+		_err("-S is set without -o. Please file a bug report.\n");
+		return -1;
+	}
+
+	time(&now);
+	if (fnum_max) {
+		if (remove_file) {
+			/* remove oldest file */
+			int oldest = fnum - fnum_max;
+
+			if (make_outfile_name(path, PATH_MAX, oldest, cpu,
+					      read_backlog(cpu, oldest)) < 0)
+				return -1;
+			remove(path); /* don't care */
+		}
+		/* Must follow the read above: both may map to the same slot. */
+		write_backlog(cpu, fnum, now);
+	}
+
+	if (make_outfile_name(path, PATH_MAX, fnum, cpu, now) < 0)
+		return -1;
+
+	fd = open (path, O_CREAT|O_TRUNC|O_WRONLY, 0666);
+	out_fd[cpu] = fd;
+	if (fd < 0) {
+		perr("Couldn't open output file %s", path);
+		return -1;
+	}
+	if (set_clexec(fd) < 0)
+		return -1;
+	return 0;
+}
+
/**
* reader_thread - per-cpu channel buffer reader
*/
@@ -57,6 +144,9 @@ static void *reader_thread(void *data)
struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim;
sigset_t sigs;
struct sigaction sa;
+ off_t wsize = 0;
+ int fnum = 0;
+ int remove_file = 0;
sigemptyset(&sigs);
sigaddset(&sigs,SIGUSR2);
@@ -95,17 +185,37 @@ static void *reader_thread(void *data)
dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno);
if (errno != EINTR) {
_perr("poll error");
- return(NULL);
+ goto error_out;
}
}
while ((rc = read(relay_fd[cpu], buf, sizeof(buf))) > 0) {
+ wsize += rc;
+ /* Switching file */
+ if (fsize_max && wsize > fsize_max) {
+ close(out_fd[cpu]);
+ fnum++;
+ if (fnum_max && fnum == fnum_max)
+ remove_file = 1;
+ if (open_outfile(fnum, cpu, remove_file) < 0) {
+ perr("Couldn't open file for cpu %d, exiting.", cpu);
+ goto error_out;
+ }
+ wsize = rc;
+ }
if (write(out_fd[cpu], buf, rc) != rc) {
- perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu);
- return(NULL);
+ if (errno != EPIPE)
+ perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu);
+ goto error_out;
}
}
} while (!stop_threads);
- dbug(3, "exiting thread %d\n", cpu);
+ dbug(3, "exiting thread for cpu %d\n", cpu);
+ return(NULL);
+
+error_out:
+ /* Signal the main thread that we need to quit */
+ kill(getpid(), SIGTERM);
+ dbug(2, "exiting thread for cpu %d after error\n", cpu);
return(NULL);
}
@@ -116,7 +226,7 @@ static void *reader_thread(void *data)
*/
int init_relayfs(void)
{
- int i;
+ int i, len;
struct statfs st;
char rqbuf[128];
char buf[PATH_MAX], relay_filebase[PATH_MAX];
@@ -163,14 +273,29 @@ int init_relayfs(void)
return -1;
}
- if (bulkmode) {
+ if (fsize_max) {
+ /* switch file mode */
+ for (i = 0; i < ncpus; i++) {
+ if (init_backlog(i) < 0)
+ return -1;
+ if (open_outfile(0, i, 0) < 0)
+ return -1;
+ }
+ } else if (bulkmode) {
for (i = 0; i < ncpus; i++) {
if (outfile_name) {
/* special case: for testing we sometimes want to write to /dev/null */
if (strcmp(outfile_name, "/dev/null") == 0) {
strcpy(buf, "/dev/null");
} else {
- if (sprintf_chk(buf, "%s_%d", outfile_name, i))
+ len = stap_strfloctime(buf, PATH_MAX,
+ outfile_name, time(NULL));
+ if (len < 0) {
+ err("Invalid FILE name format\n");
+ return -1;
+ }
+ if (snprintf_chk(&buf[len],
+ PATH_MAX - len, "_%d", i))
return -1;
}
} else {
@@ -189,9 +314,15 @@ int init_relayfs(void)
} else {
/* stream mode */
if (outfile_name) {
- out_fd[0] = open (outfile_name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
+ len = stap_strfloctime(buf, PATH_MAX,
+ outfile_name, time(NULL));
+ if (len < 0) {
+ err("Invalid FILE name format\n");
+ return -1;
+ }
+ out_fd[0] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666);
if (out_fd[0] < 0) {
- perr("Couldn't open output file %s", outfile_name);
+ perr("Couldn't open output file %s", buf);
return -1;
}
if (set_clexec(out_fd[i]) < 0)
diff --git a/runtime/staprun/relay_old.c b/runtime/staprun/relay_old.c
index bd746f19..33d2daf3 100644
--- a/runtime/staprun/relay_old.c
+++ b/runtime/staprun/relay_old.c
@@ -23,6 +23,14 @@ static int bulkmode = 0;
unsigned subbuf_size = 0;
unsigned n_subbufs = 0;
+/* Bookkeeping for switching output files once fsize_max is exceeded. */
+struct switchfile_ctrl_block {
+ off_t wsize; /* bytes accounted to the current output file */
+ int fnum; /* sequence number of the current output file */
+ int rmfile; /* set to 1 once fnum reaches fnum_max; passed on as remove_file */
+};
+
+/* Control block used by write_realtime_data(). */
+static struct switchfile_ctrl_block global_scb = {0, 0, 0};
+
/* per-cpu buffer info */
static struct buf_status
{
@@ -70,6 +78,41 @@ void close_oldrelayfs(int detach)
close_relayfs_files(i);
}
+/*
+ * open_oldoutfile - open output file number fnum for a cpu (old relayfs).
+ *
+ * With an outfile_name the file name is built by make_outfile_name(),
+ * rotating out the oldest file when fnum_max and remove_file ask for it.
+ * Without one, bulk mode uses "stpd_cpu<cpu>.<fnum>" and stream mode
+ * simply points out_fd[cpu] at stdout.
+ *
+ * NOTE(review): only out_fd[cpu] is updated here; process_subbufs()
+ * closes percpu_tmpfile[cpu] before calling this and keeps writing to
+ * that stream afterwards -- confirm it gets re-opened somewhere.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int open_oldoutfile(int fnum, int cpu, int remove_file)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	if (outfile_name) {
+		time_t now;
+
+		time(&now);
+		if (fnum_max) {
+			if (remove_file) {
+				/* remove oldest file */
+				int oldest = fnum - fnum_max;
+
+				if (make_outfile_name(path, PATH_MAX, oldest, cpu,
+						      read_backlog(cpu, oldest)) < 0)
+					return -1;
+				remove(path); /* don't care */
+			}
+			write_backlog(cpu, fnum, now);
+		}
+		if (make_outfile_name(path, PATH_MAX, fnum, cpu, now) < 0)
+			return -1;
+	} else if (bulkmode) {
+		if (sprintf_chk(path, "stpd_cpu%d.%d", cpu, fnum))
+			return -1;
+	} else {
+		/* stream mode */
+		out_fd[cpu] = STDOUT_FILENO;
+		return 0;
+	}
+
+	fd = open (path, O_CREAT|O_TRUNC|O_WRONLY, 0666);
+	out_fd[cpu] = fd;
+	if (fd < 0) {
+		perr("Couldn't open output file %s", path);
+		return -1;
+	}
+	if (set_clexec(fd) < 0)
+		return -1;
+	return 0;
+}
/**
* open_relayfs_files - open and mmap buffer and open output file.
* Returns -1 on unexpected failure, 0 if file not found, 1 on success.
@@ -104,18 +147,31 @@ static int open_relayfs_files(int cpu, const char *relay_filebase, const char *p
return -1;
}
+ if (fsize_max) {
+ if (init_backlog(cpu) < 0)
+ goto err2;
+ if (open_oldoutfile(0, cpu, 0) < 0)
+ goto err2;
+ goto opened;
+ }
if (outfile_name) {
/* special case: for testing we sometimes want to
* write to /dev/null */
if (strcmp(outfile_name, "/dev/null") == 0) {
strcpy(tmp, "/dev/null");
} else {
- if (sprintf_chk(tmp, "%s_%d", outfile_name, cpu))
- goto err1;
+ int len;
+ len = stap_strfloctime(tmp, PATH_MAX, outfile_name, time(NULL));
+ if (len < 0) {
+ err("Invalid FILE name format\n");
+ goto err2;
+ }
+ if (snprintf_chk(&tmp[len], PATH_MAX - len, "_%d", cpu))
+ goto err2;
}
} else {
if (sprintf_chk(tmp, "stpd_cpu%d", cpu))
- goto err1;
+ goto err2;
}
if((percpu_tmpfile[cpu] = fopen(tmp, "w+")) == NULL) {
@@ -126,6 +182,7 @@ static int open_relayfs_files(int cpu, const char *relay_filebase, const char *p
perr("Couldn't open output file %s", tmp);
goto err2;
}
+opened:
total_bufsize = subbuf_size * n_subbufs;
relay_buffer[cpu] = mmap(NULL, total_bufsize, PROT_READ,
@@ -155,7 +212,8 @@ err1:
/**
* process_subbufs - write ready subbufs to disk
*/
-static int process_subbufs(struct _stp_buf_info *info)
+static int process_subbufs(struct _stp_buf_info *info,
+ struct switchfile_ctrl_block *scb)
{
unsigned subbufs_ready, start_subbuf, end_subbuf, subbuf_idx, i;
int len, cpu = info->cpu;
@@ -173,10 +231,23 @@ static int process_subbufs(struct _stp_buf_info *info)
padding = *((unsigned *)subbuf_ptr);
subbuf_ptr += sizeof(padding);
len = (subbuf_size - sizeof(padding)) - padding;
+ scb->wsize += len;
+ if (fsize_max && scb->wsize > fsize_max) {
+ fclose(percpu_tmpfile[cpu]);
+ scb->fnum ++;
+ if (fnum_max && scb->fnum == fnum_max)
+ scb->rmfile = 1;
+ if (open_oldoutfile(scb->fnum, cpu, scb->rmfile) < 0) {
+ perr("Couldn't open file for cpu %d, exiting.", cpu);
+ return -1;
+ }
+ scb->wsize = len;
+ }
if (len) {
if (fwrite_unlocked (subbuf_ptr, len, 1, percpu_tmpfile[cpu]) != 1) {
- _perr("Couldn't write to output file for cpu %d, exiting:", cpu);
- exit(1);
+ if (errno != EPIPE)
+ _perr("Couldn't write to output file for cpu %d, exiting:", cpu);
+ return -1;
}
}
subbufs_consumed++;
@@ -196,6 +267,7 @@ static void *reader_thread(void *data)
struct _stp_consumed_info consumed_info;
unsigned subbufs_consumed;
cpu_set_t cpu_mask;
+ struct switchfile_ctrl_block scb = {0, 0, 0};
CPU_ZERO(&cpu_mask);
CPU_SET(cpu, &cpu_mask);
@@ -210,14 +282,17 @@ static void *reader_thread(void *data)
if (rc < 0) {
if (errno != EINTR) {
_perr("poll error");
- exit(1);
+ break;
}
err("WARNING: poll warning: %s\n", strerror(errno));
rc = 0;
}
rc = read(proc_fd[cpu], &status[cpu].info, sizeof(struct _stp_buf_info));
- subbufs_consumed = process_subbufs(&status[cpu].info);
+ rc = process_subbufs(&status[cpu].info, &scb);
+ if (rc < 0)
+ break;
+ subbufs_consumed = rc;
if (subbufs_consumed) {
if (subbufs_consumed > status[cpu].max_backlog)
status[cpu].max_backlog = subbufs_consumed;
@@ -230,6 +305,37 @@ static void *reader_thread(void *data)
if (status[cpu].info.flushing)
pthread_exit(NULL);
} while (1);
+
+ /* Signal the main thread that we need to quit */
+ kill(getpid(), SIGTERM);
+ pthread_exit(NULL);
+}
+
+/**
+ * write_realtime_data - write realtime data packet to disk
+ * @data: start of the packet
+ * @nb:   number of bytes in the packet
+ *
+ * Accounts the packet against fsize_max and switches to the next
+ * output file first when the limit would be exceeded.
+ *
+ * Returns 0 when the whole packet was written, 1 when the write
+ * failed, -1 when the next output file could not be opened.
+ */
+int write_realtime_data(void *data, ssize_t nb)
+{
+	const char *pos = data;
+	ssize_t bw;
+
+	global_scb.wsize += nb;
+	if (fsize_max && global_scb.wsize > fsize_max) {
+		close(out_fd[0]);
+		global_scb.fnum++;
+		if (fnum_max && global_scb.fnum == fnum_max)
+			global_scb.rmfile = 1;
+		if (open_oldoutfile(global_scb.fnum, 0,
+				    global_scb.rmfile) < 0) {
+			perr("Couldn't open file, exiting.");
+			return -1;
+		}
+		global_scb.wsize = nb;
+	}
+
+	/* write() may be short: continue after the bytes already written
+	 * instead of re-sending the start of the packet. */
+	while (nb > 0) {
+		bw = write(out_fd[0], pos, nb);
+		if (bw <= 0)
+			return 1;
+		pos += bw;
+		nb -= bw;
+	}
+	return 0;
}
/**
@@ -249,10 +355,22 @@ int init_oldrelayfs(void)
bulkmode = 1;
if (!bulkmode) {
+ int len;
+ char tmp[PATH_MAX];
+ if (fsize_max) {
+ if (init_backlog(0))
+ return -1;
+ return open_oldoutfile(0, 0, 0);
+ }
if (outfile_name) {
- out_fd[0] = open (outfile_name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
+ len = stap_strfloctime(tmp, PATH_MAX, outfile_name, time(NULL));
+ if (len < 0) {
+ err("Invalid FILE name format\n");
+ return -1;
+ }
+ out_fd[0] = open (tmp, O_CREAT|O_TRUNC|O_WRONLY, 0666);
if (out_fd[0] < 0 || set_clexec(out_fd[0]) < 0) {
- perr("Couldn't open output file '%s'", outfile_name);
+ perr("Couldn't open output file '%s'", tmp);
return -1;
}
} else
diff --git a/runtime/staprun/staprun.h b/runtime/staprun/staprun.h
index f49cc7db..bd6402e4 100644
--- a/runtime/staprun/staprun.h
+++ b/runtime/staprun/staprun.h
@@ -9,7 +9,7 @@
*
* Copyright (C) 2005-2008 Red Hat Inc.
*/
-
+#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
@@ -33,31 +33,35 @@
#include <sys/wait.h>
#include <sys/statfs.h>
#include <linux/version.h>
+#include <syslog.h>
/* Include config.h to pick up dependency for --prefix usage. */
#include "config.h"
-#define dbug(level, args...) {if (verbose>=level) {fprintf(stderr,"%s:%s:%d ",__name__,__FUNCTION__, __LINE__); fprintf(stderr,args);}}
+extern void eprintf(const char *fmt, ...);
+extern void switch_syslog(const char *name);
+
+#define dbug(level, args...) do {if (verbose>=level) {eprintf("%s:%s:%d ",__name__,__FUNCTION__, __LINE__); eprintf(args);}} while (0)
extern char *__name__;
/* print to stderr */
-#define err(args...) fprintf(stderr,args)
+#define err(args...) eprintf(args)
/* better perror() */
#define perr(args...) do { \
int _errno = errno; \
- fputs("ERROR: ", stderr); \
- fprintf(stderr, args); \
- fprintf(stderr, ": %s\n", strerror(_errno)); \
+ eprintf("ERROR: "); \
+ eprintf(args); \
+ eprintf(": %s\n", strerror(_errno)); \
} while (0)
/* Error messages. Use these for serious errors, not informational messages to stderr. */
-#define _err(args...) do {fprintf(stderr,"%s:%s:%d: ERROR: ",__name__, __FUNCTION__, __LINE__); fprintf(stderr,args);} while(0)
+#define _err(args...) do {eprintf("%s:%s:%d: ERROR: ",__name__, __FUNCTION__, __LINE__); eprintf(args);} while(0)
#define _perr(args...) do { \
int _errno = errno; \
_err(args); \
- fprintf(stderr, ": %s\n", strerror(_errno)); \
+ eprintf(": %s\n", strerror(_errno)); \
} while (0)
#define overflow_error() _err("Internal buffer overflow. Please file a bug report.\n")
@@ -114,7 +118,12 @@ int init_relayfs(void);
void close_relayfs(void);
int init_oldrelayfs(void);
void close_oldrelayfs(int);
+int write_realtime_data(void *data, ssize_t nb);
void setup_signals(void);
+int make_outfile_name(char *buf, int max, int fnum, int cpu, time_t t);
+int init_backlog(int cpu);
+void write_backlog(int cpu, int fnum, time_t t);
+time_t read_backlog(int cpu, int fnum);
/* staprun_funcs.c */
void setup_staprun_signals(void);
const char *moderror(int err);
@@ -126,6 +135,7 @@ void start_symbol_thread(void);
void stop_symbol_thread(void);
/* common.c functions */
+int stap_strfloctime(char *buf, size_t max, const char *fmt, time_t t);
void parse_args(int argc, char **argv);
void usage(char *prog);
void parse_modpath(const char *);
@@ -154,6 +164,9 @@ extern int attach_mod;
extern int delete_mod;
extern int load_only;
extern int need_uprobes;
+extern int daemon_mode;
+extern off_t fsize_max;
+extern int fnum_max;
/* getopt variables */
extern char *optarg;
diff --git a/runtime/staprun/staprun_funcs.c b/runtime/staprun/staprun_funcs.c
index 5e7fa102..8da7e7e8 100644
--- a/runtime/staprun/staprun_funcs.c
+++ b/runtime/staprun/staprun_funcs.c
@@ -7,14 +7,20 @@
* Public License (GPL); either version 2, or (at your option) any
* later version.
*
- * Copyright (C) 2007-2008 Red Hat Inc.
+ * Copyright (C) 2007-2009 Red Hat Inc.
*/
+#include "config.h"
#include "staprun.h"
+#if HAVE_NSS
+#include "modverify.h"
+#endif
+
#include <sys/mount.h>
#include <sys/utsname.h>
#include <grp.h>
#include <pwd.h>
+#include <assert.h>
extern long init_module(void *, unsigned long, const char *);
@@ -199,6 +205,44 @@ int mountfs(void)
return 0;
}
+#if HAVE_NSS
+/*
+ * Modules which have been signed using a certificate and private key
+ * corresponding to a certificate and public key in the database in
+ * the '$sysconfdir/systemtap/staprun' directory may be loaded by
+ * anyone.
+ *
+ * Canonicalizes modpath, derives the signature file name by appending
+ * ".sgn" and passes both to verify_module().
+ *
+ * Returns: the result of verify_module() (one of the MODULE_* codes
+ * from modverify.h), or MODULE_CHECK_ERROR if the module path cannot
+ * be canonicalized or the signature path does not fit in PATH_MAX.
+ */
+static int
+check_signature(void)
+{
+	char module_realpath[PATH_MAX];
+	char signature_realpath[PATH_MAX];
+	int rc;
+
+	dbug(2, "checking signature for %s\n", modpath);
+
+	/* Use realpath() to canonicalize the module path. */
+	if (realpath(modpath, module_realpath) == NULL) {
+		perr("Unable to canonicalize module path \"%s\"", modpath);
+		return MODULE_CHECK_ERROR;
+	}
+
+	/* Now add the .sgn suffix to get the signature file name.  Let
+	 * snprintf() do the bounds check so that the suffix and the
+	 * terminating NUL are both accounted for. */
+	rc = snprintf (signature_realpath, sizeof (signature_realpath),
+		       "%s.sgn", module_realpath);
+	if (rc < 0 || (size_t) rc >= sizeof (signature_realpath)) {
+		err("Path \"%s\" is too long.\n", modpath);
+		return MODULE_CHECK_ERROR;
+	}
+
+	rc = verify_module (module_realpath, signature_realpath);
+
+	dbug(2, "verify_module returns %d\n", rc);
+
+	return rc;
+}
+#endif /* HAVE_NSS */
/*
* Members of the 'stapusr' group can only use "blessed" modules -
@@ -269,6 +313,15 @@ check_path(void)
return -1;
}
+ /* Overwrite the modpath with the canonicalized one, to defeat
+ a possible race between path checking below and somewhat later
+ module loading. */
+ modpath = strdup (module_realpath);
+ if (modpath == NULL) {
+ _perr("allocating memory failed");
+ exit (1);
+ }
+
/* To make sure the user can't specify something like
* /lib/modules/`uname -r`/systemtapmod.ko, put a '/' on the
* end of staplib_dir_realpath. */
@@ -293,22 +346,23 @@ check_path(void)
}
/*
- * Check the user's permissions. Is he allowed to run staprun (or is
- * he limited to "blessed" modules)?
+ * Check the user's group membership to decide whether he is allowed to run staprun:
*
- * Returns: -1 on errors, 0 on failure, 1 on success.
+ * o members of stapdev can do anything
+ * o members of stapusr can load modules from /lib/modules/KVER/systemtap
+ *
+ * Returns: -2 if neither group exists
+ * -1 for other errors
+ * 0 on failure
+ * 1 on success
*/
-int check_permissions(void)
+static int
+check_groups (void)
{
gid_t gid, gidlist[NGROUPS_MAX];
gid_t stapdev_gid, stapusr_gid;
int i, ngids;
struct group *stgr;
- int path_check = 0;
-
- /* If we're root, we can do anything. */
- if (getuid() == 0)
- return 1;
/* Lookup the gid for group "stapdev" */
errno = 0;
@@ -332,55 +386,42 @@ int check_permissions(void)
else
stapusr_gid = stgr->gr_gid;
- /* If neither group was found, just return an error. */
- if (stapdev_gid == (gid_t)-1 && stapusr_gid == (gid_t)-1) {
- err("ERROR: You are trying to run stap as a normal user.\n"
- "You should either be root, or be part of either "
- "group \"stapdev\" or group \"stapusr\".\n"
- "Your system doesn't seem to have either group.\n"
- "For more information, please consult the \"SAFETY AND SECURITY\" section of the \"stap(1)\" manpage\n");
- return -1;
- }
+ /* If neither group was found, then return -2. */
+ if (stapdev_gid == (gid_t)-1 && stapusr_gid == (gid_t)-1)
+ return -2;
/* According to the getgroups() man page, getgroups() may not
* return the effective gid, so try to match it first. */
gid = getegid();
if (gid == stapdev_gid)
return 1;
- else if (gid == stapusr_gid)
- path_check = 1;
- /* Get the list of the user's groups. */
- ngids = getgroups(NGROUPS_MAX, gidlist);
- if (ngids < 0) {
- perr("Unable to retrieve group list");
- return -1;
- }
-
- for (i = 0; i < ngids; i++) {
- /* If the user is a member of 'stapdev', then we're
- * done, since he can use staprun without any
- * restrictions. */
- if (gidlist[i] == stapdev_gid)
- return 1;
-
- /* If the user is a member of 'stapusr', then we'll
- * need to check the module path. However, we'll keep
- * checking groups since it is possible the user is a
- * member of both groups and we haven't seen the
- * 'stapdev' group yet. */
- if (gidlist[i] == stapusr_gid)
- path_check = 1;
- }
+ if (gid != stapusr_gid) {
+ /* Get the list of the user's groups. */
+ ngids = getgroups(NGROUPS_MAX, gidlist);
+ if (ngids < 0) {
+ perr("Unable to retrieve group list");
+ return -1;
+ }
- /* If path_check is 0, then the user isn't a member of either
- * group. Error out. */
- if (path_check == 0) {
- err("ERROR: You are trying to run stap as a normal user.\n"
- "You must be a member of either group \"stapdev\" or group \"stapusr\".\n"
- "Please contact your system administrator to get yourself membership to either of those groups.\n"
- "For more information, please consult the \"SAFETY AND SECURITY\" section of the \"stap(1)\" manpage.\n");
- return 0;
+ for (i = 0; i < ngids; i++) {
+ /* If the user is a member of 'stapdev', then we're
+ * done, since he can use staprun without any
+ * restrictions. */
+ if (gidlist[i] == stapdev_gid)
+ return 1;
+
+ /* If the user is a member of 'stapusr', then we'll
+ * need to check the module path. However, we'll keep
+ * checking groups since it is possible the user is a
+ * member of both groups and we haven't seen the
+ * 'stapdev' group yet. */
+ if (gidlist[i] == stapusr_gid)
+ gid = stapusr_gid;
+ }
+ /* Not a member of stapusr? */
+ if (gid != stapusr_gid)
+ return 0;
}
/* At this point the user is only a member of the 'stapusr'
@@ -389,3 +430,50 @@ int check_permissions(void)
* is in that directory. */
return check_path();
}
+
+/*
+ * Decide whether the invoking user may run staprun on the requested
+ * module.
+ *
+ * Permission is granted at one of these levels:
+ *
+ * 1) root can do anything
+ * 2) members of stapdev can do anything
+ * 3) members of stapusr can load modules from /lib/modules/KVER/systemtap
+ *
+ * When built with NSS support the module signature is checked before
+ * anything else, and a module found to be altered is refused outright.
+ *
+ * Returns: -1 on errors, 0 on failure, 1 on success.
+ */
+int check_permissions(void)
+{
+	int groups_rc;
+
+#if HAVE_NSS
+	/* Attempt to verify the module against its signature.  A module
+	   that has been tampered with (altered) is rejected. */
+	if (check_signature () == MODULE_ALTERED)
+		return 0;
+#endif
+
+	/* If we're root, we can do anything. */
+	if (getuid() == 0)
+		return 1;
+
+	/* Group membership is the remaining way to get permission. */
+	groups_rc = check_groups ();
+	if (groups_rc == 1)
+		return 1;
+
+	err("ERROR: You are trying to run stap as a normal user.\n"
+	    "You should either be root, or be part of either "
+	    "group \"stapdev\" or group \"stapusr\".\n");
+	if (groups_rc != -2)
+		return groups_rc;
+
+	/* -2 (neither group exists) is reported to callers as a plain error. */
+	err("Your system doesn't seem to have either group.\n");
+	return -1;
+}
diff --git a/runtime/sym.c b/runtime/sym.c
index 31700326..386005b2 100644
--- a/runtime/sym.c
+++ b/runtime/sym.c
@@ -14,6 +14,7 @@
#include "sym.h"
#include "string.c"
+#include "task_finder_vma.c"
/** @file sym.c
* @addtogroup sym Symbolic Functions
@@ -21,6 +22,62 @@
* @{
*/
+/* Run __stp_tf_vma_initialize() on the first call only; later calls
+   are no-ops. */
+static void _stp_sym_init(void)
+{
+	static int initialized = 0;
+
+	if (initialized)
+		return;
+
+	__stp_tf_vma_initialize();
+	initialized = 1;
+}
+
+/* mmap callback for the task finder.  It has to be registered (in
+   tapsets.cxx for emit_module_init) for every user task path or pid
+   for which we might need symbols or unwind info.  The mapped range is
+   recorded for the task's group leader, together with the module whose
+   path equals the mapped file's path (NULL when none matches).
+   Always returns 0. */
+static int _stp_tf_mmap_cb(struct stap_task_finder_target *tgt,
+			   struct task_struct *tsk,
+			   char *path,
+			   unsigned long addr,
+			   unsigned long length,
+			   unsigned long offset,
+			   unsigned long vm_flags)
+{
+	struct _stp_module *matched = NULL;
+	unsigned long end = addr + length;
+	int idx;
+
+#ifdef DEBUG_TASK_FINDER_VMA
+	_stp_dbug(__FUNCTION__, __LINE__,
+		  "mmap_cb: tsk %d:%d path %s, addr 0x%08lx, length 0x%08lx, offset 0x%lx, flags 0x%lx\n",
+		  tsk->pid, tsk->tgid, path, addr, length, offset, vm_flags);
+#endif
+	/* Without a path there is nothing to match against. */
+	for (idx = 0; path != NULL && idx < _stp_num_modules; idx++) {
+		if (strcmp(path, _stp_modules[idx]->path) != 0)
+			continue;
+#ifdef DEBUG_TASK_FINDER_VMA
+		_stp_dbug(__FUNCTION__, __LINE__,
+			  "vm_cb: matched path %s to module\n",
+			  path);
+#endif
+		matched = _stp_modules[idx];
+		break;
+	}
+
+	stap_add_vma_map_info(tsk->group_leader, addr, end, offset, matched);
+	return 0;
+}
+
+/* munmap callback for the task finder: drop the recorded mapping
+   [addr, addr + length) of the task's group leader.  Always returns 0. */
+static int _stp_tf_munmap_cb(struct stap_task_finder_target *tgt,
+			     struct task_struct *tsk,
+			     unsigned long addr,
+			     unsigned long length)
+{
+	unsigned long end = addr + length;
+
+	stap_remove_vma_map_info(tsk->group_leader, addr, end, 0);
+	return 0;
+}
+
/* XXX: this needs to be address-space-specific. */
static unsigned long _stp_module_relocate(const char *module, const char *section, unsigned long offset)
{
@@ -72,35 +129,54 @@ static unsigned long _stp_module_relocate(const char *module, const char *sectio
return 0;
}
-
-/* Return module owner and fills in closest section of the address
- if found, return NULL otherwise.
+/* Return module owner and, if sec != NULL, fills in closest section
+ of the address if found, return NULL otherwise.
XXX: needs to be address-space-specific. */
static struct _stp_module *_stp_mod_sec_lookup(unsigned long addr,
+ struct task_struct *task,
struct _stp_section **sec)
{
- struct _stp_module *m = NULL;
+ void *user = NULL;
unsigned midx = 0;
- unsigned long closest_section_offset = ~0;
+
+ // Try vma matching first if task given.
+ if (task)
+ {
+ unsigned long vm_start = 0;
+ if (stap_find_vma_map_info(task->group_leader, addr,
+ &vm_start, NULL,
+ NULL, &user) == 0)
+ if (user != NULL)
+ {
+ struct _stp_module *m = (struct _stp_module *)user;
+ if (sec)
+ *sec = &m->sections[0]; // XXX check actual section and relocate
+ dbug_sym(1, "found section %s in module %s at 0x%lx\n",
+ m->sections[0].name, m->name, vm_start);
+ if (strcmp(".dynamic", m->sections[0].name) == 0)
+ m->sections[0].addr = vm_start; // cheat...
+ return m;
+ }
+ }
+
for (midx = 0; midx < _stp_num_modules; midx++)
{
unsigned secidx;
for (secidx = 0; secidx < _stp_modules[midx]->num_sections; secidx++)
{
- unsigned long this_section_addr;
- unsigned long this_section_offset;
- this_section_addr = _stp_modules[midx]->sections[secidx].addr;
- if (addr < this_section_addr) continue;
- this_section_offset = addr - this_section_addr;
- if (this_section_offset < closest_section_offset)
- {
- closest_section_offset = this_section_offset;
- m = _stp_modules[midx];
- *sec = & m->sections[secidx];
+ unsigned long sec_addr;
+ unsigned long sec_size;
+ sec_addr = _stp_modules[midx]->sections[secidx].addr;
+ sec_size = _stp_modules[midx]->sections[secidx].size;
+ if (addr >= sec_addr && addr < sec_addr + sec_size)
+ {
+ if (sec)
+ *sec = & _stp_modules[midx]->sections[secidx];
+ return _stp_modules[midx];
}
}
}
- return m;
+ return NULL;
}
@@ -109,14 +185,15 @@ static const char *_stp_kallsyms_lookup(unsigned long addr, unsigned long *symbo
unsigned long *offset,
const char **modname,
/* char ** secname? */
- char *namebuf)
+ char *namebuf,
+ struct task_struct *task)
{
struct _stp_module *m = NULL;
struct _stp_section *sec = NULL;
struct _stp_symbol *s = NULL;
unsigned end, begin = 0;
- m = _stp_mod_sec_lookup(addr, &sec);
+ m = _stp_mod_sec_lookup(addr, task, &sec);
if (unlikely (m == NULL || sec == NULL))
return NULL;
@@ -195,34 +272,29 @@ static int _stp_module_check(void)
dwfl_module_build_id was not intended to return the end address. */
notes_addr -= m->build_id_len;
- if (notes_addr > base_addr) {
- for (j = 0; j < m->build_id_len; j++)
- {
- unsigned char theory, practice;
- theory = m->build_id_bits [j];
- practice = ((unsigned char*) notes_addr) [j];
- /* XXX: consider using kread() instead of above. */
- if (theory != practice)
- {
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
- _stp_error ("%s: inconsistent %s build-id byte #%d "
- "(0x%x [actual] vs. 0x%x [debuginfo])\n",
- THIS_MODULE->name, m->name, j,
- practice, theory);
- return 1;
- #else
- /* This branch is a surrogate for
- kernels affected by Fedora bug
- #465873. */
- printk(KERN_WARNING
- "%s: inconsistent %s build-id byte #%d "
- "(0x%x [actual] vs. 0x%x [debuginfo])\n",
- THIS_MODULE->name, m->name, j,
- practice, theory);
- break; /* Note just the first mismatch. */
- #endif
- }
- }
+ if (notes_addr <= base_addr) /* shouldn't happen */
+ continue;
+ if (memcmp(m->build_id_bits, (unsigned char*) notes_addr, m->build_id_len)) {
+ const char *basename;
+
+ basename = strrchr(m->path, '/');
+ if (basename)
+ basename++;
+ else
+ basename = m->path;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
+ _stp_error ("Build-id mismatch: \"%s\" %.*M"
+ " vs. \"%s\" %.*M\n",
+ m->name, m->build_id_len, notes_addr,
+ basename, m->build_id_len, m->build_id_bits);
+ return 1;
+#else
+ /* This branch is a surrogate for kernels
+ * affected by Fedora bug #465873. */
+ printk(KERN_WARNING
+ "Build-id mismatch: \"%s\" vs. \"%s\"\n",
+ m->name, basename);
+#endif
}
} /* end checking */
} /* end loop */
@@ -241,7 +313,33 @@ static void _stp_symbol_print(unsigned long address)
const char *name;
unsigned long offset, size;
- name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
+ name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL, NULL);
+
+ _stp_printf("%p", (int64_t) address);
+
+ if (name) {
+ if (modname && *modname)
+ _stp_printf(" : %s+%#lx/%#lx [%s]", name, offset, size, modname);
+ else
+ _stp_printf(" : %s+%#lx/%#lx", name, offset, size);
+ }
+}
+
+/** Print a user space address from a specific task symbolically.
+ * @param address The address to lookup.
+ * @param task The task in whose context the address is looked up.
+ * @note Symbolic lookups should not normally be done within
+ * a probe because it is too time-consuming. Use at module exit time.
+ */
+
+static void _stp_usymbol_print(unsigned long address, struct task_struct *task)
+{
+ const char *modname;
+ const char *name;
+ unsigned long offset, size;
+
+ name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL,
+ task);
_stp_printf("%p", (int64_t) address);
@@ -254,7 +352,8 @@ static void _stp_symbol_print(unsigned long address)
}
/* Like _stp_symbol_print, except only print if the address is a valid function address */
-static int _stp_func_print(unsigned long address, int verbose, int exact)
+static int _stp_func_print(unsigned long address, int verbose, int exact,
+ struct task_struct *task)
{
const char *modname;
const char *name;
@@ -266,7 +365,7 @@ static int _stp_func_print(unsigned long address, int verbose, int exact)
else
exstr = " (inexact)";
- name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
+ name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL, task);
if (name) {
if (verbose) {
@@ -282,16 +381,29 @@ static int _stp_func_print(unsigned long address, int verbose, int exact)
return 0;
}
-static void _stp_symbol_snprint(char *str, size_t len, unsigned long address)
+/** Puts symbolic information of an address in a string.
+ * @param str The string to fill in.
+ * @param len The length of the given str string.
+ * @param address The address to lookup.
+ * @param task The task passed on to the symbol lookup.
+ * @param add_mod Whether to include module name information if found.
+ */
+
+static void _stp_symbol_snprint(char *str, size_t len, unsigned long address,
+ struct task_struct *task, int add_mod)
{
const char *modname;
const char *name;
unsigned long offset, size;
- name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
- if (name)
- strlcpy(str, name, len);
- else
+ name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL,
+ task);
+ if (name) {
+ if (add_mod && modname && *modname)
+ _stp_snprintf(str, len, "%s %s+%#lx/%#lx",
+ name, modname, offset, size);
+ else
+ strlcpy(str, name, len);
+ } else
_stp_snprintf(str, len, "%p", (int64_t) address);
}
diff --git a/runtime/sym.h b/runtime/sym.h
index e642cab4..80c334fb 100644
--- a/runtime/sym.h
+++ b/runtime/sym.h
@@ -18,6 +18,7 @@ struct _stp_symbol {
struct _stp_section {
const char *name;
unsigned long addr; /* XXX: belongs in per-address-space tables */
+ unsigned long size; /* length of the address space module covers. */
struct _stp_symbol *symbols; /* ordered by address */
unsigned num_symbols;
};
@@ -25,6 +26,7 @@ struct _stp_section {
struct _stp_module {
const char* name;
+ const char* path; /* canonical path used for runtime matching. */
struct _stp_section *sections;
unsigned num_sections;
diff --git a/runtime/syscall.h b/runtime/syscall.h
index ae451070..38b523e1 100644
--- a/runtime/syscall.h
+++ b/runtime/syscall.h
@@ -1,5 +1,6 @@
-/* syscall defines and inlines
- * Copyright (C) 2008 Red Hat Inc.
+/*
+ * syscall defines and inlines
+ * Copyright (C) 2008-2009 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
@@ -89,9 +90,17 @@
#error "Unimplemented architecture"
#endif
+#ifdef STAPCONF_ASM_SYSCALL_H
+
+/* If the system has asm/syscall.h, use defines from it. */
+#include <asm/syscall.h>
+
+#else /* !STAPCONF_ASM_SYSCALL_H */
+
+/* If the system doesn't have asm/syscall.h, use our defines. */
#if defined(__i386__) || defined(__x86_64__)
-static inline unsigned long
-__stp_user_syscall_nr(struct pt_regs *regs)
+static inline long
+syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
#if defined(STAPCONF_X86_UNIREGS)
return regs->orig_ax;
@@ -104,37 +113,45 @@ __stp_user_syscall_nr(struct pt_regs *regs)
#endif
#if defined(__powerpc__)
-static inline unsigned long
-__stp_user_syscall_nr(struct pt_regs *regs)
+static inline long
+syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
return regs->gpr[0];
}
#endif
#if defined(__ia64__)
-static inline unsigned long
-__stp_user_syscall_nr(struct pt_regs *regs)
+static inline long
+syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
- return regs->r15;
+ if ((long)regs->cr_ifs < 0) /* Not a syscall */
+ return -1;
+
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(regs))
+ return regs->r1;
+#endif
+
+ return regs->r15;
}
#endif
#if defined(__s390__) || defined(__s390x__)
-static inline unsigned long
-__stp_user_syscall_nr(struct pt_regs *regs)
+static inline long
+syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
- // might need to be 'orig_gpr2'
+ // might need to be 'orig_gpr2'
return regs->gprs[2];
}
#endif
#if defined(__i386__) || defined(__x86_64__)
-static inline long *
-__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs)
+static inline long
+syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
{
#ifdef CONFIG_IA32_EMULATION
// This code works, but isn't what we need. Since
-// __stp_user_syscall_arg() doesn't sign-extend, a value passed in as
+// syscall_get_arguments() doesn't sign-extend, a value passed in as
// an argument and then returned won't compare correctly anymore. So,
// for now, disable this code.
# if 0
@@ -145,158 +162,332 @@ __stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs)
# endif
#endif
#if defined(STAPCONF_X86_UNIREGS)
- return &regs->ax;
+ return regs->ax;
#elif defined(__x86_64__)
- return &regs->rax;
+ return regs->rax;
#elif defined (__i386__)
- return &regs->eax;
+ return regs->eax;
#endif
}
#endif
#if defined(__powerpc__)
-static inline long *
-__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs)
+static inline long
+syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
{
- return &regs->gpr[3];
+ return regs->gpr[3];
}
#endif
#if defined(__ia64__)
-static inline long *
-__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs)
+static inline long
+syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
{
- return &regs->r8;
+ return regs->r8;
}
#endif
#if defined(__s390__) || defined(__s390x__)
-static inline long *
-__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs)
+static inline long
+syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
{
- return &regs->gprs[2];
+ return regs->gprs[2];
}
#endif
#if defined(__i386__) || defined(__x86_64__)
-static inline long *
-__stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs,
- unsigned int n)
+static inline void
+syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
+ unsigned int i, unsigned int n, unsigned long *args)
{
-#if defined(__i386__)
- if (n > 5) {
- _stp_error("syscall arg > 5");
- return NULL;
+ if (i + n > 6) {
+ _stp_error("invalid syscall arg request");
+ return;
}
+#if defined(__i386__)
#if defined(STAPCONF_X86_UNIREGS)
- return &regs->bx + n;
+ memcpy(args, &regs->bx + i, n * sizeof(args[0]));
#else
- return &regs->ebx + n;
+ memcpy(args, &regs->ebx + i, n * sizeof(args[0]));
#endif
#elif defined(__x86_64__)
#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- switch (n) {
+ if (test_tsk_thread_flag(task, TIF_IA32)) {
+ switch (i) {
#if defined(STAPCONF_X86_UNIREGS)
- case 0: return &regs->bx;
- case 1: return &regs->cx;
- case 2: return &regs->dx;
- case 3: return &regs->si;
- case 4: return &regs->di;
- case 5: return &regs->bp;
+ case 0:
+ if (!n--) break;
+ *args++ = regs->bx;
+ case 1:
+ if (!n--) break;
+ *args++ = regs->cx;
+ case 2:
+ if (!n--) break;
+ *args++ = regs->dx;
+ case 3:
+ if (!n--) break;
+ *args++ = regs->si;
+ case 4:
+ if (!n--) break;
+ *args++ = regs->di;
+ case 5:
+ if (!n--) break;
+ *args++ = regs->bp;
#else
- case 0: return &regs->rbx;
- case 1: return &regs->rcx;
- case 2: return &regs->rdx;
- case 3: return &regs->rsi;
- case 4: return &regs->rdi;
- case 5: return &regs->rbp;
+ case 0:
+ if (!n--) break;
+ *args++ = regs->rbx;
+ case 1:
+ if (!n--) break;
+ *args++ = regs->rcx;
+ case 2:
+ if (!n--) break;
+ *args++ = regs->rdx;
+ case 3:
+ if (!n--) break;
+ *args++ = regs->rsi;
+ case 4:
+ if (!n--) break;
+ *args++ = regs->rdi;
+ case 5:
+ if (!n--) break;
+ *args++ = regs->rbp;
#endif
- default:
- _stp_error("syscall arg > 5");
- return NULL;
}
+ return;
+ }
#endif /* CONFIG_IA32_EMULATION */
- switch (n) {
+ switch (i) {
#if defined(STAPCONF_X86_UNIREGS)
- case 0: return &regs->di;
- case 1: return &regs->si;
- case 2: return &regs->dx;
- case 3: return &regs->r10;
- case 4: return &regs->r8;
- case 5: return &regs->r9;
+ case 0:
+ if (!n--) break;
+ *args++ = regs->di;
+ case 1:
+ if (!n--) break;
+ *args++ = regs->si;
+ case 2:
+ if (!n--) break;
+ *args++ = regs->dx;
+ case 3:
+ if (!n--) break;
+ *args++ = regs->r10;
+ case 4:
+ if (!n--) break;
+ *args++ = regs->r8;
+ case 5:
+ if (!n--) break;
+ *args++ = regs->r9;
#else
- case 0: return &regs->rdi;
- case 1: return &regs->rsi;
- case 2: return &regs->rdx;
- case 3: return &regs->r10;
- case 4: return &regs->r8;
- case 5: return &regs->r9;
+ case 0:
+ if (!n--) break;
+ *args++ = regs->rdi;
+ case 1:
+ if (!n--) break;
+ *args++ = regs->rsi;
+ case 2:
+ if (!n--) break;
+ *args++ = regs->rdx;
+ case 3:
+ if (!n--) break;
+ *args++ = regs->r10;
+ case 4:
+ if (!n--) break;
+ *args++ = regs->r8;
+ case 5:
+ if (!n--) break;
+ *args++ = regs->r9;
#endif
- default:
- _stp_error("syscall arg > 5");
- return NULL;
}
#endif /* CONFIG_X86_32 */
+ return;
}
#endif
#if defined(__powerpc__)
-static inline long *
-__stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs,
- unsigned int n)
+static inline void
+syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
+ unsigned int i, unsigned int n, unsigned long *args)
{
- switch (n) {
- case 0: return &regs->gpr[3];
- case 1: return &regs->gpr[4];
- case 2: return &regs->gpr[5];
- case 3: return &regs->gpr[6];
- case 4: return &regs->gpr[7];
- case 5: return &regs->gpr[8];
- default:
- _stp_error("syscall arg > 5");
- return NULL;
+ if (i + n > 6) {
+ _stp_error("invalid syscall arg request");
+ return;
}
+#ifdef CONFIG_PPC64
+ if (test_tsk_thread_flag(task, TIF_32BIT)) {
+ /*
+ * Zero-extend 32-bit argument values. The high bits are
+ * garbage ignored by the actual syscall dispatch.
+ */
+ while (n-- > 0)
+ args[n] = (u32) regs->gpr[3 + i + n];
+ return;
+ }
+#endif
+ memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
}
#endif
#if defined(__ia64__)
-#define __stp_user_syscall_arg(task, regs, n) \
- ____stp_user_syscall_arg(task, regs, n, &c->unwaddr)
-static inline long *
-____stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs,
- unsigned int n, unsigned long **cache)
+/* Return TRUE if PT was created due to kernel-entry via a system-call. */
+
+static inline int
+in_syscall (struct pt_regs *pt)
+{
+ return (long) pt->cr_ifs >= 0;
+}
+
+struct syscall_get_set_args {
+ unsigned int i;
+ unsigned int n;
+ unsigned long *args;
+ struct pt_regs *regs;
+ int rw;
+};
+
+#define CFM_SOF(cfm) ((cfm) & 0x7f) /* Size of frame */
+#define CFM_SOL(cfm) (((cfm) >> 7) & 0x7f) /* Size of locals */
+#define CFM_OUT(cfm) (CFM_SOF(cfm) - CFM_SOL(cfm)) /* Size of outputs */
+
+static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
+{
+ struct syscall_get_set_args *args = data;
+ struct pt_regs *pt = args->regs;
+ unsigned long *krbs, cfm, ndirty;
+ int i, count;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ cfm = pt->cr_ifs;
+ krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
+ ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+
+ count = 0;
+ if (in_syscall(pt))
+ /* args->i + args->n must be less than or equal to the number of outputs */
+ count = min_t(int, args->n, CFM_OUT(cfm) - args->i);
+
+ for (i = 0; i < count; i++) {
+ /* Skips dirties and locals */
+ if (args->rw)
+ *ia64_rse_skip_regs(krbs,
+ ndirty + CFM_SOL(cfm) + args->i + i) =
+ args->args[i];
+ else
+ args->args[i] = *ia64_rse_skip_regs(krbs,
+ ndirty + CFM_SOL(cfm) + args->i + i);
+ }
+
+ if (!args->rw) {
+ while (i < args->n) {
+ args->args[i] = 0;
+ i++;
+ }
+ }
+}
+
+void ia64_syscall_get_set_arguments(struct task_struct *task,
+ struct pt_regs *regs, unsigned int i, unsigned int n,
+ unsigned long *args, int rw)
{
- if (n > 5) {
- _stp_error("syscall arg > 5");
- return NULL;
+ struct syscall_get_set_args data = {
+ .i = i,
+ .n = n,
+ .args = args,
+ .regs = regs,
+ .rw = rw,
+ };
+
+ if (task == current)
+ unw_init_running(syscall_get_set_args_cb, &data);
+ else {
+ struct unw_frame_info ufi;
+ memset(&ufi, 0, sizeof(ufi));
+ unw_init_from_blocked_task(&ufi, task);
+ syscall_get_set_args_cb(&ufi, &data);
}
- return __ia64_fetch_register(n + 32, regs, cache);
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ unsigned long *args)
+{
+ BUG_ON(i + n > 6);
+
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(regs)) {
+ switch (i + n) {
+ case 6:
+ if (!n--) break;
+ *args++ = regs->r13;
+ case 5:
+ if (!n--) break;
+ *args++ = regs->r15;
+ case 4:
+ if (!n--) break;
+ *args++ = regs->r14;
+ case 3:
+ if (!n--) break;
+ *args++ = regs->r10;
+ case 2:
+ if (!n--) break;
+ *args++ = regs->r9;
+ case 1:
+ if (!n--) break;
+ *args++ = regs->r11;
+ case 0:
+ if (!n--) break;
+ default:
+ BUG();
+ break;
+ }
+
+ return;
+ }
+#endif
+ ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
}
#endif
#if defined(__s390__) || defined(__s390x__)
-static inline long *
-__stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs,
- unsigned int n)
+static inline void
+syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
+ unsigned int i, unsigned int n, unsigned long *args)
{
- /* If we were returning a value, we could check for TIF_31BIT
- * here and cast the value with '(u32)' to make sure it got
- * down to 32bits. But, since we're returning an address,
- * there isn't much we can do. */
- switch (n) {
- case 0: return &regs->orig_gpr2;
- case 1: return &regs->gprs[3];
- case 2: return &regs->gprs[4];
- case 3: return &regs->gprs[5];
- case 4: return &regs->gprs[6];
- case 5: return &regs->args[0];
- default:
- _stp_error("syscall arg > 5");
- return NULL;
+ unsigned long mask = -1UL;
+
+ if (i + n > 6) {
+ _stp_error("invalid syscall arg request");
+ return;
+ }
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ mask = 0xffffffff;
+#endif
+ switch (i) {
+ case 0:
+ if (!n--) break;
+ *args++ = regs->orig_gpr2 & mask;
+ case 1:
+ if (!n--) break;
+ *args++ = regs->gprs[3] & mask;
+ case 2:
+ if (!n--) break;
+ *args++ = regs->gprs[4] & mask;
+ case 3:
+ if (!n--) break;
+ *args++ = regs->gprs[5] & mask;
+ case 4:
+ if (!n--) break;
+ *args++ = regs->gprs[6] & mask;
+ case 5:
+ if (!n--) break;
+ *args++ = regs->args[0] & mask;
}
}
#endif
+#endif /* !STAPCONF_ASM_SYSCALL_H */
#endif /* _SYSCALL_H_ */
diff --git a/runtime/task_finder.c b/runtime/task_finder.c
index 9db713c3..f5e059ca 100644
--- a/runtime/task_finder.c
+++ b/runtime/task_finder.c
@@ -2,17 +2,24 @@
#define TASK_FINDER_C
#if ! defined(CONFIG_UTRACE)
-#error "Need CONFIG_UTRACE!"
-#endif
+/* Dummy definitions for use in sym.c */
+struct stap_task_finder_target { };
+#else
#include <linux/utrace.h>
+
+/* PR9974: Adapt to struct renaming. */
+#ifdef UTRACE_API_VERSION
+#define utrace_attached_engine utrace_engine
+#endif
+
#include <linux/list.h>
#include <linux/binfmts.h>
#include <linux/mount.h>
#include "syscall.h"
#include "utrace_compatibility.h"
-#include "task_finder_vma.c"
+#include "task_finder_map.c"
static LIST_HEAD(__stp_task_finder_list);
@@ -33,51 +40,49 @@ static atomic_t __stp_attach_count = ATOMIC_INIT (0);
#define debug_task_finder_attach() (atomic_inc(&__stp_attach_count))
#define debug_task_finder_detach() (atomic_dec(&__stp_attach_count))
+#ifdef DEBUG_TASK_FINDER_PRINTK
+#define debug_task_finder_report() (printk(KERN_ERR \
+ "%s:%d attach count: %d, inuse count: %d\n", \
+ __FUNCTION__, __LINE__, \
+ atomic_read(&__stp_attach_count), \
+ atomic_read(&__stp_inuse_count)))
+#else
#define debug_task_finder_report() (_stp_dbug(__FUNCTION__, __LINE__, \
"attach count: %d, inuse count: %d\n", \
atomic_read(&__stp_attach_count), \
atomic_read(&__stp_inuse_count)))
+#endif /* !DEBUG_TASK_FINDER_PRINTK */
#else
#define debug_task_finder_attach() /* empty */
#define debug_task_finder_detach() /* empty */
#define debug_task_finder_report() /* empty */
-#endif
+#endif /* !DEBUG_TASK_FINDER */
typedef int (*stap_task_finder_callback)(struct stap_task_finder_target *tgt,
struct task_struct *tsk,
int register_p,
int process_p);
-typedef int (*stap_task_finder_vm_callback)(struct stap_task_finder_target *tgt,
- struct task_struct *tsk,
- int map_p, char *vm_path,
- unsigned long vm_start,
- unsigned long vm_end,
- unsigned long vm_pgoff);
-
-#ifdef DEBUG_TASK_FINDER_VMA
-static int __stp_tf_vm_cb(struct stap_task_finder_target *tgt,
- struct task_struct *tsk,
- int map_p, char *vm_path,
- unsigned long vm_start,
- unsigned long vm_end,
- unsigned long vm_pgoff)
-{
- _stp_dbug(__FUNCTION__, __LINE__,
- "vm_cb: tsk %d:%d path %s, start 0x%08lx, end 0x%08lx, offset 0x%lx\n",
- tsk->pid, map_p, vm_path, vm_start, vm_end, vm_pgoff);
- if (map_p) {
- // FIXME: What should we do with vm_path? We can't save
- // the vm_path pointer itself, but we don't have any
- // storage space allocated to save it in...
- stap_add_vma_map_info(tsk, vm_start, vm_end, vm_pgoff);
- }
- else {
- stap_remove_vma_map_info(tsk, vm_start, vm_end, vm_pgoff);
- }
- return 0;
-}
-#endif
+typedef int
+(*stap_task_finder_mmap_callback)(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk,
+ char *path,
+ unsigned long addr,
+ unsigned long length,
+ unsigned long offset,
+ unsigned long vm_flags);
+typedef int
+(*stap_task_finder_munmap_callback)(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk,
+ unsigned long addr,
+ unsigned long length);
+
+typedef int
+(*stap_task_finder_mprotect_callback)(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk,
+ unsigned long addr,
+ unsigned long length,
+ int prot);
struct stap_task_finder_target {
/* private: */
@@ -86,14 +91,18 @@ struct stap_task_finder_target {
struct list_head callback_list;
struct utrace_engine_ops ops;
unsigned engine_attached:1;
- unsigned vm_events:1;
+ unsigned mmap_events:1;
+ unsigned munmap_events:1;
+ unsigned mprotect_events:1;
size_t pathlen;
/* public: */
const char *pathname;
pid_t pid;
stap_task_finder_callback callback;
- stap_task_finder_vm_callback vm_callback;
+ stap_task_finder_mmap_callback mmap_callback;
+ stap_task_finder_munmap_callback munmap_callback;
+ stap_task_finder_mprotect_callback mprotect_callback;
};
#ifdef UTRACE_ORIG_VERSION
@@ -165,7 +174,9 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
// Make sure everything is initialized properly.
new_tgt->engine_attached = 0;
- new_tgt->vm_events = 0;
+ new_tgt->mmap_events = 0;
+ new_tgt->munmap_events = 0;
+ new_tgt->mprotect_events = 0;
memset(&new_tgt->ops, 0, sizeof(new_tgt->ops));
new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death;
new_tgt->ops.report_quiesce = &__stp_utrace_task_finder_target_quiesce;
@@ -184,7 +195,7 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
&& strcmp(tgt->pathname, new_tgt->pathname) == 0)
/* pid-based target (a specific pid or all
* pids) */
- || (new_tgt->pathlen == 0
+ || (new_tgt->pathlen == 0 && tgt->pathlen == 0
&& tgt->pid == new_tgt->pid))) {
found_node = 1;
break;
@@ -202,9 +213,13 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
// Add this target to the callback list for this task.
list_add_tail(&new_tgt->callback_list, &tgt->callback_list_head);
- // If the new target has a vm_callback, remember this.
- if (new_tgt->vm_callback != NULL)
- tgt->vm_events = 1;
+ // If the new target has any m* callbacks, remember this.
+ if (new_tgt->mmap_callback != NULL)
+ tgt->mmap_events = 1;
+ if (new_tgt->munmap_callback != NULL)
+ tgt->munmap_events = 1;
+ if (new_tgt->mprotect_callback != NULL)
+ tgt->mprotect_events = 1;
return 0;
}
@@ -264,11 +279,15 @@ stap_utrace_detach(struct task_struct *tsk,
break;
case -ESRCH: /* REAP callback already begun */
case -EALREADY: /* DEATH callback already begun */
- rc = 0; /* ignore these errors*/
+ rc = 0; /* ignore these errors */
+ break;
+ case -EINPROGRESS:
+ debug_task_finder_detach();
+ rc = 0;
break;
default:
rc = -rc;
- _stp_error("utrace_detach returned error %d on pid %d",
+ _stp_error("utrace_control returned error %d on pid %d",
rc, tsk->pid);
break;
}
@@ -282,7 +301,6 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
{
struct task_struct *grp, *tsk;
struct utrace_attached_engine *engine;
- int rc = 0;
pid_t pid = 0;
// Notice we're not calling get_task_mm() in this loop. In
@@ -308,11 +326,12 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
continue;
#endif
- rc = stap_utrace_detach(tsk, ops);
- if (rc != 0)
- goto udo_err;
+ /* Notice we're purposefully ignoring errors from
+ * stap_utrace_detach(). Even if we got an error on
+ * this task, we need to keep detaching from other
+ * tasks. */
+ (void) stap_utrace_detach(tsk, ops);
} while_each_thread(grp, tsk);
-udo_err:
rcu_read_unlock();
debug_task_finder_report();
}
@@ -383,10 +402,10 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen)
/*
* __STP_TASK_BASE_EVENTS: base events for stap_task_finder_target's
- * without vm_callback's
+ * without map callbacks
*
* __STP_TASK_VM_BASE_EVENTS: base events for
- * stap_task_finder_target's with vm_callback's
+ * stap_task_finder_target's with map callbacks
*/
#define __STP_TASK_BASE_EVENTS (UTRACE_EVENT(DEATH))
@@ -403,8 +422,10 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen)
#define __STP_ATTACHED_TASK_EVENTS (__STP_TASK_BASE_EVENTS \
| UTRACE_EVENT(QUIESCE))
-#define __STP_ATTACHED_TASK_BASE_EVENTS(tgt) \
- ((tgt)->vm_events ? __STP_TASK_VM_BASE_EVENTS : __STP_TASK_BASE_EVENTS)
+#define __STP_ATTACHED_TASK_BASE_EVENTS(tgt) \
+ (((tgt)->mmap_events || (tgt)->munmap_events \
+ || (tgt)->mprotect_events) \
+ ? __STP_TASK_VM_BASE_EVENTS : __STP_TASK_BASE_EVENTS)
static int
__stp_utrace_attach(struct task_struct *tsk,
@@ -459,7 +480,7 @@ __stp_utrace_attach(struct task_struct *tsk,
* ref.
*/
rc = utrace_barrier(tsk, engine);
- if (rc != 0)
+ if (rc != -ESRCH && rc != -EALREADY)
_stp_error("utrace_barrier returned error %d on pid %d",
rc, (int)tsk->pid);
}
@@ -478,7 +499,7 @@ __stp_utrace_attach(struct task_struct *tsk,
}
}
- else
+ else if (rc != -ESRCH && rc != -EALREADY)
_stp_error("utrace_set_events2 returned error %d on pid %d",
rc, (int)tsk->pid);
utrace_engine_put(engine);
@@ -520,11 +541,113 @@ __stp_call_callbacks(struct stap_task_finder_target *tgt,
}
}
+static void
+__stp_call_mmap_callbacks(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk, char *path,
+ unsigned long addr, unsigned long length,
+ unsigned long offset, unsigned long vm_flags)
+{
+ struct list_head *cb_node;
+ int rc;
+
+ if (tgt == NULL || tsk == NULL)
+ return;
+
+#ifdef DEBUG_TASK_FINDER_VMA
+ _stp_dbug(__FUNCTION__, __LINE__,
+ "pid %d, a/l/o/p/path 0x%lx 0x%lx 0x%lx %c%c%c%c %s\n",
+ tsk->pid, addr, length, offset,
+ vm_flags & VM_READ ? 'r' : '-',
+ vm_flags & VM_WRITE ? 'w' : '-',
+ vm_flags & VM_EXEC ? 'x' : '-',
+ vm_flags & VM_MAYSHARE ? 's' : 'p',
+ path);
+#endif
+ list_for_each(cb_node, &tgt->callback_list_head) {
+ struct stap_task_finder_target *cb_tgt;
+
+ cb_tgt = list_entry(cb_node, struct stap_task_finder_target,
+ callback_list);
+ if (cb_tgt == NULL || cb_tgt->mmap_callback == NULL)
+ continue;
+
+ rc = cb_tgt->mmap_callback(cb_tgt, tsk, path, addr, length,
+ offset, vm_flags);
+ if (rc != 0) {
+ _stp_error("mmap callback for %d failed: %d",
+ (int)tsk->pid, rc);
+ }
+ }
+}
+
+static void
+__stp_call_mmap_callbacks_with_vma(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk,
+ struct vm_area_struct *vma)
+{
+ char *mmpath_buf;
+ char *mmpath;
+ int rc;
+
+ // Allocate space for a path
+ mmpath_buf = _stp_kmalloc(PATH_MAX);
+ if (mmpath_buf == NULL) {
+ _stp_error("Unable to allocate space for path");
+ return;
+ }
+
+ // Grab the path associated with this vma.
+#ifdef STAPCONF_DPATH_PATH
+ mmpath = d_path(&(vma->vm_file->f_path), mmpath_buf, PATH_MAX);
+#else
+ mmpath = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt,
+ mmpath_buf, PATH_MAX);
+#endif
+ if (mmpath == NULL || IS_ERR(mmpath)) {
+ rc = -PTR_ERR(mmpath);
+ _stp_error("Unable to get path (error %d) for pid %d",
+ rc, (int)tsk->pid);
+ }
+ else {
+ __stp_call_mmap_callbacks(tgt, tsk, mmpath, vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ (vma->vm_pgoff << PAGE_SHIFT),
+ vma->vm_flags);
+ }
+ _stp_kfree(mmpath_buf);
+}
+
+static inline void
+__stp_call_munmap_callbacks(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk, unsigned long addr,
+ unsigned long length)
+{
+ struct list_head *cb_node;
+ int rc;
+
+ if (tgt == NULL || tsk == NULL)
+ return;
+
+ list_for_each(cb_node, &tgt->callback_list_head) {
+ struct stap_task_finder_target *cb_tgt;
+
+ cb_tgt = list_entry(cb_node, struct stap_task_finder_target,
+ callback_list);
+ if (cb_tgt == NULL || cb_tgt->munmap_callback == NULL)
+ continue;
+
+ rc = cb_tgt->munmap_callback(cb_tgt, tsk, addr, length);
+ if (rc != 0) {
+ _stp_error("munmap callback for %d failed: %d",
+ (int)tsk->pid, rc);
+ }
+ }
+}
+
static inline void
-__stp_call_vm_callbacks(struct stap_task_finder_target *tgt,
- struct task_struct *tsk, int map_p, char *vm_path,
- unsigned long vm_start, unsigned long vm_end,
- unsigned long vm_pgoff)
+__stp_call_mprotect_callbacks(struct stap_task_finder_target *tgt,
+ struct task_struct *tsk, unsigned long addr,
+ unsigned long length, int prot)
{
struct list_head *cb_node;
int rc;
@@ -537,13 +660,13 @@ __stp_call_vm_callbacks(struct stap_task_finder_target *tgt,
cb_tgt = list_entry(cb_node, struct stap_task_finder_target,
callback_list);
- if (cb_tgt == NULL || cb_tgt->vm_callback == NULL)
+ if (cb_tgt == NULL || cb_tgt->mprotect_callback == NULL)
continue;
- rc = cb_tgt->vm_callback(cb_tgt, tsk, map_p, vm_path,
- vm_start, vm_end, vm_pgoff);
+ rc = cb_tgt->mprotect_callback(cb_tgt, tsk, addr, length,
+ prot);
if (rc != 0) {
- _stp_error("vm callback for %d failed: %d",
+ _stp_error("mprotect callback for %d failed: %d",
(int)tsk->pid, rc);
}
}
@@ -853,11 +976,12 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
* a stale task pointer, if we have an engine ref.
*/
rc = utrace_barrier(tsk, engine);
- if (rc != 0)
+ if (rc == 0)
+ rc = utrace_set_events(tsk, engine,
+ __STP_ATTACHED_TASK_BASE_EVENTS(tgt));
+ else if (rc != -ESRCH && rc != -EALREADY)
_stp_error("utrace_barrier returned error %d on pid %d",
rc, (int)tsk->pid);
- rc = utrace_set_events(tsk, engine,
- __STP_ATTACHED_TASK_BASE_EVENTS(tgt));
}
if (rc != 0)
_stp_error("utrace_set_events returned error %d on pid %d",
@@ -869,16 +993,16 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
__stp_call_callbacks(tgt, tsk, 1, (tsk->pid == tsk->tgid));
/* If this is just a thread other than the thread group leader,
+ don't bother informing map callback clients about its memory map,
+ don't bother inform map callback clients about its memory map,
since they will simply duplicate each other. */
- if (tgt->vm_events == 1 && tsk->tgid == tsk->pid) {
+ if (tgt->mmap_events == 1 && tsk->tgid == tsk->pid) {
struct mm_struct *mm;
char *mmpath_buf;
char *mmpath;
struct vm_area_struct *vma;
int rc;
- /* Call the vm_callback for every vma associated with
+ /* Call the mmap_callback for every vma associated with
* a file. */
mm = get_task_mm(tsk);
if (! mm)
@@ -905,12 +1029,13 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
mmpath_buf, PATH_MAX);
#endif
if (mmpath) {
- __stp_call_vm_callbacks(tgt, tsk, 1,
- mmpath,
- vma->vm_start,
- vma->vm_end,
- (vma->vm_pgoff
- << PAGE_SHIFT));
+ __stp_call_mmap_callbacks(tgt, tsk,
+ mmpath,
+ vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ (vma->vm_pgoff
+ << PAGE_SHIFT),
+ vma->vm_flags);
}
else {
_stp_dbug(__FUNCTION__, __LINE__,
@@ -957,96 +1082,60 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
#endif
{
struct stap_task_finder_target *tgt = engine->data;
- unsigned long syscall_no;
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- unsigned long *arg0_addr, arg0;
+ long syscall_no;
+ unsigned long args[3] = { 0L };
int rc;
-#if defined(__ia64__)
- struct { unsigned long *unwaddr; } _c = {.unwaddr = NULL}, *c = &_c;
-#endif
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
return UTRACE_DETACH;
}
- if (tgt == NULL || tgt->vm_events == 0)
+ if (tgt == NULL)
return UTRACE_RESUME;
// See if syscall is one we're interested in.
//
// FIXME: do we need to handle mremap()?
- syscall_no = __stp_user_syscall_nr(regs);
+ syscall_no = syscall_get_nr(tsk, regs);
if (syscall_no != MMAP_SYSCALL_NO(tsk)
&& syscall_no != MMAP2_SYSCALL_NO(tsk)
&& syscall_no != MPROTECT_SYSCALL_NO(tsk)
&& syscall_no != MUNMAP_SYSCALL_NO(tsk))
return UTRACE_RESUME;
- __stp_tf_handler_start();
+ // The syscall is one we're interested in, but do we have a
+ // handler for it?
+ if (((syscall_no == MMAP_SYSCALL_NO(tsk)
+ || syscall_no == MMAP2_SYSCALL_NO(tsk)) && tgt->mmap_events == 0)
+ || (syscall_no == MPROTECT_SYSCALL_NO(tsk)
+ && tgt->mprotect_events == 0)
+ || (syscall_no == MUNMAP_SYSCALL_NO(tsk)
+ && tgt->munmap_events == 0))
+ return UTRACE_RESUME;
- // We need the first syscall argument to see what address
- // we're operating on.
- arg0_addr = __stp_user_syscall_arg(tsk, regs, 0);
- if ((rc = __stp_get_user(arg0, arg0_addr)) != 0) {
- _stp_error("couldn't read syscall arg 0 for pid %d: %d",
- tsk->pid, rc);
+ __stp_tf_handler_start();
+ if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
+ // We need 2 arguments
+ syscall_get_arguments(tsk, regs, 0, 2, args);
}
- else if (arg0 != (unsigned long)NULL) {
- mm = get_task_mm(tsk);
- if (mm) {
- down_read(&mm->mmap_sem);
-
- // If we can find a matching vma associated
- // with a file, save off its details.
- vma = __stp_find_file_based_vma(mm, arg0);
- if (vma != NULL) {
- __stp_tf_add_vma(tsk, arg0, vma);
- }
-
- up_read(&mm->mmap_sem);
- mmput(mm);
- }
+ else if (syscall_no == MMAP_SYSCALL_NO(tsk)
+ || syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ // For mmap, we really just need the return value, so
+ // there is no need to save arguments
}
- __stp_tf_handler_end();
- return UTRACE_RESUME;
-}
-
-static void
-__stp_call_vm_callbacks_with_vma(struct stap_task_finder_target *tgt,
- struct task_struct *tsk,
- struct vm_area_struct *vma)
-{
- char *mmpath_buf;
- char *mmpath;
- int rc;
-
- // Allocate space for a path
- mmpath_buf = _stp_kmalloc(PATH_MAX);
- if (mmpath_buf == NULL) {
- _stp_error("Unable to allocate space for path");
- return;
+ else { // mprotect()
+ // We need 3 arguments
+ syscall_get_arguments(tsk, regs, 0, 3, args);
}
- // Grab the path associated with this vma.
-#ifdef STAPCONF_DPATH_PATH
- mmpath = d_path(&(vma->vm_file->f_path), mmpath_buf, PATH_MAX);
-#else
- mmpath = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt,
- mmpath_buf, PATH_MAX);
-#endif
- if (mmpath == NULL || IS_ERR(mmpath)) {
- rc = -PTR_ERR(mmpath);
- _stp_error("Unable to get path (error %d) for pid %d",
- rc, (int)tsk->pid);
- }
- else {
- __stp_call_vm_callbacks(tgt, tsk, 1, mmpath,
- vma->vm_start, vma->vm_end,
- (vma->vm_pgoff << PAGE_SHIFT));
- }
- _stp_kfree(mmpath_buf);
+ // Remember the syscall information
+ rc = __stp_tf_add_map(tsk, syscall_no, args[0], args[1], args[2]);
+ if (rc != 0)
+ _stp_error("__stp_tf_add_map returned error %d on pid %d",
+ rc, tsk->pid);
+ __stp_tf_handler_end();
+ return UTRACE_RESUME;
}
#ifdef UTRACE_ORIG_VERSION
@@ -1063,165 +1152,75 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
#endif
{
struct stap_task_finder_target *tgt = engine->data;
- unsigned long syscall_no;
- unsigned long *rv_addr, rv;
- unsigned long *arg0_addr, arg0;
- int rc;
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- struct __stp_tf_vma_entry *entry = NULL;
-#if defined(__ia64__)
- struct { unsigned long *unwaddr; } _c = {.unwaddr = NULL}, *c = &_c;
-#endif
+ unsigned long rv;
+ struct __stp_tf_map_entry *entry;
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
return UTRACE_DETACH;
}
- if (tgt == NULL || tgt->vm_events == 0)
+ if (tgt == NULL)
return UTRACE_RESUME;
- // See if syscall is one we're interested in.
- //
- // FIXME: do we need to handle mremap()?
- syscall_no = __stp_user_syscall_nr(regs);
- if (syscall_no != MMAP_SYSCALL_NO(tsk)
- && syscall_no != MMAP2_SYSCALL_NO(tsk)
- && syscall_no != MPROTECT_SYSCALL_NO(tsk)
- && syscall_no != MUNMAP_SYSCALL_NO(tsk))
+ // See if we can find saved syscall info. If we can, it must
+ // be one of the syscalls we are interested in (and we must
+ // have callbacks to call for it).
+ entry = __stp_tf_get_map_entry(tsk);
+ if (entry == NULL)
return UTRACE_RESUME;
// Get return value
- rv_addr = __stp_user_syscall_return_value(tsk, regs);
- if ((rc = __stp_get_user(rv, rv_addr)) != 0) {
- _stp_error("couldn't read syscall return value for pid %d: %d",
- tsk->pid, rc);
- return UTRACE_RESUME;
- }
-
- // We need the first syscall argument to see what address we
- // were operating on.
- arg0_addr = __stp_user_syscall_arg(tsk, regs, 0);
- if ((rc = __stp_get_user(arg0, arg0_addr)) != 0) {
- _stp_error("couldn't read syscall arg 0 for pid %d: %d",
- tsk->pid, rc);
- return UTRACE_RESUME;
- }
+ __stp_tf_handler_start();
+ rv = syscall_get_return_value(tsk, regs);
#ifdef DEBUG_TASK_FINDER_VMA
_stp_dbug(__FUNCTION__, __LINE__,
"tsk %d found %s(0x%lx), returned 0x%lx\n",
tsk->pid,
- ((syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap"
- : ((syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2"
- : ((syscall_no == MPROTECT_SYSCALL_NO(tsk)) ? "mprotect"
- : ((syscall_no == MUNMAP_SYSCALL_NO(tsk)) ? "munmap"
+ ((entry->syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap"
+ : ((entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2"
+ : ((entry->syscall_no == MPROTECT_SYSCALL_NO(tsk))
+ ? "mprotect"
+ : ((entry->syscall_no == MUNMAP_SYSCALL_NO(tsk))
+ ? "munmap"
: "UNKNOWN")))),
- arg0, rv);
+ entry->arg0, rv);
#endif
- __stp_tf_handler_start();
- // Try to find the vma info we might have saved.
- if (arg0 != (unsigned long)NULL)
- entry = __stp_tf_get_vma_entry(tsk, arg0);
+ if (entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) {
+ // Call the callbacks
+ __stp_call_munmap_callbacks(tgt, tsk, entry->arg0, entry->arg1);
+ }
+ else if (entry->syscall_no == MMAP_SYSCALL_NO(tsk)
+ || entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) {
+ struct mm_struct *mm;
- // If entry is NULL, this means we didn't find a file based
- // vma to store in the syscall_entry routine. This could mean
- // we just created a new vma.
- if (entry == NULL) {
mm = get_task_mm(tsk);
if (mm) {
+ struct vm_area_struct *vma;
+
down_read(&mm->mmap_sem);
vma = __stp_find_file_based_vma(mm, rv);
- if (vma != NULL) {
- __stp_call_vm_callbacks_with_vma(tgt, tsk, vma);
- }
- up_read(&mm->mmap_sem);
- mmput(mm);
- }
- }
- // If we found saved vma information, try to match it up with
- // what currently exists.
- else {
-#ifdef DEBUG_TASK_FINDER_VMA
- _stp_dbug(__FUNCTION__, __LINE__,
- "** found stored vma 0x%lx/0x%lx/0x%lx!\n",
- entry->vm_start, entry->vm_end, entry->vm_pgoff);
-#endif
- mm = get_task_mm(tsk);
- if (mm) {
- down_read(&mm->mmap_sem);
- vma = __stp_find_file_based_vma(mm, entry->vm_start);
-
- // We couldn't find the vma at all. The
- // original vma was deleted.
- if (vma == NULL) {
- // FIXME: We'll need to figure out to
- // retrieve the path of a deleted
- // vma.
-
- __stp_call_vm_callbacks(tgt, tsk, 0, NULL,
- entry->vm_start,
- entry->vm_end,
- (entry->vm_pgoff
- << PAGE_SHIFT));
- }
- // If nothing has changed, there is no
- // need to call the callback.
- else if (vma->vm_start == entry->vm_start
- && vma->vm_end == entry->vm_end
- && vma->vm_pgoff == entry->vm_pgoff) {
- // do nothing
+ // Call the callbacks
+ if (vma) {
+ __stp_call_mmap_callbacks_with_vma(tgt, tsk,
+ vma);
}
- // The original vma has been changed. It is
- // possible that calling mprotect (e.g.) split
- // up an existing vma into 2 or 3 new vma's
- // (assuming it protected a portion of the
- // original vma at the beginning, middle, or
- // end). Try to determine what happened.
- else {
- unsigned long tmp;
-
- // First report that the original vma
- // is gone.
- //
- // FIXME: We'll need to figure out to
- // retrieve the path of a deleted
- // vma.
- __stp_call_vm_callbacks(tgt, tsk, 0, NULL,
- entry->vm_start,
- entry->vm_end,
- (entry->vm_pgoff
- << PAGE_SHIFT));
-
- // Now find all the new vma's that
- // made up the original vma's address
- // space and call the callback on each
- // new vma.
- tmp = entry->vm_start;
- while (((vma = __stp_find_file_based_vma(mm,
- tmp))
- != NULL)
- && vma->vm_end <= entry->vm_end) {
- __stp_call_vm_callbacks_with_vma(tgt,
- tsk,
- vma);
- if (vma->vm_end >= entry->vm_end)
- break;
- tmp = vma->vm_end;
- }
- }
up_read(&mm->mmap_sem);
mmput(mm);
}
-
- // Cleanup by deleting the saved vma info.
- __stp_tf_remove_vma_entry(entry);
}
+ else { // mprotect
+ // Call the callbacks
+ __stp_call_mprotect_callbacks(tgt, tsk, entry->arg0,
+ entry->arg1, entry->arg2);
+ }
+
__stp_tf_handler_end();
+ __stp_tf_remove_map_entry(entry);
return UTRACE_RESUME;
}
@@ -1245,7 +1244,7 @@ stap_start_task_finder(void)
return ENOMEM;
}
- __stp_tf_vma_initialize();
+ __stp_tf_map_initialize();
atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING);
@@ -1362,5 +1361,5 @@ stap_stop_task_finder(void)
debug_task_finder_report();
}
-
+#endif /* defined(CONFIG_UTRACE) */
#endif /* TASK_FINDER_C */
diff --git a/runtime/task_finder_map.c b/runtime/task_finder_map.c
new file mode 100644
index 00000000..b770dd0e
--- /dev/null
+++ b/runtime/task_finder_map.c
@@ -0,0 +1,191 @@
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/spinlock.h>
+
+// When handling mmap()/munmap()/mprotect() syscall tracing to notice
+// memory map changes, we need to cache syscall entry parameter values
+// for processing at syscall exit.
+
+// __stp_tf_map_lock protects the hash table.
+// Documentation/spinlocks.txt suggests we can be a bit more clever
+// if we guarantee that in interrupt context we only read, not write,
+// the data structures. We should never change the hash table or its
+// contents in interrupt context (which should only ever call the
+// lookup routine, __stp_tf_get_map_entry, for getting stored info).
+// So we might want to look into that if this seems a bottleneck.
+static DEFINE_RWLOCK(__stp_tf_map_lock);
+
+#define __STP_TF_HASH_BITS 4
+#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS) // number of hash buckets (a power of two)
+
+#ifndef TASK_FINDER_MAP_ENTRY_ITEMS
+#define TASK_FINDER_MAP_ENTRY_ITEMS 100 // size of the static entry pool; may be predefined to override
+#endif
+
+struct __stp_tf_map_entry {
+/* private: */
+ struct hlist_node hlist; // links the entry into the free list or a hash bucket
+ int usage; // incremented by __stp_tf_add_map(), decremented by __stp_tf_remove_map_entry()
+
+/* public: */
+ pid_t pid; // tsk->pid of the task the saved syscall belongs to
+ long syscall_no; // syscall number passed to __stp_tf_add_map()
+ unsigned long arg0; // syscall arguments as passed to __stp_tf_add_map()
+ unsigned long arg1;
+ unsigned long arg2;
+};
+
+static struct __stp_tf_map_entry
+__stp_tf_map_free_list_items[TASK_FINDER_MAP_ENTRY_ITEMS]; // static backing storage for every entry
+
+static struct hlist_head __stp_tf_map_free_list[1]; // list of currently unused entries
+
+static struct hlist_head __stp_tf_map_table[__STP_TF_TABLE_SIZE]; // hash buckets, indexed by __stp_tf_map_hash()
+
+// __stp_tf_map_initialize(): Put every statically allocated entry on
+// the free list. Takes the write lock on __stp_tf_map_lock.
+static void
+__stp_tf_map_initialize(void)
+{
+ int i;
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+ for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) {
+ hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head); // each pool item becomes a free entry
+ }
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+}
+
+
+// __stp_tf_map_get_free_entry(): Unlinks and returns the first entry on
+// the free list, or NULL if the list is empty. The __stp_tf_map_lock must
+// be write locked before calling this function.
+static struct __stp_tf_map_entry *
+__stp_tf_map_get_free_entry(void)
+{
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry = NULL;
+
+ if (hlist_empty(head))
+ return NULL;
+ hlist_for_each_entry(entry, node, head, hlist) { // loop is only used to fetch the first entry
+ break;
+ }
+ if (entry != NULL)
+ hlist_del(&entry->hlist); // take it off the free list before handing it out
+ return entry;
+}
+
+
+// __stp_tf_map_put_free_entry(): Pushes an entry onto the head of the free
+// list. The __stp_tf_map_lock must be write locked before calling this
+// function.
+static void
+__stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry)
+{
+ struct hlist_head *head = &__stp_tf_map_free_list[0];
+ hlist_add_head(&entry->hlist, head); // entry must already be unlinked from any other list
+}
+
+
+// __stp_tf_map_hash(): Hash tsk->pid to a bucket index in __stp_tf_map_table.
+static inline u32
+__stp_tf_map_hash(struct task_struct *tsk)
+{
+ return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1)); // mask keeps the index below __STP_TF_TABLE_SIZE
+}
+
+
+// Look up the saved entry whose pid equals tsk->pid in the map hash table.
+// Returns NULL if not present. Takes a read lock on __stp_tf_map_lock.
+static struct __stp_tf_map_entry *
+__stp_tf_get_map_entry(struct task_struct *tsk)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry;
+
+ unsigned long flags;
+ read_lock_irqsave(&__stp_tf_map_lock, flags);
+ head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ if (tsk->pid == entry->pid) {
+ read_unlock_irqrestore(&__stp_tf_map_lock, flags); // lock is dropped before the entry is handed back
+ return entry;
+ }
+ }
+ read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return NULL;
+}
+
+
+// Save syscall info for tsk->pid in the map hash table, or bump the usage
+// count of an existing entry. Takes a write lock on __stp_tf_map_lock.
+static int
+__stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct __stp_tf_map_entry *entry;
+ unsigned long flags;
+
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+ head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ // If we find an existing entry, just increment the
+ // usage count.
+ if (tsk->pid == entry->pid) {
+ entry->usage++; // stored syscall_no/arg0-2 are left unchanged
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return 0;
+ }
+ }
+
+ // Get an element from the free list.
+ entry = __stp_tf_map_get_free_entry();
+ if (!entry) {
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return -ENOMEM; // free list is exhausted
+ }
+ entry->usage = 1;
+ entry->pid = tsk->pid;
+ entry->syscall_no = syscall_no;
+ entry->arg0 = arg0;
+ entry->arg1 = arg1;
+ entry->arg2 = arg2;
+ hlist_add_head(&entry->hlist, head); // publish the entry in its hash bucket
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ return 0;
+}
+
+
+// Drop one usage of the map entry; when the count reaches zero, unlink it
+// and return it to the free list. Takes a write lock on __stp_tf_map_lock.
+static int
+__stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry)
+{
+ struct hlist_head *head; // NOTE(review): head, node and found are never used below
+ struct hlist_node *node;
+ int found = 0;
+
+ if (entry != NULL) {
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_map_lock, flags);
+
+ // Decrement the usage count.
+ entry->usage--;
+
+ // If the entry is unused, put it back on the free
+ // list.
+ if (entry->usage == 0) {
+ hlist_del(&entry->hlist);
+ __stp_tf_map_put_free_entry(entry);
+ }
+ write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+ }
+ return 0; // always 0, also when entry is NULL
+}
diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c
index 4dce4be8..ed9c6f4f 100644
--- a/runtime/task_finder_vma.c
+++ b/runtime/task_finder_vma.c
@@ -1,13 +1,19 @@
#include <linux/list.h>
#include <linux/jhash.h>
-#include <linux/mutex.h>
+#include <linux/spinlock.h>
// When handling memcpy() syscall tracing to notice memory map
// changes, we need to cache memcpy() entry parameter values for
// processing at memcpy() exit.
-// __stp_tf_vma_mutex protects the hash table.
-static DEFINE_MUTEX(__stp_tf_vma_mutex);
+// __stp_tf_vma_lock protects the hash table.
+// Documentation/spinlocks.txt suggests we can be a bit more clever
+// if we guarantee that in interrupt context we only read, not write,
+// the data structures. We should never change the hash table or its
+// contents in interrupt context (which should only ever call
+// stap_find_vma_map_info for getting stored vma info). So we might
+// want to look into that if this seems a bottleneck.
+static DEFINE_RWLOCK(__stp_tf_vma_lock);
#define __STP_TF_HASH_BITS 4
#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)
@@ -25,6 +31,9 @@ struct __stp_tf_vma_entry {
unsigned long vm_end;
unsigned long vm_pgoff;
// Is that enough? Should we store a dcookie for vm_file?
+
+ // User data (possibly stp_module)
+ void *user;
};
static struct __stp_tf_vma_entry
@@ -37,23 +46,24 @@ static struct hlist_head __stp_tf_vma_table[__STP_TF_TABLE_SIZE];
static struct hlist_head __stp_tf_vma_map[__STP_TF_TABLE_SIZE];
// __stp_tf_vma_initialize(): Initialize the free list. Grabs the
-// mutex.
+// spinlock.
static void
__stp_tf_vma_initialize(void)
{
int i;
struct hlist_head *head = &__stp_tf_vma_free_list[0];
- mutex_lock(&__stp_tf_vma_mutex);
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_vma_lock, flags);
for (i = 0; i < TASK_FINDER_VMA_ENTRY_ITEMS; i++) {
hlist_add_head(&__stp_tf_vma_free_list_items[i].hlist, head);
}
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
}
// __stp_tf_vma_get_free_entry(): Returns an entry from the free list
-// or NULL. The __stp_tf_vma_mutex must be locked before calling this
+// or NULL. The __stp_tf_vma_lock must be write locked before calling this
// function.
static struct __stp_tf_vma_entry *
__stp_tf_vma_get_free_entry(void)
@@ -74,7 +84,7 @@ __stp_tf_vma_get_free_entry(void)
// __stp_tf_vma_put_free_entry(): Puts an entry back on the free
-// list. The __stp_tf_vma_mutex must be locked before calling this
+// list. The __stp_tf_vma_lock must be write locked before calling this
// function.
static void
__stp_tf_vma_put_free_entry(struct __stp_tf_vma_entry *entry)
@@ -98,7 +108,7 @@ __stp_tf_vma_hash(struct task_struct *tsk, unsigned long addr)
// Get vma_entry if the vma is present in the vma hash table.
-// Returns NULL if not present.
+// Returns NULL if not present. Takes a read lock on __stp_tf_vma_lock.
static struct __stp_tf_vma_entry *
__stp_tf_get_vma_entry(struct task_struct *tsk, unsigned long addr)
{
@@ -106,20 +116,22 @@ __stp_tf_get_vma_entry(struct task_struct *tsk, unsigned long addr)
struct hlist_node *node;
struct __stp_tf_vma_entry *entry;
- mutex_lock(&__stp_tf_vma_mutex);
+ unsigned long flags;
+ read_lock_irqsave(&__stp_tf_vma_lock, flags);
head = &__stp_tf_vma_table[__stp_tf_vma_hash(tsk, addr)];
hlist_for_each_entry(entry, node, head, hlist) {
if (tsk->pid == entry->pid
&& addr == entry->addr) {
- mutex_unlock(&__stp_tf_vma_mutex);
+ read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return entry;
}
}
- mutex_unlock(&__stp_tf_vma_mutex);
+ read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return NULL;
}
// Add the vma info to the vma hash table.
+// Takes a write lock on __stp_tf_vma_lock.
static int
__stp_tf_add_vma(struct task_struct *tsk, unsigned long addr,
struct vm_area_struct *vma)
@@ -128,7 +140,8 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr,
struct hlist_node *node;
struct __stp_tf_vma_entry *entry;
- mutex_lock(&__stp_tf_vma_mutex);
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_vma_lock, flags);
head = &__stp_tf_vma_table[__stp_tf_vma_hash(tsk, addr)];
hlist_for_each_entry(entry, node, head, hlist) {
if (tsk->pid == entry->pid
@@ -138,7 +151,7 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr,
"vma (pid: %d, vm_start: 0x%lx) present?\n",
tsk->pid, vma->vm_start);
#endif
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return -EBUSY; /* Already there */
}
}
@@ -146,7 +159,7 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr,
// Get an element from the free list.
entry = __stp_tf_vma_get_free_entry();
if (!entry) {
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return -ENOMEM;
}
entry->pid = tsk->pid;
@@ -155,11 +168,12 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr,
entry->vm_end = vma->vm_end;
entry->vm_pgoff = vma->vm_pgoff;
hlist_add_head(&entry->hlist, head);
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return 0;
}
// Remove the vma entry from the vma hash table.
+// Takes a write lock on __stp_tf_vma_lock.
static int
__stp_tf_remove_vma_entry(struct __stp_tf_vma_entry *entry)
{
@@ -168,10 +182,11 @@ __stp_tf_remove_vma_entry(struct __stp_tf_vma_entry *entry)
int found = 0;
if (entry != NULL) {
- mutex_lock(&__stp_tf_vma_mutex);
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_vma_lock, flags);
hlist_del(&entry->hlist);
__stp_tf_vma_put_free_entry(entry);
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
}
return 0;
}
@@ -186,7 +201,7 @@ __stp_tf_vma_map_hash(struct task_struct *tsk)
}
// Get vma_entry if the vma is present in the vma map hash table.
-// Returns NULL if not present. The __stp_tf_vma_mutex must be locked
+// Returns NULL if not present. The __stp_tf_vma_lock must be read locked
// before calling this function.
static struct __stp_tf_vma_entry *
__stp_tf_get_vma_map_entry_internal(struct task_struct *tsk,
@@ -200,7 +215,6 @@ __stp_tf_get_vma_map_entry_internal(struct task_struct *tsk,
hlist_for_each_entry(entry, node, head, hlist) {
if (tsk->pid == entry->pid
&& vm_start == entry->addr) {
- mutex_unlock(&__stp_tf_vma_mutex);
return entry;
}
}
@@ -211,13 +225,17 @@ __stp_tf_get_vma_map_entry_internal(struct task_struct *tsk,
// Add the vma info to the vma map hash table.
static int
stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start,
- unsigned long vm_end, unsigned long vm_pgoff)
+ unsigned long vm_end, unsigned long vm_pgoff,
+ void *user)
{
struct hlist_head *head;
struct hlist_node *node;
struct __stp_tf_vma_entry *entry;
- mutex_lock(&__stp_tf_vma_mutex);
+ unsigned long flags;
+ // Take a write lock, since we are most likely going to write
+ // after reading.
+ write_lock_irqsave(&__stp_tf_vma_lock, flags);
entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
if (entry != NULL) {
#if 0
@@ -225,14 +243,14 @@ stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start,
"vma (pid: %d, vm_start: 0x%lx) present?\n",
tsk->pid, entry->vm_start);
#endif
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return -EBUSY; /* Already there */
}
// Get an element from the free list.
entry = __stp_tf_vma_get_free_entry();
if (!entry) {
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return -ENOMEM;
}
@@ -242,10 +260,11 @@ stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start,
entry->vm_start = vm_start;
entry->vm_end = vm_end;
entry->vm_pgoff = vm_pgoff;
+ entry->user = user;
head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
hlist_add_head(&entry->hlist, head);
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return 0;
}
@@ -259,23 +278,26 @@ stap_remove_vma_map_info(struct task_struct *tsk, unsigned long vm_start,
struct hlist_node *node;
struct __stp_tf_vma_entry *entry;
- mutex_lock(&__stp_tf_vma_mutex);
+ // Take a write lock since we are most likely going to delete
+ // after reading.
+ unsigned long flags;
+ write_lock_irqsave(&__stp_tf_vma_lock, flags);
entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
if (entry != NULL) {
hlist_del(&entry->hlist);
__stp_tf_vma_put_free_entry(entry);
}
- mutex_unlock(&__stp_tf_vma_mutex);
+ write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return 0;
}
// Finds vma info if the vma is present in the vma map hash table.
-// Returns ESRCH if not present. The __stp_tf_vma_mutex must *not* be
+// Returns ESRCH if not present. The __stp_tf_vma_lock must *not* be
// locked before calling this function.
static int
stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr,
unsigned long *vm_start, unsigned long *vm_end,
- unsigned long *vm_pgoff)
+ unsigned long *vm_pgoff, void **user)
{
struct hlist_head *head;
struct hlist_node *node;
@@ -283,7 +305,8 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr,
struct __stp_tf_vma_entry *found_entry = NULL;
int rc = ESRCH;
- mutex_lock(&__stp_tf_vma_mutex);
+ unsigned long flags;
+ read_lock_irqsave(&__stp_tf_vma_lock, flags);
head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
hlist_for_each_entry(entry, node, head, hlist) {
if (tsk->pid == entry->pid
@@ -300,8 +323,10 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr,
*vm_end = found_entry->vm_end;
if (vm_pgoff != NULL)
*vm_pgoff = found_entry->vm_pgoff;
+ if (user != NULL)
+ *user = found_entry->user;
rc = 0;
}
- mutex_unlock(&__stp_tf_vma_mutex);
+ read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
return rc;
}
diff --git a/runtime/transport/control.c b/runtime/transport/control.c
index 4e07a0a7..a1624152 100644
--- a/runtime/transport/control.c
+++ b/runtime/transport/control.c
@@ -18,6 +18,8 @@ static _stp_mempool_t *_stp_pool_q;
static struct list_head _stp_ctl_ready_q;
static DEFINE_SPINLOCK(_stp_ctl_ready_lock);
+static void _stp_cleanup_and_exit(int send_exit);
+
static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
u32 type;
@@ -51,7 +53,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz
}
break;
case STP_EXIT:
- _stp_exit_flag = 1;
+ _stp_cleanup_and_exit(1);
break;
case STP_BULK:
#ifdef STP_BULKMODE
@@ -98,6 +100,9 @@ static void _stp_ctl_write_dbug(int type, void *data, int len)
case STP_TRANSPORT:
_dbug("sending STP_TRANSPORT\n");
break;
+ case STP_REQUEST_EXIT:
+ _dbug("sending STP_REQUEST_EXIT\n");
+ break;
default:
_dbug("ERROR: unknown message type: %d\n", type);
break;
diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c
index 3d0453bf..792ea815 100644
--- a/runtime/transport/transport.c
+++ b/runtime/transport/transport.c
@@ -148,6 +148,18 @@ static void _stp_cleanup_and_exit(int send_exit)
}
}
+static void _stp_request_exit(void) /* Send STP_REQUEST_EXIT via _stp_ctl_send(), at most once. */
+{
+ static int called = 0; /* NOTE(review): plain flag; no locking is visible around it here */
+ if (!called) {
+ /* we only want to do this once */
+ called = 1;
+ dbug_trans(1, "ctl_send STP_REQUEST_EXIT\n");
+ _stp_ctl_send(STP_REQUEST_EXIT, NULL, 0); /* message carries no payload */
+ dbug_trans(1, "done with ctl_send STP_REQUEST_EXIT\n");
+ }
+}
+
/*
* Called when stapio closes the control channel.
*/
@@ -202,7 +214,7 @@ static void _stp_work_queue(void *data)
/* if exit flag is set AND we have finished with probe_start() */
if (unlikely(_stp_exit_flag && _stp_probes_started))
- _stp_cleanup_and_exit(1);
+ _stp_request_exit();
if (likely(_stp_ctl_attached))
queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER);
}
diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h
index 0d6853f7..aa50051c 100644
--- a/runtime/transport/transport_msgs.h
+++ b/runtime/transport/transport_msgs.h
@@ -21,12 +21,12 @@ struct _stp_trace {
enum
{
STP_START,
- STP_EXIT,
+ STP_EXIT,
STP_OOB_DATA,
STP_SYSTEM,
STP_TRANSPORT,
STP_CONNECT,
- STP_DISCONNECT,
+ STP_DISCONNECT,
STP_BULK,
STP_READY,
STP_RELOCATION,
@@ -34,6 +34,7 @@ enum
STP_BUF_INFO,
STP_SUBBUFS_CONSUMED,
STP_REALTIME_DATA,
+ STP_REQUEST_EXIT,
STP_MAX_CMD
};
@@ -52,6 +53,7 @@ static const char *_stp_command_name[] = {
"STP_BUF_INFO",
"STP_SUBBUFS_CONSUMED",
"STP_REALTIME_DATA",
+ "STP_REQUEST_EXIT",
};
#endif /* DEBUG_TRANS */
diff --git a/runtime/unwind.c b/runtime/unwind.c
index 9c704e28..aacd56f1 100644
--- a/runtime/unwind.c
+++ b/runtime/unwind.c
@@ -345,7 +345,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, s
state->label = NULL;
return 1;
}
- if (state->stackDepth >= MAX_STACK_DEPTH)
+ if (state->stackDepth >= STP_MAX_STACK_DEPTH)
return 0;
state->stack[state->stackDepth++] = ptr.p8;
break;
@@ -435,12 +435,18 @@ adjustStartLoc (unsigned long startLoc,
struct _stp_module *m,
struct _stp_section *s)
{
- if (startLoc && (strcmp (m->name, "kernel") != 0))
- {
- startLoc = _stp_module_relocate (m->name, s->name,
- startLoc);
- startLoc -= m->dwarf_module_base;
- }
+ /* XXX - some, or all, of this should really be done by
+ _stp_module_relocate. */
+ if (startLoc == 0
+ || strcmp (m->name, "kernel") == 0
+ || strcmp (s->name, ".absolute") == 0)
+ return startLoc;
+
+ if (strcmp (s->name, ".dynamic") == 0)
+ return startLoc + s->addr;
+
+ startLoc = _stp_module_relocate (m->name, s->name, startLoc);
+ startLoc -= m->dwarf_module_base;
return startLoc;
}
@@ -562,7 +568,7 @@ static char *_stp_eh_enc_name(signed type)
/* Unwind to previous to frame. Returns 0 if successful, negative
* number in case of an error. A positive return means unwinding is finished;
* don't try to fallback to dumping addresses on the stack. */
-static int unwind(struct unwind_frame_info *frame)
+static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk)
{
#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
const u32 *fde, *cie = NULL;
@@ -581,7 +587,7 @@ static int unwind(struct unwind_frame_info *frame)
if (UNW_PC(frame) == 0)
return -EINVAL;
- m = _stp_mod_sec_lookup (pc, &s);
+ m = _stp_mod_sec_lookup (pc, tsk, &s);
if (unlikely(m == NULL)) {
dbug_unwind(1, "No module found for pc=%lx", pc);
return -EINVAL;
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
index 78a4bfef..3b6d0de0 100644
--- a/runtime/unwind/unwind.h
+++ b/runtime/unwind/unwind.h
@@ -23,7 +23,7 @@
#error "Unsupported dwarf unwind architecture"
#endif
-#define MAX_STACK_DEPTH 8
+#define STP_MAX_STACK_DEPTH 8
#ifndef BUILD_BUG_ON_ZERO
#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
@@ -135,7 +135,7 @@ struct unwind_state {
unsigned stackDepth:8;
unsigned version:8;
const u8 *label;
- const u8 *stack[MAX_STACK_DEPTH];
+ const u8 *stack[STP_MAX_STACK_DEPTH];
};
static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
diff --git a/runtime/uprobes/.gitignore b/runtime/uprobes/.gitignore
new file mode 100644
index 00000000..c8172c41
--- /dev/null
+++ b/runtime/uprobes/.gitignore
@@ -0,0 +1,7 @@
+/*.o
+/*.cmd
+/.tmp_versions
+/Module.*
+/modules.order
+/uprobes.ko
+/uprobes.mod.c
diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c
index 9dfb82b9..27e923b8 100644
--- a/runtime/uprobes/uprobes.c
+++ b/runtime/uprobes/uprobes.c
@@ -1049,8 +1049,7 @@ fail_tsk:
}
EXPORT_SYMBOL_GPL(register_uprobe);
-/* See Documentation/uprobes.txt. */
-void unregister_uprobe(struct uprobe *u)
+void __unregister_uprobe(struct uprobe *u, bool remove_bkpt)
{
struct task_struct *p;
struct uprobe_process *uproc;
@@ -1104,10 +1103,13 @@ void unregister_uprobe(struct uprobe *u)
if (!list_empty(&ppt->uprobe_list))
goto done;
- /*
- * The last uprobe at ppt's probepoint is being unregistered.
- * Queue the breakpoint for removal.
- */
+ /* The last uprobe at ppt's probepoint is being unregistered. */
+ if (!remove_bkpt) {
+ uprobe_free_probept(ppt);
+ goto done;
+ }
+
+ /* Queue the breakpoint for removal. */
ppt->state = UPROBE_REMOVING;
list_add_tail(&ppt->pd_node, &uproc->pending_uprobes);
@@ -1132,8 +1134,20 @@ done:
up_write(&uproc->rwsem);
uprobe_put_process(uproc);
}
+
+/* See Documentation/uprobes.txt. */
+void unregister_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, true); /* remove_bkpt = true: queue the breakpoint for removal */
+}
EXPORT_SYMBOL_GPL(unregister_uprobe);
+void unmap_uprobe(struct uprobe *u) /* Unregister u without queueing removal of its breakpoint. */
+{
+ __unregister_uprobe(u, false); /* remove_bkpt = false */
+}
+EXPORT_SYMBOL_GPL(unmap_uprobe);
+
/* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */
static struct task_struct *find_surviving_thread(struct uprobe_process *uproc)
{
@@ -2540,6 +2554,14 @@ void unregister_uretprobe(struct uretprobe *rp)
}
EXPORT_SYMBOL_GPL(unregister_uretprobe);
+void unmap_uretprobe(struct uretprobe *rp) /* unmap_uprobe() for the uprobe embedded in rp. */
+{
+ if (!rp) /* a NULL rp is silently ignored */
+ return;
+ unmap_uprobe(&rp->u);
+}
+EXPORT_SYMBOL_GPL(unmap_uretprobe);
+
/*
* uproc->ssol_area has been successfully set up. Establish the
* uretprobe trampoline in slot 0.
diff --git a/runtime/uprobes/uprobes.h b/runtime/uprobes/uprobes.h
index 0266cb7d..d542420d 100644
--- a/runtime/uprobes/uprobes.h
+++ b/runtime/uprobes/uprobes.h
@@ -35,6 +35,9 @@
#include <linux/types.h>
#include <linux/list.h>
+/* Version 2 includes unmap_u[ret]probe(). */
+#define UPROBES_API_VERSION 2
+
struct pt_regs;
enum uprobe_type {
@@ -89,6 +92,9 @@ extern void unregister_uprobe(struct uprobe *u);
/* For runtime, assume uprobes support includes uretprobes. */
extern int register_uretprobe(struct uretprobe *rp);
extern void unregister_uretprobe(struct uretprobe *rp);
+/* For PRs 9940, 6852... */
+extern void unmap_uprobe(struct uprobe *u);
+extern void unmap_uretprobe(struct uretprobe *rp);
#ifdef UPROBES_IMPLEMENTATION
diff --git a/runtime/uprobes/uprobes_i386.c b/runtime/uprobes/uprobes_i386.c
index ffa088ed..008f32de 100644
--- a/runtime/uprobes/uprobes_i386.c
+++ b/runtime/uprobes/uprobes_i386.c
@@ -44,7 +44,7 @@
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1), /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -58,22 +58,22 @@
static const unsigned long good_2byte_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ------------------------------- */
- W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 00 */
- W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
- W(0x20, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
- W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
- W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
- W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
- W(0x60, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 60 */
- W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
+ W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
+ W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
+ W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
+ W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
- W(0xc0, 1,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1)| /* c0 */
- W(0xd0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* d0 */
- W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
- W(0xf0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) /* f0 */
+ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
+ W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */
+ W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
+ W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */
/* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
@@ -114,7 +114,6 @@
* 26, 2e, 36, 3e, - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seems to be used, so we support them.
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
* f2, f3 - repnz, repz prefixes
@@ -302,9 +301,20 @@ unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
return orig_ret_addr;
}
+/*
+ * On x86_32, if a function returns a struct or union, the return
+ * value is copied into an area created by the caller. The address
+ * of this area is passed on the stack as a "hidden" first argument.
+ * When such a function returns, it uses a "ret $4" instruction to pop
+ * not only the return address but also the hidden arg. To accommodate
+ * such functions, we add 4 bytes of slop when predicting the stack
+ * pointer at return. See PR #10078.
+ */
+#define STRUCT_RETURN_SLOP 4
+
static
unsigned long arch_predict_sp_at_ret(struct pt_regs *regs,
struct task_struct *tsk)
{
- return (unsigned long) (regs->esp + 4);
+ return (unsigned long) (regs->esp + 4 + STRUCT_RETURN_SLOP);
}
diff --git a/runtime/uprobes/uprobes_x86.c b/runtime/uprobes/uprobes_x86.c
index e3bdf8ff..93331715 100644
--- a/runtime/uprobes/uprobes_x86.c
+++ b/runtime/uprobes/uprobes_x86.c
@@ -45,8 +45,8 @@ static const unsigned long long good_insns_64[256 / 64] = {
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -71,7 +71,7 @@ static const unsigned long long good_insns_32[256 / 64] = {
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -119,7 +119,7 @@ static const unsigned long long good_2byte_insns[256 / 64] = {
* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
*
* invalid opcodes in 64-bit mode:
- * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5
+ * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
*
* 63 - we support this opcode in x86_64 but not in i386.
* opcodes we may need to refine support for:
@@ -141,7 +141,6 @@ static const unsigned long long good_2byte_insns[256 / 64] = {
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seems to be used, so we support them.
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
*/
@@ -717,12 +716,23 @@ unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
return orig_ret_addr;
}
+/*
+ * On x86_32, if a function returns a struct or union, the return
+ * value is copied into an area created by the caller. The address
+ * of this area is passed on the stack as a "hidden" first argument.
+ * When such a function returns, it uses a "ret $4" instruction to pop
+ * not only the return address but also the hidden arg. To accommodate
+ * such functions, we add 4 bytes of slop when predicting the stack
+ * pointer at return. See PR #10078.
+ */
+#define STRUCT_RETURN_SLOP 4
+
static
unsigned long arch_predict_sp_at_ret(struct pt_regs *regs,
struct task_struct *tsk)
{
if (test_tsk_thread_flag(tsk, TIF_IA32))
- return (unsigned long) (REGS_SP + 4);
+ return (unsigned long) (REGS_SP + 4 + STRUCT_RETURN_SLOP);
else
return (unsigned long) (REGS_SP + 8);
}
diff --git a/runtime/uprobes/uprobes_x86_64.c b/runtime/uprobes/uprobes_x86_64.c
index 8cf36623..56ebe2e1 100644
--- a/runtime/uprobes/uprobes_x86_64.c
+++ b/runtime/uprobes/uprobes_x86_64.c
@@ -45,8 +45,8 @@ static const unsigned long good_insns_64[256 / 64] = {
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -71,7 +71,7 @@ static const unsigned long good_insns_32[256 / 64] = {
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -141,7 +141,6 @@ static const unsigned long good_2byte_insns[256 / 64] = {
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seems to be used, so we support them.
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
*/
diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c
index af187fc9..07ad3984 100644
--- a/runtime/uprobes2/uprobes.c
+++ b/runtime/uprobes2/uprobes.c
@@ -29,6 +29,12 @@
#include <linux/utrace.h>
#include <linux/regset.h>
#define UPROBES_IMPLEMENTATION 1
+
+/* PR9974: Adapt to struct renaming. */
+#ifdef UTRACE_API_VERSION
+#define utrace_attached_engine utrace_engine
+#endif
+
#include "uprobes.h"
#include <linux/tracehook.h>
#include <linux/mm.h>
@@ -949,10 +955,15 @@ static int defer_registration(struct uprobe *u, int regflag,
*/
static struct pid *uprobe_get_tg_leader(pid_t p)
{
- struct pid *pid;
+ struct pid *pid = NULL;
rcu_read_lock();
- pid = find_vpid(p);
+ /*
+ * We need this check because unmap_u[ret]probe() can be called
+ * from a report_death callback, where current->nsproxy is NULL.
+ */
+ if (current->nsproxy)
+ pid = find_vpid(p);
if (pid) {
struct task_struct *t = pid_task(pid, PIDTYPE_PID);
if (t)
@@ -1132,8 +1143,7 @@ fail_tsk:
}
EXPORT_SYMBOL_GPL(register_uprobe);
-/* See Documentation/uprobes.txt. */
-void unregister_uprobe(struct uprobe *u)
+void __unregister_uprobe(struct uprobe *u, bool remove_bkpt)
{
struct pid *p;
struct uprobe_process *uproc;
@@ -1187,10 +1197,13 @@ void unregister_uprobe(struct uprobe *u)
if (!list_empty(&ppt->uprobe_list))
goto done;
- /*
- * The last uprobe at ppt's probepoint is being unregistered.
- * Queue the breakpoint for removal.
- */
+ /* The last uprobe at ppt's probepoint is being unregistered. */
+ if (!remove_bkpt) {
+ uprobe_free_probept(ppt);
+ goto done;
+ }
+
+ /* Queue the breakpoint for removal. */
ppt->state = UPROBE_REMOVING;
list_add_tail(&ppt->pd_node, &uproc->pending_uprobes);
@@ -1215,8 +1228,20 @@ done:
up_write(&uproc->rwsem);
uprobe_put_process(uproc, false);
}
+
+/* See Documentation/uprobes.txt. */
+void unregister_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, true);
+}
EXPORT_SYMBOL_GPL(unregister_uprobe);
+void unmap_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, false);
+}
+EXPORT_SYMBOL_GPL(unmap_uprobe);
+
/* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */
static struct task_struct *find_surviving_thread(struct uprobe_process *uproc)
{
@@ -2214,7 +2239,8 @@ static u32 uprobe_report_exit(enum utrace_resume_action action,
}
}
up_read(&uproc->rwsem);
- if (utask->state == UPTASK_TRAMPOLINE_HIT)
+ if (utask->state == UPTASK_TRAMPOLINE_HIT ||
+ utask->state == UPTASK_BP_HIT)
uprobe_decref_process(uproc);
}
@@ -2712,6 +2738,14 @@ void unregister_uretprobe(struct uretprobe *rp)
}
EXPORT_SYMBOL_GPL(unregister_uretprobe);
+void unmap_uretprobe(struct uretprobe *rp)
+{
+ if (!rp)
+ return;
+ unmap_uprobe(&rp->u);
+}
+EXPORT_SYMBOL_GPL(unmap_uretprobe);
+
/*
* uproc->ssol_area has been successfully set up. Establish the
* uretprobe trampoline in the next available slot following the
diff --git a/runtime/uprobes2/uprobes.h b/runtime/uprobes2/uprobes.h
index 11d01f5c..ae0692f0 100644
--- a/runtime/uprobes2/uprobes.h
+++ b/runtime/uprobes2/uprobes.h
@@ -23,6 +23,14 @@
#include <linux/types.h>
#include <linux/list.h>
+/* Adapt to struct renaming. */
+#ifdef UTRACE_API_VERSION
+#define utrace_attached_engine utrace_engine
+#endif
+
+/* Version 2 includes unmap_u[ret]probe(). */
+#define UPROBES_API_VERSION 2
+
struct pt_regs;
enum uprobe_type {
@@ -77,6 +85,9 @@ extern void unregister_uprobe(struct uprobe *u);
/* For runtime, assume uprobes support includes uretprobes. */
extern int register_uretprobe(struct uretprobe *rp);
extern void unregister_uretprobe(struct uretprobe *rp);
+/* For PRs 9940, 6852... */
+extern void unmap_uprobe(struct uprobe *u);
+extern void unmap_uretprobe(struct uretprobe *rp);
#ifdef UPROBES_IMPLEMENTATION
diff --git a/runtime/uprobes2/uprobes_x86.c b/runtime/uprobes2/uprobes_x86.c
index effb7444..8c80293d 100644
--- a/runtime/uprobes2/uprobes_x86.c
+++ b/runtime/uprobes2/uprobes_x86.c
@@ -50,8 +50,8 @@ static const u64 good_insns_64[256 / 64] = {
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -76,7 +76,7 @@ static const u64 good_insns_32[256 / 64] = {
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -124,7 +124,7 @@ static const u64 good_2byte_insns[256 / 64] = {
* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
*
* invalid opcodes in 64-bit mode:
- * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5
+ * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
*
* 63 - we support this opcode in x86_64 but not in i386.
* opcodes we may need to refine support for:
@@ -146,7 +146,6 @@ static const u64 good_2byte_insns[256 / 64] = {
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seem to be used, so we support them
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
*/
diff --git a/runtime/uprobes2/uprobes_x86.h b/runtime/uprobes2/uprobes_x86.h
index ca3f4873..a07fa0d3 100644
--- a/runtime/uprobes2/uprobes_x86.h
+++ b/runtime/uprobes2/uprobes_x86.h
@@ -93,11 +93,22 @@ static inline unsigned long arch_get_cur_sp(struct pt_regs *regs)
return (unsigned long) regs->sp;
}
+/*
+ * On x86_32, if a function returns a struct or union, the return
+ * value is copied into an area created by the caller. The address
+ * of this area is passed on the stack as a "hidden" first argument.
+ * When such a function returns, it uses a "ret $4" instruction to pop
+ * not only the return address but also the hidden arg. To accommodate
+ * such functions, we add 4 bytes of slop when predicting the stack
+ * pointer at return. See PR #10078.
+ */
+#define STRUCT_RETURN_SLOP 4
+
static inline unsigned long arch_predict_sp_at_ret(struct pt_regs *regs,
struct task_struct *tsk)
{
if (test_tsk_thread_flag(tsk, TIF_IA32))
- return (unsigned long) (regs->sp + 4);
+ return (unsigned long) (regs->sp + 4 + STRUCT_RETURN_SLOP);
else
return (unsigned long) (regs->sp + 8);
}
diff --git a/runtime/utrace_compatibility.h b/runtime/utrace_compatibility.h
index 00b841d2..5521a5c2 100644
--- a/runtime/utrace_compatibility.h
+++ b/runtime/utrace_compatibility.h
@@ -1,6 +1,6 @@
/*
* utrace compatibility defines and inlines
- * Copyright (C) 2008 Red Hat Inc.
+ * Copyright (C) 2008-2009 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
@@ -13,6 +13,11 @@
#include <linux/utrace.h>
+/* PR9974: Adapt to struct renaming. */
+#ifdef UTRACE_API_VERSION
+#define utrace_attached_engine utrace_engine
+#endif
+
#ifdef UTRACE_ACTION_RESUME
/*
@@ -28,6 +33,8 @@ enum utrace_resume_action {
UTRACE_STOP = UTRACE_ACTION_QUIESCE,
UTRACE_RESUME = UTRACE_ACTION_RESUME,
UTRACE_DETACH = UTRACE_ACTION_DETACH,
+ UTRACE_SINGLESTEP = UTRACE_ACTION_SINGLESTEP,
+ UTRACE_BLOCKSTEP = UTRACE_ACTION_BLOCKSTEP,
};
static inline struct utrace_attached_engine *
@@ -48,6 +55,11 @@ utrace_control(struct task_struct *target,
case UTRACE_STOP:
return utrace_set_flags(target, engine,
(engine->flags | UTRACE_ACTION_QUIESCE));
+ case UTRACE_SINGLESTEP:
+ case UTRACE_BLOCKSTEP:
+ return utrace_set_flags(target, engine,
+ engine->flags | action);
+
default:
return -EINVAL;
}
diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c
index bd58d760..23810e75 100644
--- a/runtime/vsprintf.c
+++ b/runtime/vsprintf.c
@@ -12,6 +12,9 @@
#ifndef _VSPRINTF_C_
#define _VSPRINTF_C_
+//forward declaration for _stp_vsnprintf
+static void * _stp_reserve_bytes (int);
+
static int skip_atoi(const char **s)
{
int i=0;
@@ -22,6 +25,10 @@ static int skip_atoi(const char **s)
enum print_flag {STP_ZEROPAD=1, STP_SIGN=2, STP_PLUS=4, STP_SPACE=8, STP_LEFT=16, STP_SPECIAL=32, STP_LARGE=64};
+/*
+ * Changes to number() will require a corresponding change to number_size below,
+ * to ensure proper buffer allocation for _stp_printf.
+ */
static char * number(char * buf, char * end, uint64_t num, int base, int size, int precision, enum print_flag type)
{
char c,sign,tmp[66];
@@ -115,6 +122,85 @@ static char * number(char * buf, char * end, uint64_t num, int base, int size, i
return buf;
}
+/*
+ * Calculates the number of bytes required to print the parameter num. A change to
+ * number() requires a corresponding change here, and vice versa, to ensure the
+ * calculated size and printed size match.
+ */
+static int number_size(uint64_t num, int base, int size, int precision, enum print_flag type) {
+ char c,sign,tmp[66];
+ const char *digits;
+ static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i, num_bytes = 0;
+
+ digits = (type & STP_LARGE) ? large_digits : small_digits;
+ if (type & STP_LEFT)
+ type &= ~STP_ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & STP_ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & STP_SIGN) {
+ if ((int64_t) num < 0) {
+ sign = '-';
+ num = - (int64_t) num;
+ size--;
+ } else if (type & STP_PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & STP_SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & STP_SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(STP_ZEROPAD+STP_LEFT))) {
+ while(size-->0) {
+ num_bytes++;
+ }
+ }
+ if (sign) {
+ num_bytes++;
+ }
+ if (type & STP_SPECIAL) {
+ if (base==8) {
+ num_bytes++;
+ } else if (base==16) {
+ num_bytes+=2;
+ }
+ }
+ if (!(type & STP_LEFT)) {
+ while (size-- > 0) {
+ num_bytes++;
+ }
+ }
+ while (i < precision--) {
+ num_bytes++;
+ }
+ while (i-- > 0) {
+ num_bytes++;
+ }
+ while (size-- > 0) {
+ num_bytes++;
+ }
+ return num_bytes;
+
+}
+
static int check_binary_precision (int precision) {
/* precision can be unspecified (-1) or one of 1, 2, 4 or 8. */
switch (precision) {
@@ -148,9 +234,260 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
if (unlikely((int) size < 0))
return 0;
- str = buf;
- end = buf + size - 1;
+ /*
+ * buf will be NULL when this function is called from _stp_printf.
+ * This branch calculates the exact size print buffer required for
+ * the string and allocates it with _stp_reserve_bytes. A change
+ * to this branch requires a corresponding change to the same
+ * section of code below.
+ */
+ if (buf == NULL) {
+ const char* fmt_copy = fmt;
+ int num_bytes = 0;
+ va_list args_copy;
+
+ va_copy(args_copy, args);
+
+ for (; *fmt_copy ; ++fmt_copy) {
+ if (*fmt_copy != '%') {
+ num_bytes++;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat_copy:
+ ++fmt_copy; /* this also skips first '%' */
+ switch (*fmt_copy) {
+ case '-': flags |= STP_LEFT; goto repeat_copy;
+ case '+': flags |= STP_PLUS; goto repeat_copy;
+ case ' ': flags |= STP_SPACE; goto repeat_copy;
+ case '#': flags |= STP_SPECIAL; goto repeat_copy;
+ case '0': flags |= STP_ZEROPAD; goto repeat_copy;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt_copy))
+ field_width = skip_atoi(&fmt_copy);
+ else if (*fmt_copy == '*') {
+ ++fmt_copy;
+ /* it's the next argument */
+ field_width = va_arg(args_copy, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= STP_LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt_copy == '.') {
+ ++fmt_copy;
+ if (isdigit(*fmt_copy))
+ precision = skip_atoi(&fmt_copy);
+ else if (*fmt_copy == '*') {
+ ++fmt_copy;
+ /* it's the next argument */
+ precision = va_arg(args_copy, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt_copy == 'h' || *fmt_copy == 'l' || *fmt_copy == 'L') {
+ qualifier = *fmt_copy;
+ ++fmt_copy;
+ if (qualifier == 'l' && *fmt_copy == 'l') {
+ qualifier = 'L';
+ ++fmt_copy;
+ }
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt_copy) {
+ case 'b':
+ num = va_arg(args_copy, int64_t);
+
+ /* Only certain values are valid for the precision. */
+ precision = check_binary_precision (precision);
+
+ /* Unspecified field width defaults to the specified
+ precision and vice versa. If neither is specified,
+ then both default to 8. */
+ if (field_width == -1) {
+ if (precision == -1) {
+ field_width = 8;
+ precision = 8;
+ }
+ else
+ field_width = precision;
+ }
+ else if (precision == -1) {
+ precision = check_binary_precision (field_width);
+ if (precision == -1)
+ precision = 8;
+ }
+
+ len = precision;
+ if (!(flags & STP_LEFT)) {
+ while (len < field_width--) {
+ num_bytes++;
+ }
+ }
+
+ num_bytes += precision;
+
+ while (len < field_width--)
+ num_bytes++;
+
+ continue;
+
+ case 's':
+ case 'M':
+ case 'm':
+ s = va_arg(args_copy, char *);
+ if ((unsigned long)s < PAGE_SIZE)
+ s = "<NULL>";
+
+ if (*fmt_copy == 's')
+ len = strnlen(s, precision);
+ else if (precision > 0)
+ len = precision;
+ else
+ len = 1;
+
+ if (*fmt_copy == 'M')
+ len = len * 2; /* hex dump print size */
+
+ if (!(flags & STP_LEFT)) {
+ while (len < field_width--) {
+ num_bytes++;
+ }
+ }
+
+ num_bytes += len;
+
+ while (len < field_width--) {
+ num_bytes++;
+ }
+ if(flags & STP_ZEROPAD) {
+ num_bytes++;
+ }
+ continue;
+ case 'X':
+ flags |= STP_LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= STP_SIGN;
+ case 'u':
+ break;
+
+ case 'p':
+ /* Note that %p takes an int64_t argument. */
+ len = 2*sizeof(void *) + 2;
+ flags |= STP_ZEROPAD;
+
+ if (field_width == -1)
+ field_width = len;
+
+ if (!(flags & STP_LEFT)) {
+ while (len < field_width) {
+ field_width--;
+ num_bytes++;
+ }
+ }
+
+ //account for "0x"
+ num_bytes+=2;
+ field_width-=2;
+
+ num_bytes += number_size((unsigned long) va_arg(args_copy, int64_t),
+ 16, field_width, field_width, flags);
+ continue;
+
+ case '%':
+ num_bytes++;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'c':
+ if (!(flags & STP_LEFT)) {
+ while (--field_width > 0) {
+ num_bytes++;
+ }
+ }
+ c = (unsigned char) va_arg(args_copy, int);
+ num_bytes++;
+ while (--field_width > 0) {
+ num_bytes++;
+ }
+ continue;
+
+ default:
+ num_bytes++;
+ if (*fmt_copy) {
+ num_bytes++;
+ } else {
+ --fmt_copy;
+ }
+ continue;
+ }
+
+ if (qualifier == 'L')
+ num = va_arg(args_copy, int64_t);
+ else if (qualifier == 'l') {
+ num = va_arg(args_copy, unsigned long);
+ if (flags & STP_SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args_copy, int);
+ if (flags & STP_SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args_copy, unsigned int);
+ if (flags & STP_SIGN)
+ num = (signed int) num;
+ }
+ num_bytes += number_size(num, base, field_width, precision, flags);
+ }
+
+ va_end(args_copy);
+
+ if (num_bytes == 0)
+ return 0;
+
+ //max print buffer size
+ if (num_bytes > STP_BUFFER_SIZE) {
+ num_bytes = STP_BUFFER_SIZE;
+ }
+
+ str = (char*)_stp_reserve_bytes(num_bytes);
+ size = num_bytes;
+ end = str + size - 1;
+
+ } else {
+ str = buf;
+ end = buf + size - 1;
+ }
+ /*
+ * Note that a change to code below requires a corresponding
+ * change in the code above to properly calculate the bytes
+ * required in the output buffer.
+ */
for (; *fmt ; ++fmt) {
if (*fmt != '%') {
if (str <= end)
@@ -297,16 +634,25 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
len = 1;
if (!(flags & STP_LEFT)) {
- while (len < field_width--) {
+ int actlen = len;
+ if (*fmt == 'M')
+ actlen = len * 2;
+ while (actlen < field_width--) {
if (str <= end)
*str = ' ';
++str;
}
}
- if (*fmt == 'M') {
- str = number(str, str + len - 1 < end ? str + len - 1 : end,
- (unsigned long) *(uint64_t *) s,
- 16, field_width, len, flags);
+ if (*fmt == 'M') { /* stolen from kernel: trace_seq_putmem_hex() */
+ const char _stp_hex_asc[] = "0123456789abcdef";
+ int j;
+ for (i = 0, j = 0; i < len; i++) {
+ *str = _stp_hex_asc[((*s) & 0xf0) >> 4];
+ str++;
+ *str = _stp_hex_asc[((*s) & 0x0f)];
+ str++; s++;
+ }
+ len = len * 2; /* the actual length */
}
else {
for (i = 0; i < len; ++i) {
@@ -433,11 +779,13 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
field_width, precision, flags);
}
- if (likely(str <= end))
- *str = '\0';
- else if (size > 0)
- /* don't write out a null byte if the buf size is zero */
- *end = '\0';
+ if (buf != NULL) {
+ if (likely(str <= end))
+ *str = '\0';
+ else if (size > 0)
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ }
return str-buf;
}