diff options
Diffstat (limited to 'runtime')
49 files changed, 3395 insertions, 760 deletions
diff --git a/runtime/autoconf-asm-syscall.c b/runtime/autoconf-asm-syscall.c new file mode 100644 index 00000000..bf7a273f --- /dev/null +++ b/runtime/autoconf-asm-syscall.c @@ -0,0 +1,2 @@ +#include <asm/syscall.h> + diff --git a/runtime/autoconf-find-task-pid.c b/runtime/autoconf-find-task-pid.c new file mode 100644 index 00000000..549d5ac3 --- /dev/null +++ b/runtime/autoconf-find-task-pid.c @@ -0,0 +1,6 @@ +#include <linux/sched.h> + +void foo (pid_t k) { + struct task_struct *tsk = find_task_by_pid (k); + (void) tsk; +} diff --git a/runtime/autoconf-x86-gs.c b/runtime/autoconf-x86-gs.c new file mode 100644 index 00000000..f4dda795 --- /dev/null +++ b/runtime/autoconf-x86-gs.c @@ -0,0 +1,5 @@ +#include <asm/ptrace.h> + +#if defined (__i386__) +struct pt_regs regs = {.gs = 0x0}; +#endif diff --git a/runtime/itrace.c b/runtime/itrace.c index ed32b0bc..68f85301 100644 --- a/runtime/itrace.c +++ b/runtime/itrace.c @@ -1,6 +1,7 @@ /* * user space instruction tracing * Copyright (C) 2005, 2006, 2007, 2008, 2009 IBM Corp. + * Copyright (C) 2009 Red Hat Inc. * * This file is part of systemtap, and is free software. You can * redistribute it and/or modify it under the terms of the GNU General @@ -17,8 +18,16 @@ #include <linux/sched.h> #include <linux/rcupdate.h> #include <linux/utrace.h> +#include "ptrace_compatibility.h" + +/* PR9974: Adapt to struct renaming. */ +#ifdef UTRACE_API_VERSION +#define utrace_attached_engine utrace_engine +#endif + #include <asm/string.h> #include "uprobes/uprobes.h" +#include "utrace_compatibility.h" #ifndef put_task_struct #define put_task_struct(t) \ @@ -55,7 +64,7 @@ struct itrace_info { struct list_head link; }; -static u32 debug = 1; +static u32 debug = 0 /* 1 */; static LIST_HEAD(usr_itrace_info); static spinlock_t itrace_lock; @@ -118,10 +127,15 @@ static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void return buf - old_buf; } +#ifdef UTRACE_ORIG_VERSION +static u32 usr_itrace_report_quiesce(struct utrace_attached_engine *engine, + struct task_struct *tsk) +#else static u32 usr_itrace_report_quiesce(enum utrace_resume_action action, struct utrace_attached_engine *engine, struct task_struct *tsk, unsigned long event) +#endif { int status; struct itrace_info *ui; @@ -129,10 +143,23 @@ static u32 usr_itrace_report_quiesce(enum utrace_resume_action action, ui = rcu_dereference(engine->data); WARN_ON(!ui); +#ifdef UTRACE_ORIG_VERSION + return (ui->step_flag | UTRACE_ACTION_NEWSTATE); +#else return (event == 0 ? ui->step_flag : UTRACE_RESUME); +#endif } +#ifdef UTRACE_ORIG_VERSION +static u32 usr_itrace_report_signal( + struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct pt_regs *regs, + u32 action, siginfo_t *info, + const struct k_sigaction *orig_ka, + struct k_sigaction *return_ka) +#else static u32 usr_itrace_report_signal(u32 action, struct utrace_attached_engine *engine, struct task_struct *tsk, @@ -140,6 +167,7 @@ static u32 usr_itrace_report_signal(u32 action, siginfo_t *info, const struct k_sigaction *orig_ka, struct k_sigaction *return_ka) +#endif { struct itrace_info *ui; u32 return_flags; @@ -174,16 +202,31 @@ static u32 usr_itrace_report_signal(u32 action, return return_flags; } + + +#ifdef UTRACE_ORIG_VERSION +static u32 usr_itrace_report_clone( + struct utrace_attached_engine *engine, + struct task_struct *parent, + unsigned long clone_flags, + struct task_struct *child) +#else static u32 usr_itrace_report_clone(enum utrace_resume_action action, struct utrace_attached_engine *engine, struct task_struct *parent, unsigned long clone_flags, struct task_struct *child) +#endif { return UTRACE_RESUME; } +#ifdef UTRACE_ORIG_VERSION +static u32 usr_itrace_report_death(struct utrace_attached_engine *e, + struct task_struct *tsk) +#else static u32 usr_itrace_report_death(struct utrace_attached_engine *e, struct task_struct *tsk, bool group_dead, int signal) +#endif { struct itrace_info *ui = rcu_dereference(e->data); WARN_ON(!ui); @@ -275,8 +318,13 @@ static int usr_itrace_init(int single_step, pid_t tid, struct stap_itrace_probe struct itrace_info *ui; struct task_struct *tsk; + spin_lock_init(&itrace_lock); rcu_read_lock(); +#ifdef STAPCONF_FIND_TASK_PID + tsk = find_task_by_pid(tid); +#else tsk = find_task_by_vpid(tid); +#endif if (!tsk) { printk(KERN_ERR "usr_itrace_init: Cannot find process %d\n", tid); rcu_read_unlock(); @@ -293,12 +341,8 @@ static int usr_itrace_init(int single_step, pid_t tid, struct stap_itrace_probe put_task_struct(tsk); rcu_read_unlock(); - spin_lock_init(&itrace_lock); - - /* set initial state */ - spin_lock(&itrace_lock); - spin_unlock(&itrace_lock); - printk(KERN_INFO "usr_itrace_init: completed for tid = %d\n", tid); + if (debug) + printk(KERN_INFO "usr_itrace_init: completed for tid = %d\n", tid); return 0; } @@ -314,7 +358,6 @@ void static remove_usr_itrace_info(struct itrace_info *ui) if (debug) printk(KERN_INFO "remove_usr_itrace_info: tid=%d\n", ui->tid); - spin_lock(&itrace_lock); if (ui->tsk && ui->engine) { status = utrace_control(ui->tsk, ui->engine, UTRACE_DETACH); if (status < 0 && status != -ESRCH && status != -EALREADY) @@ -322,6 +365,7 @@ void static remove_usr_itrace_info(struct itrace_info *ui) "utrace_control(UTRACE_DETACH) returns %d\n", status); } + spin_lock(&itrace_lock); list_del(&ui->link); spin_unlock(&itrace_lock); kfree(ui); diff --git a/runtime/loc2c-runtime.h b/runtime/loc2c-runtime.h index 0af19edc..620e1615 100644 --- a/runtime/loc2c-runtime.h +++ b/runtime/loc2c-runtime.h @@ -29,11 +29,12 @@ & (((__typeof (base)) 1 << (nbits)) - 1)) #define store_bitfield(target, base, higherbits, nbits) \ - target = (target \ - &~ ((((__typeof (base)) 1 << (nbits)) - 1) \ - << (sizeof (base) * 8 - (higherbits) - (nbits))) \ - | ((__typeof (base)) (base) \ - << (sizeof (base) * 8 - (higherbits) - (nbits)))) + target = ((target \ + &~ ((((__typeof (target)) 1 << (nbits)) - 1) \ + << (sizeof (target) * 8 - (higherbits) - (nbits)))) \ + | ((((__typeof (target)) (base)) \ + & (((__typeof (target)) 1 << (nbits)) - 1)) \ + << (sizeof (target) * 8 - (higherbits) - (nbits)))) /* Given a DWARF register number, fetch its intptr_t (long) value from the @@ -62,6 +63,10 @@ must work right for kernel addresses, and can use whatever existing machine-specific kernel macros are convenient. */ +#if STP_SKIP_BADVARS +#define DEREF_FAULT(addr) ({0; }) +#define STORE_DEREF_FAULT(addr) ({0; }) +#else #define DEREF_FAULT(addr) ({ \ snprintf(c->error_buffer, sizeof(c->error_buffer), \ "kernel read fault at 0x%p (%s)", (void *)(intptr_t)(addr), #addr); \ @@ -75,7 +80,7 @@ c->last_error = c->error_buffer; \ goto deref_fault; \ }) - +#endif #if defined (STAPCONF_X86_UNIREGS) && defined (__i386__) @@ -186,37 +191,34 @@ */ #define kread(ptr) ({ \ - typeof(*(ptr)) _v; \ - if (probe_kernel_read((void *)&_v, (void *)(ptr), sizeof(*(ptr)))) \ - DEREF_FAULT(ptr); \ + typeof(*(ptr)) _v = 0; \ + if (lookup_bad_addr((unsigned long)(ptr)) || \ + probe_kernel_read((void *)&_v, (void *)(ptr), sizeof(*(ptr)))) \ + DEREF_FAULT(ptr); \ _v; \ }) #define kwrite(ptr, value) ({ \ typeof(*(ptr)) _v; \ _v = (typeof(*(ptr)))(value); \ - if (probe_kernel_write((void *)(ptr), (void *)&_v, sizeof(*(ptr)))) \ - STORE_DEREF_FAULT(ptr); \ + if (lookup_bad_addr((unsigned long)addr) || \ + probe_kernel_write((void *)(ptr), (void *)&_v, sizeof(*(ptr)))) \ + STORE_DEREF_FAULT(ptr); \ }) #define deref(size, addr) ({ \ - intptr_t _i; \ - if (lookup_bad_addr((unsigned long)addr)) \ - __deref_bad(); \ + intptr_t _i = 0; \ switch (size) { \ case 1: _i = kread((u8 *)(addr)); break; \ case 2: _i = kread((u16 *)(addr)); break; \ case 4: _i = kread((u32 *)(addr)); break; \ case 8: _i = kread((u64 *)(addr)); break; \ default: __deref_bad(); \ - /* uninitialized _i should also be caught by -Werror */ \ } \ _i; \ }) #define store_deref(size, addr, value) ({ \ - if (lookup_bad_addr((unsigned long)addr)) \ - __store_deref_bad(); \ switch (size) { \ case 1: kwrite((u8 *)(addr), (value)); break; \ case 2: kwrite((u16 *)(addr), (value)); break; \ @@ -237,7 +239,7 @@ extern void __store_deref_bad(void); ({ \ int _bad = 0; \ u8 _b; u16 _w; u32 _l; \ - intptr_t _v; \ + intptr_t _v = 0; \ if (lookup_bad_addr((unsigned long)addr)) \ _bad = 1; \ else \ @@ -277,7 +279,7 @@ extern void __store_deref_bad(void); ({ \ int _bad = 0; \ u8 _b; u16 _w; u32 _l; u64 _q; \ - intptr_t _v; \ + intptr_t _v = 0; \ if (lookup_bad_addr((unsigned long)addr)) \ _bad = 1; \ else \ @@ -394,7 +396,7 @@ extern void __store_deref_bad(void); #define deref(size, addr) \ ({ \ int _bad = 0; \ - intptr_t _v; \ + intptr_t _v = 0; \ if (lookup_bad_addr((unsigned long)addr)) \ _bad = 1; \ else \ diff --git a/runtime/map-gen.c b/runtime/map-gen.c index c4bdf2c7..fdb75089 100644 --- a/runtime/map-gen.c +++ b/runtime/map-gen.c @@ -26,6 +26,14 @@ #define JOIN5x(a,b,c,d,e,f) a##_##b##c##d##e##f #define JOIN6(a,b,c,d,e,f,g) JOIN6x(a,b,c,d,e,f,g) #define JOIN6x(a,b,c,d,e,f,g) a##_##b##c##d##e##f##g +#define JOIN7(a,b,c,d,e,f,g,h) JOIN7x(a,b,c,d,e,f,g,h) +#define JOIN7x(a,b,c,d,e,f,g,h) a##_##b##c##d##e##f##g##h +#define JOIN8(a,b,c,d,e,f,g,h,i) JOIN8x(a,b,c,d,e,f,g,h,i) +#define JOIN8x(a,b,c,d,e,f,g,h,i) a##_##b##c##d##e##f##g##h##i +#define JOIN9(a,b,c,d,e,f,g,h,i,j) JOIN9x(a,b,c,d,e,f,g,h,i,j) +#define JOIN9x(a,b,c,d,e,f,g,h,i,j) a##_##b##c##d##e##f##g##h##i##j +#define JOIN10(a,b,c,d,e,f,g,h,i,j,k) JOIN10x(a,b,c,d,e,f,g,h,i,j,k) +#define JOIN10x(a,b,c,d,e,f,g,h,i,j,k) a##_##b##c##d##e##f##g##h##i##j##k #include "map.h" @@ -162,6 +170,113 @@ #define KEY5_HASH JOIN(KEY5NAME,hash) #endif /* defined(KEY5_TYPE) */ +#if defined (KEY6_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 6 +#if KEY6_TYPE == STRING +#define KEY6TYPE char* +#define KEY6NAME str +#define KEY6N s +#define KEY6STOR char key6[MAP_STRING_LENGTH] +#define KEY6CPY(m) str_copy(m->key6, key6) +#else +#define KEY6TYPE int64_t +#define KEY6NAME int64 +#define KEY6N i +#define KEY6STOR int64_t key6 +#define KEY6CPY(m) m->key6=key6 +#endif +#define KEY6_EQ_P JOIN(KEY6NAME,eq_p) +#define KEY6_HASH JOIN(KEY6NAME,hash) +#endif /* defined(KEY6_TYPE) */ + +#if defined (KEY7_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 7 +#if KEY7_TYPE == STRING +#define KEY7TYPE char* +#define KEY7NAME str +#define KEY7N s +#define KEY7STOR char key7[MAP_STRING_LENGTH] +#define KEY7CPY(m) str_copy(m->key7, key7) +#else +#define KEY7TYPE int64_t +#define KEY7NAME int64 +#define KEY7N i +#define KEY7STOR int64_t key7 +#define KEY7CPY(m) m->key7=key7 +#endif +#define KEY7_EQ_P JOIN(KEY7NAME,eq_p) +#define KEY7_HASH JOIN(KEY7NAME,hash) +#endif /* defined(KEY7_TYPE) */ + +#if defined (KEY7_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 7 +#if KEY7_TYPE == STRING +#define KEY7TYPE char* +#define KEY7NAME str +#define KEY7N s +#define KEY7STOR char key7[MAP_STRING_LENGTH] +#define KEY7CPY(m) str_copy(m->key7, key7) +#else +#define KEY7TYPE int64_t +#define KEY7NAME int64 +#define KEY7N i +#define KEY7STOR int64_t key7 +#define KEY7CPY(m) m->key7=key7 +#endif +#define KEY7_EQ_P JOIN(KEY7NAME,eq_p) +#define KEY7_HASH JOIN(KEY7NAME,hash) +#endif /* defined(KEY7_TYPE) */ + +#if defined (KEY8_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 8 +#if KEY8_TYPE == STRING +#define KEY8TYPE char* +#define KEY8NAME str +#define KEY8N s +#define KEY8STOR char key8[MAP_STRING_LENGTH] +#define KEY8CPY(m) str_copy(m->key8, key8) +#else +#define KEY8TYPE int64_t +#define KEY8NAME int64 +#define KEY8N i +#define KEY8STOR int64_t key8 +#define KEY8CPY(m) m->key8=key8 +#endif +#define KEY8_EQ_P JOIN(KEY8NAME,eq_p) +#define KEY8_HASH JOIN(KEY8NAME,hash) +#endif /* defined(KEY8_TYPE) */ + +#if defined (KEY9_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 9 +#if KEY9_TYPE == STRING +#define KEY9TYPE char* +#define KEY9NAME str +#define KEY9N s +#define KEY9STOR char key9[MAP_STRING_LENGTH] +#define KEY9CPY(m) str_copy(m->key9, key9) +#else +#define KEY9TYPE int64_t +#define KEY9NAME int64 +#define KEY9N i +#define KEY9STOR int64_t key9 +#define KEY9CPY(m) m->key9=key9 +#endif +#define KEY9_EQ_P JOIN(KEY9NAME,eq_p) +#define KEY9_HASH JOIN(KEY9NAME,hash) +#endif /* defined(KEY9_TYPE) */ + +/* Not so many, cowboy! */ +#if defined (KEY10_TYPE) +#error "excessive key arity == too many array indexes" +#endif + + + #if KEY_ARITY == 1 #define KEYSYM(x) JOIN2(x,KEY1N,VALN) #define ALLKEYS(x) x##1 @@ -187,6 +302,26 @@ #define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5 #define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5 #define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);} +#elif KEY_ARITY == 6 +#define KEYSYM(x) JOIN7(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);} +#elif KEY_ARITY == 7 +#define KEYSYM(x) JOIN8(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);} +#elif KEY_ARITY == 8 +#define KEYSYM(x) JOIN9(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);} +#elif KEY_ARITY == 9 +#define KEYSYM(x) JOIN10(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,KEY9N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8, KEY9TYPE x##9 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);KEY9CPY(m);} #endif /* */ @@ -208,6 +343,18 @@ struct KEYSYM(map_node) { KEY4STOR; #if KEY_ARITY > 4 KEY5STOR; +#if KEY_ARITY > 5 + KEY6STOR; +#if KEY_ARITY > 6 + KEY7STOR; +#if KEY_ARITY > 7 + KEY8STOR; +#if KEY_ARITY > 8 + KEY9STOR; +#endif +#endif +#endif +#endif #endif #endif #endif @@ -266,6 +413,34 @@ static key_data KEYSYM(map_get_key) (struct map_node *mn, int n, int *type) if (type) *type = type_to_enum(KEY5TYPE); break; +#if KEY_ARITY > 5 + case 6: + ptr = (key_data)m->key6; + if (type) + *type = type_to_enum(KEY6TYPE); + break; +#if KEY_ARITY > 6 + case 7: + ptr = (key_data)m->key7; + if (type) + *type = type_to_enum(KEY7TYPE); + break; +#if KEY_ARITY > 7 + case 8: + ptr = (key_data)m->key8; + if (type) + *type = type_to_enum(KEY8TYPE); + break; +#if KEY_ARITY > 8 + case 9: + ptr = (key_data)m->key9; + if (type) + *type = type_to_enum(KEY9TYPE); + break; +#endif +#endif +#endif +#endif #endif #endif #endif @@ -309,6 +484,34 @@ static unsigned int KEYSYM(keycheck) (ALLKEYSD(key)) if (key5 == NULL) return 0; #endif + +#if KEY_ARITY > 5 +#if KEY6_TYPE == STRING + if (key6 == NULL) + return 0; +#endif + +#if KEY_ARITY > 6 +#if KEY7_TYPE == STRING + if (key7 == NULL) + return 0; +#endif + +#if KEY_ARITY > 7 +#if KEY8_TYPE == STRING + if (key8 == NULL) + return 0; +#endif + +#if KEY_ARITY > 8 +#if KEY9_TYPE == STRING + if (key9 == NULL) + return 0; +#endif +#endif +#endif +#endif +#endif #endif #endif #endif @@ -327,6 +530,18 @@ static unsigned int KEYSYM(hash) (ALLKEYSD(key)) hash ^= KEY4_HASH(key4); #if KEY_ARITY > 4 hash ^= KEY5_HASH(key5); +#if KEY_ARITY > 5 + hash ^= KEY6_HASH(key6); +#if KEY_ARITY > 6 + hash ^= KEY7_HASH(key7); +#if KEY_ARITY > 7 + hash ^= KEY8_HASH(key8); +#if KEY_ARITY > 8 + hash ^= KEY9_HASH(key9); +#endif +#endif +#endif +#endif #endif #endif #endif @@ -411,6 +626,18 @@ static int KEYSYM(__stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -462,6 +689,18 @@ static VALTYPE KEYSYM(_stp_map_get) (MAP map, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -498,6 +737,18 @@ static int KEYSYM(_stp_map_del) (MAP map, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -535,6 +786,18 @@ static int KEYSYM(_stp_map_exists) (MAP map, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -582,6 +845,34 @@ static int KEYSYM(_stp_map_exists) (MAP map, ALLKEYSD(key)) #undef KEY5STOR #undef KEY5CPY +#undef KEY6NAME +#undef KEY6N +#undef KEY6TYPE +#undef KEY6_TYPE +#undef KEY6STOR +#undef KEY6CPY + +#undef KEY7NAME +#undef KEY7N +#undef KEY7TYPE +#undef KEY7_TYPE +#undef KEY7STOR +#undef KEY7CPY + +#undef KEY8NAME +#undef KEY8N +#undef KEY8TYPE +#undef KEY8_TYPE +#undef KEY8STOR +#undef KEY8CPY + +#undef KEY9NAME +#undef KEY9N +#undef KEY9TYPE +#undef KEY9_TYPE +#undef KEY9STOR +#undef KEY9CPY + #undef KEY_ARITY #undef ALLKEYS #undef ALLKEYSD diff --git a/runtime/map.c b/runtime/map.c index de25d6f3..74467f30 100644 --- a/runtime/map.c +++ b/runtime/map.c @@ -38,27 +38,16 @@ static int int64_eq_p (int64_t key1, int64_t key2) static void str_copy(char *dest, char *src) { - int len = 0; - if (src) { - len = strlen(src); - if (len > MAP_STRING_LENGTH - 1) - len = MAP_STRING_LENGTH - 1; - memcpy (dest, src, len); - } - dest[len] = 0; + if (src) + strlcpy(dest, src, MAP_STRING_LENGTH); + else + *dest = 0; } static void str_add(void *dest, char *val) { char *dst = (char *)dest; - int len = strlen(val); - int len1 = strlen(dst); - int num = MAP_STRING_LENGTH - 1 - len1; - - if (len > num) - len = num; - memcpy (&dst[len1], val, len); - dst[len + len1] = 0; + strlcat(dst, val, MAP_STRING_LENGTH); } static int str_eq_p (char *key1, char *key2) @@ -730,7 +719,7 @@ static MAP _stp_pmap_agg (PMAP pmap) { int i, hash; MAP m, agg; - struct map_node *ptr, *aptr; + struct map_node *ptr, *aptr = NULL; struct hlist_head *head, *ahead; struct hlist_node *e, *f; diff --git a/runtime/pmap-gen.c b/runtime/pmap-gen.c index 86c3dc42..c95adc6b 100644 --- a/runtime/pmap-gen.c +++ b/runtime/pmap-gen.c @@ -26,6 +26,14 @@ #define JOIN5x(a,b,c,d,e,f) a##_##b##c##d##e##f #define JOIN6(a,b,c,d,e,f,g) JOIN6x(a,b,c,d,e,f,g) #define JOIN6x(a,b,c,d,e,f,g) a##_##b##c##d##e##f##g +#define JOIN7(a,b,c,d,e,f,g,h) JOIN7x(a,b,c,d,e,f,g,h) +#define JOIN7x(a,b,c,d,e,f,g,h) a##_##b##c##d##e##f##g##h +#define JOIN8(a,b,c,d,e,f,g,h,i) JOIN8x(a,b,c,d,e,f,g,h,i) +#define JOIN8x(a,b,c,d,e,f,g,h,i) a##_##b##c##d##e##f##g##h##i +#define JOIN9(a,b,c,d,e,f,g,h,i,j) JOIN9x(a,b,c,d,e,f,g,h,i,j) +#define JOIN9x(a,b,c,d,e,f,g,h,i,j) a##_##b##c##d##e##f##g##h##i##j +#define JOIN10(a,b,c,d,e,f,g,h,i,j,k) JOIN10x(a,b,c,d,e,f,g,h,i,j,k) +#define JOIN10x(a,b,c,d,e,f,g,h,i,j,k) a##_##b##c##d##e##f##g##h##i##j##k #include "map.h" @@ -162,6 +170,113 @@ #define KEY5_HASH JOIN(KEY5NAME,hash) #endif /* defined(KEY5_TYPE) */ +#if defined (KEY6_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 6 +#if KEY6_TYPE == STRING +#define KEY6TYPE char* +#define KEY6NAME str +#define KEY6N s +#define KEY6STOR char key6[MAP_STRING_LENGTH] +#define KEY6CPY(m) str_copy(m->key6, key6) +#else +#define KEY6TYPE int64_t +#define KEY6NAME int64 +#define KEY6N i +#define KEY6STOR int64_t key6 +#define KEY6CPY(m) m->key6=key6 +#endif +#define KEY6_EQ_P JOIN(KEY6NAME,eq_p) +#define KEY6_HASH JOIN(KEY6NAME,hash) +#endif /* defined(KEY6_TYPE) */ + +#if defined (KEY7_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 7 +#if KEY7_TYPE == STRING +#define KEY7TYPE char* +#define KEY7NAME str +#define KEY7N s +#define KEY7STOR char key7[MAP_STRING_LENGTH] +#define KEY7CPY(m) str_copy(m->key7, key7) +#else +#define KEY7TYPE int64_t +#define KEY7NAME int64 +#define KEY7N i +#define KEY7STOR int64_t key7 +#define KEY7CPY(m) m->key7=key7 +#endif +#define KEY7_EQ_P JOIN(KEY7NAME,eq_p) +#define KEY7_HASH JOIN(KEY7NAME,hash) +#endif /* defined(KEY7_TYPE) */ + +#if defined (KEY7_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 7 +#if KEY7_TYPE == STRING +#define KEY7TYPE char* +#define KEY7NAME str +#define KEY7N s +#define KEY7STOR char key7[MAP_STRING_LENGTH] +#define KEY7CPY(m) str_copy(m->key7, key7) +#else +#define KEY7TYPE int64_t +#define KEY7NAME int64 +#define KEY7N i +#define KEY7STOR int64_t key7 +#define KEY7CPY(m) m->key7=key7 +#endif +#define KEY7_EQ_P JOIN(KEY7NAME,eq_p) +#define KEY7_HASH JOIN(KEY7NAME,hash) +#endif /* defined(KEY7_TYPE) */ + +#if defined (KEY8_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 8 +#if KEY8_TYPE == STRING +#define KEY8TYPE char* +#define KEY8NAME str +#define KEY8N s +#define KEY8STOR char key8[MAP_STRING_LENGTH] +#define KEY8CPY(m) str_copy(m->key8, key8) +#else +#define KEY8TYPE int64_t +#define KEY8NAME int64 +#define KEY8N i +#define KEY8STOR int64_t key8 +#define KEY8CPY(m) m->key8=key8 +#endif +#define KEY8_EQ_P JOIN(KEY8NAME,eq_p) +#define KEY8_HASH JOIN(KEY8NAME,hash) +#endif /* defined(KEY8_TYPE) */ + +#if defined (KEY9_TYPE) +#undef KEY_ARITY +#define KEY_ARITY 9 +#if KEY9_TYPE == STRING +#define KEY9TYPE char* +#define KEY9NAME str +#define KEY9N s +#define KEY9STOR char key9[MAP_STRING_LENGTH] +#define KEY9CPY(m) str_copy(m->key9, key9) +#else +#define KEY9TYPE int64_t +#define KEY9NAME int64 +#define KEY9N i +#define KEY9STOR int64_t key9 +#define KEY9CPY(m) m->key9=key9 +#endif +#define KEY9_EQ_P JOIN(KEY9NAME,eq_p) +#define KEY9_HASH JOIN(KEY9NAME,hash) +#endif /* defined(KEY9_TYPE) */ + +/* Not so many, cowboy! */ +#if defined (KEY10_TYPE) +#error "excessive key arity == too many array indexes" +#endif + + + #if KEY_ARITY == 1 #define KEYSYM(x) JOIN2(x,KEY1N,VALN) #define ALLKEYS(x) x##1 @@ -187,6 +302,26 @@ #define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5 #define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5 #define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);} +#elif KEY_ARITY == 6 +#define KEYSYM(x) JOIN7(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);} +#elif KEY_ARITY == 7 +#define KEYSYM(x) JOIN8(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);} +#elif KEY_ARITY == 8 +#define KEYSYM(x) JOIN9(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);} +#elif KEY_ARITY == 9 +#define KEYSYM(x) JOIN10(x,KEY1N,KEY2N,KEY3N,KEY4N,KEY5N,KEY6N,KEY7N,KEY8N,KEY9N,VALN) +#define ALLKEYS(x) x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9 +#define ALLKEYSD(x) KEY1TYPE x##1, KEY2TYPE x##2, KEY3TYPE x##3, KEY4TYPE x##4, KEY5TYPE x##5, KEY6TYPE x##6, KEY7TYPE x##7, KEY8TYPE x##8, KEY9TYPE x##9 +#define KEYCPY(m) {KEY1CPY(m);KEY2CPY(m);KEY3CPY(m);KEY4CPY(m);KEY5CPY(m);KEY6CPY(m);KEY7CPY(m);KEY8CPY(m);KEY9CPY(m);} #endif /* */ @@ -208,6 +343,18 @@ struct KEYSYM(pmap_node) { KEY4STOR; #if KEY_ARITY > 4 KEY5STOR; +#if KEY_ARITY > 5 + KEY6STOR; +#if KEY_ARITY > 6 + KEY7STOR; +#if KEY_ARITY > 7 + KEY8STOR; +#if KEY_ARITY > 8 + KEY9STOR; +#endif +#endif +#endif +#endif #endif #endif #endif @@ -238,6 +385,18 @@ static int KEYSYM(pmap_key_cmp) (struct map_node *m1, struct map_node *m2) && KEY4_EQ_P(n1->key4, n2->key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n1->key5, n2->key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n1->key6, n2->key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n1->key7, n2->key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n1->key8, n2->key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n1->key9, n2->key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -282,6 +441,34 @@ static void KEYSYM(pmap_copy_keys) (struct map_node *m1, struct map_node *m2) #else dst->key5 = src->key5; #endif +#if KEY_ARITY > 5 +#if KEY6_TYPE == STRING + str_copy (dst->key6, src->key6); +#else + dst->key6 = src->key6; +#endif +#if KEY_ARITY > 6 +#if KEY7_TYPE == STRING + str_copy (dst->key7, src->key7); +#else + dst->key7 = src->key7; +#endif +#if KEY_ARITY > 7 +#if KEY8_TYPE == STRING + str_copy (dst->key8, src->key8); +#else + dst->key8 = src->key8; +#endif +#if KEY_ARITY > 8 +#if KEY9_TYPE == STRING + str_copy (dst->key9, src->key9); +#else + dst->key9 = src->key9; +#endif +#endif +#endif +#endif +#endif #endif #endif #endif @@ -330,6 +517,34 @@ static key_data KEYSYM(pmap_get_key) (struct map_node *mn, int n, int *type) if (type) *type = type_to_enum(KEY5TYPE); break; +#if KEY_ARITY > 5 + case 6: + ptr = (key_data)m->key6; + if (type) + *type = type_to_enum(KEY6TYPE); + break; +#if KEY_ARITY > 6 + case 7: + ptr = (key_data)m->key7; + if (type) + *type = type_to_enum(KEY7TYPE); + break; +#if KEY_ARITY > 7 + case 8: + ptr = (key_data)m->key8; + if (type) + *type = type_to_enum(KEY8TYPE); + break; +#if KEY_ARITY > 8 + case 9: + ptr = (key_data)m->key9; + if (type) + *type = type_to_enum(KEY9TYPE); + break; +#endif +#endif +#endif +#endif #endif #endif #endif @@ -373,6 +588,34 @@ static unsigned int KEYSYM(pkeycheck) (ALLKEYSD(key)) if (key5 == NULL) return 0; #endif + +#if KEY_ARITY > 5 +#if KEY6_TYPE == STRING + if (key6 == NULL) + return 0; +#endif + +#if KEY_ARITY > 6 +#if KEY7_TYPE == STRING + if (key7 == NULL) + return 0; +#endif + +#if KEY_ARITY > 7 +#if KEY8_TYPE == STRING + if (key8 == NULL) + return 0; +#endif + +#if KEY_ARITY > 8 +#if KEY9_TYPE == STRING + if (key9 == NULL) + return 0; +#endif +#endif +#endif +#endif +#endif #endif #endif #endif @@ -391,6 +634,18 @@ static unsigned int KEYSYM(phash) (ALLKEYSD(key)) hash ^= KEY4_HASH(key4); #if KEY_ARITY > 4 hash ^= KEY5_HASH(key5); +#if KEY_ARITY > 5 + hash ^= KEY6_HASH(key6); +#if KEY_ARITY > 6 + hash ^= KEY7_HASH(key7); +#if KEY_ARITY > 7 + hash ^= KEY8_HASH(key8); +#if KEY_ARITY > 8 + hash ^= KEY9_HASH(key9); +#endif +#endif +#endif +#endif #endif #endif #endif @@ -504,6 +759,18 @@ static int KEYSYM(__stp_pmap_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -587,6 +854,18 @@ static VALTYPE KEYSYM(_stp_pmap_get_cpu) (PMAP pmap, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -637,6 +916,18 @@ static VALTYPE KEYSYM(_stp_pmap_get) (PMAP pmap, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -669,6 +960,18 @@ static VALTYPE KEYSYM(_stp_pmap_get) (PMAP pmap, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -723,6 +1026,18 @@ static int KEYSYM(__stp_pmap_del) (MAP map, ALLKEYSD(key)) && KEY4_EQ_P(n->key4, key4) #if KEY_ARITY > 4 && KEY5_EQ_P(n->key5, key5) +#if KEY_ARITY > 5 + && KEY6_EQ_P(n->key6, key6) +#if KEY_ARITY > 6 + && KEY7_EQ_P(n->key7, key7) +#if KEY_ARITY > 7 + && KEY8_EQ_P(n->key8, key8) +#if KEY_ARITY > 8 + && KEY9_EQ_P(n->key9, key9) +#endif +#endif +#endif +#endif #endif #endif #endif @@ -788,6 +1103,34 @@ static int KEYSYM(_stp_pmap_del) (PMAP pmap, ALLKEYSD(key)) #undef KEY5STOR #undef KEY5CPY +#undef KEY6NAME +#undef KEY6N +#undef KEY6TYPE +#undef KEY6_TYPE +#undef KEY6STOR +#undef KEY6CPY + +#undef KEY7NAME +#undef KEY7N +#undef KEY7TYPE +#undef KEY7_TYPE +#undef KEY7STOR +#undef KEY7CPY + +#undef KEY8NAME +#undef KEY8N +#undef KEY8TYPE +#undef KEY8_TYPE +#undef KEY8STOR +#undef KEY8CPY + +#undef KEY9NAME +#undef KEY9N +#undef KEY9TYPE +#undef KEY9_TYPE +#undef KEY9STOR +#undef KEY9CPY + #undef KEY_ARITY #undef ALLKEYS #undef ALLKEYSD diff --git a/runtime/print.c b/runtime/print.c index c1fff306..d51c8108 100644 --- a/runtime/print.c +++ b/runtime/print.c @@ -16,6 +16,7 @@ #include "vsprintf.c" #include "print.h" #include "transport/transport.c" +#include "vsprintf.c" /** @file print.c * Printing Functions. @@ -168,34 +169,10 @@ static void _stp_print_binary (int num, ...) */ static void _stp_printf (const char *fmt, ...) { - int num; va_list args; - _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); - char *buf = pb->buf + pb->len; - int size = STP_BUFFER_SIZE - pb->len; - va_start(args, fmt); - num = _stp_vsnprintf(buf, size, fmt, args); + _stp_vsnprintf(NULL, 0, fmt, args); va_end(args); - if (unlikely(num >= size)) { - /* overflowed the buffer */ - if (pb->len == 0) { - /* A single print request exceeded the buffer size. */ - /* Should not be possible with Systemtap-generated code. */ - pb->len = STP_BUFFER_SIZE; - _stp_print_flush(); - num = 0; - } else { - /* Need more space. Flush the previous contents */ - _stp_print_flush(); - - /* try again */ - va_start(args, fmt); - num = _stp_vsnprintf(pb->buf, STP_BUFFER_SIZE, fmt, args); - va_end(args); - } - } - pb->len += num; } /** Write a string into the print buffer. diff --git a/runtime/ptrace_compatibility.h b/runtime/ptrace_compatibility.h new file mode 100644 index 00000000..939c3b56 --- /dev/null +++ b/runtime/ptrace_compatibility.h @@ -0,0 +1,50 @@ +#ifndef _PTRACE_COMPATIBILITY_H_ +#define _PTRACE_COMPATIBILITY_H_ + +#include <linux/ptrace.h> + +/* Older kernel's linux/ptrace.h don't define + * arch_has_single_step()/arch_has_block_step(). */ + +#ifndef arch_has_single_step + +#include <linux/tracehook.h> + +/** + * arch_has_single_step - does this CPU support user-mode single-step? + * + * If this is defined, then there must be function declarations or + * inlines for user_enable_single_step() and user_disable_single_step(). + * arch_has_single_step() should evaluate to nonzero iff the machine + * supports instruction single-step for user mode. + * It can be a constant or it can test a CPU feature bit. + */ + +#ifdef ARCH_HAS_SINGLE_STEP +#define arch_has_single_step() (ARCH_HAS_SINGLE_STEP) +#else +#define arch_has_single_step() (0) +#endif /* ARCH_HAS_SINGLE_STEP */ + +#endif /* arch_has_single_step */ + +#ifndef arch_has_block_step +/** + * arch_has_block_step - does this CPU support user-mode block-step? + * + * If this is defined, then there must be a function declaration or inline + * for user_enable_block_step(), and arch_has_single_step() must be defined + * too. arch_has_block_step() should evaluate to nonzero iff the machine + * supports step-until-branch for user mode. It can be a constant or it + * can test a CPU feature bit. + */ + +#ifdef ARCH_HAS_BLOCK_STEP +#define arch_has_block_step() (ARCH_HAS_BLOCK_STEP) +#else +#define arch_has_block_step() (0) +#endif /* ARCH_HAS_BLOCK_STEP */ + +#endif /* arch_has_block_step */ + +#endif /* _PTRACE_COMPATIBILITY_H_ */ diff --git a/runtime/runtime.h b/runtime/runtime.h index 78c27a84..7418d13b 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -74,6 +74,13 @@ static struct #define MAXTRACE 20 #endif +/* dwarf unwinder only tested so far on i386 and x86_64. */ +#if (defined(__i386__) || defined(__x86_64__)) +#ifndef STP_USE_DWARF_UNWINDER +#define STP_USE_DWARF_UNWINDER +#endif +#endif + #ifdef CONFIG_FRAME_POINTER /* Just because frame pointers are available does not mean we can trust them. */ #ifndef STP_USE_DWARF_UNWINDER @@ -81,19 +88,17 @@ static struct #endif #endif -/* dwarf unwinder only tested so far on i386 and x86_64, - but globally disabled for now */ -#if 0 -// !defined(STP_USE_FRAME_BUFFER) && (defined(__i386__) || defined(__x86_64__)) -#define STP_USE_DWARF_UNWINDER -#endif - #include "alloc.c" #include "print.c" #include "string.c" #include "io.c" #include "arith.c" #include "copy.c" +#include "regs.c" +#include "regs-ia64.c" + +#include "task_finder.c" + #include "sym.c" #ifdef STP_PERFMON #include "perf.c" diff --git a/runtime/stack-arm.c b/runtime/stack-arm.c index 9b0b772d..fcff0a3b 100644 --- a/runtime/stack-arm.c +++ b/runtime/stack-arm.c @@ -31,7 +31,8 @@ static int __init find_str_pc_offset(void) } -static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) +static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels, + struct task_struct *tsk) { #ifdef STP_USE_FRAME_POINTER int pc_offset = find_str_pc_offset(); diff --git a/runtime/stack-i386.c b/runtime/stack-i386.c index 5a18c9d8..69623765 100644 --- a/runtime/stack-i386.c +++ b/runtime/stack-i386.c @@ -23,14 +23,15 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve /* cannot access stack. give up. */ return; } - if (_stp_func_print(addr, verbose, 0)) + if (_stp_func_print(addr, verbose, 0, NULL)) levels--; stack++; } } #endif -static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) +static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels, + struct task_struct *tsk) { unsigned long context = (unsigned long)®_SP(regs) & ~(THREAD_SIZE - 1); @@ -43,7 +44,7 @@ static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) /* cannot access stack. give up. */ return; } - _stp_func_print(addr, verbose, 1); + _stp_func_print(addr, verbose, 1, NULL); if (unlikely(_stp_read_address(next_fp, (unsigned long *)fp, KERNEL_DS))) { /* cannot access stack. give up. */ return; @@ -60,19 +61,23 @@ static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) struct unwind_frame_info info; arch_unw_init_frame_info(&info, regs); - while (levels && !arch_unw_user_mode(&info)) { - int ret = unwind(&info); + while (levels && (tsk || !arch_unw_user_mode(&info))) { + int ret = unwind(&info, tsk); dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info)); if (ret == 0) { - _stp_func_print(UNW_PC(&info), verbose, 1); + _stp_func_print(UNW_PC(&info), verbose, 1, tsk); levels--; continue; } - /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */ - /* FIXME: is there a way to unwind across kretprobe trampolines? */ - if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline)) + /* If an error happened or we hit a kretprobe trampoline, + * use fallback backtrace, unless user task backtrace. + * FIXME: is there a way to unwind across kretprobe + * trampolines? */ + if ((ret < 0 + || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline)) + && ! (tsk || arch_unw_user_mode(&info))) _stp_stack_print_fallback(UNW_SP(&info), verbose, levels); - break; + return; } #else /* ! STP_USE_DWARF_UNWINDER */ _stp_stack_print_fallback((unsigned long)®_SP(regs), verbose, levels); diff --git a/runtime/stack-ia64.c b/runtime/stack-ia64.c index ca9d25a6..a04355fa 100644 --- a/runtime/stack-ia64.c +++ b/runtime/stack-ia64.c @@ -48,7 +48,8 @@ static void __stp_show_stack_addr(struct unw_frame_info *info, void *arg) } while (unw_unwind(info) >= 0); } -static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) +static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels, + struct task_struct *tsk) { unsigned long *stack = (unsigned long *)®_SP(regs); struct dump_para para; diff --git a/runtime/stack-ppc64.c b/runtime/stack-ppc64.c index 3dc38526..3267194e 100644 --- a/runtime/stack-ppc64.c +++ b/runtime/stack-ppc64.c @@ -7,7 +7,8 @@ * later version. */ -static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels) +static void __stp_stack_print (struct pt_regs *regs, int verbose, int levels, + struct task_struct *tsk) { unsigned long ip, newsp, lr = 0; int count = 0; diff --git a/runtime/stack-s390.c b/runtime/stack-s390.c index c9654102..14e9b7d8 100644 --- a/runtime/stack-s390.c +++ b/runtime/stack-s390.c @@ -66,7 +66,8 @@ __stp_show_stack (unsigned long sp, unsigned long low, } static void __stp_stack_print (struct pt_regs *regs, - int verbose, int levels) + int verbose, int levels, + struct task_struct *tsk) { unsigned long *_sp = (unsigned long *)®_SP(regs); unsigned long sp = (unsigned long)_sp; diff --git a/runtime/stack-x86_64.c b/runtime/stack-x86_64.c index 03d88ef0..9afdf38a 100644 --- a/runtime/stack-x86_64.c +++ b/runtime/stack-x86_64.c @@ -19,7 +19,7 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve /* cannot access stack. give up. */ return; } - if (_stp_func_print(addr, verbose, 0)) + if (_stp_func_print(addr, verbose, 0, NULL)) levels--; stack++; } @@ -27,26 +27,31 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve #endif -static void __stp_stack_print(struct pt_regs *regs, int verbose, int levels) +static void __stp_stack_print(struct pt_regs *regs, int verbose, int levels, + struct task_struct *tsk) { #ifdef STP_USE_DWARF_UNWINDER // FIXME: large stack allocation struct unwind_frame_info info; arch_unw_init_frame_info(&info, regs); - while (levels && !arch_unw_user_mode(&info)) { - int ret = unwind(&info); + while (levels && (tsk || !arch_unw_user_mode(&info))) { + int ret = unwind(&info, tsk); dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(&info), UNW_SP(&info)); if (ret == 0) { - _stp_func_print(UNW_PC(&info), verbose, 1); + _stp_func_print(UNW_PC(&info), verbose, 1, tsk); levels--; continue; } - /* If an error happened or we hit a kretprobe trampoline, use fallback backtrace */ - /* FIXME: is there a way to unwind across kretprobe trampolines? */ - if (ret < 0 || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline)) + /* If an error happened or we hit a kretprobe trampoline, + * use fallback backtrace, unless user task backtrace. + * FIXME: is there a way to unwind across kretprobe + * trampolines? */ + if ((ret < 0 + || (ret > 0 && UNW_PC(&info) == _stp_kretprobe_trampoline)) + && ! (tsk || arch_unw_user_mode(&info))) _stp_stack_print_fallback(UNW_SP(&info), verbose, levels); - break; + return; } #else /* ! STP_USE_DWARF_UNWINDER */ _stp_stack_print_fallback(REG_SP(regs), verbose, levels); diff --git a/runtime/stack.c b/runtime/stack.c index f6b1cd08..042f44c7 100644 --- a/runtime/stack.c +++ b/runtime/stack.c @@ -1,6 +1,6 @@ /* -*- linux-c -*- * Stack tracing functions - * Copyright (C) 2005-2008 Red Hat Inc. + * Copyright (C) 2005-2009 Red Hat Inc. * Copyright (C) 2005 Intel Corporation. * * This file is part of systemtap, and is free software. You can @@ -77,7 +77,7 @@ static void print_stack_address(void *data, unsigned long addr, int reliable) { struct print_stack_data *sdata = data; if (sdata->level++ < sdata->max_level) - _stp_func_print(addr,sdata->verbose, 0); + _stp_func_print(addr, sdata->verbose, 0, NULL); } static const struct stacktrace_ops print_stack_ops = { @@ -97,11 +97,17 @@ static void _stp_stack_print_fallback(unsigned long stack, int verbose, int leve &print_data); } #endif + +// Without KPROBES very little works atm. +// But this file is unconditionally imported, while these two functions are only +// used through context-unwind.stp. +#if defined (CONFIG_KPROBES) + /** Prints the stack backtrace * @param regs A pointer to the struct pt_regs. */ -static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels) +static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels, struct task_struct *tsk) { if (verbose) { /* print the current address */ @@ -112,7 +118,10 @@ static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe _stp_symbol_print((unsigned long)_stp_ret_addr_r(pi)); } else { _stp_print_char(' '); - _stp_symbol_print(REG_IP(regs)); + if (tsk) + _stp_usymbol_print(REG_IP(regs), tsk); + else + _stp_symbol_print(REG_IP(regs)); } _stp_print_char('\n'); } else if (pi) @@ -120,7 +129,7 @@ static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe else _stp_printf("%p ", (int64_t) REG_IP(regs)); - __stp_stack_print(regs, verbose, levels); + __stp_stack_print(regs, verbose, levels, tsk); } /** Writes stack backtrace to a string @@ -129,34 +138,19 @@ static void _stp_stack_print(struct pt_regs *regs, int verbose, struct kretprobe * @param regs A pointer to the struct pt_regs. * @returns void */ -static void _stp_stack_snprint(char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels) +static void _stp_stack_snprint(char *str, int size, struct pt_regs *regs, int verbose, struct kretprobe_instance *pi, int levels, struct task_struct *tsk) { /* To get a string, we use a simple trick. First flush the print buffer, */ /* then call _stp_stack_print, then copy the result into the output string */ /* and clear the print buffer. */ _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); _stp_print_flush(); - _stp_stack_print(regs, verbose, pi, levels); + _stp_stack_print(regs, verbose, pi, levels, tsk); strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len); pb->len = 0; } -/** Prints the user stack backtrace - * @param str string - * @returns Same string as was input with trace info appended, - * @note Currently limited to a depth of two. Works from jprobes and kprobes. - */ -#if 0 -static void _stp_ustack_print(char *str) -{ - struct pt_regs *nregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)current->thread_info)) - 1; - _stp_printf("%p : [user]\n", (int64_t) REG_IP(nregs)); - if (REG_SP(nregs)) - _stp_printf("%p : [user]\n", (int64_t) (*(unsigned long *)REG_SP(nregs))); -} -#endif /* 0 */ - -/** @} */ +#endif /* CONFIG_KPROBES */ void _stp_stack_print_tsk(struct task_struct *tsk, int verbose, int levels) { diff --git a/runtime/staprun/common.c b/runtime/staprun/common.c index fd16b4b8..c67ce340 100644 --- a/runtime/staprun/common.c +++ b/runtime/staprun/common.c @@ -27,6 +27,9 @@ int attach_mod; int delete_mod; int load_only; int need_uprobes; +int daemon_mode; +off_t fsize_max; +int fnum_max; /* module variables */ char *modname = NULL; @@ -35,9 +38,38 @@ char *modoptions[MAXMODOPTIONS]; int control_channel = -1; /* NB: fd==0 possible */ +static char path_buf[PATH_MAX]; +static char *get_abspath(char *path) +{ + int len; + if (path[0] == '/') + return path; + + len = strlen(getcwd(path_buf, PATH_MAX)); + if (len + 2 + strlen(path) >= PATH_MAX) + return NULL; + path_buf[len] = '/'; + strcpy(&path_buf[len + 1], path); + return path_buf; +} + +int stap_strfloctime(char *buf, size_t max, const char *fmt, time_t t) +{ + struct tm tm; + size_t ret; + if (buf == NULL || fmt == NULL || max <= 1) + return -EINVAL; + localtime_r(&t, &tm); + ret = strftime(buf, max, fmt, &tm); + if (ret == 0) + return -EINVAL; + return (int)ret; +} + void parse_args(int argc, char **argv) { int c; + char *s; /* Initialize option variables. */ verbose = 0; @@ -49,8 +81,11 @@ void parse_args(int argc, char **argv) delete_mod = 0; load_only = 0; need_uprobes = 0; + daemon_mode = 0; + fsize_max = 0; + fnum_max = 0; - while ((c = getopt(argc, argv, "ALuvb:t:dc:o:x:")) != EOF) { + while ((c = getopt(argc, argv, "ALuvb:t:dc:o:x:S:D")) != EOF) { switch (c) { case 'u': need_uprobes = 1; @@ -85,11 +120,38 @@ void parse_args(int argc, char **argv) case 'L': load_only = 1; break; + case 'D': + daemon_mode = 1; + break; + case 'S': + fsize_max = strtoul(optarg, &s, 10); + fsize_max <<= 20; + if (s[0] == ',') + fnum_max = (int)strtoul(&s[1], &s, 10); + if (s[0] != '\0') { + err("Invalid file size option '%s'.\n", optarg); + usage(argv[0]); + } + break; default: usage(argv[0]); } } - + if (outfile_name) { + char tmp[PATH_MAX]; + int ret; + outfile_name = get_abspath(outfile_name); + if (outfile_name == NULL) { + err("File name is too long.\n"); + usage(argv[0]); + } + ret = stap_strfloctime(tmp, PATH_MAX - 18, /* = _cpuNNN.SSSSSSSSSS */ + outfile_name, time(NULL)); + if (ret < 0) { + err("Filename format is invalid or too long.\n"); + usage(argv[0]); + } + } if (attach_mod && load_only) { err("You can't specify the '-A' and '-L' options together.\n"); usage(argv[0]); @@ -118,18 +180,40 @@ void parse_args(int argc, char **argv) err("You can't specify the '-c' and '-x' options together.\n"); usage(argv[0]); } + + if (daemon_mode && load_only) { + err("You can't specify the '-D' and '-L' options together.\n"); + usage(argv[0]); + } + if (daemon_mode && delete_mod) { + err("You can't specify the '-D' and '-d' options together.\n"); + usage(argv[0]); + } + if (daemon_mode && target_cmd) { + err("You can't specify the '-D' and '-c' options together.\n"); + usage(argv[0]); + } + if (daemon_mode && outfile_name == NULL) { + err("You have to specify output FILE with '-D' option.\n"); + usage(argv[0]); + } + if (outfile_name == NULL && fsize_max != 0) { + err("You have to specify output FILE with '-S' option.\n"); + usage(argv[0]); + } } void usage(char *prog) { - err("\n%s [-v] [-c cmd ] [-x pid] [-u user]\n" - "\t[-A|-L] [-b bufsize] [-o FILE] MODULE [module-options]\n", prog); + err("\n%s [-v] [-c cmd ] [-x pid] [-u user] [-A|-L|-d]\n" + "\t[-b bufsize] [-o FILE [-D] [-S size[,N]]] MODULE [module-options]\n", prog); err("-v Increase verbosity.\n"); err("-c cmd Command \'cmd\' will be run and staprun will\n"); err(" exit when it does. The '_stp_target' variable\n"); err(" will contain the pid for the command.\n"); err("-x pid Sets the '_stp_target' variable to pid.\n"); - err("-o FILE Send output to FILE.\n"); + err("-o FILE Send output to FILE. This supports strftime(3)\n"); + err(" formats for FILE.\n"); err("-b buffer size The systemtap module specifies a buffer size.\n"); err(" Setting one here will override that value. The\n"); err(" value should be an integer between 1 and 4095 \n"); @@ -140,6 +224,14 @@ void usage(char *prog) err("-d Delete a module. Only detached or unused modules\n"); err(" the user has permission to access will be deleted. Use \"*\"\n"); err(" (quoted) to delete all unused modules.\n"); + err("-D Run in background. This requires '-o' option.\n"); + err("-S size[,N] Switches output file to next file when the size\n"); + err(" of file reaches the specified size. The value\n"); + err(" should be an integer greater than 1 which is\n"); + err(" assumed to be the maximum file size in MB.\n"); + err(" When the number of output files reaches N, it\n"); + err(" switches to the first output file. You can omit\n"); + err(" the second argument.\n"); err("MODULE can be either a module name or a module path. If a\n"); err("module name is used, it is looked for in the following\n"); err("directory: /lib/modules/`uname -r`/systemtap\n"); @@ -344,3 +436,24 @@ int send_request(int type, void *data, int len) if (rc < 0) return rc; return (rc != len+4); } + +#include <stdarg.h> + +static int use_syslog = 0; + +void eprintf(const char *fmt, ...) +{ + va_list va; + va_start(va, fmt); + if (use_syslog) + vsyslog(LOG_ERR, fmt, va); + else + vfprintf(stderr, fmt, va); + va_end(va); +} + +void switch_syslog(const char *name) +{ + openlog(name, LOG_PID, LOG_DAEMON); + use_syslog = 1; +} diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c index 0745f611..7125a7bb 100644 --- a/runtime/staprun/mainloop.c +++ b/runtime/staprun/mainloop.c @@ -7,7 +7,7 @@ * Public License (GPL); either version 2, or (at your option) any * later version. * - * Copyright (C) 2005-2008 Red Hat Inc. + * Copyright (C) 2005-2009 Red Hat Inc. */ #include "staprun.h" @@ -318,6 +318,41 @@ int init_stapio(void) if (target_cmd) start_cmd(); + /* Run in background */ + if (daemon_mode) { + pid_t pid; + int ret; + dbug(2, "daemonizing stapio\n"); + + /* daemonize */ + ret = daemon(0, 1); /* don't close stdout at this time. */ + if (ret) { + err("Failed to daemonize stapio\n"); + return -1; + } + + /* change error messages to syslog. */ + switch_syslog("stapio"); + + /* show new pid */ + pid = getpid(); + fprintf(stdout, "%d\n", pid); + fflush(stdout); + + /* redirect all outputs to /dev/null */ + ret = open("/dev/null", O_RDWR); + if (ret < 0) { + err("Failed to open /dev/null\n"); + return -1; + } + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); + dup2(ret, STDOUT_FILENO); + dup2(ret, STDERR_FILENO); + close(ret); + } + return 0; } @@ -360,10 +395,10 @@ void cleanup_and_exit(int detach) #define BUG9788_WORKAROUND #ifndef BUG9788_WORKAROUND dbug(2, "removing %s\n", modname); - if (execlp(staprun, basename (staprun), "-d", modname, NULL) < 0) { + if (execlp(staprun, basename (staprun), "-d", modpath, NULL) < 0) { if (errno == ENOEXEC) { char *cmd; - if (asprintf(&cmd, "%s -d '%s'", staprun, modname) > 0) + if (asprintf(&cmd, "%s -d '%s'", staprun, modpath) > 0) execl("/bin/sh", "sh", "-c", cmd, NULL); free(cmd); } @@ -392,10 +427,10 @@ void cleanup_and_exit(int detach) if (pid == 0) { /* child process */ /* Run the command. */ - if (execlp(staprun, basename (staprun), "-d", modname, NULL) < 0) { + if (execlp(staprun, basename (staprun), "-d", modpath, NULL) < 0) { if (errno == ENOEXEC) { char *cmd; - if (asprintf(&cmd, "%s -d '%s'", staprun, modname) > 0) + if (asprintf(&cmd, "%s -d '%s'", staprun, modpath) > 0) execl("/bin/sh", "sh", "-c", cmd, NULL); free(cmd); } @@ -454,21 +489,14 @@ int stp_main_loop(void) switch (type) { #if STP_TRANSPORT_VERSION == 1 case STP_REALTIME_DATA: - { - ssize_t bw = write(out_fd[0], data, nb); - if (bw >= 0 && bw != nb) { - nb = nb - bw; - bw = write(out_fd[0], data, nb); - } - if (bw != nb) { - _perr("write error (nb=%ld)", (long)nb); - cleanup_and_exit(0); - } - break; + if (write_realtime_data(data, nb)) { + _perr("write error (nb=%ld)", (long)nb); + cleanup_and_exit(0); } + break; #endif case STP_OOB_DATA: - fputs((char *)data, stderr); + eprintf("%s", (char *)data); break; case STP_EXIT: { @@ -477,6 +505,14 @@ int stp_main_loop(void) cleanup_and_exit(0); break; } + case STP_REQUEST_EXIT: + { + /* module asks us to start exiting, so send STP_EXIT */ + dbug(2, "got STP_REQUEST_EXIT\n"); + int32_t rc, btype = STP_EXIT; + rc = write(control_channel, &btype, sizeof(btype)); + break; + } case STP_START: { struct _stp_msg_start *t = (struct _stp_msg_start *)data; diff --git a/runtime/staprun/modverify.c b/runtime/staprun/modverify.c new file mode 100644 index 00000000..b50a69f4 --- /dev/null +++ b/runtime/staprun/modverify.c @@ -0,0 +1,391 @@ +/* + This program verifies the given file using the given signature, the named + certificate and public key in the given certificate database. + + Copyright (C) 2009 Red Hat Inc. + + This file is part of systemtap, and is free software. You can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <stdio.h> + +#include <nspr.h> +#include <nss.h> +#include <pk11pub.h> +#include <cryptohi.h> +#include <cert.h> +#include <certt.h> + +#include "nsscommon.h" +#include "modverify.h" + +#include <sys/stat.h> + +/* Function: int check_cert_db_permissions (const char *cert_db_path); + * + * Check that the given certificate directory and its contents have + * the correct permissions. + * + * Returns 0 if there is an error, 1 otherwise. + */ +static int +check_db_file_permissions (const char *cert_db_file) { + struct stat info; + int rc; + + rc = stat (cert_db_file, & info); + if (rc) + { + fprintf (stderr, "Could not obtain information on certificate database file %s.\n", + cert_db_file); + perror (""); + return 0; + } + + rc = 1; /* ok */ + + /* The owner of the file must be root. */ + if (info.st_uid != 0) + { + fprintf (stderr, "Certificate database file %s must be owned by root.\n", + cert_db_file); + rc = 0; + } + + /* Check the access permissions of the file. */ + if ((info.st_mode & S_IRUSR) == 0) + fprintf (stderr, "Certificate database file %s should be readable by the owner.\n", cert_db_file); + if ((info.st_mode & S_IWUSR) == 0) + fprintf (stderr, "Certificate database file %s should be writeable by the owner.\n", cert_db_file); + if ((info.st_mode & S_IXUSR) != 0) + { + fprintf (stderr, "Certificate database file %s must not be executable by the owner.\n", cert_db_file); + rc = 0; + } + if ((info.st_mode & S_IRGRP) == 0) + { + fprintf (stderr, "Certificate database file %s should be readable by the group.\n", cert_db_file); + rc = 0; + } + if ((info.st_mode & S_IWGRP) != 0) + { + fprintf (stderr, "Certificate database file %s must not be writable by the group.\n", cert_db_file); + rc = 0; + } + if ((info.st_mode & S_IXGRP) != 0) + { + fprintf (stderr, "Certificate database file %s must not be executable by the group.\n", cert_db_file); + rc = 0; + } + if ((info.st_mode & S_IROTH) == 0) + { + fprintf (stderr, "Certificate database file %s should be readable by others.\n", cert_db_file); + rc = 0; + } + if ((info.st_mode & S_IWOTH) != 0) + { + fprintf (stderr, "Certificate database file %s must not be writable by others.\n", cert_db_file); + rc = 0; + } + if ((info.st_mode & S_IXOTH) != 0) + { + fprintf (stderr, "Certificate database file %s must not be executable by others.\n", cert_db_file); + rc = 0; + } + + return rc; +} + +/* Function: int check_cert_db_permissions (const char *cert_db_path); + * + * Check that the given certificate directory and its contents have + * the correct permissions. + * + * Returns 0 if there is an error, 1 otherwise. + */ +static int +check_cert_db_permissions (const char *cert_db_path) { + struct stat info; + char *fileName; + int rc; + + rc = stat (cert_db_path, & info); + if (rc) + { + fprintf (stderr, "Could not obtain information on certificate database directory %s.\n", + cert_db_path); + perror (""); + return 0; + } + + rc = 1; /* ok */ + + /* The owner of the database must be root. */ + if (info.st_uid != 0) + { + fprintf (stderr, "Certificate database directory %s must be owned by root.\n", cert_db_path); + rc = 0; + } + + /* Check the database directory access permissions */ + if ((info.st_mode & S_IRUSR) == 0) + fprintf (stderr, "Certificate database %s should be readable by the owner.\n", cert_db_path); + if ((info.st_mode & S_IWUSR) == 0) + fprintf (stderr, "Certificate database %s should be writeable by the owner.\n", cert_db_path); + if ((info.st_mode & S_IXUSR) == 0) + fprintf (stderr, "Certificate database %s should be searchable by the owner.\n", cert_db_path); + if ((info.st_mode & S_IRGRP) == 0) + fprintf (stderr, "Certificate database %s should be readable by the group.\n", cert_db_path); + if ((info.st_mode & S_IWGRP) != 0) + { + fprintf (stderr, "Certificate database %s must not be writable by the group.\n", cert_db_path); + rc = 0; + } + if ((info.st_mode & S_IXGRP) == 0) + fprintf (stderr, "Certificate database %s should be searchable by the group.\n", cert_db_path); + if ((info.st_mode & S_IROTH) == 0) + fprintf (stderr, "Certificate database %s should be readable by others.\n", cert_db_path); + if ((info.st_mode & S_IWOTH) != 0) + { + fprintf (stderr, "Certificate database %s must not be writable by others.\n", cert_db_path); + rc = 0; + } + if ((info.st_mode & S_IXOTH) == 0) + fprintf (stderr, "Certificate database %s should be searchable by others.\n", cert_db_path); + + /* Now check the permissions of the critical files. */ + fileName = PORT_Alloc (strlen (cert_db_path) + 11); + if (! fileName) + { + fprintf (stderr, "Unable to allocate memory for certificate database file names\n"); + return 0; + } + + sprintf (fileName, "%s/cert8.db", cert_db_path); + rc &= check_db_file_permissions (fileName); + sprintf (fileName, "%s/key3.db", cert_db_path); + rc &= check_db_file_permissions (fileName); + sprintf (fileName, "%s/secmod.db", cert_db_path); + rc &= check_db_file_permissions (fileName); + + PORT_Free (fileName); + + if (rc == 0) + fprintf (stderr, "Unable to use certificate database %s due to errors.\n", cert_db_path); + + return rc; +} + +static int +verify_it (const char *inputName, const char *signatureName, SECKEYPublicKey *pubKey) +{ + unsigned char buffer[4096]; + PRFileInfo info; + PRStatus prStatus; + PRInt32 numBytes; + PRFileDesc *local_file_fd; + VFYContext *vfy; + SECItem signature; + SECStatus secStatus; + + /* Get the size of the signature file. */ + prStatus = PR_GetFileInfo (signatureName, &info); + if (prStatus != PR_SUCCESS || info.type != PR_FILE_FILE || info.size < 0) + { + fprintf (stderr, "Unable to obtain information on the signature file %s.\n", signatureName); + nssError (); + return MODULE_UNTRUSTED; /* Not signed */ + } + + /* Open the signature file. */ + local_file_fd = PR_Open (signatureName, PR_RDONLY, 0); + if (local_file_fd == NULL) + { + fprintf (stderr, "Could not open the signature file %s\n.", signatureName); + nssError (); + return MODULE_CHECK_ERROR; + } + + /* Allocate space to read the signature file. */ + signature.data = PORT_Alloc (info.size); + if (! signature.data) + { + fprintf (stderr, "Unable to allocate memory for the signature in %s.\n", signatureName); + nssError (); + return MODULE_CHECK_ERROR; + } + + /* Read the signature. */ + numBytes = PR_Read (local_file_fd, signature.data, info.size); + if (numBytes == 0) /* EOF */ + { + fprintf (stderr, "EOF reading signature file %s.\n", signatureName); + return MODULE_CHECK_ERROR; + } + if (numBytes < 0) + { + fprintf (stderr, "Error reading signature file %s.\n", signatureName); + nssError (); + return MODULE_CHECK_ERROR; + } + if (numBytes != info.size) + { + fprintf (stderr, "Incomplete data while reading signature file %s.\n", signatureName); + return MODULE_CHECK_ERROR; + } + signature.len = info.size; + + /* Done with the signature file. */ + PR_Close (local_file_fd); + + /* Create a verification context. */ + vfy = VFY_CreateContextDirect (pubKey, & signature, SEC_OID_PKCS1_RSA_ENCRYPTION, + SEC_OID_UNKNOWN, NULL, NULL); + if (! vfy) + { + /* The key does not match the signature. This is not an error. It just means + we are currently trying the wrong certificate/key. i.e. the module + remains untrusted for now. */ + return MODULE_UNTRUSTED; + } + + /* Begin the verification process. */ + secStatus = VFY_Begin(vfy); + if (secStatus != SECSuccess) + { + fprintf (stderr, "Unable to initialize verification context while verifying %s using the signature in %s.\n", + inputName, signatureName); + nssError (); + return MODULE_CHECK_ERROR; + } + + /* Now read the data and add it to the signature. */ + local_file_fd = PR_Open (inputName, PR_RDONLY, 0); + if (local_file_fd == NULL) + { + fprintf (stderr, "Could not open module file %s.\n", inputName); + nssError (); + return MODULE_CHECK_ERROR; + } + + for (;;) + { + numBytes = PR_Read (local_file_fd, buffer, sizeof (buffer)); + if (numBytes == 0) + break; /* EOF */ + + if (numBytes < 0) + { + fprintf (stderr, "Error reading module file %s.\n", inputName); + nssError (); + return MODULE_CHECK_ERROR; + } + + /* Add the data to the signature. */ + secStatus = VFY_Update (vfy, buffer, numBytes); + if (secStatus != SECSuccess) + { + fprintf (stderr, "Error while verifying module file %s.\n", inputName); + nssError (); + return MODULE_CHECK_ERROR; + } + } + + PR_Close(local_file_fd); + + /* Complete the verification. */ + secStatus = VFY_End (vfy); + if (secStatus != SECSuccess) { + fprintf (stderr, "Unable to verify signed module %s. It may have been altered since it was created.\n", inputName); + nssError (); + return MODULE_ALTERED; + } + + return MODULE_OK; +} + +int verify_module (const char *module_name, const char *signature_name) +{ + const char *dbdir = SYSCONFDIR "/systemtap/staprun"; + SECKEYPublicKey *pubKey; + SECStatus secStatus; + CERTCertList *certList; + CERTCertListNode *certListNode; + CERTCertificate *cert; + PRStatus prStatus; + PRFileInfo info; + int rc = 0; + + /* Look for the certificate database. If it's not there, it's not an error, it + just means that the module can't be verified. */ + prStatus = PR_GetFileInfo (dbdir, &info); + if (prStatus != PR_SUCCESS || info.type != PR_FILE_DIRECTORY) + return MODULE_UNTRUSTED; + + /* Verify the permissions of the certificate database and its files. */ + if (! check_cert_db_permissions (dbdir)) + return MODULE_UNTRUSTED; + + /* Call the NSPR initialization routines. */ + PR_Init (PR_SYSTEM_THREAD, PR_PRIORITY_NORMAL, 1); + + /* Initialize NSS. */ + secStatus = NSS_Init (dbdir); + if (secStatus != SECSuccess) + { + fprintf (stderr, "Unable to initialize nss library using the database in %s.\n", + dbdir); + nssError (); + return MODULE_CHECK_ERROR; + } + + certList = PK11_ListCerts (PK11CertListAll, NULL); + if (certList == NULL) + { + fprintf (stderr, "Unable to find certificates in the certificate database in %s.\n", + dbdir); + nssError (); + return MODULE_UNTRUSTED; + } + + /* We need to look at each certificate in the database. */ + for (certListNode = CERT_LIST_HEAD (certList); + ! CERT_LIST_END (certListNode, certList); + certListNode = CERT_LIST_NEXT (certListNode)) + { + cert = certListNode->cert; + + pubKey = CERT_ExtractPublicKey (cert); + if (pubKey == NULL) + { + fprintf (stderr, "Unable to extract public key from the certificate with nickname %s from the certificate database in %s.\n", + cert->nickname, dbdir); + nssError (); + return MODULE_CHECK_ERROR; + } + + /* Verify the file. */ + rc = verify_it (module_name, signature_name, pubKey); + if (rc == MODULE_OK || rc == MODULE_ALTERED || rc == MODULE_CHECK_ERROR) + break; /* resolved or error */ + } + + /* Shutdown NSS and exit NSPR gracefully. */ + nssCleanup (); + + return rc; +} + +/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ diff --git a/runtime/staprun/modverify.h b/runtime/staprun/modverify.h new file mode 100644 index 00000000..49b90bfe --- /dev/null +++ b/runtime/staprun/modverify.h @@ -0,0 +1,9 @@ +int verify_module (const char *module_name, const char *signature_name); + +/* return codes for verify_module. */ +#define MODULE_OK 1 +#define MODULE_UNTRUSTED 0 +#define MODULE_CHECK_ERROR -1 +#define MODULE_ALTERED -2 + +/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ diff --git a/runtime/staprun/relay.c b/runtime/staprun/relay.c index 19621933..b9796241 100644 --- a/runtime/staprun/relay.c +++ b/runtime/staprun/relay.c @@ -17,6 +17,9 @@ static pthread_t reader[NR_CPUS]; static int relay_fd[NR_CPUS]; static int bulkmode = 0; static volatile int stop_threads = 0; +static time_t *time_backlog[NR_CPUS]; +static int backlog_order=0; +#define BACKLOG_MASK ((1 << backlog_order) - 1) /* * ppoll exists in glibc >= 2.4 @@ -44,6 +47,90 @@ static int ppoll(struct pollfd *fds, nfds_t nfds, } #endif +int init_backlog(int cpu) +{ + int order = 0; + if (!fnum_max) + return 0; + while (fnum_max >> order) order++; + if (fnum_max == 1<<(order-1)) order--; + time_backlog[cpu] = (time_t *)calloc(1<<order, sizeof(time_t)); + if (time_backlog[cpu] == NULL) { + _err("Memory allocation failed\n"); + return -1; + } + backlog_order = order; + return 0; +} + +void write_backlog(int cpu, int fnum, time_t t) +{ + time_backlog[cpu][fnum & BACKLOG_MASK] = t; +} + +time_t read_backlog(int cpu, int fnum) +{ + return time_backlog[cpu][fnum & BACKLOG_MASK]; +} + +int make_outfile_name(char *buf, int max, int fnum, int cpu, time_t t) +{ + int len; + len = stap_strfloctime(buf, max, outfile_name, t); + if (len < 0) { + err("Invalid FILE name format\n"); + return -1; + } + if (bulkmode) { + /* special case: for testing we sometimes want to write to /dev/null */ + if (strcmp(outfile_name, "/dev/null") == 0) { + strcpy(buf, "/dev/null"); + } else { + if (snprintf_chk(&buf[len], PATH_MAX - len, + "_cpu%d.%d", cpu, fnum)) + return -1; + } + } else { + /* stream mode */ + if (snprintf_chk(&buf[len], PATH_MAX - len, ".%d", fnum)) + return -1; + } + return 0; +} + +static int open_outfile(int fnum, int cpu, int remove_file) +{ + char buf[PATH_MAX]; + time_t t; + if (!outfile_name) { + _err("-S is set without -o. Please file a bug report.\n"); + return -1; + } + + time(&t); + if (fnum_max) { + if (remove_file) { + /* remove oldest file */ + if (make_outfile_name(buf, PATH_MAX, fnum - fnum_max, + cpu, read_backlog(cpu, fnum - fnum_max)) < 0) + return -1; + remove(buf); /* don't care */ + } + write_backlog(cpu, fnum, t); + } + + if (make_outfile_name(buf, PATH_MAX, fnum, cpu, t) < 0) + return -1; + out_fd[cpu] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (out_fd[cpu] < 0) { + perr("Couldn't open output file %s", buf); + return -1; + } + if (set_clexec(out_fd[cpu]) < 0) + return -1; + return 0; +} + /** * reader_thread - per-cpu channel buffer reader */ @@ -57,6 +144,9 @@ static void *reader_thread(void *data) struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim; sigset_t sigs; struct sigaction sa; + off_t wsize = 0; + int fnum = 0; + int remove_file = 0; sigemptyset(&sigs); sigaddset(&sigs,SIGUSR2); @@ -95,17 +185,37 @@ static void *reader_thread(void *data) dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno); if (errno != EINTR) { _perr("poll error"); - return(NULL); + goto error_out; } } while ((rc = read(relay_fd[cpu], buf, sizeof(buf))) > 0) { + wsize += rc; + /* Switching file */ + if (fsize_max && wsize > fsize_max) { + close(out_fd[cpu]); + fnum++; + if (fnum_max && fnum == fnum_max) + remove_file = 1; + if (open_outfile(fnum, cpu, remove_file) < 0) { + perr("Couldn't open file for cpu %d, exiting.", cpu); + goto error_out; + } + wsize = rc; + } if (write(out_fd[cpu], buf, rc) != rc) { - perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu); - return(NULL); + if (errno != EPIPE) + perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu); + goto error_out; } } } while (!stop_threads); - dbug(3, "exiting thread %d\n", cpu); + dbug(3, "exiting thread for cpu %d\n", cpu); + return(NULL); + +error_out: + /* Signal the main thread that we need to quit */ + kill(getpid(), SIGTERM); + dbug(2, "exiting thread for cpu %d after error\n", cpu); return(NULL); } @@ -116,7 +226,7 @@ static void *reader_thread(void *data) */ int init_relayfs(void) { - int i; + int i, len; struct statfs st; char rqbuf[128]; char buf[PATH_MAX], relay_filebase[PATH_MAX]; @@ -163,14 +273,29 @@ int init_relayfs(void) return -1; } - if (bulkmode) { + if (fsize_max) { + /* switch file mode */ + for (i = 0; i < ncpus; i++) { + if (init_backlog(i) < 0) + return -1; + if (open_outfile(0, i, 0) < 0) + return -1; + } + } else if (bulkmode) { for (i = 0; i < ncpus; i++) { if (outfile_name) { /* special case: for testing we sometimes want to write to /dev/null */ if (strcmp(outfile_name, "/dev/null") == 0) { strcpy(buf, "/dev/null"); } else { - if (sprintf_chk(buf, "%s_%d", outfile_name, i)) + len = stap_strfloctime(buf, PATH_MAX, + outfile_name, time(NULL)); + if (len < 0) { + err("Invalid FILE name format\n"); + return -1; + } + if (snprintf_chk(&buf[len], + PATH_MAX - len, "_%d", i)) return -1; } } else { @@ -189,9 +314,15 @@ int init_relayfs(void) } else { /* stream mode */ if (outfile_name) { - out_fd[0] = open (outfile_name, O_CREAT|O_TRUNC|O_WRONLY, 0666); + len = stap_strfloctime(buf, PATH_MAX, + outfile_name, time(NULL)); + if (len < 0) { + err("Invalid FILE name format\n"); + return -1; + } + out_fd[0] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); if (out_fd[0] < 0) { - perr("Couldn't open output file %s", outfile_name); + perr("Couldn't open output file %s", buf); return -1; } if (set_clexec(out_fd[i]) < 0) diff --git a/runtime/staprun/relay_old.c b/runtime/staprun/relay_old.c index bd746f19..33d2daf3 100644 --- a/runtime/staprun/relay_old.c +++ b/runtime/staprun/relay_old.c @@ -23,6 +23,14 @@ static int bulkmode = 0; unsigned subbuf_size = 0; unsigned n_subbufs = 0; +struct switchfile_ctrl_block { + off_t wsize; + int fnum; + int rmfile; +}; + +static struct switchfile_ctrl_block global_scb = {0, 0, 0}; + /* per-cpu buffer info */ static struct buf_status { @@ -70,6 +78,41 @@ void close_oldrelayfs(int detach) close_relayfs_files(i); } +static int open_oldoutfile(int fnum, int cpu, int remove_file) +{ + char buf[PATH_MAX]; + time_t t; + if (outfile_name) { + time(&t); + if (fnum_max) { + if (remove_file) { + /* remove oldest file */ + if (make_outfile_name(buf, PATH_MAX, fnum - fnum_max, + cpu, read_backlog(cpu, fnum - fnum_max)) < 0) + return -1; + remove(buf); /* don't care */ + } + write_backlog(cpu, fnum, t); + } + if (make_outfile_name(buf, PATH_MAX, fnum, cpu, t) < 0) + return -1; + } else if (bulkmode) { + if (sprintf_chk(buf, "stpd_cpu%d.%d", cpu, fnum)) + return -1; + } else { /* stream mode */ + out_fd[cpu] = STDOUT_FILENO; + return 0; + } + + out_fd[cpu] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (out_fd[cpu] < 0) { + perr("Couldn't open output file %s", buf); + return -1; + } + if (set_clexec(out_fd[cpu]) < 0) + return -1; + return 0; +} /** * open_relayfs_files - open and mmap buffer and open output file. * Returns -1 on unexpected failure, 0 if file not found, 1 on success. @@ -104,18 +147,31 @@ static int open_relayfs_files(int cpu, const char *relay_filebase, const char *p return -1; } + if (fsize_max) { + if (init_backlog(cpu) < 0) + goto err2; + if (open_oldoutfile(0, cpu, 0) < 0) + goto err2; + goto opened; + } if (outfile_name) { /* special case: for testing we sometimes want to * write to /dev/null */ if (strcmp(outfile_name, "/dev/null") == 0) { strcpy(tmp, "/dev/null"); } else { - if (sprintf_chk(tmp, "%s_%d", outfile_name, cpu)) - goto err1; + int len; + len = stap_strfloctime(tmp, PATH_MAX, outfile_name, time(NULL)); + if (len < 0) { + err("Invalid FILE name format\n"); + goto err2; + } + if (snprintf_chk(&tmp[len], PATH_MAX - len, "_%d", cpu)) + goto err2; } } else { if (sprintf_chk(tmp, "stpd_cpu%d", cpu)) - goto err1; + goto err2; } if((percpu_tmpfile[cpu] = fopen(tmp, "w+")) == NULL) { @@ -126,6 +182,7 @@ static int open_relayfs_files(int cpu, const char *relay_filebase, const char *p perr("Couldn't open output file %s", tmp); goto err2; } +opened: total_bufsize = subbuf_size * n_subbufs; relay_buffer[cpu] = mmap(NULL, total_bufsize, PROT_READ, @@ -155,7 +212,8 @@ err1: /** * process_subbufs - write ready subbufs to disk */ -static int process_subbufs(struct _stp_buf_info *info) +static int process_subbufs(struct _stp_buf_info *info, + struct switchfile_ctrl_block *scb) { unsigned subbufs_ready, start_subbuf, end_subbuf, subbuf_idx, i; int len, cpu = info->cpu; @@ -173,10 +231,23 @@ static int process_subbufs(struct _stp_buf_info *info) padding = *((unsigned *)subbuf_ptr); subbuf_ptr += sizeof(padding); len = (subbuf_size - sizeof(padding)) - padding; + scb->wsize += len; + if (fsize_max && scb->wsize > fsize_max) { + fclose(percpu_tmpfile[cpu]); + scb->fnum ++; + if (fnum_max && scb->fnum == fnum_max) + scb->rmfile = 1; + if (open_oldoutfile(scb->fnum, cpu, scb->rmfile) < 0) { + perr("Couldn't open file for cpu %d, exiting.", cpu); + return -1; + } + scb->wsize = len; + } if (len) { if (fwrite_unlocked (subbuf_ptr, len, 1, percpu_tmpfile[cpu]) != 1) { - _perr("Couldn't write to output file for cpu %d, exiting:", cpu); - exit(1); + if (errno != EPIPE) + _perr("Couldn't write to output file for cpu %d, exiting:", cpu); + return -1; } } subbufs_consumed++; @@ -196,6 +267,7 @@ static void *reader_thread(void *data) struct _stp_consumed_info consumed_info; unsigned subbufs_consumed; cpu_set_t cpu_mask; + struct switchfile_ctrl_block scb = {0, 0, 0}; CPU_ZERO(&cpu_mask); CPU_SET(cpu, &cpu_mask); @@ -210,14 +282,17 @@ static void *reader_thread(void *data) if (rc < 0) { if (errno != EINTR) { _perr("poll error"); - exit(1); + break; } err("WARNING: poll warning: %s\n", strerror(errno)); rc = 0; } rc = read(proc_fd[cpu], &status[cpu].info, sizeof(struct _stp_buf_info)); - subbufs_consumed = process_subbufs(&status[cpu].info); + rc = process_subbufs(&status[cpu].info, &scb); + if (rc < 0) + break; + subbufs_consumed = rc; if (subbufs_consumed) { if (subbufs_consumed > status[cpu].max_backlog) status[cpu].max_backlog = subbufs_consumed; @@ -230,6 +305,37 @@ static void *reader_thread(void *data) if (status[cpu].info.flushing) pthread_exit(NULL); } while (1); + + /* Signal the main thread that we need to quit */ + kill(getpid(), SIGTERM); + pthread_exit(NULL); +} + +/** + * write_realtime_data - write realtime data packet to disk + */ +int write_realtime_data(void *data, ssize_t nb) +{ + ssize_t bw; + global_scb.wsize += nb; + if (fsize_max && global_scb.wsize > fsize_max) { + close(out_fd[0]); + global_scb.fnum++; + if (fnum_max && global_scb.fnum == fnum_max) + global_scb.rmfile = 1; + if (open_oldoutfile(global_scb.fnum, 0, + global_scb.rmfile) < 0) { + perr("Couldn't open file, exiting."); + return -1; + } + global_scb.wsize = nb; + } + bw = write(out_fd[0], data, nb); + if (bw >= 0 && bw != nb) { + nb = nb - bw; + bw = write(out_fd[0], data, nb); + } + return bw != nb; } /** @@ -249,10 +355,22 @@ int init_oldrelayfs(void) bulkmode = 1; if (!bulkmode) { + int len; + char tmp[PATH_MAX]; + if (fsize_max) { + if (init_backlog(0)) + return -1; + return open_oldoutfile(0, 0, 0); + } if (outfile_name) { - out_fd[0] = open (outfile_name, O_CREAT|O_TRUNC|O_WRONLY, 0666); + len = stap_strfloctime(tmp, PATH_MAX, outfile_name, time(NULL)); + if (len < 0) { + err("Invalid FILE name format\n"); + return -1; + } + out_fd[0] = open (tmp, O_CREAT|O_TRUNC|O_WRONLY, 0666); if (out_fd[0] < 0 || set_clexec(out_fd[0]) < 0) { - perr("Couldn't open output file '%s'", outfile_name); + perr("Couldn't open output file '%s'", tmp); return -1; } } else diff --git a/runtime/staprun/staprun.h b/runtime/staprun/staprun.h index f49cc7db..bd6402e4 100644 --- a/runtime/staprun/staprun.h +++ b/runtime/staprun/staprun.h @@ -9,7 +9,7 @@ * * Copyright (C) 2005-2008 Red Hat Inc. */ - +#define _FILE_OFFSET_BITS 64 #include <stdio.h> #include <stdlib.h> #include <ctype.h> @@ -33,31 +33,35 @@ #include <sys/wait.h> #include <sys/statfs.h> #include <linux/version.h> +#include <syslog.h> /* Include config.h to pick up dependency for --prefix usage. */ #include "config.h" -#define dbug(level, args...) {if (verbose>=level) {fprintf(stderr,"%s:%s:%d ",__name__,__FUNCTION__, __LINE__); fprintf(stderr,args);}} +extern void eprintf(const char *fmt, ...); +extern void switch_syslog(const char *name); + +#define dbug(level, args...) do {if (verbose>=level) {eprintf("%s:%s:%d ",__name__,__FUNCTION__, __LINE__); eprintf(args);}} while (0) extern char *__name__; /* print to stderr */ -#define err(args...) fprintf(stderr,args) +#define err(args...) eprintf(args) /* better perror() */ #define perr(args...) do { \ int _errno = errno; \ - fputs("ERROR: ", stderr); \ - fprintf(stderr, args); \ - fprintf(stderr, ": %s\n", strerror(_errno)); \ + eprintf("ERROR: "); \ + eprintf(args); \ + eprintf(": %s\n", strerror(_errno)); \ } while (0) /* Error messages. Use these for serious errors, not informational messages to stderr. */ -#define _err(args...) do {fprintf(stderr,"%s:%s:%d: ERROR: ",__name__, __FUNCTION__, __LINE__); fprintf(stderr,args);} while(0) +#define _err(args...) do {eprintf("%s:%s:%d: ERROR: ",__name__, __FUNCTION__, __LINE__); eprintf(args);} while(0) #define _perr(args...) do { \ int _errno = errno; \ _err(args); \ - fprintf(stderr, ": %s\n", strerror(_errno)); \ + eprintf(": %s\n", strerror(_errno)); \ } while (0) #define overflow_error() _err("Internal buffer overflow. Please file a bug report.\n") @@ -114,7 +118,12 @@ int init_relayfs(void); void close_relayfs(void); int init_oldrelayfs(void); void close_oldrelayfs(int); +int write_realtime_data(void *data, ssize_t nb); void setup_signals(void); +int make_outfile_name(char *buf, int max, int fnum, int cpu, time_t t); +int init_backlog(int cpu); +void write_backlog(int cpu, int fnum, time_t t); +time_t read_backlog(int cpu, int fnum); /* staprun_funcs.c */ void setup_staprun_signals(void); const char *moderror(int err); @@ -126,6 +135,7 @@ void start_symbol_thread(void); void stop_symbol_thread(void); /* common.c functions */ +int stap_strfloctime(char *buf, size_t max, const char *fmt, time_t t); void parse_args(int argc, char **argv); void usage(char *prog); void parse_modpath(const char *); @@ -154,6 +164,9 @@ extern int attach_mod; extern int delete_mod; extern int load_only; extern int need_uprobes; +extern int daemon_mode; +extern off_t fsize_max; +extern int fnum_max; /* getopt variables */ extern char *optarg; diff --git a/runtime/staprun/staprun_funcs.c b/runtime/staprun/staprun_funcs.c index 5e7fa102..8da7e7e8 100644 --- a/runtime/staprun/staprun_funcs.c +++ b/runtime/staprun/staprun_funcs.c @@ -7,14 +7,20 @@ * Public License (GPL); either version 2, or (at your option) any * later version. * - * Copyright (C) 2007-2008 Red Hat Inc. + * Copyright (C) 2007-2009 Red Hat Inc. */ +#include "config.h" #include "staprun.h" +#if HAVE_NSS +#include "modverify.h" +#endif + #include <sys/mount.h> #include <sys/utsname.h> #include <grp.h> #include <pwd.h> +#include <assert.h> extern long init_module(void *, unsigned long, const char *); @@ -199,6 +205,44 @@ int mountfs(void) return 0; } +#if HAVE_NSS +/* + * Modules which have been signed using a certificate and private key + * corresponding to a certificate and public key in the database in + * the '$sysconfdir/systemtap/staprun' directory may be loaded by + * anyone. + * + * Returns: -1 on errors, 0 on failure, 1 on success. + */ +static int +check_signature(void) +{ + char module_realpath[PATH_MAX]; + char signature_realpath[PATH_MAX]; + int rc; + + dbug(2, "checking signature for %s\n", modpath); + + /* Use realpath() to canonicalize the module path. */ + if (realpath(modpath, module_realpath) == NULL) { + perr("Unable to canonicalize module path \"%s\"", modpath); + return MODULE_CHECK_ERROR; + } + + /* Now add the .sgn suffix to get the signature file name. */ + if (strlen (module_realpath) > PATH_MAX - 4) { + err("Path \"%s\" is too long.", modpath); + return MODULE_CHECK_ERROR; + } + sprintf (signature_realpath, "%s.sgn", module_realpath); + + rc = verify_module (module_realpath, signature_realpath); + + dbug(2, "verify_module returns %d\n", rc); + + return rc; +} +#endif /* HAVE_NSS */ /* * Members of the 'stapusr' group can only use "blessed" modules - @@ -269,6 +313,15 @@ check_path(void) return -1; } + /* Overwrite the modpath with the canonicalized one, to defeat + a possible race between path checking below and somewhat later + module loading. */ + modpath = strdup (module_realpath); + if (modpath == NULL) { + _perr("allocating memory failed"); + exit (1); + } + /* To make sure the user can't specify something like * /lib/modules/`uname -r`/systemtapmod.ko, put a '/' on the * end of staplib_dir_realpath. */ @@ -293,22 +346,23 @@ check_path(void) } /* - * Check the user's permissions. Is he allowed to run staprun (or is - * he limited to "blessed" modules)? + * Check the user's group membership. Is he allowed to run staprun (or is * - * Returns: -1 on errors, 0 on failure, 1 on success. + * o members of stapdev can do anything + * o members of stapusr can load modules from /lib/modules/KVER/systemtap + * + * Returns: -2 if neither group exists + * -1 for other errors + * 0 on failure + * 1 on success */ -int check_permissions(void) +static int +check_groups (void) { gid_t gid, gidlist[NGROUPS_MAX]; gid_t stapdev_gid, stapusr_gid; int i, ngids; struct group *stgr; - int path_check = 0; - - /* If we're root, we can do anything. */ - if (getuid() == 0) - return 1; /* Lookup the gid for group "stapdev" */ errno = 0; @@ -332,55 +386,42 @@ int check_permissions(void) else stapusr_gid = stgr->gr_gid; - /* If neither group was found, just return an error. */ - if (stapdev_gid == (gid_t)-1 && stapusr_gid == (gid_t)-1) { - err("ERROR: You are trying to run stap as a normal user.\n" - "You should either be root, or be part of either " - "group \"stapdev\" or group \"stapusr\".\n" - "Your system doesn't seem to have either group.\n" - "For more information, please consult the \"SAFETY AND SECURITY\" section of the \"stap(1)\" manpage\n"); - return -1; - } + /* If neither group was found, then return -2. */ + if (stapdev_gid == (gid_t)-1 && stapusr_gid == (gid_t)-1) + return -2; /* According to the getgroups() man page, getgroups() may not * return the effective gid, so try to match it first. */ gid = getegid(); if (gid == stapdev_gid) return 1; - else if (gid == stapusr_gid) - path_check = 1; - /* Get the list of the user's groups. */ - ngids = getgroups(NGROUPS_MAX, gidlist); - if (ngids < 0) { - perr("Unable to retrieve group list"); - return -1; - } - - for (i = 0; i < ngids; i++) { - /* If the user is a member of 'stapdev', then we're - * done, since he can use staprun without any - * restrictions. */ - if (gidlist[i] == stapdev_gid) - return 1; - - /* If the user is a member of 'stapusr', then we'll - * need to check the module path. However, we'll keep - * checking groups since it is possible the user is a - * member of both groups and we haven't seen the - * 'stapdev' group yet. */ - if (gidlist[i] == stapusr_gid) - path_check = 1; - } + if (gid != stapusr_gid) { + /* Get the list of the user's groups. */ + ngids = getgroups(NGROUPS_MAX, gidlist); + if (ngids < 0) { + perr("Unable to retrieve group list"); + return -1; + } - /* If path_check is 0, then the user isn't a member of either - * group. Error out. */ - if (path_check == 0) { - err("ERROR: You are trying to run stap as a normal user.\n" - "You must be a member of either group \"stapdev\" or group \"stapusr\".\n" - "Please contact your system administrator to get yourself membership to either of those groups.\n" - "For more information, please consult the \"SAFETY AND SECURITY\" section of the \"stap(1)\" manpage.\n"); - return 0; + for (i = 0; i < ngids; i++) { + /* If the user is a member of 'stapdev', then we're + * done, since he can use staprun without any + * restrictions. */ + if (gidlist[i] == stapdev_gid) + return 1; + + /* If the user is a member of 'stapusr', then we'll + * need to check the module path. However, we'll keep + * checking groups since it is possible the user is a + * member of both groups and we haven't seen the + * 'stapdev' group yet. */ + if (gidlist[i] == stapusr_gid) + gid = stapusr_gid; + } + /* Not a member of stapusr? */ + if (gid != stapusr_gid) + return 0; } /* At this point the user is only a member of the 'stapusr' @@ -389,3 +430,50 @@ int check_permissions(void) * is in that directory. */ return check_path(); } + +/* + * Check the user's permissions. Is he allowed to run staprun (or is + * he limited to "blessed" modules)? + * + * There are several levels of possible permission: + * + * 1) root can do anything + * 2) members of stapdev can do anything + * 3) members of stapusr can load modules from /lib/modules/KVER/systemtap + * + * It is only an error if all 3 levels of checking fail + * + * Returns: -1 on errors, 0 on failure, 1 on success. + */ +int check_permissions(void) +{ + int check_groups_rc; +#if HAVE_NSS + int check_signature_rc = 0; + + /* Attempt to verify the module against its signature. Return failure + if the module has been tampered with (altered). */ + check_signature_rc = check_signature (); + if (check_signature_rc == MODULE_ALTERED) + return 0; +#endif + + /* If we're root, we can do anything. */ + if (getuid() == 0) + return 1; + + /* Check permissions for group membership. */ + check_groups_rc = check_groups (); + if (check_groups_rc == 1) + return 1; + + err("ERROR: You are trying to run stap as a normal user.\n" + "You should either be root, or be part of either " + "group \"stapdev\" or group \"stapusr\".\n"); + if (check_groups_rc == -2) { + err("Your system doesn't seem to have either group.\n"); + check_groups_rc = -1; + } + + return check_groups_rc; +} diff --git a/runtime/sym.c b/runtime/sym.c index 31700326..386005b2 100644 --- a/runtime/sym.c +++ b/runtime/sym.c @@ -14,6 +14,7 @@ #include "sym.h" #include "string.c" +#include "task_finder_vma.c" /** @file sym.c * @addtogroup sym Symbolic Functions @@ -21,6 +22,62 @@ * @{ */ +static void _stp_sym_init(void) +{ + static int initialized = 0; + if (! initialized) { + __stp_tf_vma_initialize(); + initialized = 1; + } +} + +/* Callback that needs to be registered (in tapsets.cxx for + emit_module_init) for every user task path or pid for which we + might need symbols or unwind info. */ +static int _stp_tf_mmap_cb(struct stap_task_finder_target *tgt, + struct task_struct *tsk, + char *path, + unsigned long addr, + unsigned long length, + unsigned long offset, + unsigned long vm_flags) +{ + int i; + struct _stp_module *module = NULL; + +#ifdef DEBUG_TASK_FINDER_VMA + _stp_dbug(__FUNCTION__, __LINE__, + "mmap_cb: tsk %d:%d path %s, addr 0x%08lx, length 0x%08lx, offset 0x%lx, flags 0x%lx\n", + tsk->pid, tsk->tgid, path, addr, length, offset, vm_flags); +#endif + if (path != NULL) { + for (i = 0; i < _stp_num_modules; i++) { + if (strcmp(path, _stp_modules[i]->path) == 0) + { +#ifdef DEBUG_TASK_FINDER_VMA + _stp_dbug(__FUNCTION__, __LINE__, + "vm_cb: matched path %s to module\n", + path); +#endif + module = _stp_modules[i]; + break; + } + } + } + stap_add_vma_map_info(tsk->group_leader, addr, addr + length, offset, + module); + return 0; +} + +static int _stp_tf_munmap_cb(struct stap_task_finder_target *tgt, + struct task_struct *tsk, + unsigned long addr, + unsigned long length) +{ + stap_remove_vma_map_info(tsk->group_leader, addr, addr + length, 0); + return 0; +} + /* XXX: this needs to be address-space-specific. */ static unsigned long _stp_module_relocate(const char *module, const char *section, unsigned long offset) { @@ -72,35 +129,54 @@ static unsigned long _stp_module_relocate(const char *module, const char *sectio return 0; } - -/* Return module owner and fills in closest section of the address - if found, return NULL otherwise. +/* Return module owner and, if sec != NULL, fills in closest section + of the address if found, return NULL otherwise. XXX: needs to be address-space-specific. */ static struct _stp_module *_stp_mod_sec_lookup(unsigned long addr, + struct task_struct *task, struct _stp_section **sec) { - struct _stp_module *m = NULL; + void *user = NULL; unsigned midx = 0; - unsigned long closest_section_offset = ~0; + + // Try vma matching first if task given. + if (task) + { + unsigned long vm_start = 0; + if (stap_find_vma_map_info(task->group_leader, addr, + &vm_start, NULL, + NULL, &user) == 0) + if (user != NULL) + { + struct _stp_module *m = (struct _stp_module *)user; + if (sec) + *sec = &m->sections[0]; // XXX check actual section and relocate + dbug_sym(1, "found section %s in module %s at 0x%lx\n", + m->sections[0].name, m->name, vm_start); + if (strcmp(".dynamic", m->sections[0].name) == 0) + m->sections[0].addr = vm_start; // cheat... + return m; + } + } + for (midx = 0; midx < _stp_num_modules; midx++) { unsigned secidx; for (secidx = 0; secidx < _stp_modules[midx]->num_sections; secidx++) { - unsigned long this_section_addr; - unsigned long this_section_offset; - this_section_addr = _stp_modules[midx]->sections[secidx].addr; - if (addr < this_section_addr) continue; - this_section_offset = addr - this_section_addr; - if (this_section_offset < closest_section_offset) - { - closest_section_offset = this_section_offset; - m = _stp_modules[midx]; - *sec = & m->sections[secidx]; + unsigned long sec_addr; + unsigned long sec_size; + sec_addr = _stp_modules[midx]->sections[secidx].addr; + sec_size = _stp_modules[midx]->sections[secidx].size; + if (addr >= sec_addr && addr < sec_addr + sec_size) + { + if (sec) + *sec = & _stp_modules[midx]->sections[secidx]; + return _stp_modules[midx]; } } } - return m; + return NULL; } @@ -109,14 +185,15 @@ static const char *_stp_kallsyms_lookup(unsigned long addr, unsigned long *symbo unsigned long *offset, const char **modname, /* char ** secname? */ - char *namebuf) + char *namebuf, + struct task_struct *task) { struct _stp_module *m = NULL; struct _stp_section *sec = NULL; struct _stp_symbol *s = NULL; unsigned end, begin = 0; - m = _stp_mod_sec_lookup(addr, &sec); + m = _stp_mod_sec_lookup(addr, task, &sec); if (unlikely (m == NULL || sec == NULL)) return NULL; @@ -195,34 +272,29 @@ static int _stp_module_check(void) dwfl_module_build_id was not intended to return the end address. */ notes_addr -= m->build_id_len; - if (notes_addr > base_addr) { - for (j = 0; j < m->build_id_len; j++) - { - unsigned char theory, practice; - theory = m->build_id_bits [j]; - practice = ((unsigned char*) notes_addr) [j]; - /* XXX: consider using kread() instead of above. */ - if (theory != practice) - { - #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) - _stp_error ("%s: inconsistent %s build-id byte #%d " - "(0x%x [actual] vs. 0x%x [debuginfo])\n", - THIS_MODULE->name, m->name, j, - practice, theory); - return 1; - #else - /* This branch is a surrogate for - kernels affected by Fedora bug - #465873. */ - printk(KERN_WARNING - "%s: inconsistent %s build-id byte #%d " - "(0x%x [actual] vs. 0x%x [debuginfo])\n", - THIS_MODULE->name, m->name, j, - practice, theory); - break; /* Note just the first mismatch. */ - #endif - } - } + if (notes_addr <= base_addr) /* shouldn't happen */ + continue; + if (memcmp(m->build_id_bits, (unsigned char*) notes_addr, m->build_id_len)) { + const char *basename; + + basename = strrchr(m->path, '/'); + if (basename) + basename++; + else + basename = m->path; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) + _stp_error ("Build-id mismatch: \"%s\" %.*M" + " vs. \"%s\" %.*M\n", + m->name, m->build_id_len, notes_addr, + basename, m->build_id_len, m->build_id_bits); + return 1; +#else + /* This branch is a surrogate for kernels + * affected by Fedora bug #465873. */ + printk(KERN_WARNING + "Build-id mismatch: \"%s\" vs. \"%s\"\n", + m->name, basename); +#endif } } /* end checking */ } /* end loop */ @@ -241,7 +313,33 @@ static void _stp_symbol_print(unsigned long address) const char *name; unsigned long offset, size; - name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL); + name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL, NULL); + + _stp_printf("%p", (int64_t) address); + + if (name) { + if (modname && *modname) + _stp_printf(" : %s+%#lx/%#lx [%s]", name, offset, size, modname); + else + _stp_printf(" : %s+%#lx/%#lx", name, offset, size); + } +} + +/** Print an user space address from a specific task symbolically. + * @param address The address to lookup. + * @param task The address to lookup. + * @note Symbolic lookups should not normally be done within + * a probe because it is too time-consuming. Use at module exit time. + */ + +static void _stp_usymbol_print(unsigned long address, struct task_struct *task) +{ + const char *modname; + const char *name; + unsigned long offset, size; + + name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL, + task); _stp_printf("%p", (int64_t) address); @@ -254,7 +352,8 @@ static void _stp_symbol_print(unsigned long address) } /* Like _stp_symbol_print, except only print if the address is a valid function address */ -static int _stp_func_print(unsigned long address, int verbose, int exact) +static int _stp_func_print(unsigned long address, int verbose, int exact, + struct task_struct *task) { const char *modname; const char *name; @@ -266,7 +365,7 @@ static int _stp_func_print(unsigned long address, int verbose, int exact) else exstr = " (inexact)"; - name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL); + name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL, task); if (name) { if (verbose) { @@ -282,16 +381,29 @@ static int _stp_func_print(unsigned long address, int verbose, int exact) return 0; } -static void _stp_symbol_snprint(char *str, size_t len, unsigned long address) +/** Puts symbolic information of an address in a string. + * @param src The string to fill in. + * @param len The length of the given src string. + * @param address The address to lookup. + * @param add_mod Whether to include module name information if found. + */ + +static void _stp_symbol_snprint(char *str, size_t len, unsigned long address, + struct task_struct *task, int add_mod) { const char *modname; const char *name; unsigned long offset, size; - name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL); - if (name) - strlcpy(str, name, len); - else + name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL, + task); + if (name) { + if (add_mod && modname && *modname) + _stp_snprintf(str, len, "%s %s+%#lx/%#lx", + name, modname, offset, size); + else + strlcpy(str, name, len); + } else _stp_snprintf(str, len, "%p", (int64_t) address); } diff --git a/runtime/sym.h b/runtime/sym.h index e642cab4..80c334fb 100644 --- a/runtime/sym.h +++ b/runtime/sym.h @@ -18,6 +18,7 @@ struct _stp_symbol { struct _stp_section { const char *name; unsigned long addr; /* XXX: belongs in per-address-space tables */ + unsigned long size; /* length of the address space module covers. */ struct _stp_symbol *symbols; /* ordered by address */ unsigned num_symbols; }; @@ -25,6 +26,7 @@ struct _stp_section { struct _stp_module { const char* name; + const char* path; /* canonical path used for runtime matching. */ struct _stp_section *sections; unsigned num_sections; diff --git a/runtime/syscall.h b/runtime/syscall.h index ae451070..38b523e1 100644 --- a/runtime/syscall.h +++ b/runtime/syscall.h @@ -1,5 +1,6 @@ -/* syscall defines and inlines - * Copyright (C) 2008 Red Hat Inc. +/* + * syscall defines and inlines + * Copyright (C) 2008-2009 Red Hat Inc. * * This file is part of systemtap, and is free software. You can * redistribute it and/or modify it under the terms of the GNU General @@ -89,9 +90,17 @@ #error "Unimplemented architecture" #endif +#ifdef STAPCONF_ASM_SYSCALL_H + +/* If the system has asm/syscall.h, use defines from it. */ +#include <asm/syscall.h> + +#else /* !STAPCONF_ASM_SYSCALL_H */ + +/* If the system doesn't have asm/syscall.h, use our defines. */ #if defined(__i386__) || defined(__x86_64__) -static inline unsigned long -__stp_user_syscall_nr(struct pt_regs *regs) +static inline long +syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { #if defined(STAPCONF_X86_UNIREGS) return regs->orig_ax; @@ -104,37 +113,45 @@ __stp_user_syscall_nr(struct pt_regs *regs) #endif #if defined(__powerpc__) -static inline unsigned long -__stp_user_syscall_nr(struct pt_regs *regs) +static inline long +syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return regs->gpr[0]; } #endif #if defined(__ia64__) -static inline unsigned long -__stp_user_syscall_nr(struct pt_regs *regs) +static inline long +syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->r15; + if ((long)regs->cr_ifs < 0) /* Not a syscall */ + return -1; + +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) + return regs->r1; +#endif + + return regs->r15; } #endif #if defined(__s390__) || defined(__s390x__) -static inline unsigned long -__stp_user_syscall_nr(struct pt_regs *regs) +static inline long +syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - // might need to be 'orig_gpr2' + // might need to be 'orig_gpr2' return regs->gprs[2]; } #endif #if defined(__i386__) || defined(__x86_64__) -static inline long * -__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs) +static inline long +syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { #ifdef CONFIG_IA32_EMULATION // This code works, but isn't what we need. Since -// __stp_user_syscall_arg() doesn't sign-extend, a value passed in as +// syscall_get_syscall_arg() doesn't sign-extend, a value passed in as // an argument and then returned won't compare correctly anymore. So, // for now, disable this code. # if 0 @@ -145,158 +162,332 @@ __stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs) # endif #endif #if defined(STAPCONF_X86_UNIREGS) - return ®s->ax; + return regs->ax; #elif defined(__x86_64__) - return ®s->rax; + return regs->rax; #elif defined (__i386__) - return ®s->eax; + return regs->eax; #endif } #endif #if defined(__powerpc__) -static inline long * -__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs) +static inline long +syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { - return ®s->gpr[3]; + return regs->gpr[3]; } #endif #if defined(__ia64__) -static inline long * -__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs) +static inline long +syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { - return ®s->r8; + return regs->r8; } #endif #if defined(__s390__) || defined(__s390x__) -static inline long * -__stp_user_syscall_return_value(struct task_struct *task, struct pt_regs *regs) +static inline long +syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { - return ®s->gprs[2]; + return regs->gprs[2]; } #endif #if defined(__i386__) || defined(__x86_64__) -static inline long * -__stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs, - unsigned int n) +static inline void +syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, + unsigned int i, unsigned int n, unsigned long *args) { -#if defined(__i386__) - if (n > 5) { - _stp_error("syscall arg > 5"); - return NULL; + if (i + n > 6) { + _stp_error("invalid syscall arg request"); + return; } +#if defined(__i386__) #if defined(STAPCONF_X86_UNIREGS) - return ®s->bx + n; + memcpy(args, ®s->bx + i, n * sizeof(args[0])); #else - return ®s->ebx + n; + memcpy(args, ®s->ebx + i, n * sizeof(args[0])); #endif #elif defined(__x86_64__) #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - switch (n) { + if (test_tsk_thread_flag(task, TIF_IA32)) { + switch (i) { #if defined(STAPCONF_X86_UNIREGS) - case 0: return ®s->bx; - case 1: return ®s->cx; - case 2: return ®s->dx; - case 3: return ®s->si; - case 4: return ®s->di; - case 5: return ®s->bp; + case 0: + if (!n--) break; + *args++ = regs->bx; + case 1: + if (!n--) break; + *args++ = regs->cx; + case 2: + if (!n--) break; + *args++ = regs->dx; + case 3: + if (!n--) break; + *args++ = regs->si; + case 4: + if (!n--) break; + *args++ = regs->di; + case 5: + if (!n--) break; + *args++ = regs->bp; #else - case 0: return ®s->rbx; - case 1: return ®s->rcx; - case 2: return ®s->rdx; - case 3: return ®s->rsi; - case 4: return ®s->rdi; - case 5: return ®s->rbp; + case 0: + if (!n--) break; + *args++ = regs->rbx; + case 1: + if (!n--) break; + *args++ = regs->rcx; + case 2: + if (!n--) break; + *args++ = regs->rdx; + case 3: + if (!n--) break; + *args++ = regs->rsi; + case 4: + if (!n--) break; + *args++ = regs->rdi; + case 5: + if (!n--) break; + *args++ = regs->rbp; #endif - default: - _stp_error("syscall arg > 5"); - return NULL; } + return; + } #endif /* CONFIG_IA32_EMULATION */ - switch (n) { + switch (i) { #if defined(STAPCONF_X86_UNIREGS) - case 0: return ®s->di; - case 1: return ®s->si; - case 2: return ®s->dx; - case 3: return ®s->r10; - case 4: return ®s->r8; - case 5: return ®s->r9; + case 0: + if (!n--) break; + *args++ = regs->di; + case 1: + if (!n--) break; + *args++ = regs->si; + case 2: + if (!n--) break; + *args++ = regs->dx; + case 3: + if (!n--) break; + *args++ = regs->r10; + case 4: + if (!n--) break; + *args++ = regs->r8; + case 5: + if (!n--) break; + *args++ = regs->r9; #else - case 0: return ®s->rdi; - case 1: return ®s->rsi; - case 2: return ®s->rdx; - case 3: return ®s->r10; - case 4: return ®s->r8; - case 5: return ®s->r9; + case 0: + if (!n--) break; + *args++ = regs->rdi; + case 1: + if (!n--) break; + *args++ = regs->rsi; + case 2: + if (!n--) break; + *args++ = regs->rdx; + case 3: + if (!n--) break; + *args++ = regs->r10; + case 4: + if (!n--) break; + *args++ = regs->r8; + case 5: + if (!n--) break; + *args++ = regs->r9; #endif - default: - _stp_error("syscall arg > 5"); - return NULL; } #endif /* CONFIG_X86_32 */ + return; } #endif #if defined(__powerpc__) -static inline long * -__stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs, - unsigned int n) +static inline void +syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, + unsigned int i, unsigned int n, unsigned long *args) { - switch (n) { - case 0: return ®s->gpr[3]; - case 1: return ®s->gpr[4]; - case 2: return ®s->gpr[5]; - case 3: return ®s->gpr[6]; - case 4: return ®s->gpr[7]; - case 5: return ®s->gpr[8]; - default: - _stp_error("syscall arg > 5"); - return NULL; + if (i + n > 6) { + _stp_error("invalid syscall arg request"); + return; } +#ifdef CONFIG_PPC64 + if (test_tsk_thread_flag(task, TIF_32BIT)) { + /* + * Zero-extend 32-bit argument values. The high bits are + * garbage ignored by the actual syscall dispatch. + */ + while (n-- > 0) + args[n] = (u32) regs->gpr[3 + i + n]; + return; + } +#endif + memcpy(args, ®s->gpr[3 + i], n * sizeof(args[0])); } #endif #if defined(__ia64__) -#define __stp_user_syscall_arg(task, regs, n) \ - ____stp_user_syscall_arg(task, regs, n, &c->unwaddr) -static inline long * -____stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs, - unsigned int n, unsigned long **cache) +/* Return TRUE if PT was created due to kernel-entry via a system-call. */ + +static inline int +in_syscall (struct pt_regs *pt) +{ + return (long) pt->cr_ifs >= 0; +} + +struct syscall_get_set_args { + unsigned int i; + unsigned int n; + unsigned long *args; + struct pt_regs *regs; + int rw; +}; + +#define CFM_SOF(cfm) ((cfm) & 0x7f) /* Size of frame */ +#define CFM_SOL(cfm) (((cfm) >> 7) & 0x7f) /* Size of locals */ +#define CFM_OUT(cfm) (CFM_SOF(cfm) - CFM_SOL(cfm)) /* Size of outputs */ + +static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) +{ + struct syscall_get_set_args *args = data; + struct pt_regs *pt = args->regs; + unsigned long *krbs, cfm, ndirty; + int i, count; + + if (unw_unwind_to_user(info) < 0) + return; + + cfm = pt->cr_ifs; + krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + count = 0; + if (in_syscall(pt)) + /* args->i + args->n must be less equal than nr outputs */ + count = min_t(int, args->n, CFM_OUT(cfm) - args->i); + + for (i = 0; i < count; i++) { + /* Skips dirties and locals */ + if (args->rw) + *ia64_rse_skip_regs(krbs, + ndirty + CFM_SOL(cfm) + args->i + i) = + args->args[i]; + else + args->args[i] = *ia64_rse_skip_regs(krbs, + ndirty + CFM_SOL(cfm) + args->i + i); + } + + if (!args->rw) { + while (i < args->n) { + args->args[i] = 0; + i++; + } + } +} + +void ia64_syscall_get_set_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned int i, unsigned int n, + unsigned long *args, int rw) { - if (n > 5) { - _stp_error("syscall arg > 5"); - return NULL; + struct syscall_get_set_args data = { + .i = i, + .n = n, + .args = args, + .regs = regs, + .rw = rw, + }; + + if (task == current) + unw_init_running(syscall_get_set_args_cb, &data); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, task); + syscall_get_set_args_cb(&ufi, &data); } - return __ia64_fetch_register(n + 32, regs, cache); +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) { + switch (i + n) { + case 6: + if (!n--) break; + *args++ = regs->r13; + case 5: + if (!n--) break; + *args++ = regs->r15; + case 4: + if (!n--) break; + *args++ = regs->r14; + case 3: + if (!n--) break; + *args++ = regs->r10; + case 2: + if (!n--) break; + *args++ = regs->r9; + case 1: + if (!n--) break; + *args++ = regs->r11; + case 0: + if (!n--) break; + default: + BUG(); + break; + } + + return; + } +#endif + ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); } #endif #if defined(__s390__) || defined(__s390x__) -static inline long * -__stp_user_syscall_arg(struct task_struct *task, struct pt_regs *regs, - unsigned int n) +static inline void +syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, + unsigned int i, unsigned int n, unsigned long *args) { - /* If we were returning a value, we could check for TIF_31BIT - * here and cast the value with '(u32)' to make sure it got - * down to 32bits. But, since we're returning an address, - * there isn't much we can do. */ - switch (n) { - case 0: return ®s->orig_gpr2; - case 1: return ®s->gprs[3]; - case 2: return ®s->gprs[4]; - case 3: return ®s->gprs[5]; - case 4: return ®s->gprs[6]; - case 5: return ®s->args[0]; - default: - _stp_error("syscall arg > 5"); - return NULL; + unsigned long mask = -1UL; + + if (i + n > 6) { + _stp_error("invalid syscall arg request"); + return; + } +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + mask = 0xffffffff; +#endif + switch (i) { + case 0: + if (!n--) break; + *args++ = regs->orig_gpr2 & mask; + case 1: + if (!n--) break; + *args++ = regs->gprs[3] & mask; + case 2: + if (!n--) break; + *args++ = regs->gprs[4] & mask; + case 3: + if (!n--) break; + *args++ = regs->gprs[5] & mask; + case 4: + if (!n--) break; + *args++ = regs->gprs[6] & mask; + case 5: + if (!n--) break; + *args++ = regs->args[0] & mask; } } #endif +#endif /* !STAPCONF_ASM_SYSCALL_H */ #endif /* _SYSCALL_H_ */ diff --git a/runtime/task_finder.c b/runtime/task_finder.c index 9db713c3..f5e059ca 100644 --- a/runtime/task_finder.c +++ b/runtime/task_finder.c @@ -2,17 +2,24 @@ #define TASK_FINDER_C #if ! defined(CONFIG_UTRACE) -#error "Need CONFIG_UTRACE!" -#endif +/* Dummy definitions for use in sym.c */ +struct stap_task_finder_target { }; +#else #include <linux/utrace.h> + +/* PR9974: Adapt to struct renaming. */ +#ifdef UTRACE_API_VERSION +#define utrace_attached_engine utrace_engine +#endif + #include <linux/list.h> #include <linux/binfmts.h> #include <linux/mount.h> #include "syscall.h" #include "utrace_compatibility.h" -#include "task_finder_vma.c" +#include "task_finder_map.c" static LIST_HEAD(__stp_task_finder_list); @@ -33,51 +40,49 @@ static atomic_t __stp_attach_count = ATOMIC_INIT (0); #define debug_task_finder_attach() (atomic_inc(&__stp_attach_count)) #define debug_task_finder_detach() (atomic_dec(&__stp_attach_count)) +#ifdef DEBUG_TASK_FINDER_PRINTK +#define debug_task_finder_report() (printk(KERN_ERR \ + "%s:%d attach count: %d, inuse count: %d\n", \ + __FUNCTION__, __LINE__, \ + atomic_read(&__stp_attach_count), \ + atomic_read(&__stp_inuse_count))) +#else #define debug_task_finder_report() (_stp_dbug(__FUNCTION__, __LINE__, \ "attach count: %d, inuse count: %d\n", \ atomic_read(&__stp_attach_count), \ atomic_read(&__stp_inuse_count))) +#endif /* !DEBUG_TASK_FINDER_PRINTK */ #else #define debug_task_finder_attach() /* empty */ #define debug_task_finder_detach() /* empty */ #define debug_task_finder_report() /* empty */ -#endif +#endif /* !DEBUG_TASK_FINDER */ typedef int (*stap_task_finder_callback)(struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p); -typedef int (*stap_task_finder_vm_callback)(struct stap_task_finder_target *tgt, - struct task_struct *tsk, - int map_p, char *vm_path, - unsigned long vm_start, - unsigned long vm_end, - unsigned long vm_pgoff); - -#ifdef DEBUG_TASK_FINDER_VMA -static int __stp_tf_vm_cb(struct stap_task_finder_target *tgt, - struct task_struct *tsk, - int map_p, char *vm_path, - unsigned long vm_start, - unsigned long vm_end, - unsigned long vm_pgoff) -{ - _stp_dbug(__FUNCTION__, __LINE__, - "vm_cb: tsk %d:%d path %s, start 0x%08lx, end 0x%08lx, offset 0x%lx\n", - tsk->pid, map_p, vm_path, vm_start, vm_end, vm_pgoff); - if (map_p) { - // FIXME: What should we do with vm_path? We can't save - // the vm_path pointer itself, but we don't have any - // storage space allocated to save it in... - stap_add_vma_map_info(tsk, vm_start, vm_end, vm_pgoff); - } - else { - stap_remove_vma_map_info(tsk, vm_start, vm_end, vm_pgoff); - } - return 0; -} -#endif +typedef int +(*stap_task_finder_mmap_callback)(struct stap_task_finder_target *tgt, + struct task_struct *tsk, + char *path, + unsigned long addr, + unsigned long length, + unsigned long offset, + unsigned long vm_flags); +typedef int +(*stap_task_finder_munmap_callback)(struct stap_task_finder_target *tgt, + struct task_struct *tsk, + unsigned long addr, + unsigned long length); + +typedef int +(*stap_task_finder_mprotect_callback)(struct stap_task_finder_target *tgt, + struct task_struct *tsk, + unsigned long addr, + unsigned long length, + int prot); struct stap_task_finder_target { /* private: */ @@ -86,14 +91,18 @@ struct stap_task_finder_target { struct list_head callback_list; struct utrace_engine_ops ops; unsigned engine_attached:1; - unsigned vm_events:1; + unsigned mmap_events:1; + unsigned munmap_events:1; + unsigned mprotect_events:1; size_t pathlen; /* public: */ const char *pathname; pid_t pid; stap_task_finder_callback callback; - stap_task_finder_vm_callback vm_callback; + stap_task_finder_mmap_callback mmap_callback; + stap_task_finder_munmap_callback munmap_callback; + stap_task_finder_mprotect_callback mprotect_callback; }; #ifdef UTRACE_ORIG_VERSION @@ -165,7 +174,9 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) // Make sure everything is initialized properly. new_tgt->engine_attached = 0; - new_tgt->vm_events = 0; + new_tgt->mmap_events = 0; + new_tgt->munmap_events = 0; + new_tgt->mprotect_events = 0; memset(&new_tgt->ops, 0, sizeof(new_tgt->ops)); new_tgt->ops.report_death = &__stp_utrace_task_finder_target_death; new_tgt->ops.report_quiesce = &__stp_utrace_task_finder_target_quiesce; @@ -184,7 +195,7 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) && strcmp(tgt->pathname, new_tgt->pathname) == 0) /* pid-based target (a specific pid or all * pids) */ - || (new_tgt->pathlen == 0 + || (new_tgt->pathlen == 0 && tgt->pathlen == 0 && tgt->pid == new_tgt->pid))) { found_node = 1; break; @@ -202,9 +213,13 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) // Add this target to the callback list for this task. list_add_tail(&new_tgt->callback_list, &tgt->callback_list_head); - // If the new target has a vm_callback, remember this. - if (new_tgt->vm_callback != NULL) - tgt->vm_events = 1; + // If the new target has any m* callbacks, remember this. + if (new_tgt->mmap_callback != NULL) + tgt->mmap_events = 1; + if (new_tgt->munmap_callback != NULL) + tgt->munmap_events = 1; + if (new_tgt->mprotect_callback != NULL) + tgt->mprotect_events = 1; return 0; } @@ -264,11 +279,15 @@ stap_utrace_detach(struct task_struct *tsk, break; case -ESRCH: /* REAP callback already begun */ case -EALREADY: /* DEATH callback already begun */ - rc = 0; /* ignore these errors*/ + rc = 0; /* ignore these errors */ + break; + case -EINPROGRESS: + debug_task_finder_detach(); + rc = 0; break; default: rc = -rc; - _stp_error("utrace_detach returned error %d on pid %d", + _stp_error("utrace_control returned error %d on pid %d", rc, tsk->pid); break; } @@ -282,7 +301,6 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops) { struct task_struct *grp, *tsk; struct utrace_attached_engine *engine; - int rc = 0; pid_t pid = 0; // Notice we're not calling get_task_mm() in this loop. In @@ -308,11 +326,12 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops) continue; #endif - rc = stap_utrace_detach(tsk, ops); - if (rc != 0) - goto udo_err; + /* Notice we're purposefully ignoring errors from + * stap_utrace_detach(). Even if we got an error on + * this task, we need to keep detaching from other + * tasks. */ + (void) stap_utrace_detach(tsk, ops); } while_each_thread(grp, tsk); -udo_err: rcu_read_unlock(); debug_task_finder_report(); } @@ -383,10 +402,10 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) /* * __STP_TASK_BASE_EVENTS: base events for stap_task_finder_target's - * without vm_callback's + * without map callback's * * __STP_TASK_VM_BASE_EVENTS: base events for - * stap_task_finder_target's with vm_callback's + * stap_task_finder_target's with map callback's */ #define __STP_TASK_BASE_EVENTS (UTRACE_EVENT(DEATH)) @@ -403,8 +422,10 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) #define __STP_ATTACHED_TASK_EVENTS (__STP_TASK_BASE_EVENTS \ | UTRACE_EVENT(QUIESCE)) -#define __STP_ATTACHED_TASK_BASE_EVENTS(tgt) \ - ((tgt)->vm_events ? __STP_TASK_VM_BASE_EVENTS : __STP_TASK_BASE_EVENTS) +#define __STP_ATTACHED_TASK_BASE_EVENTS(tgt) \ + (((tgt)->mmap_events || (tgt)->munmap_events \ + || (tgt)->mprotect_events) \ + ? __STP_TASK_VM_BASE_EVENTS : __STP_TASK_BASE_EVENTS) static int __stp_utrace_attach(struct task_struct *tsk, @@ -459,7 +480,7 @@ __stp_utrace_attach(struct task_struct *tsk, * ref. */ rc = utrace_barrier(tsk, engine); - if (rc != 0) + if (rc != -ESRCH && rc != -EALREADY) _stp_error("utrace_barrier returned error %d on pid %d", rc, (int)tsk->pid); } @@ -478,7 +499,7 @@ __stp_utrace_attach(struct task_struct *tsk, } } - else + else if (rc != -ESRCH && rc != -EALREADY) _stp_error("utrace_set_events2 returned error %d on pid %d", rc, (int)tsk->pid); utrace_engine_put(engine); @@ -520,11 +541,113 @@ __stp_call_callbacks(struct stap_task_finder_target *tgt, } } +static void +__stp_call_mmap_callbacks(struct stap_task_finder_target *tgt, + struct task_struct *tsk, char *path, + unsigned long addr, unsigned long length, + unsigned long offset, unsigned long vm_flags) +{ + struct list_head *cb_node; + int rc; + + if (tgt == NULL || tsk == NULL) + return; + +#ifdef DEBUG_TASK_FINDER_VMA + _stp_dbug(__FUNCTION__, __LINE__, + "pid %d, a/l/o/p/path 0x%lx 0x%lx 0x%lx %c%c%c%c %s\n", + tsk->pid, addr, length, offset, + vm_flags & VM_READ ? 'r' : '-', + vm_flags & VM_WRITE ? 'w' : '-', + vm_flags & VM_EXEC ? 'x' : '-', + vm_flags & VM_MAYSHARE ? 's' : 'p', + path); +#endif + list_for_each(cb_node, &tgt->callback_list_head) { + struct stap_task_finder_target *cb_tgt; + + cb_tgt = list_entry(cb_node, struct stap_task_finder_target, + callback_list); + if (cb_tgt == NULL || cb_tgt->mmap_callback == NULL) + continue; + + rc = cb_tgt->mmap_callback(cb_tgt, tsk, path, addr, length, + offset, vm_flags); + if (rc != 0) { + _stp_error("mmap callback for %d failed: %d", + (int)tsk->pid, rc); + } + } +} + +static void +__stp_call_mmap_callbacks_with_vma(struct stap_task_finder_target *tgt, + struct task_struct *tsk, + struct vm_area_struct *vma) +{ + char *mmpath_buf; + char *mmpath; + int rc; + + // Allocate space for a path + mmpath_buf = _stp_kmalloc(PATH_MAX); + if (mmpath_buf == NULL) { + _stp_error("Unable to allocate space for path"); + return; + } + + // Grab the path associated with this vma. +#ifdef STAPCONF_DPATH_PATH + mmpath = d_path(&(vma->vm_file->f_path), mmpath_buf, PATH_MAX); +#else + mmpath = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt, + mmpath_buf, PATH_MAX); +#endif + if (mmpath == NULL || IS_ERR(mmpath)) { + rc = -PTR_ERR(mmpath); + _stp_error("Unable to get path (error %d) for pid %d", + rc, (int)tsk->pid); + } + else { + __stp_call_mmap_callbacks(tgt, tsk, mmpath, vma->vm_start, + vma->vm_end - vma->vm_start, + (vma->vm_pgoff << PAGE_SHIFT), + vma->vm_flags); + } + _stp_kfree(mmpath_buf); +} + +static inline void +__stp_call_munmap_callbacks(struct stap_task_finder_target *tgt, + struct task_struct *tsk, unsigned long addr, + unsigned long length) +{ + struct list_head *cb_node; + int rc; + + if (tgt == NULL || tsk == NULL) + return; + + list_for_each(cb_node, &tgt->callback_list_head) { + struct stap_task_finder_target *cb_tgt; + + cb_tgt = list_entry(cb_node, struct stap_task_finder_target, + callback_list); + if (cb_tgt == NULL || cb_tgt->munmap_callback == NULL) + continue; + + rc = cb_tgt->munmap_callback(cb_tgt, tsk, addr, length); + if (rc != 0) { + _stp_error("munmap callback for %d failed: %d", + (int)tsk->pid, rc); + } + } +} + static inline void -__stp_call_vm_callbacks(struct stap_task_finder_target *tgt, - struct task_struct *tsk, int map_p, char *vm_path, - unsigned long vm_start, unsigned long vm_end, - unsigned long vm_pgoff) +__stp_call_mprotect_callbacks(struct stap_task_finder_target *tgt, + struct task_struct *tsk, unsigned long addr, + unsigned long length, int prot) { struct list_head *cb_node; int rc; @@ -537,13 +660,13 @@ __stp_call_vm_callbacks(struct stap_task_finder_target *tgt, cb_tgt = list_entry(cb_node, struct stap_task_finder_target, callback_list); - if (cb_tgt == NULL || cb_tgt->vm_callback == NULL) + if (cb_tgt == NULL || cb_tgt->mprotect_callback == NULL) continue; - rc = cb_tgt->vm_callback(cb_tgt, tsk, map_p, vm_path, - vm_start, vm_end, vm_pgoff); + rc = cb_tgt->mprotect_callback(cb_tgt, tsk, addr, length, + prot); if (rc != 0) { - _stp_error("vm callback for %d failed: %d", + _stp_error("mprotect callback for %d failed: %d", (int)tsk->pid, rc); } } @@ -853,11 +976,12 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action, * a stale task pointer, if we have an engine ref. */ rc = utrace_barrier(tsk, engine); - if (rc != 0) + if (rc == 0) + rc = utrace_set_events(tsk, engine, + __STP_ATTACHED_TASK_BASE_EVENTS(tgt)); + else if (rc != -ESRCH && rc != -EALREADY) _stp_error("utrace_barrier returned error %d on pid %d", rc, (int)tsk->pid); - rc = utrace_set_events(tsk, engine, - __STP_ATTACHED_TASK_BASE_EVENTS(tgt)); } if (rc != 0) _stp_error("utrace_set_events returned error %d on pid %d", @@ -869,16 +993,16 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action, __stp_call_callbacks(tgt, tsk, 1, (tsk->pid == tsk->tgid)); /* If this is just a thread other than the thread group leader, - don't bother inform vm_callback clients about its memory map, + don't bother inform map callback clients about its memory map, since they will simply duplicate each other. */ - if (tgt->vm_events == 1 && tsk->tgid == tsk->pid) { + if (tgt->mmap_events == 1 && tsk->tgid == tsk->pid) { struct mm_struct *mm; char *mmpath_buf; char *mmpath; struct vm_area_struct *vma; int rc; - /* Call the vm_callback for every vma associated with + /* Call the mmap_callback for every vma associated with * a file. */ mm = get_task_mm(tsk); if (! mm) @@ -905,12 +1029,13 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action, mmpath_buf, PATH_MAX); #endif if (mmpath) { - __stp_call_vm_callbacks(tgt, tsk, 1, - mmpath, - vma->vm_start, - vma->vm_end, - (vma->vm_pgoff - << PAGE_SHIFT)); + __stp_call_mmap_callbacks(tgt, tsk, + mmpath, + vma->vm_start, + vma->vm_end - vma->vm_start, + (vma->vm_pgoff + << PAGE_SHIFT), + vma->vm_flags); } else { _stp_dbug(__FUNCTION__, __LINE__, @@ -957,96 +1082,60 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, #endif { struct stap_task_finder_target *tgt = engine->data; - unsigned long syscall_no; - struct mm_struct *mm; - struct vm_area_struct *vma; - unsigned long *arg0_addr, arg0; + long syscall_no; + unsigned long args[3] = { 0L }; int rc; -#if defined(__ia64__) - struct { unsigned long *unwaddr; } _c = {.unwaddr = NULL}, *c = &_c; -#endif if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { debug_task_finder_detach(); return UTRACE_DETACH; } - if (tgt == NULL || tgt->vm_events == 0) + if (tgt == NULL) return UTRACE_RESUME; // See if syscall is one we're interested in. // // FIXME: do we need to handle mremap()? - syscall_no = __stp_user_syscall_nr(regs); + syscall_no = syscall_get_nr(tsk, regs); if (syscall_no != MMAP_SYSCALL_NO(tsk) && syscall_no != MMAP2_SYSCALL_NO(tsk) && syscall_no != MPROTECT_SYSCALL_NO(tsk) && syscall_no != MUNMAP_SYSCALL_NO(tsk)) return UTRACE_RESUME; - __stp_tf_handler_start(); + // The syscall is one we're interested in, but do we have a + // handler for it? + if (((syscall_no == MMAP_SYSCALL_NO(tsk) + || syscall_no == MMAP2_SYSCALL_NO(tsk)) && tgt->mmap_events == 0) + || (syscall_no == MPROTECT_SYSCALL_NO(tsk) + && tgt->mprotect_events == 0) + || (syscall_no == MUNMAP_SYSCALL_NO(tsk) + && tgt->munmap_events == 0)) + return UTRACE_RESUME; - // We need the first syscall argument to see what address - // we're operating on. - arg0_addr = __stp_user_syscall_arg(tsk, regs, 0); - if ((rc = __stp_get_user(arg0, arg0_addr)) != 0) { - _stp_error("couldn't read syscall arg 0 for pid %d: %d", - tsk->pid, rc); + __stp_tf_handler_start(); + if (syscall_no == MUNMAP_SYSCALL_NO(tsk)) { + // We need 2 arguments + syscall_get_arguments(tsk, regs, 0, 2, args); } - else if (arg0 != (unsigned long)NULL) { - mm = get_task_mm(tsk); - if (mm) { - down_read(&mm->mmap_sem); - - // If we can find a matching vma associated - // with a file, save off its details. - vma = __stp_find_file_based_vma(mm, arg0); - if (vma != NULL) { - __stp_tf_add_vma(tsk, arg0, vma); - } - - up_read(&mm->mmap_sem); - mmput(mm); - } + else if (syscall_no == MMAP_SYSCALL_NO(tsk) + || syscall_no == MMAP2_SYSCALL_NO(tsk)) { + // For mmap, we really just need the return value, so + // there is no need to save arguments } - __stp_tf_handler_end(); - return UTRACE_RESUME; -} - -static void -__stp_call_vm_callbacks_with_vma(struct stap_task_finder_target *tgt, - struct task_struct *tsk, - struct vm_area_struct *vma) -{ - char *mmpath_buf; - char *mmpath; - int rc; - - // Allocate space for a path - mmpath_buf = _stp_kmalloc(PATH_MAX); - if (mmpath_buf == NULL) { - _stp_error("Unable to allocate space for path"); - return; + else { // mprotect() + // We need 3 arguments + syscall_get_arguments(tsk, regs, 0, 3, args); } - // Grab the path associated with this vma. -#ifdef STAPCONF_DPATH_PATH - mmpath = d_path(&(vma->vm_file->f_path), mmpath_buf, PATH_MAX); -#else - mmpath = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt, - mmpath_buf, PATH_MAX); -#endif - if (mmpath == NULL || IS_ERR(mmpath)) { - rc = -PTR_ERR(mmpath); - _stp_error("Unable to get path (error %d) for pid %d", - rc, (int)tsk->pid); - } - else { - __stp_call_vm_callbacks(tgt, tsk, 1, mmpath, - vma->vm_start, vma->vm_end, - (vma->vm_pgoff << PAGE_SHIFT)); - } - _stp_kfree(mmpath_buf); + // Remember the syscall information + rc = __stp_tf_add_map(tsk, syscall_no, args[0], args[1], args[2]); + if (rc != 0) + _stp_error("__stp_tf_add_map returned error %d on pid %d", + rc, tsk->pid); + __stp_tf_handler_end(); + return UTRACE_RESUME; } #ifdef UTRACE_ORIG_VERSION @@ -1063,165 +1152,75 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, #endif { struct stap_task_finder_target *tgt = engine->data; - unsigned long syscall_no; - unsigned long *rv_addr, rv; - unsigned long *arg0_addr, arg0; - int rc; - struct mm_struct *mm; - struct vm_area_struct *vma; - struct __stp_tf_vma_entry *entry = NULL; -#if defined(__ia64__) - struct { unsigned long *unwaddr; } _c = {.unwaddr = NULL}, *c = &_c; -#endif + unsigned long rv; + struct __stp_tf_map_entry *entry; if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { debug_task_finder_detach(); return UTRACE_DETACH; } - if (tgt == NULL || tgt->vm_events == 0) + if (tgt == NULL) return UTRACE_RESUME; - // See if syscall is one we're interested in. - // - // FIXME: do we need to handle mremap()? - syscall_no = __stp_user_syscall_nr(regs); - if (syscall_no != MMAP_SYSCALL_NO(tsk) - && syscall_no != MMAP2_SYSCALL_NO(tsk) - && syscall_no != MPROTECT_SYSCALL_NO(tsk) - && syscall_no != MUNMAP_SYSCALL_NO(tsk)) + // See if we can find saved syscall info. If we can, it must + // be one of the syscalls we are interested in (and we must + // have callbacks to call for it). + entry = __stp_tf_get_map_entry(tsk); + if (entry == NULL) return UTRACE_RESUME; // Get return value - rv_addr = __stp_user_syscall_return_value(tsk, regs); - if ((rc = __stp_get_user(rv, rv_addr)) != 0) { - _stp_error("couldn't read syscall return value for pid %d: %d", - tsk->pid, rc); - return UTRACE_RESUME; - } - - // We need the first syscall argument to see what address we - // were operating on. - arg0_addr = __stp_user_syscall_arg(tsk, regs, 0); - if ((rc = __stp_get_user(arg0, arg0_addr)) != 0) { - _stp_error("couldn't read syscall arg 0 for pid %d: %d", - tsk->pid, rc); - return UTRACE_RESUME; - } + __stp_tf_handler_start(); + rv = syscall_get_return_value(tsk, regs); #ifdef DEBUG_TASK_FINDER_VMA _stp_dbug(__FUNCTION__, __LINE__, "tsk %d found %s(0x%lx), returned 0x%lx\n", tsk->pid, - ((syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap" - : ((syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2" - : ((syscall_no == MPROTECT_SYSCALL_NO(tsk)) ? "mprotect" - : ((syscall_no == MUNMAP_SYSCALL_NO(tsk)) ? "munmap" + ((entry->syscall_no == MMAP_SYSCALL_NO(tsk)) ? "mmap" + : ((entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) ? "mmap2" + : ((entry->syscall_no == MPROTECT_SYSCALL_NO(tsk)) + ? "mprotect" + : ((entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) + ? "munmap" : "UNKNOWN")))), - arg0, rv); + entry->arg0, rv); #endif - __stp_tf_handler_start(); - // Try to find the vma info we might have saved. - if (arg0 != (unsigned long)NULL) - entry = __stp_tf_get_vma_entry(tsk, arg0); + if (entry->syscall_no == MUNMAP_SYSCALL_NO(tsk)) { + // Call the callbacks + __stp_call_munmap_callbacks(tgt, tsk, entry->arg0, entry->arg1); + } + else if (entry->syscall_no == MMAP_SYSCALL_NO(tsk) + || entry->syscall_no == MMAP2_SYSCALL_NO(tsk)) { + struct mm_struct *mm; - // If entry is NULL, this means we didn't find a file based - // vma to store in the syscall_entry routine. This could mean - // we just created a new vma. - if (entry == NULL) { mm = get_task_mm(tsk); if (mm) { + struct vm_area_struct *vma; + down_read(&mm->mmap_sem); vma = __stp_find_file_based_vma(mm, rv); - if (vma != NULL) { - __stp_call_vm_callbacks_with_vma(tgt, tsk, vma); - } - up_read(&mm->mmap_sem); - mmput(mm); - } - } - // If we found saved vma information, try to match it up with - // what currently exists. - else { -#ifdef DEBUG_TASK_FINDER_VMA - _stp_dbug(__FUNCTION__, __LINE__, - "** found stored vma 0x%lx/0x%lx/0x%lx!\n", - entry->vm_start, entry->vm_end, entry->vm_pgoff); -#endif - mm = get_task_mm(tsk); - if (mm) { - down_read(&mm->mmap_sem); - vma = __stp_find_file_based_vma(mm, entry->vm_start); - - // We couldn't find the vma at all. The - // original vma was deleted. - if (vma == NULL) { - // FIXME: We'll need to figure out to - // retrieve the path of a deleted - // vma. - - __stp_call_vm_callbacks(tgt, tsk, 0, NULL, - entry->vm_start, - entry->vm_end, - (entry->vm_pgoff - << PAGE_SHIFT)); - } - // If nothing has changed, there is no - // need to call the callback. - else if (vma->vm_start == entry->vm_start - && vma->vm_end == entry->vm_end - && vma->vm_pgoff == entry->vm_pgoff) { - // do nothing + // Call the callbacks + if (vma) { + __stp_call_mmap_callbacks_with_vma(tgt, tsk, + vma); } - // The original vma has been changed. It is - // possible that calling mprotect (e.g.) split - // up an existing vma into 2 or 3 new vma's - // (assuming it protected a portion of the - // original vma at the beginning, middle, or - // end). Try to determine what happened. - else { - unsigned long tmp; - - // First report that the original vma - // is gone. - // - // FIXME: We'll need to figure out to - // retrieve the path of a deleted - // vma. - __stp_call_vm_callbacks(tgt, tsk, 0, NULL, - entry->vm_start, - entry->vm_end, - (entry->vm_pgoff - << PAGE_SHIFT)); - - // Now find all the new vma's that - // made up the original vma's address - // space and call the callback on each - // new vma. - tmp = entry->vm_start; - while (((vma = __stp_find_file_based_vma(mm, - tmp)) - != NULL) - && vma->vm_end <= entry->vm_end) { - __stp_call_vm_callbacks_with_vma(tgt, - tsk, - vma); - if (vma->vm_end >= entry->vm_end) - break; - tmp = vma->vm_end; - } - } up_read(&mm->mmap_sem); mmput(mm); } - - // Cleanup by deleting the saved vma info. - __stp_tf_remove_vma_entry(entry); } + else { // mprotect + // Call the callbacks + __stp_call_mprotect_callbacks(tgt, tsk, entry->arg0, + entry->arg1, entry->arg2); + } + __stp_tf_handler_end(); + __stp_tf_remove_map_entry(entry); return UTRACE_RESUME; } @@ -1245,7 +1244,7 @@ stap_start_task_finder(void) return ENOMEM; } - __stp_tf_vma_initialize(); + __stp_tf_map_initialize(); atomic_set(&__stp_task_finder_state, __STP_TF_RUNNING); @@ -1362,5 +1361,5 @@ stap_stop_task_finder(void) debug_task_finder_report(); } - +#endif /* defined(CONFIG_UTRACE) */ #endif /* TASK_FINDER_C */ diff --git a/runtime/task_finder_map.c b/runtime/task_finder_map.c new file mode 100644 index 00000000..b770dd0e --- /dev/null +++ b/runtime/task_finder_map.c @@ -0,0 +1,191 @@ +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> + +// When handling mmap()/munmap()/mprotect() syscall tracing to notice +// memory map changes, we need to cache syscall entry parameter values +// for processing at syscall exit. + +// __stp_tf_map_lock protects the hash table. +// Documentation/spinlocks.txt suggest we can be a bit more clever +// if we guarantee that in interrupt context we only read, not write +// the datastructures. We should never change the hash table or the +// contents in interrupt context (which should only ever call +// stap_find_map_map_info for getting stored info). So we might +// want to look into that if this seems a bottleneck. +static DEFINE_RWLOCK(__stp_tf_map_lock); + +#define __STP_TF_HASH_BITS 4 +#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS) + +#ifndef TASK_FINDER_MAP_ENTRY_ITEMS +#define TASK_FINDER_MAP_ENTRY_ITEMS 100 +#endif + +struct __stp_tf_map_entry { +/* private: */ + struct hlist_node hlist; + int usage; + +/* public: */ + pid_t pid; + long syscall_no; + unsigned long arg0; + unsigned long arg1; + unsigned long arg2; +}; + +static struct __stp_tf_map_entry +__stp_tf_map_free_list_items[TASK_FINDER_MAP_ENTRY_ITEMS]; + +static struct hlist_head __stp_tf_map_free_list[1]; + +static struct hlist_head __stp_tf_map_table[__STP_TF_TABLE_SIZE]; + +// __stp_tf_map_initialize(): Initialize the free list. Grabs the +// lock. +static void +__stp_tf_map_initialize(void) +{ + int i; + struct hlist_head *head = &__stp_tf_map_free_list[0]; + + unsigned long flags; + write_lock_irqsave(&__stp_tf_map_lock, flags); + for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) { + hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head); + } + write_unlock_irqrestore(&__stp_tf_map_lock, flags); +} + + +// __stp_tf_map_get_free_entry(): Returns an entry from the free list +// or NULL. The __stp_tf_map_lock must be write locked before calling this +// function. +static struct __stp_tf_map_entry * +__stp_tf_map_get_free_entry(void) +{ + struct hlist_head *head = &__stp_tf_map_free_list[0]; + struct hlist_node *node; + struct __stp_tf_map_entry *entry = NULL; + + if (hlist_empty(head)) + return NULL; + hlist_for_each_entry(entry, node, head, hlist) { + break; + } + if (entry != NULL) + hlist_del(&entry->hlist); + return entry; +} + + +// __stp_tf_map_put_free_entry(): Puts an entry back on the free +// list. The __stp_tf_map_lock must be write locked before calling this +// function. +static void +__stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry) +{ + struct hlist_head *head = &__stp_tf_map_free_list[0]; + hlist_add_head(&entry->hlist, head); +} + + +// __stp_tf_map_hash(): Compute the map hash. +static inline u32 +__stp_tf_map_hash(struct task_struct *tsk) +{ + return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1)); +} + + +// Get map_entry if the map is present in the map hash table. +// Returns NULL if not present. Takes a read lock on __stp_tf_map_lock. +static struct __stp_tf_map_entry * +__stp_tf_get_map_entry(struct task_struct *tsk) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_map_entry *entry; + + unsigned long flags; + read_lock_irqsave(&__stp_tf_map_lock, flags); + head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + if (tsk->pid == entry->pid) { + read_unlock_irqrestore(&__stp_tf_map_lock, flags); + return entry; + } + } + read_unlock_irqrestore(&__stp_tf_map_lock, flags); + return NULL; +} + + +// Add the map info to the map hash table. Takes a write lock on +// __stp_tf_map_lock. +static int +__stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_map_entry *entry; + unsigned long flags; + + write_lock_irqsave(&__stp_tf_map_lock, flags); + head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + // If we find an existing entry, just increment the + // usage count. + if (tsk->pid == entry->pid) { + entry->usage++; + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return 0; + } + } + + // Get an element from the free list. + entry = __stp_tf_map_get_free_entry(); + if (!entry) { + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return -ENOMEM; + } + entry->usage = 1; + entry->pid = tsk->pid; + entry->syscall_no = syscall_no; + entry->arg0 = arg0; + entry->arg1 = arg1; + entry->arg2 = arg2; + hlist_add_head(&entry->hlist, head); + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + return 0; +} + + +// Remove the map entry from the map hash table. Takes a write lock on +// __stp_tf_map_lock. +static int +__stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry) +{ + struct hlist_head *head; + struct hlist_node *node; + int found = 0; + + if (entry != NULL) { + unsigned long flags; + write_lock_irqsave(&__stp_tf_map_lock, flags); + + // Decrement the usage count. + entry->usage--; + + // If the entry is unused, put it back on the free + // list. + if (entry->usage == 0) { + hlist_del(&entry->hlist); + __stp_tf_map_put_free_entry(entry); + } + write_unlock_irqrestore(&__stp_tf_map_lock, flags); + } + return 0; +} diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c index 4dce4be8..ed9c6f4f 100644 --- a/runtime/task_finder_vma.c +++ b/runtime/task_finder_vma.c @@ -1,13 +1,19 @@ #include <linux/list.h> #include <linux/jhash.h> -#include <linux/mutex.h> +#include <linux/spinlock.h> // When handling memcpy() syscall tracing to notice memory map // changes, we need to cache memcpy() entry parameter values for // processing at memcpy() exit. -// __stp_tf_vma_mutex protects the hash table. -static DEFINE_MUTEX(__stp_tf_vma_mutex); +// __stp_tf_vma_lock protects the hash table. +// Documentation/spinlocks.txt suggest we can be a bit more clever +// if we guarantee that in interrupt context we only read, not write +// the datastructures. We should never change the hash table or the +// contents in interrupt context (which should only ever call +// stap_find_vma_map_info for getting stored vma info). So we might +// want to look into that if this seems a bottleneck. +static DEFINE_RWLOCK(__stp_tf_vma_lock); #define __STP_TF_HASH_BITS 4 #define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS) @@ -25,6 +31,9 @@ struct __stp_tf_vma_entry { unsigned long vm_end; unsigned long vm_pgoff; // Is that enough? Should we store a dcookie for vm_file? + + // User data (possibly stp_module) + void *user; }; static struct __stp_tf_vma_entry @@ -37,23 +46,24 @@ static struct hlist_head __stp_tf_vma_table[__STP_TF_TABLE_SIZE]; static struct hlist_head __stp_tf_vma_map[__STP_TF_TABLE_SIZE]; // __stp_tf_vma_initialize(): Initialize the free list. Grabs the -// mutex. +// spinlock. static void __stp_tf_vma_initialize(void) { int i; struct hlist_head *head = &__stp_tf_vma_free_list[0]; - mutex_lock(&__stp_tf_vma_mutex); + unsigned long flags; + write_lock_irqsave(&__stp_tf_vma_lock, flags); for (i = 0; i < TASK_FINDER_VMA_ENTRY_ITEMS; i++) { hlist_add_head(&__stp_tf_vma_free_list_items[i].hlist, head); } - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); } // __stp_tf_vma_get_free_entry(): Returns an entry from the free list -// or NULL. The __stp_tf_vma_mutex must be locked before calling this +// or NULL. The __stp_tf_vma_lock must be write locked before calling this // function. static struct __stp_tf_vma_entry * __stp_tf_vma_get_free_entry(void) @@ -74,7 +84,7 @@ __stp_tf_vma_get_free_entry(void) // __stp_tf_vma_put_free_entry(): Puts an entry back on the free -// list. The __stp_tf_vma_mutex must be locked before calling this +// list. The __stp_tf_vma_lock must be write locked before calling this // function. static void __stp_tf_vma_put_free_entry(struct __stp_tf_vma_entry *entry) @@ -98,7 +108,7 @@ __stp_tf_vma_hash(struct task_struct *tsk, unsigned long addr) // Get vma_entry if the vma is present in the vma hash table. -// Returns NULL if not present. +// Returns NULL if not present. Takes a read lock on __stp_tf_vma_lock. static struct __stp_tf_vma_entry * __stp_tf_get_vma_entry(struct task_struct *tsk, unsigned long addr) { @@ -106,20 +116,22 @@ __stp_tf_get_vma_entry(struct task_struct *tsk, unsigned long addr) struct hlist_node *node; struct __stp_tf_vma_entry *entry; - mutex_lock(&__stp_tf_vma_mutex); + unsigned long flags; + read_lock_irqsave(&__stp_tf_vma_lock, flags); head = &__stp_tf_vma_table[__stp_tf_vma_hash(tsk, addr)]; hlist_for_each_entry(entry, node, head, hlist) { if (tsk->pid == entry->pid && addr == entry->addr) { - mutex_unlock(&__stp_tf_vma_mutex); + read_unlock_irqrestore(&__stp_tf_vma_lock, flags); return entry; } } - mutex_unlock(&__stp_tf_vma_mutex); + read_unlock_irqrestore(&__stp_tf_vma_lock, flags); return NULL; } // Add the vma info to the vma hash table. +// Takes a write lock on __stp_tf_vma_lock. static int __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr, struct vm_area_struct *vma) @@ -128,7 +140,8 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr, struct hlist_node *node; struct __stp_tf_vma_entry *entry; - mutex_lock(&__stp_tf_vma_mutex); + unsigned long flags; + write_lock_irqsave(&__stp_tf_vma_lock, flags); head = &__stp_tf_vma_table[__stp_tf_vma_hash(tsk, addr)]; hlist_for_each_entry(entry, node, head, hlist) { if (tsk->pid == entry->pid @@ -138,7 +151,7 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr, "vma (pid: %d, vm_start: 0x%lx) present?\n", tsk->pid, vma->vm_start); #endif - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return -EBUSY; /* Already there */ } } @@ -146,7 +159,7 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr, // Get an element from the free list. entry = __stp_tf_vma_get_free_entry(); if (!entry) { - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return -ENOMEM; } entry->pid = tsk->pid; @@ -155,11 +168,12 @@ __stp_tf_add_vma(struct task_struct *tsk, unsigned long addr, entry->vm_end = vma->vm_end; entry->vm_pgoff = vma->vm_pgoff; hlist_add_head(&entry->hlist, head); - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return 0; } // Remove the vma entry from the vma hash table. +// Takes a write lock on __stp_tf_vma_lock. static int __stp_tf_remove_vma_entry(struct __stp_tf_vma_entry *entry) { @@ -168,10 +182,11 @@ __stp_tf_remove_vma_entry(struct __stp_tf_vma_entry *entry) int found = 0; if (entry != NULL) { - mutex_lock(&__stp_tf_vma_mutex); + unsigned long flags; + write_lock_irqsave(&__stp_tf_vma_lock, flags); hlist_del(&entry->hlist); __stp_tf_vma_put_free_entry(entry); - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); } return 0; } @@ -186,7 +201,7 @@ __stp_tf_vma_map_hash(struct task_struct *tsk) } // Get vma_entry if the vma is present in the vma map hash table. -// Returns NULL if not present. The __stp_tf_vma_mutex must be locked +// Returns NULL if not present. The __stp_tf_vma_lock must be read locked // before calling this function. static struct __stp_tf_vma_entry * __stp_tf_get_vma_map_entry_internal(struct task_struct *tsk, @@ -200,7 +215,6 @@ __stp_tf_get_vma_map_entry_internal(struct task_struct *tsk, hlist_for_each_entry(entry, node, head, hlist) { if (tsk->pid == entry->pid && vm_start == entry->addr) { - mutex_unlock(&__stp_tf_vma_mutex); return entry; } } @@ -211,13 +225,17 @@ __stp_tf_get_vma_map_entry_internal(struct task_struct *tsk, // Add the vma info to the vma map hash table. static int stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start, - unsigned long vm_end, unsigned long vm_pgoff) + unsigned long vm_end, unsigned long vm_pgoff, + void *user) { struct hlist_head *head; struct hlist_node *node; struct __stp_tf_vma_entry *entry; - mutex_lock(&__stp_tf_vma_mutex); + unsigned long flags; + // Take a write lock, since we are most likely going to write + // after reading. + write_lock_irqsave(&__stp_tf_vma_lock, flags); entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start); if (entry != NULL) { #if 0 @@ -225,14 +243,14 @@ stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start, "vma (pid: %d, vm_start: 0x%lx) present?\n", tsk->pid, entry->vm_start); #endif - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return -EBUSY; /* Already there */ } // Get an element from the free list. entry = __stp_tf_vma_get_free_entry(); if (!entry) { - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return -ENOMEM; } @@ -242,10 +260,11 @@ stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start, entry->vm_start = vm_start; entry->vm_end = vm_end; entry->vm_pgoff = vm_pgoff; + entry->user = user; head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; hlist_add_head(&entry->hlist, head); - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return 0; } @@ -259,23 +278,26 @@ stap_remove_vma_map_info(struct task_struct *tsk, unsigned long vm_start, struct hlist_node *node; struct __stp_tf_vma_entry *entry; - mutex_lock(&__stp_tf_vma_mutex); + // Take a write lock since we are most likely going to delete + // after reading. + unsigned long flags; + write_lock_irqsave(&__stp_tf_vma_lock, flags); entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start); if (entry != NULL) { hlist_del(&entry->hlist); __stp_tf_vma_put_free_entry(entry); } - mutex_unlock(&__stp_tf_vma_mutex); + write_unlock_irqrestore(&__stp_tf_vma_lock, flags); return 0; } // Finds vma info if the vma is present in the vma map hash table. -// Returns ESRCH if not present. The __stp_tf_vma_mutex must *not* be +// Returns ESRCH if not present. The __stp_tf_vma_lock must *not* be // locked before calling this function. static int stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr, unsigned long *vm_start, unsigned long *vm_end, - unsigned long *vm_pgoff) + unsigned long *vm_pgoff, void **user) { struct hlist_head *head; struct hlist_node *node; @@ -283,7 +305,8 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr, struct __stp_tf_vma_entry *found_entry = NULL; int rc = ESRCH; - mutex_lock(&__stp_tf_vma_mutex); + unsigned long flags; + read_lock_irqsave(&__stp_tf_vma_lock, flags); head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; hlist_for_each_entry(entry, node, head, hlist) { if (tsk->pid == entry->pid @@ -300,8 +323,10 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr, *vm_end = found_entry->vm_end; if (vm_pgoff != NULL) *vm_pgoff = found_entry->vm_pgoff; + if (user != NULL) + *user = found_entry->user; rc = 0; } - mutex_unlock(&__stp_tf_vma_mutex); + read_unlock_irqrestore(&__stp_tf_vma_lock, flags); return rc; } diff --git a/runtime/transport/control.c b/runtime/transport/control.c index 4e07a0a7..a1624152 100644 --- a/runtime/transport/control.c +++ b/runtime/transport/control.c @@ -18,6 +18,8 @@ static _stp_mempool_t *_stp_pool_q; static struct list_head _stp_ctl_ready_q; static DEFINE_SPINLOCK(_stp_ctl_ready_lock); +static void _stp_cleanup_and_exit(int send_exit); + static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { u32 type; @@ -51,7 +53,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz } break; case STP_EXIT: - _stp_exit_flag = 1; + _stp_cleanup_and_exit(1); break; case STP_BULK: #ifdef STP_BULKMODE @@ -98,6 +100,9 @@ static void _stp_ctl_write_dbug(int type, void *data, int len) case STP_TRANSPORT: _dbug("sending STP_TRANSPORT\n"); break; + case STP_REQUEST_EXIT: + _dbug("sending STP_REQUEST_EXIT\n"); + break; default: _dbug("ERROR: unknown message type: %d\n", type); break; diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c index 3d0453bf..792ea815 100644 --- a/runtime/transport/transport.c +++ b/runtime/transport/transport.c @@ -148,6 +148,18 @@ static void _stp_cleanup_and_exit(int send_exit) } } +static void _stp_request_exit(void) +{ + static int called = 0; + if (!called) { + /* we only want to do this once */ + called = 1; + dbug_trans(1, "ctl_send STP_REQUEST_EXIT\n"); + _stp_ctl_send(STP_REQUEST_EXIT, NULL, 0); + dbug_trans(1, "done with ctl_send STP_REQUEST_EXIT\n"); + } +} + /* * Called when stapio closes the control channel. */ @@ -202,7 +214,7 @@ static void _stp_work_queue(void *data) /* if exit flag is set AND we have finished with probe_start() */ if (unlikely(_stp_exit_flag && _stp_probes_started)) - _stp_cleanup_and_exit(1); + _stp_request_exit(); if (likely(_stp_ctl_attached)) queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER); } diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h index 0d6853f7..aa50051c 100644 --- a/runtime/transport/transport_msgs.h +++ b/runtime/transport/transport_msgs.h @@ -21,12 +21,12 @@ struct _stp_trace { enum { STP_START, - STP_EXIT, + STP_EXIT, STP_OOB_DATA, STP_SYSTEM, STP_TRANSPORT, STP_CONNECT, - STP_DISCONNECT, + STP_DISCONNECT, STP_BULK, STP_READY, STP_RELOCATION, @@ -34,6 +34,7 @@ enum STP_BUF_INFO, STP_SUBBUFS_CONSUMED, STP_REALTIME_DATA, + STP_REQUEST_EXIT, STP_MAX_CMD }; @@ -52,6 +53,7 @@ static const char *_stp_command_name[] = { "STP_BUF_INFO", "STP_SUBBUFS_CONSUMED", "STP_REALTIME_DATA", + "STP_REQUEST_EXIT", }; #endif /* DEBUG_TRANS */ diff --git a/runtime/unwind.c b/runtime/unwind.c index 9c704e28..aacd56f1 100644 --- a/runtime/unwind.c +++ b/runtime/unwind.c @@ -345,7 +345,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, s state->label = NULL; return 1; } - if (state->stackDepth >= MAX_STACK_DEPTH) + if (state->stackDepth >= STP_MAX_STACK_DEPTH) return 0; state->stack[state->stackDepth++] = ptr.p8; break; @@ -435,12 +435,18 @@ adjustStartLoc (unsigned long startLoc, struct _stp_module *m, struct _stp_section *s) { - if (startLoc && (strcmp (m->name, "kernel") != 0)) - { - startLoc = _stp_module_relocate (m->name, s->name, - startLoc); - startLoc -= m->dwarf_module_base; - } + /* XXX - some, or all, of this should really be done by + _stp_module_relocate. */ + if (startLoc == 0 + || strcmp (m->name, "kernel") == 0 + || strcmp (s->name, ".absolute") == 0) + return startLoc; + + if (strcmp (s->name, ".dynamic") == 0) + return startLoc + s->addr; + + startLoc = _stp_module_relocate (m->name, s->name, startLoc); + startLoc -= m->dwarf_module_base; return startLoc; } @@ -562,7 +568,7 @@ static char *_stp_eh_enc_name(signed type) /* Unwind to previous to frame. Returns 0 if successful, negative * number in case of an error. A positive return means unwinding is finished; * don't try to fallback to dumping addresses on the stack. */ -static int unwind(struct unwind_frame_info *frame) +static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) { #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde, *cie = NULL; @@ -581,7 +587,7 @@ static int unwind(struct unwind_frame_info *frame) if (UNW_PC(frame) == 0) return -EINVAL; - m = _stp_mod_sec_lookup (pc, &s); + m = _stp_mod_sec_lookup (pc, tsk, &s); if (unlikely(m == NULL)) { dbug_unwind(1, "No module found for pc=%lx", pc); return -EINVAL; diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h index 78a4bfef..3b6d0de0 100644 --- a/runtime/unwind/unwind.h +++ b/runtime/unwind/unwind.h @@ -23,7 +23,7 @@ #error "Unsupported dwarf unwind architecture" #endif -#define MAX_STACK_DEPTH 8 +#define STP_MAX_STACK_DEPTH 8 #ifndef BUILD_BUG_ON_ZERO #define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1) @@ -135,7 +135,7 @@ struct unwind_state { unsigned stackDepth:8; unsigned version:8; const u8 *label; - const u8 *stack[MAX_STACK_DEPTH]; + const u8 *stack[STP_MAX_STACK_DEPTH]; }; static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; diff --git a/runtime/uprobes/.gitignore b/runtime/uprobes/.gitignore new file mode 100644 index 00000000..c8172c41 --- /dev/null +++ b/runtime/uprobes/.gitignore @@ -0,0 +1,7 @@ +/*.o +/*.cmd +/.tmp_versions +/Module.* +/modules.order +/uprobes.ko +/uprobes.mod.c diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c index 9dfb82b9..27e923b8 100644 --- a/runtime/uprobes/uprobes.c +++ b/runtime/uprobes/uprobes.c @@ -1049,8 +1049,7 @@ fail_tsk: } EXPORT_SYMBOL_GPL(register_uprobe); -/* See Documentation/uprobes.txt. */ -void unregister_uprobe(struct uprobe *u) +void __unregister_uprobe(struct uprobe *u, bool remove_bkpt) { struct task_struct *p; struct uprobe_process *uproc; @@ -1104,10 +1103,13 @@ void unregister_uprobe(struct uprobe *u) if (!list_empty(&ppt->uprobe_list)) goto done; - /* - * The last uprobe at ppt's probepoint is being unregistered. - * Queue the breakpoint for removal. - */ + /* The last uprobe at ppt's probepoint is being unregistered. */ + if (!remove_bkpt) { + uprobe_free_probept(ppt); + goto done; + } + + /* Queue the breakpoint for removal. */ ppt->state = UPROBE_REMOVING; list_add_tail(&ppt->pd_node, &uproc->pending_uprobes); @@ -1132,8 +1134,20 @@ done: up_write(&uproc->rwsem); uprobe_put_process(uproc); } + +/* See Documentation/uprobes.txt. */ +void unregister_uprobe(struct uprobe *u) +{ + __unregister_uprobe(u, true); +} EXPORT_SYMBOL_GPL(unregister_uprobe); +void unmap_uprobe(struct uprobe *u) +{ + __unregister_uprobe(u, false); +} +EXPORT_SYMBOL_GPL(unmap_uprobe); + /* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */ static struct task_struct *find_surviving_thread(struct uprobe_process *uproc) { @@ -2540,6 +2554,14 @@ void unregister_uretprobe(struct uretprobe *rp) } EXPORT_SYMBOL_GPL(unregister_uretprobe); +void unmap_uretprobe(struct uretprobe *rp) +{ + if (!rp) + return; + unmap_uprobe(&rp->u); +} +EXPORT_SYMBOL_GPL(unmap_uretprobe); + /* * uproc->ssol_area has been successfully set up. Establish the * uretprobe trampoline in slot 0. diff --git a/runtime/uprobes/uprobes.h b/runtime/uprobes/uprobes.h index 0266cb7d..d542420d 100644 --- a/runtime/uprobes/uprobes.h +++ b/runtime/uprobes/uprobes.h @@ -35,6 +35,9 @@ #include <linux/types.h> #include <linux/list.h> +/* Version 2 includes unmap_u[ret]probe(). */ +#define UPROBES_API_VERSION 2 + struct pt_regs; enum uprobe_type { @@ -89,6 +92,9 @@ extern void unregister_uprobe(struct uprobe *u); /* For runtime, assume uprobes support includes uretprobes. */ extern int register_uretprobe(struct uretprobe *rp); extern void unregister_uretprobe(struct uretprobe *rp); +/* For PRs 9940, 6852... */ +extern void unmap_uprobe(struct uprobe *u); +extern void unmap_uretprobe(struct uretprobe *rp); #ifdef UPROBES_IMPLEMENTATION diff --git a/runtime/uprobes/uprobes_i386.c b/runtime/uprobes/uprobes_i386.c index ffa088ed..008f32de 100644 --- a/runtime/uprobes/uprobes_i386.c +++ b/runtime/uprobes/uprobes_i386.c @@ -44,7 +44,7 @@ W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1), /* 90 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -58,22 +58,22 @@ static const unsigned long good_2byte_insns[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ - W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 00 */ - W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ - W(0x20, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ - W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ - W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ - W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ - W(0x60, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 60 */ - W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */ + W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */ + W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */ + W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */ + W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ + W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ + W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */ + W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */ + W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */ - W(0xc0, 1,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1)| /* c0 */ - W(0xd0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* d0 */ - W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */ - W(0xf0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) /* f0 */ + W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */ + W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */ + W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */ + W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */ /* ------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; @@ -114,7 +114,6 @@ * 26, 2e, 36, 3e, - es:, cs:, ss:, ds: segment prefixes -- * but 64 and 65 (fs: and gs:) seems to be used, so we support them. * 67 - addr16 prefix - * 9b - wait/fwait * ce - into * f0 - lock prefix * f2, f3 - repnz, repz prefixes @@ -302,9 +301,20 @@ unsigned long arch_hijack_uret_addr(unsigned long trampoline_address, return orig_ret_addr; } +/* + * On x86_32, if a function returns a struct or union, the return + * value is copied into an area created by the caller. The address + * of this area is passed on the stack as a "hidden" first argument. + * When such a function returns, it uses a "ret $4" instruction to pop + * not only the return address but also the hidden arg. To accommodate + * such functions, we add 4 bytes of slop when predicting the return + * address. See PR #10078. + */ +#define STRUCT_RETURN_SLOP 4 + static unsigned long arch_predict_sp_at_ret(struct pt_regs *regs, struct task_struct *tsk) { - return (unsigned long) (regs->esp + 4); + return (unsigned long) (regs->esp + 4 + STRUCT_RETURN_SLOP); } diff --git a/runtime/uprobes/uprobes_x86.c b/runtime/uprobes/uprobes_x86.c index e3bdf8ff..93331715 100644 --- a/runtime/uprobes/uprobes_x86.c +++ b/runtime/uprobes/uprobes_x86.c @@ -45,8 +45,8 @@ static const unsigned long long good_insns_64[256 / 64] = { W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */ W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ - W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */ + W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -71,7 +71,7 @@ static const unsigned long long good_insns_32[256 / 64] = { W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -119,7 +119,7 @@ static const unsigned long long good_2byte_insns[256 / 64] = { * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 * * invalid opcodes in 64-bit mode: - * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5 + * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 * * 63 - we support this opcode in x86_64 but not in i386. * opcodes we may need to refine support for: @@ -141,7 +141,6 @@ static const unsigned long long good_2byte_insns[256 / 64] = { * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- * but 64 and 65 (fs: and gs:) seems to be used, so we support them. * 67 - addr16 prefix - * 9b - wait/fwait * ce - into * f0 - lock prefix */ @@ -717,12 +716,23 @@ unsigned long arch_hijack_uret_addr(unsigned long trampoline_address, return orig_ret_addr; } +/* + * On x86_32, if a function returns a struct or union, the return + * value is copied into an area created by the caller. The address + * of this area is passed on the stack as a "hidden" first argument. + * When such a function returns, it uses a "ret $4" instruction to pop + * not only the return address but also the hidden arg. To accommodate + * such functions, we add 4 bytes of slop when predicting the return + * address. See PR #10078. + */ +#define STRUCT_RETURN_SLOP 4 + static unsigned long arch_predict_sp_at_ret(struct pt_regs *regs, struct task_struct *tsk) { if (test_tsk_thread_flag(tsk, TIF_IA32)) - return (unsigned long) (REGS_SP + 4); + return (unsigned long) (REGS_SP + 4 + STRUCT_RETURN_SLOP); else return (unsigned long) (REGS_SP + 8); } diff --git a/runtime/uprobes/uprobes_x86_64.c b/runtime/uprobes/uprobes_x86_64.c index 8cf36623..56ebe2e1 100644 --- a/runtime/uprobes/uprobes_x86_64.c +++ b/runtime/uprobes/uprobes_x86_64.c @@ -45,8 +45,8 @@ static const unsigned long good_insns_64[256 / 64] = { W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */ W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ - W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */ + W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -71,7 +71,7 @@ static const unsigned long good_insns_32[256 / 64] = { W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -141,7 +141,6 @@ static const unsigned long good_2byte_insns[256 / 64] = { * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- * but 64 and 65 (fs: and gs:) seems to be used, so we support them. * 67 - addr16 prefix - * 9b - wait/fwait * ce - into * f0 - lock prefix */ diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c index af187fc9..07ad3984 100644 --- a/runtime/uprobes2/uprobes.c +++ b/runtime/uprobes2/uprobes.c @@ -29,6 +29,12 @@ #include <linux/utrace.h> #include <linux/regset.h> #define UPROBES_IMPLEMENTATION 1 + +/* PR9974: Adapt to struct renaming. */ +#ifdef UTRACE_API_VERSION +#define utrace_attached_engine utrace_engine +#endif + #include "uprobes.h" #include <linux/tracehook.h> #include <linux/mm.h> @@ -949,10 +955,15 @@ static int defer_registration(struct uprobe *u, int regflag, */ static struct pid *uprobe_get_tg_leader(pid_t p) { - struct pid *pid; + struct pid *pid = NULL; rcu_read_lock(); - pid = find_vpid(p); + /* + * We need this check because unmap_u[ret]probe() can be called + * from a report_death callback, where current->proxy is NULL. + */ + if (current->nsproxy) + pid = find_vpid(p); if (pid) { struct task_struct *t = pid_task(pid, PIDTYPE_PID); if (t) @@ -1132,8 +1143,7 @@ fail_tsk: } EXPORT_SYMBOL_GPL(register_uprobe); -/* See Documentation/uprobes.txt. */ -void unregister_uprobe(struct uprobe *u) +void __unregister_uprobe(struct uprobe *u, bool remove_bkpt) { struct pid *p; struct uprobe_process *uproc; @@ -1187,10 +1197,13 @@ void unregister_uprobe(struct uprobe *u) if (!list_empty(&ppt->uprobe_list)) goto done; - /* - * The last uprobe at ppt's probepoint is being unregistered. - * Queue the breakpoint for removal. - */ + /* The last uprobe at ppt's probepoint is being unregistered. */ + if (!remove_bkpt) { + uprobe_free_probept(ppt); + goto done; + } + + /* Queue the breakpoint for removal. */ ppt->state = UPROBE_REMOVING; list_add_tail(&ppt->pd_node, &uproc->pending_uprobes); @@ -1215,8 +1228,20 @@ done: up_write(&uproc->rwsem); uprobe_put_process(uproc, false); } + +/* See Documentation/uprobes.txt. */ +void unregister_uprobe(struct uprobe *u) +{ + __unregister_uprobe(u, true); +} EXPORT_SYMBOL_GPL(unregister_uprobe); +void unmap_uprobe(struct uprobe *u) +{ + __unregister_uprobe(u, false); +} +EXPORT_SYMBOL_GPL(unmap_uprobe); + /* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */ static struct task_struct *find_surviving_thread(struct uprobe_process *uproc) { @@ -2214,7 +2239,8 @@ static u32 uprobe_report_exit(enum utrace_resume_action action, } } up_read(&uproc->rwsem); - if (utask->state == UPTASK_TRAMPOLINE_HIT) + if (utask->state == UPTASK_TRAMPOLINE_HIT || + utask->state == UPTASK_BP_HIT) uprobe_decref_process(uproc); } @@ -2712,6 +2738,14 @@ void unregister_uretprobe(struct uretprobe *rp) } EXPORT_SYMBOL_GPL(unregister_uretprobe); +void unmap_uretprobe(struct uretprobe *rp) +{ + if (!rp) + return; + unmap_uprobe(&rp->u); +} +EXPORT_SYMBOL_GPL(unmap_uretprobe); + /* * uproc->ssol_area has been successfully set up. Establish the * uretprobe trampoline in the next available slot following the diff --git a/runtime/uprobes2/uprobes.h b/runtime/uprobes2/uprobes.h index 11d01f5c..ae0692f0 100644 --- a/runtime/uprobes2/uprobes.h +++ b/runtime/uprobes2/uprobes.h @@ -23,6 +23,14 @@ #include <linux/types.h> #include <linux/list.h> +/* Adapt to struct renaming. */ +#ifdef UTRACE_API_VERSION +#define utrace_attached_engine utrace_engine +#endif + +/* Version 2 includes unmap_u[ret]probe(). */ +#define UPROBES_API_VERSION 2 + struct pt_regs; enum uprobe_type { @@ -77,6 +85,9 @@ extern void unregister_uprobe(struct uprobe *u); /* For runtime, assume uprobes support includes uretprobes. */ extern int register_uretprobe(struct uretprobe *rp); extern void unregister_uretprobe(struct uretprobe *rp); +/* For PRs 9940, 6852... */ +extern void unmap_uprobe(struct uprobe *u); +extern void unmap_uretprobe(struct uretprobe *rp); #ifdef UPROBES_IMPLEMENTATION diff --git a/runtime/uprobes2/uprobes_x86.c b/runtime/uprobes2/uprobes_x86.c index effb7444..8c80293d 100644 --- a/runtime/uprobes2/uprobes_x86.c +++ b/runtime/uprobes2/uprobes_x86.c @@ -50,8 +50,8 @@ static const u64 good_insns_64[256 / 64] = { W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */ W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ - W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */ + W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -76,7 +76,7 @@ static const u64 good_insns_32[256 / 64] = { W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */ + W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */ @@ -124,7 +124,7 @@ static const u64 good_2byte_insns[256 / 64] = { * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 * * invalid opcodes in 64-bit mode: - * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5 + * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 * * 63 - we support this opcode in x86_64 but not in i386. * opcodes we may need to refine support for: @@ -146,7 +146,6 @@ static const u64 good_2byte_insns[256 / 64] = { * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- * but 64 and 65 (fs: and gs:) seem to be used, so we support them * 67 - addr16 prefix - * 9b - wait/fwait * ce - into * f0 - lock prefix */ diff --git a/runtime/uprobes2/uprobes_x86.h b/runtime/uprobes2/uprobes_x86.h index ca3f4873..a07fa0d3 100644 --- a/runtime/uprobes2/uprobes_x86.h +++ b/runtime/uprobes2/uprobes_x86.h @@ -93,11 +93,22 @@ static inline unsigned long arch_get_cur_sp(struct pt_regs *regs) return (unsigned long) regs->sp; } +/* + * On x86_32, if a function returns a struct or union, the return + * value is copied into an area created by the caller. The address + * of this area is passed on the stack as a "hidden" first argument. + * When such a function returns, it uses a "ret $4" instruction to pop + * not only the return address but also the hidden arg. To accommodate + * such functions, we add 4 bytes of slop when predicting the return + * address. See PR #10078. + */ +#define STRUCT_RETURN_SLOP 4 + static inline unsigned long arch_predict_sp_at_ret(struct pt_regs *regs, struct task_struct *tsk) { if (test_tsk_thread_flag(tsk, TIF_IA32)) - return (unsigned long) (regs->sp + 4); + return (unsigned long) (regs->sp + 4 + STRUCT_RETURN_SLOP); else return (unsigned long) (regs->sp + 8); } diff --git a/runtime/utrace_compatibility.h b/runtime/utrace_compatibility.h index 00b841d2..5521a5c2 100644 --- a/runtime/utrace_compatibility.h +++ b/runtime/utrace_compatibility.h @@ -1,6 +1,6 @@ /* * utrace compatibility defines and inlines - * Copyright (C) 2008 Red Hat Inc. + * Copyright (C) 2008-2009 Red Hat Inc. * * This file is part of systemtap, and is free software. You can * redistribute it and/or modify it under the terms of the GNU General @@ -13,6 +13,11 @@ #include <linux/utrace.h> +/* PR9974: Adapt to struct renaming. */ +#ifdef UTRACE_API_VERSION +#define utrace_attached_engine utrace_engine +#endif + #ifdef UTRACE_ACTION_RESUME /* @@ -28,6 +33,8 @@ enum utrace_resume_action { UTRACE_STOP = UTRACE_ACTION_QUIESCE, UTRACE_RESUME = UTRACE_ACTION_RESUME, UTRACE_DETACH = UTRACE_ACTION_DETACH, + UTRACE_SINGLESTEP = UTRACE_ACTION_SINGLESTEP, + UTRACE_BLOCKSTEP = UTRACE_ACTION_BLOCKSTEP, }; static inline struct utrace_attached_engine * @@ -48,6 +55,11 @@ utrace_control(struct task_struct *target, case UTRACE_STOP: return utrace_set_flags(target, engine, (engine->flags | UTRACE_ACTION_QUIESCE)); + case UTRACE_SINGLESTEP: + case UTRACE_BLOCKSTEP: + return utrace_set_flags(target, engine, + engine->flags | action); + default: return -EINVAL; } diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c index bd58d760..23810e75 100644 --- a/runtime/vsprintf.c +++ b/runtime/vsprintf.c @@ -12,6 +12,9 @@ #ifndef _VSPRINTF_C_ #define _VSPRINTF_C_ +//forward declaration for _stp_vsnprintf +static void * _stp_reserve_bytes (int); + static int skip_atoi(const char **s) { int i=0; @@ -22,6 +25,10 @@ static int skip_atoi(const char **s) enum print_flag {STP_ZEROPAD=1, STP_SIGN=2, STP_PLUS=4, STP_SPACE=8, STP_LEFT=16, STP_SPECIAL=32, STP_LARGE=64}; +/* + * Changes to number() will require a corresponding change to number_size below, + * to ensure proper buffer allocation for _stp_printf. + */ static char * number(char * buf, char * end, uint64_t num, int base, int size, int precision, enum print_flag type) { char c,sign,tmp[66]; @@ -115,6 +122,85 @@ static char * number(char * buf, char * end, uint64_t num, int base, int size, i return buf; } +/* + * Calculates the number of bytes required to print the paramater num. A change to + * number() requires a corresponding change here, and vice versa, to ensure the + * calculated size and printed size match. + */ +static int number_size(uint64_t num, int base, int size, int precision, enum print_flag type) { + char c,sign,tmp[66]; + const char *digits; + static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i, num_bytes = 0; + + digits = (type & STP_LARGE) ? large_digits : small_digits; + if (type & STP_LEFT) + type &= ~STP_ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & STP_ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & STP_SIGN) { + if ((int64_t) num < 0) { + sign = '-'; + num = - (int64_t) num; + size--; + } else if (type & STP_PLUS) { + sign = '+'; + size--; + } else if (type & STP_SPACE) { + sign = ' '; + size--; + } + } + if (type & STP_SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(STP_ZEROPAD+STP_LEFT))) { + while(size-->0) { + num_bytes++; + } + } + if (sign) { + num_bytes++; + } + if (type & STP_SPECIAL) { + if (base==8) { + num_bytes++; + } else if (base==16) { + num_bytes+=2; + } + } + if (!(type & STP_LEFT)) { + while (size-- > 0) { + num_bytes++; + } + } + while (i < precision--) { + num_bytes++; + } + while (i-- > 0) { + num_bytes++; + } + while (size-- > 0) { + num_bytes++; + } + return num_bytes; + +} + static int check_binary_precision (int precision) { /* precision can be unspecified (-1) or one of 1, 2, 4 or 8. */ switch (precision) { @@ -148,9 +234,260 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) if (unlikely((int) size < 0)) return 0; - str = buf; - end = buf + size - 1; + /* + * buf will be NULL when this function is called from _stp_printf. + * This branch calculates the exact size print buffer required for + * the string and allocates it with _stp_reserve_bytes. A change + * to this branch requires a corresponding change to the same + * section of code below. + */ + if (buf == NULL) { + const char* fmt_copy = fmt; + int num_bytes = 0; + va_list args_copy; + + va_copy(args_copy, args); + + for (; *fmt_copy ; ++fmt_copy) { + if (*fmt_copy != '%') { + num_bytes++; + continue; + } + + /* process flags */ + flags = 0; + repeat_copy: + ++fmt_copy; /* this also skips first '%' */ + switch (*fmt_copy) { + case '-': flags |= STP_LEFT; goto repeat_copy; + case '+': flags |= STP_PLUS; goto repeat_copy; + case ' ': flags |= STP_SPACE; goto repeat_copy; + case '#': flags |= STP_SPECIAL; goto repeat_copy; + case '0': flags |= STP_ZEROPAD; goto repeat_copy; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt_copy)) + field_width = skip_atoi(&fmt_copy); + else if (*fmt_copy == '*') { + ++fmt_copy; + /* it's the next argument */ + field_width = va_arg(args_copy, int); + if (field_width < 0) { + field_width = -field_width; + flags |= STP_LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt_copy == '.') { + ++fmt_copy; + if (isdigit(*fmt_copy)) + precision = skip_atoi(&fmt_copy); + else if (*fmt_copy == '*') { + ++fmt_copy; + /* it's the next argument */ + precision = va_arg(args_copy, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt_copy == 'h' || *fmt_copy == 'l' || *fmt_copy == 'L') { + qualifier = *fmt_copy; + ++fmt_copy; + if (qualifier == 'l' && *fmt_copy == 'l') { + qualifier = 'L'; + ++fmt_copy; + } + } + + /* default base */ + base = 10; + + switch (*fmt_copy) { + case 'b': + num = va_arg(args_copy, int64_t); + + /* Only certain values are valid for the precision. */ + precision = check_binary_precision (precision); + + /* Unspecified field width defaults to the specified + precision and vice versa. If neither is specified, + then both default to 8. */ + if (field_width == -1) { + if (precision == -1) { + field_width = 8; + precision = 8; + } + else + field_width = precision; + } + else if (precision == -1) { + precision = check_binary_precision (field_width); + if (precision == -1) + precision = 8; + } + + len = precision; + if (!(flags & STP_LEFT)) { + while (len < field_width--) { + num_bytes++; + } + } + + num_bytes += precision; + + while (len < field_width--) + num_bytes++; + + continue; + + case 's': + case 'M': + case 'm': + s = va_arg(args_copy, char *); + if ((unsigned long)s < PAGE_SIZE) + s = "<NULL>"; + + if (*fmt_copy == 's') + len = strnlen(s, precision); + else if (precision > 0) + len = precision; + else + len = 1; + + if (*fmt_copy == 'M') + len = len * 2; /* hex dump print size */ + + if (!(flags & STP_LEFT)) { + while (len < field_width--) { + num_bytes++; + } + } + + num_bytes += len; + + while (len < field_width--) { + num_bytes++; + } + if(flags & STP_ZEROPAD) { + num_bytes++; + } + continue; + case 'X': + flags |= STP_LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= STP_SIGN; + case 'u': + break; + + case 'p': + /* Note that %p takes an int64_t argument. */ + len = 2*sizeof(void *) + 2; + flags |= STP_ZEROPAD; + + if (field_width == -1) + field_width = len; + + if (!(flags & STP_LEFT)) { + while (len < field_width) { + field_width--; + num_bytes++; + } + } + + //account for "0x" + num_bytes+=2; + field_width-=2; + + num_bytes += number_size((unsigned long) va_arg(args_copy, int64_t), + 16, field_width, field_width, flags); + continue; + + case '%': + num_bytes++; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'c': + if (!(flags & STP_LEFT)) { + while (--field_width > 0) { + num_bytes++; + } + } + c = (unsigned char) va_arg(args_copy, int); + num_bytes++; + while (--field_width > 0) { + num_bytes++; + } + continue; + + default: + num_bytes++; + if (*fmt_copy) { + num_bytes++; + } else { + --fmt_copy; + } + continue; + } + + if (qualifier == 'L') + num = va_arg(args_copy, int64_t); + else if (qualifier == 'l') { + num = va_arg(args_copy, unsigned long); + if (flags & STP_SIGN) + num = (signed long) num; + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args_copy, int); + if (flags & STP_SIGN) + num = (signed short) num; + } else { + num = va_arg(args_copy, unsigned int); + if (flags & STP_SIGN) + num = (signed int) num; + } + num_bytes += number_size(num, base, field_width, precision, flags); + } + + va_end(args_copy); + + if (num_bytes == 0) + return 0; + + //max print buffer size + if (num_bytes > STP_BUFFER_SIZE) { + num_bytes = STP_BUFFER_SIZE; + } + + str = (char*)_stp_reserve_bytes(num_bytes); + size = num_bytes; + end = str + size - 1; + + } else { + str = buf; + end = buf + size - 1; + } + /* + * Note that a change to code below requires a corresponding + * change in the code above to properly calculate the bytes + * required in the output buffer. + */ for (; *fmt ; ++fmt) { if (*fmt != '%') { if (str <= end) @@ -297,16 +634,25 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) len = 1; if (!(flags & STP_LEFT)) { - while (len < field_width--) { + int actlen = len; + if (*fmt == 'M') + actlen = len * 2; + while (actlen < field_width--) { if (str <= end) *str = ' '; ++str; } } - if (*fmt == 'M') { - str = number(str, str + len - 1 < end ? str + len - 1 : end, - (unsigned long) *(uint64_t *) s, - 16, field_width, len, flags); + if (*fmt == 'M') { /* stolen from kernel: trace_seq_putmem_hex() */ + const char _stp_hex_asc[] = "0123456789abcdef"; + int j; + for (i = 0, j = 0; i < len; i++) { + *str = _stp_hex_asc[((*s) & 0xf0) >> 4]; + str++; + *str = _stp_hex_asc[((*s) & 0x0f)]; + str++; s++; + } + len = len * 2; /* the actual length */ } else { for (i = 0; i < len; ++i) { @@ -433,11 +779,13 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) field_width, precision, flags); } - if (likely(str <= end)) - *str = '\0'; - else if (size > 0) - /* don't write out a null byte if the buf size is zero */ - *end = '\0'; + if (buf != NULL) { + if (likely(str <= end)) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + } return str-buf; } |