From 52aeb26b8d83c26e00adaf70bbf5a3a828689fb2 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 12 Mar 2009 17:12:38 -0700 Subject: PR9947: move runtime cleanup out of the work queue The kernel lockdep checking found a possible deadlock if a forced rmmod tried to destroy _stp_work_queue at the same time that the work queue was unregistering tracepoints. An unlikely scenario, but still possible. Now the work queue will just issue a STP_REQUEST_EXIT down to usermode, and usermode will echo back an STP_EXIT that triggers the actual probe cleanup. This way the unregistrations are happening in exactly the same context as the registrations were. --- runtime/staprun/mainloop.c | 8 ++++++++ runtime/transport/control.c | 7 ++++++- runtime/transport/transport.c | 20 ++++++++++++++++---- runtime/transport/transport_msgs.h | 8 +++++--- 4 files changed, 35 insertions(+), 8 deletions(-) (limited to 'runtime') diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c index 29eb4f1f..db6ef6b7 100644 --- a/runtime/staprun/mainloop.c +++ b/runtime/staprun/mainloop.c @@ -477,6 +477,14 @@ int stp_main_loop(void) cleanup_and_exit(0); break; } + case STP_REQUEST_EXIT: + { + /* module asks us to start exiting, so send STP_EXIT */ + dbug(2, "got STP_REQUEST_EXIT\n"); + int32_t rc, btype = STP_EXIT; + rc = write(control_channel, &btype, sizeof(btype)); + break; + } case STP_START: { struct _stp_msg_start *t = (struct _stp_msg_start *)data; diff --git a/runtime/transport/control.c b/runtime/transport/control.c index edde244d..680d7306 100644 --- a/runtime/transport/control.c +++ b/runtime/transport/control.c @@ -13,6 +13,8 @@ static _stp_mempool_t *_stp_pool_q; static struct list_head _stp_ctl_ready_q; static DEFINE_SPINLOCK(_stp_ctl_ready_lock); +static void _stp_cleanup_and_exit(int send_exit); + static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { u32 type; @@ -46,7 +48,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz } break; case STP_EXIT: - _stp_exit_flag = 1; + _stp_cleanup_and_exit(1); break; case STP_BULK: #ifdef STP_BULKMODE @@ -93,6 +95,9 @@ static void _stp_ctl_write_dbug(int type, void *data, int len) case STP_TRANSPORT: _dbug("sending STP_TRANSPORT\n"); break; + case STP_REQUEST_EXIT: + _dbug("sending STP_REQUEST_EXIT\n"); + break; default: _dbug("ERROR: unknown message type: %d\n", type); break; diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c index 7fcebd42..762c0a92 100644 --- a/runtime/transport/transport.c +++ b/runtime/transport/transport.c @@ -25,7 +25,6 @@ static struct utt_trace *_stp_utt = NULL; static unsigned int utt_seq = 1; static int _stp_probes_started = 0; static pid_t _stp_target = 0; -static int _stp_exit_called = 0; static int _stp_exit_flag = 0; #include "control.h" #ifdef STP_OLD_TRANSPORT @@ -89,13 +88,14 @@ static void _stp_handle_start(struct _stp_msg_start *st) /* when someone does /sbin/rmmod on a loaded systemtap module. */ static void _stp_cleanup_and_exit(int send_exit) { - if (!_stp_exit_called) { + static int called = 0; + if (!called) { int failures; dbug_trans(1, "cleanup_and_exit (%d)\n", send_exit); _stp_exit_flag = 1; /* we only want to do this stuff once */ - _stp_exit_called = 1; + called = 1; if (_stp_probes_started) { dbug_trans(1, "calling probe_exit\n"); @@ -119,6 +119,18 @@ static void _stp_cleanup_and_exit(int send_exit) } } +static void _stp_request_exit(void) +{ + static int called = 0; + if (!called) { + /* we only want to do this once */ + called = 1; + dbug_trans(1, "ctl_send STP_REQUEST_EXIT\n"); + _stp_ctl_send(STP_REQUEST_EXIT, NULL, 0); + dbug_trans(1, "done with ctl_send STP_REQUEST_EXIT\n"); + } +} + /* * Called when stapio closes the control channel. */ @@ -169,7 +181,7 @@ static void _stp_work_queue(void *data) /* if exit flag is set AND we have finished with probe_start() */ if (unlikely(_stp_exit_flag && _stp_probes_started)) - _stp_cleanup_and_exit(1); + _stp_request_exit(); if (likely(_stp_attached)) queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER); } diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h index 596f4925..0d9a5983 100644 --- a/runtime/transport/transport_msgs.h +++ b/runtime/transport/transport_msgs.h @@ -21,19 +21,20 @@ struct _stp_trace { enum { STP_START, - STP_EXIT, + STP_EXIT, STP_OOB_DATA, STP_SYSTEM, STP_TRANSPORT, STP_CONNECT, - STP_DISCONNECT, + STP_DISCONNECT, STP_BULK, STP_READY, - STP_RELOCATION, + STP_RELOCATION, /** deprecated STP_OLD_TRANSPORT **/ STP_BUF_INFO, STP_SUBBUFS_CONSUMED, STP_REALTIME_DATA, + STP_REQUEST_EXIT, STP_MAX_CMD }; @@ -52,6 +53,7 @@ static const char *_stp_command_name[] = { "STP_BUF_INFO", "STP_SUBBUFS_CONSUMED", "STP_REALTIME_DATA", + "STP_REQUEST_EXIT", }; #endif /* DEBUG_TRANS */ -- cgit From 436b47f678c2fc5397ed66a1eddf6b419cc6585b Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Fri, 13 Mar 2009 16:22:04 -0700 Subject: Move lookup_bad_addr call in STAPCONF_PROBE_KERNEL With most of the implementations, kread/kwrite call deref/store_deref, and so it makes sense to have lookup_bad_addr in the latter as an underlying address check. However, in the STAPCONF_PROBE_KERNEL case that uses probe_kernel_read and probe_kernel_write, the roles are reversed, so lookup_bad_addr needs to be in kread/kwrite. Also note that __deref_bad and __store_deref_bad should only be used in cases that can be determined at compile time. These turn into invalid symbols which prevent the module from loading. (They might be better replaced with compile-time assertions.) --- runtime/loc2c-runtime.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'runtime') diff --git a/runtime/loc2c-runtime.h b/runtime/loc2c-runtime.h index 0af19edc..92c017d3 100644 --- a/runtime/loc2c-runtime.h +++ b/runtime/loc2c-runtime.h @@ -187,22 +187,22 @@ #define kread(ptr) ({ \ typeof(*(ptr)) _v; \ - if (probe_kernel_read((void *)&_v, (void *)(ptr), sizeof(*(ptr)))) \ - DEREF_FAULT(ptr); \ + if (lookup_bad_addr((unsigned long)(ptr)) || \ + probe_kernel_read((void *)&_v, (void *)(ptr), sizeof(*(ptr)))) \ + DEREF_FAULT(ptr); \ _v; \ }) #define kwrite(ptr, value) ({ \ typeof(*(ptr)) _v; \ _v = (typeof(*(ptr)))(value); \ - if (probe_kernel_write((void *)(ptr), (void *)&_v, sizeof(*(ptr)))) \ - STORE_DEREF_FAULT(ptr); \ + if (lookup_bad_addr((unsigned long)addr) || \ + probe_kernel_write((void *)(ptr), (void *)&_v, sizeof(*(ptr)))) \ + STORE_DEREF_FAULT(ptr); \ }) #define deref(size, addr) ({ \ intptr_t _i; \ - if (lookup_bad_addr((unsigned long)addr)) \ - __deref_bad(); \ switch (size) { \ case 1: _i = kread((u8 *)(addr)); break; \ case 2: _i = kread((u16 *)(addr)); break; \ @@ -215,8 +215,6 @@ }) #define store_deref(size, addr, value) ({ \ - if (lookup_bad_addr((unsigned long)addr)) \ - __store_deref_bad(); \ switch (size) { \ case 1: kwrite((u8 *)(addr), (value)); break; \ case 2: kwrite((u16 *)(addr), (value)); break; \ -- cgit From bb64f40b58a64a9ae065dba5058463ac604c3896 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Sun, 15 Mar 2009 15:29:01 +0100 Subject: Move vma module tracking from pr6866 branch to master. * tapsets.cxx (utrace_derived_probe_group::emit_module_decls): Always emit vm callback probe for __stp_tf_vm_cb. * runtime/task_finder.c (__stp_tf_vm_cb): Always expose, move _stp_dbug statements under ifdef DEBUG_TASK_FINDER_VMA. Find and record corresponding module when vm_path not NULL. * runtime/task_finder_vma.c (struct __stp_tf_vma_entry): Add _stp_module. (stap_add_vma_map_info): Add _stp_module argument and assign. (__stp_tf_get_vma_entry_addr): New static function to get the __stp_tf_vma_entry given an address. --- runtime/task_finder.c | 22 ++++++++++++++++------ runtime/task_finder_vma.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 7 deletions(-) (limited to 'runtime') diff --git a/runtime/task_finder.c b/runtime/task_finder.c index 9db713c3..ae381a41 100644 --- a/runtime/task_finder.c +++ b/runtime/task_finder.c @@ -55,7 +55,6 @@ typedef int (*stap_task_finder_vm_callback)(struct stap_task_finder_target *tgt, unsigned long vm_end, unsigned long vm_pgoff); -#ifdef DEBUG_TASK_FINDER_VMA static int __stp_tf_vm_cb(struct stap_task_finder_target *tgt, struct task_struct *tsk, int map_p, char *vm_path, @@ -63,21 +62,32 @@ static int __stp_tf_vm_cb(struct stap_task_finder_target *tgt, unsigned long vm_end, unsigned long vm_pgoff) { + int i; +#ifdef DEBUG_TASK_FINDER_VMA _stp_dbug(__FUNCTION__, __LINE__, "vm_cb: tsk %d:%d path %s, start 0x%08lx, end 0x%08lx, offset 0x%lx\n", tsk->pid, map_p, vm_path, vm_start, vm_end, vm_pgoff); +#endif if (map_p) { - // FIXME: What should we do with vm_path? We can't save - // the vm_path pointer itself, but we don't have any - // storage space allocated to save it in... - stap_add_vma_map_info(tsk, vm_start, vm_end, vm_pgoff); + struct _stp_module *module = NULL; + if (vm_path != NULL) + for (i = 0; i < _stp_num_modules; i++) + if (strcmp(vm_path, _stp_modules[i]->name) == 0) + { +#ifdef DEBUG_TASK_FINDER_VMA + _stp_dbug(__FUNCTION__, __LINE__, + "vm_cb: matched path %s to module\n", vm_path); +#endif + module = _stp_modules[i]; + break; + } + stap_add_vma_map_info(tsk, vm_start, vm_end, vm_pgoff, module); } else { stap_remove_vma_map_info(tsk, vm_start, vm_end, vm_pgoff); } return 0; } -#endif struct stap_task_finder_target { /* private: */ diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c index 4dce4be8..87a32fe5 100644 --- a/runtime/task_finder_vma.c +++ b/runtime/task_finder_vma.c @@ -25,6 +25,9 @@ struct __stp_tf_vma_entry { unsigned long vm_end; unsigned long vm_pgoff; // Is that enough? Should we store a dcookie for vm_file? + + // Module that this vma entry is mapped from, if any. + struct _stp_module *module; }; static struct __stp_tf_vma_entry @@ -211,7 +214,8 @@ __stp_tf_get_vma_map_entry_internal(struct task_struct *tsk, // Add the vma info to the vma map hash table. static int stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start, - unsigned long vm_end, unsigned long vm_pgoff) + unsigned long vm_end, unsigned long vm_pgoff, + struct _stp_module *module) { struct hlist_head *head; struct hlist_node *node; @@ -242,6 +246,7 @@ stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start, entry->vm_start = vm_start; entry->vm_end = vm_end; entry->vm_pgoff = vm_pgoff; + entry->module = module; head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; hlist_add_head(&entry->hlist, head); @@ -305,3 +310,26 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long vm_addr, mutex_unlock(&__stp_tf_vma_mutex); return rc; } + +// Get vma_entry of the address (vm_start/vm_end) if the vma is +// present in the vma hash table containing. +// Returns NULL if not present. +static struct __stp_tf_vma_entry * +__stp_tf_get_vma_entry_addr(struct task_struct *tsk, unsigned long addr) +{ + struct hlist_head *head; + struct hlist_node *node; + struct __stp_tf_vma_entry *entry; + + mutex_lock(&__stp_tf_vma_mutex); + head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; + hlist_for_each_entry(entry, node, head, hlist) { + if (tsk->pid == entry->pid + && addr >= entry->vm_start && addr < entry->vm_end) { + mutex_unlock(&__stp_tf_vma_mutex); + return entry; + } + } + mutex_unlock(&__stp_tf_vma_mutex); + return NULL; +} -- cgit