diff options
30 files changed, 1290 insertions, 237 deletions
diff --git a/runtime/debug.h b/runtime/debug.h index e8b2e701..ce0c3943 100644 --- a/runtime/debug.h +++ b/runtime/debug.h @@ -14,7 +14,6 @@ * _dbug() writes to systemtap stderr. * errk() writes to the system log. */ -static int _stp_transport_state = 0; #define _dbug(args...) _stp_dbug(__FUNCTION__, __LINE__, args) diff --git a/runtime/print.c b/runtime/print.c index 964a73c2..d51c8108 100644 --- a/runtime/print.c +++ b/runtime/print.c @@ -13,6 +13,8 @@ #include "string.h" +#include "vsprintf.c" +#include "print.h" #include "transport/transport.c" #include "vsprintf.c" @@ -86,14 +88,8 @@ static void _stp_print_cleanup (void) #endif #if !defined(RELAY_GUEST) -/* The relayfs API changed between 2.6.15 and 2.6.16. */ -/* Use the appropriate print flush function. */ -#ifdef STP_OLD_TRANSPORT -#include "print_old.c" -#else #include "print_new.c" -#endif #if defined(RELAY_HOST) EXPORT_SYMBOL_GPL(EXPORT_FN(stp_print_flush)); #endif diff --git a/runtime/print.h b/runtime/print.h new file mode 100644 index 00000000..d5c588a3 --- /dev/null +++ b/runtime/print.h @@ -0,0 +1,16 @@ +/* -*- linux-c -*- + * Copyright (C) 2009 Red Hat Inc. + * + * This file is part of systemtap, and is free software. You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. 
+ */ + +#ifndef _STP_PRINT_H_ +#define _STP_PRINT_H_ + +static int _stp_print_init(void); +static void _stp_print_cleanup(void); + +#endif /* _STP_PRINT_H_ */ diff --git a/runtime/print_new.c b/runtime/print_new.c index fa7b4727..2d5a6e10 100644 --- a/runtime/print_new.c +++ b/runtime/print_new.c @@ -18,50 +18,113 @@ static DEFINE_SPINLOCK(_stp_print_lock); -void EXPORT_FN(stp_print_flush) (_stp_pbuf *pb) +void EXPORT_FN(stp_print_flush)(_stp_pbuf *pb) { - uint32_t len = pb->len; + size_t len = pb->len; + void *entry = NULL; /* check to see if there is anything in the buffer */ - if (likely (len == 0)) + dbug_trans(1, "len = %zu\n", len); + if (likely(len == 0)) return; pb->len = 0; - if (unlikely(!_stp_utt || _stp_utt->trace_state != Utt_trace_running)) - return; +//DRS FIXME: this digs down too deep in internals +// if (unlikely(!_stp_utt || _stp_utt->trace_state != Utt_trace_running)) +// return; #ifdef STP_BULKMODE - { #ifdef NO_PERCPU_HEADERS - void *buf = utt_reserve(_stp_utt, len); - if (likely(buf)) - memcpy(buf, pb->buf, len); - else - atomic_inc (&_stp_transport_failures); -#else - void *buf = utt_reserve(_stp_utt, - sizeof(struct _stp_trace) + len); - if (likely(buf)) { - struct _stp_trace t = { .sequence = _stp_seq_inc(), - .pdu_len = len}; - memcpy(buf, &t, sizeof(t)); // prevent unaligned access - memcpy(buf + sizeof(t), pb->buf, len); - } else - atomic_inc (&_stp_transport_failures); -#endif - } -#else { - void *buf; + char *bufp = pb->buf; + + while (len > 0) { + size_t bytes_reserved; + + bytes_reserved = _stp_data_write_reserve(len, &entry); + if (likely(entry && bytes_reserved > 0)) { + memcpy(_stp_data_entry_data(entry), bufp, + bytes_reserved); + _stp_data_write_commit(entry); + bufp += bytes_reserved; + len -= bytes_reserved; + } + else { + atomic_inc(&_stp_transport_failures); + break; + } + } + } + +#else /* !NO_PERCPU_HEADERS */ + + { + char *bufp = pb->buf; + struct _stp_trace t = { .sequence = _stp_seq_inc(), + .pdu_len = len}; + size_t 
bytes_reserved; + + bytes_reserved = _stp_data_write_reserve(sizeof(struct _stp_trace), &entry); + if (likely(entry && bytes_reserved > 0)) { + /* prevent unaligned access by using memcpy() */ + memcpy(_stp_data_entry_data(entry), &t, sizeof(t)); + _stp_data_write_commit(entry); + } + else { + atomic_inc(&_stp_transport_failures); + return; + } + + while (len > 0) { + bytes_reserved = _stp_data_write_reserve(len, &entry); + if (likely(entry && bytes_reserved > 0)) { + memcpy(_stp_data_entry_data(entry), bufp, + bytes_reserved); + _stp_data_write_commit(entry); + bufp += bytes_reserved; + len -= bytes_reserved; + } + else { + atomic_inc(&_stp_transport_failures); + break; + } + } + } +#endif /* !NO_PERCPU_HEADERS */ + +#else /* !STP_BULKMODE */ + +#if STP_TRANSPORT_VERSION == 1 + + if (unlikely(_stp_ctl_write(STP_REALTIME_DATA, pb->buf, len) <= 0)) + atomic_inc (&_stp_transport_failures); + +#else /* STP_TRANSPORT_VERSION != 1 */ + { unsigned long flags; + char *bufp = pb->buf; + + dbug_trans(1, "calling _stp_data_write...\n"); spin_lock_irqsave(&_stp_print_lock, flags); - buf = utt_reserve(_stp_utt, len); - if (likely(buf)) - memcpy(buf, pb->buf, len); - else - atomic_inc (&_stp_transport_failures); + while (len > 0) { + size_t bytes_reserved; + + bytes_reserved = _stp_data_write_reserve(len, &entry); + if (likely(entry && bytes_reserved > 0)) { + memcpy(_stp_data_entry_data(entry), bufp, + bytes_reserved); + _stp_data_write_commit(entry); + bufp += bytes_reserved; + len -= bytes_reserved; + } + else { + atomic_inc(&_stp_transport_failures); + break; + } + } spin_unlock_irqrestore(&_stp_print_lock, flags); } -#endif /* STP_BULKMODE */ +#endif /* STP_TRANSPORT_VERSION != 1 */ +#endif /* !STP_BULKMODE */ } diff --git a/runtime/runtime.h b/runtime/runtime.h index c2e927cc..7418d13b 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -34,9 +34,14 @@ #if !defined (CONFIG_DEBUG_FS) && !defined (CONFIG_DEBUG_FS_MODULE) #error "DebugFS is required and was not found 
in the kernel." #endif +#ifdef CONFIG_RING_BUFFER +#define STP_TRANSPORT_VERSION 3 +#else +#define STP_TRANSPORT_VERSION 2 +#endif #else /* older kernels have no debugfs and older version of relayfs. */ -#define STP_OLD_TRANSPORT +#define STP_TRANSPORT_VERSION 1 #endif #ifndef stp_for_each_cpu @@ -45,6 +50,9 @@ static void _stp_dbug (const char *func, int line, const char *fmt, ...); static void _stp_error (const char *fmt, ...); +static void _stp_warn (const char *fmt, ...); + +static void _stp_exit(void); #include "debug.h" diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c index 205fdf37..7125a7bb 100644 --- a/runtime/staprun/mainloop.c +++ b/runtime/staprun/mainloop.c @@ -487,7 +487,7 @@ int stp_main_loop(void) nb -= sizeof(uint32_t); switch (type) { -#ifdef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 1 case STP_REALTIME_DATA: if (write_realtime_data(data, nb)) { _perr("write error (nb=%ld)", (long)nb); diff --git a/runtime/staprun/staprun.h b/runtime/staprun/staprun.h index acc533b2..bd6402e4 100644 --- a/runtime/staprun/staprun.h +++ b/runtime/staprun/staprun.h @@ -94,8 +94,9 @@ extern char *__name__; /* Grabbed from linux/module.h kernel include. */ #define MODULE_NAME_LEN (64 - sizeof(unsigned long)) -/* we define this so we are compatible with old transport, but we don't have to use it. */ -#define STP_OLD_TRANSPORT +/* We define this so we are compatible with old transport, but we + * don't have to use it. 
*/ +#define STP_TRANSPORT_VERSION 1 #include "../transport/transport_msgs.h" #define RELAYFS_MAGIC 0xF0B4A981 diff --git a/runtime/sym.c b/runtime/sym.c index 63dad1af..386005b2 100644 --- a/runtime/sym.c +++ b/runtime/sym.c @@ -12,6 +12,7 @@ #ifndef _STP_SYM_C_ #define _STP_SYM_C_ +#include "sym.h" #include "string.c" #include "task_finder_vma.c" diff --git a/runtime/transport/control.c b/runtime/transport/control.c index 680d7306..a1624152 100644 --- a/runtime/transport/control.c +++ b/runtime/transport/control.c @@ -9,6 +9,11 @@ * later version. */ +#include "control.h" +#include "../mempool.c" +#include "symbols.c" +#include <linux/delay.h> + static _stp_mempool_t *_stp_pool_q; static struct list_head _stp_ctl_ready_q; static DEFINE_SPINLOCK(_stp_ctl_ready_lock); @@ -197,7 +202,7 @@ static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, static int _stp_ctl_open_cmd(struct inode *inode, struct file *file) { - if (_stp_attached) + if (_stp_ctl_attached) return -1; _stp_attach(); return 0; @@ -205,7 +210,7 @@ static int _stp_ctl_open_cmd(struct inode *inode, struct file *file) static int _stp_ctl_close_cmd(struct inode *inode, struct file *file) { - if (_stp_attached) + if (_stp_ctl_attached) _stp_detach(); return 0; } diff --git a/runtime/transport/control.h b/runtime/transport/control.h index 5e7204ee..48289276 100644 --- a/runtime/transport/control.h +++ b/runtime/transport/control.h @@ -15,7 +15,6 @@ #include <linux/spinlock.h> #include <linux/list.h> -static _stp_mempool_t *_stp_pool_q; static struct list_head _stp_ctl_ready_q; static spinlock_t _stp_ctl_ready_lock; static wait_queue_head_t _stp_ctl_wq; diff --git a/runtime/transport/debugfs.c b/runtime/transport/debugfs.c index 85ee604d..7a08982a 100644 --- a/runtime/transport/debugfs.c +++ b/runtime/transport/debugfs.c @@ -10,6 +10,7 @@ */ #include <linux/debugfs.h> +#include "transport.h" #define STP_DEFAULT_BUFFERS 50 @@ -22,13 +23,14 @@ static struct dentry *_stp_cmd_file = NULL; static 
int _stp_register_ctl_channel_fs(void) { - if (_stp_utt == NULL) { - errk("_expected _stp_utt to be set.\n"); + struct dentry *module_dir = _stp_get_module_dir(); + if (module_dir == NULL) { + errk("no module directory found.\n"); return -1; } /* create [debugfs]/systemtap/module_name/.cmd */ - _stp_cmd_file = debugfs_create_file(".cmd", 0600, _stp_utt->dir, + _stp_cmd_file = debugfs_create_file(".cmd", 0600, module_dir, NULL, &_stp_ctl_fops_cmd); if (_stp_cmd_file == NULL) { errk("Error creating systemtap debugfs entries.\n"); diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c index 6afbdea1..9e05cc14 100644 --- a/runtime/transport/procfs.c +++ b/runtime/transport/procfs.c @@ -14,7 +14,6 @@ #define STP_DEFAULT_BUFFERS 256 #ifdef STP_BULKMODE -extern int _stp_relay_flushing; /* handle the per-cpu subbuf info read for relayfs */ static ssize_t _stp_proc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -23,13 +22,13 @@ static ssize_t _stp_proc_read(struct file *file, char __user *buf, size_t count, int cpu = *(int *)(PDE(file->f_dentry->d_inode)->data); - if (!_stp_utt->rchan) + if (!_stp_relay_data.rchan) return -EINVAL; out.cpu = cpu; - out.produced = atomic_read(&_stp_utt->rchan->buf[cpu]->subbufs_produced); - out.consumed = atomic_read(&_stp_utt->rchan->buf[cpu]->subbufs_consumed); - out.flushing = _stp_relay_flushing; + out.produced = atomic_read(&_stp_relay_data.rchan->buf[cpu]->subbufs_produced); + out.consumed = atomic_read(&_stp_relay_data.rchan->buf[cpu]->subbufs_consumed); + out.flushing = _stp_relay_data.flushing; num = sizeof(out); if (copy_to_user(buf, &out, num)) @@ -46,7 +45,7 @@ static ssize_t _stp_proc_write(struct file *file, const char __user *buf, size_t if (copy_from_user(&info, buf, count)) return -EFAULT; - relay_subbufs_consumed(_stp_utt->rchan, cpu, info.consumed); + relay_subbufs_consumed(_stp_relay_data.rchan, cpu, info.consumed); return count; } diff --git a/runtime/transport/relay_v2.c 
b/runtime/transport/relay_v2.c new file mode 100644 index 00000000..c0a772ed --- /dev/null +++ b/runtime/transport/relay_v2.c @@ -0,0 +1,348 @@ +/* -*- linux-c -*- + * + * This transport version uses relayfs on top of a debugfs file. This + * code started as a proposed relayfs interface called 'utt'. It has + * been modified and simplified for systemtap. + * + * Changes Copyright (C) 2009 Red Hat Inc. + * + * Original utt code by: + * Copyright (C) 2006 Jens Axboe <axboe@suse.de> + * Moved to utt.c by Tom Zanussi, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/debugfs.h> +#include <linux/mm.h> +#include <linux/relay.h> +#include <linux/timer.h> + +#ifndef STP_RELAY_TIMER_INTERVAL +/* Wakeup timer interval in jiffies (default 10 ms) */ +#define STP_RELAY_TIMER_INTERVAL ((HZ + 99) / 100) +#endif + +enum _stp_transport_state { + STP_TRANSPORT_STOPPED, + STP_TRANSPORT_INITIALIZED, + STP_TRANSPORT_RUNNING, +}; + +struct _stp_relay_data_type { + enum _stp_transport_state transport_state; + struct rchan *rchan; + struct dentry *dropped_file; + atomic_t dropped; + atomic_t wakeup; + struct timer_list timer; + int overwrite_flag; +}; +struct _stp_relay_data_type _stp_relay_data; + +/* + * __stp_relay_switch_subbuf - switch to a new 
sub-buffer + * + * Most of this function is deadcopy of relay_switch_subbuf. + */ +static size_t __stp_relay_switch_subbuf(struct rchan_buf *buf, size_t length) +{ + char *old, *new; + size_t old_subbuf, new_subbuf; + + if (unlikely(buf == NULL)) + return 0; + + if (unlikely(length > buf->chan->subbuf_size)) + goto toobig; + + if (buf->offset != buf->chan->subbuf_size + 1) { + buf->prev_padding = buf->chan->subbuf_size - buf->offset; + old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; + buf->padding[old_subbuf] = buf->prev_padding; + buf->subbufs_produced++; + buf->dentry->d_inode->i_size += buf->chan->subbuf_size - + buf->padding[old_subbuf]; + smp_mb(); + if (waitqueue_active(&buf->read_wait)) + /* + * Calling wake_up_interruptible() and __mod_timer() + * from here will deadlock if we happen to be logging + * from the scheduler and timer (trying to re-grab + * rq->lock/timer->base->lock), so just set a flag. + */ + atomic_set(&_stp_relay_data.wakeup, 1); + } + + old = buf->data; + new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; + new = (char*)buf->start + new_subbuf * buf->chan->subbuf_size; + buf->offset = 0; + if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) { + buf->offset = buf->chan->subbuf_size + 1; + return 0; + } + buf->data = new; + buf->padding[new_subbuf] = 0; + + if (unlikely(length + buf->offset > buf->chan->subbuf_size)) + goto toobig; + + return length; + +toobig: + buf->chan->last_toobig = length; + return 0; +} + +static void __stp_relay_wakeup_readers(struct rchan_buf *buf) +{ + if (buf && waitqueue_active(&buf->read_wait) && + buf->subbufs_produced != buf->subbufs_consumed) + wake_up_interruptible(&buf->read_wait); +} + +static void __stp_relay_wakeup_timer(unsigned long val) +{ +#ifdef STP_BULKMODE + int i; +#endif + + if (atomic_read(&_stp_relay_data.wakeup)) { + atomic_set(&_stp_relay_data.wakeup, 0); +#ifdef STP_BULKMODE + for_each_possible_cpu(i) + 
__stp_relay_wakeup_readers(_stp_relay_data.rchan->buf[i]); +#else + __stp_relay_wakeup_readers(_stp_relay_data.rchan->buf[0]); +#endif + } + + mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL); +} + +static void __stp_relay_timer_init(void) +{ + atomic_set(&_stp_relay_data.wakeup, 0); + init_timer(&_stp_relay_data.timer); + _stp_relay_data.timer.expires = jiffies + STP_RELAY_TIMER_INTERVAL; + _stp_relay_data.timer.function = __stp_relay_wakeup_timer; + _stp_relay_data.timer.data = 0; + add_timer(&_stp_relay_data.timer); + smp_mb(); +} + +static void stp_relay_set_overwrite(int overwrite) +{ + _stp_relay_data.overwrite_flag = overwrite; +} + +static int __stp_relay_dropped_open(struct inode *inode, struct file *filp) +{ + return 0; +} + +static ssize_t __stp_relay_dropped_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + char buf[16]; + + snprintf(buf, sizeof(buf), "%u\n", + atomic_read(&_stp_relay_data.dropped)); + + return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); +} + +static struct file_operations __stp_relay_dropped_fops = { + .owner = THIS_MODULE, + .open = __stp_relay_dropped_open, + .read = __stp_relay_dropped_read, +}; + +/* + * Keep track of how many times we encountered a full subbuffer, to aid + * the user space app in telling how many lost events there were. 
+ */ +static int __stp_relay_subbuf_start_callback(struct rchan_buf *buf, + void *subbuf, void *prev_subbuf, + size_t prev_padding) +{ + if (_stp_relay_data.overwrite_flag || !relay_buf_full(buf)) + return 1; + + atomic_inc(&_stp_relay_data.dropped); + return 0; +} + +static int __stp_relay_remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +static struct dentry * +__stp_relay_create_buf_file_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *file = debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); + if (file) { + file->d_inode->i_uid = _stp_uid; + file->d_inode->i_gid = _stp_gid; + } + return file; +} + +static struct rchan_callbacks __stp_relay_callbacks = { + .subbuf_start = __stp_relay_subbuf_start_callback, + .create_buf_file = __stp_relay_create_buf_file_callback, + .remove_buf_file = __stp_relay_remove_buf_file_callback, +}; + +static void _stp_transport_data_fs_close(void) +{ + if (_stp_relay_data.transport_state == STP_TRANSPORT_RUNNING) + del_timer_sync(&_stp_relay_data.timer); + + if (_stp_relay_data.dropped_file) + debugfs_remove(_stp_relay_data.dropped_file); + if (_stp_relay_data.rchan) { + relay_flush(_stp_relay_data.rchan); + relay_close(_stp_relay_data.rchan); + } + _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; +} + +static int _stp_transport_data_fs_init(void) +{ + int rc; + u64 npages; + struct sysinfo si; + + _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; + _stp_relay_data.overwrite_flag = 0; + atomic_set(&_stp_relay_data.dropped, 0); + _stp_relay_data.dropped_file = NULL; + _stp_relay_data.rchan = NULL; + + /* Create "dropped" file. 
*/ + _stp_relay_data.dropped_file + = debugfs_create_file("dropped", 0444, _stp_get_module_dir(), + NULL, &__stp_relay_dropped_fops); + if (!_stp_relay_data.dropped_file) { + rc = -EIO; + goto err; + } + _stp_relay_data.dropped_file->d_inode->i_uid = _stp_uid; + _stp_relay_data.dropped_file->d_inode->i_gid = _stp_gid; + + /* Create "trace" file. */ + npages = _stp_subbuf_size * _stp_nsubbufs; +#ifdef STP_BULKMODE + npages *= num_possible_cpus(); +#endif + npages >>= PAGE_SHIFT; + si_meminfo(&si); +#define MB(i) (unsigned long)((i) >> (20 - PAGE_SHIFT)) + if (npages > (si.freeram + si.bufferram)) { + errk("Not enough free+buffered memory(%luMB) for log buffer(%luMB)\n", + MB(si.freeram + si.bufferram), + MB(npages)); + rc = -ENOMEM; + goto err; + } + else if (npages > si.freeram) { + /* exceeds freeram, but below freeram+bufferram */ + printk(KERN_WARNING + "log buffer size exceeds free memory(%luMB)\n", + MB(si.freeram)); + } +#if (RELAYFS_CHANNEL_VERSION >= 7) + _stp_relay_data.rchan = relay_open("trace", _stp_get_module_dir(), + _stp_subbuf_size, _stp_nsubbufs, + &__stp_relay_callbacks, NULL); +#else /* (RELAYFS_CHANNEL_VERSION < 7) */ + _stp_relay_data.rchan = relay_open("trace", _stp_get_module_dir(), + _stp_subbuf_size, _stp_nsubbufs, + &__stp_relay_callbacks); +#endif /* (RELAYFS_CHANNEL_VERSION < 7) */ + if (!_stp_relay_data.rchan) { + rc = -ENOENT; + goto err; + } + dbug_trans(1, "returning 0...\n"); + _stp_relay_data.transport_state = STP_TRANSPORT_INITIALIZED; + + /* We're initialized. Now start the timer. */ + __stp_relay_timer_init(); + _stp_relay_data.transport_state = STP_TRANSPORT_RUNNING; + + return 0; + +err: + _stp_transport_data_fs_close(); + return rc; +} + + +/** + * _stp_data_write_reserve - try to reserve size_request bytes + * @size_request: number of bytes to attempt to reserve + * @entry: entry is returned here + * + * Returns number of bytes reserved, 0 if full. On return, entry + * will point to allocated opaque pointer. 
Use + * _stp_data_entry_data() to get pointer to copy data into. + * + * (For this code's purposes, entry is filled in with the actual + * data pointer, but the caller doesn't know that.) + */ +static size_t +_stp_data_write_reserve(size_t size_request, void **entry) +{ + struct rchan_buf *buf; + + if (entry == NULL) + return -EINVAL; + + buf = _stp_relay_data.rchan->buf[smp_processor_id()]; + if (unlikely(buf->offset + size_request > buf->chan->subbuf_size)) { + size_request = __stp_relay_switch_subbuf(buf, size_request); + if (!size_request) + return 0; + } + *entry = (char*)buf->data + buf->offset; + buf->offset += size_request; + + return size_request; +} + +static unsigned char *_stp_data_entry_data(void *entry) +{ + /* Nothing to do here. */ + return entry; +} + +static int _stp_data_write_commit(void *entry) +{ + /* Nothing to do here. */ + return 0; +} diff --git a/runtime/transport/relayfs.c b/runtime/transport/relayfs.c index 6eefda8d..dac6db1d 100644 --- a/runtime/transport/relayfs.c +++ b/runtime/transport/relayfs.c @@ -22,17 +22,33 @@ #include <linux/init.h> #include <linux/relayfs_fs.h> #include <linux/namei.h> -#include "utt.h" -static int _stp_relay_flushing = 0; +enum _stp_transport_state { + STP_TRANSPORT_STOPPED, + STP_TRANSPORT_INITIALIZED, + STP_TRANSPORT_RUNNING, +}; + +struct _stp_relay_data_type { + enum _stp_transport_state transport_state; + struct rchan *rchan; + int flushing; +}; +struct _stp_relay_data_type _stp_relay_data; + +/* We need to include procfs.c here so that it can see the + * _stp_relay_data_type definition. 
*/ +#include "procfs.c" /** - * _stp_subbuf_start - subbuf_start() relayfs callback implementation + * __stp_relay_subbuf_start_callback - subbuf_start() relayfs + * callback implementation */ -static int _stp_subbuf_start(struct rchan_buf *buf, - void *subbuf, - unsigned prev_subbuf_idx, - void *prev_subbuf) +static int +__stp_relay_subbuf_start_callback(struct rchan_buf *buf, + void *subbuf, + unsigned prev_subbuf_idx, + void *prev_subbuf) { unsigned padding = buf->padding[prev_subbuf_idx]; if (prev_subbuf) @@ -42,11 +58,12 @@ static int _stp_subbuf_start(struct rchan_buf *buf, } /** - * _stp_buf_full - buf_full() relayfs callback implementation + * __stp_relay_buf_full_callback - buf_full() relayfs callback + * implementation */ -static void _stp_buf_full(struct rchan_buf *buf, - unsigned subbuf_idx, - void *subbuf) +static void __stp_relay_buf_full_callback(struct rchan_buf *buf, + unsigned subbuf_idx, + void *subbuf) { unsigned padding = buf->padding[subbuf_idx]; *((unsigned *)subbuf) = padding; @@ -54,126 +71,94 @@ static void _stp_buf_full(struct rchan_buf *buf, static struct rchan_callbacks stp_rchan_callbacks = { - .subbuf_start = _stp_subbuf_start, - .buf_full = _stp_buf_full, + .subbuf_start = __stp_relay_subbuf_start_callback, + .buf_full = __stp_relay_buf_full_callback, }; - -static void _stp_remove_relay_dir(struct dentry *dir) -{ - if (dir) - relayfs_remove_dir(dir); -} - -static void _stp_remove_relay_root(struct dentry *root) +static void _stp_transport_data_fs_close(void) { - if (root) { - if (!_stp_lock_transport_dir()) { - errk("Unable to lock transport directory.\n"); - return; - } - _stp_remove_relay_dir(root); - _stp_unlock_transport_dir(); + if (_stp_relay_data.rchan) { + _stp_relay_data.flushing = 1; + relay_flush(_stp_relay_data.rchan); + relay_close(_stp_relay_data.rchan); } + _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; } -static struct utt_trace *utt_trace_setup(struct utt_trace_setup *utts) +static int 
_stp_transport_data_fs_init(void) { - struct utt_trace *utt; + int rc = 0; int i; - utt = _stp_kzalloc(sizeof(*utt)); - if (!utt) - return NULL; - - utt->utt_tree_root = _stp_get_root_dir(utts->root); - if (!utt->utt_tree_root) - goto err; - - utt->dir = relayfs_create_dir(utts->name, utt->utt_tree_root); - if (!utt->dir) - goto err; - - dbug_trans(1, "relay_open %d %d\n", utts->buf_size, utts->buf_nr); + dbug_trans(1, "relay_open %d %d\n", _stp_subbuf_size, _stp_nsubbufs); + _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; + _stp_relay_data.flushing = 0; - utt->rchan = relay_open("trace", utt->dir, utts->buf_size, - utts->buf_nr, 0, &stp_rchan_callbacks); - if (!utt->rchan) + /* Create "trace" file. */ + _stp_relay_data.rchan = relay_open("trace", _stp_get_module_dir(), + _stp_subbuf_size, _stp_nsubbufs, + 0, &stp_rchan_callbacks); + if (!_stp_relay_data.rchan) { + rc = -ENOENT; goto err; + } /* now set ownership */ for_each_online_cpu(i) { - utt->rchan->buf[i]->dentry->d_inode->i_uid = _stp_uid; - utt->rchan->buf[i]->dentry->d_inode->i_gid = _stp_gid; + _stp_relay_data.rchan->buf[i]->dentry->d_inode->i_uid + = _stp_uid; + _stp_relay_data.rchan->buf[i]->dentry->d_inode->i_gid + = _stp_gid; } - utt->rchan->private_data = utt; - utt->trace_state = Utt_trace_setup; - utts->err = 0; - return utt; + /* We're off and running. 
*/ + smp_mb(); + _stp_relay_data.transport_state = STP_TRANSPORT_RUNNING; + return rc; err: errk("couldn't create relay channel.\n"); - if (utt->dir) - _stp_remove_relay_dir(utt->dir); - if (utt->utt_tree_root) - _stp_remove_relay_root(utt->utt_tree_root); - _stp_kfree(utt); - return NULL; + _stp_transport_data_fs_close(); + return rc; } -static void utt_set_overwrite(int overwrite) +static void stp_relay_set_overwrite(int overwrite) { - if (_stp_utt) - _stp_utt->rchan->overwrite = overwrite; + _stp_relay_data.rchan->overwrite = overwrite; } -static int utt_trace_startstop(struct utt_trace *utt, int start, - unsigned int *trace_seq) +/** + * _stp_data_write_reserve - try to reserve size_request bytes + * @size_request: number of bytes to attempt to reserve + * @entry: entry is returned here + * + * Returns number of bytes reserved, 0 if full. On return, entry + * will point to allocated opaque pointer. Use + * _stp_data_entry_data() to get pointer to copy data into. + * + * (For this code's purposes, entry is filled in with the actual + * data pointer, but the caller doesn't know that.) + */ +static size_t +_stp_data_write_reserve(size_t size_request, void **entry) { - int ret; + if (entry == NULL) + return -EINVAL; - if (!utt) + *entry = relay_reserve(_stp_relay_data.rchan, size_request); + if (*entry == NULL) return 0; - - /* - * For starting a trace, we can transition from a setup or stopped - * trace. 
For stopping a trace, the state must be running - */ - ret = -EINVAL; - if (start) { - if (utt->trace_state == Utt_trace_setup || - utt->trace_state == Utt_trace_stopped) { - if (trace_seq) - (*trace_seq)++; - smp_mb(); - utt->trace_state = Utt_trace_running; - ret = 0; - } - } else { - if (utt->trace_state == Utt_trace_running) { - utt->trace_state = Utt_trace_stopped; - _stp_relay_flushing = 1; - relay_flush(utt->rchan); - ret = 0; - } - } - - return ret; + return size_request; } +static unsigned char *_stp_data_entry_data(void *entry) +{ + /* Nothing to do here. */ + return entry; +} -static int utt_trace_remove(struct utt_trace *utt) +static int _stp_data_write_commit(void *entry) { - dbug_trans(1, "removing relayfs files. %d\n", utt->trace_state); - if (utt && (utt->trace_state == Utt_trace_setup || utt->trace_state == Utt_trace_stopped)) { - if (utt->rchan) - relay_close(utt->rchan); - if (utt->dir) - _stp_remove_relay_dir(utt->dir); - if (utt->utt_tree_root) - _stp_remove_relay_root(utt->utt_tree_root); - _stp_kfree(utt); - } + /* Nothing to do here. 
*/ return 0; } diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c new file mode 100644 index 00000000..0385e7d3 --- /dev/null +++ b/runtime/transport/ring_buffer.c @@ -0,0 +1,433 @@ +#include <linux/types.h> +#include <linux/ring_buffer.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/cpumask.h> + +struct _stp_data_entry { + size_t len; + unsigned char buf[]; +}; + +static struct ring_buffer *__stp_ring_buffer = NULL; + +/* _stp_poll_wait is a waitqueue for tasks blocked on + * _stp_data_poll_trace() */ +static DECLARE_WAIT_QUEUE_HEAD(_stp_poll_wait); + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct _stp_ring_buffer_data { + int cpu; + u64 ts; +}; +static struct _stp_ring_buffer_data _stp_rb_data; + +static cpumask_var_t _stp_trace_reader_cpumask; + +static void __stp_free_ring_buffer(void) +{ + free_cpumask_var(_stp_trace_reader_cpumask); + if (__stp_ring_buffer) + ring_buffer_free(__stp_ring_buffer); + __stp_ring_buffer = NULL; +} + +static int __stp_alloc_ring_buffer(void) +{ + int i; + unsigned long buffer_size = _stp_bufsize; + + if (!alloc_cpumask_var(&_stp_trace_reader_cpumask, GFP_KERNEL)) + goto fail; + cpumask_clear(_stp_trace_reader_cpumask); + + if (buffer_size == 0) { + dbug_trans(1, "using default buffer size...\n"); + buffer_size = _stp_nsubbufs * _stp_subbuf_size; + } + /* The number passed to ring_buffer_alloc() is per cpu. Our + * 'buffer_size' is a total number of bytes to allocate. So, + * we need to divide buffer_size by the number of cpus. 
*/ + buffer_size /= num_online_cpus(); + dbug_trans(1, "%lu\n", buffer_size); + __stp_ring_buffer = ring_buffer_alloc(buffer_size, 0); + if (!__stp_ring_buffer) + goto fail; + + dbug_trans(1, "size = %lu\n", ring_buffer_size(__stp_ring_buffer)); + return 0; + +fail: + __stp_free_ring_buffer(); + return -ENOMEM; +} + +static int _stp_data_open_trace(struct inode *inode, struct file *file) +{ + long cpu_file = (long) inode->i_private; + + /* We only allow for one reader per cpu */ + dbug_trans(1, "trace attach\n"); +#ifdef STP_BULKMODE + if (!cpumask_test_cpu(cpu_file, _stp_trace_reader_cpumask)) + cpumask_set_cpu(cpu_file, _stp_trace_reader_cpumask); + else { + dbug_trans(1, "returning EBUSY\n"); + return -EBUSY; + } +#else + if (!cpumask_empty(_stp_trace_reader_cpumask)) { + dbug_trans(1, "returning EBUSY\n"); + return -EBUSY; + } + cpumask_setall(_stp_trace_reader_cpumask); +#endif + file->private_data = inode->i_private; + return 0; +} + +static int _stp_data_release_trace(struct inode *inode, struct file *file) +{ + long cpu_file = (long) inode->i_private; + dbug_trans(1, "trace detach\n"); +#ifdef STP_BULKMODE + cpumask_clear_cpu(cpu_file, _stp_trace_reader_cpumask); +#else + cpumask_clear(_stp_trace_reader_cpumask); +#endif + + return 0; +} + +size_t +_stp_event_to_user(struct ring_buffer_event *event, char __user *ubuf, + size_t cnt) +{ + int ret; + struct _stp_data_entry *entry; + + dbug_trans(1, "event(%p), ubuf(%p), cnt(%lu)\n", event, ubuf, cnt); + if (event == NULL || ubuf == NULL) + return -EFAULT; + + entry = (struct _stp_data_entry *)ring_buffer_event_data(event); + if (entry == NULL) + return -EFAULT; + + /* We don't do partial entries - just fail. 
*/ + if (entry->len > cnt) + return -EBUSY; + + if (cnt > entry->len) + cnt = entry->len; + ret = copy_to_user(ubuf, entry->buf, cnt); + if (ret) + return -EFAULT; + + return cnt; +} + +static ssize_t tracing_wait_pipe(struct file *filp) +{ + while (ring_buffer_empty(__stp_ring_buffer)) { + + if ((filp->f_flags & O_NONBLOCK)) { + dbug_trans(1, "returning -EAGAIN\n"); + return -EAGAIN; + } + + /* + * This is a make-shift waitqueue. The reason we don't use + * an actual wait queue is because: + * 1) we only ever have one waiter + * 2) the tracing, traces all functions, we don't want + * the overhead of calling wake_up and friends + * (and tracing them too) + * Anyway, this is really very primitive wakeup. + */ + set_current_state(TASK_INTERRUPTIBLE); + + /* sleep for 100 msecs, and try again. */ + schedule_timeout(HZ/10); + + if (signal_pending(current)) { + dbug_trans(1, "returning -EINTR\n"); + return -EINTR; + } + } + + dbug_trans(1, "returning 1\n"); + return 1; +} + +static struct ring_buffer_event * +peek_next_event(int cpu, u64 *ts) +{ + return ring_buffer_peek(__stp_ring_buffer, cpu, ts); +} + +/* Find the next real event */ +static struct ring_buffer_event * +_stp_find_next_event(long cpu_file) +{ + struct ring_buffer_event *event; + +#ifdef STP_BULKMODE + /* + * If we are in a per_cpu trace file, don't bother by iterating over + * all cpus and peek directly. 
+ */ + if (ring_buffer_empty_cpu(__stp_ring_buffer, (int)cpu_file)) + return NULL; + event = peek_next_event(cpu_file, &_stp_rb_data.ts); + _stp_rb_data.cpu = cpu_file; + + return event; +#else + struct ring_buffer_event *next = NULL; + u64 next_ts = 0, ts; + int next_cpu = -1; + int cpu; + + for_each_possible_cpu(cpu) { + + if (ring_buffer_empty_cpu(__stp_ring_buffer, cpu)) + continue; + + event = peek_next_event(cpu, &ts); + + /* + * Pick the event with the smallest timestamp: + */ + if (event && (!next || ts < next_ts)) { + next = event; + next_cpu = cpu; + next_ts = ts; + } + } + + _stp_rb_data.cpu = next_cpu; + _stp_rb_data.ts = next_ts; + + return next; +#endif +} + + +/* + * Consumer reader. + */ +static ssize_t +_stp_data_read_trace(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + ssize_t sret; + struct ring_buffer_event *event; + long cpu_file = (long) filp->private_data; + + dbug_trans(1, "%lu\n", (unsigned long)cnt); + + sret = tracing_wait_pipe(filp); + dbug_trans(1, "tracing_wait_pipe returned %ld\n", sret); + if (sret <= 0) + goto out; + + /* stop when tracing is finished */ + if (ring_buffer_empty(__stp_ring_buffer)) { + sret = 0; + goto out; + } + + if (cnt >= PAGE_SIZE) + cnt = PAGE_SIZE - 1; + + dbug_trans(1, "sret = %lu\n", (unsigned long)sret); + sret = 0; + while ((event = _stp_find_next_event(cpu_file)) != NULL) { + ssize_t len; + + len = _stp_event_to_user(event, ubuf, cnt); + if (len <= 0) + break; + + ring_buffer_consume(__stp_ring_buffer, _stp_rb_data.cpu, + &_stp_rb_data.ts); + ubuf += len; + cnt -= len; + sret += len; + if (cnt <= 0) + break; + } +out: + return sret; +} + + +static unsigned int +_stp_data_poll_trace(struct file *filp, poll_table *poll_table) +{ + dbug_trans(1, "entry\n"); + if (! ring_buffer_empty(__stp_ring_buffer)) + return POLLIN | POLLRDNORM; + poll_wait(filp, &_stp_poll_wait, poll_table); + if (! 
ring_buffer_empty(__stp_ring_buffer)) + return POLLIN | POLLRDNORM; + + dbug_trans(1, "exit\n"); + return 0; +} + +static struct file_operations __stp_data_fops = { + .owner = THIS_MODULE, + .open = _stp_data_open_trace, + .release = _stp_data_release_trace, + .poll = _stp_data_poll_trace, + .read = _stp_data_read_trace, +#if 0 + .splice_read = tracing_splice_read_pipe, +#endif +}; + +/* + * Here's how __STP_MAX_RESERVE_SIZE is figured. The value of + * BUF_PAGE_SIZE was gotten from the kernel's ring_buffer code. It + * is divided by 4, so we waste a maximum of 1/4 of the buffer (in + * the case of a small reservation). + */ +#define __STP_MAX_RESERVE_SIZE ((/*BUF_PAGE_SIZE*/ 4080 / 4) \ + - sizeof(struct _stp_data_entry) \ + - sizeof(struct ring_buffer_event)) + +/* + * _stp_data_write_reserve - reserve room for one data entry + * size_request: number of payload bytes wanted + * entry: receives the handle of the reserved ring buffer event + * + * The request is capped at __STP_MAX_RESERVE_SIZE, so fewer bytes than + * requested may be reserved. Copy the payload to the pointer returned + * by _stp_data_entry_data() and then call _stp_data_write_commit(). + * + * Returns the number of payload bytes reserved, or 0 if entry is NULL + * or the reservation failed (in which case *entry is set to NULL). + */ +static size_t +_stp_data_write_reserve(size_t size_request, void **entry) +{ + struct ring_buffer_event *event; + struct _stp_data_entry *sde; + + /* The return type is unsigned, so report "nothing reserved". */ + if (entry == NULL) + return 0; + + if (size_request > __STP_MAX_RESERVE_SIZE) { + size_request = __STP_MAX_RESERVE_SIZE; + } + + event = ring_buffer_lock_reserve(__stp_ring_buffer, + sizeof(struct _stp_data_entry) + size_request, + 0); + if (unlikely(! 
event)) { + dbug_trans(1, "event = NULL (%p)?\n", event); + *entry = NULL; + return 0; + } + + sde = (struct _stp_data_entry *)ring_buffer_event_data(event); + sde->len = size_request; + + *entry = event; + return size_request; +} + +/* + * Return the payload buffer of a reserved entry, or NULL if entry is + * NULL. + */ +static unsigned char *_stp_data_entry_data(void *entry) +{ + struct ring_buffer_event *event = entry; + struct _stp_data_entry *sde; + + if (event == NULL) + return NULL; + + sde = (struct _stp_data_entry *)ring_buffer_event_data(event); + return sde->buf; +} + +/* + * Commit a reserved entry to the ring buffer and wake up anyone + * waiting on _stp_poll_wait. Returns -EINVAL for a NULL entry, + * otherwise the result of ring_buffer_unlock_commit(). + */ +static int _stp_data_write_commit(void *entry) +{ + int ret; + struct ring_buffer_event *event = (struct ring_buffer_event *)entry; + + if (unlikely(! entry)) { + dbug_trans(1, "entry = NULL, returning -EINVAL\n"); + return -EINVAL; + } + + ret = ring_buffer_unlock_commit(__stp_ring_buffer, event, 0); + dbug_trans(1, "after commit, empty returns %d\n", + ring_buffer_empty(__stp_ring_buffer)); + + wake_up_interruptible(&_stp_poll_wait); + return ret; +} + + +static struct dentry *__stp_entry[NR_CPUS] = { NULL }; + +/* + * Allocate the ring buffer and create the debugfs "traceN" file(s) in + * the module directory. Returns 0 on success or a negative errno; on + * failure the files created so far and the ring buffer are released. 
+ */ +static int _stp_transport_data_fs_init(void) +{ + int rc; + long cpu; + + // allocate buffer + dbug_trans(1, "entry...\n"); + rc = __stp_alloc_ring_buffer(); + if (rc != 0) + return rc; + + // create file(s) + for_each_online_cpu(cpu) { + char cpu_file[9]; /* 5(trace) + 3(XXX) + 1(\0) = 9 */ + + if (cpu > 999 || cpu < 0) { + _stp_transport_data_fs_close(); + return -EINVAL; + } + snprintf(cpu_file, sizeof(cpu_file), "trace%ld", cpu); + __stp_entry[cpu] = debugfs_create_file(cpu_file, 0600, + _stp_get_module_dir(), + (void *)cpu, + &__stp_data_fops); + + if (!__stp_entry[cpu]) { + pr_warning("Could not create debugfs 'trace' entry\n"); + /* Also removes the files created for earlier cpus. */ + _stp_transport_data_fs_close(); + return -ENOENT; + } + __stp_entry[cpu]->d_inode->i_uid = _stp_uid; + __stp_entry[cpu]->d_inode->i_gid = _stp_gid; + +#ifndef STP_BULKMODE + /* NOTE(review): this only breaks after a file was created for the first non-zero cpu too -- confirm that is intended. */ + if (cpu != 0) + break; +#endif + } + + dbug_trans(1, "returning 0...\n"); + return 0; +} + +static void _stp_transport_data_fs_close(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + 
if (__stp_entry[cpu]) + debugfs_remove(__stp_entry[cpu]); + __stp_entry[cpu] = NULL; + } + + __stp_free_ring_buffer(); +} + diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c index 762c0a92..792ea815 100644 --- a/runtime/transport/transport.c +++ b/runtime/transport/transport.c @@ -14,28 +14,55 @@ #ifndef _TRANSPORT_TRANSPORT_C_ #define _TRANSPORT_TRANSPORT_C_ -#include <linux/delay.h> -#include <linux/namei.h> #include "transport.h" -#include "time.c" -#include "../mempool.c" -#include "symbols.c" +#include <linux/debugfs.h> +#include <linux/namei.h> +#include <linux/workqueue.h> +#include <linux/delay.h> + +#if 0 +static void utt_set_overwrite(int overwrite) +{ + return; +} +#endif -static struct utt_trace *_stp_utt = NULL; -static unsigned int utt_seq = 1; -static int _stp_probes_started = 0; -static pid_t _stp_target = 0; static int _stp_exit_flag = 0; + +static uid_t _stp_uid = 0; +static gid_t _stp_gid = 0; +static int _stp_pid = 0; + +static int _stp_ctl_attached = 0; + +static pid_t _stp_target = 0; +static int _stp_probes_started = 0; + +#if 1 +//static struct utt_trace *_stp_utt = NULL; +//static unsigned int utt_seq = 1; #include "control.h" -#ifdef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 1 #include "relayfs.c" -#include "procfs.c" -#else -#include "utt.c" +#elif STP_TRANSPORT_VERSION == 2 +#include "relay_v2.c" #include "debugfs.c" +#elif STP_TRANSPORT_VERSION == 3 +#include "ring_buffer.c" +#include "debugfs.c" +#else +#error "Unknown STP_TRANSPORT_VERSION" #endif #include "control.c" +#else /* #if 0 */ +#include "control.h" +#include "debugfs.c" +#include "control.c" +#endif /* if 0 */ +static unsigned _stp_nsubbufs = 8; +static unsigned _stp_subbuf_size = 65536*4; + /* module parameters */ static int _stp_bufsize; module_param(_stp_bufsize, int, 0); @@ -44,7 +71,6 @@ MODULE_PARM_DESC(_stp_bufsize, "buffer size"); /* forward declarations */ static void probe_exit(void); static int probe_start(void); -static void 
_stp_exit(void); /* check for new workqueue API */ #ifdef DECLARE_DELAYED_WORK @@ -88,14 +114,15 @@ static void _stp_handle_start(struct _stp_msg_start *st) /* when someone does /sbin/rmmod on a loaded systemtap module. */ static void _stp_cleanup_and_exit(int send_exit) { - static int called = 0; - if (!called) { + static int _stp_exit_called = 0; + + if (!_stp_exit_called) { int failures; dbug_trans(1, "cleanup_and_exit (%d)\n", send_exit); _stp_exit_flag = 1; /* we only want to do this stuff once */ - called = 1; + _stp_exit_called = 1; if (_stp_probes_started) { dbug_trans(1, "calling probe_exit\n"); @@ -109,8 +136,10 @@ static void _stp_cleanup_and_exit(int send_exit) _stp_warn("There were %d transport failures.\n", failures); dbug_trans(1, "************** calling startstop 0 *************\n"); +#if 0 if (_stp_utt) utt_trace_startstop(_stp_utt, 0, &utt_seq); +#endif dbug_trans(1, "ctl_send STP_EXIT\n"); if (send_exit) @@ -137,11 +166,13 @@ static void _stp_request_exit(void) static void _stp_detach(void) { dbug_trans(1, "detach\n"); - _stp_attached = 0; + _stp_ctl_attached = 0; _stp_pid = 0; +#if 0 if (!_stp_exit_flag) utt_set_overwrite(1); +#endif cancel_delayed_work(&_stp_work); wake_up_interruptible(&_stp_ctl_wq); @@ -153,9 +184,11 @@ static void _stp_detach(void) static void _stp_attach(void) { dbug_trans(1, "attach\n"); - _stp_attached = 1; + _stp_ctl_attached = 1; _stp_pid = current->pid; +#if 0 utt_set_overwrite(0); +#endif queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER); } @@ -182,7 +215,7 @@ static void _stp_work_queue(void *data) /* if exit flag is set AND we have finished with probe_start() */ if (unlikely(_stp_exit_flag && _stp_probes_started)) _stp_request_exit(); - if (likely(_stp_attached)) + if (likely(_stp_ctl_attached)) queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER); } @@ -192,19 +225,25 @@ static void _stp_work_queue(void *data) * This is called automatically when the module is unloaded. 
* */ -static void _stp_transport_close() +static void _stp_transport_close(void) { - dbug_trans(1, "%d: ************** transport_close *************\n", current->pid); + dbug_trans(1, "%d: ************** transport_close *************\n", + current->pid); _stp_cleanup_and_exit(0); destroy_workqueue(_stp_wq); _stp_unregister_ctl_channel(); +#if 0 if (_stp_utt) utt_trace_remove(_stp_utt); +#endif /* #if 0 */ _stp_print_cleanup(); /* free print buffers */ _stp_mem_debug_done(); + _stp_transport_fs_close(); + dbug_trans(1, "---- CLOSED ----\n"); } +#if 0 static struct utt_trace *_stp_utt_open(void) { struct utt_trace_setup utts; @@ -221,6 +260,7 @@ static struct utt_trace *_stp_utt_open(void) return utt_trace_setup(&utts); } +#endif /* #if 0 */ /** * _stp_transport_init() is called from the module initialization. @@ -229,7 +269,6 @@ static struct utt_trace *_stp_utt_open(void) static int _stp_transport_init(void) { dbug_trans(1, "transport_init\n"); - _stp_init_pid = current->pid; #ifdef STAPCONF_TASK_UID _stp_uid = current->uid; _stp_gid = current->gid; @@ -238,6 +277,8 @@ static int _stp_transport_init(void) _stp_gid = current_gid(); #endif +// DRS: is RELAY_GUEST/RELAY_HOST documented? does it work? are there +// test cases? 
#ifdef RELAY_GUEST /* Guest scripts use relay only for reporting warnings and errors */ _stp_subbuf_size = 65536; @@ -255,12 +296,18 @@ static int _stp_transport_init(void) dbug_trans(1, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size); } + if (_stp_transport_fs_init(THIS_MODULE->name) != 0) + goto err0; + +#if 0 #if !defined (STP_OLD_TRANSPORT) || defined (STP_BULKMODE) /* open utt (relayfs) channel to send data to userspace */ _stp_utt = _stp_utt_open(); if (!_stp_utt) goto err0; #endif +#else /* #if 0 */ +#endif /* #if 0 */ /* create control channel */ if (_stp_register_ctl_channel() < 0) @@ -270,29 +317,38 @@ static int _stp_transport_init(void) if (_stp_print_init() < 0) goto err2; +#if 0 /* start transport */ utt_trace_startstop(_stp_utt, 1, &utt_seq); +#endif /* #if 0 */ /* create workqueue of kernel threads */ _stp_wq = create_workqueue("systemtap"); if (!_stp_wq) goto err3; - _stp_transport_state = 1; - /* Signal stapio to send us STP_START back (XXX: ?!?!?!). */ _stp_ctl_send(STP_TRANSPORT, NULL, 0); + dbug_trans(1, "returning 0...\n"); return 0; err3: + dbug_trans(1, "err3\n"); _stp_print_cleanup(); err2: + dbug_trans(1, "err2\n"); _stp_unregister_ctl_channel(); err1: +#if 0 if (_stp_utt) utt_trace_remove(_stp_utt); +#else + dbug_trans(1, "err1\n"); + _stp_transport_fs_close(); +#endif /* #if 0 */ err0: + dbug_trans(1, "err0\n"); return -1; } @@ -319,7 +375,7 @@ static struct dentry *_stp_lockfile = NULL; static int _stp_lock_transport_dir(void) { int numtries = 0; -#ifdef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 1 while ((_stp_lockfile = relayfs_create_dir("systemtap_lock", NULL)) == NULL) { #else while ((_stp_lockfile = debugfs_create_dir("systemtap_lock", NULL)) == NULL) { @@ -334,7 +390,7 @@ static int _stp_lock_transport_dir(void) static void _stp_unlock_transport_dir(void) { if (_stp_lockfile) { -#ifdef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 1 relayfs_remove_dir(_stp_lockfile); #else debugfs_remove(_stp_lockfile); 
@@ -343,51 +399,124 @@ static void _stp_unlock_transport_dir(void) } } -/* _stp_get_root_dir(name) - creates root directory 'name' or */ -/* returns a pointer to it if it already exists. Used in */ -/* utt.c and relayfs.c. Will not be necessary if utt is included */ -/* in the kernel. */ +static struct dentry *__stp_root_dir = NULL; + +/* _stp_get_root_dir() - creates root directory or returns + * a pointer to it if it already exists. */ -static struct dentry *_stp_get_root_dir(const char *name) +static struct dentry *_stp_get_root_dir(void) { struct file_system_type *fs; - struct dentry *root; struct super_block *sb; + const char *name = "systemtap"; -#ifdef STP_OLD_TRANSPORT + if (__stp_root_dir != NULL) { + return __stp_root_dir; + } + +#if STP_TRANSPORT_VERSION == 1 fs = get_fs_type("relayfs"); + if (!fs) { + errk("Couldn't find relayfs filesystem.\n"); + return NULL; + } #else fs = get_fs_type("debugfs"); -#endif if (!fs) { - errk("Couldn't find debugfs or relayfs filesystem.\n"); + errk("Couldn't find debugfs filesystem.\n"); return NULL; } +#endif if (!_stp_lock_transport_dir()) { errk("Couldn't lock transport directory.\n"); return NULL; } -#ifdef STP_OLD_TRANSPORT - root = relayfs_create_dir(name, NULL); +#if STP_TRANSPORT_VERSION == 1 + __stp_root_dir = relayfs_create_dir(name, NULL); #else - root = debugfs_create_dir(name, NULL); + __stp_root_dir = debugfs_create_dir(name, NULL); #endif - if (!root) { - /* couldn't create it because it is already there, so find it. */ - sb = list_entry(fs->fs_supers.next, struct super_block, s_instances); + if (!__stp_root_dir) { + /* Couldn't create it because it is already there, so + * find it. 
*/ + sb = list_entry(fs->fs_supers.next, struct super_block, + s_instances); _stp_lock_inode(sb->s_root->d_inode); - root = lookup_one_len(name, sb->s_root, strlen(name)); + __stp_root_dir = lookup_one_len(name, sb->s_root, + strlen(name)); _stp_unlock_inode(sb->s_root->d_inode); - if (!IS_ERR(root)) - dput(root); + if (!IS_ERR(__stp_root_dir)) + dput(__stp_root_dir); else { - root = NULL; + __stp_root_dir = NULL; errk("Could not create or find transport directory.\n"); } } _stp_unlock_transport_dir(); - return root; + return __stp_root_dir; } +static void _stp_remove_root_dir(void) +{ + if (__stp_root_dir) { + if (!_stp_lock_transport_dir()) { + errk("Unable to lock transport directory.\n"); + return; + } + if (simple_empty(__stp_root_dir)) + debugfs_remove(__stp_root_dir); + _stp_unlock_transport_dir(); + __stp_root_dir = NULL; + } +} + +static struct dentry *__stp_module_dir = NULL; + +static struct dentry *_stp_get_module_dir(void) +{ + return __stp_module_dir; +} + +static int _stp_transport_fs_init(const char *module_name) +{ + struct dentry *root_dir; + + dbug_trans(1, "entry\n"); + if (module_name == NULL) + return -1; + + root_dir = _stp_get_root_dir(); + if (root_dir == NULL) + return -1; + + __stp_module_dir = debugfs_create_dir(module_name, root_dir); + if (!__stp_module_dir) { + _stp_remove_root_dir(); + return -1; + } + + if (_stp_transport_data_fs_init() != 0) { + _stp_remove_root_dir(); + return -1; + } + dbug_trans(1, "returning 0\n"); + return 0; +} + +static void _stp_transport_fs_close(void) +{ + dbug_trans(1, "stp_transport_fs_close\n"); + + _stp_transport_data_fs_close(); + + if (__stp_module_dir) { + debugfs_remove(__stp_module_dir); + __stp_module_dir = NULL; + } + + _stp_remove_root_dir(); +} + + #endif /* _TRANSPORT_C_ */ diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h index 7d249c45..c560be5d 100644 --- a/runtime/transport/transport.h +++ b/runtime/transport/transport.h @@ -11,9 +11,22 @@ /* amount of data a 
print can send. */ #define STP_BUFFER_SIZE 8192 +struct utt_trace; + +static int _stp_ctl_write(int type, void *data, unsigned len); + +static int _stp_transport_init(void); +static void _stp_transport_close(void); + +static inline void *utt_reserve(struct utt_trace *utt, size_t length) +{ + return NULL; +} + + /* STP_CTL_BUFFER_SIZE is the maximum size of a message */ /* exchanged on the control channel. */ -#ifdef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 1 /* Old transport sends print output on control channel */ #define STP_CTL_BUFFER_SIZE STP_BUFFER_SIZE #else @@ -23,27 +36,80 @@ /* how often the work queue wakes up and checks buffers */ #define STP_WORK_TIMER (HZ/100) -static unsigned _stp_nsubbufs = 8; -static unsigned _stp_subbuf_size = 65536*4; +static unsigned _stp_nsubbufs; +static unsigned _stp_subbuf_size; static int _stp_transport_init(void); static void _stp_transport_close(void); -static void _stp_warn (const char *fmt, ...); -static int _stp_print_init(void); -static void _stp_print_cleanup(void); -static struct dentry *_stp_get_root_dir(const char *name); - static int _stp_lock_transport_dir(void); static void _stp_unlock_transport_dir(void); +static struct dentry *_stp_get_root_dir(void); +static struct dentry *_stp_get_module_dir(void); + +static int _stp_transport_fs_init(const char *module_name); +static void _stp_transport_fs_close(void); + static void _stp_attach(void); static void _stp_detach(void); static void _stp_handle_start(struct _stp_msg_start *st); -static int _stp_pid = 0; -static uid_t _stp_uid = 0; -static gid_t _stp_gid = 0; -static pid_t _stp_init_pid = 0; -static int _stp_attached = 0; +static uid_t _stp_uid; +static gid_t _stp_gid; + +static int _stp_ctl_attached; + +static int _stp_bufsize; + +/* + * All transports must provide the following functions. + */ + +/* + * _stp_transport_data_fs_init + * + * This function allocates any buffers needed, creates files, + * etc. needed for this transport. 
+ */ +static int _stp_transport_data_fs_init(void); + +/* + * _stp_transport_data_fs_close + * + * This function cleans up items created by + * _stp_transport_data_fs_init(). + */ +static void _stp_transport_data_fs_close(void); + +/* + * _stp_data_write_reserve - reserve bytes + * size_request: number of bytes to reserve + * entry: allocated buffer is returned here + * + * This function attempts to reserve size_request number of bytes, + * returning the number of bytes actually reserved. The allocated + * buffer is returned in entry. Note that the number of bytes + * allocated may be less than the number of bytes requested. + */ +static size_t _stp_data_write_reserve(size_t size_request, void **entry); + + +/* + * _stp_data_entry_data - return data pointer from entry + * entry: entry + * + * This function returns the data pointer from entry. + */ +static unsigned char *_stp_data_entry_data(void *entry); + +/* + * _stp_data_write_commit - commit a reserved entry + * entry: pointer returned in the entry argument of _stp_data_write_reserve() + * + * This function notifies the transport that the bytes in entry are + * ready to be written. 
+ */ +static int _stp_data_write_commit(void *entry); + #endif /* _TRANSPORT_TRANSPORT_H_ */ diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h index 0d9a5983..aa50051c 100644 --- a/runtime/transport/transport_msgs.h +++ b/runtime/transport/transport_msgs.h @@ -29,8 +29,8 @@ enum STP_DISCONNECT, STP_BULK, STP_READY, - STP_RELOCATION, - /** deprecated STP_OLD_TRANSPORT **/ + STP_RELOCATION, + /** deprecated STP_TRANSPORT_VERSION == 1 **/ STP_BUF_INFO, STP_SUBBUFS_CONSUMED, STP_REALTIME_DATA, @@ -83,7 +83,7 @@ struct _stp_msg_start int32_t res; // for reply: result of probe_start() }; -#ifdef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 1 /**** for compatibility with old relayfs ****/ struct _stp_buf_info { diff --git a/runtime/transport/utt.c b/runtime/transport/utt.c index 915662b2..2dd303a5 100644 --- a/runtime/transport/utt.c +++ b/runtime/transport/utt.c @@ -26,7 +26,6 @@ #include <linux/percpu.h> #include <linux/init.h> #include <linux/debugfs.h> -#include <linux/relay.h> #include <linux/mm.h> #include "utt.h" @@ -157,7 +156,7 @@ static struct dentry *utt_create_tree(struct utt_trace *utt, const char *root, c return NULL; if (!utt->utt_tree_root) { - utt->utt_tree_root = _stp_get_root_dir(root); + utt->utt_tree_root = _stp_get_root_dir(); if (!utt->utt_tree_root) goto err; } @@ -169,7 +168,6 @@ err: return dir; } - static void utt_trace_cleanup(struct utt_trace *utt) { if (utt == NULL) diff --git a/runtime/transport/utt.h b/runtime/transport/utt.h index 40e54919..2a479d70 100644 --- a/runtime/transport/utt.h +++ b/runtime/transport/utt.h @@ -9,11 +9,11 @@ enum { struct utt_trace { int trace_state; - struct rchan *rchan; +// struct rchan *rchan; struct dentry *dir; /* systemtap/module_name */ struct dentry *dropped_file; atomic_t dropped; - struct dentry *utt_tree_root; /* systemtap */ + struct dentry *utt_tree_root; /* systemtap */ void *private_data; atomic_t wakeup; struct timer_list timer; @@ -41,12 +41,13 @@ struct 
utt_trace_setup { static struct utt_trace *utt_trace_setup(struct utt_trace_setup *utts); + static int utt_trace_startstop(struct utt_trace *utt, int start, unsigned int *trace_seq); static void utt_trace_cleanup(struct utt_trace *utt); static int utt_trace_remove(struct utt_trace *utt); -#ifndef STP_OLD_TRANSPORT +#if STP_TRANSPORT_VERSION == 2 static size_t utt_switch_subbuf(struct utt_trace *utt, struct rchan_buf *buf, size_t length); /** @@ -60,6 +61,7 @@ static size_t utt_switch_subbuf(struct utt_trace *utt, struct rchan_buf *buf, */ static inline void *utt_reserve(struct utt_trace *utt, size_t length) { +#if 0 void *reserved; struct rchan_buf *buf = utt->rchan->buf[smp_processor_id()]; @@ -72,6 +74,9 @@ static inline void *utt_reserve(struct utt_trace *utt, size_t length) buf->offset += length; return reserved; +#else + return NULL; +#endif } #endif diff --git a/testsuite/systemtap.printf/end1.exp b/testsuite/systemtap.printf/end1.exp index 590340d3..ab1de590 100644 --- a/testsuite/systemtap.printf/end1.exp +++ b/testsuite/systemtap.printf/end1.exp @@ -11,15 +11,15 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -o $tmpfile $tpath} res]} { - untested $TEST_NAME - puts "$res" + fail $TEST_NAME + puts "stap failed: $res" catch {exec rm -f $tmpfile} return } if {[catch {exec cmp $tmpfile $srcdir/$subdir/large_output} res]} { - fail $TEST_NAME puts "$res" + fail $TEST_NAME catch {exec rm -f $tmpfile} return } diff --git a/testsuite/systemtap.printf/end1b.exp b/testsuite/systemtap.printf/end1b.exp index bea5736e..46cdc9c7 100644 --- a/testsuite/systemtap.printf/end1b.exp +++ b/testsuite/systemtap.printf/end1b.exp @@ -16,8 +16,8 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -b -o $tmpfile $test} res]} { - untested $TEST_NAME puts "stap failed: $res" + fail $TEST_NAME as_root "/bin/rm -f [glob ${tmpfile}*]" return } diff --git a/testsuite/systemtap.printf/mixed_out.exp 
b/testsuite/systemtap.printf/mixed_out.exp index 3b66e7c0..55320e80 100644 --- a/testsuite/systemtap.printf/mixed_out.exp +++ b/testsuite/systemtap.printf/mixed_out.exp @@ -11,8 +11,8 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -DMAXACTION=100000 -o $tmpfile $tpath} res]} { - untested $TEST_NAME - puts "$res" + fail $TEST_NAME + puts "stap failed: $res" catch {exec rm -f $tmpfile} return } diff --git a/testsuite/systemtap.printf/mixed_outb.exp b/testsuite/systemtap.printf/mixed_outb.exp index db82cc79..c15520b1 100644 --- a/testsuite/systemtap.printf/mixed_outb.exp +++ b/testsuite/systemtap.printf/mixed_outb.exp @@ -16,7 +16,7 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -DMAXACTION=100000 -b -o $tmpfile $test} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "stap failed: $res" as_root "/bin/rm -f [glob ${tmpfile}*]" return diff --git a/testsuite/systemtap.printf/out1.exp b/testsuite/systemtap.printf/out1.exp index 7577a54d..f973ae00 100644 --- a/testsuite/systemtap.printf/out1.exp +++ b/testsuite/systemtap.printf/out1.exp @@ -11,7 +11,7 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -o $tmpfile $tpath} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "$res" catch {exec rm -f $tmpfile} return diff --git a/testsuite/systemtap.printf/out1b.exp b/testsuite/systemtap.printf/out1b.exp index c3e21ba9..24efbf4c 100644 --- a/testsuite/systemtap.printf/out1b.exp +++ b/testsuite/systemtap.printf/out1b.exp @@ -16,7 +16,7 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -b -o $tmpfile $test} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "stap failed: $res" as_root "/bin/rm -f [glob ${tmpfile}*]" return diff --git a/testsuite/systemtap.printf/out2.exp b/testsuite/systemtap.printf/out2.exp index ce1f7c6e..8c66e73d 100644 --- a/testsuite/systemtap.printf/out2.exp +++ b/testsuite/systemtap.printf/out2.exp @@ -11,7 +11,7 @@ 
if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -o $tmpfile $tpath} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "$res" catch {exec rm -f $tmpfile} return diff --git a/testsuite/systemtap.printf/out2b.exp b/testsuite/systemtap.printf/out2b.exp index fcc12f63..70a98ea2 100644 --- a/testsuite/systemtap.printf/out2b.exp +++ b/testsuite/systemtap.printf/out2b.exp @@ -16,7 +16,7 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -b -o $tmpfile $test} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "stap failed: $res" as_root "/bin/rm -f [glob ${tmpfile}*]" return diff --git a/testsuite/systemtap.printf/out3.exp b/testsuite/systemtap.printf/out3.exp index 51124757..63a67d8f 100644 --- a/testsuite/systemtap.printf/out3.exp +++ b/testsuite/systemtap.printf/out3.exp @@ -11,7 +11,7 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -DMAXACTION=100000 -o $tmpfile $tpath} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "$res" catch {exec rm -f $tmpfile} return diff --git a/testsuite/systemtap.printf/out3b.exp b/testsuite/systemtap.printf/out3b.exp index 740a8b68..d49625e9 100644 --- a/testsuite/systemtap.printf/out3b.exp +++ b/testsuite/systemtap.printf/out3b.exp @@ -16,7 +16,7 @@ if {[catch {exec mktemp -t staptestXXXXXX} tmpfile]} { } if {[catch {exec stap -DMAXACTION=100000 -b -o $tmpfile $test} res]} { - untested $TEST_NAME + fail $TEST_NAME puts "stap failed: $res" as_root "/bin/rm -f [glob ${tmpfile}*]" return |