author     David Smith <dsmith@redhat.com>  2009-05-06 12:25:02 -0500
committer  David Smith <dsmith@redhat.com>  2009-05-06 12:25:02 -0500
commit     4a8c28f93fa29e47c604e80e383c826070d6c383
tree       4aaea5111b6461f39e22d9bae289456429906028
parent     9b23198d73b782bc05dc2b834c7db3afbdefda86
Start of support for older transport and cleanup.
* runtime/transport/control.c: Added inclusion of linux/delay.h to get
declaration of msleep().
* runtime/transport/relay_v2.c: New skeleton file for transport version 2.
* runtime/transport/ring_buffer.c: Cleanup.
* runtime/transport/transport.c: Ditto.
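
For context on the control.c entry above: msleep() is declared in
<linux/delay.h>, so any code that sleep-polls from process context needs
that include to compile. A minimal sketch of such a call site (the
function and flag names here are hypothetical, not from this commit):

    #include <linux/delay.h>	/* declares msleep() */

    /* Hypothetical sleep-poll loop: wait for a control-channel flag,
     * yielding the CPU for ~10 ms between checks instead of spinning. */
    static void __stp_wait_for_ctl(volatile int *ready)
    {
    	while (!*ready)
    		msleep(10);
    }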
-rw-r--r--  runtime/transport/control.c     |   1
-rw-r--r--  runtime/transport/relay_v2.c    | 550
-rw-r--r--  runtime/transport/ring_buffer.c |  51
-rw-r--r--  runtime/transport/transport.c   |  22
4 files changed, 573 insertions, 51 deletions
diff --git a/runtime/transport/control.c b/runtime/transport/control.c
index 7626305a..4e07a0a7 100644
--- a/runtime/transport/control.c
+++ b/runtime/transport/control.c
@@ -12,6 +12,7 @@
 #include "control.h"
 #include "../mempool.c"
 #include "symbols.c"
+#include <linux/delay.h>
 
 static _stp_mempool_t *_stp_pool_q;
 static struct list_head _stp_ctl_ready_q;
diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c
new file mode 100644
index 00000000..771a3a3c
--- /dev/null
+++ b/runtime/transport/relay_v2.c
@@ -0,0 +1,550 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/relay.h>
+#include <linux/debugfs.h>
+#if 0
+#include <linux/ring_buffer.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/cpumask.h>
+
+static struct ring_buffer *__stp_ring_buffer = NULL;
+//DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
+
+/* _stp_poll_wait is a waitqueue for tasks blocked on
+ * _stp_data_poll_trace() */
+static DECLARE_WAIT_QUEUE_HEAD(_stp_poll_wait);
+
+#if 1
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct _stp_ring_buffer_iterator {
+#if 0
+	struct trace_array *tr;
+	struct tracer *trace;
+	void *private;
+	struct ring_buffer_iter *buffer_iter[NR_CPUS];
+
+	/* The below is zeroed out in pipe_read */
+	struct trace_seq seq;
+	struct trace_entry *ent;
+#endif
+	int cpu;
+	u64 ts;
+
+#if 0
+	unsigned long iter_flags;
+	loff_t pos;
+	long idx;
+
+	cpumask_var_t started;
+#endif
+};
+static struct _stp_ring_buffer_iterator _stp_iter;
+#endif
+
+static cpumask_var_t _stp_trace_reader_cpumask;
+
+static void __stp_free_ring_buffer(void)
+{
+	free_cpumask_var(_stp_trace_reader_cpumask);
+	if (__stp_ring_buffer)
+		ring_buffer_free(__stp_ring_buffer);
+	__stp_ring_buffer = NULL;
+}
+
+static int __stp_alloc_ring_buffer(void)
+{
+	int i;
+	unsigned long buffer_size = _stp_bufsize;
+
+	if (!alloc_cpumask_var(&_stp_trace_reader_cpumask, GFP_KERNEL))
+		goto fail;
+	cpumask_clear(_stp_trace_reader_cpumask);
+
+	if (buffer_size == 0) {
+		dbug_trans(1, "using default buffer size...\n");
+		buffer_size = _stp_nsubbufs * _stp_subbuf_size;
+	}
+	/* The number passed to ring_buffer_alloc() is per cpu.  Our
+	 * 'buffer_size' is a total number of bytes to allocate.  So,
+	 * we need to divide buffer_size by the number of cpus. */
+	buffer_size /= num_online_cpus();
+	dbug_trans(1, "%lu\n", buffer_size);
+	__stp_ring_buffer = ring_buffer_alloc(buffer_size, 0);
+	if (!__stp_ring_buffer)
+		goto fail;
+
+	dbug_trans(1, "size = %lu\n", ring_buffer_size(__stp_ring_buffer));
+	return 0;
+
+fail:
+	__stp_free_ring_buffer();
+	return -ENOMEM;
+}
+
+static int _stp_data_open_trace(struct inode *inode, struct file *file)
+{
+	long cpu_file = (long) inode->i_private;
+
+	/* We only allow for one reader per cpu */
+	dbug_trans(1, "trace attach\n");
+#ifdef STP_BULKMODE
+	if (!cpumask_test_cpu(cpu_file, _stp_trace_reader_cpumask))
+		cpumask_set_cpu(cpu_file, _stp_trace_reader_cpumask);
+	else {
+		dbug_trans(1, "returning EBUSY\n");
+		return -EBUSY;
+	}
+#else
+	if (!cpumask_empty(_stp_trace_reader_cpumask)) {
+		dbug_trans(1, "returning EBUSY\n");
+		return -EBUSY;
+	}
+	cpumask_setall(_stp_trace_reader_cpumask);
+#endif
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static int _stp_data_release_trace(struct inode *inode, struct file *file)
+{
+	long cpu_file = (long) inode->i_private;
+	dbug_trans(1, "trace detach\n");
+#ifdef STP_BULKMODE
+	cpumask_clear_cpu(cpu_file, _stp_trace_reader_cpumask);
+#else
+	cpumask_clear(_stp_trace_reader_cpumask);
+#endif
+
+	return 0;
+}
+
+size_t
+_stp_entry_to_user(struct _stp_entry *entry, char __user *ubuf, size_t cnt)
+{
+	int ret;
+
+	dbug_trans(1, "entry(%p), ubuf(%p), cnt(%lu)\n", entry, ubuf, cnt);
+	if (entry == NULL || ubuf == NULL)
+		return -EFAULT;
+
+	/* We don't do partial entries - just fail. */
+	if (entry->len > cnt)
+		return -EBUSY;
+
+	if (cnt > entry->len)
+		cnt = entry->len;
+	ret = copy_to_user(ubuf, entry->buf, cnt);
+	if (ret)
+		return -EFAULT;
+
+	return cnt;
+}
+
+static ssize_t tracing_wait_pipe(struct file *filp)
+{
+	while (ring_buffer_empty(__stp_ring_buffer)) {
+
+		if ((filp->f_flags & O_NONBLOCK)) {
+			dbug_trans(1, "returning -EAGAIN\n");
+			return -EAGAIN;
+		}
+
+		/*
+		 * This is a make-shift waitqueue. The reason we don't use
+		 * an actual wait queue is because:
+		 *  1) we only ever have one waiter
+		 *  2) the tracing, traces all functions, we don't want
+		 *     the overhead of calling wake_up and friends
+		 *     (and tracing them too)
+		 * Anyway, this is really very primitive wakeup.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		//iter->tr->waiter = current;
+
+		/* sleep for 100 msecs, and try again. */
+		schedule_timeout(HZ/10);
+
+		//iter->tr->waiter = NULL;
+
+		if (signal_pending(current)) {
+			dbug_trans(1, "returning -EINTR\n");
+			return -EINTR;
+		}
+	}
+
+	dbug_trans(1, "returning 1\n");
+	return 1;
+}
+
+static struct _stp_entry *
+peek_next_entry(int cpu, u64 *ts)
+{
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_peek(__stp_ring_buffer, cpu, ts);
+
+	return event ? ring_buffer_event_data(event) : NULL;
+}
+
+static struct _stp_entry *
+__stp_find_next_entry(long cpu_file, int *ent_cpu, u64 *ent_ts)
+{
+	struct _stp_entry *ent;
+
+#ifdef STP_BULKMODE
+	/*
+	 * If we are in a per_cpu trace file, don't bother by iterating over
+	 * all cpus and peek directly.
+	 */
+	if (ring_buffer_empty_cpu(__stp_ring_buffer, (int)cpu_file))
+		return NULL;
+	ent = peek_next_entry(cpu_file, ent_ts);
+	if (ent_cpu)
+		*ent_cpu = cpu_file;
+
+	return ent;
+#else
+	struct _stp_entry *next = NULL;
+	u64 next_ts = 0, ts;
+	int next_cpu = -1;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+
+		if (ring_buffer_empty_cpu(__stp_ring_buffer, cpu))
+			continue;
+
+		ent = peek_next_entry(cpu, &ts);
+
+		/*
+		 * Pick the entry with the smallest timestamp:
+		 */
+		if (ent && (!next || ts < next_ts)) {
+			next = ent;
+			next_cpu = cpu;
+			next_ts = ts;
+		}
+	}
+
+	if (ent_cpu)
+		*ent_cpu = next_cpu;
+
+	if (ent_ts)
+		*ent_ts = next_ts;
+
+	return next;
+#endif
+}
+
+/* Find the next real entry, and increment the iterator to the next entry */
+static struct _stp_entry *_stp_find_next_entry(long cpu_file)
+{
+	return __stp_find_next_entry(cpu_file, &_stp_iter.cpu, &_stp_iter.ts);
+}
+
+
+/*
+ * Consumer reader.
+ */
+static ssize_t
+_stp_data_read_trace(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	ssize_t sret;
+	struct _stp_entry *entry;
+	long cpu_file = (long) filp->private_data;
+
+	dbug_trans(1, "%lu\n", (unsigned long)cnt);
+
+	sret = tracing_wait_pipe(filp);
+	dbug_trans(1, "tracing_wait_pipe returned %ld\n", sret);
+	if (sret <= 0)
+		goto out;
+
+	/* stop when tracing is finished */
+	if (ring_buffer_empty(__stp_ring_buffer)) {
+		sret = 0;
+		goto out;
+	}
+
+	if (cnt >= PAGE_SIZE)
+		cnt = PAGE_SIZE - 1;
+
+	dbug_trans(1, "sret = %lu\n", (unsigned long)sret);
+	sret = 0;
+	while ((entry = _stp_find_next_entry(cpu_file)) != NULL) {
+		ssize_t len;
+
+		len = _stp_entry_to_user(entry, ubuf, cnt);
+		if (len <= 0)
+			break;
+
+		ring_buffer_consume(__stp_ring_buffer, _stp_iter.cpu,
+				    &_stp_iter.ts);
+		ubuf += len;
+		cnt -= len;
+		sret += len;
+		if (cnt <= 0)
+			break;
+	}
+out:
+	return sret;
+}
+
+
+static unsigned int
+_stp_data_poll_trace(struct file *filp, poll_table *poll_table)
+{
+	dbug_trans(1, "entry\n");
+	if (! ring_buffer_empty(__stp_ring_buffer))
+		return POLLIN | POLLRDNORM;
+	poll_wait(filp, &_stp_poll_wait, poll_table);
+	if (! ring_buffer_empty(__stp_ring_buffer))
+		return POLLIN | POLLRDNORM;
+
+	dbug_trans(1, "exit\n");
+	return 0;
+}
+
+static struct file_operations __stp_data_fops = {
+	.owner = THIS_MODULE,
+	.open = _stp_data_open_trace,
+	.release = _stp_data_release_trace,
+	.poll = _stp_data_poll_trace,
+	.read = _stp_data_read_trace,
+#if 0
+	.splice_read = tracing_splice_read_pipe,
+#endif
+};
+
+/* Here's how __STP_MAX_RESERVE_SIZE is figured.  The value of
+ * BUF_PAGE_SIZE was gotten from the kernel's ring_buffer code.  It
+ * is divided by 4, so we waste a maximum of 1/4 of the buffer (in
+ * the case of a small reservation).  We then subtract the sizes of
+ * structures needed for every reservation. */
+#define __STP_MAX_RESERVE_SIZE ((/*BUF_PAGE_SIZE*/ 4080 / 4)	\
+				- sizeof(struct _stp_entry)	\
+				- sizeof(struct ring_buffer_event))
+
+/*
+ * This function prepares the cpu buffer to write a sample.
+ *
+ * Struct op_entry is used during operations on the ring buffer while
+ * struct op_sample contains the data that is stored in the ring
+ * buffer. Struct entry can be uninitialized. The function reserves a
+ * data array that is specified by size. Use
+ * op_cpu_buffer_write_commit() after preparing the sample. In case of
+ * errors a null pointer is returned, otherwise the pointer to the
+ * sample.
+ *
+ */
+static size_t
+_stp_data_write_reserve(size_t size_request, struct _stp_entry **entry)
+{
+	struct ring_buffer_event *event;
+
+	if (entry == NULL)
+		return -EINVAL;
+
+	if (size_request > __STP_MAX_RESERVE_SIZE) {
+		size_request = __STP_MAX_RESERVE_SIZE;
+	}
+
+	event = ring_buffer_lock_reserve(__stp_ring_buffer,
+					 (sizeof(struct _stp_entry) + size_request),
+					 0);
+	if (unlikely(! event)) {
+		dbug_trans(1, "event = NULL (%p)?\n", event);
+		entry = NULL;
+		return 0;
+	}
+
+	*entry = ring_buffer_event_data(event);
+	(*entry)->event = event;
+	(*entry)->len = size_request;
+	return size_request;
+}
+#endif
+
+static int _stp_data_write_commit(struct _stp_entry *entry)
+{
+#if 0
+	int ret;
+
+	if (unlikely(! entry)) {
+		dbug_trans(1, "entry = NULL, returning -EINVAL\n");
+		return -EINVAL;
+	}
+
+	ret = ring_buffer_unlock_commit(__stp_ring_buffer, entry->event, 0);
+	dbug_trans(1, "after commit, empty returns %d\n",
+		   ring_buffer_empty(__stp_ring_buffer));
+
+	wake_up_interruptible(&_stp_poll_wait);
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+
+#if 0
+static struct dentry *__stp_entry[NR_CPUS] = { NULL };
+#endif
+
+
+
+struct _stp_relay_data {
+	struct rchan *rchan;
+	int overwrite_flag;
+};
+static struct _stp_relay_data _stp_relay;
+
+
+
+static size_t utt_switch_subbuf(struct utt_trace *utt, struct rchan_buf *buf,
+				size_t length);
+/**
+ *	utt_reserve - reserve slot in channel buffer
+ *	@utt: utt channel
+ *	@length: number of bytes to reserve
+ *
+ *	Returns pointer to reserved slot, NULL if full.
+ *
+ *	This function is utt_switch_subbuf version of relay_reserve.
+ */
+static size_t
+_stp_data_write_reserve(size_t size_request, struct _stp_entry **entry)
+{
+#if 0
+	void *reserved;
+	struct rchan_buf *buf = utt->rchan->buf[smp_processor_id()];
+
+	if (unlikely(buf->offset + length > buf->chan->subbuf_size)) {
+		length = utt_switch_subbuf(utt, buf, length);
+		if (!length)
+			return NULL;
+	}
+	reserved = (char*)buf->data + buf->offset;
+	buf->offset += length;
+
+	return reserved;
+#else
+	return 0;
+#endif
+}
+
+
+
+
+static int
+_stp_relay_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+				 void *prev_subbuf, size_t prev_padding)
+{
+	if (_stp_relay.overwrite_flag || !relay_buf_full(buf))
+		return 1;
+
+	return 0;
+}
+
+static struct dentry *
+_stp_relay_create_buf_file_callback(const char *filename,
+				    struct dentry *parent,
+				    int mode,
+				    struct rchan_buf *buf,
+				    int *is_global)
+{
+	struct dentry *file;
+
+	if (is_global) {
+#ifdef STP_BULKMODE
+		*is_global = 0;
+#else
+		*is_global = 1;
+#endif
+	}
+
+	file = debugfs_create_file(filename, mode, parent, buf,
+				   &relay_file_operations);
+	if (file) {
+		file->d_inode->i_uid = _stp_uid;
+		file->d_inode->i_gid = _stp_gid;
+	}
+	return file;
+}
+
+static int
+_stp_relay_remove_buf_file_callback(struct dentry *dentry)
+{
+	debugfs_remove(dentry);
+	return 0;
+}
+
+static struct rchan_callbacks _stp_relay_callbacks = {
+	.subbuf_start		= _stp_relay_subbuf_start_callback,
+	.create_buf_file	= _stp_relay_create_buf_file_callback,
+	.remove_buf_file	= _stp_relay_remove_buf_file_callback,
+};
+
+static int _stp_transport_data_fs_init(void)
+{
+	int rc;
+	u64 npages;
+	struct sysinfo si;
+
+	_stp_relay.overwrite_flag = 0;
+
+	npages = _stp_subbuf_size * _stp_nsubbufs;
+#ifdef STP_BULKMODE
+	npages *= num_possible_cpus();
+#endif
+	npages >>= PAGE_SHIFT;
+	si_meminfo(&si);
+#define MB(i) (unsigned long)((i) >> (20 - PAGE_SHIFT))
+	if (npages > (si.freeram + si.bufferram)) {
+		errk("Not enough free+buffered memory(%luMB) for log buffer(%luMB)\n",
+		     MB(si.freeram + si.bufferram),
+		     MB(npages));
+		rc = -ENOMEM;
+		goto err;
+	}
+	else if (npages > si.freeram) {
+		/* exceeds freeram, but below freeram+bufferram */
+		printk(KERN_WARNING
+		       "log buffer size exceeds free memory(%luMB)\n",
+		       MB(si.freeram));
+	}
+
+#if (RELAYFS_CHANNEL_VERSION >= 7)
+	_stp_relay.rchan = relay_open("trace", _stp_get_module_dir(),
+				      _stp_subbuf_size, _stp_nsubbufs,
+				      &_stp_relay_callbacks, NULL);
+#else  /* (RELAYFS_CHANNEL_VERSION < 7) */
+	_stp_relay.rchan = relay_open("trace", _stp_get_module_dir(),
+				      _stp_subbuf_size, _stp_nsubbufs,
+				      &_stp_relay_callbacks);
+#endif /* (RELAYFS_CHANNEL_VERSION < 7) */
+
+	if (!_stp_relay.rchan) {
+		rc = -ENOENT;
+		goto err;
+	}
+	dbug_trans(1, "returning 0...\n");
+	return 0;
+
+err:
+	if (_stp_relay.rchan)
+		relay_close(_stp_relay.rchan);
+	return rc;
+}
+
+static void _stp_transport_data_fs_close(void)
+{
+	if (_stp_relay.rchan)
+		relay_close(_stp_relay.rchan);
+}
+
diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c
index abe9c360..99ce2031 100644
--- a/runtime/transport/ring_buffer.c
+++ b/runtime/transport/ring_buffer.c
@@ -5,41 +5,20 @@
 #include <linux/cpumask.h>
 
 static struct ring_buffer *__stp_ring_buffer = NULL;
-//DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
 
 /* _stp_poll_wait is a waitqueue for tasks blocked on
  * _stp_data_poll_trace() */
 static DECLARE_WAIT_QUEUE_HEAD(_stp_poll_wait);
 
-#if 1
 /*
  * Trace iterator - used by printout routines who present trace
  * results to users and which routines might sleep, etc:
  */
-struct _stp_ring_buffer_iterator {
-#if 0
-	struct trace_array *tr;
-	struct tracer *trace;
-	void *private;
-	struct ring_buffer_iter *buffer_iter[NR_CPUS];
-
-	/* The below is zeroed out in pipe_read */
-	struct trace_seq seq;
-	struct trace_entry *ent;
-#endif
+struct _stp_ring_buffer_data {
 	int cpu;
 	u64 ts;
-
-#if 0
-	unsigned long iter_flags;
-	loff_t pos;
-	long idx;
-
-	cpumask_var_t started;
-#endif
 };
-static struct _stp_ring_buffer_iterator _stp_iter;
-#endif
+static struct _stp_ring_buffer_data _stp_rb_data;
 
 static cpumask_var_t _stp_trace_reader_cpumask;
 
@@ -159,13 +138,10 @@ static ssize_t tracing_wait_pipe(struct file *filp)
 		 * Anyway, this is really very primitive wakeup.
 		 */
 		set_current_state(TASK_INTERRUPTIBLE);
-		//iter->tr->waiter = current;
 
 		/* sleep for 100 msecs, and try again. */
 		schedule_timeout(HZ/10);
 
-		//iter->tr->waiter = NULL;
-
 		if (signal_pending(current)) {
 			dbug_trans(1, "returning -EINTR\n");
 			return -EINTR;
@@ -186,8 +162,9 @@ peek_next_entry(int cpu, u64 *ts)
 	return event ? ring_buffer_event_data(event) : NULL;
 }
 
+/* Find the next real entry */
 static struct _stp_entry *
-__stp_find_next_entry(long cpu_file, int *ent_cpu, u64 *ent_ts)
+_stp_find_next_entry(long cpu_file)
 {
 	struct _stp_entry *ent;
 
@@ -199,8 +176,7 @@ __stp_find_next_entry(long cpu_file, int *ent_cpu, u64 *ent_ts)
 	if (ring_buffer_empty_cpu(__stp_ring_buffer, (int)cpu_file))
 		return NULL;
 	ent = peek_next_entry(cpu_file, ent_ts);
-	if (ent_cpu)
-		*ent_cpu = cpu_file;
+	_stp_rb_data.cpu = cpu_file;
 
 	return ent;
 #else
@@ -226,22 +202,13 @@ __stp_find_next_entry(long cpu_file, int *ent_cpu, u64 *ent_ts)
 		}
 	}
 
-	if (ent_cpu)
-		*ent_cpu = next_cpu;
-
-	if (ent_ts)
-		*ent_ts = next_ts;
+	_stp_rb_data.cpu = next_cpu;
+	_stp_rb_data.ts = next_ts;
 
 	return next;
 #endif
 }
 
-/* Find the next real entry, and increment the iterator to the next entry */
-static struct _stp_entry *_stp_find_next_entry(long cpu_file)
-{
-	return __stp_find_next_entry(cpu_file, &_stp_iter.cpu, &_stp_iter.ts);
-}
-
 
 /*
  * Consumer reader.
@@ -279,8 +246,8 @@ _stp_data_read_trace(struct file *filp, char __user *ubuf,
 		if (len <= 0)
 			break;
 
-		ring_buffer_consume(__stp_ring_buffer, _stp_iter.cpu,
-				    &_stp_iter.ts);
+		ring_buffer_consume(__stp_ring_buffer, _stp_rb_data.cpu,
+				    &_stp_rb_data.ts);
 		ubuf += len;
 		cnt -= len;
 		sret += len;
diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c
index c0c97467..5a499e8d 100644
--- a/runtime/transport/transport.c
+++ b/runtime/transport/transport.c
@@ -18,11 +18,14 @@
 #include <linux/debugfs.h>
 #include <linux/namei.h>
 #include <linux/workqueue.h>
+#include <linux/delay.h>
 
+#if 0
 static void utt_set_overwrite(int overwrite)
 {
 	return;
 }
+#endif
 
 static int _stp_exit_flag = 0;
 
@@ -35,21 +38,19 @@ static int _stp_ctl_attached = 0;
 static pid_t _stp_target = 0;
 static int _stp_probes_started = 0;
 
-#if 0
-#include <linux/delay.h>
-
-static struct utt_trace *_stp_utt = NULL;
-static unsigned int utt_seq = 1;
+#if 1
+//static struct utt_trace *_stp_utt = NULL;
+//static unsigned int utt_seq = 1;
 
 #include "control.h"
 #if STP_TRANSPORT_VERSION == 1
 #include "relayfs.c"
 #include "procfs.c"
 #elif STP_TRANSPORT_VERSION == 2
-#include "utt.c"
+#include "relay_v2.c"
 #include "debugfs.c"
 #elif STP_TRANSPORT_VERSION == 3
-#include "debugfs.c"
 #include "ring_buffer.c"
+#include "debugfs.c"
 #else
 #error "Unknown STP_TRANSPORT_VERSION"
 #endif
@@ -59,7 +60,6 @@ static unsigned int utt_seq = 1;
 #include "control.h"
 #include "debugfs.c"
 #include "control.c"
-#include "ring_buffer.c"
 #endif /* if 0 */
 
 static unsigned _stp_nsubbufs = 8;
 static unsigned _stp_subbuf_size = 65536*4;
@@ -158,8 +158,10 @@ static void _stp_detach(void)
 	_stp_ctl_attached = 0;
 	_stp_pid = 0;
 
+#if 0
 	if (!_stp_exit_flag)
 		utt_set_overwrite(1);
+#endif
 
 	cancel_delayed_work(&_stp_work);
 	wake_up_interruptible(&_stp_ctl_wq);
@@ -173,7 +175,9 @@ static void _stp_attach(void)
 	dbug_trans(1, "attach\n");
 	_stp_ctl_attached = 1;
 	_stp_pid = current->pid;
+#if 0
 	utt_set_overwrite(0);
+#endif
 
 	queue_delayed_work(_stp_wq, &_stp_work, STP_WORK_TIMER);
 }
@@ -458,7 +462,7 @@ static void _stp_remove_root_dir(void)
 
 static struct dentry *__stp_module_dir = NULL;
 
-static inline struct dentry *_stp_get_module_dir(void)
+static struct dentry *_stp_get_module_dir(void)
 {
 	return __stp_module_dir;
 }
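
For completeness, the per-cpu "trace" files that _stp_transport_data_fs_init()
registers via debugfs_create_file() and relay_file_operations are consumed
from user space with ordinary read(2) (in practice stapio does this job). A
minimal, hypothetical reader -- the debugfs path and the MODULE component
below are assumptions, not from this commit:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
    	char buf[65536];
    	ssize_t n;
    	int fd = open("/sys/kernel/debug/systemtap/MODULE/trace0", O_RDONLY);

    	if (fd < 0) {
    		perror("open trace0");
    		return 1;
    	}
    	/* relay hands back sub-buffer contents in order; a zero return
    	 * may simply mean no data is buffered yet. */
    	while ((n = read(fd, buf, sizeof(buf))) > 0)
    		fwrite(buf, 1, (size_t)n, stdout);
    	close(fd);
    	return 0;
    }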