diff options
Diffstat (limited to 'drivers/oprofile')
-rw-r--r-- | drivers/oprofile/buffer_sync.c | 595 | ||||
-rw-r--r-- | drivers/oprofile/buffer_sync.h | 22 | ||||
-rw-r--r-- | drivers/oprofile/cpu_buffer.c | 448 | ||||
-rw-r--r-- | drivers/oprofile/cpu_buffer.h | 120 | ||||
-rw-r--r-- | drivers/oprofile/event_buffer.c | 208 | ||||
-rw-r--r-- | drivers/oprofile/event_buffer.h | 40 | ||||
-rw-r--r-- | drivers/oprofile/oprof.c | 285 | ||||
-rw-r--r-- | drivers/oprofile/oprof.h | 42 | ||||
-rw-r--r-- | drivers/oprofile/oprofile_files.c | 191 | ||||
-rw-r--r-- | drivers/oprofile/oprofile_stats.c | 84 | ||||
-rw-r--r-- | drivers/oprofile/oprofile_stats.h | 34 | ||||
-rw-r--r-- | drivers/oprofile/oprofilefs.c | 296 | ||||
-rw-r--r-- | drivers/oprofile/timer_int.c | 46 |
13 files changed, 2411 insertions, 0 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c new file mode 100644 index 00000000000..c9e2ae90f19 --- /dev/null +++ b/drivers/oprofile/buffer_sync.c @@ -0,0 +1,595 @@ +/** + * @file buffer_sync.c + * + * @remark Copyright 2002-2009 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@movementarian.org> + * @author Barry Kasindorf + * @author Robert Richter <robert.richter@amd.com> + * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary + * in several circumstances, mentioned below. + * + * The processing does the job of converting the + * transitory EIP value into a persistent dentry/offset + * value that the profiler can record at its leisure. + * + * See fs/dcookies.c for a description of the dentry/offset + * objects. + */ + +#include <linux/mm.h> +#include <linux/workqueue.h> +#include <linux/notifier.h> +#include <linux/dcookies.h> +#include <linux/profile.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/oprofile.h> +#include <linux/sched.h> + +#include "oprofile_stats.h" +#include "event_buffer.h" +#include "cpu_buffer.h" +#include "buffer_sync.h" + +static LIST_HEAD(dying_tasks); +static LIST_HEAD(dead_tasks); +static cpumask_var_t marked_cpus; +static DEFINE_SPINLOCK(task_mortuary); +static void process_task_mortuary(void); + +/* Take ownership of the task struct and place it on the + * list for processing. Only after two full buffer syncs + * does the task eventually get freed, because by then + * we are sure we will not reference it again. + * Can be invoked from softirq via RCU callback due to + * call_rcu() of the task struct, hence the _irqsave. 
+ */
+static int
+task_free_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+	unsigned long flags;
+	struct task_struct *task = data;
+	/* dying_tasks is only touched with the task_mortuary lock held */
+	spin_lock_irqsave(&task_mortuary, flags);
+	list_add(&task->tasks, &dying_tasks);
+	spin_unlock_irqrestore(&task_mortuary, flags);
+	return NOTIFY_OK;
+}
+
+
+/* The task is on its way out. A sync of the buffer means we can catch
+ * any remaining samples for this task.
+ * The notifier arguments are unused; the callback always returns 0.
+ */
+static int
+task_exit_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+	/* To avoid latency problems, we only process the current CPU,
+	 * hoping that most samples for the task are on this CPU
+	 */
+	sync_buffer(raw_smp_processor_id());
+	return 0;
+}
+
+
+/* The task is about to try a do_munmap(). We peek at what it's going to
+ * do, and if it's an executable region, process the samples first, so
+ * we don't lose any. This does not have to be exact, it's a QoI issue
+ * only.
+ * @data is interpreted as the address used for the vma lookup in
+ * current->mm. The callback always returns 0.
+ */
+static int
+munmap_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+	unsigned long addr = (unsigned long)data;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *mpnt;
+
+	down_read(&mm->mmap_sem);
+
+	mpnt = find_vma(mm, addr);
+	if (mpnt && mpnt->vm_file && (mpnt->vm_flags & VM_EXEC)) {
+		/* mmap_sem is dropped before syncing the buffer */
+		up_read(&mm->mmap_sem);
+		/* To avoid latency problems, we only process the current CPU,
+		 * hoping that most samples for the task are on this CPU
+		 */
+		sync_buffer(raw_smp_processor_id());
+		return 0;
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+}
+
+
+/* We need to be told about new modules so we don't attribute to a previously
+ * loaded module, or drop the samples on the floor.
+ * Only MODULE_STATE_COMING is acted on: an ESCAPE_CODE/MODULE_LOADED_CODE
+ * pair is appended to the event buffer under buffer_mutex. Without
+ * CONFIG_MODULES the body is compiled out and 0 is returned.
+ */
+static int
+module_load_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+#ifdef CONFIG_MODULES
+	if (val != MODULE_STATE_COMING)
+		return 0;
+
+	/* FIXME: should we process all CPU buffers ? */
+	mutex_lock(&buffer_mutex);
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(MODULE_LOADED_CODE);
+	mutex_unlock(&buffer_mutex);
+#endif
+	return 0;
+}
+
+
+/* Notifier blocks wrapping the callbacks above; they are registered in
+ * sync_start() and unregistered in sync_stop().
+ */
+static struct notifier_block task_free_nb = {
+	.notifier_call	= task_free_notify,
+};
+
+static struct notifier_block task_exit_nb = {
+	.notifier_call	= task_exit_notify,
+};
+
+static struct notifier_block munmap_nb = {
+	.notifier_call	= munmap_notify,
+};
+
+static struct notifier_block module_load_nb = {
+	.notifier_call	= module_load_notify,
+};
+
+
+/* Stop the per-CPU work and drain the task mortuary. */
+static void end_sync(void)
+{
+	end_cpu_work();
+	/* make sure we don't leak task structs */
+	/* two passes: the first moves dying_tasks to dead_tasks, the
+	 * second frees what the first one moved
+	 */
+	process_task_mortuary();
+	process_task_mortuary();
+}
+
+
+/* Allocate the marked_cpus mask, start the per-CPU work and register the
+ * four notifiers. Returns 0 on success, -ENOMEM if the mask cannot be
+ * allocated, or the error of the failing registration after the earlier
+ * registrations have been undone in reverse order.
+ */
+int sync_start(void)
+{
+	int err;
+
+	if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
+	start_cpu_work();
+
+	err = task_handoff_register(&task_free_nb);
+	if (err)
+		goto out1;
+	err = profile_event_register(PROFILE_TASK_EXIT, &task_exit_nb);
+	if (err)
+		goto out2;
+	err = profile_event_register(PROFILE_MUNMAP, &munmap_nb);
+	if (err)
+		goto out3;
+	err = register_module_notifier(&module_load_nb);
+	if (err)
+		goto out4;
+
+out:
+	return err;
+	/* error unwinding: each label undoes the step before the failing one */
+out4:
+	profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+out3:
+	profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
+out2:
+	task_handoff_unregister(&task_free_nb);
+out1:
+	end_sync();
+	free_cpumask_var(marked_cpus);
+	goto out;
+}
+
+
+/* Undo sync_start(): unregister the notifiers in reverse order, stop the
+ * per-CPU work and free the marked_cpus mask.
+ */
+void sync_stop(void)
+{
+	unregister_module_notifier(&module_load_nb);
+	profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+	profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
+	task_handoff_unregister(&task_free_nb);
+	end_sync();
+	free_cpumask_var(marked_cpus);
+}
+
+
+/* Optimisation. We can manage without taking the dcookie sem
+ * because we cannot reach this code without at least one
+ * dcookie user still being registered (namely, the reader
+ * of the event buffer). 
*/
+static inline unsigned long fast_get_dcookie(struct path *path)
+{
+	unsigned long cookie;
+
+	/* dentry already flagged DCACHE_COOKIE: its address is the cookie */
+	if (path->dentry->d_flags & DCACHE_COOKIE)
+		return (unsigned long)path->dentry;
+	get_dcookie(path, &cookie);
+	return cookie;
+}
+
+
+/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+ * which corresponds loosely to "application name". This is
+ * not strictly necessary but allows oprofile to associate
+ * shared-library samples with particular applications
+ *
+ * Returns NO_COOKIE when mm is NULL or when no file-backed
+ * VM_EXECUTABLE vma is found on the mm->mmap list.
+ */
+static unsigned long get_exec_dcookie(struct mm_struct *mm)
+{
+	unsigned long cookie = NO_COOKIE;
+	struct vm_area_struct *vma;
+
+	if (!mm)
+		goto out;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!vma->vm_file)
+			continue;
+		if (!(vma->vm_flags & VM_EXECUTABLE))
+			continue;
+		/* first match wins */
+		cookie = fast_get_dcookie(&vma->vm_file->f_path);
+		break;
+	}
+
+out:
+	return cookie;
+}
+
+
+/* Convert the EIP value of a sample into a persistent dentry/offset
+ * pair that can then be added to the global event buffer. We make
+ * sure to do this lookup before a mm->mmap modification happens so
+ * we don't lose track. 
+ */
+/* Result summary: INVALID_COOKIE if no vma contains addr (*offset is then
+ * left untouched), NO_COOKIE with *offset = addr for a mapping without a
+ * file, otherwise the file's dcookie with *offset set to the file offset.
+ */
+static unsigned long
+lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
+{
+	unsigned long cookie = NO_COOKIE;
+	struct vm_area_struct *vma;
+
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+
+		if (addr < vma->vm_start || addr >= vma->vm_end)
+			continue;
+
+		if (vma->vm_file) {
+			cookie = fast_get_dcookie(&vma->vm_file->f_path);
+			*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr -
+				vma->vm_start;
+		} else {
+			/* must be an anonymous map */
+			*offset = addr;
+		}
+
+		break;
+	}
+
+	/* loop ran off the end of the list: nothing maps addr */
+	if (!vma)
+		cookie = INVALID_COOKIE;
+
+	return cookie;
+}
+
+/* Last cookie written with add_cookie_switch(); add_cpu_switch() resets it
+ * so the first sample after a CPU switch emits a cookie switch again.
+ */
+static unsigned long last_cookie = INVALID_COOKIE;
+
+/* Emit ESCAPE_CODE, CPU_SWITCH_CODE, <cpu number>. */
+static void add_cpu_switch(int i)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(CPU_SWITCH_CODE);
+	add_event_entry(i);
+	last_cookie = INVALID_COOKIE;
+}
+
+/* Emit ESCAPE_CODE followed by the kernel enter or kernel exit code. */
+static void add_kernel_ctx_switch(unsigned int in_kernel)
+{
+	add_event_entry(ESCAPE_CODE);
+	if (in_kernel)
+		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+	else
+		add_event_entry(KERNEL_EXIT_SWITCH_CODE);
+}
+
+/* Emit a context switch record (pid, application cookie) followed by a
+ * separate record carrying the tgid.
+ */
+static void
+add_user_ctx_switch(struct task_struct const *task, unsigned long cookie)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(CTX_SWITCH_CODE);
+	add_event_entry(task->pid);
+	add_event_entry(cookie);
+	/* Another code for daemon back-compat */
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(CTX_TGID_CODE);
+	add_event_entry(task->tgid);
+}
+
+
+/* Emit ESCAPE_CODE, COOKIE_SWITCH_CODE, <cookie>. */
+static void add_cookie_switch(unsigned long cookie)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(COOKIE_SWITCH_CODE);
+	add_event_entry(cookie);
+}
+
+
+/* Emit ESCAPE_CODE, TRACE_BEGIN_CODE. */
+static void add_trace_begin(void)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(TRACE_BEGIN_CODE);
+}
+
+/* Copy a sample-with-data entry into the event buffer. The first two data
+ * words of the entry are read as code and pc; the entry is dropped if
+ * either is missing or if no further data follows them.
+ */
+static void add_data(struct op_entry *entry, struct mm_struct *mm)
+{
+	unsigned long code, pc, val;
+	unsigned long cookie;
+	off_t offset;
+
+	if (!op_cpu_buffer_get_data(entry, &code))
+		return;
+	if (!op_cpu_buffer_get_data(entry, &pc))
+		return;
+	if (!op_cpu_buffer_get_size(entry))
+		return;
+
+	if (mm) {
+		cookie = lookup_dcookie(mm, pc, &offset);
+
+		if (cookie == NO_COOKIE)
+			offset = pc;
+		if (cookie == INVALID_COOKIE) {
+			/* counted as lost, but the raw pc is still recorded */
+			atomic_inc(&oprofile_stats.sample_lost_no_mapping);
+			offset = pc;
+		}
+		if (cookie != last_cookie) {
+			add_cookie_switch(cookie);
+			last_cookie = cookie;
+		}
+	} else
+		offset = pc;
+
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(code);
+	add_event_entry(offset);	/* Offset from Dcookie */
+
+	/* forward the remaining data words unchanged */
+	while (op_cpu_buffer_get_data(entry, &val))
+		add_event_entry(val);
+}
+
+/* Emit an (offset, event) pair. */
+static inline void add_sample_entry(unsigned long offset, unsigned long event)
+{
+	add_event_entry(offset);
+	add_event_entry(event);
+}
+
+
+/*
+ * Add a sample to the global event buffer. If possible the
+ * sample is converted into a persistent dentry/offset pair
+ * for later lookup from userspace. Return 0 on failure.
+ *
+ * Kernel samples are stored with their raw eip. Userspace samples are
+ * dropped (and counted in oprofile_stats) when mm is NULL or when no
+ * mapping contains the eip.
+ */
+static int
+add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
+{
+	unsigned long cookie;
+	off_t offset;
+
+	if (in_kernel) {
+		add_sample_entry(s->eip, s->event);
+		return 1;
+	}
+
+	/* add userspace sample */
+
+	if (!mm) {
+		atomic_inc(&oprofile_stats.sample_lost_no_mm);
+		return 0;
+	}
+
+	cookie = lookup_dcookie(mm, s->eip, &offset);
+
+	if (cookie == INVALID_COOKIE) {
+		atomic_inc(&oprofile_stats.sample_lost_no_mapping);
+		return 0;
+	}
+
+	if (cookie != last_cookie) {
+		add_cookie_switch(cookie);
+		last_cookie = cookie;
+	}
+
+	add_sample_entry(offset, s->event);
+
+	return 1;
+}
+
+
+/* Counterpart of take_tasks_mm(): drop the mmap_sem read lock and the mm
+ * reference. A NULL mm is a no-op.
+ */
+static void release_mm(struct mm_struct *mm)
+{
+	if (!mm)
+		return;
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+}
+
+
+/* Get the task's mm via get_task_mm() and, if there is one, take its
+ * mmap_sem for reading. Returns the mm or NULL.
+ */
+static struct mm_struct *take_tasks_mm(struct task_struct *task)
+{
+	struct mm_struct *mm = get_task_mm(task);
+	if (mm)
+		down_read(&mm->mmap_sem);
+	return mm;
+}
+
+
+/* True when val is the ESCAPE_CODE marker. */
+static inline int is_code(unsigned long val)
+{
+	return val == ESCAPE_CODE;
+}
+
+
+/* Move tasks along towards death. 
Any tasks on dead_tasks
+ * will definitely have no remaining references in any
+ * CPU buffers at this point, because we use two lists,
+ * and to have reached the list, it must have gone through
+ * one full sync already.
+ */
+static void process_task_mortuary(void)
+{
+	unsigned long flags;
+	LIST_HEAD(local_dead_tasks);
+	struct task_struct *task;
+	struct task_struct *ttask;
+
+	spin_lock_irqsave(&task_mortuary, flags);
+
+	/* dead_tasks -> private list to free, dying_tasks -> dead_tasks */
+	list_splice_init(&dead_tasks, &local_dead_tasks);
+	list_splice_init(&dying_tasks, &dead_tasks);
+
+	spin_unlock_irqrestore(&task_mortuary, flags);
+
+	/* the freeing itself happens outside the spinlock */
+	list_for_each_entry_safe(task, ttask, &local_dead_tasks, tasks) {
+		list_del(&task->tasks);
+		free_task(task);
+	}
+}
+
+
+/* Record that @cpu has been synced. Once every online CPU is marked, run
+ * the mortuary once and clear the mask for the next round.
+ */
+static void mark_done(int cpu)
+{
+	int i;
+
+	cpumask_set_cpu(cpu, marked_cpus);
+
+	for_each_online_cpu(i) {
+		if (!cpumask_test_cpu(i, marked_cpus))
+			return;
+	}
+
+	/* All CPUs have been processed at least once,
+	 * we can process the mortuary once
+	 */
+	process_task_mortuary();
+
+	cpumask_clear(marked_cpus);
+}
+
+
+/* FIXME: this is not sufficient if we implement syscall barrier backtrace
+ * traversal, the code switch to sb_sample_start at first kernel enter/exit
+ * switch so we need a fifth state and some special handling in sync_buffer()
+ *
+ * The enumerators are ordered (sb_bt_ignore = -2 ... sb_sample_start = 1):
+ * sync_buffer() ignores plain samples while the state is below sb_bt_start.
+ */
+typedef enum {
+	sb_bt_ignore = -2,
+	sb_buffer_start,
+	sb_bt_start,
+	sb_sample_start,
+} sync_buffer_state;
+
+/* Sync one of the CPU's buffers into the global event buffer.
+ * Here we need to go through each batch of samples punctuated
+ * by context switch notes, taking the task's mmap_sem and doing
+ * lookup in task->mm->mmap to convert EIP into dcookie/offset
+ * value. 
+ */
+void sync_buffer(int cpu)
+{
+	struct mm_struct *mm = NULL;
+	struct mm_struct *oldmm;
+	unsigned long val;
+	struct task_struct *new;
+	unsigned long cookie = 0;
+	int in_kernel = 1;
+	sync_buffer_state state = sb_buffer_start;
+	unsigned int i;
+	unsigned long available;
+	unsigned long flags;
+	struct op_entry entry;
+	struct op_sample *sample;
+
+	/* held for the whole sync; add_event_entry() requires it */
+	mutex_lock(&buffer_mutex);
+
+	add_cpu_switch(cpu);
+
+	op_cpu_buffer_reset(cpu);
+	/* only the entries present at this point are consumed */
+	available = op_cpu_buffer_entries(cpu);
+
+	for (i = 0; i < available; ++i) {
+		sample = op_cpu_buffer_read_entry(&entry, cpu);
+		if (!sample)
+			break;
+
+		/* escape entry: sample->event carries the flag bits */
+		if (is_code(sample->eip)) {
+			flags = sample->event;
+			if (flags & TRACE_BEGIN) {
+				state = sb_bt_start;
+				add_trace_begin();
+			}
+			if (flags & KERNEL_CTX_SWITCH) {
+				/* kernel/userspace switch */
+				in_kernel = flags & IS_KERNEL;
+				if (state == sb_buffer_start)
+					state = sb_sample_start;
+				add_kernel_ctx_switch(flags & IS_KERNEL);
+			}
+			if (flags & USER_CTX_SWITCH
+			    && op_cpu_buffer_get_data(&entry, &val)) {
+				/* userspace context switch */
+				/* the data word is the task_struct pointer */
+				new = (struct task_struct *)val;
+				oldmm = mm;
+				release_mm(oldmm);
+				mm = take_tasks_mm(new);
+				/* oldmm is only compared, never dereferenced */
+				if (mm != oldmm)
+					cookie = get_exec_dcookie(mm);
+				add_user_ctx_switch(new, cookie);
+			}
+			/* data still left in the entry: sample with data */
+			if (op_cpu_buffer_get_size(&entry))
+				add_data(&entry, mm);
+			continue;
+		}
+
+		if (state < sb_bt_start)
+			/* ignore sample */
+			continue;
+
+		if (add_sample(mm, sample, in_kernel))
+			continue;
+
+		/* ignore backtraces if failed to add a sample */
+		if (state == sb_bt_start) {
+			state = sb_bt_ignore;
+			atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+		}
+	}
+	release_mm(mm);
+
+	mark_done(cpu);
+
+	mutex_unlock(&buffer_mutex);
+}
+
+/* The function can be used to add a buffer worth of data directly to
+ * the kernel buffer. The buffer is assumed to be a circular buffer.
+ * Take the entries from index start and end at index end, wrapping
+ * at max_entries. 
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+		       unsigned int stop, unsigned int max)
+{
+	int i;
+
+	i = start;
+
+	mutex_lock(&buffer_mutex);
+	/* NOTE(review): start and stop are not checked against max here;
+	 * presumably callers guarantee both are below max - confirm.
+	 */
+	while (i != stop) {
+		add_event_entry(buf[i++]);
+
+		/* wrap around at the end of the circular buffer */
+		if (i >= max)
+			i = 0;
+	}
+
+	mutex_unlock(&buffer_mutex);
+}
+
diff --git a/drivers/oprofile/buffer_sync.h b/drivers/oprofile/buffer_sync.h
new file mode 100644
index 00000000000..3110732c183
--- /dev/null
+++ b/drivers/oprofile/buffer_sync.h
@@ -0,0 +1,22 @@
+/**
+ * @file buffer_sync.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROFILE_BUFFER_SYNC_H
+#define OPROFILE_BUFFER_SYNC_H
+
+/* add the necessary profiling hooks; returns 0 or a negative error code */
+int sync_start(void);
+
+/* remove the hooks */
+void sync_stop(void);
+
+/* sync the given CPU's buffer */
+void sync_buffer(int cpu);
+
+#endif /* OPROFILE_BUFFER_SYNC_H */
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
new file mode 100644
index 00000000000..de82183bb9b
--- /dev/null
+++ b/drivers/oprofile/cpu_buffer.c
@@ -0,0 +1,448 @@
+/**
+ * @file cpu_buffer.c
+ *
+ * @remark Copyright 2002-2009 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Robert Richter <robert.richter@amd.com>
+ *
+ * Each CPU has a local buffer that stores PC value/event
+ * pairs. We also log context switches when we notice them.
+ * Eventually each CPU's buffer is processed into the global
+ * event buffer by sync_buffer().
+ *
+ * We use a local buffer for two reasons: an NMI or similar
+ * interrupt cannot synchronise, and high sampling rates
+ * would lead to catastrophic global synchronisation if
+ * a global buffer was used. 
+ */
+
+#include <linux/sched.h>
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+
+#include "event_buffer.h"
+#include "cpu_buffer.h"
+#include "buffer_sync.h"
+#include "oprof.h"
+
+#define OP_BUFFER_FLAGS	0
+
+/* one ring buffer shared by all CPUs; readers address it per cpu */
+static struct ring_buffer *op_ring_buffer;
+DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
+
+static void wq_sync_buffer(struct work_struct *work);
+
+#define DEFAULT_TIMER_EXPIRE (HZ / 10)
+/* cleared by end_cpu_work() so wq_sync_buffer() stops re-arming itself */
+static int work_enabled;
+
+unsigned long oprofile_get_cpu_buffer_size(void)
+{
+	return oprofile_cpu_buffer_size;
+}
+
+/* Count one overflow-lost sample against the current CPU's buffer. */
+void oprofile_cpu_buffer_inc_smpl_lost(void)
+{
+	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+
+	cpu_buf->sample_lost_overflow++;
+}
+
+/* Free the ring buffer if it exists; safe to call more than once. */
+void free_cpu_buffers(void)
+{
+	if (op_ring_buffer)
+		ring_buffer_free(op_ring_buffer);
+	op_ring_buffer = NULL;
+}
+
+#define RB_EVENT_HDR_SIZE 4
+
+/* Allocate the ring buffer, sized for oprofile_cpu_buffer_size samples plus
+ * RB_EVENT_HDR_SIZE bytes each, and reset the per-CPU bookkeeping of every
+ * possible CPU. Returns 0 on success or -ENOMEM.
+ */
+int alloc_cpu_buffers(void)
+{
+	int i;
+
+	unsigned long buffer_size = oprofile_cpu_buffer_size;
+	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
+						 RB_EVENT_HDR_SIZE);
+
+	op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
+	if (!op_ring_buffer)
+		goto fail;
+
+	for_each_possible_cpu(i) {
+		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
+
+		b->last_task = NULL;
+		b->last_is_kernel = -1;
+		b->tracing = 0;
+		b->buffer_size = buffer_size;
+		b->sample_received = 0;
+		b->sample_lost_overflow = 0;
+		b->backtrace_aborted = 0;
+		b->sample_invalid_eip = 0;
+		b->cpu = i;
+		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
+	}
+	return 0;
+
+fail:
+	free_cpu_buffers();
+	return -ENOMEM;
+}
+
+/* Schedule the periodic sync work on every online CPU. */
+void start_cpu_work(void)
+{
+	int i;
+
+	work_enabled = 1;
+
+	for_each_online_cpu(i) {
+		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
+
+		/*
+		 * Spread the work by 1 jiffy per cpu so they don't all
+		 * fire at once.
+		 */
+		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
+	}
+}
+
+/* Disable re-arming, cancel the pending work of every online CPU and
+ * flush the scheduled work.
+ */
+void end_cpu_work(void)
+{
+	int i;
+
+	work_enabled = 0;
+
+	for_each_online_cpu(i) {
+		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
+
+		cancel_delayed_work(&b->work);
+	}
+
+	flush_scheduled_work();
+}
+
+/*
+ * This function prepares the cpu buffer to write a sample.
+ *
+ * Struct op_entry is used during operations on the ring buffer while
+ * struct op_sample contains the data that is stored in the ring
+ * buffer. Struct entry can be uninitialized. The function reserves a
+ * data array that is specified by size. Use
+ * op_cpu_buffer_write_commit() after preparing the sample. In case of
+ * errors a null pointer is returned, otherwise the pointer to the
+ * sample.
+ *
+ */
+struct op_sample
+*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
+{
+	/* size counts data words, not bytes */
+	entry->event = ring_buffer_lock_reserve
+		(op_ring_buffer, sizeof(struct op_sample) +
+		 size * sizeof(entry->sample->data[0]));
+	if (!entry->event)
+		return NULL;
+	entry->sample = ring_buffer_event_data(entry->event);
+	entry->size = size;
+	entry->data = entry->sample->data;
+
+	return entry->sample;
+}
+
+/* Commit an entry reserved with op_cpu_buffer_write_reserve(); returns the
+ * result of ring_buffer_unlock_commit().
+ */
+int op_cpu_buffer_write_commit(struct op_entry *entry)
+{
+	return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
+}
+
+/* Consume the next event of @cpu and set up @entry for reading its data
+ * words. Returns the sample, or NULL if no event was consumed.
+ */
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
+{
+	struct ring_buffer_event *e;
+	e = ring_buffer_consume(op_ring_buffer, cpu, NULL);
+	if (!e)
+		return NULL;
+
+	entry->event = e;
+	entry->sample = ring_buffer_event_data(e);
+	/* number of data words that follow the fixed op_sample header */
+	entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
+		/ sizeof(entry->sample->data[0]);
+	entry->data = entry->sample->data;
+	return entry->sample;
+}
+
+/* Number of entries currently held in the ring buffer for @cpu. */
+unsigned long op_cpu_buffer_entries(int cpu)
+{
+	return ring_buffer_entries_cpu(op_ring_buffer, cpu);
+}
+
+/* Write an escape entry when the trace, kernel/user or task state differs
+ * from what was last recorded in @cpu_buf. Nothing is written when no flag
+ * is set. Returns 0 on success, -ENOMEM if the reservation fails.
+ */
+static int
+op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
+	    int is_kernel, struct task_struct *task)
+{
+	struct op_entry entry;
+	struct op_sample *sample;
+	unsigned long flags;
+	int size;
+
+	flags = 0;
+
+	if (backtrace)
+		flags |= TRACE_BEGIN;
+
+	/* notice a switch from user->kernel or vice versa */
+	is_kernel = !!is_kernel;
+	if (cpu_buf->last_is_kernel != is_kernel) {
+		cpu_buf->last_is_kernel = is_kernel;
+		flags |= KERNEL_CTX_SWITCH;
+		if (is_kernel)
+			flags |= IS_KERNEL;
+	}
+
+	/* notice a task switch */
+	if (cpu_buf->last_task != task) {
+		cpu_buf->last_task = task;
+		flags |= USER_CTX_SWITCH;
+	}
+
+	if (!flags)
+		/* nothing to do */
+		return 0;
+
+	/* a task switch carries one data word: the task pointer */
+	if (flags & USER_CTX_SWITCH)
+		size = 1;
+	else
+		size = 0;
+
+	sample = op_cpu_buffer_write_reserve(&entry, size);
+	if (!sample)
+		return -ENOMEM;
+
+	sample->eip = ESCAPE_CODE;
+	sample->event = flags;
+
+	if (size)
+		op_cpu_buffer_add_data(&entry, (unsigned long)task);
+
+	op_cpu_buffer_write_commit(&entry);
+
+	return 0;
+}
+
+/* Write a plain (pc, event) sample without data words. Returns -ENOMEM if
+ * the reservation fails, otherwise the result of the commit.
+ */
+static inline int
+op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
+	      unsigned long pc, unsigned long event)
+{
+	struct op_entry entry;
+	struct op_sample *sample;
+
+	sample = op_cpu_buffer_write_reserve(&entry, 0);
+	if (!sample)
+		return -ENOMEM;
+
+	sample->eip = pc;
+	sample->event = event;
+
+	return op_cpu_buffer_write_commit(&entry);
+}
+
+/*
+ * This must be safe from any context.
+ *
+ * is_kernel is needed because on some architectures you cannot
+ * tell if you are in kernel or user space simply by looking at
+ * pc. 
We tag this in the buffer by generating kernel enter/exit
+ * events whenever is_kernel changes
+ *
+ * Returns 1 if the sample was stored and 0 otherwise. A pc equal to
+ * ESCAPE_CODE is counted in sample_invalid_eip; a failed write is counted
+ * in sample_lost_overflow.
+ */
+static int
+log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
+	   unsigned long backtrace, int is_kernel, unsigned long event)
+{
+	cpu_buf->sample_received++;
+
+	/* such a pc would be read back as an escape entry */
+	if (pc == ESCAPE_CODE) {
+		cpu_buf->sample_invalid_eip++;
+		return 0;
+	}
+
+	if (op_add_code(cpu_buf, backtrace, is_kernel, current))
+		goto fail;
+
+	if (op_add_sample(cpu_buf, pc, event))
+		goto fail;
+
+	return 1;
+
+fail:
+	cpu_buf->sample_lost_overflow++;
+	return 0;
+}
+
+/* While tracing is set, oprofile_add_trace() accepts backtrace entries. */
+static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
+{
+	cpu_buf->tracing = 1;
+}
+
+static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
+{
+	cpu_buf->tracing = 0;
+}
+
+/* Log one sample on the current CPU and, when oprofile_backtrace_depth is
+ * non-zero, call oprofile_ops.backtrace() between begin/end trace.
+ */
+static inline void
+__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+			  unsigned long event, int is_kernel)
+{
+	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+	unsigned long backtrace = oprofile_backtrace_depth;
+
+	/*
+	 * if log_sample() fails we can't backtrace since we lost the
+	 * source of this event
+	 */
+	if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
+		/* failed */
+		return;
+
+	if (!backtrace)
+		return;
+
+	oprofile_begin_trace(cpu_buf);
+	oprofile_ops.backtrace(regs, backtrace);
+	oprofile_end_trace(cpu_buf);
+}
+
+/* Variant for callers that supply pc and is_kernel themselves. */
+void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+			     unsigned long event, int is_kernel)
+{
+	__oprofile_add_ext_sample(pc, regs, event, is_kernel);
+}
+
+/* Variant that derives is_kernel and pc from regs. */
+void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
+{
+	int is_kernel = !user_mode(regs);
+	unsigned long pc = profile_pc(regs);
+
+	__oprofile_add_ext_sample(pc, regs, event, is_kernel);
+}
+
+/*
+ * Add samples with data to the ring buffer.
+ *
+ * Use oprofile_add_data(&entry, val) to add data and
+ * oprofile_write_commit(&entry) to commit the sample. 
+ */
+void
+oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
+		       unsigned long pc, int code, int size)
+{
+	struct op_sample *sample;
+	int is_kernel = !user_mode(regs);
+	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+
+	cpu_buf->sample_received++;
+
+	/* no backtraces for samples with data */
+	if (op_add_code(cpu_buf, 0, is_kernel, current))
+		goto fail;
+
+	/* two extra data words hold code and pc ahead of the caller's data */
+	sample = op_cpu_buffer_write_reserve(entry, size + 2);
+	if (!sample)
+		goto fail;
+	sample->eip = ESCAPE_CODE;
+	sample->event = 0;		/* no flags */
+
+	op_cpu_buffer_add_data(entry, code);
+	op_cpu_buffer_add_data(entry, pc);
+
+	return;
+
+fail:
+	/* a NULL event marks the entry unusable for the functions below */
+	entry->event = NULL;
+	cpu_buf->sample_lost_overflow++;
+}
+
+/* Append one data word; returns 0 if the entry is unusable or full,
+ * otherwise the result of op_cpu_buffer_add_data().
+ */
+int oprofile_add_data(struct op_entry *entry, unsigned long val)
+{
+	if (!entry->event)
+		return 0;
+	return op_cpu_buffer_add_data(entry, val);
+}
+
+/* Append a 64-bit value as two data words, low 32 bits first. */
+int oprofile_add_data64(struct op_entry *entry, u64 val)
+{
+	if (!entry->event)
+		return 0;
+	if (op_cpu_buffer_get_size(entry) < 2)
+		/*
+		 * the function returns 0 to indicate a too small
+		 * buffer, even if there is some space left
+		 */
+		return 0;
+	if (!op_cpu_buffer_add_data(entry, (u32)val))
+		return 0;
+	return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
+}
+
+/* Commit the entry; -EINVAL if the preceding reserve failed. */
+int oprofile_write_commit(struct op_entry *entry)
+{
+	if (!entry->event)
+		return -EINVAL;
+	return op_cpu_buffer_write_commit(entry);
+}
+
+/* Log a sample on the current CPU without requesting a backtrace. */
+void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
+{
+	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+	log_sample(cpu_buf, pc, 0, is_kernel, event);
+}
+
+/* Add one backtrace entry; ignored unless tracing is set on this CPU. */
+void oprofile_add_trace(unsigned long pc)
+{
+	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+
+	if (!cpu_buf->tracing)
+		return;
+
+	/*
+	 * broken frame can give an eip with the same value as an
+	 * escape code, abort the trace if we get it
+	 */
+	if (pc == ESCAPE_CODE)
+		goto fail;
+
+	if (op_add_sample(cpu_buf, pc, 0))
+		goto fail;
+
+	return;
+fail:
+	cpu_buf->tracing = 0;
+	cpu_buf->backtrace_aborted++;
+	return;
+}
+
+/*
+ * This serves to avoid cpu buffer overflow, and makes sure
+ * the task mortuary progresses
+ *
+ * By using schedule_delayed_work_on and then schedule_delayed_work
+ * we guarantee this will stay on the correct cpu
+ */
+static void wq_sync_buffer(struct work_struct *work)
+{
+	struct oprofile_cpu_buffer *b =
+		container_of(work, struct oprofile_cpu_buffer, work.work);
+	if (b->cpu != smp_processor_id()) {
+		printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
+		       smp_processor_id(), b->cpu);
+
+		/* the buffer's CPU is offline: stop without syncing */
+		if (!cpu_online(b->cpu)) {
+			cancel_delayed_work(&b->work);
+			return;
+		}
+	}
+	sync_buffer(b->cpu);
+
+	/* don't re-add the work if we're shutting down */
+	if (work_enabled)
+		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
+}
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
new file mode 100644
index 00000000000..68ea16ab645
--- /dev/null
+++ b/drivers/oprofile/cpu_buffer.h
@@ -0,0 +1,120 @@
+/**
+ * @file cpu_buffer.h
+ *
+ * @remark Copyright 2002-2009 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ * @author Robert Richter <robert.richter@amd.com>
+ */
+
+#ifndef OPROFILE_CPU_BUFFER_H
+#define OPROFILE_CPU_BUFFER_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/cache.h>
+#include <linux/sched.h>
+#include <linux/ring_buffer.h>
+
+struct task_struct;
+
+int alloc_cpu_buffers(void);
+void free_cpu_buffers(void);
+
+void start_cpu_work(void);
+void end_cpu_work(void);
+
+/* CPU buffer is composed of such entries (which are
+ * also used for context switch notes)
+ * An eip of ESCAPE_CODE marks an escape entry whose event field holds
+ * the flag bits defined at the end of this header; data[] holds the
+ * optional data words.
+ */
+struct op_sample {
+	unsigned long eip;
+	unsigned long event;
+	unsigned long data[0];
+};
+
+struct op_entry;
+
+/* Per-CPU bookkeeping: last recorded context, tracing state, statistics
+ * counters and the delayed work that syncs this CPU's buffer.
+ */
+struct oprofile_cpu_buffer {
+	unsigned long buffer_size;
+	struct task_struct *last_task;
+	int last_is_kernel;
+	int tracing;
+	unsigned long sample_received;
+	unsigned long sample_lost_overflow;
+	unsigned long backtrace_aborted;
+	unsigned long sample_invalid_eip;
+	int cpu;
+	struct delayed_work work;
+};
+
+DECLARE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
+
+/*
+ * Resets the cpu buffer to a sane state.
+ *
+ * reset these to invalid values; the next sample collected will
+ * populate the buffer with proper values to initialize the buffer
+ */
+static inline void op_cpu_buffer_reset(int cpu)
+{
+	struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu);
+
+	cpu_buf->last_is_kernel = -1;
+	cpu_buf->last_task = NULL;
+}
+
+/*
+ * op_cpu_buffer_add_data() and op_cpu_buffer_write_commit() may be
+ * called only if op_cpu_buffer_write_reserve() did not return NULL or
+ * entry->event != NULL, otherwise entry->size or entry->event will be
+ * used uninitialized.
+ */
+
+struct op_sample
+*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size);
+int op_cpu_buffer_write_commit(struct op_entry *entry);
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
+unsigned long op_cpu_buffer_entries(int cpu);
+
+/* returns the remaining free size of data in the entry */
+/* note: 0 is returned both when the entry was already full (nothing is
+ * written) and when this call stored the last free word
+ */
+static inline
+int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
+{
+	if (!entry->size)
+		return 0;
+	*entry->data = val;
+	entry->size--;
+	entry->data++;
+	return entry->size;
+}
+
+/* returns the size of data in the entry */
+static inline
+int op_cpu_buffer_get_size(struct op_entry *entry)
+{
+	return entry->size;
+}
+
+/* returns 0 if empty or the size of data including the current value */
+static inline
+int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
+{
+	int size = entry->size;
+	if (!size)
+		return 0;
+	*val = *entry->data;
+	entry->size--;
+	entry->data++;
+	return size;
+}
+
+/* extra data flags */
+#define KERNEL_CTX_SWITCH	(1UL << 0)
+#define IS_KERNEL		(1UL << 1)
+#define TRACE_BEGIN		(1UL << 2)
+#define USER_CTX_SWITCH		(1UL << 3)
+
+#endif /* OPROFILE_CPU_BUFFER_H */
diff --git 
a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
new file mode 100644
index 00000000000..5df60a6b677
--- /dev/null
+++ b/drivers/oprofile/event_buffer.c
@@ -0,0 +1,208 @@
+/**
+ * @file event_buffer.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * This is the global event buffer that the user-space
+ * daemon reads from. The event buffer is an untyped array
+ * of unsigned longs. Entries are prefixed by the
+ * escape value ESCAPE_CODE followed by an identifying code.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/dcookies.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+
+#include "oprof.h"
+#include "event_buffer.h"
+#include "oprofile_stats.h"
+
+DEFINE_MUTEX(buffer_mutex);
+
+static unsigned long buffer_opened;
+static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
+static unsigned long *event_buffer;
+static unsigned long buffer_size;
+static unsigned long buffer_watershed;
+static size_t buffer_pos;
+/* atomic_t because wait_event checks it outside of buffer_mutex */
+static atomic_t buffer_ready = ATOMIC_INIT(0);
+
+/*
+ * Add an entry to the event buffer. When we get near to the end we
+ * wake up the process sleeping on the read() of the file. To protect
+ * the event_buffer this function may only be called when buffer_mutex
+ * is set.
+ */
+void add_event_entry(unsigned long value)
+{
+	/*
+	 * This shouldn't happen since all workqueues or handlers are
+	 * canceled or flushed before the event buffer is freed.
+	 */
+	if (!event_buffer) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	/* buffer full: count the loss and drop the value */
+	if (buffer_pos == buffer_size) {
+		atomic_inc(&oprofile_stats.event_lost_overflow);
+		return;
+	}
+
+	event_buffer[buffer_pos] = value;
+	if (++buffer_pos == buffer_size - buffer_watershed) {
+		atomic_set(&buffer_ready, 1);
+		wake_up(&buffer_wait);
+	}
+}
+
+
+/* Wake up the waiting process if any. This happens
+ * on "echo 0 >/dev/oprofile/enable" so the daemon
+ * processes the data remaining in the event buffer.
+ */
+void wake_up_buffer_waiter(void)
+{
+	mutex_lock(&buffer_mutex);
+	atomic_set(&buffer_ready, 1);
+	wake_up(&buffer_wait);
+	mutex_unlock(&buffer_mutex);
+}
+
+
+/*
+ * Snapshot the configured buffer size and watershed under
+ * oprofilefs_lock and allocate the event buffer.
+ *
+ * Returns 0 on success, -EINVAL if the configured sizes are unusable,
+ * -ENOMEM if the allocation fails.
+ */
+int alloc_event_buffer(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&oprofilefs_lock, flags);
+	buffer_size = oprofile_buffer_size;
+	buffer_watershed = oprofile_buffer_watershed;
+	spin_unlock_irqrestore(&oprofilefs_lock, flags);
+
+	if (buffer_watershed >= buffer_size)
+		return -EINVAL;
+
+	/*
+	 * buffer_size is set through oprofilefs; reject values whose
+	 * byte count would wrap in the multiplication below and yield
+	 * an allocation smaller than the index range we write to.
+	 */
+	if (buffer_size > ~0UL / sizeof(unsigned long))
+		return -EINVAL;
+
+	buffer_pos = 0;
+	event_buffer = vmalloc(sizeof(unsigned long) * buffer_size);
+	if (!event_buffer)
+		return -ENOMEM;
+
+	return 0;
+}
+
+
+/* Free the event buffer; readers see event_buffer == NULL afterwards. */
+void free_event_buffer(void)
+{
+	mutex_lock(&buffer_mutex);
+	vfree(event_buffer);
+	buffer_pos = 0;
+	event_buffer = NULL;
+	mutex_unlock(&buffer_mutex);
+}
+
+
+/*
+ * open() of the event file: requires CAP_SYS_ADMIN, allows a single
+ * opener at a time (bit 0 of buffer_opened) and runs oprofile_setup().
+ */
+static int event_buffer_open(struct inode *inode, struct file *file)
+{
+	int err = -EPERM;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (test_and_set_bit_lock(0, &buffer_opened))
+		return -EBUSY;
+
+	/* Register as a user of dcookies
+	 * to ensure they persist for the lifetime of
+	 * the open event file
+	 */
+	err = -EINVAL;
+	file->private_data = dcookie_register();
+	if (!file->private_data)
+		goto out;
+
+	if ((err = oprofile_setup()))
+		goto fail;
+
+	/* NB: the actual start happens from userspace
+	 * echo 1 >/dev/oprofile/enable
+	 */
+
+	return 0;
+
+fail:
+	dcookie_unregister(file->private_data);
+out:
+	__clear_bit_unlock(0, &buffer_opened);
+	return err;
+}
+
+
+/* release() of the event file: stop and shut down, then allow a new opener. */
+static int event_buffer_release(struct inode *inode, struct file *file)
+{
+	oprofile_stop();
+	oprofile_shutdown();
+	dcookie_unregister(file->private_data);
+	buffer_pos = 0;
+	atomic_set(&buffer_ready, 0);
+	__clear_bit_unlock(0, &buffer_opened);
+	return 0;
+}
+
+
+/*
+ * read() of the event file.  The caller must ask for exactly the whole
+ * buffer (buffer_size entries) at offset 0; the call sleeps until
+ * buffer_ready is set and returns the number of bytes copied.
+ */
+static ssize_t event_buffer_read(struct file *file, char __user *buf,
+				 size_t count, loff_t *offset)
+{
+	int retval = -EINVAL;
+	size_t const max = buffer_size * sizeof(unsigned long);
+
+	/* handling partial reads is more trouble than it's worth */
+	if (count != max || *offset)
+		return -EINVAL;
+
+	wait_event_interruptible(buffer_wait, atomic_read(&buffer_ready));
+
+	if (signal_pending(current))
+		return -EINTR;
+
+	/* can't currently happen */
+	if (!atomic_read(&buffer_ready))
+		return -EAGAIN;
+
+	mutex_lock(&buffer_mutex);
+
+	/* May happen if the buffer is freed during pending reads. */
+	if (!event_buffer) {
+		retval = -EINTR;
+		goto out;
+	}
+
+	atomic_set(&buffer_ready, 0);
+
+	retval = -EFAULT;
+
+	count = buffer_pos * sizeof(unsigned long);
+
+	if (copy_to_user(buf, event_buffer, count))
+		goto out;
+
+	retval = count;
+	buffer_pos = 0;
+
+out:
+	mutex_unlock(&buffer_mutex);
+	return retval;
+}
+
+const struct file_operations event_buffer_fops = {
+	.open		= event_buffer_open,
+	.release	= event_buffer_release,
+	.read		= event_buffer_read,
+};
diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h
new file mode 100644
index 00000000000..4e70749f8d1
--- /dev/null
+++ b/drivers/oprofile/event_buffer.h
@@ -0,0 +1,40 @@
+/**
+ * @file event_buffer.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef EVENT_BUFFER_H
+#define EVENT_BUFFER_H
+
+#include <linux/types.h>
+#include <asm/mutex.h>
+
+int alloc_event_buffer(void);
+
+void free_event_buffer(void);
+
+/**
+ * Add data to the event buffer.
+ * The data passed is free-form, but typically consists of
+ * file offsets, dcookies, context information, and ESCAPE codes.
+ */
+void add_event_entry(unsigned long data);
+
+/* wake up the process sleeping on the event file */
+void wake_up_buffer_waiter(void);
+
+/* parenthesized so the values expand safely inside larger expressions */
+#define INVALID_COOKIE (~0UL)
+#define NO_COOKIE (0UL)
+
+extern const struct file_operations event_buffer_fops;
+
+/* mutex between sync_cpu_buffers() and the
+ * file reading code.
+ */
+extern struct mutex buffer_mutex;
+
+#endif /* EVENT_BUFFER_H */
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
new file mode 100644
index 00000000000..dc8a0428260
--- /dev/null
+++ b/drivers/oprofile/oprof.c
@@ -0,0 +1,285 @@
+/**
+ * @file oprof.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/oprofile.h>
+#include <linux/moduleparam.h>
+#include <linux/workqueue.h>
+#include <linux/time.h>
+#include <asm/mutex.h>
+
+#include "oprof.h"
+#include "event_buffer.h"
+#include "cpu_buffer.h"
+#include "buffer_sync.h"
+#include "oprofile_stats.h"
+
+struct oprofile_operations oprofile_ops;
+
+unsigned long oprofile_started;
+unsigned long oprofile_backtrace_depth;
+static unsigned long is_setup;
+static DEFINE_MUTEX(start_mutex);
+
+/* timer
+   0 - use performance monitoring hardware if available
+   1 - use the timer int mechanism regardless
+ */
+static int timer = 0;
+
+/*
+ * Allocate the cpu and event buffers, run the optional arch setup hook
+ * and start buffer syncing.  Called when the event file is opened.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that
+ * was set up so far is torn down again.
+ */
+int oprofile_setup(void)
+{
+	int err;
+
+	mutex_lock(&start_mutex);
+
+	if ((err = alloc_cpu_buffers()))
+		goto out;
+
+	if ((err = alloc_event_buffer()))
+		goto out1;
+
+	if (oprofile_ops.setup && (err = oprofile_ops.setup()))
+		goto out2;
+
+	/* Note even though this starts part of the
+	 * profiling overhead, it's necessary to prevent
+	 * us missing task deaths and eventually oopsing
+	 * when trying to process the event buffer.
+	 */
+	if (oprofile_ops.sync_start) {
+		int sync_ret = oprofile_ops.sync_start();
+		switch (sync_ret) {
+		case 0:
+			goto post_sync;
+		case 1:
+			goto do_generic;
+		case -1:
+		default:
+			/*
+			 * err is still 0 from the successful steps above;
+			 * without setting it here the caller would see
+			 * success although the buffers are freed below
+			 * and is_setup stays 0.
+			 */
+			err = -EINVAL;
+			goto out3;
+		}
+	}
+do_generic:
+	if ((err = sync_start()))
+		goto out3;
+
+post_sync:
+	is_setup = 1;
+	mutex_unlock(&start_mutex);
+	return 0;
+
+out3:
+	if (oprofile_ops.shutdown)
+		oprofile_ops.shutdown();
+out2:
+	free_event_buffer();
+out1:
+	free_cpu_buffers();
+out:
+	mutex_unlock(&start_mutex);
+	return err;
+}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void switch_worker(struct work_struct *work);
+static DECLARE_DELAYED_WORK(switch_work, switch_worker);
+
+/* arm the event-switch work if the arch provides switch_events */
+static void start_switch_worker(void)
+{
+	if (oprofile_ops.switch_events)
+		schedule_delayed_work(&switch_work, oprofile_time_slice);
+}
+
+static void stop_switch_worker(void)
+{
+	cancel_delayed_work_sync(&switch_work);
+}
+
+/* switch events; count the switch and re-arm unless the hook returns nonzero */
+static void switch_worker(struct work_struct *work)
+{
+	if (oprofile_ops.switch_events())
+		return;
+
+	atomic_inc(&oprofile_stats.multiplex_counter);
+	start_switch_worker();
+}
+
+/* User inputs in ms, converts to jiffies */
+int oprofile_set_timeout(unsigned long val_msec)
+{
+	int err = 0;
+	unsigned long time_slice;
+
+	mutex_lock(&start_mutex);
+
+	if (oprofile_started) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (!oprofile_ops.switch_events) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	time_slice = msecs_to_jiffies(val_msec);
+	if (time_slice == MAX_JIFFY_OFFSET) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	oprofile_time_slice = time_slice;
+
+out:
+	mutex_unlock(&start_mutex);
+	return err;
+
+}
+
+#else
+
+static inline void start_switch_worker(void) { }
+static inline void stop_switch_worker(void) { }
+
+#endif
+
+/* Actually start profiling (echo 1>/dev/oprofile/enable) */
+int oprofile_start(void)
+{
+	int err = -EINVAL;
+
+	mutex_lock(&start_mutex);
+
+	if (!is_setup)
+		goto out;
+
+	err = 0;
+
+	/* already running: nothing to do */
+	if (oprofile_started)
+		goto out;
+
+	oprofile_reset_stats();
+
+	if ((err = oprofile_ops.start()))
+		goto out;
+
+	start_switch_worker();
+
+	oprofile_started = 1;
+out:
+	mutex_unlock(&start_mutex);
+	return err;
+}
+
+
+/* echo 0>/dev/oprofile/enable */
+void oprofile_stop(void)
+{
+	mutex_lock(&start_mutex);
+	if (!oprofile_started)
+		goto out;
+	oprofile_ops.stop();
+	oprofile_started = 0;
+
+	stop_switch_worker();
+
+	/* wake up the daemon to read what remains */
+	wake_up_buffer_waiter();
+out:
+	mutex_unlock(&start_mutex);
+}
+
+
+/* Undo oprofile_setup(): stop syncing, run the arch shutdown hook, free buffers. */
+void oprofile_shutdown(void)
+{
+	mutex_lock(&start_mutex);
+	if (oprofile_ops.sync_stop) {
+		int sync_ret = oprofile_ops.sync_stop();
+		switch (sync_ret) {
+		case 0:
+			goto post_sync;
+		case 1:
+			goto do_generic;
+		default:
+			goto post_sync;
+		}
+	}
+do_generic:
+	sync_stop();
+post_sync:
+	if (oprofile_ops.shutdown)
+		oprofile_ops.shutdown();
+	is_setup = 0;
+	free_event_buffer();
+	free_cpu_buffers();
+	mutex_unlock(&start_mutex);
+}
+
+/*
+ * Set the backtrace depth.  Returns -EBUSY while profiling is running
+ * and -EINVAL if the arch provides no backtrace operation.
+ */
+int oprofile_set_backtrace(unsigned long val)
+{
+	int err = 0;
+
+	mutex_lock(&start_mutex);
+
+	if (oprofile_started) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (!oprofile_ops.backtrace) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	oprofile_backtrace_depth = val;
+
+out:
+	mutex_unlock(&start_mutex);
+	return err;
+}
+
+static int __init oprofile_init(void)
+{
+	int err;
+
+	err = oprofile_arch_init(&oprofile_ops);
+
+	/* fall back to the timer interrupt if arch init failed or was overridden */
+	if (err < 0 || timer) {
+		printk(KERN_INFO "oprofile: using timer interrupt.\n");
+		oprofile_timer_init(&oprofile_ops);
+	}
+
+	err = oprofilefs_register();
+	if (err)
+		oprofile_arch_exit();
+
+	return err;
+}
+
+
+static void __exit oprofile_exit(void)
+{
+	oprofilefs_unregister();
+	oprofile_arch_exit();
+}
+
+
+module_init(oprofile_init);
+module_exit(oprofile_exit);
+
+module_param_named(timer, timer, int, 0644);
+MODULE_PARM_DESC(timer, "force use of timer interrupt");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("John Levon <levon@movementarian.org>");
+MODULE_DESCRIPTION("OProfile system profiler");
diff --git a/drivers/oprofile/oprof.h
b/drivers/oprofile/oprof.h
new file mode 100644
index 00000000000..cb92f5c98c1
--- /dev/null
+++ b/drivers/oprofile/oprof.h
@@ -0,0 +1,42 @@
+/**
+ * @file oprof.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROF_H
+#define OPROF_H
+
+int oprofile_setup(void);
+void oprofile_shutdown(void);
+
+int oprofilefs_register(void);
+void oprofilefs_unregister(void);
+
+int oprofile_start(void);
+void oprofile_stop(void);
+
+struct oprofile_operations;
+
+extern unsigned long oprofile_buffer_size;
+extern unsigned long oprofile_cpu_buffer_size;
+extern unsigned long oprofile_buffer_watershed;
+extern unsigned long oprofile_time_slice;
+
+extern struct oprofile_operations oprofile_ops;
+extern unsigned long oprofile_started;
+extern unsigned long oprofile_backtrace_depth;
+
+struct super_block;
+struct dentry;
+
+void oprofile_create_files(struct super_block *sb, struct dentry *root);
+void oprofile_timer_init(struct oprofile_operations *ops);
+
+int oprofile_set_backtrace(unsigned long depth);
+int oprofile_set_timeout(unsigned long time);
+
+#endif /* OPROF_H */
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
new file mode 100644
index 00000000000..bbd7516e086
--- /dev/null
+++ b/drivers/oprofile/oprofile_files.c
@@ -0,0 +1,191 @@
+/**
+ * @file oprofile_files.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/fs.h>
+#include <linux/oprofile.h>
+#include <linux/jiffies.h>
+
+#include "event_buffer.h"
+#include "oprofile_stats.h"
+#include "oprof.h"
+
+#define BUFFER_SIZE_DEFAULT		131072
+#define CPU_BUFFER_SIZE_DEFAULT		8192
+#define BUFFER_WATERSHED_DEFAULT	32768	/* FIXME: tune */
+#define TIME_SLICE_DEFAULT		1
+
+unsigned long oprofile_buffer_size;
+unsigned long oprofile_cpu_buffer_size;
+unsigned long oprofile_buffer_watershed;
+unsigned long oprofile_time_slice;
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+/* report the time slice in milliseconds (it is stored in jiffies) */
+static ssize_t timeout_read(struct file *file, char __user *buf,
+		size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(jiffies_to_msecs(oprofile_time_slice),
+					buf, count, offset);
+}
+
+
+/* parse a millisecond value and hand it to oprofile_set_timeout() */
+static ssize_t timeout_write(struct file *file, char const __user *buf,
+		size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	/*
+	 * oprofilefs_ulong_from_user() returns 0 without storing
+	 * anything for an empty write; bail out before val would be
+	 * used uninitialized.
+	 */
+	if (!count)
+		return 0;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	retval = oprofile_set_timeout(val);
+
+	if (retval)
+		return retval;
+	return count;
+}
+
+
+static const struct file_operations timeout_fops = {
+	.read		= timeout_read,
+	.write		= timeout_write,
+};
+
+#endif
+
+
+static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count,
+					offset);
+}
+
+
+/* parse a depth value and hand it to oprofile_set_backtrace() */
+static ssize_t depth_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	/*
+	 * oprofilefs_ulong_from_user() returns 0 without storing
+	 * anything for an empty write; bail out before val would be
+	 * used uninitialized.
+	 */
+	if (!count)
+		return 0;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	retval = oprofile_set_backtrace(val);
+
+	if (retval)
+		return retval;
+	return count;
+}
+
+
+static const struct file_operations depth_fops = {
+	.read		= depth_read,
+	.write		= depth_write
+};
+
+
+/* report sizeof(void *) of the running kernel */
+static ssize_t pointer_size_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(sizeof(void *), buf, count, offset);
+}
+
+
+static const struct file_operations pointer_size_fops = {
+	.read		= pointer_size_read,
+};
+
+
+static ssize_t cpu_type_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	return oprofilefs_str_to_user(oprofile_ops.cpu_type, buf, count, offset);
+}
+
+
+static const struct file_operations cpu_type_fops = {
+	.read		= cpu_type_read,
+};
+
+
+static ssize_t enable_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(oprofile_started, buf, count, offset);
+}
+
+
+/* nonzero starts profiling, zero stops it */
+static ssize_t enable_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	/*
+	 * oprofilefs_ulong_from_user() returns 0 without storing
+	 * anything for an empty write; bail out before an
+	 * uninitialized val would start or stop profiling.
+	 */
+	if (!count)
+		return 0;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (val)
+		retval = oprofile_start();
+	else
+		oprofile_stop();
+
+	if (retval)
+		return retval;
+	return count;
+}
+
+
+static const struct file_operations enable_fops = {
+	.read		= enable_read,
+	.write		= enable_write,
+};
+
+
+/* any write wakes the reader of the event file; the data is ignored */
+static ssize_t dump_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	wake_up_buffer_waiter();
+	return count;
+}
+
+
+static const struct file_operations dump_fops = {
+	.write		= dump_write,
+};
+
+/* reset the tunables to their defaults and populate the filesystem root */
+void oprofile_create_files(struct super_block *sb, struct dentry *root)
+{
+	/* reinitialize default values */
+	oprofile_buffer_size = BUFFER_SIZE_DEFAULT;
+	oprofile_cpu_buffer_size = CPU_BUFFER_SIZE_DEFAULT;
+	oprofile_buffer_watershed = BUFFER_WATERSHED_DEFAULT;
+	oprofile_time_slice = msecs_to_jiffies(TIME_SLICE_DEFAULT);
+
+	oprofilefs_create_file(sb, root, "enable", &enable_fops);
+	oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
+	oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
+	oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
+	oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
+	oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &oprofile_cpu_buffer_size);
+	oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops);
+	oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops);
+	oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	oprofilefs_create_file(sb, root, "time_slice", &timeout_fops);
+#endif
+	oprofile_create_stats_files(sb, root);
+	if (oprofile_ops.create_files)
+		oprofile_ops.create_files(sb, root);
+}
diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c
new file mode 100644
index 00000000000..917d28ebeac
--- /dev/null
+++ b/drivers/oprofile/oprofile_stats.c
@@ -0,0 +1,84 @@
+/**
+ * @file oprofile_stats.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+#include <linux/cpumask.h>
+#include <linux/threads.h>
+
+#include "oprofile_stats.h"
+#include "cpu_buffer.h"
+
+struct oprofile_stat_struct oprofile_stats;
+
+/* zero the per-cpu counters and the global atomic counters */
+void oprofile_reset_stats(void)
+{
+	struct oprofile_cpu_buffer *cpu_buf;
+	int i;
+
+	for_each_possible_cpu(i) {
+		cpu_buf = &per_cpu(op_cpu_buffer, i);
+		cpu_buf->sample_received = 0;
+		cpu_buf->sample_lost_overflow = 0;
+		cpu_buf->backtrace_aborted = 0;
+		cpu_buf->sample_invalid_eip = 0;
+	}
+
+	atomic_set(&oprofile_stats.sample_lost_no_mm, 0);
+	atomic_set(&oprofile_stats.sample_lost_no_mapping, 0);
+	atomic_set(&oprofile_stats.event_lost_overflow, 0);
+	atomic_set(&oprofile_stats.bt_lost_no_mapping, 0);
+	atomic_set(&oprofile_stats.multiplex_counter, 0);
+}
+
+
+/*
+ * Create stats/ with one cpuN/ directory per possible cpu holding the
+ * per-cpu counters, plus the global counters directly under stats/.
+ */
+void oprofile_create_stats_files(struct super_block *sb, struct dentry *root)
+{
+	struct oprofile_cpu_buffer *cpu_buf;
+	struct dentry *cpudir;
+	struct dentry *dir;
+	char buf[10];
+	int i;
+
+	dir = oprofilefs_mkdir(sb, root, "stats");
+	if (!dir)
+		return;
+
+	for_each_possible_cpu(i) {
+		cpu_buf = &per_cpu(op_cpu_buffer, i);
+		snprintf(buf, 10, "cpu%d", i);
+		cpudir = oprofilefs_mkdir(sb, dir, buf);
+		/* no directory, nothing to hang this cpu's files on */
+		if (!cpudir)
+			continue;
+
+		/* Strictly speaking access to these ulongs is racy,
+		 * but we can't simply lock them, and they are
+		 * informational only.
+		 */
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_received",
+			&cpu_buf->sample_received);
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow",
+			&cpu_buf->sample_lost_overflow);
+		oprofilefs_create_ro_ulong(sb, cpudir, "backtrace_aborted",
+			&cpu_buf->backtrace_aborted);
+		oprofilefs_create_ro_ulong(sb, cpudir, "sample_invalid_eip",
+			&cpu_buf->sample_invalid_eip);
+	}
+
+	oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm",
+		&oprofile_stats.sample_lost_no_mm);
+	oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mapping",
+		&oprofile_stats.sample_lost_no_mapping);
+	oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow",
+		&oprofile_stats.event_lost_overflow);
+	oprofilefs_create_ro_atomic(sb, dir, "bt_lost_no_mapping",
+		&oprofile_stats.bt_lost_no_mapping);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	oprofilefs_create_ro_atomic(sb, dir, "multiplex_counter",
+		&oprofile_stats.multiplex_counter);
+#endif
+}
diff --git a/drivers/oprofile/oprofile_stats.h b/drivers/oprofile/oprofile_stats.h
new file mode 100644
index 00000000000..0b54e46c3c1
--- /dev/null
+++ b/drivers/oprofile/oprofile_stats.h
@@ -0,0 +1,34 @@
+/**
+ * @file oprofile_stats.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OPROFILE_STATS_H
+#define OPROFILE_STATS_H
+
+#include <asm/atomic.h>
+
+struct oprofile_stat_struct {
+	atomic_t sample_lost_no_mm;
+	atomic_t sample_lost_no_mapping;
+	atomic_t bt_lost_no_mapping;
+	atomic_t event_lost_overflow;
+	atomic_t multiplex_counter;
+};
+
+extern struct oprofile_stat_struct oprofile_stats;
+
+/* reset all stats to zero */
+void oprofile_reset_stats(void);
+
+struct super_block;
+struct dentry;
+
+/* create the stats/ dir */
+void oprofile_create_stats_files(struct super_block *sb, struct dentry *root);
+
+#endif /* OPROFILE_STATS_H */
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
new file mode 100644
index
00000000000..2766a6d3c2e
--- /dev/null
+++ b/drivers/oprofile/oprofilefs.c
@@ -0,0 +1,296 @@
+/**
+ * @file oprofilefs.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * A simple filesystem for configuration and
+ * access of oprofile.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/oprofile.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+
+#include "oprof.h"
+
+#define OPROFILEFS_MAGIC 0x6f70726f
+
+DEFINE_SPINLOCK(oprofilefs_lock);
+
+/* allocate an inode with the given mode and the current time stamps */
+static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	}
+	return inode;
+}
+
+
+static const struct super_operations s_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode 	= generic_delete_inode,
+};
+
+
+/* copy a NUL-terminated string (without the terminator) to user space */
+ssize_t oprofilefs_str_to_user(char const *str, char __user *buf, size_t count, loff_t *offset)
+{
+	return simple_read_from_buffer(buf, count, offset, str, strlen(str));
+}
+
+
+#define TMPBUFSIZE 50
+
+/* format val as "%lu\n" and copy it to user space */
+ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user *buf, size_t count, loff_t *offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val);
+	if (maxlen > TMPBUFSIZE)
+		maxlen = TMPBUFSIZE;
+	return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
+}
+
+
+/*
+ * Parse an unsigned long from a user buffer into *val.
+ *
+ * Returns 0 on success, -EINVAL if the input does not fit the temporary
+ * buffer and -EFAULT if it cannot be copied.  Note that an empty write
+ * (count == 0) also returns 0 but leaves *val untouched, so callers
+ * passing an uninitialized local must handle that case themselves.
+ */
+int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count)
+{
+	char tmpbuf[TMPBUFSIZE];
+	unsigned long flags;
+
+	if (!count)
+		return 0;
+
+	if (count > TMPBUFSIZE - 1)
+		return -EINVAL;
+
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+
+	spin_lock_irqsave(&oprofilefs_lock, flags);
+	*val = simple_strtoul(tmpbuf, NULL, 0);
+	spin_unlock_irqrestore(&oprofilefs_lock, flags);
+	return 0;
+}
+
+
+static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long *val = file->private_data;
+	return oprofilefs_ulong_to_user(*val, buf, count, offset);
+}
+
+
+static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long *value = file->private_data;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(value, buf, count);
+
+	if (retval)
+		return retval;
+	return count;
+}
+
+
+/* hand the inode's private pointer (the backing variable) to the file */
+static int default_open(struct inode *inode, struct file *filp)
+{
+	if (inode->i_private)
+		filp->private_data = inode->i_private;
+	return 0;
+}
+
+
+static const struct file_operations ulong_fops = {
+	.read		= ulong_read_file,
+	.write		= ulong_write_file,
+	.open		= default_open,
+};
+
+
+static const struct file_operations ulong_ro_fops = {
+	.read		= ulong_read_file,
+	.open		= default_open,
+};
+
+
+/*
+ * Create a regular file called name below root.  Returns the new dentry,
+ * or NULL if the dentry or the inode could not be allocated.
+ */
+static struct dentry *__oprofilefs_create_file(struct super_block *sb,
+	struct dentry *root, char const *name, const struct file_operations *fops,
+	int perm)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(root, name);
+	if (!dentry)
+		return NULL;
+	inode = oprofilefs_get_inode(sb, S_IFREG | perm);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+	inode->i_fop = fops;
+	d_add(dentry, inode);
+	return dentry;
+}
+
+
+/*
+ * The creation helpers below fail only when __oprofilefs_create_file()
+ * could not allocate a dentry or inode, so they report -ENOMEM.
+ */
+
+/* create a read/write file backed by *val */
+int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root,
+	char const *name, unsigned long *val)
+{
+	struct dentry *d = __oprofilefs_create_file(sb, root, name,
+						     &ulong_fops, 0644);
+	if (!d)
+		return -ENOMEM;
+
+	d->d_inode->i_private = val;
+	return 0;
+}
+
+
+/* create a read-only file backed by *val */
+int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root,
+	char const *name, unsigned long *val)
+{
+	struct dentry *d = __oprofilefs_create_file(sb, root, name,
+						     &ulong_ro_fops, 0444);
+	if (!d)
+		return -ENOMEM;
+
+	d->d_inode->i_private = val;
+	return 0;
+}
+
+
+static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	atomic_t *val = file->private_data;
+	return oprofilefs_ulong_to_user(atomic_read(val), buf, count, offset);
+}
+
+
+static const struct file_operations atomic_ro_fops = {
+	.read		= atomic_read_file,
+	.open		= default_open,
+};
+
+
+/* create a read-only file backed by the atomic counter *val */
+int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root,
+	char const *name, atomic_t *val)
+{
+	struct dentry *d = __oprofilefs_create_file(sb, root, name,
+						     &atomic_ro_fops, 0444);
+	if (!d)
+		return -ENOMEM;
+
+	d->d_inode->i_private = val;
+	return 0;
+}
+
+
+/* create a 0644 file with caller-supplied file operations */
+int oprofilefs_create_file(struct super_block *sb, struct dentry *root,
+	char const *name, const struct file_operations *fops)
+{
+	if (!__oprofilefs_create_file(sb, root, name, fops, 0644))
+		return -ENOMEM;
+	return 0;
+}
+
+
+/* as oprofilefs_create_file(), with caller-supplied permissions */
+int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root,
+	char const *name, const struct file_operations *fops, int perm)
+{
+	if (!__oprofilefs_create_file(sb, root, name, fops, perm))
+		return -ENOMEM;
+	return 0;
+}
+
+
+/* create a 0755 directory below root; returns NULL on allocation failure */
+struct dentry *oprofilefs_mkdir(struct super_block *sb,
+	struct dentry *root, char const *name)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(root, name);
+	if (!dentry)
+		return NULL;
+	inode = oprofilefs_get_inode(sb, S_IFDIR | 0755);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	d_add(dentry, inode);
+	return dentry;
+}
+
+
+/* set up the superblock and root directory, then create all files */
+static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *root_inode;
+	struct dentry *root_dentry;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = OPROFILEFS_MAGIC;
+	sb->s_op = &s_ops;
+	sb->s_time_gran = 1;
+
+	root_inode = oprofilefs_get_inode(sb, S_IFDIR | 0755);
+	if (!root_inode)
+		return -ENOMEM;
+	root_inode->i_op = &simple_dir_inode_operations;
+	root_inode->i_fop = &simple_dir_operations;
+	root_dentry = d_alloc_root(root_inode);
+	if (!root_dentry) {
+		iput(root_inode);
+		return -ENOMEM;
+	}
+
+	sb->s_root = root_dentry;
+
+	oprofile_create_files(sb, root_dentry);
+
+	/* FIXME: verify kill_litter_super removes our dentries */
+	return 0;
+}
+
+
+static int oprofilefs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_single(fs_type, flags, data, oprofilefs_fill_super, mnt);
+}
+
+
+static struct file_system_type oprofilefs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "oprofilefs",
+	.get_sb		= oprofilefs_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+
+int __init oprofilefs_register(void)
+{
+	return register_filesystem(&oprofilefs_type);
+}
+
+
+void __exit oprofilefs_unregister(void)
+{
+	unregister_filesystem(&oprofilefs_type);
+}
diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c
new file mode 100644
index 00000000000..333f915568c
--- /dev/null
+++ b/drivers/oprofile/timer_int.c
@@ -0,0 +1,46 @@
+/**
+ * @file timer_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/profile.h>
+#include <linux/init.h>
+#include <asm/ptrace.h>
+
+#include "oprof.h"
+
+/* timer hook: record one sample for event 0 at the interrupted context */
+static int timer_notify(struct pt_regs *regs)
+{
+	oprofile_add_sample(regs, 0);
+	return 0;
+}
+
+static int timer_start(void)
+{
+	return register_timer_hook(timer_notify);
+}
+
+
+static void timer_stop(void)
+{
+	unregister_timer_hook(timer_notify);
+}
+
+
+/* install the timer-interrupt fallback operations into ops */
+void __init oprofile_timer_init(struct oprofile_operations *ops)
+{
+	ops->create_files = NULL;
+	ops->setup = NULL;
+	ops->shutdown = NULL;
+	ops->start = timer_start;
+	ops->stop = timer_stop;
+	ops->cpu_type = "timer";
+}
|