From c530665c31c0140b74ca7689e7f836177796e5bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Mar 2010 07:16:16 +0100 Subject: perf: Take a hot regs snapshot for trace events We are taking a wrong regs snapshot when a trace event triggers. Either we use get_irq_regs(), which gives us the interrupted registers if we are in an interrupt, or we use task_pt_regs() which gives us the state before we entered the kernel, assuming we are lucky enough to be no kernel thread, in which case task_pt_regs() returns the initial set of regs when the kernel thread was started. What we want is different. We need a hot snapshot of the regs, so that we can get the instruction pointer to record in the sample, the frame pointer for the callchain, and some other things. Let's use the new perf_fetch_caller_regs() for that. Comparison with perf record -e lock: -R -a -f -g Before: perf [kernel] [k] __do_softirq | --- __do_softirq | |--55.16%-- __open | --44.84%-- __write_nocancel After: perf [kernel] [k] perf_tp_event | --- perf_tp_event | |--41.07%-- lock_acquire | | | |--39.36%-- _raw_spin_lock | | | | | |--7.81%-- hrtimer_interrupt | | | smp_apic_timer_interrupt | | | apic_timer_interrupt The old case was producing unreliable callchains. Now having right frame and instruction pointers, we have the trace we want. Also syscalls and kprobe events already have the right regs, let's use them instead of wasting a retrieval. v2: Follow the rename perf_save_regs() -> perf_fetch_caller_regs() Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Jason Baron Cc: Archs --- include/linux/ftrace_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux/ftrace_event.h') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 6b7c444ab8f..ac424f18ce6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -187,6 +187,9 @@ do { \ #ifdef CONFIG_PERF_EVENTS struct perf_event; + +DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); + extern int ftrace_profile_enable(int event_id); extern void ftrace_profile_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, @@ -198,11 +201,11 @@ ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, static inline void ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, - u64 count, unsigned long irq_flags) + u64 count, unsigned long irq_flags, struct pt_regs *regs) { struct trace_entry *entry = raw_data; - perf_tp_event(entry->type, addr, count, raw_data, size); + perf_tp_event(entry->type, addr, count, raw_data, size, regs); perf_swevent_put_recursion_context(rctx); local_irq_restore(irq_flags); } -- cgit From 97d5a22005f38057b4bc0d95f81cd26510268794 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 5 Mar 2010 05:35:37 +0100 Subject: perf: Drop the obsolete profile naming for trace events Drop the obsolete "profile" naming used by perf for trace events. Perf can now do more than simple events counting, so generalize the API naming. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/ftrace_event.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/ftrace_event.h') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ac424f18ce6..c0f4b364c71 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -131,12 +131,12 @@ struct ftrace_event_call { void *mod; void *data; - int profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); + int perf_refcount; + int (*perf_event_enable)(struct ftrace_event_call *); + void (*perf_event_disable)(struct ftrace_event_call *); }; -#define FTRACE_MAX_PROFILE_SIZE 2048 +#define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ @@ -190,17 +190,17 @@ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -extern int ftrace_profile_enable(int event_id); -extern void ftrace_profile_disable(int event_id); +extern int perf_trace_enable(int event_id); +extern void perf_trace_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); extern void * -ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, +perf_trace_buf_prepare(int size, unsigned short type, int *rctxp, unsigned long *irq_flags); static inline void -ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, +perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, u64 count, unsigned long irq_flags, struct pt_regs *regs) { struct trace_entry *entry = raw_data; -- cgit