diff options
-rw-r--r-- | arch/powerpc/include/asm/hw_irq.h | 1 | ||||
-rw-r--r-- | arch/powerpc/include/asm/perf_counter.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_counter.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_32.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 115 | ||||
-rw-r--r-- | arch/x86/mm/gup.c | 56 | ||||
-rw-r--r-- | include/asm-generic/kmap_types.h | 5 | ||||
-rw-r--r-- | include/linux/mm.h | 6 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 1 | ||||
-rw-r--r-- | kernel/perf_counter.c | 38 | ||||
-rw-r--r-- | tools/perf/Makefile | 2 | ||||
-rw-r--r-- | tools/perf/builtin-annotate.c | 200 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 24 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 87 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 302 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 4 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 1 |
18 files changed, 679 insertions, 182 deletions
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b7f8f4a87cc..10a642df014 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -162,7 +162,6 @@ static inline unsigned long test_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index cc7c887705b..b398a84edce 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -10,6 +10,8 @@ */ #include <linux/types.h> +#include <asm/hw_irq.h> + #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bb202388170..e6dc1850191 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -913,6 +913,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) case PERF_TYPE_RAW: ev = counter->attr.config; break; + default: + return ERR_PTR(-EINVAL); } counter->hw.config_base = ev; counter->hw.idx = 0; @@ -1013,7 +1015,7 @@ static void record_and_restart(struct perf_counter *counter, long val, u64 period = counter->hw.sample_period; s64 prev, delta, left; int record = 0; - u64 addr, mmcra, sdsync; + u64 mmcra, sdsync; /* we don't have to worry about interrupts here */ prev = atomic64_read(&counter->hw.prev_count); diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b..5fb33e160ea 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,11 +84,6 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -extern void set_perf_counter_pending(void); - -#define clear_perf_counter_pending() do { } while (0) -#define test_perf_counter_pending() (0) - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a..01fd9461d32 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #endif #if defined(CONFIG_HIGHPTE) +#define __KM_PTE \ + (in_nmi() ? KM_NMI_PTE : \ + in_irq() ? KM_IRQ_PTE : \ + KM_PTE0) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ pte_index((address))) #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) +#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) #else #define pte_offset_map(dir, address) \ diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 275bc142cd5..e8c68a5091d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -19,6 +19,7 @@ #include <linux/kdebug.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event) return event & CORE_EVNTSEL_MASK; } -static const u64 amd_0f_hw_cache_event_ids +static const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [ C(L1D) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ }, }, [ C(L1I ) ] = { @@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ [ C(RESULT_MISS) ] = 0, }, }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { @@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids }, [ C(DTLB) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -1459,18 +1460,16 @@ static int intel_pmu_init(void) static int amd_pmu_init(void) { + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + x86_pmu = amd_pmu; - switch (boot_cpu_data.x86) { - case 0x0f: - case 0x10: - case 0x11: - memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); - pr_cont("AMD Family 0f/10/11 events, "); - break; - } return 0; } @@ -1577,8 +1576,8 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - /* Don't bother with IRQ stacks for now */ - return -1; + /* Process all stacks: */ + return 0; } static void backtrace_address(void *data, unsigned long addr, int reliable) @@ -1596,6 +1595,8 @@ static const struct stacktrace_ops backtrace_ops = { .address = backtrace_address, }; +#include "../dumpstack.h" + static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1603,40 +1604,62 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) char *stack; int nr = entry->nr; - callchain_store(entry, instruction_pointer(regs)); + callchain_store(entry, regs->ip); stack = ((char *)regs + sizeof(struct pt_regs)); #ifdef CONFIG_FRAME_POINTER - bp = frame_pointer(regs); + get_bp(bp); #else bp = 0; #endif - dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); + dump_trace(NULL, regs, (void *)&stack, bp, &backtrace_ops, entry); entry->kernel = entry->nr - nr; } +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; -struct stack_frame { - const void __user *next_fp; - unsigned long return_address; -}; + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; + + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { - int ret; + unsigned long bytes; - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; - - ret = 1; - pagefault_disable(); - if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); + bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); - return ret; + return bytes == sizeof(*frame); } static void @@ -1646,23 +1669,25 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) const void __user *fp; int nr = entry->nr; - regs = (struct pt_regs *)current->thread.sp0 - 1; - fp = (void __user *)regs->bp; + if (!user_mode(regs)) + regs = task_pt_regs(current); + + fp = (void __user *)regs->bp; callchain_store(entry, regs->ip); while (entry->nr < MAX_STACK_DEPTH) { - frame.next_fp = NULL; + frame.next_frame = NULL; frame.return_address = 0; if (!copy_stack_frame(fp, &frame)) break; - if ((unsigned long)fp < user_stack_pointer(regs)) + if ((unsigned long)fp < regs->sp) break; callchain_store(entry, frame.return_address); - fp = frame.next_fp; + fp = frame.next_frame; } entry->user = entry->nr - nr; diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 6340cef6798..697d5727c11 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + unsigned long flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed on x86. + * + * So long as we atomically load page table pointers versus teardown + * (which we do on x86, with the above PAE exception), we can follow the + * address down to the the page and take a ref on it. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h index 54e8b3d956b..eddbce0f9fb 100644 --- a/include/asm-generic/kmap_types.h +++ b/include/asm-generic/kmap_types.h @@ -24,7 +24,10 @@ D(12) KM_SOFTIRQ1, D(13) KM_SYNC_ICACHE, D(14) KM_SYNC_DCACHE, D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ -D(16) KM_TYPE_NR +D(16) KM_IRQ_PTE, +D(17) KM_NMI, +D(18) KM_NMI_PTE, +D(19) KM_TYPE_NR }; #undef D diff --git a/include/linux/mm.h b/include/linux/mm.h index d88d6fc530a..cf260d848eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -854,6 +854,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma, unsigned long end, unsigned long newflags); /* + * doesn't attempt to fault and will return short. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages); + +/* * A callback you can register to apply pressure to ageable caches. * * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1b3118a1023..eccae437fe3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -604,6 +604,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_free_task(struct task_struct *task); +extern void set_perf_counter_pending(void); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 29b685f551a..109a9572385 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1283,7 +1283,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) if (!interrupts) { perf_disable(); counter->pmu->disable(counter); - atomic_set(&hwc->period_left, 0); + atomic64_set(&hwc->period_left, 0); counter->pmu->enable(counter); perf_enable(); } @@ -1553,7 +1553,7 @@ static int perf_release(struct inode *inode, struct file *file) static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 values[3]; + u64 values[4]; int n; /* @@ -1620,22 +1620,6 @@ static void perf_counter_reset(struct perf_counter *counter) perf_counter_update_userpage(counter); } -static void perf_counter_for_each_sibling(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *sibling; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - counter = counter->group_leader; - - func(counter); - list_for_each_entry(sibling, &counter->sibling_list, list_entry) - func(sibling); - mutex_unlock(&ctx->mutex); -} - /* * Holding the top-level counter's child_mutex means that any * descendant process that has inherited this counter will block @@ -1658,14 +1642,18 @@ static void perf_counter_for_each_child(struct perf_counter *counter, static void perf_counter_for_each(struct perf_counter *counter, void (*func)(struct perf_counter *)) { - struct perf_counter *child; + struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *sibling; - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - perf_counter_for_each_sibling(counter, func); - list_for_each_entry(child, &counter->child_list, child_list) - perf_counter_for_each_sibling(child, func); - mutex_unlock(&counter->child_mutex); + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + counter = counter->group_leader; + + perf_counter_for_each_child(counter, func); + func(counter); + list_for_each_entry(sibling, &counter->sibling_list, list_entry) + perf_counter_for_each_child(counter, func); + mutex_unlock(&ctx->mutex); } static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0cbd5d6874e..e8346f95fbb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -160,7 +160,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') # CFLAGS and LDFLAGS are for the users to override from the command line. CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -LDFLAGS = -lpthread -lrt -lelf +LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index b1ed5f766cb..94cea678fd7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -25,6 +25,10 @@ #define SHOW_USER 2 #define SHOW_HV 4 +#define MIN_GREEN 0.5 +#define MIN_RED 5.0 + + static char const *input_name = "perf.data"; static char *vmlinux = "vmlinux"; @@ -39,6 +43,8 @@ static int dump_trace = 0; static int verbose; +static int print_line; + static unsigned long page_size; static unsigned long mmap_window = 32; @@ -84,6 +90,13 @@ typedef union event_union { struct period_event period; } event_t; + +struct sym_ext { + struct rb_node node; + double percent; + char *path; +}; + static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; @@ -1030,10 +1043,30 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static char *get_color(double percent) +{ + char *color = PERF_COLOR_NORMAL; + + /* + * We color high-overhead entries in red, mid-overhead + * entries in green - and keep the low overhead places + * normal: + */ + if (percent >= MIN_RED) + color = PERF_COLOR_RED; + else { + if (percent > MIN_GREEN) + color = PERF_COLOR_GREEN; + } + return color; +} + static int parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) { char *line = NULL, *tmp, *tmp2; + static const char *prev_line; + static const char *prev_color; unsigned int offset; size_t line_len; __u64 line_ip; @@ -1073,27 +1106,36 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) } if (line_ip != -1) { + const char *path = NULL; unsigned int hits = 0; double percent = 0.0; - char *color = PERF_COLOR_NORMAL; + char *color; + struct sym_ext *sym_ext = sym->priv; offset = line_ip - start; if (offset < len) hits = sym->hist[offset]; - if (sym->hist_sum) + if (offset < len && sym_ext) { + path = sym_ext[offset].path; + percent = sym_ext[offset].percent; + } else if (sym->hist_sum) percent = 100.0 * hits / sym->hist_sum; + color = get_color(percent); + /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: + * Also color the filename and line if needed, with + * the same color than the percentage. Don't print it + * twice for close colored ip with the same filename:line */ - if (percent >= 5.0) - color = PERF_COLOR_RED; - else { - if (percent > 0.5) - color = PERF_COLOR_GREEN; + if (path) { + if (!prev_line || strcmp(prev_line, path) + || color != prev_color) { + color_fprintf(stdout, color, " %s", path); + prev_line = path; + prev_color = color; + } } color_fprintf(stdout, color, " %7.2f", percent); @@ -1109,6 +1151,121 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) return 0; } +static struct rb_root root_sym_ext; + +static void insert_source_line(struct sym_ext *sym_ext) +{ + struct sym_ext *iter; + struct rb_node **p = &root_sym_ext.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct sym_ext, node); + + if (sym_ext->percent > iter->percent) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&sym_ext->node, parent, p); + rb_insert_color(&sym_ext->node, &root_sym_ext); +} + +static void free_source_line(struct symbol *sym, int len) +{ + struct sym_ext *sym_ext = sym->priv; + int i; + + if (!sym_ext) + return; + + for (i = 0; i < len; i++) + free(sym_ext[i].path); + free(sym_ext); + + sym->priv = NULL; + root_sym_ext = RB_ROOT; +} + +/* Get the filename:line for the colored entries */ +static void +get_source_line(struct symbol *sym, __u64 start, int len, char *filename) +{ + int i; + char cmd[PATH_MAX * 2]; + struct sym_ext *sym_ext; + + if (!sym->hist_sum) + return; + + sym->priv = calloc(len, sizeof(struct sym_ext)); + if (!sym->priv) + return; + + sym_ext = sym->priv; + + for (i = 0; i < len; i++) { + char *path = NULL; + size_t line_len; + __u64 offset; + FILE *fp; + + sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; + if (sym_ext[i].percent <= 0.5) + continue; + + offset = start + i; + sprintf(cmd, "addr2line -e %s %016llx", filename, offset); + fp = popen(cmd, "r"); + if (!fp) + continue; + + if (getline(&path, &line_len, fp) < 0 || !line_len) + goto next; + + sym_ext[i].path = malloc(sizeof(char) * line_len + 1); + if (!sym_ext[i].path) + goto next; + + strcpy(sym_ext[i].path, path); + insert_source_line(&sym_ext[i]); + + next: + pclose(fp); + } +} + +static void print_summary(char *filename) +{ + struct sym_ext *sym_ext; + struct rb_node *node; + + printf("\nSorted summary for file %s\n", filename); + printf("----------------------------------------------\n\n"); + + if (RB_EMPTY_ROOT(&root_sym_ext)) { + printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); + return; + } + + node = rb_first(&root_sym_ext); + while (node) { + double percent; + char *color; + char *path; + + sym_ext = rb_entry(node, struct sym_ext, node); + percent = sym_ext->percent; + color = get_color(percent); + path = sym_ext->path; + + color_fprintf(stdout, color, " %7.2f %s", percent, path); + node = rb_next(node); + } +} + static void annotate_sym(struct dso *dso, struct symbol *sym) { char *filename = dso->name; @@ -1121,13 +1278,6 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) if (dso == kernel_dso) filename = vmlinux; - printf("\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", filename); - printf("------------------------------------------------\n"); - - if (verbose >= 2) - printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - start = sym->obj_start; if (!start) start = sym->start; @@ -1135,6 +1285,18 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) end = start + sym->end - sym->start + 1; len = sym->end - sym->start; + if (print_line) { + get_source_line(sym, start, len, filename); + print_summary(filename); + } + + printf("\n\n------------------------------------------------\n"); + printf(" Percent | Source code & Disassembly of %s\n", filename); + printf("------------------------------------------------\n"); + + if (verbose >= 2) + printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); if (verbose >= 3) @@ -1150,6 +1312,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) } pclose(file); + if (print_line) + free_source_line(sym, len); } static void find_annotations(void) @@ -1308,6 +1472,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('l', "print-line", &print_line, + "print matching source lines (may be slow)"), OPT_END() }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0f5771f615d..e1dfef24887 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -37,6 +37,7 @@ static pid_t target_pid = -1; static int inherit = 1; static int force = 0; static int append_file = 0; +static int call_graph = 0; static int verbose = 0; static long samples; @@ -201,8 +202,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full) fd = open(filename, O_RDONLY); if (fd < 0) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); + /* + * We raced with a task exiting - just return: + */ + if (verbose) + fprintf(stderr, "couldn't open %s\n", filename); + return; } if (read(fd, bf, sizeof(bf)) < 0) { fprintf(stderr, "couldn't read %s\n", filename); @@ -272,8 +277,12 @@ static void pid_synthesize_mmap_samples(pid_t pid) fp = fopen(filename, "r"); if (fp == NULL) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); + /* + * We raced with a task exiting - just return: + */ + if (verbose) + fprintf(stderr, "couldn't open %s\n", filename); + return; } while (1) { char bf[BUFSIZ], *pbf = bf; @@ -351,11 +360,16 @@ static void create_counter(int counter, int cpu, pid_t pid) int track = 1; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; attr->sample_freq = freq; } + + if (call_graph) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; @@ -555,6 +569,8 @@ static const struct option options[] = { "profile at this frequency"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), + OPT_BOOLEAN('g', "call-graph", &call_graph, + "do call-graph (stack chain/backtrace) recording"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 82fa93b4db9..f86bb07c0e8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -36,18 +36,28 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int dump_trace = 0; #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) +#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int verbose; static int full_paths; +static int collapse_syscalls; static unsigned long page_size; static unsigned long mmap_window = 32; +struct ip_chain_event { + __u16 nr; + __u16 hv; + __u16 kernel; + __u16 user; + __u64 ips[]; +}; + struct ip_event { struct perf_event_header header; __u64 ip; __u32 pid, tid; - __u64 period; + unsigned char __more_data[]; }; struct mmap_event { @@ -944,9 +954,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 ip = event->ip.ip; __u64 period = 1; struct map *map = NULL; + void *more_data = event->ip.__more_data; + struct ip_chain_event *chain; - if (event->header.type & PERF_SAMPLE_PERIOD) - period = event->ip.period; + if (event->header.type & PERF_SAMPLE_PERIOD) { + period = *(__u64 *)more_data; + more_data += sizeof(__u64); + } dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), @@ -956,6 +970,31 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); + if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + int i; + + chain = (void *)more_data; + + if (dump_trace) { + dprintf("... chain: u:%d, k:%d, nr:%d\n", + chain->user, + chain->kernel, + chain->nr); + + for (i = 0; i < chain->nr; i++) + dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); + } + if (collapse_syscalls) { + /* + * Find the all-but-last kernel entry + * amongst the call-chains - to get + * to the level of system calls: + */ + if (chain->kernel >= 2) + ip = chain->ips[chain->kernel-2]; + } + } + dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { @@ -1095,9 +1134,47 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static void trace_event(event_t *event) +{ + unsigned char *raw_event = (void *)event; + char *color = PERF_COLOR_BLUE; + int i, j; + + if (!dump_trace) + return; + + dprintf("."); + cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); + + for (i = 0; i < event->header.size; i++) { + if ((i & 15) == 0) { + dprintf("."); + cdprintf(" %04x: ", i); + } + + cdprintf(" %02x", raw_event[i]); + + if (((i & 15) == 15) || i == event->header.size-1) { + cdprintf(" "); + for (j = 0; j < 15-(i & 15); j++) + cdprintf(" "); + for (j = 0; j < (i & 15); j++) { + if (isprint(raw_event[i-15+j])) + cdprintf("%c", raw_event[i-15+j]); + else + cdprintf("."); + } + cdprintf("\n"); + } + } + dprintf(".\n"); +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { + trace_event(event); + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) return process_overflow_event(event, offset, head); @@ -1204,7 +1281,7 @@ more: size = event->header.size; - dprintf("%p [%p]: event: %d\n", + dprintf("\n%p [%p]: event: %d\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.type); @@ -1276,6 +1353,8 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), + OPT_BOOLEAN('S', "syscalls", &collapse_syscalls, + "show per syscall summary overhead, using call graph"), OPT_END() }; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c43e4a97dc4..e5b3c0ff03a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -43,6 +43,7 @@ #include "util/parse-events.h" #include <sys/prctl.h> +#include <math.h> static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { @@ -79,12 +80,34 @@ static const unsigned int default_count[] = { 10000, }; -static __u64 event_res[MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_COUNTERS]; +#define MAX_RUN 100 -static __u64 runtime_nsecs; -static __u64 walltime_nsecs; -static __u64 runtime_cycles; +static int run_count = 1; +static int run_idx = 0; + +static __u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static __u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + +//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; + + +static __u64 runtime_nsecs[MAX_RUN]; +static __u64 walltime_nsecs[MAX_RUN]; +static __u64 runtime_cycles[MAX_RUN]; + +static __u64 event_res_avg[MAX_COUNTERS][3]; +static __u64 event_res_noise[MAX_COUNTERS][3]; + +static __u64 event_scaled_avg[MAX_COUNTERS]; + +static __u64 runtime_nsecs_avg; +static __u64 runtime_nsecs_noise; + +static __u64 walltime_nsecs_avg; +static __u64 walltime_nsecs_noise; + +static __u64 runtime_cycles_avg; +static __u64 runtime_cycles_noise; static void create_perf_stat_counter(int counter) { @@ -140,7 +163,7 @@ static void read_counter(int counter) int cpu, nv; int scaled; - count = event_res[counter]; + count = event_res[run_idx][counter]; count[0] = count[1] = count[2] = 0; @@ -151,6 +174,8 @@ static void read_counter(int counter) res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); assert(res == nv * sizeof(__u64)); + close(fd[cpu][counter]); + fd[cpu][counter] = -1; count[0] += single_count[0]; if (scale) { @@ -162,13 +187,13 @@ static void read_counter(int counter) scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[counter] = -1; + event_scaled[run_idx][counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[counter] = 1; + event_scaled[run_idx][counter] = 1; count[0] = (unsigned long long) ((double)count[0] * count[1] / count[2] + 0.5); } @@ -178,10 +203,94 @@ static void read_counter(int counter) */ if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs = count[0]; + runtime_nsecs[run_idx] = count[0]; if (attrs[counter].type == PERF_TYPE_HARDWARE && attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles = count[0]; + runtime_cycles[run_idx] = count[0]; +} + +static int run_perf_stat(int argc, const char **argv) +{ + unsigned long long t0, t1; + int status = 0; + int counter; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + wait(&status); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void print_noise(__u64 *count, __u64 *noise) +{ + if (run_count > 1) + fprintf(stderr, " ( +- %7.3f%% )", + (double)noise[0]/(count[0]+1)*100.0); +} + +static void nsec_printout(int counter, __u64 *count, __u64 *noise) +{ + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + + if (walltime_nsecs_avg) + fprintf(stderr, " # %10.3f CPUs ", + (double)count[0] / (double)walltime_nsecs_avg); + } + print_noise(count, noise); +} + +static void abs_printout(int counter, __u64 *count, __u64 *noise) +{ + fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + + if (runtime_cycles_avg && + attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + + fprintf(stderr, " # %10.3f IPC ", + (double)count[0] / (double)runtime_cycles_avg); + } else { + if (runtime_nsecs_avg) { + fprintf(stderr, " # %10.3f M/sec", + (double)count[0]/runtime_nsecs_avg*1000.0); + } + } + print_noise(count, noise); } /* @@ -189,11 +298,12 @@ static void read_counter(int counter) */ static void print_counter(int counter) { - __u64 *count; + __u64 *count, *noise; int scaled; - count = event_res[counter]; - scaled = event_scaled[counter]; + count = event_res_avg[counter]; + noise = event_res_noise[counter]; + scaled = event_scaled_avg[counter]; if (scaled == -1) { fprintf(stderr, " %14s %-20s\n", @@ -201,75 +311,107 @@ static void print_counter(int counter) return; } - if (nsec_counter(counter)) { - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s", - msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + if (nsec_counter(counter)) + nsec_printout(counter, count, noise); + else + abs_printout(counter, count, noise); - if (walltime_nsecs) - fprintf(stderr, " # %11.3f CPU utilization factor", - (double)count[0] / (double)walltime_nsecs); - } - } else { - fprintf(stderr, " %14Ld %-20s", - count[0], event_name(counter)); - if (runtime_nsecs) - fprintf(stderr, " # %11.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); - if (runtime_cycles && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - - fprintf(stderr, " # %1.3f per cycle", - (double)count[0] / (double)runtime_cycles); - } - } if (scaled) fprintf(stderr, " (scaled from %.2f%%)", (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); } -static int do_perf_stat(int argc, const char **argv) +/* + * normalize_noise noise values down to stddev: + */ +static void normalize_noise(__u64 *val) { - unsigned long long t0, t1; - int counter; - int status; - int pid; - int i; + double res; - if (!system_wide) - nr_cpus = 1; + res = (double)*val / (run_count * sqrt((double)run_count)); - for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); + *val = (__u64)res; +} - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); +static void update_avg(const char *name, int idx, __u64 *avg, __u64 *val) +{ + *avg += *val; - if ((pid = fork()) < 0) - perror("failed to fork"); + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} +/* + * Calculate the averages and noises: + */ +static void calc_avg(void) +{ + int i, j; + + if (verbose > 1) + fprintf(stderr, "\n"); + + for (i = 0; i < run_count; i++) { + update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); + + for (j = 0; j < nr_counters; j++) { + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); + } + } + runtime_nsecs_avg /= run_count; + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); + for (i = 0; i < run_count; i++) { + runtime_nsecs_noise += + abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + walltime_nsecs_noise += + abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + runtime_cycles_noise += + abs((__s64)(runtime_cycles[i] - runtime_cycles_avg)); + + for (j = 0; j < nr_counters; j++) { + event_res_noise[j][0] += + abs((__s64)(event_res[i][j][0] - event_res_avg[j][0])); + event_res_noise[j][1] += + abs((__s64)(event_res[i][j][1] - event_res_avg[j][1])); + event_res_noise[j][2] += + abs((__s64)(event_res[i][j][2] - event_res_avg[j][2])); } } - while (wait(&status) >= 0) - ; + normalize_noise(&runtime_nsecs_noise); + normalize_noise(&walltime_nsecs_noise); + normalize_noise(&runtime_cycles_noise); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); + for (j = 0; j < nr_counters; j++) { + normalize_noise(&event_res_noise[j][0]); + normalize_noise(&event_res_noise[j][1]); + normalize_noise(&event_res_noise[j][2]); + } +} - walltime_nsecs = t1 - t0; +static void print_stat(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); fflush(stdout); @@ -279,22 +421,19 @@ static int do_perf_stat(int argc, const char **argv) for (i = 1; i < argc; i++) fprintf(stderr, " %s", argv[i]); - fprintf(stderr, "\':\n"); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) - read_counter(counter); + fprintf(stderr, "\'"); + if (run_count > 1) + fprintf(stderr, " (%d runs)", run_count); + fprintf(stderr, ":\n\n"); for (counter = 0; counter < nr_counters; counter++) print_counter(counter); fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); fprintf(stderr, "\n"); - - return 0; } static volatile int signr = -1; @@ -332,11 +471,15 @@ static const struct option options[] = { "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_INTEGER('r', "repeat", &run_count, + "repeat command and print average + stddev (max: 100)"), OPT_END() }; int cmd_stat(int argc, const char **argv, const char *prefix) { + int status; + page_size = sysconf(_SC_PAGE_SIZE); memcpy(attrs, default_attrs, sizeof(attrs)); @@ -344,6 +487,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); + if (run_count <= 0 || run_count > MAX_RUN) + usage_with_options(stat_usage, options); if (!nr_counters) nr_counters = 8; @@ -363,5 +508,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); - return do_perf_stat(argc, argv); + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + status = run_perf_stat(argc, argv); + } + + print_stat(argc, argv); + + return status; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a72586e1df..f0c9f2627fe 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -63,8 +63,8 @@ static char *hw_event_names[] = { }; static char *sw_event_names[] = { - "cpu-clock-ticks", - "task-clock-ticks", + "cpu-clock-msecs", + "task-clock-msecs", "page-faults", "context-switches", "CPU-migrations", diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0d1292bd827..5ad9b06c3f6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -12,6 +12,7 @@ struct symbol { __u64 obj_start; __u64 hist_sum; __u64 *hist; + void *priv; char name[0]; }; |