From 1224550969e0bf18785786a1a9f801cd86d68586 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2010 11:54:02 -0300 Subject: perf tools: Don't trow away old map slices not overlapped by new maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: David S. Miller Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267800842-22324-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 21b92162282..9024fa1ff5c 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -183,8 +183,8 @@ struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) return th; } -static void map_groups__remove_overlappings(struct map_groups *self, - struct map *map) +static int map_groups__fixup_overlappings(struct map_groups *self, + struct map *map) { struct rb_root *root = &self->maps[map->type]; struct rb_node *next = rb_first(root); @@ -209,7 +209,36 @@ static void map_groups__remove_overlappings(struct map_groups *self, * list. */ list_add_tail(&pos->node, &self->removed_maps[map->type]); + /* + * Now check if we need to create new maps for areas not + * overlapped by the new map: + */ + if (map->start > pos->start) { + struct map *before = map__clone(pos); + + if (before == NULL) + return -ENOMEM; + + before->end = map->start - 1; + map_groups__insert(self, before); + if (verbose >= 2) + map__fprintf(before, stderr); + } + + if (map->end < pos->end) { + struct map *after = map__clone(pos); + + if (after == NULL) + return -ENOMEM; + + after->start = map->end + 1; + map_groups__insert(self, after); + if (verbose >= 2) + map__fprintf(after, stderr); + } } + + return 0; } void maps__insert(struct rb_root *maps, struct map *map) @@ -254,7 +283,7 @@ struct map *maps__find(struct rb_root *maps, u64 ip) void thread__insert_map(struct thread *self, struct map *map) { - map_groups__remove_overlappings(&self->mg, map); + map_groups__fixup_overlappings(&self->mg, map); map_groups__insert(&self->mg, map); } -- cgit From accd3cc45a0e1d11090ea66888405987de77bdca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2010 12:51:04 -0300 Subject: perf probe: Add missing variable initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cc1: warnings being treated as errors util/probe-finder.c: In function 'find_line_range': util/probe-finder.c:172: warning: 'src' may be used uninitialized in this function make: *** [util/probe-finder.o] Error 1 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e77dc886760..1e6c65ebbd8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -169,7 +169,7 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname) { Dwarf_Files *files; size_t nfiles, i; - const char *src; + const char *src = NULL; int ret; if (!fname) -- cgit From 8907fd607b66e36636469a2de9833db643869db8 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:05 -0300 Subject: perf record: Add ID and to recorded event data when recording multiple events Currently perf record does not write the ID or the to disk for events. This doesn't allow report to tell if an event stream contains one or more types of events. This patch adds this entry to the list of data that record will write to disk if more than one event was requested. Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 771533ced6a..f573bbb8357 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -244,6 +244,9 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (nr_counters > 1) + attr->sample_type |= PERF_SAMPLE_ID; + if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; -- cgit From d403d0acc9c5afa679a3f61e71489530d7fa0606 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:06 -0300 Subject: perf session: Change add_hist_entry to take the tree root instead of session In order to minimize the impact of storing multiple events in a report this function will now take the root of the histogram tree so that the logic for selecting the proper tree can be inserted before the call. Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 3 ++- tools/perf/builtin-report.c | 3 ++- tools/perf/util/hist.c | 6 +++--- tools/perf/util/hist.h | 3 ++- 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5ec5de99587..4b734c731e2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -116,7 +116,7 @@ static int perf_session__add_hist_entry(struct perf_session *self, return 0; } - he = __perf_session__add_hist_entry(self, al, NULL, count, &hit); + he = __perf_session__add_hist_entry(&self->hists, al, NULL, count, &hit); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 18b3f505f9d..20df7352629 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -26,7 +26,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, struct addr_location *al, u64 count) { bool hit; - struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL, + struct hist_entry *he = __perf_session__add_hist_entry(&self->hists, + al, NULL, count, &hit); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cfc655d40bb..cd16e6a7d6d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -56,7 +56,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, if ((sort__has_parent || symbol_conf.use_callchain) && chain) syms = perf_session__resolve_callchain(self, al->thread, chain, &parent); - he = __perf_session__add_hist_entry(self, al, parent, count, &hit); + he = __perf_session__add_hist_entry(&self->hists, al, parent, + count, &hit); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e8daf5ca6fd..55dd9115d1b 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -12,12 +12,12 @@ struct callchain_param callchain_param = { * histogram, sorted on item, collects counts */ -struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, +struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, struct addr_location *al, struct symbol *sym_parent, u64 count, bool *hit) { - struct rb_node **p = &self->hists.rb_node; + struct rb_node **p = &hists->rb_node; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { @@ -53,7 +53,7 @@ struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, return NULL; *he = entry; rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &self->hists); + rb_insert_color(&he->rb_node, hists); *hit = false; return he; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e5f99b24048..7b48590c3ee 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -10,8 +10,9 @@ struct perf_session; struct hist_entry; struct addr_location; struct symbol; +struct rb_root; -struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, +struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, struct addr_location *al, struct symbol *parent, u64 count, bool *hit); -- cgit From cb8f09393646c5058056db771583c86e0ed1d92f Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:07 -0300 Subject: perf session: Add storage for seperating event types in report This patch adds the structures necessary to count each event type independently in perf report. Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.h | 9 +++++++++ tools/perf/util/session.c | 1 + tools/perf/util/session.h | 1 + 3 files changed, 11 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 50a7132887f..a33b94952e3 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -99,6 +99,15 @@ struct events_stats { u64 lost; }; +struct event_stat_id { + struct rb_node rb_node; + struct rb_root hists; + struct events_stats stats; + u64 config; + u64 event_stream; + u32 type; +}; + void event__print_totals(void); struct perf_session; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0de7258e70a..eed1cb88900 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -70,6 +70,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc memcpy(self->filename, filename, len); self->threads = RB_ROOT; + self->stats_by_id = RB_ROOT; self->last_match = NULL; self->mmap_window = 32; self->cwd = NULL; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 31950fcd8a4..5c33417eebb 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,6 +20,7 @@ struct perf_session { struct thread *last_match; struct map *vmlinux_maps[MAP__NR_TYPES]; struct events_stats events_stats; + struct rb_root stats_by_id; unsigned long event_total[PERF_RECORD_MAX]; unsigned long unknown_events; struct rb_root hists; -- cgit From eefc465cdd49cb89a742083fac2807c718ddad31 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:08 -0300 Subject: perf session: Change perf_session post processing functions to take histogram tree Now that report can store historgrams for multiple events we need to be able to do the post processing work for each histogram. This patch changes the post processing functions so that they can be called individually for each event's histogram. Signed-off-by: Eric B Munson [ Guarantee bisectabilty by fixing up builtin-report.c ] Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-diff.c | 10 ++++++---- tools/perf/builtin-report.c | 8 +++++--- tools/perf/util/hist.c | 39 ++++++++++++++++++++------------------- tools/perf/util/hist.h | 9 +++++---- 5 files changed, 38 insertions(+), 32 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4b734c731e2..6ad7148451c 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -564,8 +564,8 @@ static int __cmd_annotate(void) if (verbose > 2) dsos__fprintf(stdout); - perf_session__collapse_resort(session); - perf_session__output_resort(session, session->event_total[0]); + perf_session__collapse_resort(&session->hists); + perf_session__output_resort(&session->hists, session->event_total[0]); perf_session__find_annotations(session); out_delete: perf_session__delete(session); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 20df7352629..1ea15d8aeed 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -115,7 +115,7 @@ static void perf_session__resort_hist_entries(struct perf_session *self) static void perf_session__set_hist_entries_positions(struct perf_session *self) { - perf_session__output_resort(self, self->events_stats.total); + perf_session__output_resort(&self->hists, self->events_stats.total); perf_session__resort_hist_entries(self); } @@ -167,13 +167,15 @@ static int __cmd_diff(void) goto out_delete; } - perf_session__output_resort(session[1], session[1]->events_stats.total); + perf_session__output_resort(&session[1]->hists, + session[1]->events_stats.total); if (show_displacement) perf_session__set_hist_entries_positions(session[0]); perf_session__match_hists(session[0], session[1]); - perf_session__fprintf_hists(session[1], session[0], - show_displacement, stdout); + perf_session__fprintf_hists(&session[1]->hists, session[0], + show_displacement, stdout, + session[1]->events_stats.total); out_delete: for (i = 0; i < 2; ++i) perf_session__delete(session[i]); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cd16e6a7d6d..294b4cf105f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -225,10 +225,12 @@ static int __cmd_report(void) if (verbose > 2) dsos__fprintf(stdout); - perf_session__collapse_resort(session); - perf_session__output_resort(session, session->events_stats.total); + perf_session__collapse_resort(&session->hists); + perf_session__output_resort(&session->hists, + session->events_stats.total); fprintf(stdout, "# Samples: %Ld\n#\n", session->events_stats.total); - perf_session__fprintf_hists(session, NULL, false, stdout); + perf_session__fprintf_hists(&session->hists, NULL, false, stdout, + session->events_stats.total); if (sort_order == default_sort_order && parent_pattern == default_parent_pattern) fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 55dd9115d1b..73ebb6fb34a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -130,7 +130,7 @@ static void collapse__insert_entry(struct rb_root *root, struct hist_entry *he) rb_insert_color(&he->rb_node, root); } -void perf_session__collapse_resort(struct perf_session *self) +void perf_session__collapse_resort(struct rb_root *hists) { struct rb_root tmp; struct rb_node *next; @@ -140,17 +140,17 @@ void perf_session__collapse_resort(struct perf_session *self) return; tmp = RB_ROOT; - next = rb_first(&self->hists); + next = rb_first(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->hists); + rb_erase(&n->rb_node, hists); collapse__insert_entry(&tmp, n); } - self->hists = tmp; + *hists = tmp; } /* @@ -183,7 +183,7 @@ static void perf_session__insert_output_hist_entry(struct rb_root *root, rb_insert_color(&he->rb_node, root); } -void perf_session__output_resort(struct perf_session *self, u64 total_samples) +void perf_session__output_resort(struct rb_root *hists, u64 total_samples) { struct rb_root tmp; struct rb_node *next; @@ -194,18 +194,18 @@ void perf_session__output_resort(struct perf_session *self, u64 total_samples) total_samples * (callchain_param.min_percent / 100); tmp = RB_ROOT; - next = rb_first(&self->hists); + next = rb_first(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->hists); + rb_erase(&n->rb_node, hists); perf_session__insert_output_hist_entry(&tmp, n, min_callchain_hits); } - self->hists = tmp; + *hists = tmp; } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -456,10 +456,10 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, } static size_t hist_entry__fprintf(struct hist_entry *self, - struct perf_session *session, struct perf_session *pair_session, bool show_displacement, - long displacement, FILE *fp) + long displacement, FILE *fp, + u64 session_total) { struct sort_entry *se; u64 count, total; @@ -474,7 +474,7 @@ static size_t hist_entry__fprintf(struct hist_entry *self, total = pair_session->events_stats.total; } else { count = self->count; - total = session->events_stats.total; + total = session_total; } if (total) @@ -496,8 +496,8 @@ static size_t hist_entry__fprintf(struct hist_entry *self, if (total > 0) old_percent = (count * 100.0) / total; - if (session->events_stats.total > 0) - new_percent = (self->count * 100.0) / session->events_stats.total; + if (session_total > 0) + new_percent = (self->count * 100.0) / session_total; diff = new_percent - old_percent; @@ -544,16 +544,17 @@ static size_t hist_entry__fprintf(struct hist_entry *self, left_margin -= thread__comm_len(self->thread); } - hist_entry_callchain__fprintf(fp, self, session->events_stats.total, + hist_entry_callchain__fprintf(fp, self, session_total, left_margin); } return ret; } -size_t perf_session__fprintf_hists(struct perf_session *self, +size_t perf_session__fprintf_hists(struct rb_root *hists, struct perf_session *pair, - bool show_displacement, FILE *fp) + bool show_displacement, FILE *fp, + u64 session_total) { struct sort_entry *se; struct rb_node *nd; @@ -641,7 +642,7 @@ size_t perf_session__fprintf_hists(struct perf_session *self, fprintf(fp, "\n#\n"); print_entries: - for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) { + for (nd = rb_first(hists); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (show_displacement) { @@ -652,8 +653,8 @@ print_entries: displacement = 0; ++position; } - ret += hist_entry__fprintf(h, self, pair, show_displacement, - displacement, fp); + ret += hist_entry__fprintf(h, pair, show_displacement, + displacement, fp, session_total); } free(rem_sq_bracket); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 7b48590c3ee..16f360cce5b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -20,9 +20,10 @@ extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); void hist_entry__free(struct hist_entry *); -void perf_session__output_resort(struct perf_session *self, u64 total_samples); -void perf_session__collapse_resort(struct perf_session *self); -size_t perf_session__fprintf_hists(struct perf_session *self, +void perf_session__output_resort(struct rb_root *hists, u64 total_samples); +void perf_session__collapse_resort(struct rb_root *hists); +size_t perf_session__fprintf_hists(struct rb_root *hists, struct perf_session *pair, - bool show_displacement, FILE *fp); + bool show_displacement, FILE *fp, + u64 session_total); #endif /* __PERF_HIST_H */ -- cgit From cbbc79a53278b83bf7f834127751459f9299e402 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:09 -0300 Subject: perf report: Add multiple event support Perf report does not handle multiple events being reported, even though perf record stores them properly on disk. This patch addresses that issue by adding the logic to perf report to use the event stream id that is saved by record and the new data structures to seperate the event streams and report them individually. Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 115 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 15 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 294b4cf105f..f815de25d0f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -45,29 +45,71 @@ static char *pretty_printing_style = default_pretty_printing_style; static char callchain_default_opt[] = "fractal,0.5"; +static struct event_stat_id *get_stats(struct perf_session *self, + u64 event_stream, u32 type, u64 config) +{ + struct rb_node **p = &self->stats_by_id.rb_node; + struct rb_node *parent = NULL; + struct event_stat_id *iter, *new; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct event_stat_id, rb_node); + if (iter->config == config) + return iter; + + + if (config > iter->config) + p = &(*p)->rb_right; + else + p = &(*p)->rb_left; + } + + new = malloc(sizeof(struct event_stat_id)); + if (new == NULL) + return NULL; + memset(new, 0, sizeof(struct event_stat_id)); + new->event_stream = event_stream; + new->config = config; + new->type = type; + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &self->stats_by_id); + return new; +} + static int perf_session__add_hist_entry(struct perf_session *self, struct addr_location *al, - struct ip_callchain *chain, u64 count) + struct sample_data *data) { struct symbol **syms = NULL, *parent = NULL; bool hit; struct hist_entry *he; + struct event_stat_id *stats; + struct perf_event_attr *attr; - if ((sort__has_parent || symbol_conf.use_callchain) && chain) + if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) syms = perf_session__resolve_callchain(self, al->thread, - chain, &parent); - he = __perf_session__add_hist_entry(&self->hists, al, parent, - count, &hit); + data->callchain, &parent); + + attr = perf_header__find_attr(data->id, &self->header); + if (attr) + stats = get_stats(self, data->id, attr->type, attr->config); + else + stats = get_stats(self, data->id, 0, 0); + if (stats == NULL) + return -ENOMEM; + he = __perf_session__add_hist_entry(&stats->hists, al, parent, + data->period, &hit); if (he == NULL) return -ENOMEM; if (hit) - he->count += count; + he->count += data->period; if (symbol_conf.use_callchain) { if (!hit) callchain_init(&he->callchain); - append_chain(&he->callchain, chain, syms); + append_chain(&he->callchain, data->callchain, syms); free(syms); } @@ -87,10 +129,30 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) return 0; } +static int add_event_total(struct perf_session *session, + struct sample_data *data, + struct perf_event_attr *attr) +{ + struct event_stat_id *stats; + + if (attr) + stats = get_stats(session, data->id, attr->type, attr->config); + else + stats = get_stats(session, data->id, 0, 0); + + if (!stats) + return -ENOMEM; + + stats->stats.total += data->period; + session->events_stats.total += data->period; + return 0; +} + static int process_sample_event(event_t *event, struct perf_session *session) { struct sample_data data = { .period = 1, }; struct addr_location al; + struct perf_event_attr *attr; event__parse_sample(event, session->sample_type, &data); @@ -124,12 +186,18 @@ static int process_sample_event(event_t *event, struct perf_session *session) if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; - if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) { + if (perf_session__add_hist_entry(session, &al, &data)) { pr_debug("problem incrementing symbol count, skipping event\n"); return -1; } - session->events_stats.total += data.period; + attr = perf_header__find_attr(data.id, &session->header); + + if (add_event_total(session, &data, attr)) { + pr_debug("problem adding event count\n"); + return -1; + } + return 0; } @@ -198,6 +266,7 @@ static int __cmd_report(void) { int ret = -EINVAL; struct perf_session *session; + struct rb_node *next; session = perf_session__new(input_name, O_RDONLY, force); if (session == NULL) @@ -225,12 +294,28 @@ static int __cmd_report(void) if (verbose > 2) dsos__fprintf(stdout); - perf_session__collapse_resort(&session->hists); - perf_session__output_resort(&session->hists, - session->events_stats.total); - fprintf(stdout, "# Samples: %Ld\n#\n", session->events_stats.total); - perf_session__fprintf_hists(&session->hists, NULL, false, stdout, - session->events_stats.total); + next = rb_first(&session->stats_by_id); + while (next) { + struct event_stat_id *stats; + + stats = rb_entry(next, struct event_stat_id, rb_node); + perf_session__collapse_resort(&stats->hists); + perf_session__output_resort(&stats->hists, stats->stats.total); + if (rb_first(&session->stats_by_id) == + rb_last(&session->stats_by_id)) + fprintf(stdout, "# Samples: %Ld\n#\n", + stats->stats.total); + else + fprintf(stdout, "# Samples: %Ld %s\n#\n", + stats->stats.total, + __event_name(stats->type, stats->config)); + + perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, + stats->stats.total); + fprintf(stdout, "\n\n"); + next = rb_next(&stats->rb_node); + } + if (sort_order == default_sort_order && parent_pattern == default_parent_pattern) fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); -- cgit From 65f2ed2b2fa6034ef9890b60c8fd39fbe76b9d37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Mar 2010 15:58:17 -0300 Subject: perf report: Print the map table just after samples for which no map was found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If -vv is used just the map table will be printed, -vvv will print the symbol table too, with it we can see that we have a bug where some samples are not being resolved to a map when we get them in the perf.data stream, but after we have it all processed, we can find the right map, some reordering probably is happening. Upcoming patches will provide ways to ask for most PERF_SAMPLE_ conditional samples to be taken for !PERF_RECORD_SAMPLE events too, then we'll be able to ask for PERF_SAMPLE_TIME and PERF_SAMPLE_CPU to help diagnose this. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1268161097-17761-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/hist.c | 5 +++++ tools/perf/util/thread.c | 6 +++--- tools/perf/util/thread.h | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 73ebb6fb34a..bdcfd6190b2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -655,6 +655,11 @@ print_entries: } ret += hist_entry__fprintf(h, pair, show_displacement, displacement, fp, session_total); + if (h->map == NULL && verbose > 1) { + __map_groups__fprintf_maps(&h->thread->mg, + MAP__FUNCTION, fp); + fprintf(fp, "%.10s end\n", graph_dotted_line); + } } free(rem_sq_bracket); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9024fa1ff5c..fa968312ee7 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -79,8 +79,8 @@ int thread__comm_len(struct thread *self) return self->comm_len; } -static size_t __map_groups__fprintf_maps(struct map_groups *self, - enum map_type type, FILE *fp) +size_t __map_groups__fprintf_maps(struct map_groups *self, + enum map_type type, FILE *fp) { size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); struct rb_node *nd; @@ -89,7 +89,7 @@ static size_t __map_groups__fprintf_maps(struct map_groups *self, struct map *pos = rb_entry(nd, struct map, rb_node); printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); - if (verbose > 1) { + if (verbose > 2) { printed += dso__fprintf(pos->dso, type, fp); printed += fprintf(fp, "--\n"); } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 0a28f39de54..dcf70303e58 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -10,6 +10,9 @@ struct map_groups { struct list_head removed_maps[MAP__NR_TYPES]; }; +size_t __map_groups__fprintf_maps(struct map_groups *self, + enum map_type type, FILE *fp); + struct thread { struct rb_node rb_node; struct map_groups mg; -- cgit From a12b51c478899fe0b7e874a559b05ba35f1128ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 Mar 2010 20:36:09 +1100 Subject: perf tools: Fix sparse CPU numbering related bugs At present, the perf subcommands that do system-wide monitoring (perf stat, perf record and perf top) don't work properly unless the online cpus are numbered 0, 1, ..., N-1. These tools ask for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN) and then try to create events for cpus 0, 1, ..., N-1. This creates problems for systems where the online cpus are numbered sparsely. For example, a POWER6 system in single-threaded mode (i.e. only running 1 hardware thread per core) will have only even-numbered cpus online. This fixes the problem by reading the /sys/devices/system/cpu/online file to find out which cpus are online. The code that does that is in tools/perf/util/cpumap.[ch], and consists of a read_cpu_map() function that sets up a cpumap[] array and returns the number of online cpus. If /sys/devices/system/cpu/online can't be read or can't be parsed successfully, it falls back to using sysconf to ask how many cpus are online and sets up an identity map in cpumap[]. The perf record, perf stat and perf top code then calls read_cpu_map() in the system-wide monitoring case (instead of sysconf) and uses cpumap[] to get the cpu numbers to pass to perf_event_open. Signed-off-by: Paul Mackerras Cc: Anton Blanchard Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 ++ tools/perf/builtin-record.c | 7 +++--- tools/perf/builtin-stat.c | 10 +++++--- tools/perf/builtin-top.c | 9 +++---- tools/perf/util/cpumap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cpumap.h | 7 ++++++ 6 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 tools/perf/util/cpumap.c create mode 100644 tools/perf/util/cpumap.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2d537382c68..5840499e2d2 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -387,6 +387,7 @@ LIB_H += util/thread.h LIB_H += util/trace-event.h LIB_H += util/probe-finder.h LIB_H += util/probe-event.h +LIB_H += util/cpumap.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -433,6 +434,7 @@ LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/probe-event.o LIB_OBJS += util/util.o +LIB_OBJS += util/cpumap.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f573bbb8357..b09d3b27ca1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -22,6 +22,7 @@ #include "util/debug.h" #include "util/session.h" #include "util/symbol.h" +#include "util/cpumap.h" #include #include @@ -421,9 +422,6 @@ static int __cmd_record(int argc, const char **argv) char buf; page_size = sysconf(_SC_PAGE_SIZE); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); atexit(sig_atexit); signal(SIGCHLD, sig_handler); @@ -547,8 +545,9 @@ static int __cmd_record(int argc, const char **argv) if ((!system_wide && !inherit) || profile_cpu != -1) { open_counters(profile_cpu, target_pid); } else { + nr_cpus = read_cpu_map(); for (i = 0; i < nr_cpus; i++) - open_counters(i, target_pid); + open_counters(cpumap[i], target_pid); } if (file_new) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e8c85d5aec4..95db31cff6f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -45,6 +45,7 @@ #include "util/event.h" #include "util/debug.h" #include "util/header.h" +#include "util/cpumap.h" #include #include @@ -151,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid) unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); if (fd[cpu][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[cpu][counter], strerror(errno)); @@ -519,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) nr_counters = ARRAY_SIZE(default_attrs); } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert((int)nr_cpus >= 0); + if (system_wide) + nr_cpus = read_cpu_map(); + else + nr_cpus = 1; /* * We dont want to block the signals - that would cause diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 31f2e597800..0b719e3dde0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -28,6 +28,7 @@ #include #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/cpumap.h" #include "util/debug.h" @@ -1123,7 +1124,7 @@ static void start_counter(int i, int counter) cpu = profile_cpu; if (target_pid == -1 && profile_cpu == -1) - cpu = i; + cpu = cpumap[i]; attr = attrs + counter; @@ -1347,12 +1348,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) attrs[counter].sample_period = default_interval; } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; + else + nr_cpus = read_cpu_map(); get_term_dimensions(&winsize); if (print_entries == 0) { diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c new file mode 100644 index 00000000000..4e01490e51e --- /dev/null +++ b/tools/perf/util/cpumap.c @@ -0,0 +1,59 @@ +#include "util.h" +#include "../perf.h" +#include "cpumap.h" +#include +#include + +int cpumap[MAX_NR_CPUS]; + +static int default_cpu_map(void) +{ + int nr_cpus, i; + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert((int)nr_cpus >= 0); + + for (i = 0; i < nr_cpus; ++i) + cpumap[i] = i; + + return nr_cpus; +} + +int read_cpu_map(void) +{ + FILE *onlnf; + int nr_cpus = 0; + int n, cpu, prev; + char sep; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (!onlnf) + return default_cpu_map(); + + sep = 0; + prev = -1; + for (;;) { + n = fscanf(onlnf, "%u%c", &cpu, &sep); + if (n <= 0) + break; + if (prev >= 0) { + assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS); + while (++prev < cpu) + cpumap[nr_cpus++] = prev; + } + assert (nr_cpus < MAX_NR_CPUS); + cpumap[nr_cpus++] = cpu; + if (n == 2 && sep == '-') + prev = cpu; + else + prev = -1; + if (n == 1 || sep == '\n') + break; + } + fclose(onlnf); + if (nr_cpus > 0) + return nr_cpus; + + return default_cpu_map(); +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h new file mode 100644 index 00000000000..86c78bb3309 --- /dev/null +++ b/tools/perf/util/cpumap.h @@ -0,0 +1,7 @@ +#ifndef __PERF_CPUMAP_H +#define __PERF_CPUMAP_H + +extern int read_cpu_map(void); +extern int cpumap[]; + +#endif /* __PERF_CPUMAP_H */ -- cgit From 7ae5f21361fea11f58c398701da635f778635d13 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 11 Mar 2010 13:57:00 +0100 Subject: perf: Make the install relative to DESTDIR if specified Without this change, the install path is relative to prefix/DESTDIR where prefix is automatically set to $HOME. This can produce unexpected results. For example: make -C tools/perf DESTDIR=/home/jkacur/tmp install-man creates the directory: /home/jkacur/home/jkacur/tmp/share/... instead of the expected: /home/jkacur/tmp/share/... Signed-off-by: John Kacur Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Tom Zanussi Cc: Kyle McMartin Cc: LKML-Reference: <1268312220-12880-1-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/Makefile | 4 +++- tools/perf/Makefile | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index bdd3b7ecad0..bd498d49695 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -24,7 +24,10 @@ DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR prefix?=$(HOME) +endif bindir?=$(prefix)/bin htmldir?=$(prefix)/share/doc/perf-doc pdfdir?=$(prefix)/share/doc/perf-doc @@ -32,7 +35,6 @@ mandir?=$(prefix)/share/man man1dir=$(mandir)/man1 man5dir=$(mandir)/man5 man7dir=$(mandir)/man7 -# DESTDIR= ASCIIDOC=asciidoc ASCIIDOC_EXTRA = --unsafe diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5840499e2d2..8a8f52db7e3 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -216,7 +216,10 @@ STRIP ?= strip # runtime figures out where they are based on the path to the executable. # This can help installing the suite in a relocatable way. +# Make the path relative to DESTDIR, not to prefix +ifndef DESTDIR prefix = $(HOME) +endif bindir_relative = bin bindir = $(prefix)/$(bindir_relative) mandir = share/man @@ -233,7 +236,6 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif lib = lib -# DESTDIR= export prefix bindir sharedir sysconfdir -- cgit From 9f591fd76afdc0e5192e9ed00a36f8efc0b4dfe6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 11 Mar 2010 15:53:11 -0300 Subject: perf record: Don't try to find buildids in a zero sized file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing this symptom: [acme@mica linux-2.6-tip]$ perf record -a -f Fatal: Permission error - are you root? Bus error [acme@mica linux-2.6-tip]$ I.e. if for some reason no data is collected, in this case a non root user trying to do systemwide profiling, no data will be collected, and then we end up trying to mmap a zero sized file and access the file header, b00m. Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: LKML-Reference: <1268333592-30872-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b09d3b27ca1..3b8b6387c47 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -395,6 +395,9 @@ static int process_buildids(void) { u64 size = lseek(output, 0, SEEK_CUR); + if (size == 0) + return 0; + session->fd = output; return __perf_session__process_events(session, post_processing_offset, size - post_processing_offset, -- cgit From 594087a04eea544356f9c52e83c1a9bc380ce80f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 12 Mar 2010 18:22:17 -0500 Subject: perf probe: Fix probe_point buffer overrun Fix probe_point array-size overrun problem. In some cases (e.g. inline function), one user-specified probe-point can be translated to many probe address, and it overruns pre-defined array-size. This also removes redundant MAX_PROBES macro definition. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: LKML-Reference: <20100312232217.2017.45017.stgit@localhost6.localdomain6> [ Note that only root can create new probes. Eventually we should remove the MAX_PROBES limit, but that is a larger patch not eligible to perf/urgent treatment. ] Signed-off-by: Ingo Molnar --- tools/perf/builtin-probe.c | 1 - tools/perf/util/probe-finder.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c30a3359234..152d6c9b1fa 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -47,7 +47,6 @@ #include "util/probe-event.h" #define MAX_PATH_LEN 256 -#define MAX_PROBES 128 /* Session management structure */ static struct { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1e6c65ebbd8..f9cbbf18e6c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -455,6 +455,9 @@ static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) /* *pf->fb_ops will be cached in libdw. Don't free it. */ pf->fb_ops = NULL; + if (pp->found == MAX_PROBES) + die("Too many( > %d) probe point found.\n", MAX_PROBES); + pp->probes[pp->found] = strdup(tmp); pp->found++; } -- cgit From fc6ceea045031658d0b59af562369eae980b4370 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 12 Mar 2010 18:22:24 -0500 Subject: perf probe: Fix need_dwarf flag if lazy matching is used Set need_dwarf if lazy matching pattern is specified, because lazy matching requires real source path for which we must use debuginfo. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100312232224.2017.54550.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 53181dbfe4a..7c004b6ef24 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -242,7 +242,7 @@ void parse_perf_probe_event(const char *str, struct probe_point *pp, /* Parse probe point */ parse_perf_probe_probepoint(argv[0], pp); - if (pp->file || pp->line) + if (pp->file || pp->line || pp->lazy_line) *need_dwarf = true; /* Copy arguments and ensure return probe has no C argument */ -- cgit From b63be8d7beda7fe5879559be6f70f8e1c93109e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Mar 2010 15:03:50 -0300 Subject: perf top: Improve the autosizing of column lenghts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When profiling C++ workloads the symbol name length can be really big, so cap it before it garbles the result. This builds upon the autosizing already present where we choose to use the short, basename of DSOs instead of its long, full pathname. Reported-by: Pavel Krauz Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1268676230-9261-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 13 +++++++++---- tools/perf/util/symbol.c | 18 +++++++++++++----- tools/perf/util/symbol.h | 3 ++- 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0b719e3dde0..8364c8aba19 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -455,7 +455,7 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width = 0, max_dso_width; + int sym_width = 0, dso_width = 0, dso_short_width; const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; @@ -545,15 +545,20 @@ static void print_sym_table(void) if (syme->map->dso->long_name_len > dso_width) dso_width = syme->map->dso->long_name_len; + if (syme->map->dso->short_name_len > dso_short_width) + dso_short_width = syme->map->dso->short_name_len; + if (syme->name_len > sym_width) sym_width = syme->name_len; } printed = 0; - max_dso_width = winsize.ws_col - sym_width - 29; - if (dso_width > max_dso_width) - dso_width = max_dso_width; + if (sym_width + dso_width > winsize.ws_col - 29) { + dso_width = dso_short_width; + if (sym_width + dso_width > winsize.ws_col - 29) + sym_width = winsize.ws_col - dso_width - 29; + } putchar('\n'); if (nr_counters == 1) printf(" samples pcnt"); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 323c0aea0a9..c458c4a371d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -163,9 +163,17 @@ void dso__set_long_name(struct dso *self, char *name) self->long_name_len = strlen(name); } +static void dso__set_short_name(struct dso *self, const char *name) +{ + if (name == NULL) + return; + self->short_name = name; + self->short_name_len = strlen(name); +} + static void dso__set_basename(struct dso *self) { - self->short_name = basename(self->long_name); + dso__set_short_name(self, basename(self->long_name)); } struct dso *dso__new(const char *name) @@ -176,7 +184,7 @@ struct dso *dso__new(const char *name) int i; strcpy(self->name, name); dso__set_long_name(self, self->name); - self->short_name = self->name; + dso__set_short_name(self, self->name); for (i = 0; i < MAP__NR_TYPES; ++i) self->symbols[i] = self->symbol_names[i] = RB_ROOT; self->slen_calculated = 0; @@ -897,7 +905,6 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, struct kmap *kmap = self->kernel ? map__kmap(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = self; - size_t dso_name_len = strlen(self->short_name); Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; @@ -987,7 +994,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, char dso_name[PATH_MAX]; if (strcmp(section_name, - curr_dso->short_name + dso_name_len) == 0) + (curr_dso->short_name + + self->short_name_len)) == 0) goto new_symbol; if (strcmp(section_name, ".text") == 0) { @@ -1782,7 +1790,7 @@ struct dso *dso__new_kernel(const char *name) struct dso *self = dso__new(name ?: "[kernel.kallsyms]"); if (self != NULL) { - self->short_name = "[kernel]"; + dso__set_short_name(self, "[kernel]"); self->kernel = 1; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 280dadd32a0..f30a3742891 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -110,9 +110,10 @@ struct dso { u8 sorted_by_name; u8 loaded; u8 build_id[BUILD_ID_SIZE]; - u16 long_name_len; const char *short_name; char *long_name; + u16 long_name_len; + u16 short_name_len; char name[0]; }; -- cgit From 67c7ff7c56f38a8ab338fbbfe366621ce6303ba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Mar 2010 13:02:28 -0400 Subject: perf probe: Fix offset to allow signed value Fix dereference offset to intmax_t from uintmax_t, because it can have negative values (for example local variable's offset from frame pointer). Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100315170228.31852.71946.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f9cbbf18e6c..0e8c8f1594e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -333,8 +333,8 @@ static void show_location(Dwarf_Op *op, struct probe_finder *pf) die("%u exceeds max register number.", regn); if (deref) - ret = snprintf(pf->buf, pf->len, " %s=+%ju(%s)", - pf->var, (uintmax_t)offs, regs); + ret = snprintf(pf->buf, pf->len, " %s=%+jd(%s)", + pf->var, (intmax_t)offs, regs); else ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); DIE_IF(ret < 0); -- cgit From d0cb4260f899d07462d49fc67e29f2438dbaca2f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Mar 2010 13:02:35 -0400 Subject: perf probe: Use original address instead of CU-based address Use original address for looking up the location of variables for dwarf_getlocation_addr() instead of CU-based address. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100315170235.31852.91195.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 11 ++--------- tools/perf/util/probe-finder.h | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 0e8c8f1594e..c171a243d05 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -352,8 +352,7 @@ static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf) if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) goto error; /* TODO: handle more than 1 exprs */ - ret = dwarf_getlocation_addr(&attr, (pf->addr - pf->cu_base), - &expr, &nexpr, 1); + ret = dwarf_getlocation_addr(&attr, pf->addr, &expr, &nexpr, 1); if (ret <= 0 || nexpr == 0) goto error; @@ -437,8 +436,7 @@ static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) /* Get the frame base attribute/ops */ dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); - ret = dwarf_getlocation_addr(&fb_attr, (pf->addr - pf->cu_base), - &pf->fb_ops, &nops, 1); + ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) pf->fb_ops = NULL; @@ -644,7 +642,6 @@ static void find_probe_point_by_func(struct probe_finder *pf) int find_probe_point(int fd, struct probe_point *pp) { struct probe_finder pf = {.pp = pp}; - int ret; Dwarf_Off off, noff; size_t cuhl; Dwarf_Die *diep; @@ -671,10 +668,6 @@ int find_probe_point(int fd, struct probe_point *pp) pf.fname = NULL; if (!pp->file || pf.fname) { - /* Save CU base address (for frame_base) */ - ret = dwarf_lowpc(&pf.cu_die, &pf.cu_base); - if (ret != 0) - pf.cu_base = 0; if (pp->function) find_probe_point_by_func(&pf); else if (pp->lazy_line) diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index d1a651793ba..21f7354397b 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -71,7 +71,6 @@ struct probe_finder { /* For variable searching */ Dwarf_Op *fb_ops; /* Frame base attribute */ - Dwarf_Addr cu_base; /* Current CU base address */ const char *var; /* Current variable name */ char *buf; /* Current output buffer */ int len; /* Length of output buffer */ -- cgit From 00909e955125e90a6ebb34671c56c4c851e62951 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Mar 2010 18:28:46 -0300 Subject: perf top: Add missing initialization to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dso_short_width has to start as zero, as we're calculating the maximum short DSO name length, somehow I missed this one. Reported-by: Frédéric Weisbecker Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1268774926-27488-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8364c8aba19..1f529321607 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -455,7 +455,7 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width = 0, dso_short_width; + int sym_width = 0, dso_width = 0, dso_short_width = 0; const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; -- cgit From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- tools/perf/builtin-kmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 924a9518931..7d9e3a7e34d 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -14,6 +14,7 @@ #include "util/debug.h" #include +#include struct alloc_stat; typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); -- cgit From b1dcc03cb8ee0f5718491e8c518257238dc64e00 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 1 Apr 2010 23:58:25 -0500 Subject: perf/scripts: Tuple was set from long in both branches in python_process_event() This is a fix to the signed/unsigned field handling in the Python scripting engine, based on a patch from Roel Kluin. Basically, Python wants to use a PyInt (which is internally a long) if it can i.e. if the value will fit into that type. If not, it stores it into a PyLong, which isn't actually a long, but an arbitrary-precision integer variable. The code below is similar to to what Python does internally, and it seems to work as expected on the x86 and x86_64 sytems I tested it on. Signed-off-by: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Roel Kluin Cc: Frederic Weisbecker Cc: rostedt@goodmis.org LKML-Reference: <1270184305.6422.10.camel@tropicana> Signed-off-by: Ingo Molnar --- tools/perf/util/scripting-engines/trace-event-python.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 33a414bbba3..6a72f14c598 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -208,7 +208,7 @@ static void python_process_event(int cpu, void *data, int size __unused, unsigned long long nsecs, char *comm) { - PyObject *handler, *retval, *context, *t; + PyObject *handler, *retval, *context, *t, *obj; static char handler_name[256]; struct format_field *field; unsigned long long val; @@ -256,16 +256,23 @@ static void python_process_event(int cpu, void *data, offset &= 0xffff; } else offset = field->offset; - PyTuple_SetItem(t, n++, - PyString_FromString((char *)data + offset)); + obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ val = read_size(data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { - PyTuple_SetItem(t, n++, PyInt_FromLong(val)); + if ((long long)val >= LONG_MIN && + (long long)val <= LONG_MAX) + obj = PyInt_FromLong(val); + else + obj = PyLong_FromLongLong(val); } else { - PyTuple_SetItem(t, n++, PyInt_FromLong(val)); + if (val <= LONG_MAX) + obj = PyInt_FromLong(val); + else + obj = PyLong_FromUnsignedLongLong(val); } } + PyTuple_SetItem(t, n++, obj); } if (_PyTuple_Resize(&t, n) == -1) -- cgit From b0f86f5a169c758a82b0e23eef6795356f6d5a25 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 29 Mar 2010 18:47:55 +0200 Subject: perf, probe-finder: Build fix on Debian Building chokes with: In file included from /usr/include/gelf.h:53, from /usr/include/elfutils/libdw.h:53, from util/probe-finder.h:61, from util/probe-finder.c:39: /usr/include/libelf.h:98: error: expected specifier-qualifier-list before 'off64_t' [...] Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu LKML-Reference: <20100329164755.GA16034@aftab> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8a8f52db7e3..bc0f670a833 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -200,7 +200,7 @@ endif CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm -ALL_CFLAGS = $(CFLAGS) +ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -492,19 +492,19 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif -ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) -ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); endif - ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) + ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif else msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); endif -ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) msg := $(warning No libdw.h found or old libdw.h found, disables dwarf support. Please install elfutils-devel/elfutils-dev); BASIC_CFLAGS += -DNO_DWARF_SUPPORT else -- cgit From 8c40041f75a202ed6a3b38143b823cb80f6d6b7c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 6 Apr 2010 10:37:33 -0300 Subject: perf kmem: Fix breakage introduced by 5a0e3ad slab.h script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5a0e3ad ("include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h") added a '#include ' to tools/perf/builtin-kmem.h because: that tool has lines like this: if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { process_alloc_event(data, event, cpu, timestamp, thread, 0); return; } So, using the script regex: >>> import re >>> s = re.compile(r'^(|.*[^a-zA-Z0-9_])_*(slab_is_available|kmem_cache_|k[mzc]alloc|krealloc|kz?free|ksize|__getname|putname)') >>> l = ' !strcmp(event->name, "kmem_cache_alloc")) {' >>> s.search(l) <_sre.SRE_Match object at 0xb77b1ad0> >>> Remove that file that is not available in the tools/perf include path and thus builtin-kmem.c couldn't be compiled. Reported-by: Peter Zijlstra Cc: Christoph Lameter Cc: Frédéric Weisbecker Cc: Lee Schermerhorn Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Tejun Heo Cc: Linus Torvalds LKML-Reference: <1270561053-14308-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 7d9e3a7e34d..924a9518931 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -14,7 +14,6 @@ #include "util/debug.h" #include -#include struct alloc_stat; typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); -- cgit