From f9573fe3e0e68f15ed61293e5567b5c4981c57f3 Mon Sep 17 00:00:00 2001 From: Robb Romans Date: Fri, 26 Jun 2009 14:25:56 -0500 Subject: SystemTap Language Reference: DWARF-less probing patch Here is a patch to the Language Reference Guide that adds information about DWARF-less probing. Signed-off-by: Robb Romans Signed-off-by: Josh Stone --- doc/langref.tex | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/doc/langref.tex b/doc/langref.tex index 5a149d19..6bbd704f 100644 --- a/doc/langref.tex +++ b/doc/langref.tex @@ -53,7 +53,7 @@ This document was derived from other documents contributed to the SystemTap project by employees of Red Hat, IBM and Intel.\newline Copyright \copyright\space 2007 Red Hat Inc.\newline -Copyright \copyright\space 2007 IBM Corp.\newline +Copyright \copyright\space 2007-2009 IBM Corp.\newline Copyright \copyright\space 2007 Intel Corporation.\newline Permission is granted to copy, distribute and/or modify this document @@ -853,6 +853,64 @@ kernel.statement("bio_init@fs/bio.c+3") \end{verbatim} \end{vindent} + +\subsection{DWARF-less probing} +\index{DWARF-less probing} + +In the absence of debugging information, you can still use the +\emph{kprobe} family of probes to examine the entry and exit points of +kernel and module functions. You cannot look up the arguments or local +variables of a function using these probes. However, you can access +the parameters by following this procedure: + +When you're stopped at the entry to a function, you can refer to the +function's arguments by number. For example, when probing the function +declared: + +\begin{vindent} +\begin{verbatim} +asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t +count) +\end{verbatim} +\end{vindent} + +You can obtain the values of \texttt{fd}, \texttt{buf}, and +\texttt{count}, respectively, as \texttt{uint\_arg(1)}, +\texttt{pointer\_arg(2)}, and \texttt{ulong\_arg(3)}. 
In this case, your +probe code must first call \texttt{asmlinkage()}, because on some +architectures the asmlinkage attribute affects how the function's +arguments are passed. + +When you're in a return probe, \texttt{\$return} isn't supported +without DWARF, but you can call \texttt{returnval()} to get the value +of the register in which the function value is typically returned, or +call \texttt{returnstr()} to get a string version of that value. + +And at any code probepoint, you can call +\texttt{register("regname")} to get the value of the specified CPU +register when the probe point was hit. +\texttt{u\_register("regname")} is like \texttt{register("regname")}, +but interprets the value as an unsigned integer. + +SystemTap supports the following constructs: +\begin{vindent} +\begin{verbatim} +kprobe.function(FUNCTION) +kprobe.function(FUNCTION).return +kprobe.module(NAME).function(FUNCTION) +kprobe.module(NAME).function(FUNCTION).return +kprobe.statement.(ADDRESS).absolute +\end{verbatim} +\end{vindent} + +Use \textbf{.function} probes for kernel functions and +\textbf{.module} probes for probing functions of a specified module. +If you know the absolute address of a kernel or module +function, you can use \textbf{.statement} probes. Do not use wildcards in +\textit{FUNCTION} and \textit{MODULE} names. Wildcards cause the probe +to not register. Also, run statement probes in guru mode only. 
+ + \begin{comment} \subsection{Marker probes} -- cgit From ba27a9783db39606478d58c52021d47df232edb0 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 1 Jul 2009 16:46:05 -0700 Subject: Removed extra period in statement.(ADDRESS) --- doc/langref.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/langref.tex b/doc/langref.tex index 6bbd704f..35ff3312 100644 --- a/doc/langref.tex +++ b/doc/langref.tex @@ -899,7 +899,7 @@ kprobe.function(FUNCTION) kprobe.function(FUNCTION).return kprobe.module(NAME).function(FUNCTION) kprobe.module(NAME).function(FUNCTION).return -kprobe.statement.(ADDRESS).absolute +kprobe.statement(ADDRESS).absolute \end{verbatim} \end{vindent} -- cgit From bfbbea5a2c9690b82b7b75617befd5074149138a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 1 Jul 2009 16:46:43 -0700 Subject: AUTHORS update --- AUTHORS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 4ae6c8ff..46ddf021 100644 --- a/AUTHORS +++ b/AUTHORS @@ -42,6 +42,7 @@ Phil Muldoon Prerna Saxena Przemysław Pawełczyk Rajan Arora +Robb Romans Roland McGrath Shaohua Li Srikar Dronamraju @@ -52,6 +53,7 @@ Thang Nguyen Theodore Ts'o Tim Moore Tom Zanussi +Varun Chandramohan Wenji Huang William Cohen Zhaolei -- cgit From 1c6b77e564d4d849f1994d56d662ceb018a112d4 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 1 Jul 2009 18:59:41 -0700 Subject: PR10327: resolve symbol aliases to dwarf functions This will first read in the symbol table for modules, and update the dwarf cu_function_cache with aliased names too. Then when iterating in dwarf, all of the possible names are matched, instead of only the canonical dwarf name. 
* dwflpp.cxx (dwflpp::iterate_over_functions): call update_symtab, and track wildcard addresses in a set to avoid alias dupes * dwflpp.h (func_info::Compare): removed * tapsets.cxx (symbol_table::map_by_addr): replaces list_by_addr (symbol_table::sort): removed -- multimap doesn't need sorting (symbol_table::mark_dwarf_redundancies): removed, see update_symtab (symbol_table::purge_syscall_stubs): remove map elements inline (dwarf_query::handle_query_module): preload the symtable. (query_dwarf_func): don't compare the function a second time, especially since it may have been an alias that matched at first. (module_info::get_symtab): allow being called multiple times (module_info::update_symtab): copy dies from the cache to the symtab, and also add aliased names to the cache --- dwflpp.cxx | 19 ++++++-- dwflpp.h | 23 +-------- tapsets.cxx | 155 ++++++++++++++++++++++++++++-------------------------------- 3 files changed, 89 insertions(+), 108 deletions(-) diff --git a/dwflpp.cxx b/dwflpp.cxx index ee5d2233..b2532246 100644 --- a/dwflpp.cxx +++ b/dwflpp.cxx @@ -563,29 +563,40 @@ dwflpp::iterate_over_functions (int (* callback)(Dwarf_Die * func, base_query * dwarf_getfuncs (cu, cu_function_caching_callback, v, 0); if (sess.verbose > 4) clog << "function cache " << key << " size " << v->size() << endl; + mod_info->update_symtab(v); } cu_function_cache_t::iterator it = v->find(function); if (it != v->end()) { - Dwarf_Die die = it->second; + Dwarf_Die& die = it->second; if (sess.verbose > 4) clog << "function cache " << key << " hit " << function << endl; return (*callback)(& die, q); } else if (name_has_wildcard (function)) { + // track addresses we've already seen + set<Dwarf_Addr> alias_dupes; + for (it = v->begin(); it != v->end(); it++) { - if (pending_interrupts) return DWARF_CB_ABORT; - string func_name = it->first; - Dwarf_Die die = it->second; + if (pending_interrupts) return DWARF_CB_ABORT; + const string& func_name = it->first; + Dwarf_Die& die = it->second; if 
(function_name_matches_pattern (func_name, function)) { if (sess.verbose > 4) clog << "function cache " << key << " match " << func_name << " vs " << function << endl; + // make sure that this function address hasn't + // already been matched under an aliased name + Dwarf_Addr addr; + if (dwarf_entrypc(&die, &addr) == 0 && + !alias_dupes.insert(addr).second) + continue; + rc = (*callback)(& die, q); if (rc != DWARF_CB_OK) break; } diff --git a/dwflpp.h b/dwflpp.h index 8b503b33..ec6c5a0c 100644 --- a/dwflpp.h +++ b/dwflpp.h @@ -87,6 +87,7 @@ module_info info_status symtab_status; // symbol table cached? void get_symtab(dwarf_query *q); + void update_symtab(cu_function_cache_t *funcs); module_info(const char *name) : mod(NULL), @@ -128,28 +129,6 @@ struct func_info Dwarf_Addr entrypc; Dwarf_Addr prologue_end; bool weak; - - // Comparison functor for list of functions sorted by address. The - // two versions that take a Dwarf_Addr let us use the STL algorithms - // upper_bound, equal_range et al., but we don't know whether the - // searched-for value will be passed as the first or the second - // argument. - struct Compare - { - bool operator() (const func_info* f1, const func_info* f2) const - { - return f1->addr < f2->addr; - } - // For doing lookups by address. - bool operator() (Dwarf_Addr addr, const func_info* f2) const - { - return addr < f2->addr; - } - bool operator() (const func_info* f1, Dwarf_Addr addr) const - { - return f1->addr < addr; - } - }; }; diff --git a/tapsets.cxx b/tapsets.cxx index 765c2cd3..4f172506 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -282,24 +282,20 @@ symbol_table { module_info *mod_info; // associated module map<string, func_info*> map_by_name; - vector<func_info*> list_by_addr; - typedef vector<func_info*>::iterator iterator_t; + multimap<Dwarf_Addr, func_info*> map_by_addr; + typedef multimap<Dwarf_Addr, func_info*>::iterator iterator_t; typedef pair<iterator_t, iterator_t> range_t; #ifdef __powerpc__ GElf_Word opd_section; #endif - // add_symbol doesn't leave symbol table in order; call - // symbol_table::sort() when done adding symbols. 
void add_symbol(const char *name, bool weak, Dwarf_Addr addr, Dwarf_Addr *high_addr); - void sort(); enum info_status read_symbols(FILE *f, const string& path); enum info_status read_from_elf_file(const string& path); enum info_status read_from_text_file(const string& path); enum info_status get_from_elf(); void prepare_section_rejection(Dwfl_Module *mod); bool reject_section(GElf_Word section); - void mark_dwarf_redundancies(dwflpp *dw); void purge_syscall_stubs(); func_info *lookup_symbol(const string& name); Dwarf_Addr lookup_symbol_address(const string& name); @@ -1064,11 +1060,11 @@ dwarf_query::query_module_symtab() return; } symbol_table::iterator_t iter; - for (iter = sym_table->list_by_addr.begin(); - iter != sym_table->list_by_addr.end(); + for (iter = sym_table->map_by_addr.begin(); + iter != sym_table->map_by_addr.end(); ++iter) { - fi = *iter; + fi = iter->second; if (!null_die(&fi->die)) continue; // already handled in query_module_dwarf() if (dw.function_name_matches_pattern(fi->name, function_str_val)) @@ -1116,10 +1112,15 @@ dwarf_query::query_module_symtab() void dwarf_query::handle_query_module() { - dw.get_module_dwarf(false, - (dbinfo_reqt == dbr_need_dwarf || !sess.consult_symtab)); + bool report = dbinfo_reqt == dbr_need_dwarf || !sess.consult_symtab; + dw.get_module_dwarf(false, report); + + // prebuild the symbol table to resolve aliases + dw.mod_info->get_symtab(this); + if (dw.mod_info->dwarf_status == info_present) query_module_dwarf(); + // Consult the symbol table if we haven't found all we're looking for. // asm functions can show up in the symbol table but not in dwarf. if (sess.consult_symtab && !query_done) @@ -1537,8 +1538,7 @@ query_dwarf_func (Dwarf_Die * func, base_query * bq) if (q->dw.func_is_inline () && (! q->has_call) && (! 
q->has_return) - && (((q->has_statement_str || q->has_function_str) - && q->dw.function_name_matches(q->function)))) + && (q->has_statement_str || q->has_function_str)) { if (q->sess.verbose>3) clog << "checking instances of inline " << q->dw.function_name @@ -1552,8 +1552,7 @@ query_dwarf_func (Dwarf_Die * func, base_query * bq) { bool record_this_function = false; - if ((q->has_statement_str || q->has_function_str) - && q->dw.function_name_matches(q->function)) + if (q->has_statement_str || q->has_function_str) { record_this_function = true; } @@ -3605,8 +3604,8 @@ dwarf_builder::build(systemtap_session & sess, symbol_table::~symbol_table() { - for (iterator_t i = list_by_addr.begin(); i != list_by_addr.end(); ++i) - delete *i; + for (iterator_t i = map_by_addr.begin(); i != map_by_addr.end(); ++i) + delete i->second; } void @@ -3625,7 +3624,7 @@ symbol_table::add_symbol(const char *name, bool weak, Dwarf_Addr addr, map_by_name[fi->name] = fi; // TODO: Use a multimap in case there are multiple static // functions with the same name? - list_by_addr.push_back(fi); + map_by_addr.insert(make_pair(addr, fi)); } enum info_status @@ -3666,13 +3665,12 @@ symbol_table::read_symbols(FILE *f, const string& path) add_symbol(name, (type == 'W'), (Dwarf_Addr) addr, &high_addr); } - if (list_by_addr.size() < 1) + if (map_by_addr.size() < 1) { cerr << "Symbol table error: " << path << " contains no function symbols." << endl; return info_absent; } - sort(); return info_present; } @@ -3784,53 +3782,17 @@ symbol_table::get_from_elf() add_symbol(name, (GELF_ST_BIND(sym.st_info) == STB_WEAK), sym.st_value, &high_addr); } - sort(); return info_present; } -void -symbol_table::mark_dwarf_redundancies(dwflpp *dw) -{ - // dwflpp.cu_function_cache maps each module_name:cu_name to a - // vector of Dwarf_Dies, one per function. 
- string module_prefix = string(mod_info->name) + ":"; - - for (mod_cu_function_cache_t::iterator cu = dw->cu_function_cache.begin(); - cu != dw->cu_function_cache.end(); cu++) - { - string key = cu->first; - if (key.find(module_prefix) == 0) - { - // Found a compilation unit in the module of interest. - // Mark all its functions in the symbol table. - cu_function_cache_t* v = cu->second; - assert(v); - for (cu_function_cache_t::iterator fc = v->begin(); fc != v->end(); fc++) - { - Dwarf_Die func = fc->second; - string func_name = fc->first; // == dwarf_diename(&func); - // map_by_name[func_name]->die = func; - map<string, func_info*>::iterator i = map_by_name.find(func_name); - // Func names can show up in the dwarf but not the symtab (!). - if (i != map_by_name.end()) - { - func_info *fi = i->second; - fi->die = func; - } - } - } - } -} - func_info * symbol_table::get_func_containing_address(Dwarf_Addr addr) { - iterator_t iter = upper_bound(list_by_addr.begin(), list_by_addr.end(), addr, - func_info::Compare()); - if (iter == list_by_addr.begin()) + iterator_t iter = map_by_addr.upper_bound(addr); + if (iter == map_by_addr.begin()) return NULL; else - return *(iter - 1); + return (--iter)->second; } func_info * @@ -3865,30 +3827,21 @@ symbol_table::purge_syscall_stubs() Dwarf_Addr stub_addr = lookup_symbol_address("sys_ni_syscall"); if (stub_addr == 0) return; - range_t purge_range = equal_range(list_by_addr.begin(), list_by_addr.end(), - stub_addr, func_info::Compare()); + range_t purge_range = map_by_addr.equal_range(stub_addr); for (iterator_t iter = purge_range.first; iter != purge_range.second; - ++iter) + ) { - func_info *fi = *iter; + func_info *fi = iter->second; if (fi->weak && fi->name != "sys_ni_syscall") { map_by_name.erase(fi->name); + map_by_addr.erase(iter++); delete fi; - *iter = 0; } + else + iter++; } - // Range might have null pointer entries that should be erased. 
- list_by_addr.erase(remove(purge_range.first, purge_range.second, - (func_info*)0), - purge_range.second); -} - -void -symbol_table::sort() -{ - stable_sort(list_by_addr.begin(), list_by_addr.end(), func_info::Compare()); } void @@ -3896,6 +3849,9 @@ module_info::get_symtab(dwarf_query *q) { systemtap_session &sess = q->sess; + if (symtab_status != info_unknown) + return; + sym_table = new symbol_table(this); if (!elf_path.empty()) { @@ -3938,18 +3894,53 @@ module_info::get_symtab(dwarf_query *q) return; } - // If we have dwarf for the same module, mark the redundant symtab - // entries. - // - // In dwarf_query::handle_query_module(), the call to query_module_dwarf() - // precedes the call to query_module_symtab(). So we should never read - // a module's symbol table without first having tried to get its dwarf. - sym_table->mark_dwarf_redundancies(&q->dw); - if (name == TOK_KERNEL) sym_table->purge_syscall_stubs(); } +// update_symtab reconciles data between the elf symbol table and the dwarf +// function enumeration. It updates the symbol table entries with the dwarf +// die that describes the function, which also signals to query_module_symtab +// that a statement probe isn't needed. In return, it also adds aliases to the +// function table for names that share the same addr/die. 
+void +module_info::update_symtab(cu_function_cache_t *funcs) +{ + if (!sym_table) + return; + + cu_function_cache_t new_funcs; + + for (cu_function_cache_t::iterator func = funcs->begin(); + func != funcs->end(); func++) + { + // optimization: inlines will never be in the symbol table + if (dwarf_func_inline(&func->second) != 0) + continue; + + func_info *fi = sym_table->lookup_symbol(func->first); + if (!fi) + continue; + + // iterate over all functions at the same address + symbol_table::range_t er = sym_table->map_by_addr.equal_range(fi->addr); + for (symbol_table::iterator_t it = er.first; it != er.second; ++it) + { + // update this function with the dwarf die + it->second->die = func->second; + + // if this function is a new alias, then + // save it to merge into the function cache + if (it->second != fi) + new_funcs[it->second->name] = it->second->die; + } + } + + // add all discovered aliases back into the function cache + // NB: this won't replace any names that dwarf may have already found + funcs->insert(new_funcs.begin(), new_funcs.end()); +} + module_info::~module_info() { if (sym_table) -- cgit