diff options
author | Jim Keniston <jkenisto@us.ibm.com> | 2008-05-12 12:17:20 -0700 |
---|---|---|
committer | Jim Keniston <jkenisto@us.ibm.com> | 2008-05-12 12:17:20 -0700 |
commit | c7fe0041d2132e801f72e158854d54da50fc651d (patch) | |
tree | 258d955ef4a1ead4ba1102812d962c9c981d873b /tapsets.cxx | |
parent | 8dd6b23de4dd099aa244402192cb3d7be2bda739 (diff) | |
parent | da3fe5fe9641e7c4cc6ae5c4a289ddbc020aca1a (diff) | |
download | systemtap-steved-c7fe0041d2132e801f72e158854d54da50fc651d.tar.gz systemtap-steved-c7fe0041d2132e801f72e158854d54da50fc651d.tar.xz systemtap-steved-c7fe0041d2132e801f72e158854d54da50fc651d.zip |
Merge commit 'origin/dwarfless'
PR 4311 - Function boundary tracing without debuginfo: Phases 1 and 2
Diffstat (limited to 'tapsets.cxx')
-rw-r--r-- | tapsets.cxx | 916 |
1 files changed, 782 insertions, 134 deletions
diff --git a/tapsets.cxx b/tapsets.cxx index 7cdf3c32..9528066f 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -40,6 +40,7 @@ extern "C" { #include <regex.h> #include <glob.h> #include <fnmatch.h> +#include <stdio.h> #include "loc2c.h" #define __STDC_FORMAT_MACROS @@ -208,6 +209,7 @@ common_probe_entryfn_prologue (translator_output* o, string statestr, o->newline() << "c->unwaddr = 0;"; // reset unwound address cache o->newline() << "c->pi = 0;"; + o->newline() << "c->regparm = 0;"; o->newline() << "c->probe_point = 0;"; if (! interruptible) o->newline() << "c->actionremaining = MAXACTION;"; @@ -444,12 +446,27 @@ static string TOK_STATEMENT("statement"); static string TOK_ABSOLUTE("absolute"); static string TOK_PROCESS("process"); +// Can we handle this query with just symbol-table info? +enum dbinfo_reqt +{ + dbr_unknown, + dbr_none, // kernel.statement(NUM).absolute + dbr_need_symtab, // can get by with symbol table if there's no dwarf + dbr_need_dwarf +}; + +enum info_status +{ + info_unknown, + info_present, + info_absent +}; struct func_info { func_info() - : decl_file(NULL), decl_line(-1), prologue_end(0) + : decl_file(NULL), decl_line(-1), addr(0), prologue_end(0) { memset(&die, 0, sizeof(die)); } @@ -457,6 +474,7 @@ func_info char const * decl_file; int decl_line; Dwarf_Die die; + Dwarf_Addr addr; Dwarf_Addr prologue_end; }; @@ -475,6 +493,78 @@ inline_instance_info }; +struct dwarf_query; // forward decls +struct dwflpp; +struct symbol_table; + +struct +module_info +{ + Dwfl_Module* mod; + const char* name; + string elf_path; + Dwarf_Addr addr; + Dwarf_Addr bias; + symbol_table *sym_table; + info_status dwarf_status; // module has dwarf info? + info_status symtab_status; // symbol table cached? + + void get_symtab(dwarf_query *q); + + module_info(const char *name) : + mod(NULL), + name(name), + addr(0), + bias(0), + sym_table(NULL), + dwarf_status(info_unknown), + symtab_status(info_unknown) + {} + + ~module_info(); +}; + +struct +module_cache +{ + map<string, module_info*> cache; + bool paths_collected; + bool dwarf_collected; + + module_cache() : paths_collected(false), dwarf_collected(false) {} +}; +typedef struct module_cache module_cache_t; + +typedef map<string, vector<Dwarf_Die>*> cu_function_cache_t; + +struct +symbol_table +{ + module_info *mod_info; // associated module + map<string, func_info*> map_by_name; + vector<func_info*> list_by_addr; + + void add_symbol(const char *name, Dwarf_Addr addr, Dwarf_Addr *high_addr); + enum info_status read_symbols(FILE *f, const string& path); + enum info_status read_from_elf_file(const string& path); + enum info_status read_from_text_file(const string& path); + enum info_status get_from_elf(); + void mark_dwarf_redundancies(dwflpp *dw); + func_info *lookup_symbol(const string& name); + Dwarf_Addr lookup_symbol_address(const string& name); + func_info *get_func_containing_address(Dwarf_Addr addr); + int get_index_for_address(Dwarf_Addr addr); + + symbol_table(module_info *mi) : mod_info(mi) {} + ~symbol_table(); +}; + +static bool null_die(Dwarf_Die *die) +{ + static Dwarf_Die null = { 0 }; + return (!die || !memcmp(die, &null, sizeof(null))); +} + static int query_cu (Dwarf_Die * cudie, void * arg); @@ -489,9 +579,6 @@ dwarf_diename_integrate (Dwarf_Die *die) return dwarf_formstring (dwarf_attr_integrate (die, DW_AT_name, &attr_mem)); } - -struct dwarf_query; // forward decl - struct dwflpp { systemtap_session & sess; @@ -501,6 +588,7 @@ struct dwflpp Dwfl_Module * module; Dwarf * module_dwarf; Dwarf_Addr module_bias; + module_info * mod_info; // These describe the current module's PC address range Dwarf_Addr module_start; @@ -522,40 +610,55 @@ struct dwflpp } - void get_module_dwarf(bool required = false) + void get_module_dwarf(bool required = false, bool report = true) { - if (!module_dwarf) - module_dwarf = dwfl_module_getdwarf(module, &module_bias); - - if (!module_dwarf) + if (!module_dwarf && mod_info->dwarf_status != info_absent) { - string msg = "cannot find "; - if (module_name == "") - msg += "kernel"; - else - msg += string("module ") + module_name; - msg += " debuginfo"; - - int i = dwfl_errno(); - if (i) - msg += string(": ") + dwfl_errmsg (i); + if (!sess.ignore_dwarf) + module_dwarf = dwfl_module_getdwarf(module, &module_bias); + mod_info->dwarf_status = (module_dwarf ? info_present : info_absent); + } - if (required) - throw semantic_error (msg); - else - cerr << "WARNING: " << msg << "\n"; + if (!module_dwarf && report) + { + string msg = "cannot find "; + if (module_name == "") + msg += "kernel"; + else + msg += string("module ") + module_name; + msg += " debuginfo"; + + int i = dwfl_errno(); + if (i) + msg += string(": ") + dwfl_errmsg (i); + + if (required) + throw semantic_error (msg); + else + cerr << "WARNING: " << msg << "\n"; } } - void focus_on_module(Dwfl_Module * m) + void focus_on_module(Dwfl_Module * m, module_info * mi) { - assert(m); module = m; - module_name = default_name(dwfl_module_info(module, NULL, + mod_info = mi; + if (m) + { + module_name = default_name(dwfl_module_info(module, NULL, &module_start, &module_end, NULL, NULL, NULL, NULL), "module"); + } + else + { + assert(mi && mi->name && mi->name == TOK_KERNEL); + module_name = mi->name; + module_start = 0; + module_end = 0; + module_bias = mi->bias; + } // Reset existing pointers and names @@ -601,7 +704,7 @@ struct dwflpp cu = NULL; Dwfl_Module* mod = dwfl_addrmodule(dwfl, a); if (mod) // address could be wildly out of range - focus_on_module(mod); + focus_on_module(mod, NULL); } @@ -644,7 +747,6 @@ struct dwflpp bool module_name_matches(string pattern) { - assert(module); bool t = (fnmatch(pattern.c_str(), module_name.c_str(), 0) == 0); if (t && sess.verbose>3) clog << "pattern '" << pattern << "' " @@ -652,27 +754,35 @@ struct dwflpp << "module '" << module_name << "'" << "\n"; return t; } + bool name_has_wildcard(string pattern) + { + return (pattern.find('*') != string::npos || + pattern.find('?') != string::npos || + pattern.find('[') != string::npos); + } bool module_name_final_match(string pattern) { // Assume module_name_matches(). Can there be any more matches? // Not unless the pattern is a wildcard, since module names are // presumed unique. - return (pattern.find('*') == string::npos && - pattern.find('?') == string::npos && - pattern.find('[') == string::npos); + return !name_has_wildcard(pattern); } - bool function_name_matches(string pattern) + bool function_name_matches_pattern(string name, string pattern) { - assert(function); - bool t = (fnmatch(pattern.c_str(), function_name.c_str(), 0) == 0); + bool t = (fnmatch(pattern.c_str(), name.c_str(), 0) == 0); if (t && sess.verbose>3) clog << "pattern '" << pattern << "' " << "matches " - << "function '" << function_name << "'" << "\n"; + << "function '" << name << "'" << "\n"; return t; } + bool function_name_matches(string pattern) + { + assert(function); + return function_name_matches_pattern(function_name, pattern); + } bool function_name_final_match(string pattern) { return module_name_final_match (pattern); @@ -714,20 +824,57 @@ struct dwflpp throw semantic_error (msg); } + // static so pathname_caching_callback() can access them + static module_cache_t module_cache; + static bool ignore_vmlinux; + - dwflpp(systemtap_session & sess) + dwflpp(systemtap_session & session) : - sess(sess), + sess(session), dwfl(NULL), module(NULL), module_dwarf(NULL), module_bias(0), + mod_info(NULL), module_start(0), module_end(0), cu(NULL), function(NULL) - {} + { + ignore_vmlinux = sess.ignore_vmlinux; + } + + // Called by dwfl_linux_kernel_report_offline(). We may not have + // dwarf info for the kernel and/or modules, so remember this + // module's pathname in case we need to extract elf info from it. + // (Currently, we get all the elf info we need via elfutils -- if the + // elf file exists -- so remembering the pathname isn't strictly needed. + // But we still need to handle the case where there's no vmlinux.) + static int pathname_caching_callback(const char *name, const char *path) + { + module_info *mi = new module_info(name); + module_cache.cache[name] = mi; + if (ignore_vmlinux && path && name == TOK_KERNEL) + { + // report_kernel() in elfutils found vmlinux, but pretend it didn't. + // Given a non-null path, returning 1 means keep reporting modules. + mi->dwarf_status = info_absent; + return 1; + } + else if (path) + { + mi->elf_path = path; + return 1; + } + + // No vmlinux. Here returning 0 to report_kernel() means go ahead + // and keep reporting modules. + assert(name == TOK_KERNEL); + mi->dwarf_status = info_absent; + return 0; + } void setup(bool kernel, bool debuginfo_needed = true) { @@ -761,10 +908,18 @@ struct dwflpp throw semantic_error ("cannot open dwfl"); dwfl_report_begin (dwfl); + int (*callback)(const char *name, const char *path); + if (sess.consult_symtab && !module_cache.paths_collected) + { + callback = pathname_caching_callback; + module_cache.paths_collected = true; + } + else + callback = NULL; int rc = dwfl_linux_kernel_report_offline (dwfl, sess.kernel_release.c_str(), /* selection predicate */ - NULL); + callback); if (debuginfo_needed) dwfl_assert (string("missing kernel ") + sess.kernel_release + @@ -806,36 +961,34 @@ struct dwflpp // ----------------------------------------------------------------- - struct module_cache_entry { - Dwfl_Module* mod; - const char* name; - Dwarf_Addr addr; - }; - typedef vector<module_cache_entry> module_cache_t; - module_cache_t module_cache; - static int module_caching_callback(Dwfl_Module * mod, void **, const char *name, Dwarf_Addr addr, void *param) { - module_cache_t* cache = static_cast<module_cache_t*>(param); - module_cache_entry it; - it.mod = mod; - it.name = name; - it.addr = addr; - cache->push_back (it); + module_cache_t *cache = static_cast<module_cache_t*>(param); + module_info *mi = NULL; + + if (ignore_vmlinux && name == TOK_KERNEL) + // This wouldn't be called for vmlinux if vmlinux weren't there. + return DWARF_CB_OK; + + if (cache->paths_collected) + mi = cache->cache[name]; + if (!mi) + { + mi = new module_info(name); + cache->cache[name] = mi; + } + mi->mod = mod; + mi->addr = addr; return DWARF_CB_OK; } - - void iterate_over_modules(int (* callback)(Dwfl_Module *, void **, - const char *, Dwarf_Addr, - void *), - void * data) + void cache_modules_dwarf() { - if (module_cache.empty()) + if (!module_cache.dwarf_collected) { ptrdiff_t off = 0; do @@ -846,18 +999,29 @@ struct dwflpp } while (off > 0); dwfl_assert("dwfl_getmodules", off); + module_cache.dwarf_collected = true; } + } - // Traverse the cache. - for (unsigned i = 0; i < module_cache.size(); i++) + void iterate_over_modules(int (* callback)(Dwfl_Module *, module_info *, + const char *, Dwarf_Addr, + void *), + void * data) + { + cache_modules_dwarf(); + + map<string, module_info*>::iterator i; + for (i = module_cache.cache.begin(); i != module_cache.cache.end(); i++) { if (pending_interrupts) return; - module_cache_entry& it = module_cache[i]; - int rc = callback (it.mod, 0, it.name, it.addr, data); + module_info *mi = i->second; + int rc = callback (mi->mod, mi, mi->name, mi->addr, data); if (rc != DWARF_CB_OK) break; } } + // Defined after dwarf_query + void query_modules(dwarf_query *q); // ----------------------------------------------------------------- @@ -947,7 +1111,6 @@ struct dwflpp // ----------------------------------------------------------------- - typedef map<string, vector<Dwarf_Die>*> cu_function_cache_t; cu_function_cache_t cu_function_cache; static int cu_function_caching_callback (Dwarf_Die* func, void *arg) @@ -957,9 +1120,10 @@ struct dwflpp return DWARF_CB_OK; } - void iterate_over_functions (int (* callback)(Dwarf_Die * func, void * arg), + int iterate_over_functions (int (* callback)(Dwarf_Die * func, void * arg), void * data) { + int rc = DWARF_CB_OK; assert (module); assert (cu); @@ -975,9 +1139,10 @@ struct dwflpp for (unsigned i=0; i<v->size(); i++) { Dwarf_Die die = v->at(i); - int rc = (*callback)(& die, data); + rc = (*callback)(& die, data); if (rc != DWARF_CB_OK) break; } + return rc; } @@ -2018,6 +2183,9 @@ struct dwflpp } }; +module_cache_t dwflpp::module_cache; +bool dwflpp::ignore_vmlinux = false; + enum function_spec_type @@ -2029,7 +2197,6 @@ function_spec_type struct dwarf_builder; -struct dwarf_query; // XXX: This class is a candidate for subclassing to separate @@ -2187,6 +2354,8 @@ struct dwarf_query : public base_query vector<derived_probe *> & results); virtual void handle_query_module(); + void query_module_dwarf(); + void query_module_symtab(); void add_probe_point(string const & funcname, char const * filename, @@ -2234,11 +2403,15 @@ struct dwarf_query : public base_query bool has_absolute; + enum dbinfo_reqt dbinfo_reqt; + enum dbinfo_reqt assess_dbinfo_reqt(); + function_spec_type parse_function_spec(string & spec); function_spec_type spec_type; string function; string file; int line; + bool query_done; // Found exact match set<char const *> filtered_srcfiles; @@ -2375,11 +2548,13 @@ dwarf_query::dwarf_query(systemtap_session & sess, spec_type = parse_function_spec(statement_str_val); build_blacklist(); // XXX: why not reuse amongst dwarf_query instances? + dbinfo_reqt = assess_dbinfo_reqt(); + query_done = false; } void -dwarf_query::handle_query_module() +dwarf_query::query_module_dwarf() { if (has_function_num || has_statement_num) { @@ -2408,6 +2583,110 @@ dwarf_query::handle_query_module() } } +static void query_func_info (Dwarf_Addr entrypc, func_info & fi, + dwarf_query * q); + +void +dwarf_query::query_module_symtab() +{ + // Get the symbol table if it's necessary, sufficient, and not already got. + if (dbinfo_reqt == dbr_need_dwarf) + return; + + module_info *mi = dw.mod_info; + if (dbinfo_reqt == dbr_need_symtab) + { + if (mi->symtab_status == info_unknown) + mi->get_symtab(this); + if (mi->symtab_status == info_absent) + return; + } + + func_info *fi = NULL; + symbol_table *sym_table = mi->sym_table; + + if (has_function_str) + { + // Per dwarf_query::assess_dbinfo_reqt()... + assert(spec_type == function_alone); + if (dw.name_has_wildcard(function_str_val)) + { + // Until we augment the blacklist sufficently... + if (function_str_val.find_first_not_of("*?") == string::npos) + { + // e.g., kernel.function("*") + cerr << "Error: Pattern '" + << function_str_val + << "' matches every instruction address in the symbol table," + << endl + << "some of which aren't even functions." + << " Please be more precise." + << endl; + return; + } + + size_t i; + size_t nsyms = sym_table->list_by_addr.size(); + for (i = 0; i < nsyms; i++) + { + fi = sym_table->list_by_addr.at(i); + if (!null_die(&fi->die)) + continue; // already handled in query_module_dwarf() + if (dw.function_name_matches_pattern(fi->name, function_str_val)) + query_func_info(fi->addr, *fi, this); + } + } + else + { + fi = sym_table->lookup_symbol(function_str_val); + if (fi && null_die(&fi->die)) + query_func_info(fi->addr, *fi, this); + } + } + else + { + assert(has_function_num || has_statement_num); + // Find the "function" in which the indicated address resides. + Dwarf_Addr addr = + (has_function_num ? function_num_val : statement_num_val); + fi = sym_table->get_func_containing_address(addr); + if (!fi) + { + cerr << "Warning: address " + << hex << addr << dec + << " out of range for module " + << dw.module_name; + return; + } + if (!null_die(&fi->die)) + { + // addr looks like it's in the compilation unit containing + // the indicated function, but query_module_dwarf() didn't + // match addr to any compilation unit, so addr must be + // above that cu's address range. + cerr << "Warning: address " + << hex << addr << dec + << " maps to no known compilation unit in module " + << dw.module_name; + return; + } + query_func_info(fi->addr, *fi, this); + } +} + +void +dwarf_query::handle_query_module() +{ + dw.get_module_dwarf(false, + (dbinfo_reqt == dbr_need_dwarf || !sess.consult_symtab)); + if (dw.mod_info->dwarf_status == info_present) + query_module_dwarf(); + // Consult the symbol table if we haven't found all we're looking for. + // asm functions can show up in the symbol table but not in dwarf. + if (sess.consult_symtab && !query_done) + query_module_symtab(); +} + void dwarf_query::build_blacklist() @@ -2596,7 +2875,7 @@ dwarf_query::parse_function_spec(string & spec) // Forward declaration. -static int query_kernel_module (Dwfl_Module *, void **, const char *, +static int query_kernel_module (Dwfl_Module *, module_info *, const char *, Dwarf_Addr, void *); @@ -2720,7 +2999,13 @@ dwarf_query::add_probe_point(const string& funcname, assert (! has_absolute); // already handled in dwarf_builder::build() - if (dwfl_module_relocations (dw.module) > 0) + if (!dw.module) + { + assert(module == TOK_KERNEL); + reloc_section = ""; + blacklist_section = ""; + } + else if (dwfl_module_relocations (dw.module) > 0) { // This is arelocatable module; libdwfl already knows its // sections, so we can relativize addr. @@ -2770,6 +3055,44 @@ dwarf_query::add_probe_point(const string& funcname, } } +enum dbinfo_reqt +dwarf_query::assess_dbinfo_reqt() +{ + if (has_absolute) + { + // kernel.statement(NUM).absolute + return dbr_none; + } + if (has_inline) + { + // kernel.function("f").inline or module("m").function("f").inline + return dbr_need_dwarf; + } + if (has_function_str && spec_type == function_alone) + { + // kernel.function("f") or module("m").function("f") + return dbr_need_symtab; + } + if (has_statement_num) + { + // kernel.statement(NUM) or module("m").statement(NUM) + // Technically, all we need is the module offset (or _stext, for + // the kernel). But for that we need either the ELF file or (for + // _stext) the symbol table. In either case, the symbol table + // is available, and that allows us to map the NUM (address) + // to a function, which is goodness. + return dbr_need_symtab; + } + if (has_function_num) + { + // kernel.function(NUM) or module("m").function(NUM) + // Need the symbol table so we can back up from NUM to the + // start of the function. + return dbr_need_symtab; + } + // Symbol table tells us nothing about source files or line numbers. + return dbr_need_dwarf; +} @@ -3102,7 +3425,9 @@ query_cu (Dwarf_Die * cudie, void * arg) // Pick up [entrypc, name, DIE] tuples for all the functions // matching the query, and fill in the prologue endings of them // all in a single pass. - q->dw.iterate_over_functions (query_dwarf_func, q); + int rc = q->dw.iterate_over_functions (query_dwarf_func, q); + if (rc != DWARF_CB_OK) + q->query_done = true; if (q->sess.prologue_searching && !q->has_statement_str && !q->has_statement_num) // PR 2608 @@ -3163,7 +3488,7 @@ query_cu (Dwarf_Die * cudie, void * arg) static int query_kernel_module (Dwfl_Module *mod, - void **, + module_info *, const char *name, Dwarf_Addr, void *arg) @@ -3178,10 +3503,74 @@ query_kernel_module (Dwfl_Module *mod, return DWARF_CB_OK; } +static void +validate_module_elf (Dwfl_Module *mod, const char *name, base_query *q) +{ + // Validate the machine code in this elf file against the + // session machine. This is important, in case the wrong kind + // of debuginfo is being automagically processed by elfutils. + // While we can tell i686 apart from x86-64, unfortunately + // we can't help confusing i586 vs i686 (both EM_386). + + Dwarf_Addr bias; + // We prefer dwfl_module_getdwarf to dwfl_module_getelf here, + // because dwfl_module_getelf can force costly section relocations + // we don't really need, while either will do for this purpose. + Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias)) + ?: dwfl_module_getelf (mod, &bias)); + + GElf_Ehdr ehdr_mem; + GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem); + if (em == 0) { q->dw.dwfl_assert ("dwfl_getehdr", dwfl_errno()); } + int elf_machine = em->e_machine; + const char* debug_filename = ""; + const char* main_filename = ""; + (void) dwfl_module_info (mod, NULL, NULL, + NULL, NULL, NULL, + & main_filename, + & debug_filename); + const string& sess_machine = q->sess.architecture; + string expect_machine; + + switch (elf_machine) + { + case EM_386: expect_machine = "i?86"; break; // accept e.g. i586 + case EM_X86_64: expect_machine = "x86_64"; break; + case EM_PPC: expect_machine = "ppc"; break; + case EM_PPC64: expect_machine = "ppc64"; break; + case EM_S390: expect_machine = "s390x"; break; + case EM_IA_64: expect_machine = "ia64"; break; + case EM_ARM: expect_machine = "armv*"; break; + // XXX: fill in some more of these + default: expect_machine = "?"; break; + } + + if (! debug_filename) debug_filename = main_filename; + if (! debug_filename) debug_filename = name; + + if (fnmatch (expect_machine.c_str(), sess_machine.c_str(), 0) != 0) + { + stringstream msg; + msg << "ELF machine " << expect_machine << " (code " << elf_machine + << ") mismatch with target " << sess_machine + << " in '" << debug_filename << "'"; + throw semantic_error(msg.str ()); + } + + if (q->sess.verbose>2) + clog << "focused on module '" << q->dw.module_name + << " = [0x" << hex << q->dw.module_start + << "-0x" << q->dw.module_end + << ", bias 0x" << q->dw.module_bias << "]" << dec + << " file " << debug_filename + << " ELF machine " << expect_machine + << " (code " << elf_machine << ")" + << "\n"; +} static int query_module (Dwfl_Module *mod, - void **, + module_info *mi, const char *name, Dwarf_Addr, void *arg) @@ -3190,7 +3579,7 @@ query_module (Dwfl_Module *mod, try { - q->dw.focus_on_module(mod); + q->dw.focus_on_module(mod, mi); // If we have enough information in the pattern to skip a module and // the module does not match that information, return early. @@ -3203,67 +3592,15 @@ query_module (Dwfl_Module *mod, if (q->dw.module_name == TOK_KERNEL && ! q->has_kernel) return DWARF_CB_OK; - // Validate the machine code in this elf file against the - // session machine. This is important, in case the wrong kind - // of debuginfo is being automagically processed by elfutils. - // While we can tell i686 apart from x86-64, unfortunately - // we can't help confusing i586 vs i686 (both EM_386). - - Dwarf_Addr bias; - // We prefer dwfl_module_getdwarf to dwfl_module_getelf here, - // because dwfl_module_getelf can force costly section relocations - // we don't really need, while either will do for this purpose. - Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias)) - ?: dwfl_module_getelf (mod, &bias)); - - GElf_Ehdr ehdr_mem; - GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem); - if (em == 0) { q->dw.dwfl_assert ("dwfl_getehdr", dwfl_errno()); } - int elf_machine = em->e_machine; - const char* debug_filename = ""; - const char* main_filename = ""; - (void) dwfl_module_info (mod, NULL, NULL, - NULL, NULL, NULL, - & main_filename, - & debug_filename); - const string& sess_machine = q->sess.architecture; - string expect_machine; - - switch (elf_machine) - { - case EM_386: expect_machine = "i?86"; break; // accept e.g. i586 - case EM_X86_64: expect_machine = "x86_64"; break; - case EM_PPC: expect_machine = "ppc"; break; - case EM_PPC64: expect_machine = "ppc64"; break; - case EM_S390: expect_machine = "s390x"; break; - case EM_IA_64: expect_machine = "ia64"; break; - case EM_ARM: expect_machine = "armv*"; break; - // XXX: fill in some more of these - default: expect_machine = "?"; break; - } - - if (! debug_filename) debug_filename = main_filename; - if (! debug_filename) debug_filename = name; - - if (fnmatch (expect_machine.c_str(), sess_machine.c_str(), 0) != 0) + if (mod) + validate_module_elf(mod, name, q); + else { - stringstream msg; - msg << "ELF machine " << expect_machine << " (code " << elf_machine - << ") mismatch with target " << sess_machine - << " in '" << debug_filename << "'"; - throw semantic_error(msg.str ()); + assert(q->has_kernel); // and no vmlinux to examine + if (q->sess.verbose>2) + cerr << "focused on module '" << q->dw.module_name << "'\n"; } - if (q->sess.verbose>2) - clog << "focused on module '" << q->dw.module_name - << " = [0x" << hex << q->dw.module_start - << "-0x" << q->dw.module_end - << ", bias 0x" << q->dw.module_bias << "]" << dec - << " file " << debug_filename - << " ELF machine " << expect_machine - << " (code " << elf_machine << ")" - << "\n"; - q->handle_query_module(); // If we know that there will be no more matches, abort early. @@ -3279,6 +3616,24 @@ query_module (Dwfl_Module *mod, } } +void +dwflpp::query_modules(dwarf_query *q) +{ + string name = q->module_val; + if (name_has_wildcard(name)) + iterate_over_modules(&query_module, q); + else + { + cache_modules_dwarf(); + + map<string, module_info*>::iterator i = module_cache.cache.find(name); + if (i != module_cache.cache.end()) + { + module_info *mi = i->second; + query_module(mi->mod, mi, name.c_str(), mi->addr, q); + } + } +} struct var_expanding_copy_visitor: public deep_copy_visitor { @@ -3810,7 +4165,7 @@ dwarf_derived_probe::dwarf_derived_probe(const string& funcname, q.base_loc->tok); // Make a target-variable-expanded copy of the probe body - if (scope_die) + if (!null_die(scope_die)) { dwarf_var_expanding_copy_visitor v (q, scope_die, dwfl_addr); require <statement*> (&v, &(this->body), this->body); @@ -4266,7 +4621,300 @@ dwarf_builder::build(systemtap_session & sess, return; } - dw->iterate_over_modules(&query_module, &q); + // dw->iterate_over_modules(&query_module, &q); + dw->query_modules(&q); +} + +symbol_table::~symbol_table() +{ + // map::clear() and vector::clear() don't call destructors for + // pointers, only for objects. + int i; + int nsym = (int) list_by_addr.size(); + for (i = 0; i < nsym; i++) + delete list_by_addr.at(i); + list_by_addr.clear(); + map_by_name.clear(); +} + +void +symbol_table::add_symbol(const char *name, Dwarf_Addr addr, + Dwarf_Addr *high_addr) +{ + func_info *fi = new func_info(); + fi->addr = addr; + fi->name = name; + map_by_name[fi->name] = fi; + // TODO: Use a multimap in case there are multiple static + // functions with the same name? + if (addr >= *high_addr) + { + list_by_addr.push_back(fi); + *high_addr = addr; + } + else + { + // Symbols aren't in numerical order. FWIW, sort(1) doesn't + // handle hex numbers without the leading 0x. + int index = get_index_for_address(fi->addr); + list_by_addr.insert(list_by_addr.begin()+(index+1), fi); + } +} + +enum info_status +symbol_table::read_symbols(FILE *f, const string& path) +{ + // Based on do_kernel_symbols() in runtime/staprun/symbols.c + int ret; + char *name, *mod; + char type; + unsigned long long addr; + Dwarf_Addr high_addr = 0; + int line = 0; + + // %as (non-POSIX) mallocs space for the string and stores its address. + while ((ret = fscanf(f, "%llx %c %as [%as", &addr, &type, &name, &mod)) > 0) + { + line++; + if (ret < 3) + { + cerr << "Symbol table error: Line " + << line + << " of symbol list from " + << path + << " is not in correct format: address type name [module]"; + // Caller should delete symbol_table object. + return info_absent; + } + if (ret > 3) + { + // Modules are loaded above the kernel, so if we're getting + // modules, we're done. + free(name); + free(mod); + goto done; + } + if (type == 'T' || type == 't') + add_symbol(name, (Dwarf_Addr) addr, &high_addr); + free(name); + } + +done: + if (list_by_addr.size() < 1) + { + cerr << "Symbol table error: " + << path << " contains no function symbols." << endl; + return info_absent; + } + return info_present; +} + +// NB: This currently unused. We use get_from_elf() instead because +// that gives us raw addresses -- which we need for modules -- whereas +// nm provides the address relative to the beginning of the section. +enum info_status +symbol_table::read_from_elf_file(const string &path) +{ + FILE *f; + string cmd = string("/usr/bin/nm -n --defined-only ") + path; + f = popen(cmd.c_str(), "r"); + if (!f) + { + // nm failures are detected by pclose, not popen. + cerr << "Internal error reading symbol table from " + << path << " -- " << strerror (errno); + return info_absent; + } + enum info_status status = read_symbols(f, path); + if (pclose(f) != 0) + { + if (status == info_present) + cerr << "Warning: nm cannot read symbol table from " << path; + return info_absent; + } + return status; +} + +enum info_status +symbol_table::read_from_text_file(const string& path) +{ + FILE *f = fopen(path.c_str(), "r"); + if (!f) + { + cerr << "Warning: cannot read symbol table from " + << path << " -- " << strerror (errno); + return info_absent; + } + enum info_status status = read_symbols(f, path); + (void) fclose(f); + return status; +} + +enum info_status +symbol_table::get_from_elf() +{ + Dwarf_Addr high_addr = 0; + Dwfl_Module *mod = mod_info->mod; + int syments = dwfl_module_getsymtab(mod); + assert(syments); + for (int i = 1; i < syments; ++i) + { + GElf_Sym sym; + const char *name = dwfl_module_getsym(mod, i, &sym, NULL); + if (name && GELF_ST_TYPE(sym.st_info) == STT_FUNC) + add_symbol(name, sym.st_value, &high_addr); + } + return info_present; +} + +void +symbol_table::mark_dwarf_redundancies(dwflpp *dw) +{ + // dwflpp.cu_function_cache maps each module_name:cu_name to a + // vector of Dwarf_Dies, one per function. + string module_prefix = string(mod_info->name) + ":"; + + cu_function_cache_t::iterator cu; + for (cu = dw->cu_function_cache.begin(); + cu != dw->cu_function_cache.end(); cu++) + { + string key = cu->first; + if (key.find(module_prefix) == 0) + { + // Found a compilation unit in the module of interest. + // Mark all its functions in the symbol table. + vector<Dwarf_Die>* v = cu->second; + assert(v); + for (unsigned f=0; f < v->size(); f++) + { + Dwarf_Die func = v->at(f); + string func_name = dwarf_diename(&func); + // map_by_name[func_name]->die = func; + map<string, func_info*>::iterator i = map_by_name.find(func_name); + // Func names can show up in the dwarf but not the symtab (!). + if (i != map_by_name.end()) + { + func_info *fi = i->second; + fi->die = func; + } + } + } + } +} + +func_info * +symbol_table::get_func_containing_address(Dwarf_Addr addr) +{ + int index = get_index_for_address(addr); + if (index < 0) + return NULL; + return list_by_addr.at(index); +} + +// Find the index in list_by_addr of the last element whose address +// is <= addr. Returns -1 if addr is less than the first address in +// list_by_addr. +int +symbol_table::get_index_for_address(Dwarf_Addr addr) +{ + // binary search from runtime/sym.c + int begin = 0; + int mid; + int end = list_by_addr.size(); + + if (end == 0 || addr < list_by_addr.at(0)->addr) + return -1; + do + { + mid = (begin + end) / 2; + if (addr < list_by_addr.at(mid)->addr) + end = mid; + else + begin = mid; + } + while (begin + 1 < end); + return begin; +} + +func_info * +symbol_table::lookup_symbol(const string& name) +{ + map<string, func_info*>::iterator i = map_by_name.find(name); + if (i == map_by_name.end()) + return NULL; + return i->second; +} + +Dwarf_Addr +symbol_table::lookup_symbol_address(const string& name) +{ + func_info *fi = lookup_symbol(name); + if (fi) + return fi->addr; + return 0; +} + +void +module_info::get_symtab(dwarf_query *q) +{ + systemtap_session &sess = q->sess; + + sym_table = new symbol_table(this); + if (!elf_path.empty()) + { + if (name == TOK_KERNEL && !sess.kernel_symtab_path.empty()) + cerr << "Warning: reading symbol table from " + << elf_path + << " -- ignoring " + << sess.kernel_symtab_path + << endl ;; + symtab_status = sym_table->get_from_elf(); + } + else + { + assert(name == TOK_KERNEL); + if (sess.kernel_symtab_path.empty()) + { + symtab_status = info_absent; + cerr << "Error: Cannot find vmlinux." + << " Consider using --kmap instead of --kelf." + << endl;; + } + else + { + symtab_status = + sym_table->read_from_text_file(sess.kernel_symtab_path); + if (symtab_status == info_present) + { + sess.sym_kprobes_text_start = + sym_table->lookup_symbol_address("__kprobes_text_start"); + sess.sym_kprobes_text_end = + sym_table->lookup_symbol_address("__kprobes_text_end"); + sess.sym_stext = sym_table->lookup_symbol_address("_stext"); + bias = sym_table->lookup_symbol_address("_text"); + } + } + } + if (symtab_status == info_absent) + { + delete sym_table; + sym_table = NULL; + return; + } + + // If we have dwarf for the same module, mark the redundant symtab + // entries. + // + // In dwarf_query::handle_query_module(), the call to query_module_dwarf() + // precedes the call to query_module_symtab(). So we should never read + // a module's symbol table without first having tried to get its dwarf. + sym_table->mark_dwarf_redundancies(&q->dw); +} + +module_info::~module_info() +{ + if (sym_table) + delete sym_table; } |