summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog13
-rw-r--r--hash.cxx15
-rw-r--r--main.cxx71
-rw-r--r--session.h6
-rw-r--r--stap.1.in76
-rw-r--r--tapsets.cxx915
6 files changed, 960 insertions, 136 deletions
diff --git a/ChangeLog b/ChangeLog
index e35d90b6..5c7f5135 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2008-04-18 Jim Keniston <jkenisto@us.ibm.com>
+
+ PR 4311 - Function boundary tracing without debuginfo: Phase I
+ * tapsets.cxx: Major rework of dwflpp, dwarf_query, and related
+ code to make do with elf info if dwarf info is absent, or
+ (in the case of vmlinux) make do with a System.map-style
+ symbol table if even the elf file is absent.
+ * main.cxx: Use getopt_long instead of getopt. Added --kelf,
+ --kmap, --ignore-vmlinux, and --ignore-dwarf.
+ * hash.cxx, session.h, stap.1.in: Added --kelf, --kmap,
+ --ignore-vmlinux, and --ignore-dwarf.
+ * testsuite/{semok,semko}/nodwf*.stp
+
2008-04-04 Masami Hiramatsu <mhiramat@redhat.com>
PR 6028
diff --git a/hash.cxx b/hash.cxx
index d05be268..c41600d2 100644
--- a/hash.cxx
+++ b/hash.cxx
@@ -97,6 +97,21 @@ find_hash (systemtap_session& s, const string& script)
h.add(s.merge); // '-M'
h.add(s.timing); // '-t'
h.add(s.prologue_searching); // '-P'
+ h.add(s.ignore_vmlinux); // --ignore-vmlinux
+ h.add(s.ignore_dwarf); // --ignore-dwarf
+ h.add(s.consult_symtab); // --kelf, --kmap
+ if (!s.kernel_symtab_path.empty()) // --kmap
+ {
+ h.add(s.kernel_symtab_path);
+ if (stat(s.kernel_symtab_path.c_str(), &st) == 0)
+ {
+ // NB: stat of /proc/kallsyms always returns size=0, mtime=now...
+ // which is a good reason to use the default /boot/System.map-2.6.xx
+ // instead.
+ h.add(st.st_size);
+ h.add(st.st_mtime);
+ }
+ }
for (unsigned i = 0; i < s.macros.size(); i++)
h.add(s.macros[i]);
diff --git a/main.cxx b/main.cxx
index 4364f664..f547d13d 100644
--- a/main.cxx
+++ b/main.cxx
@@ -36,6 +36,7 @@ extern "C" {
#include <sys/stat.h>
#include <time.h>
#include <elfutils/libdwfl.h>
+#include <getopt.h>
}
using namespace std;
@@ -103,8 +104,15 @@ usage (systemtap_session& s, int exitcode)
<< " -x PID sets target() to PID" << endl
<< " -t collect probe timing information" << endl
#ifdef HAVE_LIBSQLITE3
- << " -q generate information on tapset coverage"
+ << " -q generate information on tapset coverage" << endl
#endif /* HAVE_LIBSQLITE3 */
+ << " --kelf make do with symbol table from vmlinux" << endl
+ << " --kmap[=FILE]" << endl
+ << " make do with symbol table from nm listing" << endl
+ << " --ignore-vmlinux" << endl
+ << " for testing, pretend vmlinux can't be found" << endl
+ << " --ignore-dwarf" << endl
+ << " for testing, pretend vmlinux and modules lack debug info"
<< endl
;
// -d: dump safety-related external references
@@ -244,6 +252,9 @@ main (int argc, char * const argv [])
s.use_cache = true;
s.tapset_compile_coverage = false;
s.need_uprobes = false;
+ s.consult_symtab = false;
+ s.ignore_vmlinux = false;
+ s.ignore_dwarf = false;
const char* s_p = getenv ("SYSTEMTAP_TAPSET");
if (s_p != NULL)
@@ -294,8 +305,21 @@ main (int argc, char * const argv [])
while (true)
{
+ int long_opt;
+#define LONG_OPT_KELF 1
+#define LONG_OPT_KMAP 2
+#define LONG_OPT_IGNORE_VMLINUX 3
+#define LONG_OPT_IGNORE_DWARF 4
// NB: also see find_hash(), usage(), switch stmt below, stap.1 man page
- int grc = getopt (argc, argv, "hVMvtp:I:e:o:R:r:m:kgPc:x:D:bs:uqw");
+ static struct option long_options[] = {
+ { "kelf", 0, &long_opt, LONG_OPT_KELF },
+ { "kmap", 2, &long_opt, LONG_OPT_KMAP },
+ { "ignore-vmlinux", 0, &long_opt, LONG_OPT_IGNORE_VMLINUX },
+ { "ignore-dwarf", 0, &long_opt, LONG_OPT_IGNORE_DWARF },
+ { NULL, 0, NULL, 0 }
+ };
+ int grc = getopt_long (argc, argv, "hVMvtp:I:e:o:R:r:m:kgPc:x:D:bs:uqw",
+ long_options, NULL);
if (grc < 0)
break;
switch (grc)
@@ -453,6 +477,37 @@ main (int argc, char * const argv [])
usage (s, 0);
break;
+ case 0:
+ switch (long_opt)
+ {
+ case LONG_OPT_KELF:
+ s.consult_symtab = true;
+ break;
+ case LONG_OPT_KMAP:
+ // Leave s.consult_symtab unset for now, to ease error checking.
+ if (!s.kernel_symtab_path.empty())
+ {
+ cerr << "You can't specify multiple --kmap options." << endl;
+ usage(s, 1);
+ }
+ if (optarg)
+ s.kernel_symtab_path = optarg;
+ else
+#define PATH_TBD string("__TBD__")
+ s.kernel_symtab_path = PATH_TBD;
+ break;
+ case LONG_OPT_IGNORE_VMLINUX:
+ s.ignore_vmlinux = true;
+ break;
+ case LONG_OPT_IGNORE_DWARF:
+ s.ignore_dwarf = true;
+ break;
+ default:
+ cerr << "Internal error parsing command arguments." << endl;
+ usage(s, 1);
+ }
+ break;
+
default:
usage (s, 1);
break;
@@ -477,6 +532,18 @@ main (int argc, char * const argv [])
usage (s, 1);
}
+ if (!s.kernel_symtab_path.empty())
+ {
+ if (s.consult_symtab)
+ {
+ cerr << "You can't specify --kelf and --kmap together." << endl;
+ usage (s, 1);
+ }
+ s.consult_symtab = true;
+ if (s.kernel_symtab_path == PATH_TBD)
+ s.kernel_symtab_path = string("/boot/System.map-") + s.kernel_release;
+ }
+
if (s.last_pass > 4 && release_changed)
{
if (s.verbose)
diff --git a/session.h b/session.h
index b06d9ecc..e44e5c98 100644
--- a/session.h
+++ b/session.h
@@ -104,6 +104,12 @@ struct systemtap_session
std::string cache_path;
std::string hash_path;
+ // dwarfless operation
+ bool consult_symtab;
+ std::string kernel_symtab_path;
+ bool ignore_vmlinux;
+ bool ignore_dwarf;
+
// temporary directory for module builds etc.
// hazardous - it is "rm -rf"'d at exit
std::string tmpdir;
diff --git a/stap.1.in b/stap.1.in
index acfc64c3..1b9fe466 100644
--- a/stap.1.in
+++ b/stap.1.in
@@ -153,6 +153,41 @@ Start the probes, run CMD, and exit when CMD finishes.
.BI \-x " PID"
Sets target() to PID. This allows scripts to be written that filter on
a specific process.
+.TP
+.B \-\-kelf
+For names and addresses of functions to probe,
+consult the symbol tables in the kernel and modules.
+This can be useful if your kernel and/or modules were compiled
+without debugging information, or the function you want to probe
+is in an assembly-language file built without debugging information.
+See the
+.B "MAKING DO WITH SYMBOL TABLES"
+section for more information.
+.TP
+.BI \-\-kmap [=FILE]
+For names and addresses of kernel functions to probe,
+consult the symbol table in the indicated text file.
+The default is /boot/System.map-VERSION.
+The contents of this file should be in the form of the default output from
+.IR nm (1).
+Only symbols of type T or t are used.
+If you specify /proc/kallsyms or some other file in that format,
+where lines for module symbols contain a fourth column,
+reading of the symbol table stops with the first module symbol
+(which should be right after the last kernel symbol).
+As with
+.BR \-\-kelf ,
+the symbol table in each module's .ko file will also be consulted.
+See the
+.B "MAKING DO WITH SYMBOL TABLES"
+section for more information.
+.TP
+.B \-\-ignore\-vmlinux
+For testing, act as though neither the uncompressed kernel (vmlinux)
+nor the kernel debugging information can be found.
+.TP
+.B \-\-ignore\-dwarf
+For testing, act as though vmlinux and modules lack debugging information.
.SH ARGUMENTS
@@ -916,6 +951,47 @@ have overloaded the system and an exit is triggered.
By default, overload processing is turned on for all modules. If you
would like to disable overload processing, define STP_NO_OVERLOAD.
+.SH MAKING DO WITH SYMBOL TABLES
+Systemtap performs best when it has access to the debugging information
+associated with your kernel and modules.
+However, if this information is not available,
+systemtap can still support probing of function entries and returns
+using symbols read from vmlinux and/or the modules in /lib/modules.
+Systemtap can also read the kernel symbol table from a text file
+such as /boot/System.map or /proc/kallsyms.
+See the
+.B \-\--kelf
+and
+.B \-\--kmap
+options.
+.PP
+If systemtap finds relevant debugging information,
+it will use it even if you specify
+.B \-\--kelf
+or
+.BR \-\--kmap .
+.PP
+Without debugging information, systemtap cannot support the
+following types of language constructs:
+.IP \(bu 4
+probe specifications that refer to source files or line numbers
+.IP \(bu 4
+probe specifications that refer to inline functions
+.IP \(bu 4
+statements that refer to $target variables
+.IP \(bu 4
+tapset-defined variables defined using any of the above constructs.
+In particular, at this writing,
+the prologue blocks for certain aliases in the syscall tapset
+(e.g., syscall.open) contain "if" statements that refer to $target variables.
+If your script refers to any such aliases,
+systemtap must have access to the kernel's debugging information.
+.PP
+Most T and t symbols correspond to function entry points, but some do not.
+Based only on the symbol table, systemtap cannot tell the difference.
+Placing return probes on symbols that aren't entry points
+will most likely lead to kernel stack corruption.
+
.SH FILES
.\" consider autoconf-substituting these directories
.TP
diff --git a/tapsets.cxx b/tapsets.cxx
index ceda9015..af7e5590 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -40,6 +40,7 @@ extern "C" {
#include <regex.h>
#include <glob.h>
#include <fnmatch.h>
+#include <stdio.h>
#include "loc2c.h"
#define __STDC_FORMAT_MACROS
@@ -434,12 +435,27 @@ static string TOK_STATEMENT("statement");
static string TOK_ABSOLUTE("absolute");
static string TOK_PROCESS("process");
+// Can we handle this query with just symbol-table info?
+enum dbinfo_reqt
+{
+ dbr_unknown,
+ dbr_none, // kernel.statement(NUM).absolute
+ dbr_need_symtab, // can get by with symbol table if there's no dwarf
+ dbr_need_dwarf
+};
+
+enum info_status
+{
+ info_unknown,
+ info_present,
+ info_absent
+};
struct
func_info
{
func_info()
- : decl_file(NULL), decl_line(-1), prologue_end(0)
+ : decl_file(NULL), decl_line(-1), addr(0), prologue_end(0)
{
memset(&die, 0, sizeof(die));
}
@@ -447,6 +463,7 @@ func_info
char const * decl_file;
int decl_line;
Dwarf_Die die;
+ Dwarf_Addr addr;
Dwarf_Addr prologue_end;
};
@@ -465,6 +482,78 @@ inline_instance_info
};
+struct dwarf_query; // forward decls
+struct dwflpp;
+struct symbol_table;
+
+struct
+module_info
+{
+ Dwfl_Module* mod;
+ const char* name;
+ string elf_path;
+ Dwarf_Addr addr;
+ Dwarf_Addr bias;
+ symbol_table *sym_table;
+ info_status dwarf_status; // module has dwarf info?
+ info_status symtab_status; // symbol table cached?
+
+ void get_symtab(dwarf_query *q);
+
+ module_info(const char *name) :
+ mod(NULL),
+ name(name),
+ addr(0),
+ bias(0),
+ sym_table(NULL),
+ dwarf_status(info_unknown),
+ symtab_status(info_unknown)
+ {}
+
+ ~module_info();
+};
+
+struct
+module_cache
+{
+ map<string, module_info*> cache;
+ bool paths_collected;
+ bool dwarf_collected;
+
+ module_cache() : paths_collected(false), dwarf_collected(false) {}
+};
+typedef struct module_cache module_cache_t;
+
+typedef map<string, vector<Dwarf_Die>*> cu_function_cache_t;
+
+struct
+symbol_table
+{
+ module_info *mod_info; // associated module
+ map<string, func_info*> map_by_name;
+ vector<func_info*> list_by_addr;
+
+ void add_symbol(const char *name, Dwarf_Addr addr, Dwarf_Addr *high_addr);
+ enum info_status read_symbols(FILE *f, const string& path);
+ enum info_status read_from_elf_file(const string& path);
+ enum info_status read_from_text_file(const string& path);
+ enum info_status get_from_elf();
+ void mark_dwarf_redundancies(dwflpp *dw);
+ func_info *lookup_symbol(const string& name);
+ Dwarf_Addr lookup_symbol_address(const string& name);
+ func_info *get_func_containing_address(Dwarf_Addr addr);
+ int get_index_for_address(Dwarf_Addr addr);
+
+ symbol_table(module_info *mi) : mod_info(mi) {}
+ ~symbol_table();
+};
+
+static bool null_die(Dwarf_Die *die)
+{
+ static Dwarf_Die null = { 0 };
+ return (!die || !memcmp(die, &null, sizeof(null)));
+}
+
static int
query_cu (Dwarf_Die * cudie, void * arg);
@@ -479,9 +568,6 @@ dwarf_diename_integrate (Dwarf_Die *die)
return dwarf_formstring (dwarf_attr_integrate (die, DW_AT_name, &attr_mem));
}
-
-struct dwarf_query; // forward decl
-
struct dwflpp
{
systemtap_session & sess;
@@ -491,6 +577,7 @@ struct dwflpp
Dwfl_Module * module;
Dwarf * module_dwarf;
Dwarf_Addr module_bias;
+ module_info * mod_info;
// These describe the current module's PC address range
Dwarf_Addr module_start;
@@ -512,40 +599,55 @@ struct dwflpp
}
- void get_module_dwarf(bool required = false)
+ void get_module_dwarf(bool required = false, bool report = true)
{
- if (!module_dwarf)
- module_dwarf = dwfl_module_getdwarf(module, &module_bias);
-
- if (!module_dwarf)
+ if (!module_dwarf && mod_info->dwarf_status != info_absent)
{
- string msg = "cannot find ";
- if (module_name == "")
- msg += "kernel";
- else
- msg += string("module ") + module_name;
- msg += " debuginfo";
-
- int i = dwfl_errno();
- if (i)
- msg += string(": ") + dwfl_errmsg (i);
+ if (!sess.ignore_dwarf)
+ module_dwarf = dwfl_module_getdwarf(module, &module_bias);
+ mod_info->dwarf_status = (module_dwarf ? info_present : info_absent);
+ }
- if (required)
- throw semantic_error (msg);
- else
- cerr << "WARNING: " << msg << "\n";
+ if (!module_dwarf && report)
+ {
+ string msg = "cannot find ";
+ if (module_name == "")
+ msg += "kernel";
+ else
+ msg += string("module ") + module_name;
+ msg += " debuginfo";
+
+ int i = dwfl_errno();
+ if (i)
+ msg += string(": ") + dwfl_errmsg (i);
+
+ if (required)
+ throw semantic_error (msg);
+ else
+ cerr << "WARNING: " << msg << "\n";
}
}
- void focus_on_module(Dwfl_Module * m)
+ void focus_on_module(Dwfl_Module * m, module_info * mi)
{
- assert(m);
module = m;
- module_name = default_name(dwfl_module_info(module, NULL,
+ mod_info = mi;
+ if (m)
+ {
+ module_name = default_name(dwfl_module_info(module, NULL,
&module_start, &module_end,
NULL, NULL,
NULL, NULL),
"module");
+ }
+ else
+ {
+ assert(mi && mi->name && mi->name == TOK_KERNEL);
+ module_name = mi->name;
+ module_start = 0;
+ module_end = 0;
+ module_bias = mi->bias;
+ }
// Reset existing pointers and names
@@ -591,7 +693,7 @@ struct dwflpp
cu = NULL;
Dwfl_Module* mod = dwfl_addrmodule(dwfl, a);
if (mod) // address could be wildly out of range
- focus_on_module(mod);
+ focus_on_module(mod, NULL);
}
@@ -634,7 +736,6 @@ struct dwflpp
bool module_name_matches(string pattern)
{
- assert(module);
bool t = (fnmatch(pattern.c_str(), module_name.c_str(), 0) == 0);
if (t && sess.verbose>3)
clog << "pattern '" << pattern << "' "
@@ -642,27 +743,35 @@ struct dwflpp
<< "module '" << module_name << "'" << "\n";
return t;
}
+ bool name_has_wildcard(string pattern)
+ {
+ return (pattern.find('*') != string::npos ||
+ pattern.find('?') != string::npos ||
+ pattern.find('[') != string::npos);
+ }
bool module_name_final_match(string pattern)
{
// Assume module_name_matches(). Can there be any more matches?
// Not unless the pattern is a wildcard, since module names are
// presumed unique.
- return (pattern.find('*') == string::npos &&
- pattern.find('?') == string::npos &&
- pattern.find('[') == string::npos);
+ return !name_has_wildcard(pattern);
}
- bool function_name_matches(string pattern)
+ bool function_name_matches_pattern(string name, string pattern)
{
- assert(function);
- bool t = (fnmatch(pattern.c_str(), function_name.c_str(), 0) == 0);
+ bool t = (fnmatch(pattern.c_str(), name.c_str(), 0) == 0);
if (t && sess.verbose>3)
clog << "pattern '" << pattern << "' "
<< "matches "
- << "function '" << function_name << "'" << "\n";
+ << "function '" << name << "'" << "\n";
return t;
}
+ bool function_name_matches(string pattern)
+ {
+ assert(function);
+ return function_name_matches_pattern(function_name, pattern);
+ }
bool function_name_final_match(string pattern)
{
return module_name_final_match (pattern);
@@ -704,20 +813,57 @@ struct dwflpp
throw semantic_error (msg);
}
+ // static so pathname_caching_callback() can access them
+ static module_cache_t module_cache;
+ static bool ignore_vmlinux;
+
- dwflpp(systemtap_session & sess)
+ dwflpp(systemtap_session & session)
:
- sess(sess),
+ sess(session),
dwfl(NULL),
module(NULL),
module_dwarf(NULL),
module_bias(0),
+ mod_info(NULL),
module_start(0),
module_end(0),
cu(NULL),
function(NULL)
- {}
+ {
+ ignore_vmlinux = sess.ignore_vmlinux;
+ }
+
+ // Called by dwfl_linux_kernel_report_offline(). We may not have
+ // dwarf info for the kernel and/or modules, so remember this
+ // module's pathname in case we need to extract elf info from it.
+ // (Currently, we get all the elf info we need via elfutils -- if the
+ // elf file exists -- so remembering the pathname isn't strictly needed.
+ // But we still need to handle the case where there's no vmlinux.)
+ static int pathname_caching_callback(const char *name, const char *path)
+ {
+ module_info *mi = new module_info(name);
+ module_cache.cache[name] = mi;
+ if (ignore_vmlinux && path && name == TOK_KERNEL)
+ {
+ // report_kernel() in elfutils found vmlinux, but pretend it didn't.
+ // Given a non-null path, returning 1 means keep reporting modules.
+ mi->dwarf_status = info_absent;
+ return 1;
+ }
+ else if (path)
+ {
+ mi->elf_path = path;
+ return 1;
+ }
+
+ // No vmlinux. Here returning 0 to report_kernel() means go ahead
+ // and keep reporting modules.
+ assert(name == TOK_KERNEL);
+ mi->dwarf_status = info_absent;
+ return 0;
+ }
void setup(bool kernel, bool debuginfo_needed = true)
{
@@ -751,10 +897,18 @@ struct dwflpp
throw semantic_error ("cannot open dwfl");
dwfl_report_begin (dwfl);
+ int (*callback)(const char *name, const char *path);
+ if (sess.consult_symtab && !module_cache.paths_collected)
+ {
+ callback = pathname_caching_callback;
+ module_cache.paths_collected = true;
+ }
+ else
+ callback = NULL;
int rc = dwfl_linux_kernel_report_offline (dwfl,
sess.kernel_release.c_str(),
/* selection predicate */
- NULL);
+ callback);
if (debuginfo_needed)
dwfl_assert (string("missing kernel ") +
sess.kernel_release +
@@ -796,36 +950,34 @@ struct dwflpp
// -----------------------------------------------------------------
- struct module_cache_entry {
- Dwfl_Module* mod;
- const char* name;
- Dwarf_Addr addr;
- };
- typedef vector<module_cache_entry> module_cache_t;
- module_cache_t module_cache;
-
static int module_caching_callback(Dwfl_Module * mod,
void **,
const char *name,
Dwarf_Addr addr,
void *param)
{
- module_cache_t* cache = static_cast<module_cache_t*>(param);
- module_cache_entry it;
- it.mod = mod;
- it.name = name;
- it.addr = addr;
- cache->push_back (it);
+ module_cache_t *cache = static_cast<module_cache_t*>(param);
+ module_info *mi = NULL;
+
+ if (ignore_vmlinux && name == TOK_KERNEL)
+ // This wouldn't be called for vmlinux if vmlinux weren't there.
+ return DWARF_CB_OK;
+
+ if (cache->paths_collected)
+ mi = cache->cache[name];
+ if (!mi)
+ {
+ mi = new module_info(name);
+ cache->cache[name] = mi;
+ }
+ mi->mod = mod;
+ mi->addr = addr;
return DWARF_CB_OK;
}
-
- void iterate_over_modules(int (* callback)(Dwfl_Module *, void **,
- const char *, Dwarf_Addr,
- void *),
- void * data)
+ void cache_modules_dwarf()
{
- if (module_cache.empty())
+ if (!module_cache.dwarf_collected)
{
ptrdiff_t off = 0;
do
@@ -836,18 +988,29 @@ struct dwflpp
}
while (off > 0);
dwfl_assert("dwfl_getmodules", off);
+ module_cache.dwarf_collected = true;
}
+ }
- // Traverse the cache.
- for (unsigned i = 0; i < module_cache.size(); i++)
+ void iterate_over_modules(int (* callback)(Dwfl_Module *, module_info *,
+ const char *, Dwarf_Addr,
+ void *),
+ void * data)
+ {
+ cache_modules_dwarf();
+
+ map<string, module_info*>::iterator i;
+ for (i = module_cache.cache.begin(); i != module_cache.cache.end(); i++)
{
if (pending_interrupts) return;
- module_cache_entry& it = module_cache[i];
- int rc = callback (it.mod, 0, it.name, it.addr, data);
+ module_info *mi = i->second;
+ int rc = callback (mi->mod, mi, mi->name, mi->addr, data);
if (rc != DWARF_CB_OK) break;
}
}
+ // Defined after dwarf_query
+ void query_modules(dwarf_query *q);
// -----------------------------------------------------------------
@@ -937,7 +1100,6 @@ struct dwflpp
// -----------------------------------------------------------------
- typedef map<string, vector<Dwarf_Die>*> cu_function_cache_t;
cu_function_cache_t cu_function_cache;
static int cu_function_caching_callback (Dwarf_Die* func, void *arg)
@@ -947,9 +1109,10 @@ struct dwflpp
return DWARF_CB_OK;
}
- void iterate_over_functions (int (* callback)(Dwarf_Die * func, void * arg),
+ int iterate_over_functions (int (* callback)(Dwarf_Die * func, void * arg),
void * data)
{
+ int rc = DWARF_CB_OK;
assert (module);
assert (cu);
@@ -965,9 +1128,10 @@ struct dwflpp
for (unsigned i=0; i<v->size(); i++)
{
Dwarf_Die die = v->at(i);
- int rc = (*callback)(& die, data);
+ rc = (*callback)(& die, data);
if (rc != DWARF_CB_OK) break;
}
+ return rc;
}
@@ -2008,6 +2172,9 @@ struct dwflpp
}
};
+module_cache_t dwflpp::module_cache;
+bool dwflpp::ignore_vmlinux = false;
+
enum
function_spec_type
@@ -2019,7 +2186,6 @@ function_spec_type
struct dwarf_builder;
-struct dwarf_query;
// XXX: This class is a candidate for subclassing to separate
@@ -2177,6 +2343,8 @@ struct dwarf_query : public base_query
vector<derived_probe *> & results);
virtual void handle_query_module();
+ void query_module_dwarf();
+ void query_module_symtab();
void add_probe_point(string const & funcname,
char const * filename,
@@ -2224,11 +2392,15 @@ struct dwarf_query : public base_query
bool has_absolute;
+ enum dbinfo_reqt dbinfo_reqt;
+ enum dbinfo_reqt assess_dbinfo_reqt();
+
function_spec_type parse_function_spec(string & spec);
function_spec_type spec_type;
string function;
string file;
int line;
+ bool query_done; // Found exact match
set<char const *> filtered_srcfiles;
@@ -2365,11 +2537,13 @@ dwarf_query::dwarf_query(systemtap_session & sess,
spec_type = parse_function_spec(statement_str_val);
build_blacklist(); // XXX: why not reuse amongst dwarf_query instances?
+ dbinfo_reqt = assess_dbinfo_reqt();
+ query_done = false;
}
void
-dwarf_query::handle_query_module()
+dwarf_query::query_module_dwarf()
{
if (has_function_num || has_statement_num)
{
@@ -2398,6 +2572,110 @@ dwarf_query::handle_query_module()
}
}
+static void query_func_info (Dwarf_Addr entrypc, func_info & fi,
+ dwarf_query * q);
+
+void
+dwarf_query::query_module_symtab()
+{
+ // Get the symbol table if it's necessary, sufficient, and not already got.
+ if (dbinfo_reqt == dbr_need_dwarf)
+ return;
+
+ module_info *mi = dw.mod_info;
+ if (dbinfo_reqt == dbr_need_symtab)
+ {
+ if (mi->symtab_status == info_unknown)
+ mi->get_symtab(this);
+ if (mi->symtab_status == info_absent)
+ return;
+ }
+
+ func_info *fi = NULL;
+ symbol_table *sym_table = mi->sym_table;
+
+ if (has_function_str)
+ {
+ // Per dwarf_query::assess_dbinfo_reqt()...
+ assert(spec_type == function_alone);
+ if (dw.name_has_wildcard(function_str_val))
+ {
+ // Until we augment the blacklist sufficently...
+ if (function_str_val.find_first_not_of("*?") == string::npos)
+ {
+ // e.g., kernel.function("*")
+ cerr << "Error: Pattern '"
+ << function_str_val
+ << "' matches every instruction address in the symbol table,"
+ << endl
+ << "some of which aren't even functions."
+ << " Please be more precise."
+ << endl;
+ return;
+ }
+
+ size_t i;
+ size_t nsyms = sym_table->list_by_addr.size();
+ for (i = 0; i < nsyms; i++)
+ {
+ fi = sym_table->list_by_addr.at(i);
+ if (!null_die(&fi->die))
+ continue; // already handled in query_module_dwarf()
+ if (dw.function_name_matches_pattern(fi->name, function_str_val))
+ query_func_info(fi->addr, *fi, this);
+ }
+ }
+ else
+ {
+ fi = sym_table->lookup_symbol(function_str_val);
+ if (fi && null_die(&fi->die))
+ query_func_info(fi->addr, *fi, this);
+ }
+ }
+ else
+ {
+ assert(has_function_num || has_statement_num);
+ // Find the "function" in which the indicated address resides.
+ Dwarf_Addr addr =
+ (has_function_num ? function_num_val : statement_num_val);
+ fi = sym_table->get_func_containing_address(addr);
+ if (!fi)
+ {
+ cerr << "Warning: address "
+ << hex << addr << dec
+ << " out of range for module "
+ << dw.module_name;
+ return;
+ }
+ if (!null_die(&fi->die))
+ {
+ // addr looks like it's in the compilation unit containing
+ // the indicated function, but query_module_dwarf() didn't
+ // match addr to any compilation unit, so addr must be
+ // above that cu's address range.
+ cerr << "Warning: address "
+ << hex << addr << dec
+ << " maps to no known compilation unit in module "
+ << dw.module_name;
+ return;
+ }
+ query_func_info(fi->addr, *fi, this);
+ }
+}
+
+void
+dwarf_query::handle_query_module()
+{
+ dw.get_module_dwarf(false,
+ (dbinfo_reqt == dbr_need_dwarf || !sess.consult_symtab));
+ if (dw.mod_info->dwarf_status == info_present)
+ query_module_dwarf();
+ // Consult the symbol table if we haven't found all we're looking for.
+ // asm functions can show up in the symbol table but not in dwarf.
+ if (sess.consult_symtab && !query_done)
+ query_module_symtab();
+}
+
void
dwarf_query::build_blacklist()
@@ -2578,7 +2856,7 @@ dwarf_query::parse_function_spec(string & spec)
// Forward declaration.
-static int query_kernel_module (Dwfl_Module *, void **, const char *,
+static int query_kernel_module (Dwfl_Module *, module_info *, const char *,
Dwarf_Addr, void *);
@@ -2696,7 +2974,13 @@ dwarf_query::add_probe_point(const string& funcname,
assert (! has_absolute); // already handled in dwarf_builder::build()
- if (dwfl_module_relocations (dw.module) > 0)
+ if (!dw.module)
+ {
+ assert(module == TOK_KERNEL);
+ reloc_section = "";
+ blacklist_section = "";
+ }
+ else if (dwfl_module_relocations (dw.module) > 0)
{
// This is arelocatable module; libdwfl already knows its
// sections, so we can relativize addr.
@@ -2746,6 +3030,44 @@ dwarf_query::add_probe_point(const string& funcname,
}
}
+enum dbinfo_reqt
+dwarf_query::assess_dbinfo_reqt()
+{
+ if (has_absolute)
+ {
+ // kernel.statement(NUM).absolute
+ return dbr_none;
+ }
+ if (has_inline)
+ {
+ // kernel.function("f").inline or module("m").function("f").inline
+ return dbr_need_dwarf;
+ }
+ if (has_function_str && spec_type == function_alone)
+ {
+ // kernel.function("f") or module("m").function("f")
+ return dbr_need_symtab;
+ }
+ if (has_statement_num)
+ {
+ // kernel.statement(NUM) or module("m").statement(NUM)
+ // Technically, all we need is the module offset (or _stext, for
+ // the kernel). But for that we need either the ELF file or (for
+ // _stext) the symbol table. In either case, the symbol table
+ // is available, and that allows us to map the NUM (address)
+ // to a function, which is goodness.
+ return dbr_need_symtab;
+ }
+ if (has_function_num)
+ {
+ // kernel.function(NUM) or module("m").function(NUM)
+ // Need the symbol table so we can back up from NUM to the
+ // start of the function.
+ return dbr_need_symtab;
+ }
+ // Symbol table tells us nothing about source files or line numbers.
+ return dbr_need_dwarf;
+}
@@ -3078,7 +3400,9 @@ query_cu (Dwarf_Die * cudie, void * arg)
// Pick up [entrypc, name, DIE] tuples for all the functions
// matching the query, and fill in the prologue endings of them
// all in a single pass.
- q->dw.iterate_over_functions (query_dwarf_func, q);
+ int rc = q->dw.iterate_over_functions (query_dwarf_func, q);
+ if (rc != DWARF_CB_OK)
+ q->query_done = true;
if (q->sess.prologue_searching
&& !q->has_statement_str && !q->has_statement_num) // PR 2608
@@ -3139,7 +3463,7 @@ query_cu (Dwarf_Die * cudie, void * arg)
static int
query_kernel_module (Dwfl_Module *mod,
- void **,
+ module_info *,
const char *name,
Dwarf_Addr,
void *arg)
@@ -3154,10 +3478,74 @@ query_kernel_module (Dwfl_Module *mod,
return DWARF_CB_OK;
}
+static void
+validate_module_elf (Dwfl_Module *mod, const char *name, base_query *q)
+{
+ // Validate the machine code in this elf file against the
+ // session machine. This is important, in case the wrong kind
+ // of debuginfo is being automagically processed by elfutils.
+ // While we can tell i686 apart from x86-64, unfortunately
+ // we can't help confusing i586 vs i686 (both EM_386).
+
+ Dwarf_Addr bias;
+ // We prefer dwfl_module_getdwarf to dwfl_module_getelf here,
+ // because dwfl_module_getelf can force costly section relocations
+ // we don't really need, while either will do for this purpose.
+ Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias))
+ ?: dwfl_module_getelf (mod, &bias));
+
+ GElf_Ehdr ehdr_mem;
+ GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
+ if (em == 0) { q->dw.dwfl_assert ("dwfl_getehdr", dwfl_errno()); }
+ int elf_machine = em->e_machine;
+ const char* debug_filename = "";
+ const char* main_filename = "";
+ (void) dwfl_module_info (mod, NULL, NULL,
+ NULL, NULL, NULL,
+ & main_filename,
+ & debug_filename);
+ const string& sess_machine = q->sess.architecture;
+ string expect_machine;
+
+ switch (elf_machine)
+ {
+ case EM_386: expect_machine = "i?86"; break; // accept e.g. i586
+ case EM_X86_64: expect_machine = "x86_64"; break;
+ case EM_PPC: expect_machine = "ppc"; break;
+ case EM_PPC64: expect_machine = "ppc64"; break;
+ case EM_S390: expect_machine = "s390x"; break;
+ case EM_IA_64: expect_machine = "ia64"; break;
+ case EM_ARM: expect_machine = "armv*"; break;
+ // XXX: fill in some more of these
+ default: expect_machine = "?"; break;
+ }
+
+ if (! debug_filename) debug_filename = main_filename;
+ if (! debug_filename) debug_filename = name;
+
+ if (fnmatch (expect_machine.c_str(), sess_machine.c_str(), 0) != 0)
+ {
+ stringstream msg;
+ msg << "ELF machine " << expect_machine << " (code " << elf_machine
+ << ") mismatch with target " << sess_machine
+ << " in '" << debug_filename << "'";
+ throw semantic_error(msg.str ());
+ }
+
+ if (q->sess.verbose>2)
+ clog << "focused on module '" << q->dw.module_name
+ << " = [0x" << hex << q->dw.module_start
+ << "-0x" << q->dw.module_end
+ << ", bias 0x" << q->dw.module_bias << "]" << dec
+ << " file " << debug_filename
+ << " ELF machine " << expect_machine
+ << " (code " << elf_machine << ")"
+ << "\n";
+}
static int
query_module (Dwfl_Module *mod,
- void **,
+ module_info *mi,
const char *name,
Dwarf_Addr,
void *arg)
@@ -3166,7 +3554,7 @@ query_module (Dwfl_Module *mod,
try
{
- q->dw.focus_on_module(mod);
+ q->dw.focus_on_module(mod, mi);
// If we have enough information in the pattern to skip a module and
// the module does not match that information, return early.
@@ -3179,67 +3567,15 @@ query_module (Dwfl_Module *mod,
if (q->dw.module_name == TOK_KERNEL && ! q->has_kernel)
return DWARF_CB_OK;
- // Validate the machine code in this elf file against the
- // session machine. This is important, in case the wrong kind
- // of debuginfo is being automagically processed by elfutils.
- // While we can tell i686 apart from x86-64, unfortunately
- // we can't help confusing i586 vs i686 (both EM_386).
-
- Dwarf_Addr bias;
- // We prefer dwfl_module_getdwarf to dwfl_module_getelf here,
- // because dwfl_module_getelf can force costly section relocations
- // we don't really need, while either will do for this purpose.
- Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias))
- ?: dwfl_module_getelf (mod, &bias));
-
- GElf_Ehdr ehdr_mem;
- GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
- if (em == 0) { q->dw.dwfl_assert ("dwfl_getehdr", dwfl_errno()); }
- int elf_machine = em->e_machine;
- const char* debug_filename = "";
- const char* main_filename = "";
- (void) dwfl_module_info (mod, NULL, NULL,
- NULL, NULL, NULL,
- & main_filename,
- & debug_filename);
- const string& sess_machine = q->sess.architecture;
- string expect_machine;
-
- switch (elf_machine)
- {
- case EM_386: expect_machine = "i?86"; break; // accept e.g. i586
- case EM_X86_64: expect_machine = "x86_64"; break;
- case EM_PPC: expect_machine = "ppc"; break;
- case EM_PPC64: expect_machine = "ppc64"; break;
- case EM_S390: expect_machine = "s390x"; break;
- case EM_IA_64: expect_machine = "ia64"; break;
- case EM_ARM: expect_machine = "armv*"; break;
- // XXX: fill in some more of these
- default: expect_machine = "?"; break;
- }
-
- if (! debug_filename) debug_filename = main_filename;
- if (! debug_filename) debug_filename = name;
-
- if (fnmatch (expect_machine.c_str(), sess_machine.c_str(), 0) != 0)
+ if (mod)
+ validate_module_elf(mod, name, q);
+ else
{
- stringstream msg;
- msg << "ELF machine " << expect_machine << " (code " << elf_machine
- << ") mismatch with target " << sess_machine
- << " in '" << debug_filename << "'";
- throw semantic_error(msg.str ());
+ assert(q->has_kernel); // and no vmlinux to examine
+ if (q->sess.verbose>2)
+ cerr << "focused on module '" << q->dw.module_name << "'\n";
}
- if (q->sess.verbose>2)
- clog << "focused on module '" << q->dw.module_name
- << " = [0x" << hex << q->dw.module_start
- << "-0x" << q->dw.module_end
- << ", bias 0x" << q->dw.module_bias << "]" << dec
- << " file " << debug_filename
- << " ELF machine " << expect_machine
- << " (code " << elf_machine << ")"
- << "\n";
-
q->handle_query_module();
// If we know that there will be no more matches, abort early.
@@ -3255,6 +3591,24 @@ query_module (Dwfl_Module *mod,
}
}
+void
+dwflpp::query_modules(dwarf_query *q)
+{
+ string name = q->module_val;
+ if (name_has_wildcard(name))
+ iterate_over_modules(&query_module, q);
+ else
+ {
+ cache_modules_dwarf();
+
+ map<string, module_info*>::iterator i = module_cache.cache.find(name);
+ if (i != module_cache.cache.end())
+ {
+ module_info *mi = i->second;
+ query_module(mi->mod, mi, name.c_str(), mi->addr, q);
+ }
+ }
+}
struct var_expanding_copy_visitor: public deep_copy_visitor
{
@@ -3786,7 +4140,7 @@ dwarf_derived_probe::dwarf_derived_probe(const string& funcname,
q.base_loc->tok);
// Make a target-variable-expanded copy of the probe body
- if (scope_die)
+ if (!null_die(scope_die))
{
dwarf_var_expanding_copy_visitor v (q, scope_die, dwfl_addr);
require <block*> (&v, &(this->body), this->body);
@@ -4242,7 +4596,300 @@ dwarf_builder::build(systemtap_session & sess,
return;
}
- dw->iterate_over_modules(&query_module, &q);
+ // dw->iterate_over_modules(&query_module, &q);
+ dw->query_modules(&q);
+}
+
+symbol_table::~symbol_table()
+{
+ // map::clear() and vector::clear() don't call destructors for
+ // pointers, only for objects.
+ int i;
+ int nsym = (int) list_by_addr.size();
+ for (i = 0; i < nsym; i++)
+ delete list_by_addr.at(i);
+ list_by_addr.clear();
+ map_by_name.clear();
+}
+
+void
+symbol_table::add_symbol(const char *name, Dwarf_Addr addr,
+ Dwarf_Addr *high_addr)
+{
+ func_info *fi = new func_info();
+ fi->addr = addr;
+ fi->name = name;
+ map_by_name[fi->name] = fi;
+ // TODO: Use a multimap in case there are multiple static
+ // functions with the same name?
+ if (addr >= *high_addr)
+ {
+ list_by_addr.push_back(fi);
+ *high_addr = addr;
+ }
+ else
+ {
+ // Symbols aren't in numerical order. FWIW, sort(1) doesn't
+ // handle hex numbers without the leading 0x.
+ int index = get_index_for_address(fi->addr);
+ list_by_addr.insert(list_by_addr.begin()+(index+1), fi);
+ }
+}
+
+enum info_status
+symbol_table::read_symbols(FILE *f, const string& path)
+{
+ // Based on do_kernel_symbols() in runtime/staprun/symbols.c
+ int ret;
+ char *name, *mod;
+ char type;
+ unsigned long long addr;
+ Dwarf_Addr high_addr = 0;
+ int line = 0;
+
+ // %as (non-POSIX) mallocs space for the string and stores its address.
+ while ((ret = fscanf(f, "%llx %c %as [%as", &addr, &type, &name, &mod)) > 0)
+ {
+ line++;
+ if (ret < 3)
+ {
+ cerr << "Symbol table error: Line "
+ << line
+ << " of symbol list from "
+ << path
+ << " is not in correct format: address type name [module]";
+ // Caller should delete symbol_table object.
+ return info_absent;
+ }
+ if (ret > 3)
+ {
+ // Modules are loaded above the kernel, so if we're getting
+ // modules, we're done.
+ free(name);
+ free(mod);
+ goto done;
+ }
+ if (type == 'T' || type == 't')
+ add_symbol(name, (Dwarf_Addr) addr, &high_addr);
+ free(name);
+ }
+
+done:
+ if (list_by_addr.size() < 1)
+ {
+ cerr << "Symbol table error: "
+ << path << " contains no function symbols." << endl;
+ return info_absent;
+ }
+ return info_present;
+}
+
+// NB: This currently unused. We use get_from_elf() instead because
+// that gives us raw addresses -- which we need for modules -- whereas
+// nm provides the address relative to the beginning of the section.
+enum info_status
+symbol_table::read_from_elf_file(const string &path)
+{
+ FILE *f;
+ string cmd = string("/usr/bin/nm -n --defined-only ") + path;
+ f = popen(cmd.c_str(), "r");
+ if (!f)
+ {
+ // nm failures are detected by pclose, not popen.
+ cerr << "Internal error reading symbol table from "
+ << path << " -- " << strerror (errno);
+ return info_absent;
+ }
+ enum info_status status = read_symbols(f, path);
+ if (pclose(f) != 0)
+ {
+ if (status == info_present)
+ cerr << "Warning: nm cannot read symbol table from " << path;
+ return info_absent;
+ }
+ return status;
+}
+
+enum info_status
+symbol_table::read_from_text_file(const string& path)
+{
+ FILE *f = fopen(path.c_str(), "r");
+ if (!f)
+ {
+ cerr << "Warning: cannot read symbol table from "
+ << path << " -- " << strerror (errno);
+ return info_absent;
+ }
+ enum info_status status = read_symbols(f, path);
+ (void) fclose(f);
+ return status;
+}
+
+enum info_status
+symbol_table::get_from_elf()
+{
+ Dwarf_Addr high_addr = 0;
+ Dwfl_Module *mod = mod_info->mod;
+ int syments = dwfl_module_getsymtab(mod);
+ assert(syments);
+ for (int i = 1; i < syments; ++i)
+ {
+ GElf_Sym sym;
+ const char *name = dwfl_module_getsym(mod, i, &sym, NULL);
+ if (name && GELF_ST_TYPE(sym.st_info) == STT_FUNC)
+ add_symbol(name, sym.st_value, &high_addr);
+ }
+ return info_present;
+}
+
+void
+symbol_table::mark_dwarf_redundancies(dwflpp *dw)
+{
+ // dwflpp.cu_function_cache maps each module_name:cu_name to a
+ // vector of Dwarf_Dies, one per function.
+ string module_prefix = string(mod_info->name) + ":";
+
+ cu_function_cache_t::iterator cu;
+ for (cu = dw->cu_function_cache.begin();
+ cu != dw->cu_function_cache.end(); cu++)
+ {
+ string key = cu->first;
+ if (key.find(module_prefix) == 0)
+ {
+ // Found a compilation unit in the module of interest.
+ // Mark all its functions in the symbol table.
+ vector<Dwarf_Die>* v = cu->second;
+ assert(v);
+ for (unsigned f=0; f < v->size(); f++)
+ {
+ Dwarf_Die func = v->at(f);
+ string func_name = dwarf_diename(&func);
+ // map_by_name[func_name]->die = func;
+ map<string, func_info*>::iterator i = map_by_name.find(func_name);
+ // Func names can show up in the dwarf but not the symtab (!).
+ if (i != map_by_name.end())
+ {
+ func_info *fi = i->second;
+ fi->die = func;
+ }
+ }
+ }
+ }
+}
+
+func_info *
+symbol_table::get_func_containing_address(Dwarf_Addr addr)
+{
+ int index = get_index_for_address(addr);
+ if (index < 0)
+ return NULL;
+ return list_by_addr.at(index);
+}
+
+// Find the index in list_by_addr of the last element whose address
+// is <= addr. Returns -1 if addr is less than the first address in
+// list_by_addr.
+int
+symbol_table::get_index_for_address(Dwarf_Addr addr)
+{
+ // binary search from runtime/sym.c
+ int begin = 0;
+ int mid;
+ int end = list_by_addr.size();
+
+ if (end == 0 || addr < list_by_addr.at(0)->addr)
+ return -1;
+ do
+ {
+ mid = (begin + end) / 2;
+ if (addr < list_by_addr.at(mid)->addr)
+ end = mid;
+ else
+ begin = mid;
+ }
+ while (begin + 1 < end);
+ return begin;
+}
+
+func_info *
+symbol_table::lookup_symbol(const string& name)
+{
+ map<string, func_info*>::iterator i = map_by_name.find(name);
+ if (i == map_by_name.end())
+ return NULL;
+ return i->second;
+}
+
+Dwarf_Addr
+symbol_table::lookup_symbol_address(const string& name)
+{
+ func_info *fi = lookup_symbol(name);
+ if (fi)
+ return fi->addr;
+ return 0;
+}
+
+void
+module_info::get_symtab(dwarf_query *q)
+{
+ systemtap_session &sess = q->sess;
+
+ sym_table = new symbol_table(this);
+ if (!elf_path.empty())
+ {
+ if (name == TOK_KERNEL && !sess.kernel_symtab_path.empty())
+ cerr << "Warning: reading symbol table from "
+ << elf_path
+ << " -- ignoring "
+ << sess.kernel_symtab_path
+ << endl ;;
+ symtab_status = sym_table->get_from_elf();
+ }
+ else
+ {
+ assert(name == TOK_KERNEL);
+ if (sess.kernel_symtab_path.empty())
+ {
+ symtab_status = info_absent;
+ cerr << "Error: Cannot find vmlinux."
+ << " Consider using --kmap instead of --kelf."
+ << endl;;
+ }
+ else
+ {
+ symtab_status =
+ sym_table->read_from_text_file(sess.kernel_symtab_path);
+ if (symtab_status == info_present)
+ {
+ sess.sym_kprobes_text_start =
+ sym_table->lookup_symbol_address("__kprobes_text_start");
+ sess.sym_kprobes_text_end =
+ sym_table->lookup_symbol_address("__kprobes_text_end");
+ sess.sym_stext = sym_table->lookup_symbol_address("_stext");
+ bias = sym_table->lookup_symbol_address("_text");
+ }
+ }
+ }
+ if (symtab_status == info_absent)
+ {
+ delete sym_table;
+ sym_table = NULL;
+ return;
+ }
+
+ // If we have dwarf for the same module, mark the redundant symtab
+ // entries.
+ //
+ // In dwarf_query::handle_query_module(), the call to query_module_dwarf()
+ // precedes the call to query_module_symtab(). So we should never read
+ // a module's symbol table without first having tried to get its dwarf.
+ sym_table->mark_dwarf_redundancies(&q->dw);
+}
+
+module_info::~module_info()
+{
+ if (sym_table)
+ delete sym_table;
}