// tapset resolution // Copyright (C) 2005, 2006 Red Hat Inc. // Copyright (C) 2005, 2006 Intel Corporation. // // This file is part of systemtap, and is free software. You can // redistribute it and/or modify it under the terms of the GNU General // Public License (GPL); either version 2, or (at your option) any // later version. #include "config.h" #include "staptree.h" #include "elaborate.h" #include "tapsets.h" #include "translate.h" #include "session.h" #include #include #include #include #include #include #include #include #include extern "C" { #include #include #include #include #include #include #include #include #include "loc2c.h" #define __STDC_FORMAT_MACROS #include } #ifdef PERFMON #include #include #endif using namespace std; // XXX: should standardize to these functions throughout translator template inline OUT lex_cast(IN const & in) { stringstream ss; OUT out; if (!(ss << in && ss >> out)) throw runtime_error("bad lexical cast"); return out; } template inline OUT lex_cast_hex(IN const & in) { stringstream ss; OUT out; if (!(ss << hex << showbase << in && ss >> out)) throw runtime_error("bad lexical cast"); return out; } // return as quoted string, with at least '"' backslash-escaped template inline string lex_cast_qstring(IN const & in) { stringstream ss; string out, out2; if (!(ss << in)) throw runtime_error("bad lexical cast"); out = ss.str(); out2 += '"'; for (unsigned i=0; i basest_names; string nm = p->basest()->name; if (basest_names.find(nm) == basest_names.end()) { basest_names.insert (nm); o->newline() << "#ifdef STP_TIMING"; o->newline() << "{"; o->newline(1) << "const char *probe_point = " << lex_cast_qstring (*p->basest()->locations[0]) << ";"; o->newline() << "const char *decl_location = " << lex_cast_qstring (p->basest()->tok->location) << ";"; o->newline() << "struct stat_data *stats = _stp_stat_get (time_" << p->basest()->name << ", 0);"; o->newline() << "const char *error;"; o->newline() << "if (stats->count) {"; o->newline(1) << "int64_t avg = _stp_div64 (&error, stats->sum, stats->count);"; o->newline() << "_stp_printf (\"probe %s (%s), %lld hits taking %lldmin/%lldavg/%lldmax cycles.\\n\","; o->newline() << "probe_point, decl_location, (long long) stats->count, (long long) stats->min, (long long) avg, (long long) stats->max);"; o->newline() << "_stp_print_flush();"; o->newline(-1) << "}"; o->newline(-1) << "}"; o->newline() << "#endif"; } } // ------------------------------------------------------------------------ void derived_probe::emit_common_header (translator_output* o) { #ifdef PERFMON o->newline() << "static struct pfarg_ctx _pfm_context;"; o->newline() << "static void *_pfm_desc;"; o->newline() << "static struct pfarg_pmc *_pfm_pmc_x;"; o->newline() << "static int _pfm_num_pmc_x;"; o->newline() << "static struct pfarg_pmd *_pfm_pmd_x;"; o->newline() << "static int _pfm_num_pmd_x;"; #endif o->newline(); o->newline() << "static struct context* common_probe_prologue (int state) {"; o->newline(1); o->newline() << "struct context* c;"; o->newline() << "if (atomic_read (&session_state) != state)"; o->newline(1) << "return NULL;"; o->newline() << "c = per_cpu_ptr (contexts, smp_processor_id());"; o->newline(-1) << "if (unlikely (atomic_inc_return (&c->busy) != 1)) {"; o->newline(1) << "if (atomic_inc_return (& skipped_count) > MAXSKIPPED) {"; o->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; // NB: We don't assume that we can safely call stp_error etc. in such // a reentrant context. But this is OK: o->newline() << "_stp_exit ();"; o->newline(-1) << "}"; o->newline() << "atomic_dec (& c->busy);"; o->newline() << "return NULL;"; o->newline(-1) << "}"; o->newline(); o->newline() << "c->last_error = 0;"; o->newline() << "c->nesting = 0;"; o->newline() << "c->regs = 0;"; o->newline() << "c->actioncount = 0;"; o->newline() << "return c;"; o->newline(-1) << "}"; o->newline(); o->newline() << "static void common_probe_epilogue (struct context* c) {"; o->newline(1) ; o->newline() << "if (unlikely (c->last_error && c->last_error[0])) {"; o->newline(1) << "if (c->last_stmt != NULL)"; o->newline(1) << "_stp_softerror (\"%s near %s\", c->last_error, c->last_stmt);"; o->newline(-1) << "else"; o->newline(1) << "_stp_softerror (\"%s\", c->last_error);"; o->indent(-1); o->newline() << "atomic_inc (& error_count);"; o->newline() << "if (atomic_read (& error_count) > MAXERRORS) {"; o->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_exit ();"; o->newline(-1) << "}"; o->newline(-1) << "}"; o->newline() << "atomic_dec (&c->busy);"; o->newline(-1) << "}"; } void derived_probe::emit_probe_prologue (translator_output* o, const std::string& statereq) { o->newline() << "struct context* c;"; o->newline() << "unsigned long flags;"; // NB: this cycles_t stuff might go into the common header block above o->newline() << "#ifdef STP_TIMING"; o->newline() << "cycles_t cycles_atstart;"; o->newline() << "#endif"; o->newline() << "local_irq_save (flags);"; o->newline() << "#ifdef STP_TIMING"; o->newline() << "cycles_atstart = get_cycles ();"; o->newline() << "#endif"; o->newline() << "c = common_probe_prologue (" << statereq << ");"; o->newline() << "if (c == NULL) goto probe_epilogue;"; o->newline() << "c->probe_point = probe_point;"; } void derived_probe::emit_probe_epilogue (translator_output* o) { o->newline() << "common_probe_epilogue (c);"; o->newline(-1) << "probe_epilogue:"; o->newline(1) << "#ifdef STP_TIMING"; o->newline() << "{"; o->newline(1) << "cycles_t cycles_atend = get_cycles ();"; // XXX: get_cycles() may return fewer significant digits than // cycles_t can carry. On some machines, cycles_t is 64 bits wide // but get_cycles() is only 52. So we should investigate truncating // these get_cycles() return values to some reasonable smaller // number of bits, perhaps 32 or even 24. o->newline() << "int64_t cycles_elapsed = (cycles_atend > cycles_atstart)"; o->newline(1) << "? (int64_t) (cycles_atend - cycles_atstart)"; o->newline() << ": (int64_t) (~(cycles_t)0) - cycles_atstart + cycles_atend + 1;"; o->newline() << "_stp_stat_add(time_" << basest()->name << ",cycles_elapsed);"; o->indent(-1); o->newline(-1) << "}"; o->newline() << "#endif"; o->newline() << "local_irq_restore (flags);"; } // ------------------------------------------------------------------------ // begin/end probes are run right during registration / deregistration // ------------------------------------------------------------------------ struct be_derived_probe: public derived_probe { bool begin; be_derived_probe (probe* p, bool b): derived_probe (p), begin (b) {} be_derived_probe (probe* p, probe_point* l, bool b): derived_probe (p, l), begin (b) {} void register_probe (systemtap_session& s); void emit_registrations_start (translator_output* o, unsigned index=0); void emit_registrations_end (translator_output* o, unsigned index) {} void emit_deregistrations (translator_output* o); void emit_probe_entries (translator_output* o); }; struct be_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(be_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; struct be_builder: public derived_probe_builder { bool begin; be_builder(bool b) : begin(b) {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { finished_results.push_back(new be_derived_probe(base, location, begin)); } }; void be_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } void be_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { if (begin) for (unsigned i=0; inewline() << "enter_" << name << "_" << i << " ();"; } void be_derived_probe::emit_deregistrations (translator_output* o) { if (!begin) for (unsigned i=0; inewline() << "enter_" << name << "_" << i << " ();"; } void be_derived_probe::emit_probe_entries (translator_output* o) { o->newline() << "#ifdef STP_TIMING"; o->newline() << "static __cacheline_aligned Stat " << "time_" << basest()->name << ";"; o->newline() << "#endif"; for (unsigned i=0; inewline() << "/* location " << i << ": " << *l << " */"; o->newline() << "static void enter_" << name << "_" << i << " (void) {"; // While begin/end probes are executed single-threaded, we // still code defensively and use a per-cpu context. o->indent(1); o->newline() << "const char* probe_point = " << lex_cast_qstring(*l) << ";"; emit_probe_prologue (o, (begin ? "STAP_SESSION_STARTING" : "STAP_SESSION_STOPPING")); // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline(-1) << "}\n"; } } void be_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void be_derived_probe_group::emit_module_init (translator_output* o) { if (probes.size () == 0) return; // Output the be probes create function o->newline() << "static int register_be_probes (void) {"; o->indent(1); for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o); o->newline() << "return 0;"; o->newline(-1) << "}\n"; // Output the be probes destroy function o->newline() << "static void unregister_be_probes (void) {"; o->indent(1); for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_deregistrations (o); emit_probe_timing(probes[i], o); } o->newline(-1) << "}\n"; } // ------------------------------------------------------------------------ // never probes are never run // ------------------------------------------------------------------------ struct never_derived_probe: public derived_probe { never_derived_probe (probe* p): derived_probe (p) {} never_derived_probe (probe* p, probe_point* l): derived_probe (p, l) {} void register_probe (systemtap_session& s); void emit_registrations_start (translator_output* o, unsigned index) {} void emit_registrations_end (translator_output* o, unsigned index) {} void emit_deregistrations (translator_output* o) {} void emit_probe_entries (translator_output* o) {} }; struct never_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(never_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up) {} virtual void emit_module_init (translator_output* o) {} }; void never_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } struct never_builder: public derived_probe_builder { never_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { finished_results.push_back(new never_derived_probe(base, location)); } }; // ------------------------------------------------------------------------ // Dwarf derived probes. // ------------------------------------------------------------------------ static string TOK_PROCESS("process"); static string TOK_KERNEL("kernel"); static string TOK_MODULE("module"); static string TOK_FUNCTION("function"); static string TOK_INLINE("inline"); static string TOK_RETURN("return"); static string TOK_CALLEES("callees"); static string TOK_STATEMENT("statement"); static string TOK_LABEL("label"); static string TOK_RELATIVE("relative"); struct func_info { func_info() : decl_file(NULL), decl_line(-1), prologue_end(0) { memset(&die, 0, sizeof(die)); } string name; char const * decl_file; int decl_line; Dwarf_Die die; Dwarf_Addr prologue_end; }; struct inline_instance_info { inline_instance_info() : decl_file(NULL), decl_line(-1) { memset(&die, 0, sizeof(die)); } string name; char const * decl_file; int decl_line; Dwarf_Die die; }; class symbol_cache { // For each module, we keep a multimap from function names to // (cudie, funcdie*) pairs. The first time we pass over a module, // we build up this multimap as an index. Our iteration over the // module's CUs and functions is then driven by the function or // statement pattern string we're scanning for. struct entry { Dwarf_Die cu; Dwarf_Die function; }; typedef multimap index; map indices; index *curr_index; Dwarf_Die * cu_die; void make_entry_for_function(Dwarf_Die *func_die); static int function_callback(Dwarf_Die * func, void * arg); void index_module(Dwarf * mod); public: void select_die_subsets(Dwarf * mod, string const & pattern, set & cus, multimap & funcs); }; void symbol_cache::make_entry_for_function(Dwarf_Die *func_die) { entry e; assert(this->cu_die); assert(this->curr_index); e.cu = *(this->cu_die); e.function = *(func_die); char const * fname = dwarf_diename(func_die); if (fname) curr_index->insert(make_pair(string(fname), e)); } int symbol_cache::function_callback(Dwarf_Die * func, void * arg) { symbol_cache *sym = static_cast(arg); sym->make_entry_for_function(func); return DWARF_CB_OK; } void symbol_cache::index_module(Dwarf *module_dwarf) { Dwarf_Off off = 0; size_t cuhl = 0; Dwarf_Off noff = 0; this->cu_die = NULL; while (dwarf_nextcu (module_dwarf, off, &noff, &cuhl, NULL, NULL, NULL) == 0) { Dwarf_Die die_mem; this->cu_die = dwarf_offdie (module_dwarf, off + cuhl, &die_mem); dwarf_getfuncs (this->cu_die, function_callback, this, 0); off = noff; } this->cu_die = NULL; } inline bool operator<(Dwarf_Die const & a, Dwarf_Die const & b) { return (a.addr < b.addr) || ((a.addr == b.addr) && (a.cu < b.cu)) || ((a.addr == b.addr) && (a.cu == b.cu) && (a.abbrev < b.abbrev)); } inline bool operator==(Dwarf_Die const & a, Dwarf_Die const & b) { return !((a < b) || (b < a)); } void symbol_cache::select_die_subsets(Dwarf *mod, string const & pattern, set & cus, multimap & funcs) { cus.clear(); funcs.clear(); index *ix = NULL; // First find the index for this module. If there's no index, build // one. map::const_iterator i = indices.find(mod); if (i == indices.end()) { this->curr_index = new index; index_module(mod); indices.insert(make_pair(mod, this->curr_index)); ix = this->curr_index; this->curr_index = NULL; this->cu_die = NULL; } else ix = i->second; assert(ix); // Now stem the pattern such that we have a minimal non-wildcard // prefix to search in the multimap for. We will use the full pattern // to narrow this set further. string stem; for (string::const_iterator i = pattern.begin(); i != pattern.end(); ++i) { if (*i == '?' || *i == '*' || *i == '[' || *i == ']') break; stem += *i; } // Now perform a lower-bound on the multimap, refine that result // set, and copy the CU and function DIEs into the parameter sets. index::const_iterator j = stem.empty() ? ix->begin() : ix->lower_bound(stem); while (j != ix->end() && (stem.empty() || j->first.compare(0, stem.size(), stem) == 0)) { if (fnmatch(pattern.c_str(), j->first.c_str(), 0) == 0) { cus.insert(j->second.cu); funcs.insert(make_pair(j->second.cu, j->second.function)); } ++j; } } static int query_cu (Dwarf_Die * cudie, void * arg); // Helper for dealing with selected portions of libdwfl in a more readable // fashion, and with specific cleanup / checking / logging options. static const char * dwarf_diename_integrate (Dwarf_Die *die) { Dwarf_Attribute attr_mem; return dwarf_formstring (dwarf_attr_integrate (die, DW_AT_name, &attr_mem)); } struct dwflpp { systemtap_session & sess; Dwfl * dwfl; symbol_cache cache; // These are "current" values we focus on. Dwfl_Module * module; Dwarf * module_dwarf; Dwarf_Addr module_bias; // These describe the current module's PC address range Dwarf_Addr module_start; Dwarf_Addr module_end; Dwarf_Die * cu; Dwarf_Die * function; set pattern_limited_cus; multimap pattern_limited_funcs; string module_name; string cu_name; string function_name; string const default_name(char const * in, char const * type) { if (in) return in; return string(""); } void get_module_dwarf(bool required = false) { if (!module_dwarf) module_dwarf = dwfl_module_getdwarf(module, &module_bias); if (!module_dwarf) { string msg = "cannot find "; if (module_name == "") msg += "kernel"; else msg += string("module ") + module_name; msg += " debuginfo"; int i = dwfl_errno(); if (i) msg += string(": ") + dwfl_errmsg (i); if (required) throw semantic_error (msg); else cerr << "WARNING: " << msg << "\n"; } } void limit_search_to_function_pattern(string const & pattern) { get_module_dwarf(false); cache.select_die_subsets(module_dwarf, pattern, pattern_limited_cus, pattern_limited_funcs); } void focus_on_module(Dwfl_Module * m) { assert(m); module = m; module_name = default_name(dwfl_module_info(module, NULL, &module_start, &module_end, NULL, NULL, NULL, NULL), "module"); // Reset existing pointers and names module_dwarf = NULL; pattern_limited_cus.clear(); pattern_limited_funcs.clear(); cu_name.clear(); cu = NULL; function_name.clear(); function = NULL; } void focus_on_cu(Dwarf_Die * c) { assert(c); assert(module); cu = c; cu_name = default_name(dwarf_diename(c), "CU"); // Reset existing pointers and names function_name.clear(); function = NULL; } void focus_on_function(Dwarf_Die * f) { assert(f); assert(module); assert(cu); function = f; function_name = default_name(dwarf_diename(function), "function"); } void focus_on_module_containing_global_address(Dwarf_Addr a) { assert(dwfl); cu = NULL; Dwfl_Module* mod = dwfl_addrmodule(dwfl, a); if (mod) // address could be wildly out of range focus_on_module(mod); } void query_cu_containing_global_address(Dwarf_Addr a, void *arg) { Dwarf_Addr bias; assert(dwfl); get_module_dwarf(); Dwarf_Die* cudie = dwfl_module_addrdie(module, a, &bias); if (cudie) // address could be wildly out of range query_cu (cudie, arg); assert(bias == module_bias); } void query_cu_containing_module_address(Dwarf_Addr a, void *arg) { query_cu_containing_global_address(module_address_to_global(a), arg); } Dwarf_Addr module_address_to_global(Dwarf_Addr a) { assert(dwfl); assert(module); get_module_dwarf(); if (module_name == TOK_KERNEL) return a; return a + module_start; } Dwarf_Addr global_address_to_module(Dwarf_Addr a) { assert(module); get_module_dwarf(); return a - module_bias; } bool module_name_matches(string pattern) { assert(module); bool t = (fnmatch(pattern.c_str(), module_name.c_str(), 0) == 0); if (t && sess.verbose>2) clog << "pattern '" << pattern << "' " << "matches " << "module '" << module_name << "'" << "\n"; return t; } bool function_name_matches(string pattern) { assert(function); bool t = (fnmatch(pattern.c_str(), function_name.c_str(), 0) == 0); if (t && sess.verbose>2) clog << "pattern '" << pattern << "' " << "matches " << "function '" << function_name << "'" << "\n"; return t; } bool cu_name_matches(string pattern) { assert(cu); bool t = (fnmatch(pattern.c_str(), cu_name.c_str(), 0) == 0); if (t && sess.verbose>2) clog << "pattern '" << pattern << "' " << "matches " << "CU '" << cu_name << "'" << "\n"; return t; } // NB: "rc == 0" means OK in this case void dwfl_assert(string desc, int rc, string extra_msg = "") { string msg = "libdwfl failure (" + desc + "): "; if (rc < 0) msg += dwfl_errmsg (rc); else if (rc > 0) msg += strerror (rc); if (rc != 0) { if (extra_msg.length() > 0) msg += "\n" + extra_msg; throw semantic_error (msg); } } void dwarf_assert(string desc, int rc) // NB: "rc == 0" means OK in this case { string msg = "libdw failure (" + desc + "): "; if (rc < 0) msg += dwarf_errmsg (rc); else if (rc > 0) msg += strerror (rc); if (rc != 0) throw semantic_error (msg); } dwflpp(systemtap_session & sess) : sess(sess), dwfl(NULL), module(NULL), module_dwarf(NULL), module_bias(0), module_start(0), module_end(0), cu(NULL), function(NULL) {} void setup(bool kernel) { // XXX: this is where the session -R parameter could come in static char debuginfo_path_arr[] = "-:.debug:/usr/lib/debug"; static char *debuginfo_path = debuginfo_path_arr; static const Dwfl_Callbacks proc_callbacks = { dwfl_linux_proc_find_elf, dwfl_standard_find_debuginfo, NULL, & debuginfo_path }; static const Dwfl_Callbacks kernel_callbacks = { dwfl_linux_kernel_find_elf, dwfl_standard_find_debuginfo, dwfl_linux_kernel_module_section_address, & debuginfo_path }; if (kernel) { dwfl = dwfl_begin (&kernel_callbacks); if (!dwfl) throw semantic_error ("cannot open dwfl"); dwfl_report_begin (dwfl); // XXX: if we have only kernel.* probe points, we shouldn't waste time // looking for module debug-info (and vice versa). dwfl_assert ("dwfl_linux_kernel_report_kernel", dwfl_linux_kernel_report_kernel (dwfl), "Ensure kernel debuginfo is installed"); dwfl_assert ("dwfl_linux_kernel_report_modules", dwfl_linux_kernel_report_modules (dwfl), "Ensure kernel debuginfo is installed"); } else { dwfl = dwfl_begin (&proc_callbacks); dwfl_report_begin (dwfl); if (!dwfl) throw semantic_error ("cannot open dwfl"); // XXX: Find pids or processes, do userspace stuff. } dwfl_assert ("dwfl_report_end", dwfl_report_end(dwfl, NULL, NULL)); } void iterate_over_modules(int (* callback)(Dwfl_Module *, void **, const char *, Dwarf_Addr, void *), void * data) { ptrdiff_t off = 0; do { off = dwfl_getmodules (dwfl, callback, data, off); } while (off > 0); dwfl_assert("dwfl_getmodules", off); } void iterate_over_cus (int (*callback)(Dwarf_Die * die, void * arg), void * data) { get_module_dwarf(false); if (!module_dwarf) return; for (set::const_iterator i = pattern_limited_cus.begin(); i != pattern_limited_cus.end(); ++i) { Dwarf_Die die = *i; if (callback (&die, data) != DWARF_CB_OK) break; } } bool func_is_inline() { assert (function); return dwarf_func_inline (function) != 0; } void iterate_over_inline_instances (int (* callback)(Dwarf_Die * die, void * arg), void * data) { assert (function); assert (func_is_inline ()); dwarf_assert ("dwarf_func_inline_instances", dwarf_func_inline_instances (function, callback, data)); } void iterate_over_functions (int (* callback)(Dwarf_Die * func, void * arg), void * data) { assert (module); assert (cu); multimap::const_iterator i = pattern_limited_funcs.lower_bound(*cu); while (i != pattern_limited_funcs.end() && (i->first == *cu)) { Dwarf_Die func_die = i->second; if (callback (&func_die, data) != DWARF_CB_OK) break; ++i; } } bool has_single_line_record (char const * srcfile, int lineno) { if (lineno < 0) return false; Dwarf_Line **srcsp = NULL; size_t nsrcs = 0; dwarf_assert ("dwarf_getsrc_file", dwarf_getsrc_file (module_dwarf, srcfile, lineno, 0, &srcsp, &nsrcs)); return nsrcs == 1; } void iterate_over_srcfile_lines (char const * srcfile, int lineno, bool need_single_match, void (* callback) (Dwarf_Line * line, void * arg), void *data) { Dwarf_Line **srcsp = NULL; size_t nsrcs = 0; get_module_dwarf(); dwarf_assert ("dwarf_getsrc_file", dwarf_getsrc_file (module_dwarf, srcfile, lineno, 0, &srcsp, &nsrcs)); if (need_single_match && nsrcs > 1) { // We wanted a single line record (a unique address for the // line) and we got a bunch of line records. We're going to // skip this probe (throw an exception) but before we throw // we're going to look around a bit to see if there's a low or // high line number nearby which *doesn't* have this problem, // so we can give the user some advice. int lo_try = -1; int hi_try = -1; for (size_t i = 1; i < 6; ++i) { if (lo_try == -1 && has_single_line_record(srcfile, lineno - i)) lo_try = lineno - i; if (hi_try == -1 && has_single_line_record(srcfile, lineno + i)) hi_try = lineno + i; } string advice = ""; if (lo_try > 0 || hi_try > 0) advice = " (try " + (lo_try > 0 ? (string(srcfile) + ":" + lex_cast(lo_try)) : string("")) + (lo_try > 0 && hi_try > 0 ? " or " : "") + (hi_try > 0 ? (string(srcfile) + ":"+ lex_cast(hi_try)) : string("")) + ")"; throw semantic_error("multiple addresses for " + string(srcfile) + ":" + lex_cast(lineno) + advice); } try { for (size_t i = 0; i < nsrcs; ++i) { callback (srcsp[i], data); } } catch (...) { free (srcsp); throw; } free (srcsp); } void collect_srcfiles_matching (string const & pattern, set & filtered_srcfiles) { assert (module); assert (cu); size_t nfiles; Dwarf_Files *srcfiles; dwarf_assert ("dwarf_getsrcfiles", dwarf_getsrcfiles (cu, &srcfiles, &nfiles)); { for (size_t i = 0; i < nfiles; ++i) { char const * fname = dwarf_filesrc (srcfiles, i, NULL, NULL); if (fnmatch (pattern.c_str(), fname, 0) == 0) { filtered_srcfiles.insert (fname); if (sess.verbose>2) clog << "selected source file '" << fname << "'\n"; } } } } void resolve_prologue_endings (map & funcs) { // This heuristic attempts to pick the first address that has a // source line distinct from the function declaration's. In a // perfect world, this would be the first statement *past* the // prologue. assert(module); assert(cu); size_t nlines = 0; Dwarf_Lines *lines = NULL; /* trouble cases: malloc do_symlink in init/initramfs.c tail-recursive/tiny then no-prologue sys_get?id in kernel/timer.c no-prologue sys_exit_group tail-recursive {do_,}sys_open extra-long-prologue (gcc 3.4) cpu_to_logical_apicid NULL-decl_file */ // Fetch all srcline records, sorted by address. dwarf_assert ("dwarf_getsrclines", dwarf_getsrclines(cu, &lines, &nlines)); // XXX: free lines[] later, but how? for(map::iterator it = funcs.begin(); it != funcs.end(); it++) { #if 0 /* someday */ Dwarf_Addr* bkpts = 0; int n = dwarf_entry_breakpoints (& it->second.die, & bkpts); // ... free (bkpts); #endif Dwarf_Addr entrypc = it->first; Dwarf_Addr highpc; // NB: highpc is exclusive: [entrypc,highpc) func_info* func = &it->second; dwfl_assert ("dwarf_highpc", dwarf_highpc (& func->die, & highpc)); if (func->decl_file == 0) func->decl_file = ""; unsigned entrypc_srcline_idx = 0; Dwarf_Line* entrypc_srcline = 0; // open-code binary search for exact match { unsigned l = 0, h = nlines; while (l < h) { entrypc_srcline_idx = (l + h) / 2; Dwarf_Addr addr; Dwarf_Line *lr = dwarf_onesrcline(lines, entrypc_srcline_idx); dwarf_lineaddr (lr, &addr); if (addr == entrypc) { entrypc_srcline = lr; break; } else if (l + 1 == h) { break; } // ran off bottom of tree else if (addr < entrypc) { l = entrypc_srcline_idx; } else { h = entrypc_srcline_idx; } } } if (entrypc_srcline == 0) throw semantic_error ("missing entrypc dwarf line record for function '" + func->name + "'"); if (sess.verbose>2) clog << "prologue searching function '" << func->name << "'" << " 0x" << hex << entrypc << "-0x" << highpc << dec << "@" << func->decl_file << ":" << func->decl_line << "\n"; // Now we go searching for the first line record that has a // file/line different from the one in the declaration. // Normally, this will be the next one. BUT: // // We may have to skip a few because some old compilers plop // in dummy line records for longer prologues. If we go too // far (addr >= highpc), we take the previous one. Or, it may // be the first one, if the function had no prologue, and thus // the entrypc maps to a statement in the body rather than the // declaration. unsigned postprologue_srcline_idx = entrypc_srcline_idx; bool ranoff_end = false; while (postprologue_srcline_idx < nlines) { Dwarf_Addr postprologue_addr; Dwarf_Line *lr = dwarf_onesrcline(lines, postprologue_srcline_idx); dwarf_lineaddr (lr, &postprologue_addr); const char* postprologue_file = dwarf_linesrc (lr, NULL, NULL); int postprologue_lineno; dwfl_assert ("dwarf_lineno", dwarf_lineno (lr, & postprologue_lineno)); if (sess.verbose>2) clog << "checking line record 0x" << hex << postprologue_addr << dec << "@" << postprologue_file << ":" << postprologue_lineno << "\n"; if (postprologue_addr >= highpc) { ranoff_end = true; postprologue_srcline_idx --; continue; } if (ranoff_end || (strcmp (postprologue_file, func->decl_file) || // We have a winner! (postprologue_lineno != func->decl_line))) { func->prologue_end = postprologue_addr; if (sess.verbose>2) { clog << "prologue found function '" << func->name << "'"; // Add a little classification datum if (postprologue_srcline_idx == entrypc_srcline_idx) clog << " (naked)"; if (ranoff_end) clog << " (tail-call?)"; clog << " = 0x" << hex << postprologue_addr << dec << "\n"; } break; } // Let's try the next srcline. postprologue_srcline_idx ++; } // loop over srclines // if (strlen(func->decl_file) == 0) func->decl_file = NULL; } // loop over functions } bool function_entrypc (Dwarf_Addr * addr) { assert (function); return (dwarf_entrypc (function, addr) == 0); } bool die_entrypc (Dwarf_Die * die, Dwarf_Addr * addr) { Dwarf_Attribute attr_mem; Dwarf_Attribute *attr = dwarf_attr (die, DW_AT_entry_pc, &attr_mem); if (attr != NULL) return (dwarf_formaddr (attr, addr) == 0); return ( dwarf_lowpc (die, addr) == 0); } void function_die (Dwarf_Die *d) { assert (function); *d = *function; } void function_file (char const ** c) { assert (function); assert (c); *c = dwarf_decl_file (function); } void function_line (int *linep) { assert (function); dwarf_decl_line (function, linep); } bool die_has_pc (Dwarf_Die * die, Dwarf_Addr pc) { int res = dwarf_haspc (die, pc); if (res == -1) dwarf_assert ("dwarf_haspc", res); return res == 1; } static void loc2c_error (void *arg, const char *fmt, ...) { char *msg = NULL; va_list ap; va_start (ap, fmt); vasprintf (&msg, fmt, ap); va_end (ap); throw semantic_error (msg); } void emit_address (struct obstack *pool, Dwarf_Addr address) { // For now what we actually use is just a hard-wired constant. obstack_printf (pool, "%#" PRIx64 "UL", address); // Turn this address into a section-relative offset if it should be one. // We emit a comment approximating the variable+offset expression that // relocatable module probing code will need to have. Dwfl_Module *mod = dwfl_addrmodule (dwfl, address); dwfl_assert ("dwfl_addrmodule", mod == NULL); int n = dwfl_module_relocations (mod); dwfl_assert ("dwfl_module_relocations", n < 0); if (n > 0) { int i = dwfl_module_relocate_address (mod, &address); dwfl_assert ("dwfl_module_relocate_address", i < 0); const char *modname = dwfl_module_info (mod, NULL, NULL, NULL, NULL, NULL, NULL, NULL); dwfl_assert ("dwfl_module_info", modname == NULL); const char *secname = dwfl_module_relocation_info (mod, i, NULL); dwfl_assert ("dwfl_module_relocation_info", secname == NULL); if (n > 1 || secname[0] != '\0') // This gives us the module name, and section name within the // module, for a kernel module (or other ET_REL module object). obstack_printf (pool, " /* %s(%s)+%#" PRIx64 " */", modname, secname, address); else // This would happen for a Dwfl_Module that's a user-level DSO. obstack_printf (pool, " /* %s+%#" PRIx64 " */", modname, address); } } static void loc2c_emit_address (void *arg, struct obstack *pool, Dwarf_Addr address) { dwflpp *dwfl = (dwflpp *) arg; dwfl->emit_address (pool, address); } Dwarf_Attribute * find_variable_and_frame_base (Dwarf_Die *scope_die, Dwarf_Addr pc, string const & local, Dwarf_Die *vardie, Dwarf_Attribute *fb_attr_mem) { Dwarf_Die *scopes; int nscopes = 0; Dwarf_Attribute *fb_attr = NULL; assert (cu); if (scope_die) nscopes = dwarf_getscopes_die (scope_die, &scopes); else nscopes = dwarf_getscopes (cu, pc, &scopes); if (nscopes == 0) { throw semantic_error ("unable to find any scopes containing " + lex_cast_hex(pc) + " while searching for local '" + local + "'"); } int declaring_scope = dwarf_getscopevar (scopes, nscopes, local.c_str(), 0, NULL, 0, 0, vardie); if (declaring_scope < 0) { throw semantic_error ("unable to find local '" + local + "'" + " near pc " + lex_cast_hex(pc)); } for (int inner = 0; inner < nscopes; ++inner) { switch (dwarf_tag (&scopes[inner])) { default: continue; case DW_TAG_subprogram: case DW_TAG_entry_point: case DW_TAG_inlined_subroutine: /* XXX */ if (inner >= declaring_scope) fb_attr = dwarf_attr_integrate (&scopes[inner], DW_AT_frame_base, fb_attr_mem); break; } } return fb_attr; } struct location * translate_location(struct obstack *pool, Dwarf_Attribute *attr, Dwarf_Addr pc, Dwarf_Attribute *fb_attr, struct location **tail) { Dwarf_Op *expr; size_t len; switch (dwarf_getlocation_addr (attr, pc - module_bias, &expr, &len, 1)) { case 1: /* Should always happen. */ if (len > 0) break; /* Fall through. */ case 0: /* Shouldn't happen. */ throw semantic_error ("not accessible at this address"); default: /* Shouldn't happen. */ case -1: throw semantic_error (string ("dwarf_getlocation_addr failed") + string (dwarf_errmsg (-1))); } return c_translate_location (pool, &loc2c_error, this, &loc2c_emit_address, 1, module_bias, pc, expr, len, tail, fb_attr); } Dwarf_Die * translate_components(struct obstack *pool, struct location **tail, Dwarf_Addr pc, vector > const & components, Dwarf_Die *vardie, Dwarf_Die *die_mem, Dwarf_Attribute *attr_mem) { Dwarf_Die *die = vardie; unsigned i = 0; while (i < components.size()) { die = dwarf_formref_die (attr_mem, die_mem); const int typetag = dwarf_tag (die); switch (typetag) { case DW_TAG_typedef: case DW_TAG_const_type: case DW_TAG_volatile_type: /* Just iterate on the referent type. */ break; case DW_TAG_pointer_type: if (components[i].first == target_symbol::comp_literal_array_index) goto subscript; c_translate_pointer (pool, 1, module_bias, die, tail); break; case DW_TAG_array_type: if (components[i].first == target_symbol::comp_literal_array_index) { subscript: c_translate_array (pool, 1, module_bias, die, tail, NULL, lex_cast(components[i].second)); ++i; } else throw semantic_error("bad field '" + components[i].second + "' for array type"); break; case DW_TAG_structure_type: case DW_TAG_union_type: switch (dwarf_child (die, die_mem)) { case 1: /* No children. */ throw semantic_error ("empty struct " + string (dwarf_diename_integrate (die) ?: "")); break; case -1: /* Error. */ default: /* Shouldn't happen */ throw semantic_error (string (typetag == DW_TAG_union_type ? "union" : "struct") + string (dwarf_diename_integrate (die) ?: "") + string (dwarf_errmsg (-1))); break; case 0: break; } while (dwarf_tag (die) != DW_TAG_member || ({ const char *member = dwarf_diename_integrate (die); member == NULL || string(member) != components[i].second; })) if (dwarf_siblingof (die, die_mem) != 0) throw semantic_error ("field name " + components[i].second + " not found"); if (dwarf_attr_integrate (die, DW_AT_data_member_location, attr_mem) == NULL) { /* Union members don't usually have a location, but just use the containing union's location. */ if (typetag != DW_TAG_union_type) throw semantic_error ("no location for field " + components[i].second + " :" + string(dwarf_errmsg (-1))); } else translate_location (pool, attr_mem, pc, NULL, tail); ++i; break; case DW_TAG_base_type: throw semantic_error ("field " + components[i].second + " vs base type " + string(dwarf_diename_integrate (die) ?: "")); break; case -1: throw semantic_error ("cannot find type: " + string(dwarf_errmsg (-1))); break; default: throw semantic_error (string(dwarf_diename_integrate (die) ?: "") + ": unexpected type tag " + lex_cast(dwarf_tag (die))); break; } /* Now iterate on the type in DIE's attribute. */ if (dwarf_attr_integrate (die, DW_AT_type, attr_mem) == NULL) throw semantic_error ("cannot get type of field: " + string(dwarf_errmsg (-1))); } return die; } Dwarf_Die * resolve_unqualified_inner_typedie (Dwarf_Die *typedie_mem, Dwarf_Attribute *attr_mem) { ; Dwarf_Die *typedie; int typetag = 0; while (1) { typedie = dwarf_formref_die (attr_mem, typedie_mem); if (typedie == NULL) throw semantic_error ("cannot get type: " + string(dwarf_errmsg (-1))); typetag = dwarf_tag (typedie); if (typetag != DW_TAG_typedef && typetag != DW_TAG_const_type && typetag != DW_TAG_volatile_type) break; if (dwarf_attr_integrate (typedie, DW_AT_type, attr_mem) == NULL) throw semantic_error ("cannot get type of pointee: " + string(dwarf_errmsg (-1))); } return typedie; } void translate_final_fetch_or_store (struct obstack *pool, struct location **tail, Dwarf_Addr module_bias, Dwarf_Die *die, Dwarf_Attribute *attr_mem, bool lvalue, string & prelude, string & postlude, exp_type & ty) { /* First boil away any qualifiers associated with the type DIE of the final location to be accessed. */ Dwarf_Die typedie_mem; Dwarf_Die *typedie; int typetag; typedie = resolve_unqualified_inner_typedie (&typedie_mem, attr_mem); typetag = dwarf_tag (typedie); /* Then switch behavior depending on the type of fetch/store we want, and the type and pointer-ness of the final location. */ switch (typetag) { default: throw semantic_error ("unsupported type tag " + lex_cast(typetag)); break; case DW_TAG_enumeration_type: case DW_TAG_base_type: ty = pe_long; if (lvalue) c_translate_store (pool, 1, module_bias, die, typedie, tail, "THIS->value"); else c_translate_fetch (pool, 1, module_bias, die, typedie, tail, "THIS->__retvalue"); break; case DW_TAG_array_type: case DW_TAG_pointer_type: if (lvalue) throw semantic_error ("cannot store into target pointer value"); { Dwarf_Die pointee_typedie_mem; Dwarf_Die *pointee_typedie; Dwarf_Word pointee_encoding; Dwarf_Word pointee_byte_size = 0; pointee_typedie = resolve_unqualified_inner_typedie (&pointee_typedie_mem, attr_mem); if (dwarf_attr_integrate (pointee_typedie, DW_AT_byte_size, attr_mem)) dwarf_formudata (attr_mem, &pointee_byte_size); dwarf_formudata (dwarf_attr_integrate (pointee_typedie, DW_AT_encoding, attr_mem), &pointee_encoding); // We have the pointer: cast it to an integral type via &(*(...)) // NB: per bug #1187, at one point char*-like types were // automagically converted here to systemtap string values. // For several reasons, this was taken back out, leaving // pointer-to-string "conversion" (copying) to tapset functions. ty = pe_long; if (typetag == DW_TAG_array_type) c_translate_array (pool, 1, module_bias, typedie, tail, NULL, 0); else c_translate_pointer (pool, 1, module_bias, typedie, tail); c_translate_addressof (pool, 1, module_bias, NULL, pointee_typedie, tail, "THIS->__retvalue"); } break; } } string express_as_string (string prelude, string postlude, struct location *head) { size_t bufsz = 1024; char *buf = static_cast(malloc(bufsz)); assert(buf); FILE *memstream = open_memstream (&buf, &bufsz); assert(memstream); fprintf(memstream, "{\n"); fprintf(memstream, prelude.c_str()); bool deref = c_emit_location (memstream, head, 1); fprintf(memstream, postlude.c_str()); fprintf(memstream, " goto out;\n"); // dummy use of deref_fault label, to disable warning if deref() not used fprintf(memstream, "if (0) goto deref_fault;\n"); // XXX: deref flag not reliable; emit fault label unconditionally // XXX: print the faulting address, like the user_string/kernel_string // tapset functions do if (deref) ; fprintf(memstream, "deref_fault:\n" " c->last_error = \"pointer dereference fault\";\n" " goto out;\n"); fprintf(memstream, "}\n"); fclose (memstream); string result(buf); free (buf); return result; } string literal_stmt_for_local (Dwarf_Die *scope_die, Dwarf_Addr pc, string const & local, vector > const & components, bool lvalue, exp_type & ty) { Dwarf_Die vardie; Dwarf_Attribute fb_attr_mem, *fb_attr = NULL; fb_attr = find_variable_and_frame_base (scope_die, pc, local, &vardie, &fb_attr_mem); if (sess.verbose>2) clog << "finding location for local '" << local << "' near address " << hex << pc << ", module bias " << module_bias << dec << "\n"; Dwarf_Attribute attr_mem; if (dwarf_attr_integrate (&vardie, DW_AT_location, &attr_mem) == NULL) { throw semantic_error("failed to retrieve location " "attribute for local '" + local + "' (dieoffset: " + lex_cast_hex(dwarf_dieoffset (&vardie)) + ")"); } #define obstack_chunk_alloc malloc #define obstack_chunk_free free struct obstack pool; obstack_init (&pool); struct location *tail = NULL; /* Given $foo->bar->baz[NN], translate the location of foo. */ struct location *head = translate_location (&pool, &attr_mem, pc, fb_attr, &tail); if (dwarf_attr_integrate (&vardie, DW_AT_type, &attr_mem) == NULL) throw semantic_error("failed to retrieve type " "attribute for local '" + local + "'"); /* Translate the ->bar->baz[NN] parts. */ Dwarf_Die die_mem, *die = NULL; die = translate_components (&pool, &tail, pc, components, &vardie, &die_mem, &attr_mem); /* Translate the assignment part, either x = $foo->bar->baz[NN] or $foo->bar->baz[NN] = x */ string prelude, postlude; translate_final_fetch_or_store (&pool, &tail, module_bias, die, &attr_mem, lvalue, prelude, postlude, ty); /* Write the translation to a string. */ return express_as_string(prelude, postlude, head); } string literal_stmt_for_return (Dwarf_Die *scope_die, Dwarf_Addr pc, vector > const & components, bool lvalue, exp_type & ty) { if (sess.verbose>2) clog << "literal_stmt_for_return: finding return value for " << dwarf_diename (scope_die) << "(" << dwarf_diename (cu) << ")\n"; struct obstack pool; obstack_init (&pool); struct location *tail = NULL; /* Given $return->bar->baz[NN], translate the location of return. */ const Dwarf_Op *locops; int nlocops = dwfl_module_return_value_location (module, scope_die, &locops); if (nlocops < 0) { throw semantic_error("failed to retrieve return value location"); } // the function has no return value (e.g. "void" in C) else if (nlocops == 0) { throw semantic_error("function has no return value"); } struct location *head = c_translate_location (&pool, &loc2c_error, this, &loc2c_emit_address, 1, module_bias, pc, locops, nlocops, &tail, NULL); /* Translate the ->bar->baz[NN] parts. */ Dwarf_Attribute attr_mem; Dwarf_Attribute *attr = dwarf_attr (scope_die, DW_AT_type, &attr_mem); Dwarf_Die vardie_mem; Dwarf_Die *vardie = dwarf_formref_die (attr, &vardie_mem); Dwarf_Die die_mem, *die = NULL; die = translate_components (&pool, &tail, pc, components, vardie, &die_mem, &attr_mem); /* Translate the assignment part, either x = $return->bar->baz[NN] or $return->bar->baz[NN] = x */ string prelude, postlude; translate_final_fetch_or_store (&pool, &tail, module_bias, die, &attr_mem, lvalue, prelude, postlude, ty); /* Write the translation to a string. */ return express_as_string(prelude, postlude, head); } ~dwflpp() { if (dwfl) dwfl_end(dwfl); } }; enum function_spec_type { function_alone, function_and_file, function_file_and_line }; struct dwarf_builder; struct dwarf_query; struct dwarf_derived_probe : public derived_probe { dwarf_derived_probe (Dwarf_Die *scope_die, Dwarf_Addr addr, dwarf_query & q); vector probe_points; bool has_return; void register_probe (systemtap_session& s); void add_probe_point(string const & funcname, char const * filename, int line, Dwarf_Addr addr, dwarf_query & q); // Pattern registration helpers. static void register_relative_variants(match_node * root, dwarf_builder * dw); static void register_statement_variants(match_node * root, dwarf_builder * dw); static void register_function_variants(match_node * root, dwarf_builder * dw); static void register_inline_variants(match_node * root, dwarf_builder * dw); static void register_function_and_statement_variants(match_node * root, dwarf_builder * dw); static void register_patterns(match_node * root); virtual void emit_registrations_start (translator_output* o, unsigned index); virtual void emit_registrations_end (translator_output * o, unsigned index); virtual void emit_deregistrations (translator_output * o); virtual void emit_probe_entries (translator_output * o); }; struct dwarf_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(dwarf_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; // Helper struct to thread through the dwfl callbacks. struct dwarf_query { dwarf_query(systemtap_session & sess, probe * base_probe, probe_point * base_loc, dwflpp & dw, map const & params, vector & results); systemtap_session & sess; // Parameter extractors. static bool has_null_param(map const & params, string const & k); static bool get_string_param(map const & params, string const & k, string & v); static bool get_number_param(map const & params, string const & k, long & v); static bool get_number_param(map const & params, string const & k, Dwarf_Addr & v); string pt_regs_member_for_regnum(uint8_t dwarf_regnum); // Result vector and flavour-sorting mechanism. vector & results; bool probe_has_no_target_variables; map probe_flavours; void add_probe_point(string const & funcname, char const * filename, int line, Dwarf_Die *scope_die, Dwarf_Addr addr); set blacklisted_probes; set blacklisted_return_probes; void build_blacklist(); bool blacklisted_p(string const & funcname, char const * filename, int line, Dwarf_Die *scope_die, Dwarf_Addr addr); // Extracted parameters. bool has_kernel; bool has_process; bool has_module; string process_val; string module_val; string function_val; bool has_inline_str; bool has_function_str; bool has_statement_str; bool has_inline_num; bool has_function_num; bool has_statement_num; string statement_str_val; string function_str_val; string inline_str_val; Dwarf_Addr statement_num_val; Dwarf_Addr function_num_val; Dwarf_Addr inline_num_val; bool has_callees; long callee_val; bool has_return; bool has_label; string label_val; bool has_relative; long relative_val; function_spec_type parse_function_spec(string & spec); function_spec_type spec_type; string function; string file; int line; set filtered_srcfiles; // Map official entrypc -> func_info object map filtered_inlines; map filtered_functions; bool choose_next_line; Dwarf_Addr entrypc_for_next_line; probe * base_probe; probe_point * base_loc; dwflpp & dw; }; struct dwarf_builder: public derived_probe_builder { dwflpp *kern_dw; dwflpp *user_dw; dwarf_builder() : kern_dw(NULL), user_dw(NULL) {} ~dwarf_builder() { if (kern_dw) delete kern_dw; if (user_dw) delete user_dw; } virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results); }; bool dwarf_query::has_null_param(map const & params, string const & k) { map::const_iterator i = params.find(k); if (i != params.end() && i->second == NULL) return true; return false; } bool dwarf_query::get_string_param(map const & params, string const & k, string & v) { return derived_probe_builder::get_param (params, k, v); } bool dwarf_query::get_number_param(map const & params, string const & k, long & v) { int64_t value; bool present = derived_probe_builder::get_param (params, k, value); v = (long) value; return present; } bool dwarf_query::get_number_param(map const & params, string const & k, Dwarf_Addr & v) { int64_t value; bool present = derived_probe_builder::get_param (params, k, value); v = (Dwarf_Addr) value; return present; } dwarf_query::dwarf_query(systemtap_session & sess, probe * base_probe, probe_point * base_loc, dwflpp & dw, map const & params, vector & results) : sess(sess), results(results), probe_has_no_target_variables(false), base_probe(base_probe), base_loc(base_loc), dw(dw) { // Reduce the query to more reasonable semantic values (booleans, // extracted strings, numbers, etc). has_kernel = has_null_param(params, TOK_KERNEL); has_module = get_string_param(params, TOK_MODULE, module_val); has_process = get_string_param(params, TOK_PROCESS, process_val); has_function_str = get_string_param(params, TOK_FUNCTION, function_str_val); has_function_num = get_number_param(params, TOK_FUNCTION, function_num_val); has_inline_str = get_string_param(params, TOK_INLINE, inline_str_val); has_inline_num = get_number_param(params, TOK_INLINE, inline_num_val); has_statement_str = get_string_param(params, TOK_STATEMENT, statement_str_val); has_statement_num = get_number_param(params, TOK_STATEMENT, statement_num_val); callee_val = 1; has_callees = (has_null_param(params, TOK_CALLEES) || get_number_param(params, TOK_CALLEES, callee_val)); has_return = has_null_param(params, TOK_RETURN); has_label = get_string_param(params, TOK_LABEL, label_val); has_relative = get_number_param(params, TOK_RELATIVE, relative_val); if (has_function_str) spec_type = parse_function_spec(function_str_val); else if (has_inline_str) spec_type = parse_function_spec(inline_str_val); else if (has_statement_str) spec_type = parse_function_spec(statement_str_val); build_blacklist(); } void dwarf_query::build_blacklist() { // FIXME: it would be nice if these blacklisted functions were pulled in // dynamically, instead of being statically defined here. // Most of these are marked __kprobes in newer kernels. We list them here so // the translator can block them on older kernels that don't have the // __kprobes function decorator. blacklisted_probes.insert("default_do_nmi"); blacklisted_probes.insert("__die"); blacklisted_probes.insert("die_nmi"); blacklisted_probes.insert("do_debug"); blacklisted_probes.insert("do_general_protection"); blacklisted_probes.insert("do_int3"); blacklisted_probes.insert("do_IRQ"); blacklisted_probes.insert("do_page_fault"); blacklisted_probes.insert("do_sparc64_fault"); blacklisted_probes.insert("do_trap"); blacklisted_probes.insert("dummy_nmi_callback"); blacklisted_probes.insert("flush_icache_range"); blacklisted_probes.insert("ia64_bad_break"); blacklisted_probes.insert("ia64_do_page_fault"); blacklisted_probes.insert("ia64_fault"); blacklisted_probes.insert("io_check_error"); blacklisted_probes.insert("mem_parity_error"); blacklisted_probes.insert("nmi_watchdog_tick"); blacklisted_probes.insert("notifier_call_chain"); blacklisted_probes.insert("oops_begin"); blacklisted_probes.insert("oops_end"); blacklisted_probes.insert("program_check_exception"); blacklisted_probes.insert("single_step_exception"); blacklisted_probes.insert("sync_regs"); blacklisted_probes.insert("unhandled_fault"); blacklisted_probes.insert("unknown_nmi_error"); // __switch_to is only disallowed on x86_64 if (sess.architecture == "x86_64") blacklisted_probes.insert("__switch_to"); // These functions don't return, so return probes would never be recovered blacklisted_return_probes.insert("do_exit"); blacklisted_return_probes.insert("sys_exit"); blacklisted_return_probes.insert("sys_exit_group"); } function_spec_type dwarf_query::parse_function_spec(string & spec) { string::const_iterator i = spec.begin(), e = spec.end(); function.clear(); file.clear(); line = 0; while (i != e && *i != '@') { if (*i == ':') goto bad; function += *i++; } if (i == e) { if (sess.verbose>2) clog << "parsed '" << spec << "' -> func '" << function << "'\n"; return function_alone; } if (i++ == e) goto bad; while (i != e && *i != ':') file += *i++; if (i == e) { if (sess.verbose>2) clog << "parsed '" << spec << "' -> func '"<< function << "', file '" << file << "'\n"; return function_and_file; } if (i++ == e) goto bad; try { line = lex_cast(string(i, e)); if (sess.verbose>2) clog << "parsed '" << spec << "' -> func '"<< function << "', file '" << file << "', line " << line << "\n"; return function_file_and_line; } catch (runtime_error & exn) { goto bad; } bad: throw semantic_error("malformed specification '" + spec + "'", base_probe->tok); } // Our goal here is to calculate a "flavour", a string which // characterizes the way in which this probe body depends on target // variables. The flavour is used to separate instances of a dwarf // probe which have different contextual bindings for the target // variables which occur within the probe body. If two die/addr // combinations have the same flavour string, they will be directed // into the same probe function. struct target_variable_flavour_calculating_visitor : public traversing_visitor { string flavour; dwarf_query & q; Dwarf_Die *scope_die; Dwarf_Addr addr; target_variable_flavour_calculating_visitor(dwarf_query & q, Dwarf_Die *sd, Dwarf_Addr a) : q(q), scope_die(sd), addr(a) {} void visit_target_symbol (target_symbol* e); }; void target_variable_flavour_calculating_visitor::visit_target_symbol (target_symbol *e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); // NB: if for whatever reason this variable does not resolve, // or is illegally used (write in non-guru mode for instance), // just pretend that it's OK anyway. dwarf_var_expanding_copy_visitor // will take care of throwing the appropriate exception. bool lvalue = is_active_lvalue(e); flavour += lvalue ? 'w' : 'r'; exp_type ty; string expr; try { if (q.has_return && e->base_name == "$return") expr = q.dw.literal_stmt_for_return (scope_die, addr, e->components, lvalue, ty); else expr = q.dw.literal_stmt_for_local(scope_die, addr, e->base_name.substr(1), e->components, lvalue, ty); } catch (const semantic_error& x) { e->saved_conversion_error = new semantic_error (x); e->saved_conversion_error->tok1 = e->tok; ty = pe_unknown; } switch (ty) { case pe_unknown: flavour += 'U'; break; case pe_long: flavour += 'L'; break; case pe_string: flavour += 'S'; break; case pe_stats: flavour += 'T'; break; } flavour += lex_cast(expr.size()); flavour += '{'; flavour += expr; flavour += '}'; } bool dwarf_query::blacklisted_p(string const & funcname, char const * filename, int line, Dwarf_Die *scope_die, Dwarf_Addr addr) { // Check whether the given address points into an .init/.exit section, // which will have been unmapped by the kernel by the time we get to // insert the probe. In this case, just ignore this call. if (dwfl_module_relocations (dw.module) > 0) { // This is a relocatable module; libdwfl has noted its sections. Dwarf_Addr rel_addr = addr; int idx = dwfl_module_relocate_address (dw.module, &rel_addr); const char *name = dwfl_module_relocation_info (dw.module, idx, NULL); if (name && ((strncmp (name, ".init.", 6) == 0) || (strncmp (name, ".exit.", 6) == 0))) { if (sess.verbose>1) clog << "skipping function '" << funcname << "' base 0x" << hex << addr << dec << " is within section '" << name << "'\n"; return true; } } else { Dwarf_Addr baseaddr; Elf* elf = dwfl_module_getelf (dw.module, & baseaddr); Dwarf_Addr rel_addr = addr - baseaddr; if (elf) { // Iterate through section headers to find which one // contains the given rel_addr. Elf_Scn* scn = 0; size_t shstrndx; dw.dwfl_assert ("getshstrndx", elf_getshstrndx (elf, &shstrndx)); while ((scn = elf_nextscn (elf, scn)) != NULL) { GElf_Shdr shdr_mem; GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); if (! shdr) continue; // XXX error? // check for address inclusion GElf_Addr start = shdr->sh_addr; GElf_Addr end = start + shdr->sh_size; if (! (rel_addr >= start && rel_addr < end)) continue; // check for section name const char* name = elf_strptr (elf, shstrndx, shdr->sh_name); if (name && ((strncmp (name, ".init.", 6) == 0) || (strncmp (name, ".exit.", 6) == 0))) { if (sess.verbose>1) clog << "skipping function '" << funcname << "' base 0x" << hex << addr << dec << " is within section '" << name << "'\n"; return true; } } } } // Check probe point against blacklist. XXX: This has to be // properly generalized, perhaps via a table populated from script // files. A "noprobe kernel.function("...")" construct might do // the trick. if (filename == 0) filename = ""; // possibly 0 string filename_s = filename; // is passed as const char* if (blacklisted_probes.count(funcname) > 0 || (has_return && blacklisted_return_probes.count(funcname) > 0) || filename_s == "kernel/kprobes.c" || 0 == fnmatch ("arch/*/kernel/kprobes.c", filename, 0)) { if (sess.verbose>1) clog << "skipping function '" << funcname << "' file '" << filename << "' is blacklisted\n"; return true; } // This probe point is not blacklisted. return false; } void dwarf_query::add_probe_point(string const & funcname, char const * filename, int line, Dwarf_Die *scope_die, Dwarf_Addr addr) { dwarf_derived_probe *probe = NULL; if (blacklisted_p (funcname, filename, line, scope_die, addr)) return; if (probe_has_no_target_variables) { assert(probe_flavours.size() == 1); probe = probe_flavours.begin()->second; } else { target_variable_flavour_calculating_visitor flav(*this, scope_die, addr); base_probe->body->visit(&flav); map::iterator i = probe_flavours.find(flav.flavour); if (i != probe_flavours.end()) probe = i->second; else { probe = new dwarf_derived_probe(scope_die, addr, *this); probe_flavours.insert(make_pair(flav.flavour, probe)); results.push_back(probe); } // Cache result in degenerate case to avoid recomputing. if (flav.flavour.empty()) probe_has_no_target_variables = true; } if (sess.verbose > 1) { clog << "probe " << funcname << "@" << filename << ":" << line << " pc=0x" << hex << addr << dec << endl; } probe->add_probe_point(funcname, filename, line, addr, *this); } // The critical determining factor when interpreting a pattern // string is, perhaps surprisingly: "presence of a lineno". The // presence of a lineno changes the search strategy completely. // // Compare the two cases: // // 1. {statement,function}(foo@file.c:lineno) // - find the files matching file.c // - in each file, find the functions matching foo // - query the file for line records matching lineno // - iterate over the line records, // - and iterate over the functions, // - if(haspc(function.DIE, line.addr)) // - if looking for statements: probe(lineno.addr) // - if looking for functions: probe(function.{entrypc,return,etc.}) // // 2. {statement,function}(foo@file.c) // - find the files matching file.c // - in each file, find the functions matching foo // - probe(function.{entrypc,return,etc.}) // // Thus the first decision we make is based on the presence of a // lineno, and we enter entirely different sets of callbacks // depending on that decision. static void query_statement (string const & func, char const * file, int line, Dwarf_Die *scope_die, Dwarf_Addr stmt_addr, dwarf_query * q) { try { // XXX: implement if (q->has_relative) throw semantic_error("incomplete: do not know how to interpret .relative", q->base_probe->tok); q->add_probe_point(func, file, line, scope_die, stmt_addr); } catch (const semantic_error& e) { q->sess.print_error (e); } } static void query_inline_instance_info (Dwarf_Addr entrypc, inline_instance_info & ii, dwarf_query * q) { try { if (q->has_return) { throw semantic_error ("cannot probe .return of inline function '" + ii.name + "'"); } else { if (q->sess.verbose>2) clog << "querying entrypc " << hex << entrypc << dec << " of instance of inline '" << ii.name << "'\n"; query_statement (ii.name, ii.decl_file, ii.decl_line, &ii.die, entrypc, q); } } catch (semantic_error &e) { q->sess.print_error (e); } } static void query_func_info (Dwarf_Addr entrypc, func_info & fi, dwarf_query * q) { try { if (q->has_return) { // NB. dwarf_derived_probe::emit_registrations will emit a // kretprobe based on the entrypc in this case. query_statement (fi.name, fi.decl_file, fi.decl_line, &fi.die, entrypc, q); } else { #ifdef __ia64__ // In IA64 platform function probe point is set at its // entry point rather than prologue end pointer query_statement (fi.name, fi.decl_file, fi.decl_line, &fi.die, entrypc, q); #else if (fi.prologue_end == 0) throw semantic_error("could not find prologue-end " "for probed function '" + fi.name + "'"); query_statement (fi.name, fi.decl_file, fi.decl_line, &fi.die, fi.prologue_end, q); #endif } } catch (semantic_error &e) { q->sess.print_error (e); } } static void query_srcfile_line (Dwarf_Line * line, void * arg) { dwarf_query * q = static_cast(arg); Dwarf_Addr addr; dwarf_lineaddr(line, &addr); for (map::iterator i = q->filtered_functions.begin(); i != q->filtered_functions.end(); ++i) { if (q->dw.die_has_pc (&(i->second.die), addr)) { if (q->sess.verbose>3) clog << "function DIE lands on srcfile\n"; if (q->has_statement_str) query_statement (i->second.name, i->second.decl_file, q->line, NULL, addr, q); else query_func_info (i->first, i->second, q); } } for (map::iterator i = q->filtered_inlines.begin(); i != q->filtered_inlines.end(); ++i) { if (q->dw.die_has_pc (&(i->second.die), addr)) { if (q->sess.verbose>3) clog << "inline instance DIE lands on srcfile\n"; if (q->has_statement_str) query_statement (i->second.name, i->second.decl_file, q->line, NULL, addr, q); else query_inline_instance_info (i->first, i->second, q); } } } static int query_dwarf_inline_instance (Dwarf_Die * die, void * arg) { dwarf_query * q = static_cast(arg); assert (!q->has_statement_num); try { bool record_this_inline = false; if (q->sess.verbose>2) clog << "examining inline instance of " << q->dw.function_name << "\n"; if (q->has_inline_str || q->has_statement_str) record_this_inline = true; else if (q->has_inline_num) { Dwarf_Addr query_addr = q->inline_num_val; if (q->has_module) query_addr = q->dw.module_address_to_global(query_addr); if (q->dw.die_has_pc (die, query_addr)) record_this_inline = true; } if (record_this_inline) { if (q->sess.verbose>2) clog << "selected inline instance of " << q->dw.function_name << "\n"; Dwarf_Addr entrypc; if (q->dw.die_entrypc (die, &entrypc)) { inline_instance_info inl; inl.die = *die; inl.name = q->dw.function_name; q->dw.function_file (&inl.decl_file); q->dw.function_line (&inl.decl_line); q->filtered_inlines[entrypc] = inl; } } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } static int query_dwarf_func (Dwarf_Die * func, void * arg) { dwarf_query * q = static_cast(arg); assert (!q->has_statement_num); try { // XXX: implement if (q->has_callees) throw semantic_error ("incomplete: do not know how to interpret .callees", q->base_probe->tok); if (q->has_label) throw semantic_error ("incomplete: do not know how to interpret .label", q->base_probe->tok); q->dw.focus_on_function (func); if (q->dw.func_is_inline () && (((q->has_statement_str || q->has_inline_str) && q->dw.function_name_matches(q->function)) || q->has_inline_num)) { if (q->sess.verbose>3) clog << "checking instances of inline " << q->dw.function_name << "\n"; q->dw.iterate_over_inline_instances (query_dwarf_inline_instance, arg); } else if (!q->dw.func_is_inline ()) { bool record_this_function = false; if ((q->has_statement_str || q->has_function_str) && q->dw.function_name_matches(q->function)) { record_this_function = true; } else if (q->has_function_num) { Dwarf_Addr query_addr = q->function_num_val; if (q->has_module) query_addr = q->dw.module_address_to_global(query_addr); Dwarf_Die d; q->dw.function_die (&d); if (q->dw.die_has_pc (&d, query_addr)) record_this_function = true; } if (record_this_function) { if (q->sess.verbose>2) clog << "selected function " << q->dw.function_name << "\n"; Dwarf_Addr entrypc; if (q->dw.function_entrypc (&entrypc)) { func_info func; q->dw.function_die (&func.die); func.name = q->dw.function_name; q->dw.function_file (&func.decl_file); q->dw.function_line (&func.decl_line); q->filtered_functions[entrypc] = func; } else throw semantic_error("no entrypc found for function '" + q->dw.function_name + "'"); } } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } static int query_cu (Dwarf_Die * cudie, void * arg) { dwarf_query * q = static_cast(arg); try { q->dw.focus_on_cu (cudie); if (false && q->sess.verbose>2) clog << "focused on CU '" << q->dw.cu_name << "', in module '" << q->dw.module_name << "'\n"; if (q->has_statement_str || q->has_inline_str || q->has_inline_num || q->has_function_str || q->has_function_num) { q->filtered_srcfiles.clear(); q->filtered_functions.clear(); q->filtered_inlines.clear(); // In this path, we find "abstract functions", record // information about them, and then (depending on lineno // matching) possibly emit one or more of the function's // associated addresses. Unfortunately the control of this // cannot easily be turned inside out. if ((q->has_statement_str || q->has_function_str || q->has_inline_str) && (q->spec_type != function_alone)) { // If we have a pattern string with a filename, we need // to elaborate the srcfile mask in question first. q->dw.collect_srcfiles_matching (q->file, q->filtered_srcfiles); // If we have a file pattern and *no* srcfile matches, there's // no need to look further into this CU, so skip. if (q->filtered_srcfiles.empty()) return DWARF_CB_OK; } // Pick up [entrypc, name, DIE] tuples for all the functions // matching the query, and fill in the prologue endings of them // all in a single pass. q->dw.iterate_over_functions (query_dwarf_func, q); if (! q->filtered_functions.empty()) q->dw.resolve_prologue_endings (q->filtered_functions); if ((q->has_statement_str || q->has_function_str || q->has_inline_str) && (q->spec_type == function_file_and_line)) { // If we have a pattern string with target *line*, we // have to look at lines in all the matched srcfiles. for (set::const_iterator i = q->filtered_srcfiles.begin(); i != q->filtered_srcfiles.end(); ++i) q->dw.iterate_over_srcfile_lines (*i, q->line, q->has_statement_str, query_srcfile_line, q); } else { // Otherwise, simply probe all resolved functions (if // we're scanning functions) if (q->has_statement_str || q->has_function_str || q->has_function_num) for (map::iterator i = q->filtered_functions.begin(); i != q->filtered_functions.end(); ++i) query_func_info (i->first, i->second, q); // Or all inline instances (if we're scanning inlines) if (q->has_statement_str || q->has_inline_str || q->has_inline_num) for (map::iterator i = q->filtered_inlines.begin(); i != q->filtered_inlines.end(); ++i) query_inline_instance_info (i->first, i->second, q); } } else { // Otherwise we have a statement number, and we can just // query it directly within this module. assert (q->has_statement_num); Dwarf_Addr query_addr = q->statement_num_val; if (q->has_module) query_addr = q->dw.module_address_to_global(query_addr); query_statement ("", "", -1, NULL, query_addr, q); } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } static int query_kernel_exists (Dwfl_Module *mod __attribute__ ((unused)), void **userdata __attribute__ ((unused)), const char *name, Dwarf_Addr base __attribute__ ((unused)), void *arg) { int *flagp = (int *) arg; if (TOK_KERNEL == name) *flagp = 1; return DWARF_CB_OK; } static int query_module (Dwfl_Module *mod __attribute__ ((unused)), void **userdata __attribute__ ((unused)), const char *name, Dwarf_Addr base, void *arg __attribute__ ((unused))) { dwarf_query * q = static_cast(arg); try { q->dw.focus_on_module(mod); // If we have enough information in the pattern to skip a module and // the module does not match that information, return early. if (q->has_kernel && !q->dw.module_name_matches(TOK_KERNEL)) return DWARF_CB_OK; if (q->has_module && !q->dw.module_name_matches(q->module_val)) return DWARF_CB_OK; if (q->sess.verbose>2) clog << "focused on module '" << q->dw.module_name << "' = [" << hex << q->dw.module_start << "-" << q->dw.module_end << ", bias " << q->dw.module_bias << "]" << dec << "\n"; if (q->has_inline_num || q->has_function_num || q->has_statement_num) { // If we have module("foo").function(0xbeef) or // module("foo").statement(0xbeef), the address is relative // to the start of the module, so we seek the function // number plus the module's bias. Dwarf_Addr addr; if (q->has_function_num) addr = q->function_num_val; else if (q->has_inline_num) addr = q->inline_num_val; else addr = q->statement_num_val; // NB: We should not have kernel.* here; global addresses // should have bypassed query_module in dwarf_builder::build // and gone directly to query_cu. assert (!q->has_kernel); assert (q->has_module); q->dw.query_cu_containing_module_address(addr, q); } else { // Otherwise if we have a function("foo") or statement("foo") // specifier, we have to scan over all the CUs looking for // the function(s) in question q->dw.limit_search_to_function_pattern(q->function); assert(q->has_function_str || q->has_inline_str || q->has_statement_str); q->dw.iterate_over_cus(&query_cu, q); // If we just processed the module "kernel", and the user asked for // the kernel pattern, there's no need to iterate over any further // modules if (q->has_kernel && q->dw.module_name_matches(TOK_KERNEL)) return DWARF_CB_ABORT; } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } struct var_expanding_copy_visitor: public deep_copy_visitor { static unsigned tick; stack target_symbol_setter_functioncalls; var_expanding_copy_visitor() {} void visit_assignment (assignment* e); }; struct dwarf_var_expanding_copy_visitor: public var_expanding_copy_visitor { dwarf_query & q; Dwarf_Die *scope_die; Dwarf_Addr addr; dwarf_var_expanding_copy_visitor(dwarf_query & q, Dwarf_Die *sd, Dwarf_Addr a): q(q), scope_die(sd), addr(a) {} void visit_target_symbol (target_symbol* e); }; unsigned var_expanding_copy_visitor::tick = 0; void var_expanding_copy_visitor::visit_assignment (assignment* e) { // Our job would normally be to require() the left and right sides // into a new assignment. What we're doing is slightly trickier: // we're pushing a functioncall** onto a stack, and if our left // child sets the functioncall* for that value, we're going to // assume our left child was a target symbol -- transformed into a // set_target_foo(value) call, and it wants to take our right child // as the argument "value". // // This is why some people claim that languages with // constructor-decomposing case expressions have a leg up on // visitors. functioncall *fcall = NULL; expression *new_left, *new_right; target_symbol_setter_functioncalls.push (&fcall); require (this, &new_left, e->left); target_symbol_setter_functioncalls.pop (); require (this, &new_right, e->right); if (fcall != NULL) { // Our left child is informing us that it was a target variable // and it has been replaced with a set_target_foo() function // call; we are going to provide that function call -- with the // right child spliced in as sole argument -- in place of // ourselves, in the deep copy we're in the middle of making. // FIXME: for the time being, we only support plan $foo = bar, // not += or any other op= variant. This is fixable, but a bit // ugly. if (e->op != "=") throw semantic_error ("Operator-assign expressions on target " "variables not implemented", e->tok); assert (new_left == fcall); fcall->args.push_back (new_right); provide (this, fcall); } else { assignment* n = new assignment; n->op = e->op; n->tok = e->tok; n->left = new_left; n->right = new_right; provide (this, n); } } void dwarf_var_expanding_copy_visitor::visit_target_symbol (target_symbol *e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; bool lvalue = is_active_lvalue(e); if (lvalue && !q.sess.guru_mode) throw semantic_error("write to target variable not permitted", e->tok); string fname = (string(lvalue ? "_dwarf_tvar_set" : "_dwarf_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(tick++)); if (q.has_return && e->base_name != "$return") throw semantic_error ("target variables not available to .return probes"); try { if (q.has_return && e->base_name == "$return") { ec->code = q.dw.literal_stmt_for_return (scope_die, addr, e->components, lvalue, fdecl->type); } else { ec->code = q.dw.literal_stmt_for_local (scope_die, addr, e->base_name.substr(1), e->components, lvalue, fdecl->type); } if (! lvalue) ec->code += "/* pure */"; } catch (const semantic_error& er) { // We suppress this error message, and pass the unresolved // target_symbol to the next pass. We hope that this value ends // up not being referenced after all, so it can be optimized out // quietly. provide (this, e); delete fdecl; delete ec; return; } fdecl->name = fname; fdecl->body = ec; if (lvalue) { // Modify the fdecl so it carries a single pe_long formal // argument called "value". // FIXME: For the time being we only support setting target // variables which have base types; these are 'pe_long' in // stap's type vocabulary. Strings and pointers might be // reasonable, some day, but not today. vardecl *v = new vardecl; v->type = pe_long; v->name = "value"; v->tok = e->tok; fdecl->formal_args.push_back(v); } q.sess.functions.push_back(fdecl); // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session if (lvalue) { // Provide the functioncall to our parent, so that it can be // used to substitute for the assignment node immediately above // us. assert(!target_symbol_setter_functioncalls.empty()); *(target_symbol_setter_functioncalls.top()) = n; } provide (this, n); } void dwarf_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } void dwarf_derived_probe::add_probe_point(string const & funcname, char const * filename, int line, Dwarf_Addr addr, dwarf_query & q) { string module_name(q.dw.module_name); // "Adding a probe point" means two things: // // // 1. Adding an addr to the probe-point vector probe_points.push_back(addr); // 2. Extending the "locations" vector vector comps; comps.push_back (module_name == TOK_KERNEL ? new probe_point::component(TOK_KERNEL) : new probe_point::component(TOK_MODULE, new literal_string(module_name))); string fn_or_stmt; if (q.has_function_str || q.has_function_num) fn_or_stmt = "function"; else if (q.has_inline_str || q.has_inline_num) fn_or_stmt = "inline"; else fn_or_stmt = "statement"; if (q.has_function_str || q.has_inline_str || q.has_statement_str) { string retro_name = funcname; if (filename && !string (filename).empty()) retro_name += ("@" + string (filename)); if (line != -1) retro_name += (":" + lex_cast (line)); comps.push_back (new probe_point::component (fn_or_stmt, new literal_string (retro_name))); } else if (q.has_function_num || q.has_inline_num || q.has_statement_num) { Dwarf_Addr retro_addr; if (q.has_function_num) retro_addr = q.function_num_val; else if (q.has_inline_num) retro_addr = q.inline_num_val; else retro_addr = q.statement_num_val; comps.push_back (new probe_point::component (fn_or_stmt, new literal_number(retro_addr))); // XXX: should be hex if possible } if (has_return) comps.push_back (new probe_point::component(TOK_RETURN)); assert(q.base_probe->locations.size() > 0); locations.push_back(new probe_point(comps, q.base_probe->locations[0]->tok)); } dwarf_derived_probe::dwarf_derived_probe (Dwarf_Die *scope_die, Dwarf_Addr addr, dwarf_query & q) : derived_probe (q.base_probe, 0 /* location-less */), has_return (q.has_return) { string module_name(q.dw.module_name); // Lock the kernel module in memory. if (module_name != TOK_KERNEL) { // XXX: There is a race window here, between the time that libdw // opened up this same file for its relocation duties, and now. int fd = q.sess.module_fds[module_name]; if (fd == 0) { string sys_module = "/sys/module/" + module_name + "/sections/.text"; fd = open (sys_module.c_str(), O_RDONLY); if (fd < 0) throw semantic_error ("error opening module refcount-bumping file."); q.sess.module_fds[module_name] = fd; } } // Now make a local-variable-expanded copy of the probe body dwarf_var_expanding_copy_visitor v (q, scope_die, addr); require (&v, &(this->body), q.base_probe->body); this->tok = q.base_probe->tok; } void dwarf_derived_probe::register_relative_variants(match_node * root, dwarf_builder * dw) { // Here we match 2 forms: // // . // .relative(NN) root->bind(dw); root->bind_num(TOK_RELATIVE)->bind(dw); } void dwarf_derived_probe::register_statement_variants(match_node * root, dwarf_builder * dw) { // Here we match 3 forms: // // . // .return // .label("foo") register_relative_variants(root, dw); register_relative_variants(root->bind_str(TOK_LABEL), dw); } void dwarf_derived_probe::register_inline_variants(match_node * root, dwarf_builder * dw) { // Here we match 4 forms: // // . // .callees // .callees(N) // // The last form permits N-level callee resolving without any // recursive .callees.callees.callees... pattern-matching on our part. root->bind(dw); root->bind(TOK_CALLEES)->bind(dw); root->bind_num(TOK_CALLEES)->bind(dw); } void dwarf_derived_probe::register_function_variants(match_node * root, dwarf_builder * dw) { // Here we match 4 forms: // // . // .return // .callees // .callees(N) // // The last form permits N-level callee resolving without any // recursive .callees.callees.callees... pattern-matching on our part. root->bind(dw); root->bind(TOK_RETURN)->bind(dw); root->bind(TOK_CALLEES)->bind(dw); root->bind_num(TOK_CALLEES)->bind(dw); } void dwarf_derived_probe::register_function_and_statement_variants(match_node * root, dwarf_builder * dw) { // Here we match 4 forms: // // .function("foo") // .function(0xdeadbeef) // .inline("foo") // .inline(0xdeadbeef) // .statement("foo") // .statement(0xdeadbeef) register_function_variants(root->bind_str(TOK_FUNCTION), dw); register_function_variants(root->bind_num(TOK_FUNCTION), dw); register_inline_variants(root->bind_str(TOK_INLINE), dw); register_inline_variants(root->bind_num(TOK_INLINE), dw); register_statement_variants(root->bind_str(TOK_STATEMENT), dw); register_statement_variants(root->bind_num(TOK_STATEMENT), dw); } void dwarf_derived_probe::register_patterns(match_node * root) { dwarf_builder *dw = new dwarf_builder(); // Here we match 3 forms: // // .kernel // .module("foo") // .process("foo") register_function_and_statement_variants(root->bind(TOK_KERNEL), dw); register_function_and_statement_variants(root->bind_str(TOK_MODULE), dw); // register_function_and_statement_variants(root->bind_str(TOK_PROCESS), dw); } void dwarf_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { string func_name = "enter_" + name; string index_var = "i_" + lex_cast(index); o->newline(); o->newline() << "probe_point = dwarf_kprobe_" << name << "_location_names[0];"; o->newline() << "for (" << index_var << " = 0; " << index_var << " < " << probe_points.size() << "; " << index_var << "++) {"; o->indent(1); string probe_name = string("dwarf_kprobe_") + name + string("[") + index_var + string("]"); #if 0 // XXX: triggers false negatives on RHEL4U2 kernel // emit address verification code o->newline() << "void *addr = " << probe_name; if (has_return) o->line() << ".kp.addr;"; else o->line() << ".addr;"; o->newline() << "rc = ! virt_addr_valid (addr);"; #endif if (has_return) { o->newline() << "#ifdef ARCH_SUPPORTS_KRETPROBES"; o->newline() << probe_name << ".handler = &" << func_name << ";"; o->newline() << probe_name << ".maxactive = max(10, 4 * NR_CPUS);"; // XXX: pending PR 1289 // o->newline() << probe_name << ".kp_fault_handler = &stap_kprobe_fault_handler;"; o->newline() << "rc = rc || register_kretprobe (&(" << probe_name << "));"; o->newline() << "#else"; o->newline() << "rc = 1;"; o->newline() << "#endif"; } else { o->newline() << probe_name << ".pre_handler = &" << func_name << ";"; // XXX: pending PR 1289 // o->newline() << probe_name << ".kp_fault_handler = &stap_kprobe_fault_handler;"; o->newline() << "rc = rc || register_kprobe (&(" << probe_name << "));"; } o->newline() << "if (unlikely (rc)) {"; o->newline(1) << "probe_point = " << string("dwarf_kprobe_") + name + string("_location_names[") << index_var << "];"; o->newline() << "break;"; o->newline(-1) << "}"; o->newline(-1) << "}"; // if one failed, must goto code (output by emit_registrations_end) // that will roll back completed registations for this probe o->newline() << "if (unlikely (rc))"; o->newline(1) << "goto unwind_dwarf_" << index << ";"; o->indent(-1); } void dwarf_derived_probe::emit_registrations_end (translator_output* o, unsigned index) { string index_var = "i_" + lex_cast(index); string probe_name = string("dwarf_kprobe_") + name + string("[") + index_var + string("]"); // if one failed, must roll back completed registations for this probe o->newline(-1) << "unwind_dwarf_" << index << ":"; o->newline(1) << "while (--" << index_var << " >= 0)"; o->indent(1); if (has_return) { o->newline() << "#ifdef ARCH_SUPPORTS_KRETPROBES"; o->newline() << "unregister_kretprobe (&(" << probe_name << "));"; o->newline() << "#else"; o->newline() << ";"; o->newline() << "#endif"; } else o->newline() << "unregister_kprobe (&(" << probe_name << "));"; o->indent(-1); } void dwarf_derived_probe::emit_deregistrations (translator_output* o) { string probe_name = string("dwarf_kprobe_") + name + string("[i]"); o->newline() << "for (i = 0; i < " << probe_points.size() << "; i++) {"; o->indent(1); if (has_return) { o->newline() << "#ifdef ARCH_SUPPORTS_KRETPROBES"; o->newline() << "atomic_add (" << probe_name << ".kp.nmissed," << "& skipped_count);"; o->newline() << "atomic_add (" << probe_name << ".nmissed," << "& skipped_count);"; o->newline() << "unregister_kretprobe (&(" << probe_name << "));"; o->newline() << "#else"; o->newline() << ";"; o->newline() << "#endif"; } else { o->newline() << "atomic_add (" << probe_name << ".nmissed," << "& skipped_count);"; o->newline() << "unregister_kprobe (&(" << probe_name << "));"; } o->newline(-1) << "}"; } void dwarf_derived_probe::emit_probe_entries (translator_output* o) { static unsigned already_emitted_fault_handler = 0; if (! already_emitted_fault_handler) { o->newline() << "int stap_kprobe_fault_handler (struct kprobe* kp, " << "struct pt_regs* regs, int trapnr) {"; o->newline(1) << "struct context* c = per_cpu_ptr (contexts, smp_processor_id());"; o->newline() << "_stp_warn (\"systemtap probe fault\\n\");"; o->newline() << "_stp_warn (\"cpu %d, probe %s, near %s\\n\", "; o->newline(1) << "smp_processor_id(), "; o->newline() << "c->probe_point ? c->probe_point : \"unknown\", "; o->newline() << "c->last_stmt ? c->last_stmt : \"unknown\");"; o->newline() << "c->last_error = \"probe faulted\";"; o->newline(-1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; o->newline() << "return 0;"; // defer to kernel fault handler // NB: We might prefer to use "return 1" instead, to consider // the fault "handled". But we may get into an infinite loop // of traps if the faulting instruction is simply restarted. o->newline(-1) << "}"; already_emitted_fault_handler ++; } // Emit arrays of probes and location names. string probe_array = string("dwarf_kprobe_") + name; string string_array = probe_array + "_location_names"; assert(locations.size() == probe_points.size()); if (has_return) { o->newline() << "#ifdef ARCH_SUPPORTS_KRETPROBES"; o->newline() << "static struct kretprobe " << probe_array << "[" << probe_points.size() << "]" << "= {"; } else o->newline() << "static struct kprobe " << probe_array << "[" << probe_points.size() << "]" << "= {"; o->indent(1); for (vector::const_iterator i = probe_points.begin(); i != probe_points.end(); ++i) { if (i != probe_points.begin()) o->line() << ","; if (has_return) o->newline() << "{.kp.addr= (void *) 0x" << hex << *i << dec << "}"; else o->newline() << "{.addr= (void *) 0x" << hex << *i << dec << "}"; } o->newline(-1) << "};"; if (has_return) o->newline() << "#endif /* ARCH_SUPPORTS_KRETPROBES */"; o->newline(); // This is somewhat gross, but it should work: we allocate a // *parallel* array of strings containing the location of each // probe. You can calculate which kprobe or kretprobe you're in by // taking the difference of the struct kprobe pointer and the base // of the kprobe array and dividing by the size of the struct kprobe // (or kretprobe), then you can use this index into the string table // here to work out the *name* of the probe you're in. // // Sorry. assert(probe_points.size() == locations.size()); o->newline() << "static char const * " << string_array << "[" << locations.size() << "] = {"; o->indent(1); for (vector::const_iterator i = locations.begin(); i != locations.end(); ++i) { if (i != locations.begin()) o->line() << ","; o->newline() << lex_cast_qstring(*(*i)); } o->newline(-1) << "};"; o->newline(); o->newline() << "#ifdef STP_TIMING"; o->newline() << "static __cacheline_aligned Stat " << "time_" << basest()->name << ";"; o->newline() << "#endif"; // Construct a single entry function, and a struct kprobe pointing into // the entry function. The entry function will call the probe function. o->newline(); if (has_return) o->newline() << "#ifdef ARCH_SUPPORTS_KRETPROBES"; o->newline() << "static int "; o->newline() << "enter_" << name << " ("; if (has_return) o->line() << "struct kretprobe_instance *probe_instance"; else o->line() << "struct kprobe *probe_instance"; o->line() << ", struct pt_regs *regs) {"; o->indent(1); // Calculate the name of the current probe by finding its index in the probe array. if (has_return) o->newline() << "const char* probe_point = " << string_array << "[ (probe_instance->rp - &(" << probe_array << "[0]))];"; else o->newline() << "const char* probe_point = " << string_array << "[ (probe_instance - &(" << probe_array << "[0]))];"; emit_probe_prologue (o, "STAP_SESSION_RUNNING"); o->newline() << "c->regs = regs;"; // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline() << "return 0;"; o->newline(-1) << "}\n"; if (has_return) o->newline() << "#endif /* ARCH_SUPPORTS_KRETPROBES */"; o->newline(); } void dwarf_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void dwarf_derived_probe_group::emit_module_init (translator_output* o) { if (probes.size () == 0) return; // Output the dwarf probes create function o->newline() << "static int register_dwarf_probes (void) {"; o->indent(1); o->newline() << "int rc = 0;"; o->newline() << "const char *probe_point;"; for (unsigned i=0; i < probes.size (); i++) o->newline() << "int i_" << i << ";"; for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o, i); o->newline() << "goto out;"; o->newline(); for (int i=probes.size() - 1; i >= 0; i--) probes[i]->emit_registrations_end (o, i); o->newline(); o->newline() << "if (unlikely (rc)) {"; // In case it's just a lower-layer (kprobes) error that set rc but // not session_state, do that here to prevent any other BEGIN probe // from attempting to run. o->newline(1) << "atomic_set (&session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_error (\"dwarf probe %s registration failed, rc=%d\\n\", probe_point, rc);"; o->newline(-1) << "}\n"; o->newline(-1) << "out:"; o->newline(1) << "return rc;"; o->newline(-1) << "}\n"; // Output the dwarf probes destroy function o->newline() << "static void unregister_dwarf_probes (void) {"; o->newline(1) << "int i;"; for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_deregistrations (o); emit_probe_timing(probes[i], o); } o->newline(-1) << "}\n"; } void dwarf_builder::build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { dwflpp *dw = NULL; string dummy; bool has_kernel = dwarf_query::has_null_param(parameters, TOK_KERNEL); bool has_module = dwarf_query::get_string_param(parameters, TOK_MODULE, dummy); if (has_kernel || has_module) { if (!kern_dw) { kern_dw = new dwflpp(sess); assert(kern_dw); kern_dw->setup(true); } dw = kern_dw; } else { if (!user_dw) { user_dw = new dwflpp(sess); assert(user_dw); user_dw->setup(false); } dw = user_dw; } assert(dw); dwarf_query q(sess, base, location, *dw, parameters, finished_results); if (q.has_kernel && (q.has_function_num || q.has_inline_num || q.has_statement_num)) { // If we have kernel.function(0xbeef), or // kernel.statement(0xbeef) the address is global (relative to // the kernel) and we can seek directly to the module and cudie // in question. Dwarf_Addr a; if (q.has_function_num) a = q.function_num_val; else if (q.has_inline_num) a = q.inline_num_val; else a = q.statement_num_val; dw->focus_on_module_containing_global_address(a); dw->query_cu_containing_global_address(a, &q); } else { // Otherwise we have module("*bar*"), kernel.statement("foo"), or // kernel.function("foo"); in these cases we need to scan all // the modules. assert((q.has_kernel && q.has_function_str) || (q.has_kernel && q.has_inline_str) || (q.has_kernel && q.has_statement_str) || (q.has_module)); if (q.has_kernel) { int flag = 0; dw->iterate_over_modules(&query_kernel_exists, &flag); if (! flag) throw semantic_error ("cannot find kernel debuginfo"); } dw->iterate_over_modules(&query_module, &q); } } // ------------------------------------------------------------------------ // timer derived probes // ------------------------------------------------------------------------ struct timer_derived_probe: public derived_probe { int64_t interval, randomize; bool time_is_msecs; timer_derived_probe (probe* p, probe_point* l, int64_t i, int64_t r, bool ms=false); virtual void register_probe (systemtap_session& s); virtual void emit_registrations_start (translator_output* o, unsigned index); virtual void emit_registrations_end (translator_output * o, unsigned index); virtual void emit_deregistrations (translator_output * o); virtual void emit_probe_entries (translator_output * o); }; timer_derived_probe::timer_derived_probe (probe* p, probe_point* l, int64_t i, int64_t r, bool ms): derived_probe (p, l), interval (i), randomize (r), time_is_msecs(ms) { if (interval <= 0 || interval > 1000000) // make i and r fit into plain ints throw semantic_error ("invalid interval for jiffies timer"); // randomize = 0 means no randomization if (randomize < 0 || randomize > interval) throw semantic_error ("invalid randomize for jiffies timer"); if (locations.size() != 1) throw semantic_error ("expect single probe point"); // so we don't have to loop over them in the other functions } void timer_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } void timer_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { o->newline(); o->newline() << "probe_point = \"" << *locations[0] << "\";"; o->newline() << "init_timer (& timer_" << name << ");"; o->newline() << "timer_" << name << ".expires = jiffies + "; if (time_is_msecs) o->line() << "msecs_to_jiffies("; o->line() << interval; if (randomize) o->line() << " + _stp_random_pm(" << randomize << ")"; if (time_is_msecs) o->line() << ")"; o->line() << ";"; o->newline() << "timer_" << name << ".function = & enter_" << name << ";"; o->newline() << "add_timer (& timer_" << name << ");"; // if one failed, must goto code (output by emit_registrations_end) // that will roll back completed registations for this probe o->newline() << "if (unlikely (rc))"; if (index == 0) o->newline(1) << "goto timer_error;"; else o->newline(1) << "goto unwind_timer_" << index - 1 << ";"; o->indent(-1); } void timer_derived_probe::emit_registrations_end (translator_output* o, unsigned index) { // if one failed, must roll back completed registations for this probe o->newline(-1) << "unwind_timer_" << index << ":"; o->newline(1) << "del_timer_sync (& timer_" << name << ");"; } void timer_derived_probe::emit_deregistrations (translator_output* o) { o->newline() << "del_timer_sync (& timer_" << name << ");"; } void timer_derived_probe::emit_probe_entries (translator_output* o) { o->newline() << "static struct timer_list timer_" << name << ";"; o->newline() << "static void enter_" << name << " (unsigned long val) {"; o->indent(1); o->newline() << "const char* probe_point = " << lex_cast_qstring(*locations[0]) << ";"; emit_probe_prologue (o, "STAP_SESSION_RUNNING"); o->newline() << "(void) val;"; o->newline() << "mod_timer (& timer_" << name << ", jiffies + "; if (time_is_msecs) o->line() << "msecs_to_jiffies("; o->line() << interval; if (randomize) o->line() << " + _stp_random_pm(" << randomize << ")"; if (time_is_msecs) o->line() << ")"; o->line() << ");"; // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline(-1) << "}\n"; } struct timer_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(timer_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; void timer_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void timer_derived_probe_group::emit_module_init (translator_output* o) { if (probes.size () == 0) return; // Output the timer probes create function o->newline() << "static int register_timer_probes (void) {"; o->indent(1); o->newline() << "int rc = 0;"; o->newline() << "const char *probe_point;"; for (unsigned i=0; i < probes.size (); i++) o->newline() << "int i_" << i << ";"; for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o, i); o->newline() << "goto out;"; o->newline(); for (int i=probes.size() - 2; i >= 0; i--) probes[i]->emit_registrations_end (o, i); o->newline(); o->newline(-1) << "timer_error:"; o->newline(1) << "if (unlikely (rc)) {"; // In case it's just a lower-layer (kprobes) error that set rc but // not session_state, do that here to prevent any other BEGIN probe // from attempting to run. o->newline(1) << "atomic_set (&session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_error (\"timer probe %s registration failed, rc=%d\\n\", probe_point, rc);"; o->newline(-1) << "}\n"; o->newline(-1) << "out:"; o->newline(1) << "return rc;"; o->newline(-1) << "}\n"; // Output the timer probes destroy function o->newline() << "static void unregister_timer_probes (void) {"; o->indent(1); for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_deregistrations (o); emit_probe_timing(probes[i], o); } o->newline(-1) << "}\n"; } // ------------------------------------------------------------------------ // profile derived probes // ------------------------------------------------------------------------ // On kernels < 2.6.10, this uses the register_profile_notifier API to // generate the timed events for profiling; on kernels >= 2.6.10 this // uses the register_timer_hook API. The latter doesn't currently allow // simultaneous users, so insertion will fail if the profiler is busy. // (Conflicting users may include OProfile, other SystemTap probes, etc.) struct profile_derived_probe: public derived_probe { // kernels < 2.6.10: use register_profile_notifier API // kernels >= 2.6.10: use register_timer_hook API bool using_rpn; profile_derived_probe (systemtap_session &s, probe* p, probe_point* l); void register_probe (systemtap_session& s); virtual void emit_registrations_start (translator_output* o, unsigned index); virtual void emit_registrations_end (translator_output * o, unsigned index); virtual void emit_deregistrations (translator_output * o); virtual void emit_probe_entries (translator_output * o); }; profile_derived_probe::profile_derived_probe (systemtap_session &s, probe* p, probe_point* l): derived_probe(p, l) { using_rpn = (strverscmp(s.kernel_base_release.c_str(), "2.6.10") < 0); } void profile_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } void profile_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { o->newline(); o->newline() << "probe_point = \"" << *locations[0] << "\";"; if (using_rpn) o->newline() << "rc = register_profile_notifier(& profile_" << name << ");"; else o->newline() << "rc = register_timer_hook(enter_" << name << ");"; // if one failed, must goto code (output by emit_registrations_end) // that will roll back completed registations for other probes of // this type. o->newline() << "if (unlikely (rc))"; if (index == 0) o->newline(1) << "goto profile_error;"; else o->newline(1) << "goto unwind_profile_" << index - 1 << ";"; o->indent(-1); } void profile_derived_probe::emit_registrations_end (translator_output* o, unsigned index) { // if one failed, must roll back completed registations for this // type of probe o->newline(-1) << "unwind_profile_" << index << ":"; o->indent(1); emit_deregistrations (o); } void profile_derived_probe::emit_deregistrations (translator_output* o) { if (using_rpn) o->newline() << "unregister_profile_notifier(& profile_" << name << ");"; else o->newline() << "unregister_timer_hook(enter_" << name << ");"; } void profile_derived_probe::emit_probe_entries (translator_output* o) { if (using_rpn) { o->newline() << "static int enter_" << name << " (struct notifier_block *self, unsigned long val, void *data);"; o->newline() << "static struct notifier_block profile_" << name << " = {"; o->newline(1) << ".notifier_call = enter_" << name << ","; o->newline(-1) << "};"; o->newline() << "int enter_" << name << " (struct notifier_block *self, unsigned long val, void *data) {"; o->newline(1) << "struct pt_regs *regs = (struct pt_regs *)data;"; o->indent(-1); } else { o->newline() << "static int enter_" << name << " (struct pt_regs *regs);"; o->newline() << "int enter_" << name << " (struct pt_regs *regs) {"; } o->indent(1); o->newline() << "const char* probe_point = " << lex_cast_qstring(*locations[0]) << ";"; emit_probe_prologue (o, "STAP_SESSION_RUNNING"); o->newline() << "c->regs = regs;"; if (using_rpn) { o->newline() << "(void) self;"; o->newline() << "(void) val;"; } o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline() << "return 0;"; o->newline(-1) << "}\n"; } struct profile_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(profile_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; void profile_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void profile_derived_probe_group::emit_module_init (translator_output* o) { if (probes.size () == 0) return; // Output the profile probes create function o->newline() << "static int register_profile_probes (void) {"; o->indent(1); o->newline() << "int rc = 0;"; o->newline() << "const char *probe_point;"; for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o, i); o->newline() << "goto out;"; o->newline(); for (int i=probes.size() - 2; i >= 0; i--) probes[i]->emit_registrations_end (o, i); o->newline(); o->newline(-1) << "profile_error:"; o->newline(1) << "if (unlikely (rc)) {"; // In case it's just a lower-layer (kprobes) error that set rc but // not session_state, do that here to prevent any other BEGIN probe // from attempting to run. o->newline(1) << "atomic_set (&session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_error (\"profile probe %s registration failed, rc=%d\\n\", probe_point, rc);"; o->newline(-1) << "}\n"; o->newline(-1) << "out:"; o->newline(1) << "return rc;"; o->newline(-1) << "}\n"; // Output the profile probes destroy function o->newline() << "static void unregister_profile_probes (void) {"; o->indent(1); for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_deregistrations (o); emit_probe_timing(probes[i], o); } o->newline(-1) << "}\n"; } struct profile_builder: public derived_probe_builder { profile_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { finished_results.push_back(new profile_derived_probe(sess, base, location)); } }; // ------------------------------------------------------------------------ // statically inserted macro-based derived probes // ------------------------------------------------------------------------ struct mark_derived_probe: public derived_probe { mark_derived_probe (systemtap_session &s, const string& probe_name, const string& probe_sig, uintptr_t address, const string& module, probe* base_probe); systemtap_session& sess; string probe_name, probe_sig; uintptr_t address; string module; string probe_sig_expanded; void register_probe (systemtap_session& s); void emit_registrations_start (translator_output * o, unsigned index); void emit_registrations_end (translator_output * o, unsigned index); void emit_deregistrations (translator_output * o); void emit_probe_entries (translator_output * o); void emit_probe_context_vars (translator_output* o); }; struct mark_var_expanding_copy_visitor: public var_expanding_copy_visitor { mark_var_expanding_copy_visitor(systemtap_session& s, const string& ms, const string& pn): sess (s), mark_signature (ms), probe_name (pn) {} systemtap_session& sess; string mark_signature; string probe_name; void visit_target_symbol (target_symbol* e); }; void mark_var_expanding_copy_visitor::visit_target_symbol (target_symbol* e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); if (e->base_name.substr(0,4) != "$arg") throw semantic_error ("invalid target symbol for marker, $argN expected", e->tok); string argnum_s = e->base_name.substr(4,e->base_name.length()-4); int argnum = atoi (argnum_s.c_str()); if (argnum < 1 || argnum > (int) mark_signature.size()) throw semantic_error ("invalid marker argument number", e->tok); char argtype = mark_signature[argnum-1]; // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; if (is_active_lvalue (e)) throw semantic_error("write to marker parameter not permitted", e->tok); string fname = string("_mark_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(tick++); ec->code = string("THIS->__retvalue = CONTEXT->locals[0].") + probe_name + string(".__mark_arg") + lex_cast(argnum) + string (";"); ec->code += "/* pure */"; fdecl->name = fname; fdecl->body = ec; fdecl->type = (argtype == 'N' ? pe_long : argtype == 'S' ? pe_string : pe_unknown); // cannot happen sess.functions.push_back(fdecl); // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session provide (this, n); } mark_derived_probe::mark_derived_probe (systemtap_session &s, const string& p_n, const string& p_s, uintptr_t a, const string& m, probe* base): derived_probe (base, 0), sess (s), probe_name (p_n), probe_sig (p_s), address (a), module (m) { // create synthetic probe point probe_point* pp = new probe_point; probe_point::component* c; if (module == "") c = new probe_point::component ("kernel"); else c = new probe_point::component ("module", new literal_string (module)); pp->components.push_back (c); c = new probe_point::component ("mark", new literal_string (probe_name)); pp->components.push_back (c); this->locations.push_back (pp); // expand the signature string for (unsigned i=0; i 0) probe_sig_expanded += ", "; switch (probe_sig[i]) { case 'N': probe_sig_expanded += "int64_t"; break; case 'S': probe_sig_expanded += "const char *"; break; default: throw semantic_error ("unsupported probe signature " + probe_sig, this->tok); } probe_sig_expanded += " arg" + lex_cast(i+1); // arg1 ... } // Now make a local-variable-expanded copy of the probe body mark_var_expanding_copy_visitor v (sess, probe_sig, name); require (&v, &(this->body), base->body); if (sess.verbose > 1) clog << "marker-based " << name << " address=0x" << hex << address << dec << " signature=" << probe_sig << endl; } void mark_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } void mark_derived_probe::emit_probe_context_vars (translator_output* o) { // Save incoming arguments for (unsigned i=0; i(i+1); switch (probe_sig[i]) { case 'S': o->newline() << "string_t " << localname << ";"; break; case 'N': o->newline() << "int64_t " << localname << ";"; break; } } } void mark_derived_probe::emit_probe_entries (translator_output* o) { assert (this->locations.size() == 1); o->newline() << "static void enter_" << name << " (" << probe_sig_expanded << ")"; o->newline() << "{"; o->newline(1) << "const char* probe_point = " << lex_cast_qstring(* this->locations[0]) << ";"; emit_probe_prologue (o, "STAP_SESSION_RUNNING"); // Save incoming arguments for (unsigned k=0; k(k+1); string argname = "arg" + lex_cast(k+1); switch (probe_sig[k]) { case 'S': o->newline() << "strlcpy (" << localname << ", " << argname << ", MAXSTRINGLEN);"; break; // XXX: dupe with c_unparser::c_strcpy case 'N': o->newline() << localname << " = " << argname << ";"; break; } } // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline(-1) << "}"; } void mark_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { assert (this->locations.size() == 1); o->newline() << "{"; o->newline(1) << "void (**fn) (" << probe_sig_expanded << ") = (void *)" << address << "UL;"; o->newline() << "#if __HAVE_ARCH_CMPXCHG"; o->newline() << "unsigned long *fnpp = (unsigned long *) (void *) fn;"; o->newline() << "unsigned long fnp = (unsigned long) (void *) & enter_" << name << ";"; o->newline() << "unsigned long oldval = cmpxchg (fnpp, 0, fnp);"; o->newline() << "if (oldval != 0) rc = 1;"; // XXX: could retry a few times o->newline() << "#else"; // XXX: need proper synchronization for concurrent registration attempts o->newline() << "if (*fn == 0) *fn = & enter_" << name << ";"; o->newline() << "#endif"; o->newline() << "mb ();"; o->newline() << "if (*fn != & enter_" << name << ") rc = 1;"; o->newline(-1) << "}"; // if one failed, must goto code (output by emit_registrations_end) // that will roll back completed registations for this probe o->newline() << "if (unlikely (rc)) {"; o->newline(1) << "probe_point = " << lex_cast_qstring (*this->locations[0]) << ";"; if (index == 0) o->newline() << "goto mark_error;"; else o->newline() << "goto unwind_mark_" << index - 1 << ";"; o->newline(-1) << "}"; } void mark_derived_probe::emit_registrations_end (translator_output* o, unsigned index) { // if one failed, must roll back completed registations for this probe o->newline(-1) << "unwind_mark_" << index << ":"; o->indent(1); emit_deregistrations (o); } void mark_derived_probe::emit_deregistrations (translator_output * o) { assert (this->locations.size() == 1); o->newline() << "{"; o->newline(1) << "void (**fn) (" << probe_sig_expanded << ") = (void *)" << address << "UL;"; o->newline() << "#if __HAVE_ARCH_CMPXCHG"; o->newline() << "unsigned long *fnpp = (unsigned long *) (void *) fn;"; o->newline() << "unsigned long fnp = (unsigned long) (void *) & enter_" << name << ";"; o->newline() << "unsigned long oldval = cmpxchg (fnpp, fnp, 0);"; o->newline() << "if (oldval != fnp) ;"; // XXX: should not happen o->newline() << "#else"; o->newline(0) << "*fn = 0;"; o->newline() << "#endif"; o->newline(-1) << "}"; } struct mark_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(mark_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; void mark_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void mark_derived_probe_group::emit_module_init (translator_output* o) { if (probes.size () == 0) return; // Output the mark probes create function o->newline() << "static int register_mark_probes (void) {"; o->indent(1); o->newline() << "int rc = 0;"; o->newline() << "const char *probe_point;"; for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o, i); o->newline() << "goto out;"; o->newline(); for (int i=probes.size() - 2; i >= 0; i--) probes[i]->emit_registrations_end (o, i); o->newline(); o->newline(-1) << "mark_error:"; o->newline(1) << "if (unlikely (rc)) {"; // In case it's just a lower-layer (kprobes) error that set rc but // not session_state, do that here to prevent any other BEGIN probe // from attempting to run. o->newline(1) << "atomic_set (&session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_error (\"mark probe %s registration failed, rc=%d\\n\", probe_point, rc);"; o->newline(-1) << "}\n"; o->newline(-1) << "out:"; o->newline(1) << "return rc;"; o->newline(-1) << "}\n"; // Output the mark probes destroy function o->newline() << "static void unregister_mark_probes (void) {"; o->indent(1); for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_deregistrations (o); emit_probe_timing(probes[i], o); } o->newline(-1) << "}\n"; } struct symboltable_extract { uintptr_t address; string symbol; string module; }; #define PROBE_SYMBOL_PREFIX "__systemtap_mark_" struct mark_builder: public derived_probe_builder { private: static const vector* get_symbols (systemtap_session&); public: mark_builder() {} void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results); }; // Until elfutils makes this straightforward, we kludge. // See also translate.cxx:emit_symbol_data(). const vector* mark_builder::get_symbols (systemtap_session& sess) { static vector* syms = 0; if (syms) return syms; // already computed syms = new vector; // Process /proc/kallsyms - contains reliable module symbols ifstream kallsyms ("/proc/kallsyms"); while (! kallsyms.eof()) { string addr, type, sym, module; kallsyms >> addr >> type >> sym; kallsyms >> ws; if (kallsyms.peek() == '[') { string bracketed; kallsyms >> bracketed; module = bracketed.substr (1, bracketed.length()-2); } else // kernel symbols come from /boot/System.map* continue; if (type == "b" || type == "d") // static data/bss { symboltable_extract e; e.address = strtoul (addr.c_str(), 0, 16); e.symbol = sym; e.module = module; syms->push_back (e); } } kallsyms.close (); // grab them kernel symbols string smname = "/boot/System.map-"; smname += sess.kernel_release; ifstream systemmap (smname.c_str()); while (! systemmap.eof()) { string addr, type, sym, module; systemmap >> addr >> type >> sym; module = ""; if (type == "b" || type == "d") // static data/bss { symboltable_extract e; e.address = strtoul (addr.c_str(), 0, 16); e.symbol = sym; e.module = module; syms->push_back (e); } } systemmap.close (); return syms; } void mark_builder::build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { const vector* syms = get_symbols (sess); string param_module; bool has_module = get_param (parameters, "module", param_module); bool has_kernel = (parameters.find("kernel") != parameters.end()); if (! (has_module ^ has_kernel)) throw semantic_error ("need kernel or module() component", location->tok); string param_probe; bool has_probe = get_param (parameters, "mark", param_probe); if (! has_probe) throw semantic_error ("need mark() component", location->tok); string symbol_regex = PROBE_SYMBOL_PREFIX "([a-zA-Z0-9_]+)_([NS]*)\\.[0-9]+"; // ^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^^^^^ ^^^^^^ // common prefix probe name types suffix regex_t symbol_regex_t; int rc = regcomp (& symbol_regex_t, symbol_regex.c_str(), REG_EXTENDED); if (rc) throw semantic_error ("regcomp '" + symbol_regex + "' failed"); // cout << "searching for " << symbol_regex << endl; for (unsigned i=0; isize(); i++) { regmatch_t match[3]; const symboltable_extract& ext = syms->at(i); const char* symstr = ext.symbol.c_str(); rc = regexec (& symbol_regex_t, symstr, 3, match, 0); if (! rc) // match { #if 0 cout << "match in " << symstr << ":" << "[" << match[0].rm_so << "-" << match[0].rm_eo << "]," << "[" << match[1].rm_so << "-" << match[1].rm_eo << "]," << "[" << match[2].rm_so << "-" << match[2].rm_eo << "]" << endl; #endif string probe_name = string (symstr + match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); string probe_sig = string (symstr + match[2].rm_so, (match[2].rm_eo - match[2].rm_so)); // Below, "rc" has negative polarity: zero iff matching rc = (has_module ? fnmatch (param_module.c_str(), ext.module.c_str(), 0) : (ext.module != "")); // kernel.* rc |= fnmatch (param_probe.c_str(), probe_name.c_str(), 0); if (! rc) { // cout << "match (" << probe_name << "):" << probe_sig << endl; derived_probe *dp = new mark_derived_probe (sess, probe_name, probe_sig, ext.address, ext.module, base); finished_results.push_back (dp); } } } // cout << "done" << endl; // It's not a big deal if this is skipped due to an exception. regfree (& symbol_regex_t); } // ------------------------------------------------------------------------ // hrtimer derived probes // ------------------------------------------------------------------------ // This is a new timer interface that provides more flexibility in specifying // intervals, and uses the hrtimer APIs when available for greater precision. // While hrtimers were added in 2.6.16, the API's weren't exported until // 2.6.17, so we must check this kernel version before attempting to use // hrtimers. // // * hrtimer_derived_probe: creates a probe point based on the hrtimer APIs. struct hrtimer_derived_probe: public derived_probe { // set a (generous) maximum of one day in ns static const int64_t max_ns_interval = 1000000000LL * 60LL * 60LL * 24LL; // 100us seems like a reasonable minimum static const int64_t min_ns_interval = 100000LL; int64_t interval, randomize; hrtimer_derived_probe (probe* p, probe_point* l, int64_t i, int64_t r): derived_probe (p, l), interval (i), randomize (r) { if ((i < min_ns_interval) || (i > max_ns_interval)) throw semantic_error("interval value out of range"); // randomize = 0 means no randomization if ((r < 0) || (r > i)) throw semantic_error("randomization value out of range"); if (locations.size() != 1) throw semantic_error ("expect single probe point"); // so we don't have to loop over them in the other functions } void register_probe (systemtap_session& s); virtual void emit_interval (translator_output * o); virtual void emit_registrations_start (translator_output * o, unsigned index); virtual void emit_registrations_end (translator_output * o, unsigned index); virtual void emit_deregistrations (translator_output * o); virtual void emit_probe_entries (translator_output * o); }; void hrtimer_derived_probe::register_probe(systemtap_session& s) { s.probes.register_probe(this); } void hrtimer_derived_probe::emit_interval (translator_output* o) { o->line() << "({"; o->newline(1) << "unsigned long nsecs;"; o->newline() << "int64_t i = " << interval << "LL;"; if (randomize != 0) { o->newline() << "int64_t r;"; o->newline() << "get_random_bytes(&r, sizeof(r));"; // ensure that r is positive o->newline() << "r &= ((uint64_t)1 << (8*sizeof(r) - 1)) - 1;"; o->newline() << "r = _stp_mod64(NULL, r, " << (2*randomize + 1) << "LL);"; o->newline() << "r -= " << randomize << "LL;"; o->newline() << "i += r;"; } o->newline() << "if (unlikely(i < _stp_hrtimer_res))"; o->newline(1) << "i = _stp_hrtimer_res;"; o->indent(-1); o->newline() << "nsecs = do_div(i, NSEC_PER_SEC);"; o->newline() << "ktime_set(i, nsecs);"; o->newline(-1) << "})"; } void hrtimer_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { o->newline() << "hrtimer_init (& timer_" << name << ", CLOCK_MONOTONIC, HRTIMER_REL);"; o->newline() << "timer_" << name << ".function = enter_" << name << ";"; o->newline() << "hrtimer_start (& timer_" << name << ", "; emit_interval(o); o->line() << ", HRTIMER_REL);"; } void hrtimer_derived_probe::emit_registrations_end (translator_output* o, unsigned index) { // nothing to do here... } void hrtimer_derived_probe::emit_deregistrations (translator_output* o) { o->newline() << "hrtimer_cancel (& timer_" << name << ");"; } void hrtimer_derived_probe::emit_probe_entries (translator_output* o) { o->newline() << "static int enter_" << name << " (struct hrtimer *);"; o->newline() << "static struct hrtimer timer_" << name << ";"; o->newline() << "int enter_" << name << " (struct hrtimer *timer) {"; o->newline(1) << "int restart = HRTIMER_NORESTART;"; o->newline() << "const char* probe_point = " << lex_cast_qstring(*locations[0]) << ";"; emit_probe_prologue (o, "STAP_SESSION_RUNNING"); o->newline() << "(void) timer;"; // hrtimer_forward would be preferable, but it's not exported. We already // guarantee that the interval is >= the timer resolution though, so this is // essentially the same. o->newline() << "timer_" << name << ".expires = ktime_add(timer_" << name << ".expires, "; emit_interval(o); o->line() << ");"; o->newline() << "restart = HRTIMER_RESTART;"; // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline() << "return restart;"; o->newline(-1) << "}\n"; } struct hrtimer_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(hrtimer_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; void hrtimer_derived_probe_group::emit_probes (translator_output* op, unparser* up) { if (probes.size () == 0) return; op->newline(); op->newline() << "static int64_t _stp_hrtimer_res;"; for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void hrtimer_derived_probe_group::emit_module_init (translator_output* o) { if (probes.size () == 0) return; // Output the hrtimer probes create function o->newline() << "static int register_hrtimer_probes (void) {"; o->newline(1) << "struct timespec res;"; o->newline() << "hrtimer_get_res(CLOCK_MONOTONIC, &res);"; o->newline() << "_stp_hrtimer_res = timespec_to_ns(&res);"; o->newline(); for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_registrations_start (o, i); o->newline (); } o->newline() << "return 0;"; o->newline(-1) << "}\n"; // Output the hrtimer probes destroy function o->newline() << "static void unregister_hrtimer_probes (void) {"; o->indent(1); for (unsigned i=0; i < probes.size (); i++) { probes[i]->emit_deregistrations (o); emit_probe_timing(probes[i], o); } o->newline(-1) << "}\n"; } struct timer_builder: public derived_probe_builder { virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results); static void register_patterns(match_node *root); }; void timer_builder::build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { int64_t period, rand=0; if (!get_param(parameters, "randomize", rand)) rand = 0; if (get_param(parameters, "jiffies", period)) { // always use basic timers for jiffies finished_results.push_back( new timer_derived_probe(base, location, period, rand, false)); return; } else if (get_param(parameters, "hz", period)) { if (period <= 0) throw semantic_error ("frequency must be greater than 0"); period = (1000000000 + period - 1)/period; } else if (get_param(parameters, "s", period) || get_param(parameters, "sec", period)) { period *= 1000000000; rand *= 1000000000; } else if (get_param(parameters, "ms", period) || get_param(parameters, "msec", period)) { period *= 1000000; rand *= 1000000; } else if (get_param(parameters, "us", period) || get_param(parameters, "usec", period)) { period *= 1000; rand *= 1000; } else if (get_param(parameters, "ns", period) || get_param(parameters, "nsec", period)) { // ok } else throw semantic_error ("unrecognized timer variant"); if (strverscmp(sess.kernel_base_release.c_str(), "2.6.17") < 0) { // hrtimers didn't exist, so use the old-school timers period = (period + 1000000 - 1)/1000000; rand = (rand + 1000000 - 1)/1000000; finished_results.push_back( new timer_derived_probe(base, location, period, rand, true)); } else finished_results.push_back( new hrtimer_derived_probe(base, location, period, rand)); } void timer_builder::register_patterns(match_node *root) { derived_probe_builder *builder = new timer_builder(); root = root->bind("timer"); root->bind_num("s")->bind(builder); root->bind_num("s")->bind_num("randomize")->bind(builder); root->bind_num("sec")->bind(builder); root->bind_num("sec")->bind_num("randomize")->bind(builder); root->bind_num("ms")->bind(builder); root->bind_num("ms")->bind_num("randomize")->bind(builder); root->bind_num("msec")->bind(builder); root->bind_num("msec")->bind_num("randomize")->bind(builder); root->bind_num("us")->bind(builder); root->bind_num("us")->bind_num("randomize")->bind(builder); root->bind_num("usec")->bind(builder); root->bind_num("usec")->bind_num("randomize")->bind(builder); root->bind_num("ns")->bind(builder); root->bind_num("ns")->bind_num("randomize")->bind(builder); root->bind_num("nsec")->bind(builder); root->bind_num("nsec")->bind_num("randomize")->bind(builder); root->bind_num("jiffies")->bind(builder); root->bind_num("jiffies")->bind_num("randomize")->bind(builder); root->bind_num("hz")->bind(builder); } // ------------------------------------------------------------------------ // perfmon derived probes // ------------------------------------------------------------------------ // This is a new interface to the perfmon hw. // struct perfmon_var_expanding_copy_visitor: public var_expanding_copy_visitor { systemtap_session & sess; unsigned counter_number; perfmon_var_expanding_copy_visitor(systemtap_session & s, unsigned c): sess(s), counter_number(c) {} void visit_target_symbol (target_symbol* e); }; void perfmon_var_expanding_copy_visitor::visit_target_symbol (target_symbol *e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; bool lvalue = is_active_lvalue(e); if (lvalue ) throw semantic_error("writes to $counter not permitted"); string fname = string("_perfmon_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(counter_number); if (e->base_name != "$counter") throw semantic_error ("target variables not available to perfmon probes"); ec->code = "THIS->__retvalue = _pfm_pmd_x[" + lex_cast(counter_number) + "].reg_num;"; ec->code += "/* pure */"; fdecl->name = fname; fdecl->body = ec; fdecl->type = pe_long; sess.functions.push_back(fdecl); // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session provide (this, n); } enum perfmon_mode { perfmon_count, perfmon_sample }; struct perfmon_derived_probe: public derived_probe { protected: static unsigned probes_allocated; public: systemtap_session & sess; string event; perfmon_mode mode; perfmon_derived_probe (probe* p, probe_point* l, systemtap_session &s, string e, perfmon_mode m); virtual void register_probe (systemtap_session& s); virtual void emit_registrations_start (translator_output* o, unsigned index); virtual void emit_registrations_end (translator_output * o, unsigned index); virtual void emit_deregistrations (translator_output * o); virtual void emit_probe_entries (translator_output * o); }; struct perfmon_derived_probe_group: public derived_probe_group { private: vector probes; public: virtual void register_probe(perfmon_derived_probe* p) { probes.push_back (p); } virtual size_t size () { return probes.size (); } virtual void emit_probes (translator_output* op, unparser* up); virtual void emit_module_init (translator_output* o); }; struct perfmon_builder: public derived_probe_builder { perfmon_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { string event; if (!get_param (parameters, "counter", event)) throw semantic_error("perfmon requires an event"); sess.perfmon++; // XXX: need to revise when doing sampling finished_results.push_back(new perfmon_derived_probe(base, location, sess, event, perfmon_count)); } }; unsigned perfmon_derived_probe::probes_allocated; perfmon_derived_probe::perfmon_derived_probe (probe* p, probe_point* l, systemtap_session &s, string e, perfmon_mode m) : derived_probe (p, l), sess(s), event(e), mode(m) { ++probes_allocated; // Now make a local-variable-expanded copy of the probe body perfmon_var_expanding_copy_visitor v (sess, probes_allocated-1); require (&v, &(this->body), base->body); if (sess.verbose > 1) clog << "perfmon-based probe" << endl; } void perfmon_derived_probe::register_probe (systemtap_session& s) { s.probes.register_probe(this); } void perfmon_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { for (unsigned i=0; inewline() << "enter_" << name << "_" << i << " ();"; } void perfmon_derived_probe::emit_registrations_end (translator_output * o, unsigned index) { } void perfmon_derived_probe::emit_deregistrations (translator_output * o) { } void perfmon_derived_probe::emit_probe_entries (translator_output * o) { o->newline() << "#ifdef STP_TIMING"; o->newline() << "static __cacheline_aligned Stat " << "time_" << basest()->name << ";"; o->newline() << "#endif"; for (unsigned i=0; inewline() << "/* location " << i << ": " << *l << " */"; o->newline() << "static void enter_" << name << "_" << i << " (void) {"; o->indent(1); o->newline() << "const char* probe_point = " << lex_cast_qstring(*l) << ";"; emit_probe_prologue (o, (mode == perfmon_count ? "STAP_SESSION_STARTING" : "STAP_SESSION_RUNNING")); // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline(-1) << "}\n"; } } #ifdef PERFMON void no_pfm_event_error (string s) { string msg(string("Cannot find event:" + s)); throw semantic_error(msg); } void no_pfm_mask_error (string s) { string msg(string("Cannot find mask:" + s)); throw semantic_error(msg); } void split(const string& s, vector& v, const string & separator) { string::size_type last_pos = s.find_first_not_of(separator, 0); string::size_type pos = s.find_first_of(separator, last_pos); while (string::npos != pos || string::npos != last_pos) { v.push_back(s.substr(last_pos, pos - last_pos)); last_pos = s.find_first_not_of(separator, pos); pos = s.find_first_of(separator, last_pos); } } void perfmon_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void perfmon_derived_probe_group::emit_module_init (translator_output* o) { int ret; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmd_t pd[PFMLIB_MAX_PMDS]; pfarg_pmc_t pc[PFMLIB_MAX_PMCS]; pfarg_ctx_t ctx; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; unsigned int max_counters; if ( probes.size() == 0) return; ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) throw semantic_error("Unable to generate performance monitoring events (no libpfm)"); pfm_get_num_counters(&max_counters); memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* figure out the events */ for (unsigned i=0; ievent == "cycles") { if (pfm_get_cycle_event( &inp.pfp_events[i].event) != PFMLIB_SUCCESS) no_pfm_event_error(probes[i]->event); } else if (probes[i]->event == "instructions") { if (pfm_get_inst_retired_event( &inp.pfp_events[i].event) != PFMLIB_SUCCESS) no_pfm_event_error(probes[i]->event); } else { unsigned int event_id = 0; unsigned int mask_id = 0; vector event_spec; split(probes[i]->event, event_spec, ":"); int num = event_spec.size(); int masks = num - 1; if (num == 0) throw semantic_error("No events found"); /* setup event */ if (pfm_find_event(event_spec[0].c_str(), &event_id) != PFMLIB_SUCCESS) no_pfm_event_error(event_spec[0]); inp.pfp_events[i].event = event_id; /* set up masks */ if (masks > PFMLIB_MAX_MASKS_PER_EVENT) throw semantic_error("Too many unit masks specified"); for (int j=0; j < masks; j++) { if (pfm_find_event_mask(event_id, event_spec[j+1].c_str(), &mask_id) != PFMLIB_SUCCESS) no_pfm_mask_error(string(event_spec[j+1])); inp.pfp_events[i].unit_masks[j] = mask_id; } inp.pfp_events[i].num_masks = masks; } } /* number of counters in use */ inp.pfp_event_count = probes.size(); // XXX: no elimination of duplicated counters if (inp.pfp_event_count>max_counters) throw semantic_error("Too many performance monitoring events."); /* count events both in kernel and user-space */ inp.pfp_dfl_plm = PFM_PLM0 | PFM_PLM3; /* XXX: some cases a perfmon register might be used of watch dog this code doesn't handle that case */ /* figure out the pmcs for the events */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) throw semantic_error("Cannot configure events"); for (unsigned i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * There could be more pmc settings than pmd. * Figure out the actual pmds to use. */ for (unsigned i=0, j=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = outp.pfp_pmcs[j].reg_pmd_num; for(; j < outp.pfp_pmc_count; j++) if (outp.pfp_pmcs[j].reg_evt_idx != i) break; } // Output the be probes create function o->newline() << "static int register_perfmon_probes (void) {"; o->newline(1) << "int rc = 0;"; o->newline() << "/* data for perfmon */"; o->newline() << "static int _pfm_num_pmc = " << outp.pfp_pmc_count << ";"; o->newline() << "static struct pfarg_pmc _pfm_pmc[" << outp.pfp_pmc_count << "] = {"; /* output the needed bits for pmc here */ for (unsigned i=0; i < outp.pfp_pmc_count; i++) { o->newline() << "{.reg_num=" << pc[i].reg_num << ", " << ".reg_value=" << lex_cast_hex(pc[i].reg_value) << "},"; } o->newline() << "};"; o->newline() << "static int _pfm_num_pmd = " << inp.pfp_event_count << ";"; o->newline() << "static struct pfarg_pmd _pfm_pmd[" << inp.pfp_event_count << "] = {"; /* output the needed bits for pmd here */ for (unsigned i=0; i < inp.pfp_event_count; i++) { o->newline() << "{.reg_num=" << pd[i].reg_num << ", " << ".reg_value=" << pd[i].reg_value << "},"; } o->newline() << "};"; o->newline(); o->newline() << "_pfm_pmc_x=_pfm_pmc;"; o->newline() << "_pfm_num_pmc_x=_pfm_num_pmc;"; o->newline() << "_pfm_pmd_x=_pfm_pmd;"; o->newline() << "_pfm_num_pmd_x=_pfm_num_pmd;"; // call all the function bodies associated with perfcounters for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o,i); /* generate call to turn on instrumentation */ o->newline() << "_pfm_context.ctx_flags |= PFM_FL_SYSTEM_WIDE;"; o->newline() << "rc = rc || _stp_perfmon_setup(&_pfm_desc, &_pfm_context,"; o->newline(1) << "_pfm_pmc, _pfm_num_pmc,"; o->newline() << "_pfm_pmd, _pfm_num_pmd);"; o->newline(-1); o->newline() << "return rc;"; o->newline(-1) << "}\n"; // Output the be probes destroy function o->newline() << "static void unregister_perfmon_probes (void) {"; o->newline(1) << "_stp_perfmon_shutdown(_pfm_desc);"; o->newline(-1) << "}\n"; } #else void perfmon_derived_probe_group::emit_probes (translator_output* op, unparser* up) { } void perfmon_derived_probe_group::emit_module_init (translator_output* o) { } #endif /* PERFMON */ // ------------------------------------------------------------------------ // Standard tapset registry. // ------------------------------------------------------------------------ void register_standard_tapsets(systemtap_session & s) { s.pattern_root->bind("begin")->bind(new be_builder(true)); s.pattern_root->bind("end")->bind(new be_builder(false)); s.pattern_root->bind("never")->bind(new never_builder()); timer_builder::register_patterns(s.pattern_root); s.pattern_root->bind("timer")->bind("profile")->bind(new profile_builder()); s.pattern_root->bind("perfmon")->bind_str("counter")->bind(new perfmon_builder()); // dwarf-based kernel/module parts dwarf_derived_probe::register_patterns(s.pattern_root); // marker-based kernel/module parts s.pattern_root->bind("kernel")->bind_str("mark")->bind(new mark_builder()); s.pattern_root->bind_str("module")->bind_str("mark")->bind(new mark_builder()); } derived_probe_group_container::derived_probe_group_container (): be_probe_group(new be_derived_probe_group), dwarf_probe_group(new dwarf_derived_probe_group), hrtimer_probe_group(new hrtimer_derived_probe_group), mark_probe_group(new mark_derived_probe_group), never_probe_group(new never_derived_probe_group), profile_probe_group(new profile_derived_probe_group), timer_probe_group(new timer_derived_probe_group), perfmon_probe_group(new perfmon_derived_probe_group) { } derived_probe_group_container::~derived_probe_group_container () { delete be_probe_group; delete dwarf_probe_group; delete hrtimer_probe_group; delete mark_probe_group; delete never_probe_group; delete profile_probe_group; delete timer_probe_group; delete perfmon_probe_group; } void derived_probe_group_container::register_probe(be_derived_probe* p) { probes.push_back (p); be_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(dwarf_derived_probe* p) { probes.push_back (p); dwarf_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(hrtimer_derived_probe* p) { probes.push_back (p); hrtimer_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(mark_derived_probe* p) { probes.push_back (p); mark_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(never_derived_probe* p) { probes.push_back (p); never_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(profile_derived_probe* p) { probes.push_back (p); profile_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(timer_derived_probe* p) { probes.push_back (p); timer_probe_group->register_probe(p); } void derived_probe_group_container::register_probe(perfmon_derived_probe* p) { probes.push_back (p); perfmon_probe_group->register_probe(p); } void derived_probe_group_container::emit_probes (translator_output* op, unparser* up) { // Sanity check. size_t groups_size = be_probe_group->size () + dwarf_probe_group->size () + hrtimer_probe_group->size () + mark_probe_group->size () + never_probe_group->size () + profile_probe_group->size () + timer_probe_group->size () + perfmon_probe_group->size (); if (probes.size () != groups_size) { cerr << "There are " << probes.size () << " total probes, and " << groups_size << " grouped probes\n"; throw runtime_error("internal probe mismatch"); } // Let each probe group emit its probes. be_probe_group->emit_probes (op, up); dwarf_probe_group->emit_probes (op, up); hrtimer_probe_group->emit_probes (op, up); mark_probe_group->emit_probes (op, up); never_probe_group->emit_probes (op, up); profile_probe_group->emit_probes (op, up); timer_probe_group->emit_probes (op, up); perfmon_probe_group->emit_probes (op, up); } void derived_probe_group_container::emit_module_init (translator_output* o) { // Let each probe group emit its module init logic. be_probe_group->emit_module_init (o); dwarf_probe_group->emit_module_init (o); hrtimer_probe_group->emit_module_init (o); mark_probe_group->emit_module_init (o); never_probe_group->emit_module_init (o); profile_probe_group->emit_module_init (o); timer_probe_group->emit_module_init (o); perfmon_probe_group->emit_module_init(o); } #define PERFMON_ERROR_LABEL "unregister_perfmon" #define BE_ERROR_LABEL "unregister_be" #define DWARF_ERROR_LABEL "unregister_dwarf" #define HRTIMER_ERROR_LABEL "unregister_hrtimer" #define MARK_ERROR_LABEL "unregister_mark" #define PROFILE_ERROR_LABEL "unregister_profile" #define TIMER_ERROR_LABEL "unregister_timer" void derived_probe_group_container::emit_module_init_call (translator_output* o) { int i = 0; const char *error_label = ""; if (perfmon_probe_group->size () > 0) { o->newline() << "rc = register_perfmon_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. o->newline() << "goto out;"; o->indent(-1); i++; error_label = PERFMON_ERROR_LABEL; } if (be_probe_group->size () > 0) { o->newline() << "rc = register_be_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. o->newline() << "goto out;"; o->indent(-1); i++; error_label = BE_ERROR_LABEL; } if (dwarf_probe_group->size () > 0) { o->newline() << "rc = register_dwarf_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. if (i > 0) o->newline() << "goto " << error_label << ";"; else o->newline() << "goto out;"; o->indent(-1); i++; error_label = DWARF_ERROR_LABEL; } if (hrtimer_probe_group->size () > 0) { o->newline() << "rc = register_hrtimer_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. if (i > 0) o->newline() << "goto " << error_label << ";"; else o->newline() << "goto out;"; o->indent(-1); i++; error_label = HRTIMER_ERROR_LABEL; } if (mark_probe_group->size () > 0) { o->newline() << "rc = register_mark_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. if (i > 0) o->newline() << "goto " << error_label << ";"; else o->newline() << "goto out;"; o->indent(-1); i++; error_label = MARK_ERROR_LABEL; } // We don't need to bother with the never_probe_group. if (profile_probe_group->size () > 0) { o->newline() << "rc = register_profile_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. if (i > 0) o->newline() << "goto " << error_label << ";"; else o->newline() << "goto out;"; o->indent(-1); i++; error_label = PROFILE_ERROR_LABEL; } if (timer_probe_group->size () > 0) { o->newline() << "rc = register_timer_probes ();"; o->newline() << "if (rc)"; o->indent(1); // We need to deregister any already probes set up - this is // essential for kprobes. if (i > 0) o->newline() << "goto " << error_label << ";"; else o->newline() << "goto out;"; o->indent(-1); i++; error_label = TIMER_ERROR_LABEL; } // BEGIN probes would have all been run by now. One of them may // have triggered a STAP_SESSION_ERROR (which would incidentally // block later BEGIN ones). If so, let that indication stay, and // otherwise act like probe insertion was a success. o->newline() << "if (atomic_read (&session_state) == STAP_SESSION_STARTING)"; o->newline(1) << "atomic_set (&session_state, STAP_SESSION_RUNNING);"; o->newline(-1) << "goto out;"; // Recovery code for partially successful registration (rc != 0) // XXX: Do we need to delay here to ensure any triggered probes have // terminated? Probably not much, as they should all test for // SESSION_STARTING state right at the top and return. ("begin" // probes don't count, as they return synchronously.) o->newline(); if (i > 0 && timer_probe_group->size () > 0) { o->newline(-1) << TIMER_ERROR_LABEL << ":"; o->newline(1) << "unregister_timer_probes();"; i--; } if (i > 0 && profile_probe_group->size () > 0) { o->newline(-1) << PROFILE_ERROR_LABEL << ":"; o->newline(1) << "unregister_profile_probes();"; i--; } // We don't need to bother with the never_probe_group. if (i > 0 && mark_probe_group->size () > 0) { o->newline(-1) << MARK_ERROR_LABEL << ":"; o->newline(1) << "unregister_mark_probes();"; i--; } if (i > 0 && hrtimer_probe_group->size () > 0) { o->newline(-1) << HRTIMER_ERROR_LABEL << ":"; o->newline(1) << "unregister_hrtimer_probes();"; i--; } if (i > 0 && dwarf_probe_group->size () > 0) { o->newline(-1) << DWARF_ERROR_LABEL << ":"; o->newline(1) << "unregister_dwarf_probes();"; i--; } if (i > 0 && be_probe_group->size () > 0) { o->newline(-1) << BE_ERROR_LABEL << ":"; o->newline(1) << "unregister_be_probes();"; i--; } if (i > 0 && perfmon_probe_group->size () > 0) { o->newline(-1) << PERFMON_ERROR_LABEL << ":"; o->newline(1) << "unregister_perfmon_probes();"; i--; } } void derived_probe_group_container::emit_module_exit (translator_output* o) { if (be_probe_group->size () > 0) o->newline() << "unregister_be_probes ();"; if (dwarf_probe_group->size () > 0) o->newline() << "unregister_dwarf_probes ();"; if (hrtimer_probe_group->size () > 0) o->newline() << "unregister_hrtimer_probes ();"; if (mark_probe_group->size () > 0) o->newline() << "unregister_mark_probes ();"; // We don't need to bother with the never_probe_group. if (profile_probe_group->size () > 0) o->newline() << "unregister_profile_probes ();"; if (timer_probe_group->size () > 0) o->newline() << "unregister_timer_probes ();"; if (perfmon_probe_group->size () > 0) o->newline() << "unregister_perfmon_probes ();"; }