// tapset resolution // Copyright (C) 2005-2009 Red Hat Inc. // Copyright (C) 2005-2007 Intel Corporation. // Copyright (C) 2008 James.Bottomley@HansenPartnership.com // // This file is part of systemtap, and is free software. You can // redistribute it and/or modify it under the terms of the GNU General // Public License (GPL); either version 2, or (at your option) any // later version. #include "config.h" #include "staptree.h" #include "elaborate.h" #include "tapsets.h" #include "translate.h" #include "session.h" #include "util.h" #include "buildrun.h" #include "dwarf_wrappers.h" #include "auto_free.h" #include "hash.h" #include #include #include #include #include #ifdef HAVE_TR1_UNORDERED_MAP #include #else #include #endif #include #include #include #include #include #include #include #include extern "C" { #include #include #include #include #include #include #include #include #include #include #include #include "loc2c.h" #define __STDC_FORMAT_MACROS #include } #ifdef PERFMON #include #include #endif using namespace std; using namespace __gnu_cxx; // ------------------------------------------------------------------------ // Generic derived_probe_group: contains an ordinary vector of the // given type. It provides only the enrollment function. template struct generic_dpg: public derived_probe_group { protected: vector probes; public: generic_dpg () {} void enroll (DP* probe) { probes.push_back (probe); } }; // ------------------------------------------------------------------------ // begin/end/error probes are run right during registration / deregistration // ------------------------------------------------------------------------ static string TOK_BEGIN("begin"); static string TOK_END("end"); static string TOK_ERROR("error"); enum be_t { BEGIN, END, ERROR }; struct be_derived_probe: public derived_probe { be_t type; int64_t priority; be_derived_probe (probe* p, probe_point* l, be_t t, int64_t pr): derived_probe (p, l), type (t), priority (pr) {} void join_group (systemtap_session& s); static inline bool comp(be_derived_probe const *a, be_derived_probe const *b) { // This allows the BEGIN/END/ERROR probes to intermingle. // But that's OK - they're always treversed with a nested // "if (type==FOO)" conditional. return a->priority < b->priority; } bool needs_global_locks () { return false; } // begin/end probes don't need locks around global variables, since // they aren't run concurrently with any other probes }; struct be_derived_probe_group: public generic_dpg { public: void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; struct be_builder: public derived_probe_builder { be_t type; be_builder(be_t t) : type(t) {} virtual void build(systemtap_session &, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { int64_t priority; if ((type == BEGIN && !get_param(parameters, TOK_BEGIN, priority)) || (type == END && !get_param(parameters, TOK_END, priority)) || (type == ERROR && !get_param(parameters, TOK_ERROR, priority))) priority = 0; finished_results.push_back( new be_derived_probe(base, location, type, priority)); } }; void be_derived_probe::join_group (systemtap_session& s) { if (! s.be_derived_probes) s.be_derived_probes = new be_derived_probe_group (); s.be_derived_probes->enroll (this); } // ------------------------------------------------------------------------ void common_probe_entryfn_prologue (translator_output* o, string statestr, string new_pp, bool overload_processing = true) { o->newline() << "struct context* __restrict__ c;"; o->newline() << "#if !INTERRUPTIBLE"; o->newline() << "unsigned long flags;"; o->newline() << "#endif"; if (overload_processing) o->newline() << "#if defined(STP_TIMING) || defined(STP_OVERLOAD)"; else o->newline() << "#ifdef STP_TIMING"; o->newline() << "cycles_t cycles_atstart = get_cycles ();"; o->newline() << "#endif"; #if 0 /* XXX: PERFMON */ o->newline() << "static struct pfarg_ctx _pfm_context;"; o->newline() << "static void *_pfm_desc;"; o->newline() << "static struct pfarg_pmc *_pfm_pmc_x;"; o->newline() << "static int _pfm_num_pmc_x;"; o->newline() << "static struct pfarg_pmd *_pfm_pmd_x;"; o->newline() << "static int _pfm_num_pmd_x;"; #endif o->newline() << "#if INTERRUPTIBLE"; o->newline() << "preempt_disable ();"; o->newline() << "#else"; o->newline() << "local_irq_save (flags);"; o->newline() << "#endif"; // Check for enough free enough stack space o->newline() << "if (unlikely ((((unsigned long) (& c)) & (THREAD_SIZE-1))"; // free space o->newline(1) << "< (MINSTACKSPACE + sizeof (struct thread_info)))) {"; // needed space // XXX: may need porting to platforms where task_struct is not at bottom of kernel stack // NB: see also CONFIG_DEBUG_STACKOVERFLOW o->newline() << "atomic_inc (& skipped_count);"; o->newline() << "#ifdef STP_TIMING"; o->newline() << "atomic_inc (& skipped_count_lowstack);"; o->newline() << "#endif"; o->newline() << "goto probe_epilogue;"; o->newline(-1) << "}"; o->newline() << "if (atomic_read (&session_state) != " << statestr << ")"; o->newline(1) << "goto probe_epilogue;"; o->indent(-1); o->newline() << "c = per_cpu_ptr (contexts, smp_processor_id());"; o->newline() << "if (atomic_inc_return (& c->busy) != 1) {"; o->newline(1) << "#if !INTERRUPTIBLE"; o->newline() << "atomic_inc (& skipped_count);"; o->newline() << "#endif"; o->newline() << "#ifdef STP_TIMING"; o->newline() << "atomic_inc (& skipped_count_reentrant);"; o->newline() << "#ifdef DEBUG_REENTRANCY"; o->newline() << "_stp_warn (\"Skipped %s due to %s residency on cpu %u\\n\", " << new_pp << ", c->probe_point ?: \"?\", smp_processor_id());"; // NB: There is a conceivable race condition here with reading // c->probe_point, knowing that this other probe is sort of running. // However, in reality, it's interrupted. Plus even if it were able // to somehow start again, and stop before we read c->probe_point, // at least we have that ?: "?" bit in there to avoid a NULL deref. o->newline() << "#endif"; o->newline() << "#endif"; o->newline() << "atomic_dec (& c->busy);"; o->newline() << "goto probe_epilogue;"; o->newline(-1) << "}"; o->newline(); o->newline() << "c->last_stmt = 0;"; o->newline() << "c->last_error = 0;"; o->newline() << "c->nesting = 0;"; o->newline() << "c->regs = 0;"; o->newline() << "c->unwaddr = 0;"; o->newline() << "c->probe_point = " << new_pp << ";"; // reset unwound address cache o->newline() << "c->pi = 0;"; o->newline() << "c->regparm = 0;"; o->newline() << "c->marker_name = NULL;"; o->newline() << "c->marker_format = NULL;"; o->newline() << "#if INTERRUPTIBLE"; o->newline() << "c->actionremaining = MAXACTION_INTERRUPTIBLE;"; o->newline() << "#else"; o->newline() << "c->actionremaining = MAXACTION;"; o->newline() << "#endif"; o->newline() << "#ifdef STP_TIMING"; o->newline() << "c->statp = 0;"; o->newline() << "#endif"; // NB: The following would actually be incorrect. // That's because cycles_sum/cycles_base values are supposed to survive // between consecutive probes. Periodically (STP_OVERLOAD_INTERVAL // cycles), the values will be reset. /* o->newline() << "#ifdef STP_OVERLOAD"; o->newline() << "c->cycles_sum = 0;"; o->newline() << "c->cycles_base = 0;"; o->newline() << "#endif"; */ } void common_probe_entryfn_epilogue (translator_output* o, bool overload_processing = true) { if (overload_processing) o->newline() << "#if defined(STP_TIMING) || defined(STP_OVERLOAD)"; else o->newline() << "#ifdef STP_TIMING"; o->newline() << "{"; o->newline(1) << "cycles_t cycles_atend = get_cycles ();"; // NB: we truncate cycles counts to 32 bits. Perhaps it should be // fewer, if the hardware counter rolls over really quickly. We // handle 32-bit wraparound here. o->newline() << "int32_t cycles_elapsed = ((int32_t)cycles_atend > (int32_t)cycles_atstart)"; o->newline(1) << "? ((int32_t)cycles_atend - (int32_t)cycles_atstart)"; o->newline() << ": (~(int32_t)0) - (int32_t)cycles_atstart + (int32_t)cycles_atend + 1;"; o->indent(-1); o->newline() << "#ifdef STP_TIMING"; o->newline() << "if (likely (c->statp)) _stp_stat_add(*c->statp, cycles_elapsed);"; o->newline() << "#endif"; if (overload_processing) { o->newline() << "#ifdef STP_OVERLOAD"; o->newline() << "{"; // If the cycle count has wrapped (cycles_atend > cycles_base), // let's go ahead and pretend the interval has been reached. // This should reset cycles_base and cycles_sum. o->newline(1) << "cycles_t interval = (cycles_atend > c->cycles_base)"; o->newline(1) << "? (cycles_atend - c->cycles_base)"; o->newline() << ": (STP_OVERLOAD_INTERVAL + 1);"; o->newline(-1) << "c->cycles_sum += cycles_elapsed;"; // If we've spent more than STP_OVERLOAD_THRESHOLD cycles in a // probe during the last STP_OVERLOAD_INTERVAL cycles, the probe // has overloaded the system and we need to quit. o->newline() << "if (interval > STP_OVERLOAD_INTERVAL) {"; o->newline(1) << "if (c->cycles_sum > STP_OVERLOAD_THRESHOLD) {"; o->newline(1) << "_stp_error (\"probe overhead exceeded threshold\");"; o->newline() << "atomic_set (&session_state, STAP_SESSION_ERROR);"; o->newline() << "atomic_inc (&error_count);"; o->newline(-1) << "}"; o->newline() << "c->cycles_base = cycles_atend;"; o->newline() << "c->cycles_sum = 0;"; o->newline(-1) << "}"; o->newline(-1) << "}"; o->newline() << "#endif"; } o->newline(-1) << "}"; o->newline() << "#endif"; o->newline() << "c->probe_point = 0;"; // vacated o->newline() << "if (unlikely (c->last_error && c->last_error[0])) {"; o->newline(1) << "if (c->last_stmt != NULL)"; o->newline(1) << "_stp_softerror (\"%s near %s\", c->last_error, c->last_stmt);"; o->newline(-1) << "else"; o->newline(1) << "_stp_softerror (\"%s\", c->last_error);"; o->indent(-1); o->newline() << "atomic_inc (& error_count);"; o->newline() << "if (atomic_read (& error_count) > MAXERRORS) {"; o->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_exit ();"; o->newline(-1) << "}"; o->newline(-1) << "}"; o->newline() << "atomic_dec (&c->busy);"; o->newline(-1) << "probe_epilogue:"; // context is free o->indent(1); // Check for excessive skip counts. o->newline() << "if (unlikely (atomic_read (& skipped_count) > MAXSKIPPED)) {"; o->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; o->newline() << "_stp_exit ();"; o->newline(-1) << "}"; o->newline() << "#if INTERRUPTIBLE"; o->newline() << "preempt_enable_no_resched ();"; o->newline() << "#else"; o->newline() << "local_irq_restore (flags);"; o->newline() << "#endif"; } // ------------------------------------------------------------------------ void be_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- begin/end probes ---- */"; s.op->newline() << "static void enter_begin_probe (void (*fn)(struct context*), const char* pp) {"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_STARTING", "pp", false); s.op->newline() << "(*fn) (c);"; common_probe_entryfn_epilogue (s.op, false); s.op->newline(-1) << "}"; s.op->newline() << "static void enter_end_probe (void (*fn)(struct context*), const char* pp) {"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_STOPPING", "pp", false); s.op->newline() << "(*fn) (c);"; common_probe_entryfn_epilogue (s.op, false); s.op->newline(-1) << "}"; s.op->newline() << "static void enter_error_probe (void (*fn)(struct context*), const char* pp) {"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_ERROR", "pp", false); s.op->newline() << "(*fn) (c);"; common_probe_entryfn_epilogue (s.op, false); s.op->newline(-1) << "}"; s.op->newline() << "static struct stap_be_probe {"; s.op->newline(1) << "void (*ph)(struct context*);"; s.op->newline() << "const char* pp;"; s.op->newline() << "int type;"; s.op->newline(-1) << "} stap_be_probes[] = {"; s.op->indent(1); // NB: We emit the table in sorted order here, so we don't have to // store the priority numbers as integers and sort at run time. sort(probes.begin(), probes.end(), be_derived_probe::comp); for (unsigned i=0; i < probes.size(); i++) { s.op->newline () << "{"; s.op->line() << " .pp=" << lex_cast_qstring (*probes[i]->sole_location()) << ","; s.op->line() << " .ph=&" << probes[i]->name << ","; s.op->line() << " .type=" << probes[i]->type; s.op->line() << " },"; } s.op->newline(-1) << "};"; } void be_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_be_probe* stp = & stap_be_probes [i];"; s.op->newline() << "if (stp->type != " << BEGIN << ") continue;"; s.op->newline() << "enter_begin_probe (stp->ph, stp->pp);"; s.op->newline() << "/* rc = 0; */"; // NB: begin probes that cause errors do not constitute registration // failures. An error message will probably get printed and if // MAXERRORS was left at 1, we'll get an stp_exit. The // error-handling probes will be run during the ordinary // unregistration phase. s.op->newline(-1) << "}"; } void be_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_be_probe* stp = & stap_be_probes [i];"; s.op->newline() << "if (stp->type != " << END << ") continue;"; s.op->newline() << "enter_end_probe (stp->ph, stp->pp);"; s.op->newline(-1) << "}"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_be_probe* stp = & stap_be_probes [i];"; s.op->newline() << "if (stp->type != " << ERROR << ") continue;"; s.op->newline() << "enter_error_probe (stp->ph, stp->pp);"; s.op->newline(-1) << "}"; } // ------------------------------------------------------------------------ // never probes are never run // ------------------------------------------------------------------------ static string TOK_NEVER("never"); struct never_derived_probe: public derived_probe { never_derived_probe (probe* p): derived_probe (p) {} never_derived_probe (probe* p, probe_point* l): derived_probe (p, l) {} void join_group (systemtap_session&) { /* thus no probe_group */ } }; struct never_builder: public derived_probe_builder { never_builder() {} virtual void build(systemtap_session &, probe * base, probe_point * location, literal_map_t const &, vector & finished_results) { finished_results.push_back(new never_derived_probe(base, location)); } }; // ------------------------------------------------------------------------ // Dwarf derived probes. "We apologize for the inconvience." // ------------------------------------------------------------------------ static string TOK_KERNEL("kernel"); static string TOK_MODULE("module"); static string TOK_FUNCTION("function"); static string TOK_INLINE("inline"); static string TOK_CALL("call"); static string TOK_RETURN("return"); static string TOK_MAXACTIVE("maxactive"); static string TOK_STATEMENT("statement"); static string TOK_ABSOLUTE("absolute"); static string TOK_PROCESS("process"); static string TOK_MARK("mark"); static string TOK_TRACE("trace"); static string TOK_LABEL("label"); // Can we handle this query with just symbol-table info? enum dbinfo_reqt { dbr_unknown, dbr_none, // kernel.statement(NUM).absolute dbr_need_symtab, // can get by with symbol table if there's no dwarf dbr_need_dwarf }; enum info_status { info_unknown, info_present, info_absent }; struct func_info { func_info() : decl_file(NULL), decl_line(-1), addr(0), prologue_end(0), weak(false) { memset(&die, 0, sizeof(die)); } string name; char const * decl_file; int decl_line; Dwarf_Die die; Dwarf_Addr addr; Dwarf_Addr entrypc; Dwarf_Addr prologue_end; bool weak; // Comparison functor for list of functions sorted by address. The // two versions that take a Dwarf_Addr let us use the STL algorithms // upper_bound, equal_range et al., but we don't know whether the // searched-for value will be passed as the first or the second // argument. struct Compare { bool operator() (const func_info* f1, const func_info* f2) const { return f1->addr < f2->addr; } // For doing lookups by address. bool operator() (Dwarf_Addr addr, const func_info* f2) const { return addr < f2->addr; } bool operator() (const func_info* f1, Dwarf_Addr addr) const { return f1->addr < addr; } }; }; struct inline_instance_info { inline_instance_info() : decl_file(NULL), decl_line(-1) { memset(&die, 0, sizeof(die)); } string name; char const * decl_file; int decl_line; Dwarf_Addr entrypc; Dwarf_Die die; }; struct base_query; // forward decls struct dwarf_query; struct dwflpp; struct symbol_table; struct module_info { Dwfl_Module* mod; const char* name; string elf_path; Dwarf_Addr addr; Dwarf_Addr bias; symbol_table *sym_table; info_status dwarf_status; // module has dwarf info? info_status symtab_status; // symbol table cached? void get_symtab(dwarf_query *q); module_info(const char *name) : mod(NULL), name(name), addr(0), bias(0), sym_table(NULL), dwarf_status(info_unknown), symtab_status(info_unknown) {} ~module_info(); }; struct module_cache { map cache; bool paths_collected; bool dwarf_collected; module_cache() : paths_collected(false), dwarf_collected(false) {} }; typedef struct module_cache module_cache_t; #ifdef HAVE_TR1_UNORDERED_MAP typedef tr1::unordered_map cu_function_cache_t; typedef tr1::unordered_map mod_cu_function_cache_t; // module:cu -> function -> die #else struct stringhash { // __gnu_cxx:: is needed because our own hash.h has an ambiguous hash<> decl too. size_t operator() (const string& s) const { __gnu_cxx::hash h; return h(s.c_str()); } }; typedef hash_map cu_function_cache_t; typedef hash_map mod_cu_function_cache_t; // module:cu -> function -> die #endif struct symbol_table { module_info *mod_info; // associated module map map_by_name; vector list_by_addr; typedef vector::iterator iterator_t; typedef pair range_t; #ifdef __powerpc__ GElf_Word opd_section; #endif // add_symbol doesn't leave symbol table in order; call // symbol_table::sort() when done adding symbols. void add_symbol(const char *name, bool weak, Dwarf_Addr addr, Dwarf_Addr *high_addr); void sort(); enum info_status read_symbols(FILE *f, const string& path); enum info_status read_from_elf_file(const string& path); enum info_status read_from_text_file(const string& path); enum info_status get_from_elf(); void prepare_section_rejection(Dwfl_Module *mod); bool reject_section(GElf_Word section); void mark_dwarf_redundancies(dwflpp *dw); void purge_syscall_stubs(); func_info *lookup_symbol(const string& name); Dwarf_Addr lookup_symbol_address(const string& name); func_info *get_func_containing_address(Dwarf_Addr addr); symbol_table(module_info *mi) : mod_info(mi) {} ~symbol_table(); }; static bool null_die(Dwarf_Die *die) { static Dwarf_Die null = { 0 }; return (!die || !memcmp(die, &null, sizeof(null))); } static int query_cu (Dwarf_Die * cudie, void * arg); // Helper for dealing with selected portions of libdwfl in a more readable // fashion, and with specific cleanup / checking / logging options. static const char * dwarf_diename_integrate (Dwarf_Die *die) { Dwarf_Attribute attr_mem; return dwarf_formstring (dwarf_attr_integrate (die, DW_AT_name, &attr_mem)); } enum line_t { ABSOLUTE, RELATIVE, RANGE, WILDCARD }; typedef vector inline_instance_map_t; typedef vector func_info_map_t; struct dwflpp { systemtap_session & sess; Dwfl * dwfl; // These are "current" values we focus on. Dwfl_Module * module; Dwarf * module_dwarf; Dwarf_Addr module_bias; module_info * mod_info; // These describe the current module's PC address range Dwarf_Addr module_start; Dwarf_Addr module_end; Dwarf_Die * cu; Dwarf_Die * function; string module_name; string cu_name; string function_name; string const default_name(char const * in, char const *) { if (in) return in; return string(""); } void get_module_dwarf(bool required = false, bool report = true) { module_dwarf = dwfl_module_getdwarf(module, &module_bias); mod_info->dwarf_status = (module_dwarf ? info_present : info_absent); if (!module_dwarf && report) { string msg = "cannot find "; if (module_name == "") msg += "kernel"; else msg += string("module ") + module_name; msg += " debuginfo"; int i = dwfl_errno(); if (i) msg += string(": ") + dwfl_errmsg (i); if (required) throw semantic_error (msg); else cerr << "WARNING: " << msg << "\n"; } } void focus_on_module(Dwfl_Module * m, module_info * mi) { module = m; mod_info = mi; if (m) { module_name = default_name(dwfl_module_info(module, NULL, &module_start, &module_end, NULL, NULL, NULL, NULL), "module"); } else { assert(mi && mi->name && mi->name == TOK_KERNEL); module_name = mi->name; module_start = 0; module_end = 0; module_bias = mi->bias; } // Reset existing pointers and names module_dwarf = NULL; cu_name.clear(); cu = NULL; function_name.clear(); function = NULL; } void focus_on_cu(Dwarf_Die * c) { assert(c); assert(module); cu = c; cu_name = default_name(dwarf_diename(c), "CU"); // Reset existing pointers and names function_name.clear(); function = NULL; } void focus_on_function(Dwarf_Die * f) { assert(f); assert(module); assert(cu); function = f; function_name = default_name(dwarf_diename(function), "function"); } void focus_on_module_containing_global_address(Dwarf_Addr a) { assert(dwfl); cu = NULL; Dwfl_Module* mod = dwfl_addrmodule(dwfl, a); if (mod) // address could be wildly out of range focus_on_module(mod, NULL); } void query_cu_containing_global_address(Dwarf_Addr a, void *arg) { Dwarf_Addr bias; assert(dwfl); get_module_dwarf(); Dwarf_Die* cudie = dwfl_module_addrdie(module, a, &bias); if (cudie) // address could be wildly out of range query_cu (cudie, arg); assert(bias == module_bias); } void query_cu_containing_module_address(Dwarf_Addr a, void *arg) { query_cu_containing_global_address(module_address_to_global(a), arg); } Dwarf_Addr module_address_to_global(Dwarf_Addr a) { assert(dwfl); assert(module); get_module_dwarf(); if (module_name == TOK_KERNEL || dwfl_module_relocations (module) <= 0) return a; return a + module_start; } Dwarf_Addr global_address_to_module(Dwarf_Addr a) { assert(module); get_module_dwarf(); return a - module_bias; } bool module_name_matches(string pattern) { bool t = (fnmatch(pattern.c_str(), module_name.c_str(), 0) == 0); if (t && sess.verbose>3) clog << "pattern '" << pattern << "' " << "matches " << "module '" << module_name << "'" << "\n"; return t; } bool name_has_wildcard(string pattern) { return (pattern.find('*') != string::npos || pattern.find('?') != string::npos || pattern.find('[') != string::npos); } bool module_name_final_match(string pattern) { // Assume module_name_matches(). Can there be any more matches? // Not unless the pattern is a wildcard, since module names are // presumed unique. return !name_has_wildcard(pattern); } bool function_name_matches_pattern(string name, string pattern) { bool t = (fnmatch(pattern.c_str(), name.c_str(), 0) == 0); if (t && sess.verbose>3) clog << "pattern '" << pattern << "' " << "matches " << "function '" << name << "'" << "\n"; return t; } bool function_name_matches(string pattern) { assert(function); return function_name_matches_pattern(function_name, pattern); } bool function_name_final_match(string pattern) { return module_name_final_match (pattern); } bool cu_name_matches(string pattern) { assert(cu); // PR 5049: implicit * in front of given path pattern. // NB: fnmatch() is used without FNM_PATHNAME. string prefixed_pattern = string("*/") + pattern; bool t = (fnmatch(pattern.c_str(), cu_name.c_str(), 0) == 0 || fnmatch(prefixed_pattern.c_str(), cu_name.c_str(), 0) == 0); if (t && sess.verbose>3) clog << "pattern '" << prefixed_pattern << "' " << "matches " << "CU '" << cu_name << "'" << "\n"; return t; } dwflpp(systemtap_session & session) : sess(session), dwfl(NULL), module(NULL), module_dwarf(NULL), module_bias(0), mod_info(NULL), module_start(0), module_end(0), cu(NULL), function(NULL) { } // XXX: See also translate.cxx:emit_symbol_data void setup_kernel(bool debuginfo_needed = true) { if (! sess.module_cache) sess.module_cache = new module_cache (); static const char *debuginfo_path_arr = "+:.debug:/usr/lib/debug:build"; static const char *debuginfo_env_arr = getenv("SYSTEMTAP_DEBUGINFO_PATH"); static const char *debuginfo_path = (debuginfo_env_arr ?: debuginfo_path_arr ); static const Dwfl_Callbacks kernel_callbacks = { dwfl_linux_kernel_find_elf, dwfl_standard_find_debuginfo, dwfl_offline_section_address, (char **) & debuginfo_path }; dwfl = dwfl_begin (&kernel_callbacks); if (!dwfl) throw semantic_error ("cannot open dwfl"); dwfl_report_begin (dwfl); // We have a problem with -r REVISION vs -r BUILDDIR here. If // we're running against a fedora/rhel style kernel-debuginfo // tree, s.kernel_build_tree is not the place where the unstripped // vmlinux will be installed. Rather, it's over yonder at // /usr/lib/debug/lib/modules/$REVISION/. It seems that there is // no way to set the dwfl_callback.debuginfo_path and always // passs the plain kernel_release here. So instead we have to // hard-code this magic here. string elfutils_kernel_path; if (sess.kernel_build_tree == string("/lib/modules/" + sess.kernel_release + "/build")) elfutils_kernel_path = sess.kernel_release; else elfutils_kernel_path = sess.kernel_build_tree; int rc = dwfl_linux_kernel_report_offline (dwfl, elfutils_kernel_path.c_str(), NULL); if (debuginfo_needed) dwfl_assert (string("missing ") + sess.architecture + string(" kernel/module debuginfo under '") + sess.kernel_build_tree + string("'"), rc); // XXX: it would be nice if we could do a single // ..._report_offline call for an entire systemtap script, so // that a selection predicate would filter out modules outside // the union of all the requested wildcards. But we build // derived_probes one-by-one and we don't have lookahead. // PR 3498. // XXX: a special case: if we have only kernel.* probe points, // we shouldn't waste time looking for module debug-info (and // vice versa). // NB: the result of an _offline call is the assignment of // virtualized addresses to relocatable objects such as // modules. These have to be converted to real addresses at // run time. See the dwarf_derived_probe ctor and its caller. dwfl_assert ("dwfl_report_end", dwfl_report_end(dwfl, NULL, NULL)); } void setup_user(string module_name, bool debuginfo_needed = true) { if (! sess.module_cache) sess.module_cache = new module_cache (); static const char *debuginfo_path_arr = "+:.debug:/usr/lib/debug:build"; static const char *debuginfo_env_arr = getenv("SYSTEMTAP_DEBUGINFO_PATH"); // NB: kernel_build_tree doesn't enter into this, as it's for // kernel-side modules only. static const char *debuginfo_path = (debuginfo_env_arr ?: debuginfo_path_arr); static const Dwfl_Callbacks user_callbacks = { NULL, /* dwfl_linux_kernel_find_elf, */ dwfl_standard_find_debuginfo, dwfl_offline_section_address, (char **) & debuginfo_path }; dwfl = dwfl_begin (&user_callbacks); if (!dwfl) throw semantic_error ("cannot open dwfl"); dwfl_report_begin (dwfl); // XXX: should support buildid-based naming Dwfl_Module *mod = dwfl_report_offline (dwfl, module_name.c_str(), module_name.c_str(), -1); // XXX: save mod! if (debuginfo_needed) dwfl_assert (string("missing process ") + module_name + string(" ") + sess.architecture + string(" debuginfo"), mod); if (!module) module = mod; // NB: the result of an _offline call is the assignment of // virtualized addresses to relocatable objects such as // modules. These have to be converted to real addresses at // run time. See the dwarf_derived_probe ctor and its caller. dwfl_assert ("dwfl_report_end", dwfl_report_end(dwfl, NULL, NULL)); } // ----------------------------------------------------------------- void iterate_over_modules(int (* callback)(Dwfl_Module *, void **, const char *, Dwarf_Addr, void *), base_query *data) { ptrdiff_t off = 0; do { if (pending_interrupts) return; off = dwfl_getmodules (dwfl, callback, data, off); } while (off > 0); // Don't complain if we exited dwfl_getmodules early. // This could be a $target variable error that will be // reported soon anyway. // dwfl_assert("dwfl_getmodules", off == 0); // PR6864 XXX: For dwarfless case (if .../vmlinux is missing), then the // "kernel" module is not reported in the loop above. However, we // may be able to make do with symbol table data. } // Defined after dwarf_query void query_modules(base_query *q); // ----------------------------------------------------------------- typedef map*> module_cu_cache_t; module_cu_cache_t module_cu_cache; void iterate_over_cus (int (*callback)(Dwarf_Die * die, void * arg), void * data) { get_module_dwarf(false); Dwarf *dw = module_dwarf; if (!dw) return; vector* v = module_cu_cache[dw]; if (v == 0) { v = new vector; module_cu_cache[dw] = v; Dwarf_Off off = 0; size_t cuhl; Dwarf_Off noff; while (dwarf_nextcu (dw, off, &noff, &cuhl, NULL, NULL, NULL) == 0) { if (pending_interrupts) return; Dwarf_Die die_mem; Dwarf_Die *die; die = dwarf_offdie (dw, off + cuhl, &die_mem); v->push_back (*die); /* copy */ off = noff; } } for (unsigned i = 0; i < v->size(); i++) { if (pending_interrupts) return; Dwarf_Die die = v->at(i); int rc = (*callback)(& die, data); if (rc != DWARF_CB_OK) break; } } // ----------------------------------------------------------------- bool func_is_inline() { assert (function); return dwarf_func_inline (function) != 0; } typedef map*> cu_inl_function_cache_t; cu_inl_function_cache_t cu_inl_function_cache; static int cu_inl_function_caching_callback (Dwarf_Die* func, void *arg) { vector* v = static_cast*>(arg); v->push_back (* func); return DWARF_CB_OK; } void iterate_over_inline_instances (int (* callback)(Dwarf_Die * die, void * arg), void * data) { assert (function); assert (func_is_inline ()); string key = module_name + ":" + cu_name + ":" + function_name; vector* v = cu_inl_function_cache[key]; if (v == 0) { v = new vector; cu_inl_function_cache[key] = v; dwarf_func_inline_instances (function, cu_inl_function_caching_callback, v); } for (unsigned i=0; isize(); i++) { if (pending_interrupts) return; Dwarf_Die die = v->at(i); int rc = (*callback)(& die, data); if (rc != DWARF_CB_OK) break; } } // ----------------------------------------------------------------- /* The global alias cache is used to resolve any DIE found in a * module that is stubbed out with DW_AT_declaration with a defining * DIE found in a different module. The current assumption is that * this only applies to structures and unions, which have a global * namespace (it deliberately only traverses program scope), so this * cache is indexed by name. If other declaration lookups were * added to it, it would have to be indexed by name and tag */ mod_cu_function_cache_t global_alias_cache; static int global_alias_caching_callback(Dwarf_Die *die, void *arg) { cu_function_cache_t *cache = static_cast(arg); const char *name = dwarf_diename(die); if (!name) return DWARF_CB_OK; string structure_name = name; if (!dwarf_hasattr(die, DW_AT_declaration) && cache->find(structure_name) == cache->end()) (*cache)[structure_name] = *die; return DWARF_CB_OK; } Dwarf_Die *declaration_resolve(const char *name) { if (!name) return NULL; string key = module_name + ":" + cu_name; cu_function_cache_t *v = global_alias_cache[key]; if (v == 0) // need to build the cache, just once per encountered module/cu { v = new cu_function_cache_t; global_alias_cache[key] = v; iterate_over_globals(global_alias_caching_callback, v); if (sess.verbose > 4) clog << "global alias cache " << key << " size " << v->size() << endl; } // XXX: it may be desirable to search other modules' declarations // too, in case a module/shared-library processes a // forward-declared pointer type only, where the actual definition // may only be in vmlinux or the application. // XXX: it is probably desirable to search other CU's declarations // in the same module. if (v->find(name) == v->end()) return NULL; return & ((*v)[name]); } mod_cu_function_cache_t cu_function_cache; static int cu_function_caching_callback (Dwarf_Die* func, void *arg) { cu_function_cache_t* v = static_cast(arg); const char *name = dwarf_diename(func); if (!name) return DWARF_CB_OK; string function_name = name; (*v)[function_name] = * func; return DWARF_CB_OK; } int iterate_over_functions (int (* callback)(Dwarf_Die * func, base_query * q), base_query * q, const string& function, bool has_statement_num=false); int iterate_over_globals (int (* callback)(Dwarf_Die *, void *), void * data); bool has_single_line_record (dwarf_query * q, char const * srcfile, int lineno); void iterate_over_srcfile_lines (char const * srcfile, int lines[2], bool need_single_match, enum line_t line_type, void (* callback) (const dwarf_line_t& line, void * arg), void *data) { Dwarf_Line **srcsp = NULL; size_t nsrcs = 0; dwarf_query * q = static_cast(data); int lineno = lines[0]; auto_free_ref free_srcsp(srcsp); get_module_dwarf(); if (line_type == RELATIVE) { Dwarf_Addr addr; Dwarf_Line *line; int line_number; dwarf_assert ("dwarf_entrypc", dwarf_entrypc (this->function, &addr)); line = dwarf_getsrc_die (this->cu, addr); dwarf_assert ("dwarf_getsrc_die", line == NULL); dwarf_assert ("dwarf_lineno", dwarf_lineno (line, &line_number)); lineno += line_number; } else if (line_type == WILDCARD) function_line (&lineno); for (int l = lineno; ; l = l + 1) { set lines_probed; pair::iterator,bool> line_probed; dwarf_assert ("dwarf_getsrc_file", dwarf_getsrc_file (module_dwarf, srcfile, l, 0, &srcsp, &nsrcs)); if (line_type == WILDCARD || line_type == RANGE) { Dwarf_Addr line_addr; dwarf_lineno (srcsp [0], &lineno); line_probed = lines_probed.insert(lineno); if (lineno != l || line_probed.second == false || nsrcs > 1) continue; dwarf_lineaddr (srcsp [0], &line_addr); if (dwarf_haspc (function, line_addr) != 1) break; } // NB: Formerly, we used to filter, because: // dwarf_getsrc_file gets one *near hits* for line numbers, not // exact matches. For example, an existing file but a nonexistent // line number will be rounded up to the next definition in that // file. This may be similar to the GDB breakpoint algorithm, but // we don't want to be so fuzzy in systemtap land. So we filter. // But we now see the error of our ways, and skip this filtering. // XXX: the code also fails to match e.g. inline function // definitions when the srcfile is a header file rather than the // CU name. size_t remaining_nsrcs = nsrcs; if (need_single_match && remaining_nsrcs > 1) { // We wanted a single line record (a unique address for the // line) and we got a bunch of line records. We're going to // skip this probe (throw an exception) but before we throw // we're going to look around a bit to see if there's a low or // high line number nearby which *doesn't* have this problem, // so we can give the user some advice. int lo_try = -1; int hi_try = -1; for (size_t i = 1; i < 6; ++i) { if (lo_try == -1 && has_single_line_record(q, srcfile, lineno - i)) lo_try = lineno - i; if (hi_try == -1 && has_single_line_record(q, srcfile, lineno + i)) hi_try = lineno + i; } stringstream advice; advice << "multiple addresses for " << srcfile << ":" << lineno; if (lo_try > 0 || hi_try > 0) { advice << " (try "; if (lo_try > 0) advice << srcfile << ":" << lo_try; if (lo_try > 0 && hi_try > 0) advice << " or "; if (hi_try > 0) advice << srcfile << ":" << hi_try; advice << ")"; } throw semantic_error (advice.str()); } for (size_t i = 0; i < nsrcs; ++i) { if (pending_interrupts) return; if (srcsp [i]) // skip over mismatched lines callback (dwarf_line_t(srcsp[i]), data); } if (line_type == ABSOLUTE || line_type == RELATIVE) break; else if (line_type == RANGE && l == lines[1]) break; } } void iterate_over_cu_labels (string label_val, string function, Dwarf_Die *cu, vector & results, probe_point *base_loc, void *data, void (* callback)(const string &, const char *, int, Dwarf_Die *, Dwarf_Addr, dwarf_query *)) { dwarf_query * q __attribute__ ((unused)) = static_cast(data) ; get_module_dwarf(); const char * sym = label_val.c_str(); Dwarf_Die die; int res = dwarf_child (cu, &die); if (res != 0) return; // die without children, bail out. static string function_name; do { Dwarf_Attribute attr_mem; Dwarf_Attribute *attr = dwarf_attr (&die, DW_AT_name, &attr_mem); int tag = dwarf_tag(&die); const char *name = dwarf_formstring (attr); if (name == 0) continue; switch (tag) { case DW_TAG_label: break; case DW_TAG_subprogram: function_name = name; default: if (dwarf_haschildren (&die)) iterate_over_cu_labels (label_val, function, &die, results, base_loc, q, callback); continue; } if (strcmp(function_name.c_str(), function.c_str()) == 0 || (name_has_wildcard(function) && function_name_matches_pattern (function_name, function))) { } else continue; if (strcmp(name, sym) == 0 || (name_has_wildcard(sym) && function_name_matches_pattern (name, sym))) { const char *file = dwarf_decl_file (&die); // Get the line number for this label Dwarf_Attribute attr; dwarf_attr (&die,DW_AT_decl_line, &attr); Dwarf_Sword dline; dwarf_formsdata (&attr, &dline); Dwarf_Addr stmt_addr; if (dwarf_lowpc (&die, &stmt_addr) != 0) { // There is no lowpc so figure out the address // Get the real die for this cu Dwarf_Die cudie; dwarf_diecu (cu, &cudie, NULL, NULL); size_t nlines = 0; // Get the line for this label Dwarf_Line **aline; dwarf_getsrc_file (module_dwarf, file, (int)dline, 0, &aline, &nlines); // Get the address for (size_t i = 0; i < nlines; i++) { dwarf_lineaddr (*aline, &stmt_addr); if ((dwarf_haspc (&die, stmt_addr))) break; } } Dwarf_Die *scopes; int nscopes = 0; nscopes = dwarf_getscopes_die (&die, &scopes); if (nscopes > 1) { callback(function_name.c_str(), file, (int)dline, &scopes[1], stmt_addr, q); if (sess.listing_mode) results.back()->locations[0]->components.push_back (new probe_point::component(TOK_LABEL, new literal_string (name))); } } } while (dwarf_siblingof (&die, &die) == 0); } void collect_srcfiles_matching (string const & pattern, set & filtered_srcfiles) { assert (module); assert (cu); size_t nfiles; Dwarf_Files *srcfiles; // PR 5049: implicit * in front of given path pattern. // NB: fnmatch() is used without FNM_PATHNAME. string prefixed_pattern = string("*/") + pattern; dwarf_assert ("dwarf_getsrcfiles", dwarf_getsrcfiles (cu, &srcfiles, &nfiles)); { for (size_t i = 0; i < nfiles; ++i) { char const * fname = dwarf_filesrc (srcfiles, i, NULL, NULL); if (fnmatch (pattern.c_str(), fname, 0) == 0 || fnmatch (prefixed_pattern.c_str(), fname, 0) == 0) { filtered_srcfiles.insert (fname); if (sess.verbose>2) clog << "selected source file '" << fname << "'\n"; } } } } void resolve_prologue_endings (func_info_map_t & funcs) { // This heuristic attempts to pick the first address that has a // source line distinct from the function declaration's. In a // perfect world, this would be the first statement *past* the // prologue. assert(module); assert(cu); size_t nlines = 0; Dwarf_Lines *lines = NULL; /* trouble cases: malloc do_symlink in init/initramfs.c tail-recursive/tiny then no-prologue sys_get?id in kernel/timer.c no-prologue sys_exit_group tail-recursive {do_,}sys_open extra-long-prologue (gcc 3.4) cpu_to_logical_apicid NULL-decl_file */ // Fetch all srcline records, sorted by address. dwarf_assert ("dwarf_getsrclines", dwarf_getsrclines(cu, &lines, &nlines)); // XXX: free lines[] later, but how? for(func_info_map_t::iterator it = funcs.begin(); it != funcs.end(); it++) { #if 0 /* someday */ Dwarf_Addr* bkpts = 0; int n = dwarf_entry_breakpoints (& it->die, & bkpts); // ... free (bkpts); #endif Dwarf_Addr entrypc = it->entrypc; Dwarf_Addr highpc; // NB: highpc is exclusive: [entrypc,highpc) dwfl_assert ("dwarf_highpc", dwarf_highpc (& it->die, & highpc)); if (it->decl_file == 0) it->decl_file = ""; unsigned entrypc_srcline_idx = 0; dwarf_line_t entrypc_srcline; // open-code binary search for exact match { unsigned l = 0, h = nlines; while (l < h) { entrypc_srcline_idx = (l + h) / 2; const dwarf_line_t lr(dwarf_onesrcline(lines, entrypc_srcline_idx)); Dwarf_Addr addr = lr.addr(); if (addr == entrypc) { entrypc_srcline = lr; break; } else if (l + 1 == h) { break; } // ran off bottom of tree else if (addr < entrypc) { l = entrypc_srcline_idx; } else { h = entrypc_srcline_idx; } } } if (!entrypc_srcline) { if (sess.verbose > 2) clog << "missing entrypc dwarf line record for function '" << it->name << "'\n"; // This is probably an inlined function. We'll end up using // its lowpc as a probe address. continue; } if (sess.verbose>2) clog << "prologue searching function '" << it->name << "'" << " 0x" << hex << entrypc << "-0x" << highpc << dec << "@" << it->decl_file << ":" << it->decl_line << "\n"; // Now we go searching for the first line record that has a // file/line different from the one in the declaration. // Normally, this will be the next one. BUT: // // We may have to skip a few because some old compilers plop // in dummy line records for longer prologues. If we go too // far (addr >= highpc), we take the previous one. Or, it may // be the first one, if the function had no prologue, and thus // the entrypc maps to a statement in the body rather than the // declaration. unsigned postprologue_srcline_idx = entrypc_srcline_idx; bool ranoff_end = false; while (postprologue_srcline_idx < nlines) { dwarf_line_t lr(dwarf_onesrcline(lines, postprologue_srcline_idx)); Dwarf_Addr postprologue_addr = lr.addr(); const char* postprologue_file = lr.linesrc(); int postprologue_lineno = lr.lineno(); if (sess.verbose>2) clog << "checking line record 0x" << hex << postprologue_addr << dec << "@" << postprologue_file << ":" << postprologue_lineno << "\n"; if (postprologue_addr >= highpc) { ranoff_end = true; postprologue_srcline_idx --; continue; } if (ranoff_end || (strcmp (postprologue_file, it->decl_file) || // We have a winner! (postprologue_lineno != it->decl_line))) { it->prologue_end = postprologue_addr; if (sess.verbose>2) { clog << "prologue found function '" << it->name << "'"; // Add a little classification datum if (postprologue_srcline_idx == entrypc_srcline_idx) clog << " (naked)"; if (ranoff_end) clog << " (tail-call?)"; clog << " = 0x" << hex << postprologue_addr << dec << "\n"; } break; } // Let's try the next srcline. postprologue_srcline_idx ++; } // loop over srclines // if (strlen(it->decl_file) == 0) it->decl_file = NULL; } // loop over functions // XXX: how to free lines? } bool function_entrypc (Dwarf_Addr * addr) { assert (function); return (dwarf_entrypc (function, addr) == 0); // XXX: see also _lowpc ? } bool die_entrypc (Dwarf_Die * die, Dwarf_Addr * addr) { int rc = 0; string lookup_method; * addr = 0; lookup_method = "dwarf_entrypc"; rc = dwarf_entrypc (die, addr); if (rc) { lookup_method = "dwarf_lowpc"; rc = dwarf_lowpc (die, addr); } if (rc) { lookup_method = "dwarf_ranges"; Dwarf_Addr base; Dwarf_Addr begin; Dwarf_Addr end; ptrdiff_t offset = dwarf_ranges (die, 0, &base, &begin, &end); if (offset < 0) rc = -1; else if (offset > 0) { * addr = begin; rc = 0; // Now we need to check that there are no more ranges // associated with this function, which could conceivably // happen if a function is inlined, then pieces of it are // split amongst different conditional branches. It's not // obvious which of them to favour. As a heuristic, we // pick the beginning of the first range, and ignore the // others (but with a warning). unsigned extra = 0; while ((offset = dwarf_ranges (die, offset, &base, &begin, &end)) > 0) extra ++; if (extra) lookup_method += ", ignored " + lex_cast(extra) + " more"; } } if (sess.verbose > 2) clog << "entry-pc lookup (" << lookup_method << ") = 0x" << hex << *addr << dec << " (rc " << rc << ")" << endl; return (rc == 0); } void function_die (Dwarf_Die *d) { assert (function); *d = *function; } void function_file (char const ** c) { assert (function); assert (c); *c = dwarf_decl_file (function); } void function_line (int *linep) { assert (function); dwarf_decl_line (function, linep); } bool die_has_pc (Dwarf_Die & die, Dwarf_Addr pc) { int res = dwarf_haspc (&die, pc); // dwarf_ranges will return -1 if a function die has no DW_AT_ranges // if (res == -1) // dwarf_assert ("dwarf_haspc", res); return res == 1; } static void loc2c_error (void *, const char *fmt, ...) { const char *msg = "?"; char *tmp = NULL; int rc; va_list ap; va_start (ap, fmt); rc = vasprintf (& tmp, fmt, ap); if (rc < 0) msg = "?"; else msg = tmp; va_end (ap); throw semantic_error (msg); } // This function generates code used for addressing computations of // target variables. void emit_address (struct obstack *pool, Dwarf_Addr address) { #if 0 // The easy but incorrect way is to just print a hard-wired // constant. obstack_printf (pool, "%#" PRIx64 "UL", address); #endif // Turn this address into a section-relative offset if it should be one. // We emit a comment approximating the variable+offset expression that // relocatable module probing code will need to have. Dwfl_Module *mod = dwfl_addrmodule (dwfl, address); dwfl_assert ("dwfl_addrmodule", mod); const char *modname = dwfl_module_info (mod, NULL, NULL, NULL, NULL, NULL, NULL, NULL); int n = dwfl_module_relocations (mod); dwfl_assert ("dwfl_module_relocations", n >= 0); Dwarf_Addr reloc_address = address; int i = dwfl_module_relocate_address (mod, &reloc_address); dwfl_assert ("dwfl_module_relocate_address", i >= 0); dwfl_assert ("dwfl_module_info", modname); const char *secname = dwfl_module_relocation_info (mod, i, NULL); if (sess.verbose > 2) { clog << "emit dwarf addr 0x" << hex << address << dec << " => module " << modname << " section " << (secname ?: "null") << " relocaddr 0x" << hex << reloc_address << dec << endl; } if (n > 0 && !(n == 1 && secname == NULL)) { dwfl_assert ("dwfl_module_relocation_info", secname); if (n > 1 || secname[0] != '\0') { // This gives us the module name, and section name within the // module, for a kernel module (or other ET_REL module object). obstack_printf (pool, "({ static unsigned long addr = 0; "); obstack_printf (pool, "if (addr==0) addr = _stp_module_relocate (\"%s\",\"%s\",%#" PRIx64 "); ", modname, secname, reloc_address); obstack_printf (pool, "addr; })"); } else if (n == 1 && module_name == TOK_KERNEL && secname[0] == '\0') { // elfutils' way of telling us that this is a relocatable kernel address, which we // need to treat the same way here as dwarf_query::add_probe_point does: _stext. address -= sess.sym_stext; secname = "_stext"; obstack_printf (pool, "({ static unsigned long addr = 0; "); obstack_printf (pool, "if (addr==0) addr = _stp_module_relocate (\"%s\",\"%s\",%#" PRIx64 "); ", modname, secname, address); // PR10000 NB: not reloc_address obstack_printf (pool, "addr; })"); } else { throw semantic_error ("cannot relocate user-space dso (?) address"); #if 0 // This would happen for a Dwfl_Module that's a user-level DSO. obstack_printf (pool, " /* %s+%#" PRIx64 " */", modname, address); #endif } } else obstack_printf (pool, "%#" PRIx64 "UL", address); // assume as constant } static void loc2c_emit_address (void *arg, struct obstack *pool, Dwarf_Addr address) { dwflpp *dwfl = (dwflpp *) arg; dwfl->emit_address (pool, address); } void print_locals(Dwarf_Die *die, ostream &o) { // Try to get the first child of die. Dwarf_Die child; if (dwarf_child (die, &child) == 0) { do { const char *name; // Output each sibling's name (that is a variable or // parameter) to 'o'. switch (dwarf_tag (&child)) { case DW_TAG_variable: case DW_TAG_formal_parameter: name = dwarf_diename (&child); if (name) o << " " << name; break; default: break; } } while (dwarf_siblingof (&child, &child) == 0); } } Dwarf_Attribute * find_variable_and_frame_base (Dwarf_Die *scope_die, Dwarf_Addr pc, string const & local, const target_symbol *e, Dwarf_Die *vardie, Dwarf_Attribute *fb_attr_mem) { Dwarf_Die *scopes; int nscopes = 0; Dwarf_Attribute *fb_attr = NULL; assert (cu); nscopes = dwarf_getscopes (cu, pc, &scopes); int sidx; // if pc and scope_die are disjoint then we need dwarf_getscopes_die for (sidx = 0; sidx < nscopes; sidx++) if (scopes[sidx].addr == scope_die->addr) break; if (sidx == nscopes) nscopes = dwarf_getscopes_die (scope_die, &scopes); if (nscopes <= 0) { throw semantic_error ("unable to find any scopes containing " + lex_cast_hex(pc) + ((scope_die == NULL) ? "" : (string (" in ") + (dwarf_diename(scope_die) ?: "") + "(" + (dwarf_diename(cu) ?: "") + ")")) + " while searching for local '" + local + "'", e->tok); } int declaring_scope = dwarf_getscopevar (scopes, nscopes, local.c_str(), 0, NULL, 0, 0, vardie); if (declaring_scope < 0) { stringstream alternatives; print_locals (scopes, alternatives); throw semantic_error ("unable to find local '" + local + "'" + " near pc " + lex_cast_hex(pc) + ((scope_die == NULL) ? "" : (string (" in ") + (dwarf_diename(scope_die) ?: "") + "(" + (dwarf_diename(cu) ?: "") + ")")) + (alternatives.str() == "" ? "" : (" (alternatives:" + alternatives.str () + ")")), e->tok); } for (int inner = 0; inner < nscopes; ++inner) { switch (dwarf_tag (&scopes[inner])) { default: continue; case DW_TAG_subprogram: case DW_TAG_entry_point: case DW_TAG_inlined_subroutine: /* XXX */ if (inner >= declaring_scope) fb_attr = dwarf_attr_integrate (&scopes[inner], DW_AT_frame_base, fb_attr_mem); break; } } return fb_attr; } struct location * translate_location(struct obstack *pool, Dwarf_Attribute *attr, Dwarf_Addr pc, Dwarf_Attribute *fb_attr, struct location **tail, const target_symbol *e) { Dwarf_Op *expr; size_t len; /* PR9768: formerly, we added pc+module_bias here. However, that bias value is not present in the pc value by the time we get it, so adding it would result in false negatives of variable reachibility. In other instances further below, the c_translate_FOO functions, the module_bias value used to be passed in, but instead should now be zero for the same reason. */ switch (dwarf_getlocation_addr (attr, pc /*+ module_bias*/, &expr, &len, 1)) { case 1: /* Should always happen. */ if (len > 0) break; /* Fall through. */ case 0: /* Shouldn't happen. */ throw semantic_error ("not accessible at this address", e->tok); default: /* Shouldn't happen. */ case -1: throw semantic_error (string ("dwarf_getlocation_addr failed") + string (dwarf_errmsg (-1)), e->tok); } return c_translate_location (pool, &loc2c_error, this, &loc2c_emit_address, 1, 0 /* PR9768 */, pc, expr, len, tail, fb_attr); } void print_members(Dwarf_Die *vardie, ostream &o) { const int typetag = dwarf_tag (vardie); if (typetag != DW_TAG_structure_type && typetag != DW_TAG_union_type) { o << " Error: " << (dwarf_diename_integrate (vardie) ?: "") << " isn't a struct/union"; return; } // Try to get the first child of vardie. Dwarf_Die die_mem; Dwarf_Die *die = &die_mem; switch (dwarf_child (vardie, die)) { case 1: // No children. o << ((typetag == DW_TAG_union_type) ? " union " : " struct ") << (dwarf_diename_integrate (die) ?: "") << " is empty"; break; case -1: // Error. default: // Shouldn't happen. o << ((typetag == DW_TAG_union_type) ? " union " : " struct ") << (dwarf_diename_integrate (die) ?: "") << ": " << dwarf_errmsg (-1); break; case 0: // Success. break; } // Output each sibling's name to 'o'. while (dwarf_tag (die) == DW_TAG_member) { const char *member = dwarf_diename_integrate (die) ; if ( member != NULL ) o << " " << member; else { Dwarf_Die temp_die = *die; Dwarf_Attribute temp_attr ; if (!dwarf_attr_integrate (&temp_die, DW_AT_type, &temp_attr)) { clog<<"\n Error in obtaining type attribute for " <<(dwarf_diename(&temp_die)?:""); return ; } if ( ! dwarf_formref_die (&temp_attr,&temp_die)) { clog<<"\n Error in decoding type attribute for " <<(dwarf_diename(&temp_die)?:""); return ; } print_members(&temp_die,o); } if (dwarf_siblingof (die, &die_mem) != 0) break; } } Dwarf_Die * translate_components(struct obstack *pool, struct location **tail, Dwarf_Addr pc, const target_symbol *e, Dwarf_Die *vardie, Dwarf_Die *die_mem, Dwarf_Attribute *attr_mem) { Dwarf_Die *die = NULL; Dwarf_Die struct_die; Dwarf_Attribute temp_attr; unsigned i = 0; if (vardie) *die_mem = *vardie; if (e->components.empty()) return die_mem; static unsigned int func_call_level ; static unsigned int dwarf_error_flag ; // indicates current error is dwarf error static unsigned int dwarf_error_count ; // keeps track of no of dwarf errors static semantic_error saved_dwarf_error(""); while (i < e->components.size()) { /* XXX: This would be desirable, but we don't get the target_symbol token, and printing that gives us the file:line number too early anyway. */ #if 0 // Emit a marker to note which field is being access-attempted, to give // better error messages if deref() fails. string piece = string(...target_symbol token...) + string ("#") + stringify(components[i].second); obstack_printf (pool, "c->last_stmt = %s;", lex_cast_qstring(piece).c_str()); #endif die = die ? dwarf_formref_die (attr_mem, die_mem) : die_mem; const int typetag = dwarf_tag (die); switch (typetag) { case DW_TAG_typedef: case DW_TAG_const_type: case DW_TAG_volatile_type: /* Just iterate on the referent type. */ break; case DW_TAG_pointer_type: if (e->components[i].first == target_symbol::comp_literal_array_index) throw semantic_error ("cannot index pointer", e->tok); // XXX: of course, we should support this the same way C does, // by explicit pointer arithmetic etc. PR4166. c_translate_pointer (pool, 1, 0 /* PR9768*/, die, tail); break; case DW_TAG_array_type: if (e->components[i].first == target_symbol::comp_literal_array_index) { c_translate_array (pool, 1, 0 /* PR9768 */, die, tail, NULL, lex_cast(e->components[i].second)); ++i; } else throw semantic_error("bad field '" + e->components[i].second + "' for array type", e->tok); break; case DW_TAG_structure_type: case DW_TAG_union_type: struct_die = *die; if (dwarf_hasattr(die, DW_AT_declaration)) { Dwarf_Die *tmpdie = dwflpp::declaration_resolve(dwarf_diename(die)); if (tmpdie == NULL) throw semantic_error ("unresolved struct " + string (dwarf_diename_integrate (die) ?: ""), e->tok); *die_mem = *tmpdie; } switch (dwarf_child (die, die_mem)) { case 1: /* No children. */ return NULL; case -1: /* Error. */ default: /* Shouldn't happen */ throw semantic_error (string (typetag == DW_TAG_union_type ? "union" : "struct") + string (dwarf_diename_integrate (die) ?: "") + string (dwarf_errmsg (-1)), e->tok); break; case 0: break; } while (dwarf_tag (die) != DW_TAG_member || ({ const char *member = dwarf_diename_integrate (die); member == NULL || string(member) != e->components[i].second; })) { if ( dwarf_diename (die) == NULL ) // handling Anonymous structs/unions { Dwarf_Die temp_die = *die; Dwarf_Die temp_die_2; try { if (!dwarf_attr_integrate (&temp_die, DW_AT_type, &temp_attr)) { dwarf_error_flag ++ ; dwarf_error_count ++; throw semantic_error(" Error in obtaining type attribute for "+ string(dwarf_diename(&temp_die)?:""), e->tok); } if ( !dwarf_formref_die (&temp_attr, &temp_die)) { dwarf_error_flag ++ ; dwarf_error_count ++; throw semantic_error(" Error in decoding DW_AT_type attribute for " + string(dwarf_diename(&temp_die)?:""), e->tok); } func_call_level ++ ; Dwarf_Die *result_die = translate_components(pool, tail, pc, e, &temp_die, &temp_die_2, &temp_attr); func_call_level -- ; if (result_die != NULL) { memcpy(die_mem, &temp_die_2, sizeof(Dwarf_Die)); memcpy(attr_mem, &temp_attr, sizeof(Dwarf_Attribute)); return die_mem; } } catch (const semantic_error& e) { if ( !dwarf_error_flag ) //not a dwarf error throw; else { dwarf_error_flag = 0 ; saved_dwarf_error = e ; } } } if (dwarf_siblingof (die, die_mem) != 0) { if ( func_call_level == 0 && dwarf_error_count ) // this is parent call & a dwarf error has been reported in a branch somewhere throw semantic_error( saved_dwarf_error ); else return NULL; } } if (dwarf_attr_integrate (die, DW_AT_data_member_location, attr_mem) == NULL) { /* Union members don't usually have a location, but just use the containing union's location. */ if (typetag != DW_TAG_union_type) throw semantic_error ("no location for field '" + e->components[i].second + "' :" + string(dwarf_errmsg (-1)), e->tok); } else translate_location (pool, attr_mem, pc, NULL, tail, e); ++i; break; case DW_TAG_base_type: throw semantic_error ("field '" + e->components[i].second + "' vs. base type " + string(dwarf_diename_integrate (die) ?: ""), e->tok); break; case -1: throw semantic_error ("cannot find type: " + string(dwarf_errmsg (-1)), e->tok); break; default: throw semantic_error (string(dwarf_diename_integrate (die) ?: "") + ": unexpected type tag " + lex_cast(dwarf_tag (die)), e->tok); break; } /* Now iterate on the type in DIE's attribute. */ if (dwarf_attr_integrate (die, DW_AT_type, attr_mem) == NULL) throw semantic_error ("cannot get type of field: " + string(dwarf_errmsg (-1)), e->tok); } return die; } Dwarf_Die * resolve_unqualified_inner_typedie (Dwarf_Die *typedie_mem, Dwarf_Attribute *attr_mem, const target_symbol *e) { Dwarf_Die *typedie; int typetag = 0; while (1) { typedie = dwarf_formref_die (attr_mem, typedie_mem); if (typedie == NULL) throw semantic_error ("cannot get type: " + string(dwarf_errmsg (-1)), e->tok); typetag = dwarf_tag (typedie); if (typetag != DW_TAG_typedef && typetag != DW_TAG_const_type && typetag != DW_TAG_volatile_type) break; if (dwarf_attr_integrate (typedie, DW_AT_type, attr_mem) == NULL) throw semantic_error ("cannot get type of pointee: " + string(dwarf_errmsg (-1)), e->tok); } return typedie; } void translate_final_fetch_or_store (struct obstack *pool, struct location **tail, Dwarf_Addr module_bias, Dwarf_Die *die, Dwarf_Attribute *attr_mem, bool lvalue, const target_symbol *e, string &, string &, exp_type & ty) { /* First boil away any qualifiers associated with the type DIE of the final location to be accessed. */ Dwarf_Die typedie_mem; Dwarf_Die *typedie; int typetag; char const *dname; string diestr; typedie = resolve_unqualified_inner_typedie (&typedie_mem, attr_mem, e); typetag = dwarf_tag (typedie); /* Then switch behavior depending on the type of fetch/store we want, and the type and pointer-ness of the final location. */ switch (typetag) { default: dname = dwarf_diename(die); diestr = (dname != NULL) ? dname : ""; throw semantic_error ("unsupported type tag " + lex_cast(typetag) + " for " + diestr, e->tok); break; case DW_TAG_structure_type: case DW_TAG_union_type: dname = dwarf_diename(die); diestr = (dname != NULL) ? dname : ""; throw semantic_error ("struct/union '" + diestr + "' is being accessed instead of a member of the struct/union", e->tok); break; case DW_TAG_enumeration_type: case DW_TAG_base_type: // Reject types we can't handle in systemtap { dname = dwarf_diename(die); diestr = (dname != NULL) ? dname : ""; Dwarf_Attribute encoding_attr; Dwarf_Word encoding = (Dwarf_Word) -1; dwarf_formudata (dwarf_attr_integrate (typedie, DW_AT_encoding, &encoding_attr), & encoding); if (encoding < 0) { // clog << "bad type1 " << encoding << " diestr" << endl; throw semantic_error ("unsupported type (mystery encoding " + lex_cast(encoding) + ")" + " for " + diestr, e->tok); } if (encoding == DW_ATE_float || encoding == DW_ATE_complex_float /* XXX || many others? */) { // clog << "bad type " << encoding << " diestr" << endl; throw semantic_error ("unsupported type (encoding " + lex_cast(encoding) + ")" + " for " + diestr, e->tok); } } ty = pe_long; if (lvalue) c_translate_store (pool, 1, 0 /* PR9768 */, die, typedie, tail, "THIS->value"); else c_translate_fetch (pool, 1, 0 /* PR9768 */, die, typedie, tail, "THIS->__retvalue"); break; case DW_TAG_array_type: case DW_TAG_pointer_type: { Dwarf_Die pointee_typedie_mem; Dwarf_Die *pointee_typedie; Dwarf_Word pointee_encoding; Dwarf_Word pointee_byte_size = 0; pointee_typedie = resolve_unqualified_inner_typedie (&pointee_typedie_mem, attr_mem, e); if (dwarf_attr_integrate (pointee_typedie, DW_AT_byte_size, attr_mem)) dwarf_formudata (attr_mem, &pointee_byte_size); dwarf_formudata (dwarf_attr_integrate (pointee_typedie, DW_AT_encoding, attr_mem), &pointee_encoding); if (lvalue) { ty = pe_long; if (typetag == DW_TAG_array_type) throw semantic_error ("cannot write to array address", e->tok); assert (typetag == DW_TAG_pointer_type); c_translate_pointer_store (pool, 1, 0 /* PR9768 */, typedie, tail, "THIS->value"); } else { // We have the pointer: cast it to an integral type via &(*(...)) // NB: per bug #1187, at one point char*-like types were // automagically converted here to systemtap string values. // For several reasons, this was taken back out, leaving // pointer-to-string "conversion" (copying) to tapset functions. ty = pe_long; if (typetag == DW_TAG_array_type) c_translate_array (pool, 1, 0 /* PR9768 */, typedie, tail, NULL, 0); else c_translate_pointer (pool, 1, 0 /* PR9768 */, typedie, tail); c_translate_addressof (pool, 1, 0 /* PR9768 */, NULL, pointee_typedie, tail, "THIS->__retvalue"); } } break; } } string express_as_string (string prelude, string postlude, struct location *head) { size_t bufsz = 1024; char *buf = static_cast(malloc(bufsz)); assert(buf); FILE *memstream = open_memstream (&buf, &bufsz); assert(memstream); fprintf(memstream, "{\n"); fprintf(memstream, "%s", prelude.c_str()); bool deref = c_emit_location (memstream, head, 1); fprintf(memstream, "%s", postlude.c_str()); fprintf(memstream, " goto out;\n"); // dummy use of deref_fault label, to disable warning if deref() not used fprintf(memstream, "if (0) goto deref_fault;\n"); // XXX: deref flag not reliable; emit fault label unconditionally (void) deref; fprintf(memstream, "deref_fault:\n" " goto out;\n"); fprintf(memstream, "}\n"); fclose (memstream); string result(buf); free (buf); return result; } string literal_stmt_for_local (Dwarf_Die *scope_die, Dwarf_Addr pc, string const & local, const target_symbol *e, bool lvalue, exp_type & ty) { Dwarf_Die vardie; Dwarf_Attribute fb_attr_mem, *fb_attr = NULL; fb_attr = find_variable_and_frame_base (scope_die, pc, local, e, &vardie, &fb_attr_mem); if (sess.verbose>2) clog << "finding location for local '" << local << "' near address 0x" << hex << pc << ", module bias 0x" << module_bias << dec << "\n"; Dwarf_Attribute attr_mem; if (dwarf_attr_integrate (&vardie, DW_AT_location, &attr_mem) == NULL) { throw semantic_error("failed to retrieve location " "attribute for local '" + local + "' (dieoffset: " + lex_cast_hex(dwarf_dieoffset (&vardie)) + ")", e->tok); } #define obstack_chunk_alloc malloc #define obstack_chunk_free free struct obstack pool; obstack_init (&pool); struct location *tail = NULL; /* Given $foo->bar->baz[NN], translate the location of foo. */ struct location *head = translate_location (&pool, &attr_mem, pc, fb_attr, &tail, e); if (dwarf_attr_integrate (&vardie, DW_AT_type, &attr_mem) == NULL) throw semantic_error("failed to retrieve type " "attribute for local '" + local + "'", e->tok); /* Translate the ->bar->baz[NN] parts. */ Dwarf_Die die_mem, *die = dwarf_formref_die (&attr_mem, &die_mem); die = translate_components (&pool, &tail, pc, e, die, &die_mem, &attr_mem); if(!die) { die = dwarf_formref_die (&attr_mem, &die_mem); stringstream alternatives; if (die != NULL) print_members(die,alternatives); throw semantic_error("unable to find local '" + local + "'" + " near pc " + lex_cast_hex(pc) + (alternatives.str() == "" ? "" : (" (alternatives:" + alternatives.str () + ")")), e->tok); } /* Translate the assignment part, either x = $foo->bar->baz[NN] or $foo->bar->baz[NN] = x */ string prelude, postlude; translate_final_fetch_or_store (&pool, &tail, module_bias, die, &attr_mem, lvalue, e, prelude, postlude, ty); /* Write the translation to a string. */ return express_as_string(prelude, postlude, head); } string literal_stmt_for_return (Dwarf_Die *scope_die, Dwarf_Addr pc, const target_symbol *e, bool lvalue, exp_type & ty) { if (sess.verbose>2) clog << "literal_stmt_for_return: finding return value for " << (dwarf_diename(scope_die) ?: "") << "(" << (dwarf_diename(cu) ?: "") << ")\n"; struct obstack pool; obstack_init (&pool); struct location *tail = NULL; /* Given $return->bar->baz[NN], translate the location of return. */ const Dwarf_Op *locops; int nlocops = dwfl_module_return_value_location (module, scope_die, &locops); if (nlocops < 0) { throw semantic_error("failed to retrieve return value location" " for " + string(dwarf_diename(scope_die) ?: "") + "(" + string(dwarf_diename(cu) ?: "") + ")", e->tok); } // the function has no return value (e.g. "void" in C) else if (nlocops == 0) { throw semantic_error("function " + string(dwarf_diename(scope_die) ?: "") + "(" + string(dwarf_diename(cu) ?: "") + ") has no return value", e->tok); } struct location *head = c_translate_location (&pool, &loc2c_error, this, &loc2c_emit_address, 1, 0 /* PR9768 */, pc, locops, nlocops, &tail, NULL); /* Translate the ->bar->baz[NN] parts. */ Dwarf_Attribute attr_mem; if (dwarf_attr_integrate (scope_die, DW_AT_type, &attr_mem) == NULL) throw semantic_error("failed to retrieve return value type attribute for " + string(dwarf_diename(scope_die) ?: "") + "(" + string(dwarf_diename(cu) ?: "") + ")", e->tok); Dwarf_Die die_mem, *die = dwarf_formref_die (&attr_mem, &die_mem); die = translate_components (&pool, &tail, pc, e, die, &die_mem, &attr_mem); if(!die) { die = dwarf_formref_die (&attr_mem, &die_mem); stringstream alternatives; if (die != NULL) print_members(die,alternatives); throw semantic_error("unable to find return value" " near pc " + lex_cast_hex(pc) + " for " + string(dwarf_diename(scope_die) ?: "") + "(" + string(dwarf_diename(cu) ?: "") + ")" + (alternatives.str() == "" ? "" : (" (alternatives:" + alternatives.str () + ")")), e->tok); } /* Translate the assignment part, either x = $return->bar->baz[NN] or $return->bar->baz[NN] = x */ string prelude, postlude; translate_final_fetch_or_store (&pool, &tail, module_bias, die, &attr_mem, lvalue, e, prelude, postlude, ty); /* Write the translation to a string. */ return express_as_string(prelude, postlude, head); } string literal_stmt_for_pointer (Dwarf_Die *type_die, const target_symbol *e, bool lvalue, exp_type & ty) { if (sess.verbose>2) clog << "literal_stmt_for_pointer: finding value for " << (dwarf_diename(type_die) ?: "") << "(" << (dwarf_diename(cu) ?: "") << ")\n"; struct obstack pool; obstack_init (&pool); struct location *head = c_translate_argument (&pool, &loc2c_error, this, &loc2c_emit_address, 1, "THIS->pointer"); struct location *tail = head; /* Translate the ->bar->baz[NN] parts. */ Dwarf_Attribute attr_mem; Dwarf_Die die_mem, *die = NULL; die = translate_components (&pool, &tail, 0, e, type_die, &die_mem, &attr_mem); if(!die) { die = dwarf_formref_die (&attr_mem, &die_mem); stringstream alternatives; print_members(die ?: type_die, alternatives); throw semantic_error("unable to find member for struct " + string(dwarf_diename(die ?: type_die) ?: "") + (alternatives.str() == "" ? "" : (" (alternatives:" + alternatives.str () + ")")), e->tok); } /* Translate the assignment part, either x = (THIS->pointer)->bar->baz[NN] or (THIS->pointer)->bar->baz[NN] = x */ string prelude, postlude; translate_final_fetch_or_store (&pool, &tail, module_bias, die, &attr_mem, lvalue, e, prelude, postlude, ty); /* Write the translation to a string. */ return express_as_string(prelude, postlude, head); } ~dwflpp() { if (dwfl) dwfl_end(dwfl); } }; enum function_spec_type { function_alone, function_and_file, function_file_and_line }; struct dwarf_builder; // XXX: This class is a candidate for subclassing to separate // the relocation vs non-relocation variants. Likewise for // kprobe vs kretprobe variants. struct dwarf_derived_probe: public derived_probe { dwarf_derived_probe (const string& function, const string& filename, int line, const string& module, const string& section, Dwarf_Addr dwfl_addr, Dwarf_Addr addr, dwarf_query & q, Dwarf_Die* scope_die); string module; string section; Dwarf_Addr addr; bool has_return; bool has_maxactive; long maxactive_val; bool access_vars; void printsig (std::ostream &o) const; void join_group (systemtap_session& s); void emit_probe_local_init(translator_output * o); // Pattern registration helpers. static void register_statement_variants(match_node * root, dwarf_builder * dw); static void register_function_variants(match_node * root, dwarf_builder * dw); static void register_function_and_statement_variants(match_node * root, dwarf_builder * dw); static void register_patterns(systemtap_session& s); }; struct uprobe_derived_probe: public derived_probe { bool return_p; string module; // * => unrestricted int pid; // 0 => unrestricted string section; // empty => absolute address Dwarf_Addr address; // bool has_maxactive; // long maxactive_val; uprobe_derived_probe (const string& function, const string& filename, int line, const string& module, int pid, const string& section, Dwarf_Addr dwfl_addr, Dwarf_Addr addr, dwarf_query & q, Dwarf_Die* scope_die); // alternate constructor for process(PID).statement(ADDR).absolute uprobe_derived_probe (probe *base, probe_point *location, int pid, Dwarf_Addr addr, bool return_p); void printsig (std::ostream &o) const; void join_group (systemtap_session& s); }; struct dwarf_derived_probe_group: public derived_probe_group { private: multimap probes_by_module; typedef multimap::iterator p_b_m_iterator; public: void enroll (dwarf_derived_probe* probe); void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; // Helper struct to thread through the dwfl callbacks. struct base_query { base_query(dwflpp & dw, literal_map_t const & params); base_query(dwflpp & dw, const string & module_val); virtual ~base_query() {} systemtap_session & sess; dwflpp & dw; // Parameter extractors. static bool has_null_param(literal_map_t const & params, string const & k); static bool get_string_param(literal_map_t const & params, string const & k, string & v); static bool get_number_param(literal_map_t const & params, string const & k, long & v); static bool get_number_param(literal_map_t const & params, string const & k, Dwarf_Addr & v); // Extracted parameters. bool has_kernel; bool has_module; bool has_process; string module_val; // has_kernel => module_val = "kernel" virtual void handle_query_module() = 0; }; base_query::base_query(dwflpp & dw, literal_map_t const & params): sess(dw.sess), dw(dw) { has_kernel = has_null_param (params, TOK_KERNEL); if (has_kernel) module_val = "kernel"; has_module = get_string_param (params, TOK_MODULE, module_val); if (has_module) has_process = false; else { has_process = get_string_param(params, TOK_PROCESS, module_val); if (has_process) module_val = find_executable (module_val); } assert (has_kernel || has_process || has_module); } base_query::base_query(dwflpp & dw, const string & module_val) : sess(dw.sess), dw(dw), module_val(module_val) { // NB: This uses '/' to distinguish between kernel modules and userspace, // which means that userspace modules won't get any PATH searching. if (module_val.find('/') == string::npos) { has_kernel = (module_val == TOK_KERNEL); has_module = !has_kernel; has_process = false; } else { has_kernel = has_module = false; has_process = true; } } bool base_query::has_null_param(literal_map_t const & params, string const & k) { return derived_probe_builder::has_null_param(params, k); } bool base_query::get_string_param(literal_map_t const & params, string const & k, string & v) { return derived_probe_builder::get_param (params, k, v); } bool base_query::get_number_param(literal_map_t const & params, string const & k, long & v) { int64_t value; bool present = derived_probe_builder::get_param (params, k, value); v = (long) value; return present; } bool base_query::get_number_param(literal_map_t const & params, string const & k, Dwarf_Addr & v) { int64_t value; bool present = derived_probe_builder::get_param (params, k, value); v = (Dwarf_Addr) value; return present; } struct dwarf_query : public base_query { dwarf_query(systemtap_session & sess, probe * base_probe, probe_point * base_loc, dwflpp & dw, literal_map_t const & params, vector & results); vector & results; probe * base_probe; probe_point * base_loc; virtual void handle_query_module(); void query_module_dwarf(); void query_module_symtab(); void add_probe_point(string const & funcname, char const * filename, int line, Dwarf_Die *scope_die, Dwarf_Addr addr); string get_blacklist_section(Dwarf_Addr addr); regex_t blacklist_func; // function/statement probes regex_t blacklist_func_ret; // only for .return probes regex_t blacklist_file; // file name void build_blacklist(); bool blacklisted_p(const string& funcname, const string& filename, int line, const string& module, const string& section, Dwarf_Addr addr); // Extracted parameters. string function_val; bool has_function_str; bool has_statement_str; bool has_function_num; bool has_statement_num; string statement_str_val; string function_str_val; Dwarf_Addr statement_num_val; Dwarf_Addr function_num_val; bool has_call; bool has_inline; bool has_return; bool has_maxactive; long maxactive_val; bool has_label; string label_val; bool has_relative; long relative_val; bool has_absolute; bool has_mark; enum dbinfo_reqt dbinfo_reqt; enum dbinfo_reqt assess_dbinfo_reqt(); function_spec_type parse_function_spec(string & spec); function_spec_type spec_type; string function; string file; line_t line_type; int line[2]; bool query_done; // Found exact match set filtered_srcfiles; // Map official entrypc -> func_info object inline_instance_map_t filtered_inlines; func_info_map_t filtered_functions; bool choose_next_line; Dwarf_Addr entrypc_for_next_line; }; // This little test routine represents an unfortunate breakdown in // abstraction between dwflpp (putatively, a layer right on top of // elfutils), and dwarf_query (interpreting a systemtap probe point). // It arises because we sometimes try to fix up slightly-off // .statement() probes (something we find out in fairly low-level). // // An alternative would be to put some more intelligence into query_cu(), // and have it print additional suggestions after finding that // q->dw.iterate_over_srcfile_lines resulted in no new finished_results. bool dwflpp::has_single_line_record (dwarf_query * q, char const * srcfile, int lineno) { if (lineno < 0) return false; Dwarf_Line **srcsp = NULL; size_t nsrcs = 0; dwarf_assert ("dwarf_getsrc_file", dwarf_getsrc_file (module_dwarf, srcfile, lineno, 0, &srcsp, &nsrcs)); if (nsrcs != 1) { if (sess.verbose>4) clog << "alternative line " << lineno << " rejected: nsrcs=" << nsrcs << endl; return false; } // We also try to filter out lines that leave the selected // functions (if any). dwarf_line_t line(srcsp[0]); Dwarf_Addr addr = line.addr(); for (func_info_map_t::iterator i = q->filtered_functions.begin(); i != q->filtered_functions.end(); ++i) { if (q->dw.die_has_pc (i->die, addr)) { if (q->sess.verbose>4) clog << "alternative line " << lineno << " accepted: fn=" << i->name << endl; return true; } } for (inline_instance_map_t::iterator i = q->filtered_inlines.begin(); i != q->filtered_inlines.end(); ++i) { if (q->dw.die_has_pc (i->die, addr)) { if (sess.verbose>4) clog << "alternative line " << lineno << " accepted: ifn=" << i->name << endl; return true; } } if (sess.verbose>4) clog << "alternative line " << lineno << " rejected: leaves selected fns" << endl; return false; } /* This basically only goes one level down from the compile unit so it * only picks up top level stuff (i.e. nothing in a lower scope) */ int dwflpp::iterate_over_globals (int (* callback)(Dwarf_Die *, void *), void * data) { int rc = DWARF_CB_OK; Dwarf_Die die; assert (module); assert (cu); assert (dwarf_tag(cu) == DW_TAG_compile_unit); if (dwarf_child(cu, &die) != 0) return rc; do { /* We're only currently looking for structures and unions, * although other types of declarations exist */ if (dwarf_tag(&die) != DW_TAG_structure_type && dwarf_tag(&die) != DW_TAG_union_type) continue; rc = (*callback)(&die, data); if (rc != DWARF_CB_OK) break; } while (dwarf_siblingof(&die, &die) == 0); return rc; } int dwflpp::iterate_over_functions (int (* callback)(Dwarf_Die * func, base_query * q), base_query * q, const string& function, bool has_statement_num) { int rc = DWARF_CB_OK; assert (module); assert (cu); string key = module_name + ":" + cu_name; cu_function_cache_t *v = cu_function_cache[key]; if (v == 0) { v = new cu_function_cache_t; cu_function_cache[key] = v; dwarf_getfuncs (cu, cu_function_caching_callback, v, 0); if (q->sess.verbose > 4) clog << "function cache " << key << " size " << v->size() << endl; } string subkey = function; if (v->find(subkey) != v->end()) { Dwarf_Die die = v->find(subkey)->second; if (q->sess.verbose > 4) clog << "function cache " << key << " hit " << subkey << endl; return (*callback)(& die, q); } else if (name_has_wildcard (subkey)) { for (cu_function_cache_t::iterator it = v->begin(); it != v->end(); it++) { if (pending_interrupts) return DWARF_CB_ABORT; string func_name = it->first; Dwarf_Die die = it->second; if (function_name_matches_pattern (func_name, subkey)) { if (q->sess.verbose > 4) clog << "function cache " << key << " match " << func_name << " vs " << subkey << endl; rc = (*callback)(& die, q); if (rc != DWARF_CB_OK) break; } } } else if (has_statement_num) // searching all for kernel.statement { for (cu_function_cache_t::iterator it = v->begin(); it != v->end(); it++) { Dwarf_Die die = it->second; rc = (*callback)(& die, q); if (rc != DWARF_CB_OK) break; } } else // not a wildcard and no match in this CU { // do nothing } return rc; } struct dwarf_builder: public derived_probe_builder { dwflpp *kern_dw; map user_dw; dwarf_builder(): kern_dw(0) {} /* NB: not virtual, so can be called from dtor too: */ void dwarf_build_no_more (bool verbose) { if (kern_dw) { if (verbose) clog << "dwarf_builder releasing kernel dwflpp" << endl; delete kern_dw; kern_dw = 0; } for (map::iterator udi = user_dw.begin(); udi != user_dw.end(); udi ++) { if (verbose) clog << "dwarf_builder releasing user dwflpp " << udi->first << endl; delete udi->second; } user_dw.erase (user_dw.begin(), user_dw.end()); } void build_no_more (systemtap_session &s) { dwarf_build_no_more (s.verbose > 3); } ~dwarf_builder() { dwarf_build_no_more (false); } virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results); }; dwarf_query::dwarf_query(systemtap_session & sess, probe * base_probe, probe_point * base_loc, dwflpp & dw, literal_map_t const & params, vector & results) : base_query(dw, params), results(results), base_probe(base_probe), base_loc(base_loc) { // Reduce the query to more reasonable semantic values (booleans, // extracted strings, numbers, etc). has_function_str = get_string_param(params, TOK_FUNCTION, function_str_val); has_function_num = get_number_param(params, TOK_FUNCTION, function_num_val); has_statement_str = get_string_param(params, TOK_STATEMENT, statement_str_val); has_statement_num = get_number_param(params, TOK_STATEMENT, statement_num_val); has_label = get_string_param(params, TOK_LABEL, label_val); has_call = has_null_param(params, TOK_CALL); has_inline = has_null_param(params, TOK_INLINE); has_return = has_null_param(params, TOK_RETURN); has_maxactive = get_number_param(params, TOK_MAXACTIVE, maxactive_val); has_absolute = has_null_param(params, TOK_ABSOLUTE); has_mark = false; if (has_function_str) spec_type = parse_function_spec(function_str_val); else if (has_statement_str) spec_type = parse_function_spec(statement_str_val); build_blacklist(); // XXX: why not reuse amongst dwarf_query instances? dbinfo_reqt = assess_dbinfo_reqt(); query_done = false; } void dwarf_query::query_module_dwarf() { if (has_function_num || has_statement_num) { // If we have module("foo").function(0xbeef) or // module("foo").statement(0xbeef), the address is relative // to the start of the module, so we seek the function // number plus the module's bias. Dwarf_Addr addr; if (has_function_num) addr = function_num_val; else addr = statement_num_val; // NB: we don't need to add the module base address or bias // value here (for reasons that may be coincidental). dw.query_cu_containing_module_address(addr, this); } else { // Otherwise if we have a function("foo") or statement("foo") // specifier, we have to scan over all the CUs looking for // the function(s) in question assert(has_function_str || has_statement_str); dw.iterate_over_cus(&query_cu, this); } } static void query_func_info (Dwarf_Addr entrypc, func_info & fi, dwarf_query * q); void dwarf_query::query_module_symtab() { // Get the symbol table if it's necessary, sufficient, and not already got. if (dbinfo_reqt == dbr_need_dwarf) return; module_info *mi = dw.mod_info; if (dbinfo_reqt == dbr_need_symtab) { if (mi->symtab_status == info_unknown) mi->get_symtab(this); if (mi->symtab_status == info_absent) return; } func_info *fi = NULL; symbol_table *sym_table = mi->sym_table; if (has_function_str) { // Per dwarf_query::assess_dbinfo_reqt()... assert(spec_type == function_alone); if (dw.name_has_wildcard(function_str_val)) { // Until we augment the blacklist sufficently... if (function_str_val.find_first_not_of("*?") == string::npos) { // e.g., kernel.function("*") cerr << "Error: Pattern '" << function_str_val << "' matches every instruction address in the symbol table," << endl << "some of which aren't even functions." << " Please be more precise." << endl; return; } symbol_table::iterator_t iter; for (iter = sym_table->list_by_addr.begin(); iter != sym_table->list_by_addr.end(); ++iter) { fi = *iter; if (!null_die(&fi->die)) continue; // already handled in query_module_dwarf() if (dw.function_name_matches_pattern(fi->name, function_str_val)) query_func_info(fi->addr, *fi, this); } } else { fi = sym_table->lookup_symbol(function_str_val); if (fi && null_die(&fi->die)) query_func_info(fi->addr, *fi, this); } } else { assert(has_function_num || has_statement_num); // Find the "function" in which the indicated address resides. Dwarf_Addr addr = (has_function_num ? function_num_val : statement_num_val); fi = sym_table->get_func_containing_address(addr); if (!fi) { cerr << "Warning: address " << hex << addr << dec << " out of range for module " << dw.module_name; return; } if (!null_die(&fi->die)) { // addr looks like it's in the compilation unit containing // the indicated function, but query_module_dwarf() didn't // match addr to any compilation unit, so addr must be // above that cu's address range. cerr << "Warning: address " << hex << addr << dec << " maps to no known compilation unit in module " << dw.module_name; return; } query_func_info(fi->addr, *fi, this); } } void dwarf_query::handle_query_module() { dw.get_module_dwarf(false, (dbinfo_reqt == dbr_need_dwarf || !sess.consult_symtab)); if (dw.mod_info->dwarf_status == info_present) query_module_dwarf(); // Consult the symbol table if we haven't found all we're looking for. // asm functions can show up in the symbol table but not in dwarf. if (sess.consult_symtab && !query_done) query_module_symtab(); } void dwarf_query::build_blacklist() { // No blacklist for userspace. if (has_process) return; // We build up the regexps in these strings // Add ^ anchors at the front; $ will be added just before regcomp. string blfn = "^("; string blfn_ret = "^("; string blfile = "^("; blfile += "kernel/kprobes.c"; // first alternative, no "|" blfile += "|arch/.*/kernel/kprobes.c"; // Older kernels need ... blfile += "|include/asm/io.h"; blfile += "|include/asm/bitops.h"; // While newer ones need ... blfile += "|arch/.*/include/asm/io.h"; blfile += "|arch/.*/include/asm/bitops.h"; blfile += "|drivers/ide/ide-iops.c"; // XXX: it would be nice if these blacklisted functions were pulled // in dynamically, instead of being statically defined here. // Perhaps it could be populated from script files. A "noprobe // kernel.function("...")" construct might do the trick. // Most of these are marked __kprobes in newer kernels. We list // them here (anyway) so the translator can block them on older // kernels that don't have the __kprobes function decorator. This // also allows detection of problems at translate- rather than // run-time. blfn += "atomic_notifier_call_chain"; // first blfn; no "|" blfn += "|default_do_nmi"; blfn += "|__die"; blfn += "|die_nmi"; blfn += "|do_debug"; blfn += "|do_general_protection"; blfn += "|do_int3"; blfn += "|do_IRQ"; blfn += "|do_page_fault"; blfn += "|do_sparc64_fault"; blfn += "|do_trap"; blfn += "|dummy_nmi_callback"; blfn += "|flush_icache_range"; blfn += "|ia64_bad_break"; blfn += "|ia64_do_page_fault"; blfn += "|ia64_fault"; blfn += "|io_check_error"; blfn += "|mem_parity_error"; blfn += "|nmi_watchdog_tick"; blfn += "|notifier_call_chain"; blfn += "|oops_begin"; blfn += "|oops_end"; blfn += "|program_check_exception"; blfn += "|single_step_exception"; blfn += "|sync_regs"; blfn += "|unhandled_fault"; blfn += "|unknown_nmi_error"; // Lots of locks blfn += "|.*raw_.*lock.*"; blfn += "|.*read_.*lock.*"; blfn += "|.*write_.*lock.*"; blfn += "|.*spin_.*lock.*"; blfn += "|.*rwlock_.*lock.*"; blfn += "|.*rwsem_.*lock.*"; blfn += "|.*mutex_.*lock.*"; blfn += "|raw_.*"; blfn += "|.*seq_.*lock.*"; // atomic functions blfn += "|atomic_.*"; blfn += "|atomic64_.*"; // few other problematic cases blfn += "|get_bh"; blfn += "|put_bh"; // Experimental blfn += "|.*apic.*|.*APIC.*"; blfn += "|.*softirq.*"; blfn += "|.*IRQ.*"; blfn += "|.*_intr.*"; blfn += "|__delay"; blfn += "|.*kernel_text.*"; blfn += "|get_current"; blfn += "|current_.*"; blfn += "|.*exception_tables.*"; blfn += "|.*setup_rt_frame.*"; // PR 5759, CONFIG_PREEMPT kernels blfn += "|.*preempt_count.*"; blfn += "|preempt_schedule"; // These functions don't return, so return probes would never be recovered blfn_ret += "do_exit"; // no "|" blfn_ret += "|sys_exit"; blfn_ret += "|sys_exit_group"; // __switch_to changes "current" on x86_64 and i686, so return probes // would cause kernel panic, and it is marked as "__kprobes" on x86_64 if (sess.architecture == "x86_64") blfn += "|__switch_to"; if (sess.architecture == "i686") blfn_ret += "|__switch_to"; blfn += ")$"; blfn_ret += ")$"; blfile += ")$"; if (sess.verbose > 2) { clog << "blacklist regexps:" << endl; clog << "blfn: " << blfn << endl; clog << "blfn_ret: " << blfn_ret << endl; clog << "blfile: " << blfile << endl; } int rc = regcomp (& blacklist_func, blfn.c_str(), REG_NOSUB|REG_EXTENDED); if (rc) throw semantic_error ("blacklist_func regcomp failed"); rc = regcomp (& blacklist_func_ret, blfn_ret.c_str(), REG_NOSUB|REG_EXTENDED); if (rc) throw semantic_error ("blacklist_func_ret regcomp failed"); rc = regcomp (& blacklist_file, blfile.c_str(), REG_NOSUB|REG_EXTENDED); if (rc) throw semantic_error ("blacklist_file regcomp failed"); } function_spec_type dwarf_query::parse_function_spec(string & spec) { string::const_iterator i = spec.begin(), e = spec.end(); function.clear(); file.clear(); line[0] = 0; line[1] = 0; while (i != e && *i != '@') { if (*i == ':' || *i == '+') goto bad; function += *i++; } if (i == e) { if (sess.verbose>2) clog << "parsed '" << spec << "' -> func '" << function << "'\n"; return function_alone; } if (i++ == e) goto bad; while (i != e && *i != ':' && *i != '+') file += *i++; if (*i == ':') { if (*(i + 1) == '*') line_type = WILDCARD; else line_type = ABSOLUTE; } else if (*i == '+') line_type = RELATIVE; if (i == e) { if (sess.verbose>2) clog << "parsed '" << spec << "' -> func '"<< function << "', file '" << file << "'\n"; return function_and_file; } if (i++ == e) goto bad; try { if (line_type != WILDCARD) { string::const_iterator dash = i; while (dash != e && *dash != '-') dash++; if (dash == e) line[0] = line[1] = lex_cast(string(i, e)); else { line_type = RANGE; line[0] = lex_cast(string(i, dash)); line[1] = lex_cast(string(dash + 1, e)); } } if (sess.verbose>2) clog << "parsed '" << spec << "' -> func '"<< function << "', file '" << file << "', line " << line << "\n"; return function_file_and_line; } catch (runtime_error & exn) { goto bad; } bad: throw semantic_error("malformed specification '" + spec + "'", base_probe->tok); } #if 0 // Forward declaration. static int query_kernel_module (Dwfl_Module *, void **, const char *, Dwarf_Addr, void *); #endif // XXX: pull this into dwflpp static bool in_kprobes_function(systemtap_session& sess, Dwarf_Addr addr) { if (sess.sym_kprobes_text_start != 0 && sess.sym_kprobes_text_end != 0) { // If the probe point address is anywhere in the __kprobes // address range, we can't use this probe point. if (addr >= sess.sym_kprobes_text_start && addr < sess.sym_kprobes_text_end) return true; } return false; } bool dwarf_query::blacklisted_p(const string& funcname, const string& filename, int, const string& module, const string& section, Dwarf_Addr addr) { if (has_process) return false; // no blacklist for userspace if (section.substr(0, 6) == string(".init.") || section.substr(0, 6) == string(".exit.") || section.substr(0, 9) == string(".devinit.") || section.substr(0, 9) == string(".devexit.") || section.substr(0, 9) == string(".cpuinit.") || section.substr(0, 9) == string(".cpuexit.") || section.substr(0, 9) == string(".meminit.") || section.substr(0, 9) == string(".memexit.")) { // NB: module .exit. routines could be probed in theory: // if the exit handler in "struct module" is diverted, // first inserting the kprobes // then allowing the exit code to run // then removing these kprobes if (sess.verbose>1) clog << " skipping - init/exit"; return true; } // Check for function marked '__kprobes'. if (module == TOK_KERNEL && in_kprobes_function(sess, addr)) { if (sess.verbose>1) clog << " skipping - __kprobes"; return true; } // Check probe point against blacklist. int goodfn = regexec (&blacklist_func, funcname.c_str(), 0, NULL, 0); if (has_return) goodfn = goodfn && regexec (&blacklist_func_ret, funcname.c_str(), 0, NULL, 0); int goodfile = regexec (&blacklist_file, filename.c_str(), 0, NULL, 0); if (! (goodfn && goodfile)) { if (sess.guru_mode) { if (sess.verbose>1) clog << " guru mode enabled - ignoring blacklist"; } else { if (sess.verbose>1) clog << " skipping - blacklisted"; return true; } } // This probe point is not blacklisted. return false; } string dwarf_query::get_blacklist_section(Dwarf_Addr addr) { string blacklist_section; Dwarf_Addr bias; // We prefer dwfl_module_getdwarf to dwfl_module_getelf here, // because dwfl_module_getelf can force costly section relocations // we don't really need, while either will do for this purpose. Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (dw.module, &bias)) ?: dwfl_module_getelf (dw.module, &bias)); Dwarf_Addr offset = addr - bias; if (elf) { Elf_Scn* scn = 0; size_t shstrndx; dwfl_assert ("getshstrndx", elf_getshstrndx (elf, &shstrndx)); while ((scn = elf_nextscn (elf, scn)) != NULL) { GElf_Shdr shdr_mem; GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); if (! shdr) continue; // XXX error? if (!(shdr->sh_flags & SHF_ALLOC)) continue; GElf_Addr start = shdr->sh_addr; GElf_Addr end = start + shdr->sh_size; if (! (offset >= start && offset < end)) continue; blacklist_section = elf_strptr (elf, shstrndx, shdr->sh_name); break; } } return blacklist_section; } void dwarf_query::add_probe_point(const string& funcname, const char* filename, int line, Dwarf_Die* scope_die, Dwarf_Addr addr) { string reloc_section; // base section for relocation purposes Dwarf_Addr reloc_addr = addr; // relocated string blacklist_section; // linking section for blacklist purposes const string& module = dw.module_name; // "kernel" or other assert (! has_absolute); // already handled in dwarf_builder::build() if (!dw.module) { assert(module == TOK_KERNEL); reloc_section = ""; blacklist_section = ""; } else if (dwfl_module_relocations (dw.module) > 0) { // This is a relocatable module; libdwfl already knows its // sections, so we can relativize addr. int idx = dwfl_module_relocate_address (dw.module, &reloc_addr); const char* r_s = dwfl_module_relocation_info (dw.module, idx, NULL); if (r_s) reloc_section = r_s; blacklist_section = reloc_section; if(reloc_section == "" && dwfl_module_relocations (dw.module) == 1) { blacklist_section = this->get_blacklist_section(addr); reloc_section = ".dynamic"; reloc_addr = addr; } } else { blacklist_section = this->get_blacklist_section(addr); reloc_section = ".absolute"; } if (sess.verbose > 1) { clog << "probe " << funcname << "@" << filename << ":" << line; if (string(module) == TOK_KERNEL) clog << " kernel"; else if (has_module) clog << " module=" << module; else if (has_process) clog << " process=" << module; if (reloc_section != "") clog << " reloc=" << reloc_section; if (blacklist_section != "") clog << " section=" << blacklist_section; clog << " pc=0x" << hex << addr << dec; } bool bad = blacklisted_p (funcname, filename, line, module, blacklist_section, addr); if (sess.verbose > 1) clog << endl; if (module == TOK_KERNEL) { // PR 4224: adapt to relocatable kernel by subtracting the _stext address here. reloc_addr = addr - sess.sym_stext; reloc_section = "_stext"; // a message to runtime's _stp_module_relocate } if (! bad) { sess.unwindsym_modules.insert (module); if (has_process) { results.push_back (new uprobe_derived_probe(funcname, filename, line, module, 0, reloc_section, addr, reloc_addr, *this, scope_die)); } else { assert (has_kernel || has_module); results.push_back (new dwarf_derived_probe(funcname, filename, line, module, reloc_section, addr, reloc_addr, *this, scope_die)); } } } enum dbinfo_reqt dwarf_query::assess_dbinfo_reqt() { if (has_absolute) { // kernel.statement(NUM).absolute return dbr_none; } if (has_inline) { // kernel.function("f").inline or module("m").function("f").inline return dbr_need_dwarf; } if (has_function_str && spec_type == function_alone) { // kernel.function("f") or module("m").function("f") return dbr_need_symtab; } if (has_statement_num) { // kernel.statement(NUM) or module("m").statement(NUM) // Technically, all we need is the module offset (or _stext, for // the kernel). But for that we need either the ELF file or (for // _stext) the symbol table. In either case, the symbol table // is available, and that allows us to map the NUM (address) // to a function, which is goodness. return dbr_need_symtab; } if (has_function_num) { // kernel.function(NUM) or module("m").function(NUM) // Need the symbol table so we can back up from NUM to the // start of the function. return dbr_need_symtab; } // Symbol table tells us nothing about source files or line numbers. return dbr_need_dwarf; } // The critical determining factor when interpreting a pattern // string is, perhaps surprisingly: "presence of a lineno". The // presence of a lineno changes the search strategy completely. // // Compare the two cases: // // 1. {statement,function}(foo@file.c:lineno) // - find the files matching file.c // - in each file, find the functions matching foo // - query the file for line records matching lineno // - iterate over the line records, // - and iterate over the functions, // - if(haspc(function.DIE, line.addr)) // - if looking for statements: probe(lineno.addr) // - if looking for functions: probe(function.{entrypc,return,etc.}) // // 2. {statement,function}(foo@file.c) // - find the files matching file.c // - in each file, find the functions matching foo // - probe(function.{entrypc,return,etc.}) // // Thus the first decision we make is based on the presence of a // lineno, and we enter entirely different sets of callbacks // depending on that decision. // // Note that the first case is a generalization fo the second, in that // we could theoretically search through line records for matching // file names (a "table scan" in rdbms lingo). Luckily, file names // are already cached elsewhere, so we can do an "index scan" as an // optimization. static void query_statement (string const & func, char const * file, int line, Dwarf_Die *scope_die, Dwarf_Addr stmt_addr, dwarf_query * q) { try { q->add_probe_point(func, file ? file : "", line, scope_die, stmt_addr); } catch (const semantic_error& e) { q->sess.print_error (e); } } static void query_inline_instance_info (inline_instance_info & ii, dwarf_query * q) { try { if (q->has_return) { throw semantic_error ("cannot probe .return of inline function '" + ii.name + "'"); } else { if (q->sess.verbose>2) clog << "querying entrypc " << hex << ii.entrypc << dec << " of instance of inline '" << ii.name << "'\n"; query_statement (ii.name, ii.decl_file, ii.decl_line, &ii.die, ii.entrypc, q); } } catch (semantic_error &e) { q->sess.print_error (e); } } static void query_func_info (Dwarf_Addr entrypc, func_info & fi, dwarf_query * q) { try { if (q->has_return) { // NB. dwarf_derived_probe::emit_registrations will emit a // kretprobe based on the entrypc in this case. query_statement (fi.name, fi.decl_file, fi.decl_line, &fi.die, entrypc, q); } else { if (fi.prologue_end != 0) { query_statement (fi.name, fi.decl_file, fi.decl_line, &fi.die, fi.prologue_end, q); } else { query_statement (fi.name, fi.decl_file, fi.decl_line, &fi.die, entrypc, q); } } } catch (semantic_error &e) { q->sess.print_error (e); } } static void query_srcfile_line (const dwarf_line_t& line, void * arg) { dwarf_query * q = static_cast(arg); Dwarf_Addr addr = line.addr(); int lineno = line.lineno(); for (func_info_map_t::iterator i = q->filtered_functions.begin(); i != q->filtered_functions.end(); ++i) { if (q->dw.die_has_pc (i->die, addr)) { if (q->sess.verbose>3) clog << "function DIE lands on srcfile\n"; if (q->has_statement_str) query_statement (i->name, i->decl_file, lineno, // NB: not q->line ! &(i->die), addr, q); else query_func_info (i->entrypc, *i, q); } } for (inline_instance_map_t::iterator i = q->filtered_inlines.begin(); i != q->filtered_inlines.end(); ++i) { if (q->dw.die_has_pc (i->die, addr)) { if (q->sess.verbose>3) clog << "inline instance DIE lands on srcfile\n"; if (q->has_statement_str) query_statement (i->name, i->decl_file, q->line[0], &(i->die), addr, q); else query_inline_instance_info (*i, q); } } } static int query_dwarf_inline_instance (Dwarf_Die * die, void * arg) { dwarf_query * q = static_cast(arg); assert (!q->has_statement_num); try { if (q->sess.verbose>2) clog << "examining inline instance of " << q->dw.function_name << "\n"; if ((q->has_function_str && ! q->has_call) || q->has_statement_str) { if (q->sess.verbose>2) clog << "selected inline instance of " << q->dw.function_name << "\n"; Dwarf_Addr entrypc; if (q->dw.die_entrypc (die, &entrypc)) { inline_instance_info inl; inl.die = *die; inl.name = q->dw.function_name; inl.entrypc = entrypc; q->dw.function_file (&inl.decl_file); q->dw.function_line (&inl.decl_line); q->filtered_inlines.push_back(inl); } } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } static int query_dwarf_func (Dwarf_Die * func, base_query * bq) { dwarf_query * q = static_cast(bq); try { q->dw.focus_on_function (func); if (q->dw.func_is_inline () && (! q->has_call) && (! q->has_return) && (((q->has_statement_str || q->has_function_str) && q->dw.function_name_matches(q->function)))) { if (q->sess.verbose>3) clog << "checking instances of inline " << q->dw.function_name << "\n"; q->dw.iterate_over_inline_instances (query_dwarf_inline_instance, q); if (q->dw.function_name_final_match (q->function)) return DWARF_CB_ABORT; } else if (!q->dw.func_is_inline () && (! q->has_inline)) { bool record_this_function = false; if ((q->has_statement_str || q->has_function_str) && q->dw.function_name_matches(q->function)) { record_this_function = true; } else if (q->has_function_num || q->has_statement_num) { Dwarf_Addr query_addr = (q->has_function_num ? q->function_num_val : q->has_statement_num ? q->statement_num_val : (assert(0) , 0)); Dwarf_Die d; q->dw.function_die (&d); if (q->dw.die_has_pc (d, query_addr)) record_this_function = true; } if (record_this_function) { if (q->sess.verbose>2) clog << "selected function " << q->dw.function_name << "\n"; func_info func; q->dw.function_die (&func.die); func.name = q->dw.function_name; q->dw.function_file (&func.decl_file); q->dw.function_line (&func.decl_line); if (q->has_function_num || q->has_function_str || q->has_statement_str) { Dwarf_Addr entrypc; if (q->dw.function_entrypc (&entrypc)) { func.entrypc = entrypc; q->filtered_functions.push_back (func); } else /* this function just be fully inlined, just ignore it */ return DWARF_CB_OK; } else if (q->has_statement_num) { func.entrypc = q->statement_num_val; q->filtered_functions.push_back (func); if (q->dw.function_name_final_match (q->function)) return DWARF_CB_ABORT; } else assert(0); if (q->dw.function_name_final_match (q->function)) return DWARF_CB_ABORT; } } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } static int query_cu (Dwarf_Die * cudie, void * arg) { dwarf_query * q = static_cast(arg); if (pending_interrupts) return DWARF_CB_ABORT; try { q->dw.focus_on_cu (cudie); if (false && q->sess.verbose>2) clog << "focused on CU '" << q->dw.cu_name << "', in module '" << q->dw.module_name << "'\n"; if (q->has_statement_str || q->has_statement_num || q->has_function_str || q->has_function_num) { q->filtered_srcfiles.clear(); q->filtered_functions.clear(); q->filtered_inlines.clear(); // In this path, we find "abstract functions", record // information about them, and then (depending on lineno // matching) possibly emit one or more of the function's // associated addresses. Unfortunately the control of this // cannot easily be turned inside out. if ((q->has_statement_str || q->has_function_str) && (q->spec_type != function_alone)) { // If we have a pattern string with a filename, we need // to elaborate the srcfile mask in question first. q->dw.collect_srcfiles_matching (q->file, q->filtered_srcfiles); // If we have a file pattern and *no* srcfile matches, there's // no need to look further into this CU, so skip. if (q->filtered_srcfiles.empty()) return DWARF_CB_OK; } // Verify that a raw address matches the beginning of a // statement. This is a somewhat lame check that the address // is at the start of an assembly instruction. Mark probes are in the // middle of a macro and thus not strictly at a statement beginning. if (q->has_statement_num && ! q->has_mark) { Dwarf_Addr queryaddr = q->statement_num_val; dwarf_line_t address_line(dwarf_getsrc_die(cudie, queryaddr)); Dwarf_Addr lineaddr = 0; if (address_line) lineaddr = address_line.addr(); if (!address_line || lineaddr != queryaddr) { stringstream msg; msg << "address 0x" << hex << queryaddr << " does not match the beginning of a statement"; throw semantic_error(msg.str()); } } // Pick up [entrypc, name, DIE] tuples for all the functions // matching the query, and fill in the prologue endings of them // all in a single pass. int rc = q->dw.iterate_over_functions (query_dwarf_func, q, q->function, q->has_statement_num); if (rc != DWARF_CB_OK) q->query_done = true; if ((q->sess.prologue_searching || q->has_process) // PR 6871 && !q->has_statement_str && !q->has_statement_num) // PR 2608 if (! q->filtered_functions.empty()) q->dw.resolve_prologue_endings (q->filtered_functions); if ((q->has_statement_str || q->has_function_str) && (q->spec_type == function_file_and_line)) { // If we have a pattern string with target *line*, we // have to look at lines in all the matched srcfiles. for (set::const_iterator i = q->filtered_srcfiles.begin(); i != q->filtered_srcfiles.end(); ++i) q->dw.iterate_over_srcfile_lines (*i, q->line, q->has_statement_str, q->line_type, query_srcfile_line, q); } else if (q->has_label) { // If we have a pattern string with target *label*, we // have to look at labels in all the matched srcfiles. q->dw.iterate_over_cu_labels (q->label_val, q->function, q->dw.cu, q->results, q->base_loc, q, query_statement); } else { // Otherwise, simply probe all resolved functions. for (func_info_map_t::iterator i = q->filtered_functions.begin(); i != q->filtered_functions.end(); ++i) query_func_info (i->entrypc, *i, q); // And all inline instances (if we're not excluding inlines with ".call") if (! q->has_call) for (inline_instance_map_t::iterator i = q->filtered_inlines.begin(); i != q->filtered_inlines.end(); ++i) query_inline_instance_info (*i, q); } } else { // Before PR 5787, we used to have this: #if 0 // Otherwise we have a statement number, and we can just // query it directly within this module. assert (q->has_statement_num); Dwarf_Addr query_addr = q->statement_num_val; query_addr = q->dw.module_address_to_global(query_addr); query_statement ("", "", -1, NULL, query_addr, q); #endif // But now, we traverse CUs/functions even for // statement_num's, for blacklist sensitivity and $var // resolution purposes. assert (0); // NOTREACHED } return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } #if 0 static int query_kernel_module (Dwfl_Module *mod, void **, const char *name, Dwarf_Addr, void *arg) { if (TOK_KERNEL == name) { Dwfl_Module **m = (Dwfl_Module **)arg; *m = mod; return DWARF_CB_ABORT; } return DWARF_CB_OK; } #endif static void validate_module_elf (Dwfl_Module *mod, const char *name, base_query *q) { // Validate the machine code in this elf file against the // session machine. This is important, in case the wrong kind // of debuginfo is being automagically processed by elfutils. // While we can tell i686 apart from x86-64, unfortunately // we can't help confusing i586 vs i686 (both EM_386). Dwarf_Addr bias; // We prefer dwfl_module_getdwarf to dwfl_module_getelf here, // because dwfl_module_getelf can force costly section relocations // we don't really need, while either will do for this purpose. Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias)) ?: dwfl_module_getelf (mod, &bias)); GElf_Ehdr ehdr_mem; GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem); if (em == 0) { dwfl_assert ("dwfl_getehdr", dwfl_errno()); } int elf_machine = em->e_machine; const char* debug_filename = ""; const char* main_filename = ""; (void) dwfl_module_info (mod, NULL, NULL, NULL, NULL, NULL, & main_filename, & debug_filename); const string& sess_machine = q->sess.architecture; string expect_machine; // to match sess.machine (i.e., kernel machine) string expect_machine2; switch (elf_machine) { // x86 and ppc are bi-architecture; a 64-bit kernel // can normally run either 32-bit or 64-bit *userspace*. case EM_386: expect_machine = "i?86"; if (! q->has_process) break; // 32-bit kernel/module /* FALLSTHROUGH */ case EM_X86_64: expect_machine2 = "x86_64"; break; case EM_PPC: expect_machine = "ppc"; if (! q->has_process) break; // 32-bit kernel/module /* FALLSTHROUGH */ case EM_PPC64: expect_machine2 = "ppc64"; break; case EM_S390: expect_machine = "s390x"; break; case EM_IA_64: expect_machine = "ia64"; break; case EM_ARM: expect_machine = "armv*"; break; // XXX: fill in some more of these default: expect_machine = "?"; break; } if (! debug_filename) debug_filename = main_filename; if (! debug_filename) debug_filename = name; if (fnmatch (expect_machine.c_str(), sess_machine.c_str(), 0) != 0 && fnmatch (expect_machine2.c_str(), sess_machine.c_str(), 0) != 0) { stringstream msg; msg << "ELF machine " << expect_machine << "|" << expect_machine2 << " (code " << elf_machine << ") mismatch with target " << sess_machine << " in '" << debug_filename << "'"; throw semantic_error(msg.str ()); } if (q->sess.verbose>2) clog << "focused on module '" << q->dw.module_name << " = [0x" << hex << q->dw.module_start << "-0x" << q->dw.module_end << ", bias 0x" << q->dw.module_bias << "]" << dec << " file " << debug_filename << " ELF machine " << expect_machine << "|" << expect_machine2 << " (code " << elf_machine << ")" << "\n"; } static Dwarf_Addr lookup_symbol_address (Dwfl_Module *m, const char* wanted) { int syments = dwfl_module_getsymtab(m); assert(syments); for (int i = 1; i < syments; ++i) { GElf_Sym sym; const char *name = dwfl_module_getsym(m, i, &sym, NULL); if (name != NULL && strcmp(name, wanted) == 0) return sym.st_value; } return 0; } static int query_module (Dwfl_Module *mod, void **, const char *name, Dwarf_Addr addr, void *arg) { base_query *q = static_cast(arg); try { module_info* mi = q->sess.module_cache->cache[name]; if (mi == 0) { mi = q->sess.module_cache->cache[name] = new module_info(name); mi->mod = mod; mi->addr = addr; const char* debug_filename = ""; const char* main_filename = ""; (void) dwfl_module_info (mod, NULL, NULL, NULL, NULL, NULL, & main_filename, & debug_filename); if (q->sess.ignore_vmlinux && name == TOK_KERNEL) { // report_kernel() in elfutils found vmlinux, but pretend it didn't. // Given a non-null path, returning 1 means keep reporting modules. mi->dwarf_status = info_absent; } else if (debug_filename || main_filename) { mi->elf_path = debug_filename ?: main_filename; } else if (name == TOK_KERNEL) { mi->dwarf_status = info_absent; } } // OK, enough of that module_info caching business. q->dw.focus_on_module(mod, mi); // If we have enough information in the pattern to skip a module and // the module does not match that information, return early. if (!q->dw.module_name_matches(q->module_val)) return DWARF_CB_OK; // Don't allow module("*kernel*") type expressions to match the // elfutils module "kernel", which we refer to in the probe // point syntax exclusively as "kernel.*". if (q->dw.module_name == TOK_KERNEL && ! q->has_kernel) return DWARF_CB_OK; if (mod) validate_module_elf(mod, name, q); else assert(q->has_kernel); // and no vmlinux to examine if (q->sess.verbose>2) cerr << "focused on module '" << q->dw.module_name << "'\n"; // Collect a few kernel addresses. XXX: these belong better // to the sess.module_info["kernel"] struct. if (q->dw.module_name == TOK_KERNEL) { if (! q->sess.sym_kprobes_text_start) q->sess.sym_kprobes_text_start = lookup_symbol_address (mod, "__kprobes_text_start"); if (! q->sess.sym_kprobes_text_end) q->sess.sym_kprobes_text_end = lookup_symbol_address (mod, "__kprobes_text_end"); if (! q->sess.sym_stext) q->sess.sym_stext = lookup_symbol_address (mod, "_stext"); } // Finally, search the module for matches of the probe point. q->handle_query_module(); // If we know that there will be no more matches, abort early. if (q->dw.module_name_final_match(q->module_val)) return DWARF_CB_ABORT; else return DWARF_CB_OK; } catch (const semantic_error& e) { q->sess.print_error (e); return DWARF_CB_ABORT; } } void dwflpp::query_modules(base_query *q) { iterate_over_modules(&query_module, q); } struct var_expanding_visitor: public update_visitor { static unsigned tick; stack target_symbol_setter_functioncalls; var_expanding_visitor() {} void visit_assignment (assignment* e); }; struct dwarf_var_expanding_visitor: public var_expanding_visitor { dwarf_query & q; Dwarf_Die *scope_die; Dwarf_Addr addr; block *add_block; probe *add_probe; std::map return_ts_map; bool visited; dwarf_var_expanding_visitor(dwarf_query & q, Dwarf_Die *sd, Dwarf_Addr a): q(q), scope_die(sd), addr(a), add_block(NULL), add_probe(NULL), visited(false) {} void visit_target_symbol (target_symbol* e); void visit_cast_op (cast_op* e); }; unsigned var_expanding_visitor::tick = 0; void var_expanding_visitor::visit_assignment (assignment* e) { // Our job would normally be to require() the left and right sides // into a new assignment. What we're doing is slightly trickier: // we're pushing a functioncall** onto a stack, and if our left // child sets the functioncall* for that value, we're going to // assume our left child was a target symbol -- transformed into a // set_target_foo(value) call, and it wants to take our right child // as the argument "value". // // This is why some people claim that languages with // constructor-decomposing case expressions have a leg up on // visitors. functioncall *fcall = NULL; expression *new_left, *new_right; target_symbol_setter_functioncalls.push (&fcall); new_left = require (e->left); target_symbol_setter_functioncalls.pop (); new_right = require (e->right); if (fcall != NULL) { // Our left child is informing us that it was a target variable // and it has been replaced with a set_target_foo() function // call; we are going to provide that function call -- with the // right child spliced in as sole argument -- in place of // ourselves, in the var expansion we're in the middle of making. // FIXME: for the time being, we only support plan $foo = bar, // not += or any other op= variant. This is fixable, but a bit // ugly. if (e->op != "=") throw semantic_error ("Operator-assign expressions on target " "variables not implemented", e->tok); assert (new_left == fcall); fcall->args.push_back (new_right); provide (fcall); } else { e->left = new_left; e->right = new_right; provide (e); } } void dwarf_var_expanding_visitor::visit_target_symbol (target_symbol *e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); visited = true; bool lvalue = is_active_lvalue(e); if (lvalue && !q.sess.guru_mode) throw semantic_error("write to target variable not permitted", e->tok); // See if we need to generate a new probe to save/access function // parameters from a return probe. PR 1382. if (q.has_return && e->base_name != "$return" // not the special return-value variable handled below && e->base_name != "$$return") // nor the other special variable handled below { if (lvalue) throw semantic_error("write to target variable not permitted in .return probes", e->tok); // Get the full name of the target symbol. stringstream ts_name_stream; e->print(ts_name_stream); string ts_name = ts_name_stream.str(); // Check and make sure we haven't already seen this target // variable in this return probe. If we have, just return our // last replacement. map::iterator i = return_ts_map.find(ts_name); if (i != return_ts_map.end()) { provide (i->second); return; } // We've got to do several things here to handle target // variables in return probes. // (1) Synthesize two global arrays. One is the cache of the // target variable and the other contains a thread specific // nesting level counter. The arrays will look like // this: // // _dwarf_tvar_{name}_{num} // _dwarf_tvar_{name}_{num}_ctr string aname = (string("_dwarf_tvar_") + e->base_name.substr(1) + "_" + lex_cast(tick++)); vardecl* vd = new vardecl; vd->name = aname; vd->tok = e->tok; q.sess.globals.push_back (vd); string ctrname = aname + "_ctr"; vd = new vardecl; vd->name = ctrname; vd->tok = e->tok; q.sess.globals.push_back (vd); // (2) Create a new code block we're going to insert at the // beginning of this probe to get the cached value into a // temporary variable. We'll replace the target variable // reference with the temporary variable reference. The code // will look like this: // // _dwarf_tvar_tid = tid() // _dwarf_tvar_{name}_{num}_tmp // = _dwarf_tvar_{name}_{num}[_dwarf_tvar_tid, // _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]] // delete _dwarf_tvar_{name}_{num}[_dwarf_tvar_tid, // _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]--] // if (! _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]) // delete _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid] // (2a) Synthesize the tid temporary expression, which will look // like this: // // _dwarf_tvar_tid = tid() symbol* tidsym = new symbol; tidsym->name = string("_dwarf_tvar_tid"); tidsym->tok = e->tok; if (add_block == NULL) { add_block = new block; add_block->tok = e->tok; // Synthesize a functioncall to grab the thread id. functioncall* fc = new functioncall; fc->tok = e->tok; fc->function = string("tid"); // Assign the tid to '_dwarf_tvar_tid'. assignment* a = new assignment; a->tok = e->tok; a->op = "="; a->left = tidsym; a->right = fc; expr_statement* es = new expr_statement; es->tok = e->tok; es->value = a; add_block->statements.push_back (es); } // (2b) Synthesize an array reference and assign it to a // temporary variable (that we'll use as replacement for the // target variable reference). It will look like this: // // _dwarf_tvar_{name}_{num}_tmp // = _dwarf_tvar_{name}_{num}[_dwarf_tvar_tid, // _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]] arrayindex* ai_tvar_base = new arrayindex; ai_tvar_base->tok = e->tok; symbol* sym = new symbol; sym->name = aname; sym->tok = e->tok; ai_tvar_base->base = sym; ai_tvar_base->indexes.push_back(tidsym); // We need to create a copy of the array index in its current // state so we can have 2 variants of it (the original and one // that post-decrements the second index). arrayindex* ai_tvar = new arrayindex; arrayindex* ai_tvar_postdec = new arrayindex; *ai_tvar = *ai_tvar_base; *ai_tvar_postdec = *ai_tvar_base; // Synthesize the // "_dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]" used as the // second index into the array. arrayindex* ai_ctr = new arrayindex; ai_ctr->tok = e->tok; sym = new symbol; sym->name = ctrname; sym->tok = e->tok; ai_ctr->base = sym; ai_ctr->indexes.push_back(tidsym); ai_tvar->indexes.push_back(ai_ctr); symbol* tmpsym = new symbol; tmpsym->name = aname + "_tmp"; tmpsym->tok = e->tok; assignment* a = new assignment; a->tok = e->tok; a->op = "="; a->left = tmpsym; a->right = ai_tvar; expr_statement* es = new expr_statement; es->tok = e->tok; es->value = a; add_block->statements.push_back (es); // (2c) Add a post-decrement to the second array index and // delete the array value. It will look like this: // // delete _dwarf_tvar_{name}_{num}[_dwarf_tvar_tid, // _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]--] post_crement* pc = new post_crement; pc->tok = e->tok; pc->op = "--"; pc->operand = ai_ctr; ai_tvar_postdec->indexes.push_back(pc); delete_statement* ds = new delete_statement; ds->tok = e->tok; ds->value = ai_tvar_postdec; add_block->statements.push_back (ds); // (2d) Delete the counter value if it is 0. It will look like // this: // if (! _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]) // delete _dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid] ds = new delete_statement; ds->tok = e->tok; ds->value = ai_ctr; unary_expression *ue = new unary_expression; ue->tok = e->tok; ue->op = "!"; ue->operand = ai_ctr; if_statement *ifs = new if_statement; ifs->tok = e->tok; ifs->condition = ue; ifs->thenblock = ds; ifs->elseblock = NULL; add_block->statements.push_back (ifs); // (3) We need an entry probe that saves the value for us in the // global array we created. Create the entry probe, which will // look like this: // // probe kernel.function("{function}") { // _dwarf_tvar_tid = tid() // _dwarf_tvar_{name}_{num}[_dwarf_tvar_tid, // ++_dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]] // = ${param} // } if (add_probe == NULL) { add_probe = new probe; add_probe->tok = e->tok; // We need the name of the current probe point, minus the // ".return" (or anything after it, such as ".maxactive(N)"). // Create a new probe point, copying all the components, // stopping when we see the ".return" component. probe_point* pp = new probe_point; for (unsigned c = 0; c < q.base_loc->components.size(); c++) { if (q.base_loc->components[c]->functor == "return") break; else pp->components.push_back(q.base_loc->components[c]); } pp->tok = e->tok; pp->optional = q.base_loc->optional; add_probe->locations.push_back(pp); add_probe->body = new block; add_probe->body->tok = e->tok; // Synthesize a functioncall to grab the thread id. functioncall* fc = new functioncall; fc->tok = e->tok; fc->function = string("tid"); // Assign the tid to '_dwarf_tvar_tid'. assignment* a = new assignment; a->tok = e->tok; a->op = "="; a->left = tidsym; a->right = fc; expr_statement* es = new expr_statement; es->tok = e->tok; es->value = a; add_probe->body = new block(add_probe->body, es); vardecl* vd = new vardecl; vd->tok = e->tok; vd->name = tidsym->name; vd->type = pe_long; vd->set_arity(0); add_probe->locals.push_back(vd); } // Save the value, like this: // _dwarf_tvar_{name}_{num}[_dwarf_tvar_tid, // ++_dwarf_tvar_{name}_{num}_ctr[_dwarf_tvar_tid]] // = ${param} arrayindex* ai_tvar_preinc = new arrayindex; *ai_tvar_preinc = *ai_tvar_base; pre_crement* preinc = new pre_crement; preinc->tok = e->tok; preinc->op = "++"; preinc->operand = ai_ctr; ai_tvar_preinc->indexes.push_back(preinc); a = new assignment; a->tok = e->tok; a->op = "="; a->left = ai_tvar_preinc; a->right = e; es = new expr_statement; es->tok = e->tok; es->value = a; add_probe->body = new block(add_probe->body, es); // (4) Provide the '_dwarf_tvar_{name}_{num}_tmp' variable to // our parent so it can be used as a substitute for the target // symbol. provide (tmpsym); // (5) Remember this replacement since we might be able to reuse // it later if the same return probe references this target // symbol again. return_ts_map[ts_name] = tmpsym; return; } if (e->base_name == "$$vars" || e->base_name == "$$parms" || e->base_name == "$$locals" || (q.has_return && (e->base_name == "$$return"))) { Dwarf_Die *scopes; if (dwarf_getscopes_die (scope_die, &scopes) == 0) return; target_symbol *tsym = new target_symbol; print_format* pf = new print_format; // Convert $$parms to sprintf of a list of parms and active local vars // which we recursively evaluate // NB: we synthesize a new token here rather than reusing // e->tok, because print_format::print likes to use // its tok->content. token* pf_tok = new token; pf_tok->location = e->tok->location; pf_tok->type = tok_identifier; pf_tok->content = "sprint"; pf->tok = pf_tok; pf->print_to_stream = false; pf->print_with_format = true; pf->print_with_delim = false; pf->print_with_newline = false; pf->print_char = false; if (q.has_return && (e->base_name == "$$return")) { tsym->tok = e->tok; tsym->base_name = "$return"; // Ignore any variable that isn't accessible. tsym->saved_conversion_error = 0; expression *texp = tsym; texp = require (texp); // NB: throws nothing ... if (tsym->saved_conversion_error) // ... but this is how we know it happened. { } else { pf->raw_components += "return"; pf->raw_components += "=%#x "; pf->args.push_back(texp); } } else { // non-.return probe: support $$parms, $$vars, $$locals Dwarf_Die result; if (dwarf_child (&scopes[0], &result) == 0) do { switch (dwarf_tag (&result)) { case DW_TAG_variable: if (e->base_name == "$$parms") continue; break; case DW_TAG_formal_parameter: if (e->base_name == "$$locals") continue; break; default: continue; } const char *diename = dwarf_diename (&result); if (! diename) continue; tsym->tok = e->tok; tsym->base_name = "$"; tsym->base_name += diename; // Ignore any variable that isn't accessible. tsym->saved_conversion_error = 0; expression *texp = tsym; texp = require (texp); // NB: throws nothing ... if (tsym->saved_conversion_error) // ... but this is how we know it happened. { if (q.sess.verbose>2) { for (semantic_error *c = tsym->saved_conversion_error; c != 0; c = c->chain) { clog << "variable location problem: " << c->what() << endl; } } pf->raw_components += diename; pf->raw_components += "=? "; } else { pf->raw_components += diename; pf->raw_components += "=%#x "; pf->args.push_back(texp); } } while (dwarf_siblingof (&result, &result) == 0); } pf->components = print_format::string_to_components(pf->raw_components); provide (pf); return; } // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; string fname = (string(lvalue ? "_dwarf_tvar_set" : "_dwarf_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(tick++)); try { if (q.has_return && (e->base_name == "$return")) { ec->code = q.dw.literal_stmt_for_return (scope_die, addr, e, lvalue, fdecl->type); } else { ec->code = q.dw.literal_stmt_for_local (scope_die, addr, e->base_name.substr(1), e, lvalue, fdecl->type); } if (! lvalue) ec->code += "/* pure */"; } catch (const semantic_error& er) { if (!q.sess.skip_badvars) { // We suppress this error message, and pass the unresolved // target_symbol to the next pass. We hope that this value ends // up not being referenced after all, so it can be optimized out // quietly. provide (e); semantic_error* saveme = new semantic_error (er); // copy it saveme->tok1 = e->tok; // XXX: token not passed to q.dw code generation routines // NB: we can have multiple errors, since a $target variable // may be expanded in several different contexts: // function ("*") { $var } saveme->chain = e->saved_conversion_error; e->saved_conversion_error = saveme; } else { // Upon user request for ignoring context, the symbol is replaced // with a literal 0 and a warning message displayed literal_number* ln_zero = new literal_number (0); ln_zero->tok = e->tok; provide (ln_zero); if (!q.sess.suppress_warnings) q.sess.print_warning ("Bad $context variable being substituted with literal 0", e->tok); } delete fdecl; delete ec; return; } fdecl->name = fname; fdecl->body = ec; if (lvalue) { // Modify the fdecl so it carries a single pe_long formal // argument called "value". // FIXME: For the time being we only support setting target // variables which have base types; these are 'pe_long' in // stap's type vocabulary. Strings and pointers might be // reasonable, some day, but not today. vardecl *v = new vardecl; v->type = pe_long; v->name = "value"; v->tok = e->tok; fdecl->formal_args.push_back(v); } q.sess.functions[fdecl->name]=fdecl; // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session if (lvalue) { // Provide the functioncall to our parent, so that it can be // used to substitute for the assignment node immediately above // us. assert(!target_symbol_setter_functioncalls.empty()); *(target_symbol_setter_functioncalls.top()) = n; } provide (n); } void dwarf_var_expanding_visitor::visit_cast_op (cast_op *e) { // Fill in our current module context if needed if (e->module.empty()) e->module = q.dw.module_name; var_expanding_visitor::visit_cast_op(e); } struct dwarf_cast_query : public base_query { const cast_op& e; const bool lvalue; exp_type& pe_type; string& code; dwarf_cast_query(dwflpp& dw, const string& module, const cast_op& e, bool lvalue, exp_type& pe_type, string& code): base_query(dw, module), e(e), lvalue(lvalue), pe_type(pe_type), code(code) {} void handle_query_module(); int handle_query_cu(Dwarf_Die * cudie); static int cast_query_cu (Dwarf_Die * cudie, void * arg); }; void dwarf_cast_query::handle_query_module() { if (!code.empty()) return; // look for the type in each CU dw.iterate_over_cus(cast_query_cu, this); } int dwarf_cast_query::handle_query_cu(Dwarf_Die * cudie) { if (!code.empty()) return DWARF_CB_ABORT; dw.focus_on_cu (cudie); Dwarf_Die* type_die = dw.declaration_resolve(e.type.c_str()); if (type_die) { try { code = dw.literal_stmt_for_pointer (type_die, &e, lvalue, pe_type); } catch (const semantic_error& e) { // XXX might be better to save the error // and try again in another CU sess.print_error (e); } return DWARF_CB_ABORT; } return DWARF_CB_OK; } int dwarf_cast_query::cast_query_cu (Dwarf_Die * cudie, void * arg) { dwarf_cast_query * q = static_cast(arg); if (pending_interrupts) return DWARF_CB_ABORT; return q->handle_query_cu(cudie); } struct dwarf_cast_expanding_visitor: public var_expanding_visitor { systemtap_session& s; dwarf_builder& db; dwarf_cast_expanding_visitor(systemtap_session& s, dwarf_builder& db): s(s), db(db) {} void visit_cast_op (cast_op* e); }; void dwarf_cast_expanding_visitor::visit_cast_op (cast_op* e) { bool lvalue = is_active_lvalue(e); if (lvalue && !s.guru_mode) throw semantic_error("write to typecast value not permitted", e->tok); if (e->module.empty()) e->module = "kernel"; // "*" may also be reasonable to search all kernel modules string code; exp_type type = pe_long; size_t mod_end = ~0; do { // split the module string by ':' for alternatives size_t mod_begin = mod_end + 1; mod_end = e->module.find(':', mod_begin); string module = e->module.substr(mod_begin, mod_end - mod_begin); // NB: This uses '/' to distinguish between kernel modules and userspace, // which means that userspace modules won't get any PATH searching. dwflpp* dw; if (module.find('/') == string::npos) { // kernel or kernel module target if (! db.kern_dw) { db.kern_dw = new dwflpp(s); db.kern_dw->setup_kernel(true); } dw = db.kern_dw; } else { module = find_executable (module); // canonicalize it // user-space target; we use one dwflpp instance per module name // (= program or shared library) if (db.user_dw.find(module) == db.user_dw.end()) { dw = new dwflpp(s); dw->setup_user(module); db.user_dw[module] = dw; } else dw = db.user_dw[module]; } dwarf_cast_query q (*dw, module, *e, lvalue, type, code); dw->query_modules(&q); } while (code.empty() && mod_end != string::npos); if (code.empty()) { // We generate an error message, and pass the unresolved // cast_op to the next pass. We hope that this value ends // up not being referenced after all, so it can be optimized out // quietly. string msg = "type definition '" + e->type + "' not found"; semantic_error* er = new semantic_error (msg, e->tok); // NB: we can have multiple errors, since a @cast // may be expanded in several different contexts: // function ("*") { @cast(...) } er->chain = e->saved_conversion_error; e->saved_conversion_error = er; provide (e); return; } string fname = (string(lvalue ? "_dwarf_tvar_set" : "_dwarf_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(tick++)); // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; fdecl->type = type; fdecl->name = fname; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; ec->code = code; fdecl->body = ec; // Give the fdecl an argument for the pointer we're trying to cast vardecl *v1 = new vardecl; v1->type = pe_long; v1->name = "pointer"; v1->tok = e->tok; fdecl->formal_args.push_back(v1); if (lvalue) { // Modify the fdecl so it carries a second pe_long formal // argument called "value". // FIXME: For the time being we only support setting target // variables which have base types; these are 'pe_long' in // stap's type vocabulary. Strings and pointers might be // reasonable, some day, but not today. vardecl *v2 = new vardecl; v2->type = pe_long; v2->name = "value"; v2->tok = e->tok; fdecl->formal_args.push_back(v2); } else ec->code += "/* pure */"; s.functions[fdecl->name] = fdecl; // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session n->args.push_back(e->operand); if (lvalue) { // Provide the functioncall to our parent, so that it can be // used to substitute for the assignment node immediately above // us. assert(!target_symbol_setter_functioncalls.empty()); *(target_symbol_setter_functioncalls.top()) = n; } provide (n); } void dwarf_derived_probe::printsig (ostream& o) const { // Instead of just printing the plain locations, we add a PC value // as a comment as a way of telling e.g. apart multiple inlined // function instances. This is distinct from the verbose/clog // output, since this part goes into the cache hash calculations. sole_location()->print (o); o << " /* pc=" << section << "+0x" << hex << addr << dec << " */"; printsig_nested (o); } void dwarf_derived_probe::join_group (systemtap_session& s) { if (! s.dwarf_derived_probes) s.dwarf_derived_probes = new dwarf_derived_probe_group (); s.dwarf_derived_probes->enroll (this); } dwarf_derived_probe::dwarf_derived_probe(const string& funcname, const string& filename, int line, // module & section specify a relocation // base for , unless section=="" // (equivalently module=="kernel") const string& module, const string& section, // NB: dwfl_addr is the virtualized // address for this symbol. Dwarf_Addr dwfl_addr, // addr is the section-offset for // actual relocation. Dwarf_Addr addr, dwarf_query& q, Dwarf_Die* scope_die /* may be null */) : derived_probe (q.base_probe, new probe_point(*q.base_loc) /* .components soon rewritten */ ), module (module), section (section), addr (addr), has_return (q.has_return), has_maxactive (q.has_maxactive), maxactive_val (q.maxactive_val) { // Assert relocation invariants if (section == "" && dwfl_addr != addr) // addr should be absolute throw semantic_error ("missing relocation base against", q.base_loc->tok); if (section != "" && dwfl_addr == addr) // addr should be an offset throw semantic_error ("inconsistent relocation address", q.base_loc->tok); this->tok = q.base_probe->tok; this->access_vars = false; // XXX: hack for strange g++/gcc's #ifndef USHRT_MAX #define USHRT_MAX 32767 #endif // Range limit maxactive() value if (q.has_maxactive && (q.maxactive_val < 0 || q.maxactive_val > USHRT_MAX)) throw semantic_error ("maxactive value out of range [0," + lex_cast(USHRT_MAX) + "]", q.base_loc->tok); // Expand target variables in the probe body if (!null_die(scope_die)) { dwarf_var_expanding_visitor v (q, scope_die, dwfl_addr); this->body = v.require (this->body); this->access_vars = v.visited; // If during target-variable-expanding the probe, we added a new block // of code, add it to the start of the probe. if (v.add_block) this->body = new block(v.add_block, this->body); // If when target-variable-expanding the probe, we added a new // probe, add it in a new file to the list of files to be processed. if (v.add_probe) { stapfile *f = new stapfile; f->probes.push_back(v.add_probe); q.sess.files.push_back(f); } } // else - null scope_die - $target variables will produce an error during translate phase // Reset the sole element of the "locations" vector as a // "reverse-engineered" form of the incoming (q.base_loc) probe // point. This allows a user to see what function / file / line // number any particular match of the wildcards. vector comps; if (q.has_kernel) comps.push_back (new probe_point::component(TOK_KERNEL)); else if(q.has_module) comps.push_back (new probe_point::component(TOK_MODULE, new literal_string(module))); else if(q.has_process) comps.push_back (new probe_point::component(TOK_PROCESS, new literal_string(module))); else assert (0); string fn_or_stmt; if (q.has_function_str || q.has_function_num) fn_or_stmt = "function"; else fn_or_stmt = "statement"; if (q.has_function_str || q.has_statement_str) { string retro_name = funcname; if (filename != "") { retro_name += ("@" + string (filename)); if (line > 0) retro_name += (":" + lex_cast (line)); } comps.push_back (new probe_point::component (fn_or_stmt, new literal_string (retro_name))); } else if (q.has_function_num || q.has_statement_num) { Dwarf_Addr retro_addr; if (q.has_function_num) retro_addr = q.function_num_val; else retro_addr = q.statement_num_val; comps.push_back (new probe_point::component (fn_or_stmt, new literal_number(retro_addr))); // XXX: should be hex if possible if (q.has_absolute) comps.push_back (new probe_point::component (TOK_ABSOLUTE)); } if (q.has_call) comps.push_back (new probe_point::component(TOK_CALL)); if (q.has_inline) comps.push_back (new probe_point::component(TOK_INLINE)); if (has_return) comps.push_back (new probe_point::component(TOK_RETURN)); if (has_maxactive) comps.push_back (new probe_point::component (TOK_MAXACTIVE, new literal_number(maxactive_val))); // Overwrite it. this->sole_location()->components = comps; } void dwarf_derived_probe::register_statement_variants(match_node * root, dwarf_builder * dw) { root->bind(dw); } void dwarf_derived_probe::register_function_variants(match_node * root, dwarf_builder * dw) { root->bind(dw); root->bind(TOK_INLINE)->bind(dw); root->bind(TOK_CALL)->bind(dw); root->bind(TOK_RETURN)->bind(dw); root->bind(TOK_RETURN)->bind_num(TOK_MAXACTIVE)->bind(dw); } void dwarf_derived_probe::register_function_and_statement_variants(match_node * root, dwarf_builder * dw) { // Here we match 4 forms: // // .function("foo") // .function(0xdeadbeef) // .statement("foo") // .statement(0xdeadbeef) register_function_variants(root->bind_str(TOK_FUNCTION), dw); register_function_variants(root->bind_num(TOK_FUNCTION), dw); register_statement_variants(root->bind_str(TOK_STATEMENT), dw); register_statement_variants(root->bind_num(TOK_STATEMENT), dw); } void dwarf_derived_probe::register_patterns(systemtap_session& s) { match_node* root = s.pattern_root; dwarf_builder *dw = new dwarf_builder(); update_visitor *filter = new dwarf_cast_expanding_visitor(s, *dw); s.code_filters.push_back(filter); register_function_and_statement_variants(root->bind(TOK_KERNEL), dw); register_function_and_statement_variants(root->bind_str(TOK_MODULE), dw); root->bind(TOK_KERNEL)->bind_num(TOK_STATEMENT)->bind(TOK_ABSOLUTE)->bind(dw); root->bind(TOK_KERNEL)->bind_str(TOK_FUNCTION)->bind_str(TOK_LABEL)->bind(dw); root->bind_str(TOK_PROCESS)->bind_str(TOK_FUNCTION)->bind_str(TOK_LABEL)->bind(dw); register_function_and_statement_variants(root->bind_str(TOK_PROCESS), dw); root->bind_str(TOK_PROCESS)->bind_str(TOK_MARK)->bind(dw); root->bind_str(TOK_PROCESS)->bind_num(TOK_MARK)->bind(dw); } void dwarf_derived_probe::emit_probe_local_init(translator_output * o) { if (access_vars) { // if accessing $variables, emit bsp cache setup for speeding up o->newline() << "bspcache(c->unwaddr, c->regs);"; } } // ------------------------------------------------------------------------ void dwarf_derived_probe_group::enroll (dwarf_derived_probe* p) { probes_by_module.insert (make_pair (p->module, p)); // XXX: probes put at the same address should all share a // single kprobe/kretprobe, and have their handlers executed // sequentially. } void dwarf_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes_by_module.empty()) return; s.op->newline() << "/* ---- dwarf probes ---- */"; // Warn of misconfigured kernels s.op->newline() << "#if ! defined(CONFIG_KPROBES)"; s.op->newline() << "#error \"Need CONFIG_KPROBES!\""; s.op->newline() << "#endif"; s.op->newline(); // Forward declare the master entry functions s.op->newline() << "static int enter_kprobe_probe (struct kprobe *inst,"; s.op->line() << " struct pt_regs *regs);"; s.op->newline() << "static int enter_kretprobe_probe (struct kretprobe_instance *inst,"; s.op->line() << " struct pt_regs *regs);"; // Emit an array of kprobe/kretprobe pointers s.op->newline() << "#if defined(STAPCONF_UNREGISTER_KPROBES)"; s.op->newline() << "static void * stap_unreg_kprobes[" << probes_by_module.size() << "];"; s.op->newline() << "#endif"; // Emit the actual probe list. // NB: we used to plop a union { struct kprobe; struct kretprobe } into // struct stap_dwarf_probe, but it being initialized data makes it add // hundreds of bytes of padding per stap_dwarf_probe. (PR5673) s.op->newline() << "static struct stap_dwarf_kprobe {"; s.op->newline(1) << "union { struct kprobe kp; struct kretprobe krp; } u;"; s.op->newline() << "#ifdef __ia64__"; s.op->newline() << "struct kprobe dummy;"; s.op->newline() << "#endif"; s.op->newline(-1) << "} stap_dwarf_kprobes[" << probes_by_module.size() << "];"; // NB: bss! s.op->newline() << "static struct stap_dwarf_probe {"; s.op->newline(1) << "const unsigned return_p:1;"; s.op->newline() << "const unsigned maxactive_p:1;"; s.op->newline() << "unsigned registered_p:1;"; s.op->newline() << "const unsigned short maxactive_val;"; // Let's find some stats for the three embedded strings. Maybe they // are small and uniform enough to justify putting char[MAX]'s into // the array instead of relocated char*'s. size_t module_name_max = 0, section_name_max = 0, pp_name_max = 0; size_t module_name_tot = 0, section_name_tot = 0, pp_name_tot = 0; size_t all_name_cnt = probes_by_module.size(); // for average for (p_b_m_iterator it = probes_by_module.begin(); it != probes_by_module.end(); it++) { dwarf_derived_probe* p = it->second; #define DOIT(var,expr) do { \ size_t var##_size = (expr) + 1; \ var##_max = max (var##_max, var##_size); \ var##_tot += var##_size; } while (0) DOIT(module_name, p->module.size()); DOIT(section_name, p->section.size()); DOIT(pp_name, lex_cast_qstring(*p->sole_location()).size()); #undef DOIT } // Decide whether it's worthwhile to use char[] or char* by comparing // the amount of average waste (max - avg) to the relocation data size // (3 native long words). #define CALCIT(var) \ if ((var##_name_max-(var##_name_tot/all_name_cnt)) < (3 * sizeof(void*))) \ { \ s.op->newline() << "const char " << #var << "[" << var##_name_max << "];"; \ if (s.verbose > 2) clog << "stap_dwarf_probe " << #var \ << "[" << var##_name_max << "]" << endl; \ } \ else \ { \ s.op->newline() << "const char * const " << #var << ";"; \ if (s.verbose > 2) clog << "stap_dwarf_probe *" << #var << endl; \ } CALCIT(module); CALCIT(section); CALCIT(pp); s.op->newline() << "const unsigned long address;"; s.op->newline() << "void (* const ph) (struct context*);"; s.op->newline(-1) << "} stap_dwarf_probes[] = {"; s.op->indent(1); for (p_b_m_iterator it = probes_by_module.begin(); it != probes_by_module.end(); it++) { dwarf_derived_probe* p = it->second; s.op->newline() << "{"; if (p->has_return) s.op->line() << " .return_p=1,"; if (p->has_maxactive) { s.op->line() << " .maxactive_p=1,"; assert (p->maxactive_val >= 0 && p->maxactive_val <= USHRT_MAX); s.op->line() << " .maxactive_val=" << p->maxactive_val << ","; } s.op->line() << " .address=(unsigned long)0x" << hex << p->addr << dec << "ULL,"; s.op->line() << " .module=\"" << p->module << "\","; s.op->line() << " .section=\"" << p->section << "\","; s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ","; s.op->line() << " .ph=&" << p->name; s.op->line() << " },"; } s.op->newline(-1) << "};"; // Emit the kprobes callback function s.op->newline(); s.op->newline() << "static int enter_kprobe_probe (struct kprobe *inst,"; s.op->line() << " struct pt_regs *regs) {"; // NB: as of PR5673, the kprobe|kretprobe union struct is in BSS s.op->newline(1) << "int kprobe_idx = ((uintptr_t)inst-(uintptr_t)stap_dwarf_kprobes)/sizeof(struct stap_dwarf_kprobe);"; // Check that the index is plausible s.op->newline() << "struct stap_dwarf_probe *sdp = &stap_dwarf_probes["; s.op->line() << "((kprobe_idx >= 0 && kprobe_idx < " << probes_by_module.size() << ")?"; s.op->line() << "kprobe_idx:0)"; // NB: at least we avoid memory corruption // XXX: it would be nice to give a more verbose error though; BUG_ON later? s.op->line() << "];"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sdp->pp"); s.op->newline() << "c->regs = regs;"; s.op->newline() << "(*sdp->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "return 0;"; s.op->newline(-1) << "}"; // Same for kretprobes s.op->newline(); s.op->newline() << "static int enter_kretprobe_probe (struct kretprobe_instance *inst,"; s.op->line() << " struct pt_regs *regs) {"; s.op->newline(1) << "struct kretprobe *krp = inst->rp;"; // NB: as of PR5673, the kprobe|kretprobe union struct is in BSS s.op->newline() << "int kprobe_idx = ((uintptr_t)krp-(uintptr_t)stap_dwarf_kprobes)/sizeof(struct stap_dwarf_kprobe);"; // Check that the index is plausible s.op->newline() << "struct stap_dwarf_probe *sdp = &stap_dwarf_probes["; s.op->line() << "((kprobe_idx >= 0 && kprobe_idx < " << probes_by_module.size() << ")?"; s.op->line() << "kprobe_idx:0)"; // NB: at least we avoid memory corruption // XXX: it would be nice to give a more verbose error though; BUG_ON later? s.op->line() << "];"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sdp->pp"); s.op->newline() << "c->regs = regs;"; s.op->newline() << "c->pi = inst;"; // for assisting runtime's backtrace logic s.op->newline() << "(*sdp->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "return 0;"; s.op->newline(-1) << "}"; } void dwarf_derived_probe_group::emit_module_init (systemtap_session& s) { s.op->newline() << "for (i=0; i<" << probes_by_module.size() << "; i++) {"; s.op->newline(1) << "struct stap_dwarf_probe *sdp = & stap_dwarf_probes[i];"; s.op->newline() << "struct stap_dwarf_kprobe *kp = & stap_dwarf_kprobes[i];"; s.op->newline() << "unsigned long relocated_addr = _stp_module_relocate (sdp->module, sdp->section, sdp->address);"; s.op->newline() << "if (relocated_addr == 0) continue;"; // quietly; assume module is absent s.op->newline() << "probe_point = sdp->pp;"; // for error messages s.op->newline() << "if (sdp->return_p) {"; s.op->newline(1) << "kp->u.krp.kp.addr = (void *) relocated_addr;"; s.op->newline() << "if (sdp->maxactive_p) {"; s.op->newline(1) << "kp->u.krp.maxactive = sdp->maxactive_val;"; s.op->newline(-1) << "} else {"; s.op->newline(1) << "kp->u.krp.maxactive = max(10, 4*NR_CPUS);"; s.op->newline(-1) << "}"; s.op->newline() << "kp->u.krp.handler = &enter_kretprobe_probe;"; // to ensure safeness of bspcache, always use aggr_kprobe on ia64 s.op->newline() << "#ifdef __ia64__"; s.op->newline() << "kp->dummy.addr = kp->u.krp.kp.addr;"; s.op->newline() << "kp->dummy.pre_handler = NULL;"; s.op->newline() << "rc = register_kprobe (& kp->dummy);"; s.op->newline() << "if (rc == 0) {"; s.op->newline(1) << "rc = register_kretprobe (& kp->u.krp);"; s.op->newline() << "if (rc != 0)"; s.op->newline(1) << "unregister_kprobe (& kp->dummy);"; s.op->newline(-2) << "}"; s.op->newline() << "#else"; s.op->newline() << "rc = register_kretprobe (& kp->u.krp);"; s.op->newline() << "#endif"; s.op->newline(-1) << "} else {"; // to ensure safeness of bspcache, always use aggr_kprobe on ia64 s.op->newline(1) << "kp->u.kp.addr = (void *) relocated_addr;"; s.op->newline() << "kp->u.kp.pre_handler = &enter_kprobe_probe;"; s.op->newline() << "#ifdef __ia64__"; s.op->newline() << "kp->dummy.addr = kp->u.kp.addr;"; s.op->newline() << "kp->dummy.pre_handler = NULL;"; s.op->newline() << "rc = register_kprobe (& kp->dummy);"; s.op->newline() << "if (rc == 0) {"; s.op->newline(1) << "rc = register_kprobe (& kp->u.kp);"; s.op->newline() << "if (rc != 0)"; s.op->newline(1) << "unregister_kprobe (& kp->dummy);"; s.op->newline(-2) << "}"; s.op->newline() << "#else"; s.op->newline() << "rc = register_kprobe (& kp->u.kp);"; s.op->newline() << "#endif"; s.op->newline(-1) << "}"; s.op->newline() << "if (rc) {"; // PR6749: tolerate a failed register_*probe. s.op->newline(1) << "sdp->registered_p = 0;"; s.op->newline() << "_stp_warn (\"probe %s registration error (rc %d)\", probe_point, rc);"; s.op->newline() << "rc = 0;"; // continue with other probes // XXX: shall we increment numskipped? s.op->newline(-1) << "}"; #if 0 /* pre PR 6749; XXX consider making an option */ s.op->newline(1) << "for (j=i-1; j>=0; j--) {"; // partial rollback s.op->newline(1) << "struct stap_dwarf_probe *sdp2 = & stap_dwarf_probes[j];"; s.op->newline() << "struct stap_dwarf_kprobe *kp2 = & stap_dwarf_kprobes[j];"; s.op->newline() << "if (sdp2->return_p) unregister_kretprobe (&kp2->u.krp);"; s.op->newline() << "else unregister_kprobe (&kp2->u.kp);"; s.op->newline() << "#ifdef __ia64__"; s.op->newline() << "unregister_kprobe (&kp2->dummy);"; s.op->newline() << "#endif"; // NB: we don't have to clear sdp2->registered_p, since the module_exit code is // not run for this early-abort case. s.op->newline(-1) << "}"; s.op->newline() << "break;"; // don't attempt to register any more probes s.op->newline(-1) << "}"; #endif s.op->newline() << "else sdp->registered_p = 1;"; s.op->newline(-1) << "}"; // for loop } void dwarf_derived_probe_group::emit_module_exit (systemtap_session& s) { //Unregister kprobes by batch interfaces. s.op->newline() << "#if defined(STAPCONF_UNREGISTER_KPROBES)"; s.op->newline() << "j = 0;"; s.op->newline() << "for (i=0; i<" << probes_by_module.size() << "; i++) {"; s.op->newline(1) << "struct stap_dwarf_probe *sdp = & stap_dwarf_probes[i];"; s.op->newline() << "struct stap_dwarf_kprobe *kp = & stap_dwarf_kprobes[i];"; s.op->newline() << "if (! sdp->registered_p) continue;"; s.op->newline() << "if (!sdp->return_p)"; s.op->newline(1) << "stap_unreg_kprobes[j++] = &kp->u.kp;"; s.op->newline(-2) << "}"; s.op->newline() << "unregister_kprobes((struct kprobe **)stap_unreg_kprobes, j);"; s.op->newline() << "j = 0;"; s.op->newline() << "for (i=0; i<" << probes_by_module.size() << "; i++) {"; s.op->newline(1) << "struct stap_dwarf_probe *sdp = & stap_dwarf_probes[i];"; s.op->newline() << "struct stap_dwarf_kprobe *kp = & stap_dwarf_kprobes[i];"; s.op->newline() << "if (! sdp->registered_p) continue;"; s.op->newline() << "if (sdp->return_p)"; s.op->newline(1) << "stap_unreg_kprobes[j++] = &kp->u.krp;"; s.op->newline(-2) << "}"; s.op->newline() << "unregister_kretprobes((struct kretprobe **)stap_unreg_kprobes, j);"; s.op->newline() << "#ifdef __ia64__"; s.op->newline() << "j = 0;"; s.op->newline() << "for (i=0; i<" << probes_by_module.size() << "; i++) {"; s.op->newline(1) << "struct stap_dwarf_probe *sdp = & stap_dwarf_probes[i];"; s.op->newline() << "struct stap_dwarf_kprobe *kp = & stap_dwarf_kprobes[i];"; s.op->newline() << "if (! sdp->registered_p) continue;"; s.op->newline() << "stap_unreg_kprobes[j++] = &kp->dummy;"; s.op->newline(-1) << "}"; s.op->newline() << "unregister_kprobes((struct kprobe **)stap_unreg_kprobes, j);"; s.op->newline() << "#endif"; s.op->newline() << "#endif"; s.op->newline() << "for (i=0; i<" << probes_by_module.size() << "; i++) {"; s.op->newline(1) << "struct stap_dwarf_probe *sdp = & stap_dwarf_probes[i];"; s.op->newline() << "struct stap_dwarf_kprobe *kp = & stap_dwarf_kprobes[i];"; s.op->newline() << "if (! sdp->registered_p) continue;"; s.op->newline() << "if (sdp->return_p) {"; s.op->newline() << "#if !defined(STAPCONF_UNREGISTER_KPROBES)"; s.op->newline(1) << "unregister_kretprobe (&kp->u.krp);"; s.op->newline() << "#endif"; s.op->newline() << "atomic_add (kp->u.krp.nmissed, & skipped_count);"; s.op->newline() << "#ifdef STP_TIMING"; s.op->newline() << "if (kp->u.krp.nmissed)"; s.op->newline(1) << "_stp_warn (\"Skipped due to missed kretprobe/1 on '%s': %d\\n\", sdp->pp, kp->u.krp.nmissed);"; s.op->newline(-1) << "#endif"; s.op->newline() << "atomic_add (kp->u.krp.kp.nmissed, & skipped_count);"; s.op->newline() << "#ifdef STP_TIMING"; s.op->newline() << "if (kp->u.krp.kp.nmissed)"; s.op->newline(1) << "_stp_warn (\"Skipped due to missed kretprobe/2 on '%s': %d\\n\", sdp->pp, kp->u.krp.kp.nmissed);"; s.op->newline(-1) << "#endif"; s.op->newline(-1) << "} else {"; s.op->newline() << "#if !defined(STAPCONF_UNREGISTER_KPROBES)"; s.op->newline(1) << "unregister_kprobe (&kp->u.kp);"; s.op->newline() << "#endif"; s.op->newline() << "atomic_add (kp->u.kp.nmissed, & skipped_count);"; s.op->newline() << "#ifdef STP_TIMING"; s.op->newline() << "if (kp->u.kp.nmissed)"; s.op->newline(1) << "_stp_warn (\"Skipped due to missed kprobe on '%s': %d\\n\", sdp->pp, kp->u.kp.nmissed);"; s.op->newline(-1) << "#endif"; s.op->newline(-1) << "}"; s.op->newline() << "#if !defined(STAPCONF_UNREGISTER_KPROBES) && defined(__ia64__)"; s.op->newline() << "unregister_kprobe (&kp->dummy);"; s.op->newline() << "#endif"; s.op->newline() << "sdp->registered_p = 0;"; s.op->newline(-1) << "}"; } void dwarf_builder::build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { // NB: the kernel/user dwlfpp objects are long-lived. // XXX: but they should be per-session, as this builder object // may be reused if we try to cross-instrument multiple targets. dwflpp* dw = 0; string module_name; if (has_null_param (parameters, TOK_KERNEL) || get_param (parameters, TOK_MODULE, module_name)) { // kernel or kernel module target if (! kern_dw) { kern_dw = new dwflpp(sess); // XXX: PR 3498, PR 6864 kern_dw->setup_kernel(true); } dw = kern_dw; } else if (get_param (parameters, TOK_PROCESS, module_name)) { module_name = find_executable (module_name); // canonicalize it // user-space target; we use one dwflpp instance per module name // (= program or shared library) if (user_dw.find(module_name) == user_dw.end()) { dw = new dwflpp(sess); // XXX: PR 3498 dw->setup_user(module_name); user_dw[module_name] = dw; } else dw = user_dw[module_name]; } if (((probe_point::component*)(location->components[1]))->functor == TOK_MARK) { enum probe_types { probes_and_dwarf = 0, // Use statement address dwarf_no_probes = 1, // Use label name probes_no_dwarf = 2 }; int probe_type = dwarf_no_probes; string probe_name = (char*) location->components[1]->arg->tok->content.c_str(); __uint64_t probe_arg = 0; Dwarf_Addr bias; Elf* elf = dwfl_module_getelf (dw->module, &bias); size_t shstrndx; Elf_Scn *probe_scn = NULL; dwfl_assert ("getshstrndx", elf_getshstrndx (elf, &shstrndx)); GElf_Shdr *shdr = NULL; // Is there a .probes section? while ((probe_scn = elf_nextscn (elf, probe_scn))) { GElf_Shdr shdr_mem; shdr = gelf_getshdr (probe_scn, &shdr_mem); assert (shdr != NULL); if (strcmp (elf_strptr (elf, shstrndx, shdr->sh_name), ".probes") == 0) { probe_type = probes_and_dwarf; break; } } if (probe_type == probes_and_dwarf) { Elf_Data *pdata = elf_getdata_rawchunk (elf, shdr->sh_offset, shdr->sh_size, ELF_T_BYTE); assert (pdata != NULL); size_t probe_scn_offset = 0; size_t probe_scn_addr = shdr->sh_addr; while (probe_scn_offset < pdata->d_size) { const int stap_sentinel = 0x31425250; probe_type = *((int*)((char*)pdata->d_buf + probe_scn_offset)); if (probe_type != stap_sentinel) { probe_scn_offset += sizeof(int); continue; } probe_scn_offset += sizeof(int); if (probe_scn_offset % (sizeof(__uint64_t))) probe_scn_offset += sizeof(__uint64_t) - (probe_scn_offset % sizeof(__uint64_t)); probe_name = ((char*)((long)(pdata->d_buf) + (long)(*((int*)((long)pdata->d_buf + probe_scn_offset)) - probe_scn_addr))); probe_scn_offset += sizeof(void*); if (probe_scn_offset % (sizeof(__uint64_t))) probe_scn_offset += sizeof(__uint64_t) - (probe_scn_offset % sizeof(__uint64_t)); probe_arg = *((__uint64_t*)((char*)pdata->d_buf + probe_scn_offset)); if (probe_scn_offset % (sizeof(__uint64_t)*2)) probe_scn_offset = (probe_scn_offset + sizeof(__uint64_t)*2) - (probe_scn_offset % (sizeof(__uint64_t)*2)); if ((strcmp (location->components[1]->arg->tok->content.c_str(), probe_name.c_str()) == 0) || (dw->name_has_wildcard (location->components[1]->arg->tok->content.c_str()) && dw->function_name_matches_pattern (probe_name.c_str(), location->components[1]->arg->tok->content.c_str()))) { } else continue; const token* sv_tok = location->components[1]->arg->tok; location->components[1]->functor = TOK_STATEMENT; location->components[1]->arg = new literal_number((int)probe_arg); location->components[1]->arg->tok = sv_tok; ((literal_map_t&)parameters)[TOK_STATEMENT] = location->components[1]->arg; dwarf_query q(sess, base, location, *dw, parameters, finished_results); q.has_mark = true; dw->query_modules(&q); if (sess.listing_mode) { finished_results.back()->locations[0]->components[1]->functor = TOK_MARK; finished_results.back()->locations[0]->components[1]->arg = new literal_string (probe_name.c_str()); } } return; } else if (probe_type == dwarf_no_probes) { location->components[1]->functor = TOK_FUNCTION; location->components[1]->arg = new literal_string("*"); ((literal_map_t&)parameters)[TOK_FUNCTION] = location->components[1]->arg; location->components.push_back(new probe_point::component(TOK_LABEL)); location->components[2]->arg = new literal_string("_stapprobe1_" + probe_name); ((literal_map_t&)parameters).erase(TOK_MARK); ((literal_map_t&)parameters).insert(pair(TOK_LABEL, location->components[2]->arg)); } dw->module = 0; } dwarf_query q(sess, base, location, *dw, parameters, finished_results); // XXX: kernel.statement.absolute is a special case that requires no // dwfl processing. This code should be in a separate builder. if (q.has_kernel && q.has_absolute) { // assert guru mode for absolute probes if (! q.base_probe->privileged) { throw semantic_error ("absolute statement probe in unprivileged script", q.base_probe->tok); } // For kernel.statement(NUM).absolute probe points, we bypass // all the debuginfo stuff: We just wire up a // dwarf_derived_probe right here and now. dwarf_derived_probe* p = new dwarf_derived_probe ("", "", 0, "kernel", "", q.statement_num_val, q.statement_num_val, q, 0); finished_results.push_back (p); sess.unwindsym_modules.insert ("kernel"); return; } dw->query_modules(&q); } symbol_table::~symbol_table() { for (iterator_t i = list_by_addr.begin(); i != list_by_addr.end(); ++i) delete *i; } void symbol_table::add_symbol(const char *name, bool weak, Dwarf_Addr addr, Dwarf_Addr *high_addr) { #ifdef __powerpc__ // Map ".sys_foo" to "sys_foo". if (name[0] == '.') name++; #endif func_info *fi = new func_info(); fi->addr = addr; fi->name = name; fi->weak = weak; map_by_name[fi->name] = fi; // TODO: Use a multimap in case there are multiple static // functions with the same name? list_by_addr.push_back(fi); } enum info_status symbol_table::read_symbols(FILE *f, const string& path) { // Based on do_kernel_symbols() in runtime/staprun/symbols.c int ret; char *name = 0; char *mod = 0; char type; unsigned long long addr; Dwarf_Addr high_addr = 0; int line = 0; // %as (non-POSIX) mallocs space for the string and stores its address. while ((ret = fscanf(f, "%llx %c %as [%as", &addr, &type, &name, &mod)) > 0) { auto_free free_name(name); auto_free free_mod(mod); line++; if (ret < 3) { cerr << "Symbol table error: Line " << line << " of symbol list from " << path << " is not in correct format: address type name [module]"; // Caller should delete symbol_table object. return info_absent; } else if (ret > 3) { // Modules are loaded above the kernel, so if we're getting // modules, we're done. break; } if (type == 'T' || type == 't' || type == 'W') add_symbol(name, (type == 'W'), (Dwarf_Addr) addr, &high_addr); } if (list_by_addr.size() < 1) { cerr << "Symbol table error: " << path << " contains no function symbols." << endl; return info_absent; } sort(); return info_present; } // NB: This currently unused. We use get_from_elf() instead because // that gives us raw addresses -- which we need for modules -- whereas // nm provides the address relative to the beginning of the section. enum info_status symbol_table::read_from_elf_file(const string &path) { FILE *f; string cmd = string("/usr/bin/nm -n --defined-only ") + path; f = popen(cmd.c_str(), "r"); if (!f) { // nm failures are detected by pclose, not popen. cerr << "Internal error reading symbol table from " << path << " -- " << strerror (errno); return info_absent; } enum info_status status = read_symbols(f, path); if (pclose(f) != 0) { if (status == info_present) cerr << "Warning: nm cannot read symbol table from " << path; return info_absent; } return status; } enum info_status symbol_table::read_from_text_file(const string& path) { FILE *f = fopen(path.c_str(), "r"); if (!f) { cerr << "Warning: cannot read symbol table from " << path << " -- " << strerror (errno); return info_absent; } enum info_status status = read_symbols(f, path); (void) fclose(f); return status; } void symbol_table::prepare_section_rejection(Dwfl_Module *mod) { #ifdef __powerpc__ /* * The .opd section contains function descriptors that can look * just like function entry points. For example, there's a function * descriptor called "do_exit" that links to the entry point ".do_exit". * Reject all symbols in .opd. */ opd_section = SHN_UNDEF; Dwarf_Addr bias; Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias)) ?: dwfl_module_getelf (mod, &bias)); Elf_Scn* scn = 0; size_t shstrndx; if (!elf) return; if (elf_getshstrndx(elf, &shstrndx) != 0) return; while ((scn = elf_nextscn(elf, scn)) != NULL) { GElf_Shdr shdr_mem; GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); if (!shdr) continue; const char *name = elf_strptr(elf, shstrndx, shdr->sh_name); if (!strcmp(name, ".opd")) { opd_section = elf_ndxscn(scn); return; } } #endif } bool symbol_table::reject_section(GElf_Word section) { if (section == SHN_UNDEF) return true; #ifdef __powerpc__ if (section == opd_section) return true; #endif return false; } enum info_status symbol_table::get_from_elf() { Dwarf_Addr high_addr = 0; Dwfl_Module *mod = mod_info->mod; int syments = dwfl_module_getsymtab(mod); assert(syments); prepare_section_rejection(mod); for (int i = 1; i < syments; ++i) { GElf_Sym sym; GElf_Word section; const char *name = dwfl_module_getsym(mod, i, &sym, §ion); if (name && GELF_ST_TYPE(sym.st_info) == STT_FUNC && !reject_section(section)) add_symbol(name, (GELF_ST_BIND(sym.st_info) == STB_WEAK), sym.st_value, &high_addr); } sort(); return info_present; } void symbol_table::mark_dwarf_redundancies(dwflpp *dw) { // dwflpp.cu_function_cache maps each module_name:cu_name to a // vector of Dwarf_Dies, one per function. string module_prefix = string(mod_info->name) + ":"; for (mod_cu_function_cache_t::iterator cu = dw->cu_function_cache.begin(); cu != dw->cu_function_cache.end(); cu++) { string key = cu->first; if (key.find(module_prefix) == 0) { // Found a compilation unit in the module of interest. // Mark all its functions in the symbol table. cu_function_cache_t* v = cu->second; assert(v); for (cu_function_cache_t::iterator fc = v->begin(); fc != v->end(); fc++) { Dwarf_Die func = fc->second; string func_name = fc->first; // == dwarf_diename(&func); // map_by_name[func_name]->die = func; map::iterator i = map_by_name.find(func_name); // Func names can show up in the dwarf but not the symtab (!). if (i != map_by_name.end()) { func_info *fi = i->second; fi->die = func; } } } } } func_info * symbol_table::get_func_containing_address(Dwarf_Addr addr) { iterator_t iter = upper_bound(list_by_addr.begin(), list_by_addr.end(), addr, func_info::Compare()); if (iter == list_by_addr.begin()) return NULL; else return *(iter - 1); } func_info * symbol_table::lookup_symbol(const string& name) { map::iterator i = map_by_name.find(name); if (i == map_by_name.end()) return NULL; return i->second; } Dwarf_Addr symbol_table::lookup_symbol_address(const string& name) { func_info *fi = lookup_symbol(name); if (fi) return fi->addr; return 0; } // This is the kernel symbol table. The kernel macro cond_syscall creates // a weak symbol for each system call and maps it to sys_ni_syscall. // For system calls not implemented elsewhere, this weak symbol shows up // in the kernel symbol table. Following the precedent of dwarfful stap, // we refuse to consider such symbols. Here we delete them from our // symbol table. // TODO: Consider generalizing this and/or making it part of blacklist // processing. void symbol_table::purge_syscall_stubs() { Dwarf_Addr stub_addr = lookup_symbol_address("sys_ni_syscall"); if (stub_addr == 0) return; range_t purge_range = equal_range(list_by_addr.begin(), list_by_addr.end(), stub_addr, func_info::Compare()); for (iterator_t iter = purge_range.first; iter != purge_range.second; ++iter) { func_info *fi = *iter; if (fi->weak && fi->name != "sys_ni_syscall") { map_by_name.erase(fi->name); delete fi; *iter = 0; } } // Range might have null pointer entries that should be erased. list_by_addr.erase(remove(purge_range.first, purge_range.second, (func_info*)0), purge_range.second); } void symbol_table::sort() { stable_sort(list_by_addr.begin(), list_by_addr.end(), func_info::Compare()); } void module_info::get_symtab(dwarf_query *q) { systemtap_session &sess = q->sess; sym_table = new symbol_table(this); if (!elf_path.empty()) { if (name == TOK_KERNEL && !sess.kernel_symtab_path.empty()) cerr << "Warning: reading symbol table from " << elf_path << " -- ignoring " << sess.kernel_symtab_path << endl ;; symtab_status = sym_table->get_from_elf(); } else { assert(name == TOK_KERNEL); if (sess.kernel_symtab_path.empty()) { symtab_status = info_absent; cerr << "Error: Cannot find vmlinux." << " Consider using --kmap instead of --kelf." << endl;; } else { symtab_status = sym_table->read_from_text_file(sess.kernel_symtab_path); if (symtab_status == info_present) { sess.sym_kprobes_text_start = sym_table->lookup_symbol_address("__kprobes_text_start"); sess.sym_kprobes_text_end = sym_table->lookup_symbol_address("__kprobes_text_end"); sess.sym_stext = sym_table->lookup_symbol_address("_stext"); } } } if (symtab_status == info_absent) { delete sym_table; sym_table = NULL; return; } // If we have dwarf for the same module, mark the redundant symtab // entries. // // In dwarf_query::handle_query_module(), the call to query_module_dwarf() // precedes the call to query_module_symtab(). So we should never read // a module's symbol table without first having tried to get its dwarf. sym_table->mark_dwarf_redundancies(&q->dw); if (name == TOK_KERNEL) sym_table->purge_syscall_stubs(); } module_info::~module_info() { if (sym_table) delete sym_table; } // Helper function to emit vma tracker callbacks. static void emit_vma_callback_probe_decl (systemtap_session& s, string path, int64_t pid) { s.op->newline() << "{"; if (pid == 0) { s.op->line() << " .pathname=\"" << path << "\","; s.op->line() << " .pid=0,"; } else { s.op->line() << " .pathname=NULL,"; s.op->line() << " .pid=" << pid << ","; } s.op->line() << " .callback=NULL,"; s.op->line() << " .mmap_callback=&_stp_tf_mmap_cb,"; s.op->line() << " .munmap_callback=&_stp_tf_munmap_cb,"; s.op->line() << " .mprotect_callback=NULL,"; s.op->line() << " },"; } // ------------------------------------------------------------------------ // task_finder derived 'probes': These don't really exist. The whole // purpose of the task_finder_derived_probe_group is to make sure that // stap_start_task_finder()/stap_stop_task_finder() get called only // once and in the right place. // ------------------------------------------------------------------------ struct task_finder_derived_probe: public derived_probe { // Dummy constructor for gcc 3.4 compatibility task_finder_derived_probe (): derived_probe (0) { assert(0); } }; struct task_finder_derived_probe_group: public generic_dpg { public: static void create_session_group (systemtap_session& s); void emit_module_decls (systemtap_session& ) { } void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; void task_finder_derived_probe_group::create_session_group (systemtap_session& s) { if (! s.task_finder_derived_probes) s.task_finder_derived_probes = new task_finder_derived_probe_group(); } void task_finder_derived_probe_group::emit_module_init (systemtap_session& s) { s.op->newline(); s.op->newline() << "/* ---- task finder ---- */"; s.op->newline() << "rc = stap_start_task_finder();"; s.op->newline() << "if (rc) {"; s.op->newline(1) << "stap_stop_task_finder();"; s.op->newline(-1) << "}"; } void task_finder_derived_probe_group::emit_module_exit (systemtap_session& s) { s.op->newline(); s.op->newline() << "/* ---- task finder ---- */"; s.op->newline() << "stap_stop_task_finder();"; } // ------------------------------------------------------------------------ // itrace user-space probes // ------------------------------------------------------------------------ static string TOK_INSN("insn"); static string TOK_BLOCK("block"); struct itrace_derived_probe: public derived_probe { bool has_path; string path; int64_t pid; int single_step; itrace_derived_probe (systemtap_session &s, probe* p, probe_point* l, bool hp, string &pn, int64_t pd, int ss ); void join_group (systemtap_session& s); }; struct itrace_derived_probe_group: public generic_dpg { private: map > probes_by_path; typedef map >::iterator p_b_path_iterator; map > probes_by_pid; typedef map >::iterator p_b_pid_iterator; unsigned num_probes; void emit_probe_decl (systemtap_session& s, itrace_derived_probe *p); public: itrace_derived_probe_group(): num_probes(0) { } void enroll (itrace_derived_probe* probe); void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; itrace_derived_probe::itrace_derived_probe (systemtap_session &s, probe* p, probe_point* l, bool hp, string &pn, int64_t pd, int ss ): derived_probe(p, l), has_path(hp), path(pn), pid(pd), single_step(ss) { } void itrace_derived_probe::join_group (systemtap_session& s) { if (! s.itrace_derived_probes) s.itrace_derived_probes = new itrace_derived_probe_group (); s.itrace_derived_probes->enroll (this); task_finder_derived_probe_group::create_session_group (s); } struct itrace_builder: public derived_probe_builder { itrace_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, std::map const & parameters, vector & finished_results) { string path; int64_t pid = 0; int single_step; bool has_path = get_param (parameters, TOK_PROCESS, path); bool has_pid = get_param (parameters, TOK_PROCESS, pid); // XXX: PR 6445 needs !has_path && !has_pid support assert (has_path || has_pid); single_step = ! has_null_param (parameters, TOK_BLOCK); // If we have a path, we need to validate it. if (has_path) path = find_executable (path); finished_results.push_back(new itrace_derived_probe(sess, base, location, has_path, path, pid, single_step )); } }; void itrace_derived_probe_group::enroll (itrace_derived_probe* p) { if (p->has_path) probes_by_path[p->path].push_back(p); else probes_by_pid[p->pid].push_back(p); num_probes++; // XXX: multiple exec probes (for instance) for the same path (or // pid) should all share a itrace report function, and have their // handlers executed sequentially. } void itrace_derived_probe_group::emit_probe_decl (systemtap_session& s, itrace_derived_probe *p) { s.op->newline() << "{"; s.op->line() << " .tgt={"; if (p->has_path) { s.op->line() << " .pathname=\"" << p->path << "\","; s.op->line() << " .pid=0,"; } else { s.op->line() << " .pathname=NULL,"; s.op->line() << " .pid=" << p->pid << ","; } s.op->line() << " .callback=&_stp_itrace_probe_cb,"; s.op->line() << " },"; s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ","; s.op->line() << " .single_step=" << p->single_step << ","; s.op->line() << " .ph=&" << p->name << ","; s.op->line() << " },"; } void itrace_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes_by_path.empty() && probes_by_pid.empty()) return; s.op->newline(); s.op->newline() << "/* ---- itrace probes ---- */"; s.op->newline() << "struct stap_itrace_probe {"; s.op->indent(1); s.op->newline() << "struct stap_task_finder_target tgt;"; s.op->newline() << "const char *pp;"; s.op->newline() << "void (*ph) (struct context*);"; s.op->newline() << "int single_step;"; s.op->newline(-1) << "};"; s.op->newline() << "static void enter_itrace_probe(struct stap_itrace_probe *p, struct pt_regs *regs, void *data);"; s.op->newline() << "#include \"itrace.c\""; // output routine to call itrace probe s.op->newline() << "static void enter_itrace_probe(struct stap_itrace_probe *p, struct pt_regs *regs, void *data) {"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "p->pp"); s.op->newline() << "c->regs = regs;"; s.op->newline() << "c->data = data;"; // call probe function s.op->newline() << "(*p->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "return;"; s.op->newline(-1) << "}"; // Output task finder callback routine that gets called for all // itrace probe types. s.op->newline() << "static int _stp_itrace_probe_cb(struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {"; s.op->indent(1); s.op->newline() << "int rc = 0;"; s.op->newline() << "struct stap_itrace_probe *p = container_of(tgt, struct stap_itrace_probe, tgt);"; s.op->newline() << "if (register_p) "; s.op->indent(1); s.op->newline() << "rc = usr_itrace_init(p->single_step, tsk->pid, p);"; s.op->newline(-1) << "else"; s.op->newline(1) << "remove_usr_itrace_info(find_itrace_info(p->tgt.pid));"; s.op->newline(-1) << "return rc;"; s.op->newline(-1) << "}"; // Emit vma callbacks. s.op->newline() << "#ifdef STP_NEED_VMA_TRACKER"; s.op->newline() << "static struct stap_task_finder_target stap_itrace_vmcbs[] = {"; s.op->indent(1); if (! probes_by_path.empty()) { for (p_b_path_iterator it = probes_by_path.begin(); it != probes_by_path.end(); it++) emit_vma_callback_probe_decl (s, it->first, (int64_t)0); } if (! probes_by_pid.empty()) { for (p_b_pid_iterator it = probes_by_pid.begin(); it != probes_by_pid.end(); it++) emit_vma_callback_probe_decl (s, "", it->first); } s.op->newline(-1) << "};"; s.op->newline() << "#endif"; s.op->newline() << "static struct stap_itrace_probe stap_itrace_probes[] = {"; s.op->indent(1); // Set up 'process(PATH)' probes if (! probes_by_path.empty()) { for (p_b_path_iterator it = probes_by_path.begin(); it != probes_by_path.end(); it++) { for (unsigned i = 0; i < it->second.size(); i++) { itrace_derived_probe *p = it->second[i]; emit_probe_decl(s, p); } } } // Set up 'process(PID)' probes if (! probes_by_pid.empty()) { for (p_b_pid_iterator it = probes_by_pid.begin(); it != probes_by_pid.end(); it++) { for (unsigned i = 0; i < it->second.size(); i++) { itrace_derived_probe *p = it->second[i]; emit_probe_decl(s, p); } } } s.op->newline(-1) << "};"; } void itrace_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes_by_path.empty() && probes_by_pid.empty()) return; s.op->newline(); s.op->newline() << "#ifdef STP_NEED_VMA_TRACKER"; s.op->newline() << "_stp_sym_init();"; s.op->newline() << "/* ---- itrace vma callbacks ---- */"; s.op->newline() << "for (i=0; iindent(1); s.op->newline() << "struct stap_task_finder_target *r = &stap_itrace_vmcbs[i];"; s.op->newline() << "rc = stap_register_task_finder_target(r);"; s.op->newline(-1) << "}"; s.op->newline() << "#endif"; s.op->newline(); s.op->newline() << "/* ---- itrace probes ---- */"; s.op->newline() << "for (i=0; i<" << num_probes << "; i++) {"; s.op->indent(1); s.op->newline() << "struct stap_itrace_probe *p = &stap_itrace_probes[i];"; // 'arch_has_single_step' needs to be defined for either single step mode // or branch mode. s.op->newline() << "if (!arch_has_single_step()) {"; s.op->indent(1); s.op->newline() << "_stp_error (\"insn probe init: arch does not support step mode\");"; s.op->newline() << "rc = -EPERM;"; s.op->newline() << "break;"; s.op->newline(-1) << "}"; s.op->newline() << "if (!p->single_step && !arch_has_block_step()) {"; s.op->indent(1); s.op->newline() << "_stp_error (\"insn probe init: arch does not support block step mode\");"; s.op->newline() << "rc = -EPERM;"; s.op->newline() << "break;"; s.op->newline(-1) << "}"; s.op->newline() << "rc = stap_register_task_finder_target(&p->tgt);"; s.op->newline(-1) << "}"; } void itrace_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes_by_path.empty() && probes_by_pid.empty()) return; s.op->newline(); s.op->newline() << "/* ---- itrace probes ---- */"; s.op->newline() << "cleanup_usr_itrace();"; } // ------------------------------------------------------------------------ // utrace user-space probes // ------------------------------------------------------------------------ static string TOK_THREAD("thread"); static string TOK_SYSCALL("syscall"); // Note that these flags don't match up exactly with UTRACE_EVENT // flags (and that's OK). enum utrace_derived_probe_flags { UDPF_NONE, UDPF_BEGIN, // process begin UDPF_END, // process end UDPF_THREAD_BEGIN, // thread begin UDPF_THREAD_END, // thread end UDPF_SYSCALL, // syscall entry UDPF_SYSCALL_RETURN, // syscall exit UDPF_NFLAGS }; struct utrace_derived_probe: public derived_probe { bool has_path; string path; int64_t pid; enum utrace_derived_probe_flags flags; bool target_symbol_seen; utrace_derived_probe (systemtap_session &s, probe* p, probe_point* l, bool hp, string &pn, int64_t pd, enum utrace_derived_probe_flags f); void join_group (systemtap_session& s); }; struct utrace_derived_probe_group: public generic_dpg { private: map > probes_by_path; typedef map >::iterator p_b_path_iterator; map > probes_by_pid; typedef map >::iterator p_b_pid_iterator; unsigned num_probes; bool flags_seen[UDPF_NFLAGS]; void emit_probe_decl (systemtap_session& s, utrace_derived_probe *p); public: utrace_derived_probe_group(): num_probes(0), flags_seen() { } void enroll (utrace_derived_probe* probe); void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; struct utrace_var_expanding_visitor: public var_expanding_visitor { utrace_var_expanding_visitor(systemtap_session& s, const string& pn, enum utrace_derived_probe_flags f): sess (s), probe_name (pn), flags (f), target_symbol_seen (false) {} systemtap_session& sess; string probe_name; enum utrace_derived_probe_flags flags; bool target_symbol_seen; void visit_target_symbol_arg (target_symbol* e); void visit_target_symbol_context (target_symbol* e); void visit_target_symbol (target_symbol* e); }; utrace_derived_probe::utrace_derived_probe (systemtap_session &s, probe* p, probe_point* l, bool hp, string &pn, int64_t pd, enum utrace_derived_probe_flags f): derived_probe (p, new probe_point (*l) /* .components soon rewritten */ ), has_path(hp), path(pn), pid(pd), flags(f), target_symbol_seen(false) { // Expand local variables in the probe body utrace_var_expanding_visitor v (s, name, flags); this->body = v.require (this->body); target_symbol_seen = v.target_symbol_seen; // Reset the sole element of the "locations" vector as a // "reverse-engineered" form of the incoming (q.base_loc) probe // point. This allows a user to see what program etc. // number any particular match of the wildcards. vector comps; if (hp) comps.push_back (new probe_point::component(TOK_PROCESS, new literal_string(path))); else if (pid != 0) comps.push_back (new probe_point::component(TOK_PROCESS, new literal_number(pid))); else comps.push_back (new probe_point::component(TOK_PROCESS)); switch (flags) { case UDPF_THREAD_BEGIN: comps.push_back (new probe_point::component(TOK_THREAD)); comps.push_back (new probe_point::component(TOK_BEGIN)); break; case UDPF_THREAD_END: comps.push_back (new probe_point::component(TOK_THREAD)); comps.push_back (new probe_point::component(TOK_END)); break; case UDPF_SYSCALL: comps.push_back (new probe_point::component(TOK_SYSCALL)); break; case UDPF_SYSCALL_RETURN: comps.push_back (new probe_point::component(TOK_SYSCALL)); comps.push_back (new probe_point::component(TOK_RETURN)); break; case UDPF_BEGIN: comps.push_back (new probe_point::component(TOK_BEGIN)); break; case UDPF_END: comps.push_back (new probe_point::component(TOK_END)); break; default: assert (0); } // Overwrite it. this->sole_location()->components = comps; } void utrace_derived_probe::join_group (systemtap_session& s) { if (! s.utrace_derived_probes) { s.utrace_derived_probes = new utrace_derived_probe_group (); } s.utrace_derived_probes->enroll (this); task_finder_derived_probe_group::create_session_group (s); } void utrace_var_expanding_visitor::visit_target_symbol_arg (target_symbol* e) { string argnum_s = e->base_name.substr(4,e->base_name.length()-4); int argnum = lex_cast(argnum_s); if (flags != UDPF_SYSCALL) throw semantic_error ("only \"process(PATH_OR_PID).syscall\" support $argN.", e->tok); if (e->components.size() > 0) { switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("utrace target variable '$argN' may not be used as array", e->tok); break; case target_symbol::comp_struct_member: throw semantic_error("utrace target variable '$argN' may not be used as a structure", e->tok); break; default: throw semantic_error ("invalid use of utrace target variable '$argN'", e->tok); break; } } // FIXME: max argnument number should not be hardcoded. if (argnum < 1 || argnum > 6) throw semantic_error ("invalid syscall argument number (1-6)", e->tok); bool lvalue = is_active_lvalue(e); if (lvalue) throw semantic_error("utrace '$argN' variable is read-only", e->tok); // Remember that we've seen a target variable. target_symbol_seen = true; // We're going to substitute a synthesized '_utrace_syscall_arg' // function call for the '$argN' reference. functioncall* n = new functioncall; n->tok = e->tok; n->function = "_utrace_syscall_arg"; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session literal_number *num = new literal_number(argnum - 1); num->tok = e->tok; n->args.push_back(num); provide (n); } void utrace_var_expanding_visitor::visit_target_symbol_context (target_symbol* e) { string sname = e->base_name; if (e->components.size() > 0) { switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("utrace target variable '" + sname + "' may not be used as array", e->tok); break; case target_symbol::comp_struct_member: throw semantic_error("utrace target variable '" + sname + "' may not be used as a structure", e->tok); break; default: throw semantic_error ("invalid use of utrace target variable '" + sname + "'", e->tok); break; } } bool lvalue = is_active_lvalue(e); if (lvalue) throw semantic_error("utrace '" + sname + "' variable is read-only", e->tok); string fname; if (sname == "$return") { if (flags != UDPF_SYSCALL_RETURN) throw semantic_error ("only \"process(PATH_OR_PID).syscall.return\" support $return.", e->tok); fname = "_utrace_syscall_return"; } else fname = "_utrace_syscall_nr"; // Remember that we've seen a target variable. target_symbol_seen = true; // We're going to substitute a synthesized '_utrace_syscall_nr' // function call for the '$syscall' reference. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session provide (n); } void utrace_var_expanding_visitor::visit_target_symbol (target_symbol* e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); if (flags != UDPF_SYSCALL && flags != UDPF_SYSCALL_RETURN) throw semantic_error ("only \"process(PATH_OR_PID).syscall\" and \"process(PATH_OR_PID).syscall.return\" probes support target symbols", e->tok); if (e->base_name.substr(0,4) == "$arg") visit_target_symbol_arg(e); else if (e->base_name == "$syscall" || e->base_name == "$return") visit_target_symbol_context(e); else throw semantic_error ("invalid target symbol for utrace probe, $syscall, $return or $argN expected", e->tok); } struct utrace_builder: public derived_probe_builder { utrace_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { string path; int64_t pid; bool has_path = get_param (parameters, TOK_PROCESS, path); bool has_pid = get_param (parameters, TOK_PROCESS, pid); enum utrace_derived_probe_flags flags = UDPF_NONE; if (has_null_param (parameters, TOK_THREAD)) { if (has_null_param (parameters, TOK_BEGIN)) flags = UDPF_THREAD_BEGIN; else if (has_null_param (parameters, TOK_END)) flags = UDPF_THREAD_END; } else if (has_null_param (parameters, TOK_SYSCALL)) { if (has_null_param (parameters, TOK_RETURN)) flags = UDPF_SYSCALL_RETURN; else flags = UDPF_SYSCALL; } else if (has_null_param (parameters, TOK_BEGIN)) flags = UDPF_BEGIN; else if (has_null_param (parameters, TOK_END)) flags = UDPF_END; // If we didn't get a path or pid, this means to probe everything. // Convert this to a pid-based probe. if (! has_path && ! has_pid) { has_path = false; path.clear(); has_pid = true; pid = 0; } else if (has_path) { path = find_executable (path); sess.unwindsym_modules.insert (path); } else if (has_pid) { // We can't probe 'init' (pid 1). XXX: where does this limitation come from? if (pid < 2) throw semantic_error ("process pid must be greater than 1", location->tok); // XXX: could we use /proc/$pid/exe in unwindsym_modules and elsewhere? } finished_results.push_back(new utrace_derived_probe(sess, base, location, has_path, path, pid, flags)); } }; void utrace_derived_probe_group::enroll (utrace_derived_probe* p) { if (p->has_path) probes_by_path[p->path].push_back(p); else probes_by_pid[p->pid].push_back(p); num_probes++; flags_seen[p->flags] = true; // XXX: multiple exec probes (for instance) for the same path (or // pid) should all share a utrace report function, and have their // handlers executed sequentially. } void utrace_derived_probe_group::emit_probe_decl (systemtap_session& s, utrace_derived_probe *p) { s.op->newline() << "{"; s.op->line() << " .tgt={"; if (p->has_path) { s.op->line() << " .pathname=\"" << p->path << "\","; s.op->line() << " .pid=0,"; } else { s.op->line() << " .pathname=NULL,"; s.op->line() << " .pid=" << p->pid << ","; } s.op->line() << " .callback=&_stp_utrace_probe_cb,"; s.op->line() << " .mmap_callback=NULL,"; s.op->line() << " .munmap_callback=NULL,"; s.op->line() << " .mprotect_callback=NULL,"; s.op->line() << " },"; s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ","; s.op->line() << " .ph=&" << p->name << ","; // Handle flags switch (p->flags) { // Notice that we'll just call the probe directly when we get // notified, since the task_finder layer stops the thread for us. case UDPF_BEGIN: // process begin s.op->line() << " .flags=(UDPF_BEGIN),"; break; case UDPF_THREAD_BEGIN: // thread begin s.op->line() << " .flags=(UDPF_THREAD_BEGIN),"; break; // Notice we're not setting up a .ops/.report_death handler for // either UDPF_END or UDPF_THREAD_END. Instead, we'll just call // the probe directly when we get notified. case UDPF_END: // process end s.op->line() << " .flags=(UDPF_END),"; break; case UDPF_THREAD_END: // thread end s.op->line() << " .flags=(UDPF_THREAD_END),"; break; // For UDPF_SYSCALL/UDPF_SYSCALL_RETURN probes, the .report_death // handler isn't strictly necessary. However, it helps to keep // our attaches/detaches symmetrical. Since the task_finder layer // stops the thread, that works around bug 6841. case UDPF_SYSCALL: s.op->line() << " .flags=(UDPF_SYSCALL),"; s.op->line() << " .ops={ .report_syscall_entry=stap_utrace_probe_syscall, .report_death=stap_utrace_task_finder_report_death },"; s.op->line() << " .events=(UTRACE_EVENT(SYSCALL_ENTRY)|UTRACE_EVENT(DEATH)),"; break; case UDPF_SYSCALL_RETURN: s.op->line() << " .flags=(UDPF_SYSCALL_RETURN),"; s.op->line() << " .ops={ .report_syscall_exit=stap_utrace_probe_syscall, .report_death=stap_utrace_task_finder_report_death },"; s.op->line() << " .events=(UTRACE_EVENT(SYSCALL_EXIT)|UTRACE_EVENT(DEATH)),"; break; case UDPF_NONE: s.op->line() << " .flags=(UDPF_NONE),"; s.op->line() << " .ops={ },"; s.op->line() << " .events=0,"; break; default: throw semantic_error ("bad utrace probe flag"); break; } s.op->line() << " .engine_attached=0,"; s.op->line() << " },"; } void utrace_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes_by_path.empty() && probes_by_pid.empty()) return; s.op->newline(); s.op->newline() << "/* ---- utrace probes ---- */"; s.op->newline() << "enum utrace_derived_probe_flags {"; s.op->indent(1); s.op->newline() << "UDPF_NONE,"; s.op->newline() << "UDPF_BEGIN,"; s.op->newline() << "UDPF_END,"; s.op->newline() << "UDPF_THREAD_BEGIN,"; s.op->newline() << "UDPF_THREAD_END,"; s.op->newline() << "UDPF_SYSCALL,"; s.op->newline() << "UDPF_SYSCALL_RETURN,"; s.op->newline() << "UDPF_NFLAGS"; s.op->newline(-1) << "};"; s.op->newline() << "struct stap_utrace_probe {"; s.op->indent(1); s.op->newline() << "struct stap_task_finder_target tgt;"; s.op->newline() << "const char *pp;"; s.op->newline() << "void (*ph) (struct context*);"; s.op->newline() << "enum utrace_derived_probe_flags flags;"; s.op->newline() << "struct utrace_engine_ops ops;"; s.op->newline() << "unsigned long events;"; s.op->newline() << "int engine_attached;"; s.op->newline(-1) << "};"; // Output handler function for UDPF_BEGIN, UDPF_THREAD_BEGIN, // UDPF_END, and UDPF_THREAD_END if (flags_seen[UDPF_BEGIN] || flags_seen[UDPF_THREAD_BEGIN] || flags_seen[UDPF_END] || flags_seen[UDPF_THREAD_END]) { s.op->newline() << "static void stap_utrace_probe_handler(struct task_struct *tsk, struct stap_utrace_probe *p) {"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "p->pp"); // call probe function s.op->newline() << "(*p->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "return;"; s.op->newline(-1) << "}"; } // Output handler function for SYSCALL_ENTRY and SYSCALL_EXIT events if (flags_seen[UDPF_SYSCALL] || flags_seen[UDPF_SYSCALL_RETURN]) { s.op->newline() << "#ifdef UTRACE_ORIG_VERSION"; s.op->newline() << "static u32 stap_utrace_probe_syscall(struct utrace_attached_engine *engine, struct task_struct *tsk, struct pt_regs *regs) {"; s.op->newline() << "#else"; s.op->newline() << "static u32 stap_utrace_probe_syscall(enum utrace_resume_action action, struct utrace_attached_engine *engine, struct task_struct *tsk, struct pt_regs *regs) {"; s.op->newline() << "#endif"; s.op->indent(1); s.op->newline() << "struct stap_utrace_probe *p = (struct stap_utrace_probe *)engine->data;"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "p->pp"); s.op->newline() << "c->regs = regs;"; // call probe function s.op->newline() << "(*p->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "if ((atomic_read (&session_state) != STAP_SESSION_STARTING) && (atomic_read (&session_state) != STAP_SESSION_RUNNING)) {"; s.op->indent(1); s.op->newline() << "debug_task_finder_detach();"; s.op->newline() << "return UTRACE_DETACH;"; s.op->newline(-1) << "}"; s.op->newline() << "return UTRACE_RESUME;"; s.op->newline(-1) << "}"; } // Output task_finder callback routine that gets called for all // utrace probe types. s.op->newline() << "static int _stp_utrace_probe_cb(struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {"; s.op->indent(1); s.op->newline() << "int rc = 0;"; s.op->newline() << "struct stap_utrace_probe *p = container_of(tgt, struct stap_utrace_probe, tgt);"; s.op->newline() << "struct utrace_attached_engine *engine;"; s.op->newline() << "if (register_p) {"; s.op->indent(1); s.op->newline() << "switch (p->flags) {"; s.op->indent(1); // When receiving a UTRACE_EVENT(CLONE) event, we can't call the // begin/thread.begin probe directly. So, we'll just attach an // engine that waits for the thread to quiesce. When the thread // quiesces, then call the probe. if (flags_seen[UDPF_BEGIN]) { s.op->newline() << "case UDPF_BEGIN:"; s.op->indent(1); s.op->newline() << "if (process_p) {"; s.op->indent(1); s.op->newline() << "stap_utrace_probe_handler(tsk, p);"; s.op->newline(-1) << "}"; s.op->newline() << "break;"; s.op->indent(-1); } if (flags_seen[UDPF_THREAD_BEGIN]) { s.op->newline() << "case UDPF_THREAD_BEGIN:"; s.op->indent(1); s.op->newline() << "if (! process_p) {"; s.op->indent(1); s.op->newline() << "stap_utrace_probe_handler(tsk, p);"; s.op->newline(-1) << "}"; s.op->newline() << "break;"; s.op->indent(-1); } // For end/thread_end probes, do nothing at registration time. // We'll handle these in the 'register_p == 0' case. if (flags_seen[UDPF_END] || flags_seen[UDPF_THREAD_END]) { s.op->newline() << "case UDPF_END:"; s.op->newline() << "case UDPF_THREAD_END:"; s.op->indent(1); s.op->newline() << "break;"; s.op->indent(-1); } // Attach an engine for SYSCALL_ENTRY and SYSCALL_EXIT events. if (flags_seen[UDPF_SYSCALL] || flags_seen[UDPF_SYSCALL_RETURN]) { s.op->newline() << "case UDPF_SYSCALL:"; s.op->newline() << "case UDPF_SYSCALL_RETURN:"; s.op->indent(1); s.op->newline() << "rc = stap_utrace_attach(tsk, &p->ops, p, p->events);"; s.op->newline() << "if (rc == 0) {"; s.op->indent(1); s.op->newline() << "p->engine_attached = 1;"; s.op->newline(-1) << "}"; s.op->newline() << "break;"; s.op->indent(-1); } s.op->newline() << "default:"; s.op->indent(1); s.op->newline() << "_stp_error(\"unhandled flag value %d at %s:%d\", p->flags, __FUNCTION__, __LINE__);"; s.op->newline() << "break;"; s.op->indent(-1); s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; // Since this engine could be attached to multiple threads, don't // call stap_utrace_detach_ops() here, only call // stap_utrace_detach() as necessary. s.op->newline() << "else {"; s.op->indent(1); s.op->newline() << "switch (p->flags) {"; s.op->indent(1); // For death probes, go ahead and call the probe directly. if (flags_seen[UDPF_END]) { s.op->newline() << "case UDPF_END:"; s.op->indent(1); s.op->newline() << "if (process_p) {"; s.op->indent(1); s.op->newline() << "stap_utrace_probe_handler(tsk, p);"; s.op->newline(-1) << "}"; s.op->newline() << "break;"; s.op->indent(-1); } if (flags_seen[UDPF_THREAD_END]) { s.op->newline() << "case UDPF_THREAD_END:"; s.op->indent(1); s.op->newline() << "if (! process_p) {"; s.op->indent(1); s.op->newline() << "stap_utrace_probe_handler(tsk, p);"; s.op->newline(-1) << "}"; s.op->newline() << "break;"; s.op->indent(-1); } // For begin/thread_begin probes, we don't need to do anything. if (flags_seen[UDPF_BEGIN] || flags_seen[UDPF_THREAD_BEGIN]) { s.op->newline() << "case UDPF_BEGIN:"; s.op->newline() << "case UDPF_THREAD_BEGIN:"; s.op->indent(1); s.op->newline() << "break;"; s.op->indent(-1); } if (flags_seen[UDPF_SYSCALL] || flags_seen[UDPF_SYSCALL_RETURN]) { s.op->newline() << "case UDPF_SYSCALL:"; s.op->newline() << "case UDPF_SYSCALL_RETURN:"; s.op->indent(1); s.op->newline() << "stap_utrace_detach(tsk, &p->ops);"; s.op->newline() << "break;"; s.op->indent(-1); } s.op->newline() << "default:"; s.op->indent(1); s.op->newline() << "_stp_error(\"unhandled flag value %d at %s:%d\", p->flags, __FUNCTION__, __LINE__);"; s.op->newline() << "break;"; s.op->indent(-1); s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; s.op->newline() << "return rc;"; s.op->newline(-1) << "}"; // Emit vma callbacks. s.op->newline() << "#ifdef STP_NEED_VMA_TRACKER"; s.op->newline() << "static struct stap_task_finder_target stap_utrace_vmcbs[] = {"; s.op->indent(1); if (! probes_by_path.empty()) { for (p_b_path_iterator it = probes_by_path.begin(); it != probes_by_path.end(); it++) emit_vma_callback_probe_decl (s, it->first, (int64_t)0); } if (! probes_by_pid.empty()) { for (p_b_pid_iterator it = probes_by_pid.begin(); it != probes_by_pid.end(); it++) emit_vma_callback_probe_decl (s, "", it->first); } s.op->newline(-1) << "};"; s.op->newline() << "#endif"; s.op->newline() << "static struct stap_utrace_probe stap_utrace_probes[] = {"; s.op->indent(1); // Set up 'process(PATH)' probes if (! probes_by_path.empty()) { for (p_b_path_iterator it = probes_by_path.begin(); it != probes_by_path.end(); it++) { for (unsigned i = 0; i < it->second.size(); i++) { utrace_derived_probe *p = it->second[i]; emit_probe_decl(s, p); } } } // Set up 'process(PID)' probes if (! probes_by_pid.empty()) { for (p_b_pid_iterator it = probes_by_pid.begin(); it != probes_by_pid.end(); it++) { for (unsigned i = 0; i < it->second.size(); i++) { utrace_derived_probe *p = it->second[i]; emit_probe_decl(s, p); } } } s.op->newline(-1) << "};"; } void utrace_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes_by_path.empty() && probes_by_pid.empty()) return; s.op->newline(); s.op->newline() << "#ifdef STP_NEED_VMA_TRACKER"; s.op->newline() << "_stp_sym_init();"; s.op->newline() << "/* ---- utrace vma callbacks ---- */"; s.op->newline() << "for (i=0; iindent(1); s.op->newline() << "struct stap_task_finder_target *r = &stap_utrace_vmcbs[i];"; s.op->newline() << "rc = stap_register_task_finder_target(r);"; s.op->newline(-1) << "}"; s.op->newline() << "#endif"; s.op->newline() << "/* ---- utrace probes ---- */"; s.op->newline() << "for (i=0; iindent(1); s.op->newline() << "struct stap_utrace_probe *p = &stap_utrace_probes[i];"; s.op->newline() << "rc = stap_register_task_finder_target(&p->tgt);"; s.op->newline(-1) << "}"; // rollback all utrace probes s.op->newline() << "if (rc) {"; s.op->indent(1); s.op->newline() << "for (j=i-1; j>=0; j--) {"; s.op->indent(1); s.op->newline() << "struct stap_utrace_probe *p = &stap_utrace_probes[j];"; s.op->newline() << "if (p->engine_attached) {"; s.op->indent(1); s.op->newline() << "stap_utrace_detach_ops(&p->ops);"; s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; } void utrace_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes_by_path.empty() && probes_by_pid.empty()) return; s.op->newline(); s.op->newline() << "/* ---- utrace probes ---- */"; s.op->newline() << "for (i=0; iindent(1); s.op->newline() << "struct stap_utrace_probe *p = &stap_utrace_probes[i];"; s.op->newline() << "if (p->engine_attached) {"; s.op->indent(1); s.op->newline() << "stap_utrace_detach_ops(&p->ops);"; s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; } // ------------------------------------------------------------------------ // user-space probes // ------------------------------------------------------------------------ struct uprobe_derived_probe_group: public generic_dpg { public: void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; uprobe_derived_probe::uprobe_derived_probe (const string& function, const string& filename, int line, const string& module, int pid, const string& section, Dwarf_Addr dwfl_addr, Dwarf_Addr addr, dwarf_query & q, Dwarf_Die* scope_die /* may be null */): derived_probe (q.base_probe, new probe_point (*q.base_loc) /* .components soon rewritten */ ), return_p (q.has_return), module (module), pid (pid), section (section), address (addr) { // We may receive probes on two types of ELF objects: ET_EXEC or ET_DYN. // ET_EXEC ones need no further relocation on the addr(==dwfl_addr), whereas // ET_DYN ones do (addr += run-time mmap base address). We tell these apart // by the incoming section value (".absolute" vs. ".dynamic"). this->tok = q.base_probe->tok; // Expand target variables in the probe body if (!null_die(scope_die)) { dwarf_var_expanding_visitor v (q, scope_die, dwfl_addr); // XXX: user-space deref's! this->body = v.require (this->body); // If during target-variable-expanding the probe, we added a new block // of code, add it to the start of the probe. if (v.add_block) this->body = new block(v.add_block, this->body); // If when target-variable-expanding the probe, we added a new // probe, add it in a new file to the list of files to be processed. if (v.add_probe) { stapfile *f = new stapfile; f->probes.push_back(v.add_probe); q.sess.files.push_back(f); } } // else - null scope_die - $target variables will produce an error during translate phase // Reset the sole element of the "locations" vector as a // "reverse-engineered" form of the incoming (q.base_loc) probe // point. This allows a user to see what function / file / line // number any particular match of the wildcards. vector comps; if(q.has_process) comps.push_back (new probe_point::component(TOK_PROCESS, new literal_string(module))); else assert (0); string fn_or_stmt; if (q.has_function_str || q.has_function_num) fn_or_stmt = "function"; else fn_or_stmt = "statement"; if (q.has_function_str || q.has_statement_str) { string retro_name = function; if (filename != "") { retro_name += ("@" + string (filename)); if (line > 0) retro_name += (":" + lex_cast (line)); } comps.push_back (new probe_point::component (fn_or_stmt, new literal_string (retro_name))); } else if (q.has_function_num || q.has_statement_num) { Dwarf_Addr retro_addr; if (q.has_function_num) retro_addr = q.function_num_val; else retro_addr = q.statement_num_val; comps.push_back (new probe_point::component (fn_or_stmt, new literal_number(retro_addr))); // XXX: should be hex if possible if (q.has_absolute) comps.push_back (new probe_point::component (TOK_ABSOLUTE)); } if (q.has_call) comps.push_back (new probe_point::component(TOK_CALL)); if (q.has_inline) comps.push_back (new probe_point::component(TOK_INLINE)); if (return_p) comps.push_back (new probe_point::component(TOK_RETURN)); /* if (has_maxactive) comps.push_back (new probe_point::component (TOK_MAXACTIVE, new literal_number(maxactive_val))); */ // Overwrite it. this->sole_location()->components = comps; } uprobe_derived_probe::uprobe_derived_probe (probe *base, probe_point *location, int pid, Dwarf_Addr addr, bool has_return): derived_probe (base, location), // location is not rewritten here return_p (has_return), pid (pid), address (addr) { } void uprobe_derived_probe::printsig (ostream& o) const { // Same as dwarf_derived_probe. sole_location()->print (o); o << " /* pc=" << section << "+0x" << hex << address << dec << " */"; printsig_nested (o); } void uprobe_derived_probe::join_group (systemtap_session& s) { if (! s.uprobe_derived_probes) s.uprobe_derived_probes = new uprobe_derived_probe_group (); s.uprobe_derived_probes->enroll (this); task_finder_derived_probe_group::create_session_group (s); // Ask buildrun.cxx to build extra module if needed, and // signal staprun to load that module s.need_uprobes = true; } struct uprobe_builder: public derived_probe_builder { uprobe_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { int64_t process, address; bool b1 = get_param (parameters, TOK_PROCESS, process); (void) b1; bool b2 = get_param (parameters, TOK_STATEMENT, address); (void) b2; bool rr = has_null_param (parameters, TOK_RETURN); assert (b1 && b2); // by pattern_root construction finished_results.push_back(new uprobe_derived_probe(base, location, process, address, rr)); } }; void uprobe_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- user probes ---- */"; // If uprobes isn't in the kernel, pull it in from the runtime. s.op->newline() << "#if defined(CONFIG_UPROBES) || defined(CONFIG_UPROBES_MODULE)"; s.op->newline() << "#include "; s.op->newline() << "#else"; s.op->newline() << "#include \"uprobes/uprobes.h\""; s.op->newline() << "#endif"; s.op->newline() << "#ifndef MULTIPLE_UPROBES"; s.op->newline() << "#define MULTIPLE_UPROBES 256"; // maximum possible armed uprobes per process() probe point // or apprx. max number of processes mapping a shared library s.op->newline() << "#endif"; s.op->newline() << "#ifndef MAXUPROBES"; s.op->newline() << "#define MAXUPROBES (MULTIPLE_UPROBES * " << probes.size() << ")"; s.op->newline() << "#endif"; // In .bss, the shared pool of uprobe/uretprobe structs. These are // too big to embed in the initialized .data stap_uprobe_spec array. s.op->newline() << "static struct stap_uprobe {"; s.op->newline(1) << "union { struct uprobe up; struct uretprobe urp; };"; s.op->newline() << "int spec_index;"; // index into stap_uprobe_specs; <0 == free && unregistered s.op->newline(-1) << "} stap_uprobes [MAXUPROBES];"; s.op->newline() << "DEFINE_MUTEX(stap_uprobes_lock);"; // protects against concurrent registration/unregistration // Emit vma callbacks. s.op->newline() << "#ifdef STP_NEED_VMA_TRACKER"; s.op->newline() << "static struct stap_task_finder_target stap_uprobe_vmcbs[] = {"; s.op->indent(1); for (unsigned i = 0; i < probes.size(); i++) { uprobe_derived_probe* p = probes[i]; if (p->pid != 0) emit_vma_callback_probe_decl (s, "", p->pid); else emit_vma_callback_probe_decl (s, p->module, (int64_t)0); } s.op->newline(-1) << "};"; s.op->newline() << "#endif"; s.op->newline() << "static struct stap_uprobe_spec {"; s.op->newline(1) << "struct stap_task_finder_target finder;"; s.op->newline() << "unsigned long address;"; s.op->newline() << "const char *pathname;"; s.op->newline() << "const char *pp;"; s.op->newline() << "void (*ph) (struct context*);"; s.op->newline() << "unsigned return_p:1;"; s.op->newline(-1) << "} stap_uprobe_specs [] = {"; s.op->indent(1); for (unsigned i =0; inewline() << "{"; s.op->line() << " .finder = {"; if (p->pid != 0) s.op->line() << " .pid=" << p->pid; else if (p->section == ".absolute") s.op->line() << " .pathname=" << lex_cast_qstring(p->module) << ", "; // else ".dynamic" gets pathname=0, pid=0, activating task_finder "global tracing" s.op->line() << "},"; if (p->section != ".absolute") s.op->line() << " .pathname=" << lex_cast_qstring(p->module) << ", "; s.op->line() << " .address=(unsigned long)0x" << hex << p->address << dec << "ULL,"; s.op->line() << " .pp=" << lex_cast_qstring (*p->sole_location()) << ","; s.op->line() << " .ph=&" << p->name << ","; if (p->return_p) s.op->line() << " .return_p=1,"; s.op->line() << " },"; } s.op->newline(-1) << "};"; s.op->newline() << "static void enter_uprobe_probe (struct uprobe *inst, struct pt_regs *regs) {"; s.op->newline(1) << "struct stap_uprobe *sup = container_of(inst, struct stap_uprobe, up);"; s.op->newline() << "struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sups->pp"); s.op->newline() << "if (sup->spec_index < 0 ||" << "sup->spec_index >= " << probes.size() << ") return;"; // XXX: should not happen s.op->newline() << "c->regs = regs;"; s.op->newline() << "(*sups->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; s.op->newline() << "static void enter_uretprobe_probe (struct uretprobe_instance *inst, struct pt_regs *regs) {"; s.op->newline(1) << "struct stap_uprobe *sup = container_of(inst->rp, struct stap_uprobe, urp);"; s.op->newline() << "struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "sups->pp"); s.op->newline() << "if (sup->spec_index < 0 ||" << "sup->spec_index >= " << probes.size() << ") return;"; // XXX: should not happen // XXX: kretprobes saves "c->pi = inst;" too s.op->newline() << "c->regs = regs;"; s.op->newline() << "(*sups->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; // NB: Because these utrace callbacks only occur before / after // userspace instructions run, there is no concurrency control issue // between active uprobe callbacks and these registration / // unregistration pieces. // We protect the stap_uprobe->spec_index (which also serves as a // free/busy flag) value with the outer protective stap_probes_lock // spinlock, to protect it against concurrent registration / // unregistration. s.op->newline(); s.op->newline() << "static int stap_uprobe_change (struct task_struct *tsk, int register_p, unsigned long relocation, struct stap_uprobe_spec *sups) {"; s.op->newline(1) << "int spec_index = (sups - stap_uprobe_specs);"; s.op->newline() << "int handled_p = 0;"; s.op->newline() << "int slotted_p = 0;"; s.op->newline() << "int rc = 0;"; s.op->newline() << "int i;"; s.op->newline() << "mutex_lock (& stap_uprobes_lock);"; s.op->newline() << "for (i=0; inewline(1) << "struct stap_uprobe *sup = & stap_uprobes[i];"; // register new uprobe s.op->newline() << "if (register_p && sup->spec_index < 0) {"; // PR6829: we need to check that the sup we're about to reuse is really completely free. // See PR6829 notes below. s.op->newline(1) << "if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;"; s.op->newline() << "else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;"; s.op->newline() << "sup->spec_index = spec_index;"; s.op->newline() << "slotted_p = 1;"; s.op->newline() << "break;"; s.op->newline(-1) << "} else if (!register_p && " << "sup->spec_index == spec_index && " // a u[ret]probe set up for this probe point << "((sups->return_p && sup->urp.u.pid == tsk->tgid && sup->urp.u.vaddr == relocation + sups->address) ||" // dying uretprobe << "(!sups->return_p && sup->up.pid == tsk->tgid && sup->up.vaddr == relocation + sups->address))) {"; // dying uprobe s.op->newline(1) << "slotted_p = 1;"; s.op->newline() << "break;"; // exit to-free slot search s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; s.op->newline() << "mutex_unlock (& stap_uprobes_lock);"; s.op->newline() << "#ifdef DEBUG_UPROBES"; s.op->newline() << "printk (KERN_INFO \"%cuprobe spec %d idx %d process %s[%d] reloc %p pp '%s'\\n\", "; s.op->line() << "(register_p ? '+' : '-'), spec_index, (slotted_p ? i : -1), tsk->comm, tsk->tgid, (void*) relocation, sups->pp);"; s.op->newline() << "#endif"; // Here, slotted_p implies that `i' points to the single // stap_uprobes[] element that has been slotted in for registration // or unregistration processing. !slotted_p implies that the table // was full (registration; MAXUPROBES) or that no matching entry was // found (unregistration; should not happen). s.op->newline() << "if (register_p && slotted_p) {"; s.op->newline(1) << "struct stap_uprobe *sup = & stap_uprobes[i];"; s.op->newline() << "if (sups->return_p) {"; s.op->newline(1) << "sup->urp.u.pid = tsk->tgid;"; s.op->newline() << "sup->urp.u.vaddr = relocation + sups->address;"; s.op->newline() << "sup->urp.handler = &enter_uretprobe_probe;"; s.op->newline() << "rc = register_uretprobe (& sup->urp);"; s.op->newline(-1) << "} else {"; s.op->newline(1) << "sup->up.pid = tsk->tgid;"; s.op->newline() << "sup->up.vaddr = relocation + sups->address;"; s.op->newline() << "sup->up.handler = &enter_uprobe_probe;"; s.op->newline() << "rc = register_uprobe (& sup->up);"; s.op->newline(-1) << "}"; s.op->newline() << "if (rc) {"; // failed to register s.op->newline(1) << "printk (KERN_WARNING \"uprobe failed %s[%d] '%s' addr %p rc %d\\n\", tsk->comm, tsk->tgid, sups->pp, (void*)(relocation + sups->address), rc);"; // NB: we need to release this slot, so we need to borrow the mutex temporarily. s.op->newline() << "mutex_lock (& stap_uprobes_lock);"; s.op->newline() << "sup->spec_index = -1;"; s.op->newline() << "mutex_unlock (& stap_uprobes_lock);"; s.op->newline(-1) << "} else {"; s.op->newline(1) << "handled_p = 1;"; // success s.op->newline(-1) << "}"; s.op->newline(-1) << "} else if (!register_p && slotted_p) {"; s.op->newline(1) << "struct stap_uprobe *sup = & stap_uprobes[i];"; // NB: we need to release this slot, so we need to borrow the mutex temporarily. s.op->newline() << "mutex_lock (& stap_uprobes_lock);"; // NB: We must not actually uregister u[ret]probes when a target process execs or exits; // uprobes does that by itself asynchronously. We can reuse the up/urp struct after // uprobes clears the sup->{up,urp}->kdata pointer. PR6829. To tell the two // cases apart, we use spec_index -2 vs -1. s.op->newline() << "sup->spec_index = (sups->return_p ? -2 : -1);"; s.op->newline() << "mutex_unlock (& stap_uprobes_lock);"; s.op->newline() << "handled_p = 1;"; s.op->newline(-1) << "}"; // if slotted_p // NB: handled_p implies slotted_p s.op->newline() << "if (! handled_p) {"; s.op->newline(1) << "#ifdef STP_TIMING"; s.op->newline() << "atomic_inc (register_p ? & skipped_count_uprobe_reg : & skipped_count_uprobe_unreg);"; s.op->newline() << "#endif"; // NB: duplicates common_entryfn_epilogue, but then this is not a probe entry fn epilogue. s.op->newline() << "if (unlikely (atomic_inc_return (& skipped_count) > MAXSKIPPED)) {"; s.op->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; s.op->newline() << "_stp_exit ();"; s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; s.op->newline() << "return 0;"; // XXX: or rc? s.op->newline(-1) << "}"; s.op->assert_0_indent(); // The task_finder_callback we use for ET_EXEC targets. s.op->newline(); s.op->newline() << "static int stap_uprobe_process_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {"; s.op->newline(1) << "struct stap_uprobe_spec *sups = container_of(tgt, struct stap_uprobe_spec, finder);"; s.op->newline() << "if (! process_p) return 0;"; s.op->newline(0) << "return stap_uprobe_change (tsk, register_p, 0, sups);"; s.op->newline(-1) << "}"; // The task_finder_mmap_callback we use for ET_DYN targets. s.op->newline(); s.op->newline() << "static int stap_uprobe_mmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, char *path, unsigned long addr, unsigned long length, unsigned long offset, unsigned long vm_flags) {"; s.op->newline(1) << "struct stap_uprobe_spec *sups = container_of(tgt, struct stap_uprobe_spec, finder);"; // 1 - shared libraries' executable segments load from offset 0 - ld.so convention s.op->newline() << "if (offset != 0) return 0;"; // 2 - the shared library we're interested in s.op->newline() << "if (path == NULL || strcmp (path, sups->pathname)) return 0;"; // 3 - probe address within the mapping limits; test should not fail s.op->newline() << "if (sups->address >= addr && sups->address < (addr + length)) return 0;"; // 4 - mapping should be executable s.op->newline() << "if (!(vm_flags & VM_EXEC)) return 0;"; s.op->newline() << "#ifdef DEBUG_TASK_FINDER_VMA"; s.op->newline() << "printk (KERN_INFO \"vmchange pid %d path %s addr %p length %lu offset %p\\n\", tsk->tgid, path, (void *) addr, length, (void*) offset);"; s.op->newline() << "printk (KERN_INFO \"sups %p pp %s path %s address %p\\n\", sups, sups->pp, sups->pathname ?: \"\", (void*) sups->address);"; s.op->newline() << "#endif"; s.op->newline(0) << "return stap_uprobe_change (tsk, 1, addr, sups);"; s.op->newline(-1) << "}"; s.op->assert_0_indent(); s.op->newline(); } void uprobe_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "#ifdef STP_NEED_VMA_TRACKER"; s.op->newline() << "_stp_sym_init();"; s.op->newline() << "/* ---- uprobe vma callbacks ---- */"; s.op->newline() << "for (i=0; iindent(1); s.op->newline() << "struct stap_task_finder_target *r = &stap_uprobe_vmcbs[i];"; s.op->newline() << "rc = stap_register_task_finder_target(r);"; s.op->newline(-1) << "}"; s.op->newline() << "#endif"; s.op->newline() << "/* ---- user probes ---- */"; s.op->newline() << "for (j=0; jnewline(1) << "struct stap_uprobe *sup = & stap_uprobes[j];"; s.op->newline() << "sup->spec_index = -1;"; // free slot // NB: we assume the rest of the struct (specificaly, sup->up) is // initialized to zero. This is so that we can use // sup->up->kdata = NULL for "really free!" PR 6829. s.op->newline(-1) << "}"; s.op->newline() << "mutex_init (& stap_uprobes_lock);"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_uprobe_spec *sups = & stap_uprobe_specs[i];"; s.op->newline() << "probe_point = sups->pp;"; // for error messages s.op->newline() << "if (sups->finder.pathname) sups->finder.callback = & stap_uprobe_process_found;"; s.op->newline() << "else if (sups->pathname) sups->finder.mmap_callback = & stap_uprobe_mmap_found;"; s.op->newline() << "rc = stap_register_task_finder_target (& sups->finder);"; // NB: if (rc), there is no need (XXX: nor any way) to clean up any // finders already registered, since mere registration does not // cause any utrace or memory allocation actions. That happens only // later, once the task finder engine starts running. So, for a // partial initialization requiring unwind, we need do nothing. s.op->newline() << "if (rc) break;"; s.op->newline(-1) << "}"; } void uprobe_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- user probes ---- */"; // NB: there is no stap_unregister_task_finder_target call; // important stuff like utrace cleanups are done by // __stp_task_finder_cleanup() via stap_stop_task_finder(). // // This function blocks until all callbacks are completed, so there // is supposed to be no possibility of any registration-related code starting // to run in parallel with our shutdown here. So we don't need to protect the // stap_uprobes[] array with the mutex. s.op->newline() << "for (j=0; jnewline(1) << "struct stap_uprobe *sup = & stap_uprobes[j];"; s.op->newline() << "struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];"; s.op->newline() << "if (sup->spec_index < 0) continue;"; // free slot s.op->newline() << "if (sups->return_p) {"; s.op->newline(1) << "#ifdef DEBUG_UPROBES"; s.op->newline() << "printk (KERN_INFO \"-uretprobe spec %d index %d pid %d addr %p\\n\", sup->spec_index, j, sup->up.pid, (void*) sup->up.vaddr);"; s.op->newline() << "#endif"; // NB: PR6829 does not change that we still need to unregister at // *this* time -- when the script as a whole exits. s.op->newline() << "unregister_uretprobe (& sup->urp);"; s.op->newline(-1) << "} else {"; s.op->newline(1) << "#ifdef DEBUG_UPROBES"; s.op->newline() << "printk (KERN_INFO \"-uprobe spec %d index %d pid %d addr %p\\n\", sup->spec_index, j, sup->urp.u.pid, (void*) sup->urp.u.vaddr);"; s.op->newline() << "#endif"; s.op->newline() << "unregister_uprobe (& sup->up);"; s.op->newline(-1) << "}"; s.op->newline() << "sup->spec_index = -1;"; // XXX: uprobe missed counts? s.op->newline(-1) << "}"; s.op->newline() << "mutex_destroy (& stap_uprobes_lock);"; } // ------------------------------------------------------------------------ // timer derived probes // ------------------------------------------------------------------------ static string TOK_TIMER("timer"); struct timer_derived_probe: public derived_probe { int64_t interval, randomize; bool time_is_msecs; // NB: hrtimers get ms-based probes on modern kernels instead timer_derived_probe (probe* p, probe_point* l, int64_t i, int64_t r, bool ms=false); virtual void join_group (systemtap_session& s); }; struct timer_derived_probe_group: public generic_dpg { void emit_interval (translator_output* o); public: void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; timer_derived_probe::timer_derived_probe (probe* p, probe_point* l, int64_t i, int64_t r, bool ms): derived_probe (p, l), interval (i), randomize (r), time_is_msecs(ms) { if (interval <= 0 || interval > 1000000) // make i and r fit into plain ints throw semantic_error ("invalid interval for jiffies timer"); // randomize = 0 means no randomization if (randomize < 0 || randomize > interval) throw semantic_error ("invalid randomize for jiffies timer"); if (locations.size() != 1) throw semantic_error ("expect single probe point"); // so we don't have to loop over them in the other functions } void timer_derived_probe::join_group (systemtap_session& s) { if (! s.timer_derived_probes) s.timer_derived_probes = new timer_derived_probe_group (); s.timer_derived_probes->enroll (this); } void timer_derived_probe_group::emit_interval (translator_output* o) { o->line() << "({"; o->newline(1) << "unsigned i = stp->intrv;"; o->newline() << "if (stp->rnd != 0)"; o->newline(1) << "i += _stp_random_pm(stp->rnd);"; o->newline(-1) << "stp->ms ? msecs_to_jiffies(i) : i;"; o->newline(-1) << "})"; } void timer_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- timer probes ---- */"; s.op->newline() << "static struct stap_timer_probe {"; s.op->newline(1) << "struct timer_list timer_list;"; s.op->newline() << "const char *pp;"; s.op->newline() << "void (*ph) (struct context*);"; s.op->newline() << "unsigned intrv, ms, rnd;"; s.op->newline(-1) << "} stap_timer_probes [" << probes.size() << "] = {"; s.op->indent(1); for (unsigned i=0; i < probes.size(); i++) { s.op->newline () << "{"; s.op->line() << " .pp=" << lex_cast_qstring (*probes[i]->sole_location()) << ","; s.op->line() << " .ph=&" << probes[i]->name << ","; s.op->line() << " .intrv=" << probes[i]->interval << ","; s.op->line() << " .ms=" << probes[i]->time_is_msecs << ","; s.op->line() << " .rnd=" << probes[i]->randomize; s.op->line() << " },"; } s.op->newline(-1) << "};"; s.op->newline(); s.op->newline() << "static void enter_timer_probe (unsigned long val) {"; s.op->newline(1) << "struct stap_timer_probe* stp = & stap_timer_probes [val];"; s.op->newline() << "if ((atomic_read (&session_state) == STAP_SESSION_STARTING) ||"; s.op->newline() << " (atomic_read (&session_state) == STAP_SESSION_RUNNING))"; s.op->newline(1) << "mod_timer (& stp->timer_list, jiffies + "; emit_interval (s.op); s.op->line() << ");"; s.op->newline(-1) << "{"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "stp->pp"); s.op->newline() << "(*stp->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; } void timer_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_timer_probe* stp = & stap_timer_probes [i];"; s.op->newline() << "probe_point = stp->pp;"; s.op->newline() << "init_timer (& stp->timer_list);"; s.op->newline() << "stp->timer_list.function = & enter_timer_probe;"; s.op->newline() << "stp->timer_list.data = i;"; // NB: important! // copy timer renew calculations from above :-( s.op->newline() << "stp->timer_list.expires = jiffies + "; emit_interval (s.op); s.op->line() << ";"; s.op->newline() << "add_timer (& stp->timer_list);"; // note: no partial failure rollback is needed: add_timer cannot fail. s.op->newline(-1) << "}"; // for loop } void timer_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++)"; s.op->newline(1) << "del_timer_sync (& stap_timer_probes[i].timer_list);"; s.op->indent(-1); } // ------------------------------------------------------------------------ // profile derived probes // ------------------------------------------------------------------------ // On kernels < 2.6.10, this uses the register_profile_notifier API to // generate the timed events for profiling; on kernels >= 2.6.10 this // uses the register_timer_hook API. The latter doesn't currently allow // simultaneous users, so insertion will fail if the profiler is busy. // (Conflicting users may include OProfile, other SystemTap probes, etc.) struct profile_derived_probe: public derived_probe { profile_derived_probe (systemtap_session &s, probe* p, probe_point* l); void join_group (systemtap_session& s); }; struct profile_derived_probe_group: public generic_dpg { public: void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; profile_derived_probe::profile_derived_probe (systemtap_session &, probe* p, probe_point* l): derived_probe(p, l) { } void profile_derived_probe::join_group (systemtap_session& s) { if (! s.profile_derived_probes) s.profile_derived_probes = new profile_derived_probe_group (); s.profile_derived_probes->enroll (this); } struct profile_builder: public derived_probe_builder { profile_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const &, vector & finished_results) { sess.unwindsym_modules.insert ("kernel"); finished_results.push_back(new profile_derived_probe(sess, base, location)); } }; // timer.profile probe handlers are hooked up in an entertaining way // to the underlying kernel facility. The fact that 2.6.11+ era // "register_timer_hook" API allows only one consumer *system-wide* // will give a hint. We will have a single entry function (and thus // trivial registration / unregistration), and it will call all probe // handler functions in sequence. void profile_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; // kernels < 2.6.10: use register_profile_notifier API // kernels >= 2.6.10: use register_timer_hook API s.op->newline() << "/* ---- profile probes ---- */"; // This function calls all the profiling probe handlers in sequence. // The only tricky thing is that the context will be reused amongst // them. While a simple sequence of calls to the individual probe // handlers is unlikely to go terribly wrong (with c->last_error // being set causing an early return), but for extra assurance, we // open-code the same logic here. s.op->newline() << "static void enter_all_profile_probes (struct pt_regs *regs) {"; s.op->indent(1); string pp = lex_cast_qstring("timer.profile"); // hard-coded for convenience common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", pp); s.op->newline() << "c->regs = regs;"; for (unsigned i=0; i 0) { // Some lightweight inter-probe context resetting // XXX: not quite right: MAXERRORS not respected s.op->newline() << "c->actionremaining = MAXACTION;"; } s.op->newline() << "if (c->last_error == NULL) " << probes[i]->name << " (c);"; } common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; s.op->newline() << "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)"; // == using_rpn of yore s.op->newline() << "static int enter_profile_probes (struct notifier_block *self," << " unsigned long val, void *data) {"; s.op->newline(1) << "(void) self; (void) val;"; s.op->newline() << "enter_all_profile_probes ((struct pt_regs *) data);"; s.op->newline() << "return 0;"; s.op->newline(-1) << "}"; s.op->newline() << "struct notifier_block stap_profile_notifier = {" << " .notifier_call = & enter_profile_probes };"; s.op->newline() << "#else"; s.op->newline() << "static int enter_profile_probes (struct pt_regs *regs) {"; s.op->newline(1) << "enter_all_profile_probes (regs);"; s.op->newline() << "return 0;"; s.op->newline(-1) << "}"; s.op->newline() << "#endif"; } void profile_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "probe_point = \"timer.profile\";"; // NB: hard-coded for convenience s.op->newline() << "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)"; // == using_rpn of yore s.op->newline() << "rc = register_profile_notifier (& stap_profile_notifier);"; s.op->newline() << "#else"; s.op->newline() << "rc = register_timer_hook (& enter_profile_probes);"; s.op->newline() << "#endif"; } void profile_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++)"; s.op->newline(1) << "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)"; // == using_rpn of yore s.op->newline() << "unregister_profile_notifier (& stap_profile_notifier);"; s.op->newline() << "#else"; s.op->newline() << "unregister_timer_hook (& enter_profile_probes);"; s.op->newline() << "#endif"; s.op->indent(-1); } // ------------------------------------------------------------------------ // procfs file derived probes // ------------------------------------------------------------------------ static string TOK_PROCFS("procfs"); static string TOK_READ("read"); static string TOK_WRITE("write"); struct procfs_derived_probe: public derived_probe { string path; bool write; bool target_symbol_seen; procfs_derived_probe (systemtap_session &, probe* p, probe_point* l, string ps, bool w); void join_group (systemtap_session& s); }; struct procfs_probe_set { procfs_derived_probe* read_probe; procfs_derived_probe* write_probe; procfs_probe_set () : read_probe (NULL), write_probe (NULL) {} }; struct procfs_derived_probe_group: public generic_dpg { private: map probes_by_path; typedef map::iterator p_b_p_iterator; bool has_read_probes; bool has_write_probes; public: procfs_derived_probe_group () : has_read_probes(false), has_write_probes(false) {} void enroll (procfs_derived_probe* probe); void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; struct procfs_var_expanding_visitor: public var_expanding_visitor { procfs_var_expanding_visitor(systemtap_session& s, const string& pn, string path, bool write_probe): sess (s), probe_name (pn), path (path), write_probe (write_probe), target_symbol_seen (false) {} systemtap_session& sess; string probe_name; string path; bool write_probe; bool target_symbol_seen; void visit_target_symbol (target_symbol* e); }; procfs_derived_probe::procfs_derived_probe (systemtap_session &s, probe* p, probe_point* l, string ps, bool w): derived_probe(p, l), path(ps), write(w), target_symbol_seen(false) { // Expand local variables in the probe body procfs_var_expanding_visitor v (s, name, path, write); this->body = v.require (this->body); target_symbol_seen = v.target_symbol_seen; } void procfs_derived_probe::join_group (systemtap_session& s) { if (! s.procfs_derived_probes) s.procfs_derived_probes = new procfs_derived_probe_group (); s.procfs_derived_probes->enroll (this); } void procfs_derived_probe_group::enroll (procfs_derived_probe* p) { procfs_probe_set *pset; if (probes_by_path.count(p->path) == 0) { pset = new procfs_probe_set; probes_by_path[p->path] = pset; } else { pset = probes_by_path[p->path]; // You can only specify 1 read and 1 write probe. if (p->write && pset->write_probe != NULL) throw semantic_error("only one write procfs probe can exist for procfs path \"" + p->path + "\""); else if (! p->write && pset->read_probe != NULL) throw semantic_error("only one read procfs probe can exist for procfs path \"" + p->path + "\""); // XXX: multiple writes should be acceptable } if (p->write) { pset->write_probe = p; has_write_probes = true; } else { pset->read_probe = p; has_read_probes = true; } } void procfs_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes_by_path.empty()) return; s.op->newline() << "/* ---- procfs probes ---- */"; s.op->newline() << "#include \"procfs.c\""; // Emit the procfs probe data list s.op->newline() << "static struct stap_procfs_probe {"; s.op->newline(1)<< "const char *path;"; s.op->newline() << "const char *read_pp;"; s.op->newline() << "void (*read_ph) (struct context*);"; s.op->newline() << "const char *write_pp;"; s.op->newline() << "void (*write_ph) (struct context*);"; s.op->newline(-1) << "} stap_procfs_probes[] = {"; s.op->indent(1); for (p_b_p_iterator it = probes_by_path.begin(); it != probes_by_path.end(); it++) { procfs_probe_set *pset = it->second; s.op->newline() << "{"; s.op->line() << " .path=" << lex_cast_qstring (it->first) << ","; if (pset->read_probe != NULL) { s.op->line() << " .read_pp=" << lex_cast_qstring (*pset->read_probe->sole_location()) << ","; s.op->line() << " .read_ph=&" << pset->read_probe->name << ","; } else { s.op->line() << " .read_pp=NULL,"; s.op->line() << " .read_ph=NULL,"; } if (pset->write_probe != NULL) { s.op->line() << " .write_pp=" << lex_cast_qstring (*pset->write_probe->sole_location()) << ","; s.op->line() << " .write_ph=&" << pset->write_probe->name; } else { s.op->line() << " .write_pp=NULL,"; s.op->line() << " .write_ph=NULL"; } s.op->line() << " },"; } s.op->newline(-1) << "};"; if (has_read_probes) { // Output routine to fill in 'page' with our data. s.op->newline(); s.op->newline() << "static int _stp_procfs_read(char *page, char **start, off_t off, int count, int *eof, void *data) {"; s.op->newline(1) << "struct stap_procfs_probe *spp = (struct stap_procfs_probe *)data;"; s.op->newline() << "int bytes = 0;"; s.op->newline() << "string_t strdata = {'\\0'};"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "spp->read_pp"); s.op->newline() << "if (c->data == NULL)"; s.op->newline(1) << "c->data = &strdata;"; s.op->newline(-1) << "else {"; s.op->newline(1) << "if (unlikely (atomic_inc_return (& skipped_count) > MAXSKIPPED)) {"; s.op->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; s.op->newline() << "_stp_exit ();"; s.op->newline(-1) << "}"; s.op->newline() << "atomic_dec (& c->busy);"; s.op->newline() << "goto probe_epilogue;"; s.op->newline(-1) << "}"; // call probe function (which copies data into strdata) s.op->newline() << "(*spp->read_ph) (c);"; // copy string data into 'page' s.op->newline() << "c->data = NULL;"; s.op->newline() << "bytes = strnlen(strdata, MAXSTRINGLEN - 1);"; s.op->newline() << "if (off >= bytes)"; s.op->newline(1) << "*eof = 1;"; s.op->newline(-1) << "else {"; s.op->newline(1) << "bytes -= off;"; s.op->newline() << "if (bytes > count)"; s.op->newline(1) << "bytes = count;"; s.op->newline(-1) << "memcpy(page, strdata + off, bytes);"; s.op->newline() << "*start = page;"; s.op->newline(-1) << "}"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "return bytes;"; s.op->newline(-1) << "}"; } if (has_write_probes) { s.op->newline() << "static int _stp_procfs_write(struct file *file, const char *buffer, unsigned long count, void *data) {"; s.op->newline(1) << "struct stap_procfs_probe *spp = (struct stap_procfs_probe *)data;"; s.op->newline() << "string_t strdata = {'\\0'};"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "spp->write_pp"); s.op->newline() << "if (count > (MAXSTRINGLEN - 1))"; s.op->newline(1) << "count = MAXSTRINGLEN - 1;"; s.op->newline(-1) << "_stp_copy_from_user(strdata, buffer, count);"; s.op->newline() << "if (c->data == NULL)"; s.op->newline(1) << "c->data = &strdata;"; s.op->newline(-1) << "else {"; s.op->newline(1) << "if (unlikely (atomic_inc_return (& skipped_count) > MAXSKIPPED)) {"; s.op->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; s.op->newline() << "_stp_exit ();"; s.op->newline(-1) << "}"; s.op->newline() << "atomic_dec (& c->busy);"; s.op->newline() << "goto probe_epilogue;"; s.op->newline(-1) << "}"; // call probe function (which copies data out of strdata) s.op->newline() << "(*spp->write_ph) (c);"; s.op->newline() << "c->data = NULL;"; common_probe_entryfn_epilogue (s.op); s.op->newline() << "return count;"; s.op->newline(-1) << "}"; } } void procfs_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes_by_path.empty()) return; s.op->newline() << "for (i = 0; i < " << probes_by_path.size() << "; i++) {"; s.op->newline(1) << "struct stap_procfs_probe *spp = &stap_procfs_probes[i];"; s.op->newline() << "if (spp->read_pp)"; s.op->newline(1) << "probe_point = spp->read_pp;"; s.op->newline(-1) << "else"; s.op->newline(1) << "probe_point = spp->write_pp;"; s.op->newline(-1) << "rc = _stp_create_procfs(spp->path, i);"; s.op->newline() << "if (rc) {"; s.op->newline(1) << "_stp_close_procfs();"; s.op->newline() << "break;"; s.op->newline(-1) << "}"; if (has_read_probes) { s.op->newline() << "if (spp->read_pp)"; s.op->newline(1) << "_stp_procfs_files[i]->read_proc = &_stp_procfs_read;"; s.op->newline(-1) << "else"; s.op->newline(1) << "_stp_procfs_files[i]->read_proc = NULL;"; s.op->indent(-1); } else s.op->newline() << "_stp_procfs_files[i]->read_proc = NULL;"; if (has_write_probes) { s.op->newline() << "if (spp->write_pp)"; s.op->newline(1) << "_stp_procfs_files[i]->write_proc = &_stp_procfs_write;"; s.op->newline(-1) << "else"; s.op->newline(1) << "_stp_procfs_files[i]->write_proc = NULL;"; s.op->indent(-1); } else s.op->newline() << "_stp_procfs_files[i]->write_proc = NULL;"; s.op->newline() << "_stp_procfs_files[i]->data = spp;"; s.op->newline(-1) << "}"; // for loop } void procfs_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes_by_path.empty()) return; s.op->newline() << "_stp_close_procfs();"; } void procfs_var_expanding_visitor::visit_target_symbol (target_symbol* e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); if (e->base_name != "$value") throw semantic_error ("invalid target symbol for procfs probe, $value expected", e->tok); if (e->components.size() > 0) { switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("procfs target variable '$value' may not be used as array", e->tok); break; case target_symbol::comp_struct_member: throw semantic_error("procfs target variable '$value' may not be used as a structure", e->tok); break; default: throw semantic_error ("invalid use of procfs target variable '$value'", e->tok); break; } } bool lvalue = is_active_lvalue(e); if (write_probe && lvalue) throw semantic_error("procfs $value variable is read-only in a procfs write probe", e->tok); else if (! write_probe && ! lvalue) throw semantic_error("procfs $value variable cannot be read in a procfs read probe", e->tok); // Remember that we've seen a target variable. target_symbol_seen = true; // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; string fname = (string(lvalue ? "_procfs_value_set" : "_procfs_value_get") + "_" + lex_cast(tick++)); string locvalue = "CONTEXT->data"; if (! lvalue) ec->code = string("strlcpy (THIS->__retvalue, ") + locvalue + string(", MAXSTRINGLEN); /* pure */"); else ec->code = string("strlcpy (") + locvalue + string(", THIS->value, MAXSTRINGLEN);"); fdecl->name = fname; fdecl->body = ec; fdecl->type = pe_string; if (lvalue) { // Modify the fdecl so it carries a single pe_string formal // argument called "value". vardecl *v = new vardecl; v->type = pe_string; v->name = "value"; v->tok = e->tok; fdecl->formal_args.push_back(v); } sess.functions[fdecl->name]=fdecl; // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session if (lvalue) { // Provide the functioncall to our parent, so that it can be // used to substitute for the assignment node immediately above // us. assert(!target_symbol_setter_functioncalls.empty()); *(target_symbol_setter_functioncalls.top()) = n; } provide (n); } struct procfs_builder: public derived_probe_builder { procfs_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results); }; void procfs_builder::build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { string path; bool has_procfs = get_param(parameters, TOK_PROCFS, path); bool has_read = (parameters.find(TOK_READ) != parameters.end()); bool has_write = (parameters.find(TOK_WRITE) != parameters.end()); // If no procfs path, default to "command". The runtime will do // this for us, but if we don't do it here, we'll think the // following 2 probes are attached to different paths: // // probe procfs("command").read {}" // probe procfs.write {} if (! has_procfs) path = "command"; // If we have a path, we need to validate it. else { string::size_type start_pos, end_pos; string component; start_pos = 0; while ((end_pos = path.find('/', start_pos)) != string::npos) { // Make sure it doesn't start with '/'. if (end_pos == 0) throw semantic_error ("procfs path cannot start with a '/'", location->tok); component = path.substr(start_pos, end_pos - start_pos); // Make sure it isn't empty. if (component.size() == 0) throw semantic_error ("procfs path component cannot be empty", location->tok); // Make sure it isn't relative. else if (component == "." || component == "..") throw semantic_error ("procfs path cannot be relative (and contain '.' or '..')", location->tok); start_pos = end_pos + 1; } component = path.substr(start_pos); // Make sure it doesn't end with '/'. if (component.size() == 0) throw semantic_error ("procfs path cannot end with a '/'", location->tok); // Make sure it isn't relative. else if (component == "." || component == "..") throw semantic_error ("procfs path cannot be relative (and contain '.' or '..')", location->tok); } if (!(has_read ^ has_write)) throw semantic_error ("need read/write component", location->tok); finished_results.push_back(new procfs_derived_probe(sess, base, location, path, has_write)); } // ------------------------------------------------------------------------ // statically inserted macro-based derived probes // ------------------------------------------------------------------------ static string TOK_FORMAT("format"); struct mark_arg { bool str; string c_type; exp_type stp_type; }; struct mark_derived_probe: public derived_probe { mark_derived_probe (systemtap_session &s, const string& probe_name, const string& probe_format, probe* base_probe, probe_point* location); systemtap_session& sess; string probe_name, probe_format; vector mark_args; bool target_symbol_seen; void join_group (systemtap_session& s); void print_dupe_stamp (ostream& o); void emit_probe_context_vars (translator_output* o); void initialize_probe_context_vars (translator_output* o); void printargs (std::ostream &o) const; void parse_probe_format (); }; struct mark_derived_probe_group: public generic_dpg { public: void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; struct mark_var_expanding_visitor: public var_expanding_visitor { mark_var_expanding_visitor(systemtap_session& s, const string& pn, vector &mark_args): sess (s), probe_name (pn), mark_args (mark_args), target_symbol_seen (false) {} systemtap_session& sess; string probe_name; vector &mark_args; bool target_symbol_seen; void visit_target_symbol (target_symbol* e); void visit_target_symbol_arg (target_symbol* e); void visit_target_symbol_context (target_symbol* e); }; void hex_dump(unsigned char *data, size_t len) { // Dump data size_t idx = 0; while (idx < len) { string char_rep; clog << " 0x" << setfill('0') << setw(8) << hex << internal << idx; for (int i = 0; i < 4; i++) { clog << " "; size_t limit = idx + 4; while (idx < len && idx < limit) { clog << setfill('0') << setw(2) << ((unsigned) *((unsigned char *)data + idx)); if (isprint(*((char *)data + idx))) char_rep += *((char *)data + idx); else char_rep += '.'; idx++; } while (idx < limit) { clog << " "; idx++; } } clog << " " << char_rep << dec << setfill(' ') << endl; } } void mark_var_expanding_visitor::visit_target_symbol_arg (target_symbol* e) { string argnum_s = e->base_name.substr(4,e->base_name.length()-4); int argnum = atoi (argnum_s.c_str()); if (argnum < 1 || argnum > (int)mark_args.size()) throw semantic_error ("invalid marker argument number", e->tok); if (is_active_lvalue (e)) throw semantic_error("write to marker parameter not permitted", e->tok); if (e->components.size() > 0) { switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("marker argument may not be used as array", e->tok); break; case target_symbol::comp_struct_member: throw semantic_error("marker argument may not be used as a structure", e->tok); break; default: throw semantic_error ("invalid marker argument use", e->tok); break; } } // Remember that we've seen a target variable. target_symbol_seen = true; e->probe_context_var = "__mark_arg" + lex_cast(argnum); e->type = mark_args[argnum-1]->stp_type; provide (e); } void mark_var_expanding_visitor::visit_target_symbol_context (target_symbol* e) { string sname = e->base_name; if (is_active_lvalue (e)) throw semantic_error("write to marker '" + sname + "' not permitted", e->tok); if (e->components.size() > 0) { switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("marker '" + sname + "' may not be used as array", e->tok); break; case target_symbol::comp_struct_member: throw semantic_error("marker '" + sname + "' may not be used as a structure", e->tok); break; default: throw semantic_error ("invalid marker '" + sname + "' use", e->tok); break; } } string fname; if (e->base_name == "$format") { fname = string("_mark_format_get"); } else { fname = string("_mark_name_get"); } // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session provide (n); } void mark_var_expanding_visitor::visit_target_symbol (target_symbol* e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); if (e->base_name.substr(0,4) == "$arg") visit_target_symbol_arg (e); else if (e->base_name == "$format" || e->base_name == "$name") visit_target_symbol_context (e); else throw semantic_error ("invalid target symbol for marker, $argN, $name or $format expected", e->tok); } mark_derived_probe::mark_derived_probe (systemtap_session &s, const string& p_n, const string& p_f, probe* base, probe_point* loc): derived_probe (base, new probe_point(*loc) /* .components soon rewritten */), sess (s), probe_name (p_n), probe_format (p_f), target_symbol_seen (false) { // create synthetic probe point name; preserve condition vector comps; comps.push_back (new probe_point::component (TOK_KERNEL)); comps.push_back (new probe_point::component (TOK_MARK, new literal_string (probe_name))); comps.push_back (new probe_point::component (TOK_FORMAT, new literal_string (probe_format))); this->sole_location()->components = comps; // expand the marker format parse_probe_format(); // Now expand the local variables in the probe body mark_var_expanding_visitor v (sess, name, mark_args); this->body = v.require (this->body); target_symbol_seen = v.target_symbol_seen; if (sess.verbose > 2) clog << "marker-based " << name << " mark=" << probe_name << " fmt='" << probe_format << "'" << endl; } static int skip_atoi(const char **s) { int i = 0; while (isdigit(**s)) i = i * 10 + *((*s)++) - '0'; return i; } void mark_derived_probe::parse_probe_format() { const char *fmt = probe_format.c_str(); int qualifier; // 'h', 'l', or 'L' for integer fields mark_arg *arg; for (; *fmt ; ++fmt) { if (*fmt != '%') { /* Skip text */ continue; } repeat: ++fmt; // skip conversion flags (if present) switch (*fmt) { case '-': case '+': case ' ': case '#': case '0': goto repeat; } // skip minimum field witdh (if present) if (isdigit(*fmt)) skip_atoi(&fmt); // skip precision (if present) if (*fmt == '.') { ++fmt; if (isdigit(*fmt)) skip_atoi(&fmt); } // get the conversion qualifier (if present) qualifier = -1; if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { qualifier = *fmt; ++fmt; if (qualifier == 'l' && *fmt == 'l') { qualifier = 'L'; ++fmt; } } // get the conversion type switch (*fmt) { case 'c': arg = new mark_arg; arg->str = false; arg->c_type = "int"; arg->stp_type = pe_long; mark_args.push_back(arg); continue; case 's': arg = new mark_arg; arg->str = true; arg->c_type = "char *"; arg->stp_type = pe_string; mark_args.push_back(arg); continue; case 'p': arg = new mark_arg; arg->str = false; // This should really be 'void *'. But, then we'll get a // compile error when we assign the void pointer to an // integer without a cast. So, we use 'long' instead, since // it should have the same size as 'void *'. arg->c_type = "long"; arg->stp_type = pe_long; mark_args.push_back(arg); continue; case '%': continue; case 'o': case 'X': case 'x': case 'd': case 'i': case 'u': // fall through... break; default: if (!*fmt) --fmt; continue; } arg = new mark_arg; arg->str = false; arg->stp_type = pe_long; switch (qualifier) { case 'L': arg->c_type = "long long"; break; case 'l': arg->c_type = "long"; break; case 'h': arg->c_type = "short"; break; default: arg->c_type = "int"; break; } mark_args.push_back(arg); } } void mark_derived_probe::join_group (systemtap_session& s) { if (! s.mark_derived_probes) { s.mark_derived_probes = new mark_derived_probe_group (); // Make sure is included early. embeddedcode *ec = new embeddedcode; ec->tok = NULL; ec->code = string("#if ! defined(CONFIG_MARKERS)\n") + string("#error \"Need CONFIG_MARKERS!\"\n") + string("#endif\n") + string("#include \n"); s.embeds.push_back(ec); } s.mark_derived_probes->enroll (this); } void mark_derived_probe::print_dupe_stamp (ostream& o) { if (target_symbol_seen) for (unsigned i = 0; i < mark_args.size(); i++) o << mark_args[i]->c_type << " __mark_arg" << (i+1) << endl; } void mark_derived_probe::emit_probe_context_vars (translator_output* o) { // If we haven't seen a target symbol for this probe, quit. if (! target_symbol_seen) return; for (unsigned i = 0; i < mark_args.size(); i++) { string localname = "__mark_arg" + lex_cast(i+1); switch (mark_args[i]->stp_type) { case pe_long: o->newline() << "int64_t " << localname << ";"; break; case pe_string: o->newline() << "string_t " << localname << ";"; break; default: throw semantic_error ("cannot expand unknown type"); break; } } } void mark_derived_probe::initialize_probe_context_vars (translator_output* o) { // If we haven't seen a target symbol for this probe, quit. if (! target_symbol_seen) return; bool deref_fault_needed = false; for (unsigned i = 0; i < mark_args.size(); i++) { string localname = "l->__mark_arg" + lex_cast(i+1); switch (mark_args[i]->stp_type) { case pe_long: o->newline() << localname << " = va_arg(*c->mark_va_list, " << mark_args[i]->c_type << ");"; break; case pe_string: // We're assuming that this is a kernel string (this code is // basically the guts of kernel_string), not a user string. o->newline() << "{ " << mark_args[i]->c_type << " tmp_str = va_arg(*c->mark_va_list, " << mark_args[i]->c_type << ");"; o->newline() << "deref_string (" << localname << ", tmp_str, MAXSTRINGLEN); }"; deref_fault_needed = true; break; default: throw semantic_error ("cannot expand unknown type"); break; } } if (deref_fault_needed) // Need to report errors? o->newline() << "deref_fault: ;"; } void mark_derived_probe::printargs(std::ostream &o) const { for (unsigned i = 0; i < mark_args.size(); i++) { string localname = "$arg" + lex_cast(i+1); switch (mark_args[i]->stp_type) { case pe_long: o << " " << localname << ":long"; break; case pe_string: o << " " << localname << ":string"; break; default: o << " " << localname << ":unknown"; break; } } } void mark_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- marker probes ---- */"; s.op->newline() << "static struct stap_marker_probe {"; s.op->newline(1) << "const char * const name;"; s.op->newline() << "const char * const format;"; s.op->newline() << "const char * const pp;"; s.op->newline() << "void (* const ph) (struct context *);"; s.op->newline(-1) << "} stap_marker_probes [" << probes.size() << "] = {"; s.op->indent(1); for (unsigned i=0; i < probes.size(); i++) { s.op->newline () << "{"; s.op->line() << " .name=" << lex_cast_qstring(probes[i]->probe_name) << ","; s.op->line() << " .format=" << lex_cast_qstring(probes[i]->probe_format) << ","; s.op->line() << " .pp=" << lex_cast_qstring (*probes[i]->sole_location()) << ","; s.op->line() << " .ph=&" << probes[i]->name; s.op->line() << " },"; } s.op->newline(-1) << "};"; s.op->newline(); // Emit the marker callback function s.op->newline(); s.op->newline() << "static void enter_marker_probe (void *probe_data, void *call_data, const char *fmt, va_list *args) {"; s.op->newline(1) << "struct stap_marker_probe *smp = (struct stap_marker_probe *)probe_data;"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "smp->pp"); s.op->newline() << "c->marker_name = smp->name;"; s.op->newline() << "c->marker_format = smp->format;"; s.op->newline() << "c->mark_va_list = args;"; s.op->newline() << "(*smp->ph) (c);"; s.op->newline() << "c->mark_va_list = NULL;"; s.op->newline() << "c->data = NULL;"; common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; return; } void mark_derived_probe_group::emit_module_init (systemtap_session &s) { if (probes.size () == 0) return; s.op->newline() << "/* init marker probes */"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_marker_probe *smp = &stap_marker_probes[i];"; s.op->newline() << "probe_point = smp->pp;"; s.op->newline() << "rc = marker_probe_register(smp->name, smp->format, enter_marker_probe, smp);"; s.op->newline() << "if (rc) {"; s.op->newline(1) << "for (j=i-1; j>=0; j--) {"; // partial rollback s.op->newline(1) << "struct stap_marker_probe *smp2 = &stap_marker_probes[j];"; s.op->newline() << "marker_probe_unregister(smp2->name, enter_marker_probe, smp2);"; s.op->newline(-1) << "}"; s.op->newline() << "break;"; // don't attempt to register any more probes s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; // for loop } void mark_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* deregister marker probes */"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_marker_probe *smp = &stap_marker_probes[i];"; s.op->newline() << "marker_probe_unregister(smp->name, enter_marker_probe, smp);"; s.op->newline(-1) << "}"; // for loop } struct mark_builder: public derived_probe_builder { private: bool cache_initialized; typedef multimap mark_cache_t; typedef multimap::const_iterator mark_cache_const_iterator_t; typedef pair mark_cache_const_iterator_pair_t; mark_cache_t mark_cache; public: mark_builder(): cache_initialized(false) {} void build_no_more (systemtap_session &s) { if (! mark_cache.empty()) { if (s.verbose > 3) clog << "mark_builder releasing cache" << endl; mark_cache.clear(); } } void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results); }; void mark_builder::build(systemtap_session & sess, probe * base, probe_point *loc, literal_map_t const & parameters, vector & finished_results) { string mark_str_val; bool has_mark_str = get_param (parameters, TOK_MARK, mark_str_val); string mark_format_val; bool has_mark_format = get_param (parameters, TOK_FORMAT, mark_format_val); assert (has_mark_str); (void) has_mark_str; if (! cache_initialized) { cache_initialized = true; string module_markers_path = sess.kernel_build_tree + "/Module.markers"; ifstream module_markers; module_markers.open(module_markers_path.c_str(), ifstream::in); if (! module_markers) { if (sess.verbose>3) clog << module_markers_path << " cannot be opened: " << strerror(errno) << endl; return; } string name, module, format; do { module_markers >> name >> module; getline(module_markers, format); // trim leading whitespace string::size_type notwhite = format.find_first_not_of(" \t"); format.erase(0, notwhite); // If the format is empty, make sure we add back a space // character, which is what MARK_NOARGS expands to. if (format.length() == 0) format = " "; if (sess.verbose>3) clog << "'" << name << "' '" << module << "' '" << format << "'" << endl; if (mark_cache.count(name) > 0) { // If we have 2 markers with the same we've got 2 cases: // different format strings or duplicate format strings. // If an existing marker in the cache doesn't have the // same format string, add this marker. mark_cache_const_iterator_pair_t ret; mark_cache_const_iterator_t it; bool matching_format_string = false; ret = mark_cache.equal_range(name); for (it = ret.first; it != ret.second; ++it) { if (format == it->second) { matching_format_string = true; break; } } if (! matching_format_string) mark_cache.insert(pair(name, format)); } else mark_cache.insert(pair(name, format)); } while (! module_markers.eof()); module_markers.close(); } // Search marker list for matching markers for (mark_cache_const_iterator_t it = mark_cache.begin(); it != mark_cache.end(); it++) { // Below, "rc" has negative polarity: zero iff matching. int rc = fnmatch(mark_str_val.c_str(), it->first.c_str(), 0); if (! rc) { bool add_result = true; // Match format strings (if the user specified one) if (has_mark_format && fnmatch(mark_format_val.c_str(), it->second.c_str(), 0)) add_result = false; if (add_result) { derived_probe *dp = new mark_derived_probe (sess, it->first, it->second, base, loc); finished_results.push_back (dp); } } } } // ------------------------------------------------------------------------ // statically inserted kernel-tracepoint derived probes // ------------------------------------------------------------------------ struct tracepoint_arg { string name, c_type, typecast; bool usable, used, isptr; Dwarf_Die type_die; tracepoint_arg(): usable(false), used(false), isptr(false) {} }; struct tracepoint_derived_probe: public derived_probe { tracepoint_derived_probe (systemtap_session& s, dwflpp& dw, Dwarf_Die& func_die, const string& tracepoint_name, probe* base_probe, probe_point* location); systemtap_session& sess; string tracepoint_name, header; vector args; void build_args(dwflpp& dw, Dwarf_Die& func_die); void printargs (std::ostream &o) const; void join_group (systemtap_session& s); void print_dupe_stamp(ostream& o); void emit_probe_context_vars (translator_output* o); }; struct tracepoint_derived_probe_group: public generic_dpg { void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; struct tracepoint_var_expanding_visitor: public var_expanding_visitor { tracepoint_var_expanding_visitor(dwflpp& dw, const string& probe_name, vector & args): dw (dw), probe_name (probe_name), args (args) {} dwflpp& dw; const string& probe_name; vector & args; void visit_target_symbol (target_symbol* e); void visit_target_symbol_arg (target_symbol* e); void visit_target_symbol_context (target_symbol* e); }; void tracepoint_var_expanding_visitor::visit_target_symbol_arg (target_symbol* e) { string argname = e->base_name.substr(1); // search for a tracepoint parameter matching this name tracepoint_arg *arg = NULL; for (unsigned i = 0; i < args.size(); ++i) if (args[i].usable && args[i].name == argname) { arg = &args[i]; arg->used = true; break; } if (arg == NULL) { stringstream alternatives; for (unsigned i = 0; i < args.size(); ++i) alternatives << " $" << args[i].name; alternatives << " $$name $$parms $$vars"; // We hope that this value ends up not being referenced after all, so it // can be optimized out quietly. semantic_error* saveme = new semantic_error("unable to find tracepoint variable '" + e->base_name + "' (alternatives:" + alternatives.str () + ")", e->tok); // NB: we can have multiple errors, since a target variable // may be expanded in several different contexts: // trace ("*") { $foo->bar } saveme->chain = e->saved_conversion_error; e->saved_conversion_error = saveme; provide (e); return; } // make sure we're not dereferencing base types if (!e->components.empty() && !arg->isptr) switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("tracepoint variable '" + e->base_name + "' may not be used as array", e->tok); case target_symbol::comp_struct_member: throw semantic_error("tracepoint variable '" + e->base_name + "' may not be used as a structure", e->tok); default: throw semantic_error("invalid use of tracepoint variable '" + e->base_name + "'", e->tok); } // we can only write to dereferenced fields, and only if guru mode is on bool lvalue = is_active_lvalue(e); if (lvalue && (!dw.sess.guru_mode || e->components.empty())) throw semantic_error("write to tracepoint variable '" + e->base_name + "' not permitted", e->tok); // XXX: if a struct/union arg is passed by value, then writing to its fields // is also meaningless until you dereference past a pointer member. It's // harder to detect and prevent that though... if (e->components.empty()) { // Just grab the value from the probe locals e->probe_context_var = "__tracepoint_arg_" + arg->name; e->type = pe_long; provide (e); } else { // Synthesize a function to dereference the dwarf fields, // with a pointer parameter that is the base tracepoint variable functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; string fname = (string(lvalue ? "_tracepoint_tvar_set" : "_tracepoint_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(tick++)); fdecl->name = fname; fdecl->body = ec; try { ec->code = dw.literal_stmt_for_pointer (&arg->type_die, e, lvalue, fdecl->type); } catch (const semantic_error& er) { // We suppress this error message, and pass the unresolved // variable to the next pass. We hope that this value ends // up not being referenced after all, so it can be optimized out // quietly. semantic_error* saveme = new semantic_error (er); // copy it saveme->tok1 = e->tok; // XXX: token not passed to dw code generation routines // NB: we can have multiple errors, since a target variable // may be expanded in several different contexts: // trace ("*") { $foo->bar } saveme->chain = e->saved_conversion_error; e->saved_conversion_error = saveme; provide (e); return; } // Give the fdecl an argument for the raw tracepoint value vardecl *v1 = new vardecl; v1->type = pe_long; v1->name = "pointer"; v1->tok = e->tok; fdecl->formal_args.push_back(v1); if (lvalue) { // Modify the fdecl so it carries a pe_long formal // argument called "value". // FIXME: For the time being we only support setting target // variables which have base types; these are 'pe_long' in // stap's type vocabulary. Strings and pointers might be // reasonable, some day, but not today. vardecl *v2 = new vardecl; v2->type = pe_long; v2->name = "value"; v2->tok = e->tok; fdecl->formal_args.push_back(v2); } else ec->code += "/* pure */"; dw.sess.functions[fdecl->name] = fdecl; // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session // make the original a bare target symbol for the tracepoint value, // which will be passed into the dwarf dereferencing code e->components.clear(); n->args.push_back(require(e)); if (lvalue) { // Provide the functioncall to our parent, so that it can be // used to substitute for the assignment node immediately above // us. assert(!target_symbol_setter_functioncalls.empty()); *(target_symbol_setter_functioncalls.top()) = n; } provide (n); } } void tracepoint_var_expanding_visitor::visit_target_symbol_context (target_symbol* e) { if (is_active_lvalue (e)) throw semantic_error("write to tracepoint '" + e->base_name + "' not permitted", e->tok); if (!e->components.empty()) switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("tracepoint '" + e->base_name + "' may not be used as array", e->tok); case target_symbol::comp_struct_member: throw semantic_error("tracepoint '" + e->base_name + "' may not be used as a structure", e->tok); default: throw semantic_error("invalid tracepoint '" + e->base_name + "' use", e->tok); } if (e->base_name == "$$name") { // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = "_mark_name_get"; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session provide (n); } else if (e->base_name == "$$vars" || e->base_name == "$$parms") { print_format* pf = new print_format; // Convert $$vars to sprintf of a list of vars which we recursively evaluate // NB: we synthesize a new token here rather than reusing // e->tok, because print_format::print likes to use // its tok->content. token* pf_tok = new token(*e->tok); pf_tok->content = "sprintf"; pf->tok = pf_tok; pf->print_to_stream = false; pf->print_with_format = true; pf->print_with_delim = false; pf->print_with_newline = false; pf->print_char = false; for (unsigned i = 0; i < args.size(); ++i) { if (!args[i].usable) continue; if (i > 0) pf->raw_components += " "; pf->raw_components += args[i].name; target_symbol *tsym = new target_symbol; tsym->tok = e->tok; tsym->base_name = "$" + args[i].name; // every variable should always be accessible! tsym->saved_conversion_error = 0; expression *texp = require (tsym); // NB: throws nothing ... assert (!tsym->saved_conversion_error); // ... but this is how we know it happened. pf->raw_components += "=%#x"; pf->args.push_back(texp); } pf->components = print_format::string_to_components(pf->raw_components); provide (pf); } else assert(0); // shouldn't get here } void tracepoint_var_expanding_visitor::visit_target_symbol (target_symbol* e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); if (e->base_name == "$$name" || e->base_name == "$$parms" || e->base_name == "$$vars") visit_target_symbol_context (e); else visit_target_symbol_arg (e); } tracepoint_derived_probe::tracepoint_derived_probe (systemtap_session& s, dwflpp& dw, Dwarf_Die& func_die, const string& tracepoint_name, probe* base, probe_point* loc): derived_probe (base, new probe_point(*loc) /* .components soon rewritten */), sess (s), tracepoint_name (tracepoint_name) { // create synthetic probe point name; preserve condition vector comps; comps.push_back (new probe_point::component (TOK_KERNEL)); comps.push_back (new probe_point::component (TOK_TRACE, new literal_string (tracepoint_name))); this->sole_location()->components = comps; // fill out the available arguments in this tracepoint build_args(dw, func_die); // determine which header defined this tracepoint string decl_file = dwarf_decl_file(&func_die); size_t header_pos = decl_file.rfind("trace/"); if (header_pos == string::npos) throw semantic_error ("cannot parse header location for tracepoint '" + tracepoint_name + "' in '" + decl_file + "'"); header = decl_file.substr(header_pos); // tracepoints from FOO_event_types.h should really be included from FOO.h // XXX can dwarf tell us the include hierarchy? it would be better to // ... walk up to see which one was directly included by tracequery.c // XXX: see also PR9993. header_pos = header.find("_event_types"); if (header_pos != string::npos) header.erase(header_pos, 12); // Now expand the local variables in the probe body tracepoint_var_expanding_visitor v (dw, name, args); this->body = v.require (this->body); if (sess.verbose > 2) clog << "tracepoint-based " << name << " tracepoint='" << tracepoint_name << "'" << endl; } static bool dwarf_type_name(Dwarf_Die& type_die, string& c_type) { // if we've gotten down to a basic type, then we're done bool done = true; switch (dwarf_tag(&type_die)) { case DW_TAG_structure_type: c_type.append("struct "); break; case DW_TAG_union_type: c_type.append("union "); break; case DW_TAG_typedef: case DW_TAG_base_type: break; default: done = false; break; } if (done) { c_type.append(dwarf_diename_integrate(&type_die)); return true; } // otherwise, this die is a type modifier. // recurse into the referent type // if it can't be named, just call it "void" Dwarf_Attribute subtype_attr; Dwarf_Die subtype_die; if (!dwarf_attr_integrate(&type_die, DW_AT_type, &subtype_attr) || !dwarf_formref_die(&subtype_attr, &subtype_die) || !dwarf_type_name(subtype_die, c_type)) c_type = "void"; const char *suffix = NULL; switch (dwarf_tag(&type_die)) { case DW_TAG_pointer_type: suffix = "*"; break; case DW_TAG_array_type: suffix = "[]"; break; case DW_TAG_const_type: suffix = " const"; break; case DW_TAG_volatile_type: suffix = " volatile"; break; default: return false; } c_type.append(suffix); // XXX HACK! The va_list isn't usable as found in the debuginfo... if (c_type == "struct __va_list_tag*") c_type = "va_list"; return true; } static bool resolve_tracepoint_arg_type(tracepoint_arg& arg) { Dwarf_Attribute type_attr; switch (dwarf_tag(&arg.type_die)) { case DW_TAG_typedef: case DW_TAG_const_type: case DW_TAG_volatile_type: // iterate on the referent type return (dwarf_attr_integrate(&arg.type_die, DW_AT_type, &type_attr) && dwarf_formref_die(&type_attr, &arg.type_die) && resolve_tracepoint_arg_type(arg)); case DW_TAG_base_type: // base types will simply be treated as script longs arg.isptr = false; return true; case DW_TAG_pointer_type: // pointers can be treated as script longs, // and if we know their type, they can also be dereferenced if (dwarf_attr_integrate(&arg.type_die, DW_AT_type, &type_attr) && dwarf_formref_die(&type_attr, &arg.type_die)) arg.isptr = true; arg.typecast = "(intptr_t)"; return true; case DW_TAG_structure_type: case DW_TAG_union_type: // for structs/unions which are passed by value, we turn it into // a pointer that can be dereferenced. arg.isptr = true; arg.typecast = "(intptr_t)&"; return true; default: // should we consider other types too? return false; } } void tracepoint_derived_probe::build_args(dwflpp& dw, Dwarf_Die& func_die) { Dwarf_Die arg; if (dwarf_child(&func_die, &arg) == 0) do if (dwarf_tag(&arg) == DW_TAG_formal_parameter) { // build a tracepoint_arg for this parameter tracepoint_arg tparg; tparg.name = dwarf_diename_integrate(&arg); // read the type of this parameter Dwarf_Attribute type_attr; if (!dwarf_attr_integrate (&arg, DW_AT_type, &type_attr) || !dwarf_formref_die (&type_attr, &tparg.type_die) || !dwarf_type_name(tparg.type_die, tparg.c_type)) throw semantic_error ("cannot get type of tracepoint '" + tracepoint_name + "' parameter '" + tparg.name + "'"); tparg.usable = resolve_tracepoint_arg_type(tparg); args.push_back(tparg); if (sess.verbose > 4) clog << "found parameter for tracepoint '" << tracepoint_name << "': type:'" << tparg.c_type << "' name:'" << tparg.name << "'" << endl; } while (dwarf_siblingof(&arg, &arg) == 0); } void tracepoint_derived_probe::printargs(std::ostream &o) const { for (unsigned i = 0; i < args.size(); ++i) if (args[i].usable) o << " $" << args[i].name << ":" << args[i].c_type; } void tracepoint_derived_probe::join_group (systemtap_session& s) { if (! s.tracepoint_derived_probes) s.tracepoint_derived_probes = new tracepoint_derived_probe_group (); s.tracepoint_derived_probes->enroll (this); } void tracepoint_derived_probe::print_dupe_stamp(ostream& o) { for (unsigned i = 0; i < args.size(); i++) if (args[i].used) o << "__tracepoint_arg_" << args[i].name << endl; } void tracepoint_derived_probe::emit_probe_context_vars (translator_output* o) { for (unsigned i = 0; i < args.size(); i++) if (args[i].used) o->newline() << "int64_t __tracepoint_arg_" << args[i].name << ";"; } static vector tracepoint_extra_headers () { vector they_live; // PR 9993 // XXX: may need this to be configurable they_live.push_back ("linux/skbuff.h"); return they_live; } void tracepoint_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- tracepoint probes ---- */"; s.op->newline(); // PR9993: Add extra headers to work around undeclared types in individual // include/trace/foo.h files const vector& extra_headers = tracepoint_extra_headers (); for (unsigned z=0; znewline() << "#include <" << extra_headers[z] << ">\n"; for (unsigned i = 0; i < probes.size(); ++i) { tracepoint_derived_probe *p = probes[i]; // emit a separate entry function for each probe, since tracepoints // don't provide any sort of context pointer. s.op->newline() << "#include <" << p->header << ">"; s.op->newline() << "static void enter_tracepoint_probe_" << i << "("; if (p->args.size() == 0) s.op->line() << "void"; for (unsigned j = 0; j < p->args.size(); ++j) { if (j > 0) s.op->line() << ", "; s.op->line() << p->args[j].c_type << " __tracepoint_arg_" << p->args[j].name; } s.op->line() << ") {"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", lex_cast_qstring (*p->sole_location())); s.op->newline() << "c->marker_name = " << lex_cast_qstring (p->tracepoint_name) << ";"; for (unsigned j = 0; j < p->args.size(); ++j) if (p->args[j].used) { s.op->newline() << "c->locals[0]." << p->name << ".__tracepoint_arg_" << p->args[j].name << " = (int64_t)"; s.op->line() << p->args[j].typecast; s.op->line() << "__tracepoint_arg_" << p->args[j].name << ";"; } s.op->newline() << p->name << " (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; // emit normalized registration functions s.op->newline() << "static int register_tracepoint_probe_" << i << "(void) {"; s.op->newline(1) << "return register_trace_" << p->tracepoint_name << "(enter_tracepoint_probe_" << i << ");"; s.op->newline(-1) << "}"; // NB: we're not prepared to deal with unreg failures. However, failures // can only occur if the tracepoint doesn't exist (yet?), or if we // weren't even registered. The former should be OKed by the initial // registration call, and the latter is safe to ignore. s.op->newline() << "static void unregister_tracepoint_probe_" << i << "(void) {"; s.op->newline(1) << "(void) unregister_trace_" << p->tracepoint_name << "(enter_tracepoint_probe_" << i << ");"; s.op->newline(-1) << "}"; s.op->newline(); } // emit an array of registration functions for easy init/shutdown s.op->newline() << "static struct stap_tracepoint_probe {"; s.op->newline(1) << "int (*reg)(void);"; s.op->newline(0) << "void (*unreg)(void);"; s.op->newline(-1) << "} stap_tracepoint_probes[] = {"; s.op->indent(1); for (unsigned i = 0; i < probes.size(); ++i) { s.op->newline () << "{"; s.op->line() << " .reg=®ister_tracepoint_probe_" << i << ","; s.op->line() << " .unreg=&unregister_tracepoint_probe_" << i; s.op->line() << " },"; } s.op->newline(-1) << "};"; s.op->newline(); } void tracepoint_derived_probe_group::emit_module_init (systemtap_session &s) { if (probes.size () == 0) return; s.op->newline() << "/* init tracepoint probes */"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "rc = stap_tracepoint_probes[i].reg();"; s.op->newline() << "if (rc) {"; s.op->newline(1) << "for (j=i-1; j>=0; j--)"; // partial rollback s.op->newline(1) << "stap_tracepoint_probes[j].unreg();"; s.op->newline(-1) << "break;"; // don't attempt to register any more probes s.op->newline(-1) << "}"; s.op->newline(-1) << "}"; // This would be technically proper (on those autoconf-detectable // kernels that include this function in tracepoint.h), however we // already make several calls to synchronze_sched() during our // shutdown processes. // s.op->newline() << "if (rc)"; // s.op->newline(1) << "tracepoint_synchronize_unregister();"; // s.op->indent(-1); } void tracepoint_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* deregister tracepoint probes */"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++)"; s.op->newline(1) << "stap_tracepoint_probes[i].unreg();"; s.op->indent(-1); // Not necessary: see above. // s.op->newline() << "tracepoint_synchronize_unregister();"; } struct tracepoint_query : public base_query { tracepoint_query(dwflpp & dw, const string & tracepoint, probe * base_probe, probe_point * base_loc, vector & results): base_query(dw, "*"), tracepoint(tracepoint), base_probe(base_probe), base_loc(base_loc), results(results) {} const string& tracepoint; probe * base_probe; probe_point * base_loc; vector & results; void handle_query_module(); int handle_query_cu(Dwarf_Die * cudie); int handle_query_func(Dwarf_Die * func); static int tracepoint_query_cu (Dwarf_Die * cudie, void * arg); static int tracepoint_query_func (Dwarf_Die * func, base_query * query); }; void tracepoint_query::handle_query_module() { // look for the tracepoints in each CU dw.iterate_over_cus(tracepoint_query_cu, this); } int tracepoint_query::handle_query_cu(Dwarf_Die * cudie) { dw.focus_on_cu (cudie); // look at each function to see if it's a tracepoint string function = "stapprobe_" + tracepoint; return dw.iterate_over_functions (tracepoint_query_func, this, function); } int tracepoint_query::handle_query_func(Dwarf_Die * func) { dw.focus_on_function (func); assert(dw.function_name.compare(0, 10, "stapprobe_") == 0); string tracepoint_instance = dw.function_name.substr(10); derived_probe *dp = new tracepoint_derived_probe (dw.sess, dw, *func, tracepoint_instance, base_probe, base_loc); results.push_back (dp); return DWARF_CB_OK; } int tracepoint_query::tracepoint_query_cu (Dwarf_Die * cudie, void * arg) { tracepoint_query * q = static_cast(arg); if (pending_interrupts) return DWARF_CB_ABORT; return q->handle_query_cu(cudie); } int tracepoint_query::tracepoint_query_func (Dwarf_Die * func, base_query * query) { tracepoint_query * q = static_cast(query); if (pending_interrupts) return DWARF_CB_ABORT; return q->handle_query_func(func); } struct tracepoint_builder: public derived_probe_builder { private: dwflpp *dw; bool init_dw(systemtap_session& s); public: tracepoint_builder(): dw(0) {} ~tracepoint_builder() { delete dw; } void build_no_more (systemtap_session& s) { if (dw && s.verbose > 3) clog << "tracepoint_builder releasing dwflpp" << endl; delete dw; dw = NULL; } void build(systemtap_session& s, probe *base, probe_point *location, literal_map_t const& parameters, vector& finished_results); }; bool tracepoint_builder::init_dw(systemtap_session& s) { if (dw != NULL) return true; if (s.use_cache) { // see if the cached module exists find_tracequery_hash(s); if (!s.tracequery_path.empty()) { int fd = open(s.tracequery_path.c_str(), O_RDONLY); if (fd != -1) { if (s.verbose > 2) clog << "Pass 2: using cached " << s.tracequery_path << endl; dw = new dwflpp(s); dw->setup_user(s.tracequery_path); close(fd); return true; } } } // no cached module, time to make it string tracequery_ko; int rc = make_tracequery(s, tracequery_ko, tracepoint_extra_headers()); if (rc != 0) return false; if (s.use_cache) { // try to save tracequery in the cache if (s.verbose > 2) clog << "Copying " << tracequery_ko << " to " << s.tracequery_path << endl; if (copy_file(tracequery_ko.c_str(), s.tracequery_path.c_str()) != 0) cerr << "Copy failed (\"" << tracequery_ko << "\" to \"" << s.tracequery_path << "\"): " << strerror(errno) << endl; } dw = new dwflpp(s); dw->setup_user(tracequery_ko); return true; } void tracepoint_builder::build(systemtap_session& s, probe *base, probe_point *location, literal_map_t const& parameters, vector& finished_results) { if (!init_dw(s)) return; string tracepoint; assert(get_param (parameters, TOK_TRACE, tracepoint)); tracepoint_query q(*dw, tracepoint, base, location, finished_results); dw->query_modules(&q); } // ------------------------------------------------------------------------ // hrtimer derived probes // ------------------------------------------------------------------------ // This is a new timer interface that provides more flexibility in specifying // intervals, and uses the hrtimer APIs when available for greater precision. // While hrtimers were added in 2.6.16, the API's weren't exported until // 2.6.17, so we must check this kernel version before attempting to use // hrtimers. // // * hrtimer_derived_probe: creates a probe point based on the hrtimer APIs. struct hrtimer_derived_probe: public derived_probe { // set a (generous) maximum of one day in ns static const int64_t max_ns_interval = 1000000000LL * 60LL * 60LL * 24LL; // 100us seems like a reasonable minimum static const int64_t min_ns_interval = 100000LL; int64_t interval, randomize; hrtimer_derived_probe (probe* p, probe_point* l, int64_t i, int64_t r, int64_t scale): derived_probe (p, l), interval (i), randomize (r) { if ((i < min_ns_interval) || (i > max_ns_interval)) throw semantic_error(string("interval value out of range (") + lex_cast(scale < min_ns_interval ? min_ns_interval/scale : 1) + "," + lex_cast(max_ns_interval/scale) + ")"); // randomize = 0 means no randomization if ((r < 0) || (r > i)) throw semantic_error("randomization value out of range"); } void join_group (systemtap_session& s); }; struct hrtimer_derived_probe_group: public generic_dpg { void emit_interval (translator_output* o); public: void emit_module_decls (systemtap_session& s); void emit_module_init (systemtap_session& s); void emit_module_exit (systemtap_session& s); }; void hrtimer_derived_probe::join_group (systemtap_session& s) { if (! s.hrtimer_derived_probes) s.hrtimer_derived_probes = new hrtimer_derived_probe_group (); s.hrtimer_derived_probes->enroll (this); } void hrtimer_derived_probe_group::emit_interval (translator_output* o) { o->line() << "({"; o->newline(1) << "unsigned long nsecs;"; o->newline() << "int64_t i = stp->intrv;"; o->newline() << "if (stp->rnd != 0) {"; // XXX: why not use stp_random_pm instead of this? o->newline(1) << "int64_t r;"; o->newline() << "get_random_bytes(&r, sizeof(r));"; // ensure that r is positive o->newline() << "r &= ((uint64_t)1 << (8*sizeof(r) - 1)) - 1;"; o->newline() << "r = _stp_mod64(NULL, r, (2*stp->rnd+1));"; o->newline() << "r -= stp->rnd;"; o->newline() << "i += r;"; o->newline(-1) << "}"; o->newline() << "if (unlikely(i < stap_hrtimer_resolution))"; o->newline(1) << "i = stap_hrtimer_resolution;"; o->indent(-1); o->newline() << "nsecs = do_div(i, NSEC_PER_SEC);"; o->newline() << "ktime_set(i, nsecs);"; o->newline(-1) << "})"; } void hrtimer_derived_probe_group::emit_module_decls (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "/* ---- hrtimer probes ---- */"; s.op->newline() << "static unsigned long stap_hrtimer_resolution;"; // init later s.op->newline() << "static struct stap_hrtimer_probe {"; s.op->newline(1) << "struct hrtimer hrtimer;"; s.op->newline() << "const char *pp;"; s.op->newline() << "void (*ph) (struct context*);"; s.op->newline() << "int64_t intrv, rnd;"; s.op->newline(-1) << "} stap_hrtimer_probes [" << probes.size() << "] = {"; s.op->indent(1); for (unsigned i=0; i < probes.size(); i++) { s.op->newline () << "{"; s.op->line() << " .pp=" << lex_cast_qstring (*probes[i]->sole_location()) << ","; s.op->line() << " .ph=&" << probes[i]->name << ","; s.op->line() << " .intrv=" << probes[i]->interval << "LL,"; s.op->line() << " .rnd=" << probes[i]->randomize << "LL"; s.op->line() << " },"; } s.op->newline(-1) << "};"; s.op->newline(); // autoconf: add get/set expires if missing (pre 2.6.28-rc1) s.op->newline() << "#ifndef STAPCONF_HRTIMER_GETSET_EXPIRES"; s.op->newline() << "#define hrtimer_get_expires(timer) ((timer)->expires)"; s.op->newline() << "#define hrtimer_set_expires(timer, time) (void)((timer)->expires = (time))"; s.op->newline() << "#endif"; // autoconf: adapt to HRTIMER_REL -> HRTIMER_MODE_REL renaming near 2.6.21 s.op->newline() << "#ifdef STAPCONF_HRTIMER_REL"; s.op->newline() << "#define HRTIMER_MODE_REL HRTIMER_REL"; s.op->newline() << "#endif"; // The function signature changed in 2.6.21. s.op->newline() << "#ifdef STAPCONF_HRTIMER_REL"; s.op->newline() << "static int "; s.op->newline() << "#else"; s.op->newline() << "static enum hrtimer_restart "; s.op->newline() << "#endif"; s.op->newline() << "enter_hrtimer_probe (struct hrtimer *timer) {"; s.op->newline(1) << "int rc = HRTIMER_NORESTART;"; s.op->newline() << "struct stap_hrtimer_probe *stp = container_of(timer, struct stap_hrtimer_probe, hrtimer);"; s.op->newline() << "if ((atomic_read (&session_state) == STAP_SESSION_STARTING) ||"; s.op->newline() << " (atomic_read (&session_state) == STAP_SESSION_RUNNING)) {"; // Compute next trigger time s.op->newline(1) << "hrtimer_set_expires(timer, ktime_add (hrtimer_get_expires(timer),"; emit_interval (s.op); s.op->line() << "));"; s.op->newline() << "rc = HRTIMER_RESTART;"; s.op->newline(-1) << "}"; s.op->newline() << "{"; s.op->indent(1); common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "stp->pp"); s.op->newline() << "(*stp->ph) (c);"; common_probe_entryfn_epilogue (s.op); s.op->newline(-1) << "}"; s.op->newline() << "return rc;"; s.op->newline(-1) << "}"; } void hrtimer_derived_probe_group::emit_module_init (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "{"; s.op->newline(1) << "struct timespec res;"; s.op->newline() << "hrtimer_get_res (CLOCK_MONOTONIC, &res);"; s.op->newline() << "stap_hrtimer_resolution = timespec_to_ns (&res);"; s.op->newline(-1) << "}"; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_hrtimer_probe* stp = & stap_hrtimer_probes [i];"; s.op->newline() << "probe_point = stp->pp;"; s.op->newline() << "hrtimer_init (& stp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);"; s.op->newline() << "stp->hrtimer.function = & enter_hrtimer_probe;"; // There is no hrtimer field to identify *this* (i-th) probe handler // callback. So instead we'll deduce it at entry time. s.op->newline() << "(void) hrtimer_start (& stp->hrtimer, "; emit_interval (s.op); s.op->line() << ", HRTIMER_MODE_REL);"; // Note: no partial failure rollback is needed: hrtimer_start only // "fails" if the timer was already active, which cannot be. s.op->newline(-1) << "}"; // for loop } void hrtimer_derived_probe_group::emit_module_exit (systemtap_session& s) { if (probes.empty()) return; s.op->newline() << "for (i=0; i<" << probes.size() << "; i++)"; s.op->newline(1) << "hrtimer_cancel (& stap_hrtimer_probes[i].hrtimer);"; s.op->indent(-1); } struct timer_builder: public derived_probe_builder { virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results); static void register_patterns(systemtap_session& s); }; void timer_builder::build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { int64_t scale=1, period, rand=0; if (!get_param(parameters, "randomize", rand)) rand = 0; if (get_param(parameters, "jiffies", period)) { // always use basic timers for jiffies finished_results.push_back( new timer_derived_probe(base, location, period, rand, false)); return; } else if (get_param(parameters, "hz", period)) { if (period <= 0) throw semantic_error ("frequency must be greater than 0"); period = (1000000000 + period - 1)/period; } else if (get_param(parameters, "s", period) || get_param(parameters, "sec", period)) { scale = 1000000000; period *= scale; rand *= scale; } else if (get_param(parameters, "ms", period) || get_param(parameters, "msec", period)) { scale = 1000000; period *= scale; rand *= scale; } else if (get_param(parameters, "us", period) || get_param(parameters, "usec", period)) { scale = 1000; period *= scale; rand *= scale; } else if (get_param(parameters, "ns", period) || get_param(parameters, "nsec", period)) { // ok } else throw semantic_error ("unrecognized timer variant"); // Redirect wallclock-time based probes to hrtimer code on recent // enough kernels. if (strverscmp(sess.kernel_base_release.c_str(), "2.6.17") < 0) { // hrtimers didn't exist, so use the old-school timers period = (period + 1000000 - 1)/1000000; rand = (rand + 1000000 - 1)/1000000; finished_results.push_back( new timer_derived_probe(base, location, period, rand, true)); } else finished_results.push_back( new hrtimer_derived_probe(base, location, period, rand, scale)); } void timer_builder::register_patterns(systemtap_session& s) { match_node* root = s.pattern_root; derived_probe_builder *builder = new timer_builder(); root = root->bind(TOK_TIMER); root->bind_num("s")->bind(builder); root->bind_num("s")->bind_num("randomize")->bind(builder); root->bind_num("sec")->bind(builder); root->bind_num("sec")->bind_num("randomize")->bind(builder); root->bind_num("ms")->bind(builder); root->bind_num("ms")->bind_num("randomize")->bind(builder); root->bind_num("msec")->bind(builder); root->bind_num("msec")->bind_num("randomize")->bind(builder); root->bind_num("us")->bind(builder); root->bind_num("us")->bind_num("randomize")->bind(builder); root->bind_num("usec")->bind(builder); root->bind_num("usec")->bind_num("randomize")->bind(builder); root->bind_num("ns")->bind(builder); root->bind_num("ns")->bind_num("randomize")->bind(builder); root->bind_num("nsec")->bind(builder); root->bind_num("nsec")->bind_num("randomize")->bind(builder); root->bind_num("jiffies")->bind(builder); root->bind_num("jiffies")->bind_num("randomize")->bind(builder); root->bind_num("hz")->bind(builder); } // ------------------------------------------------------------------------ // perfmon derived probes // ------------------------------------------------------------------------ // This is a new interface to the perfmon hw. // struct perfmon_var_expanding_visitor: public var_expanding_visitor { systemtap_session & sess; unsigned counter_number; perfmon_var_expanding_visitor(systemtap_session & s, unsigned c): sess(s), counter_number(c) {} void visit_target_symbol (target_symbol* e); }; void perfmon_var_expanding_visitor::visit_target_symbol (target_symbol *e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); // Synthesize a function. functiondecl *fdecl = new functiondecl; fdecl->tok = e->tok; embeddedcode *ec = new embeddedcode; ec->tok = e->tok; bool lvalue = is_active_lvalue(e); if (lvalue ) throw semantic_error("writes to $counter not permitted"); string fname = string("_perfmon_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast(counter_number); if (e->base_name != "$counter") throw semantic_error ("target variables not available to perfmon probes"); if (e->components.size() > 0) { switch (e->components[0].first) { case target_symbol::comp_literal_array_index: throw semantic_error("perfmon probe '$counter' variable may not be used as array", e->tok); break; case target_symbol::comp_struct_member: throw semantic_error("perfmon probe '$counter' variable may not be used as a structure", e->tok); break; default: throw semantic_error ("invalid use of perfmon probe '$counter' variable", e->tok); break; } } ec->code = "THIS->__retvalue = _pfm_pmd_x[" + lex_cast(counter_number) + "].reg_num;"; ec->code += "/* pure */"; fdecl->name = fname; fdecl->body = ec; fdecl->type = pe_long; sess.functions[fdecl->name]=fdecl; // Synthesize a functioncall. functioncall* n = new functioncall; n->tok = e->tok; n->function = fname; n->referent = 0; // NB: must not resolve yet, to ensure inclusion in session provide (n); } enum perfmon_mode { perfmon_count, perfmon_sample }; struct perfmon_derived_probe: public derived_probe { protected: static unsigned probes_allocated; public: systemtap_session & sess; string event; perfmon_mode mode; perfmon_derived_probe (probe* p, probe_point* l, systemtap_session &s, string e, perfmon_mode m); virtual void join_group (systemtap_session& s); }; struct perfmon_derived_probe_group: public generic_dpg { public: void emit_module_decls (systemtap_session&) {} void emit_module_init (systemtap_session&) {} void emit_module_exit (systemtap_session&) {} }; struct perfmon_builder: public derived_probe_builder { perfmon_builder() {} virtual void build(systemtap_session & sess, probe * base, probe_point * location, literal_map_t const & parameters, vector & finished_results) { string event; if (!get_param (parameters, "counter", event)) throw semantic_error("perfmon requires an event"); sess.perfmon++; // XXX: need to revise when doing sampling finished_results.push_back(new perfmon_derived_probe(base, location, sess, event, perfmon_count)); } }; unsigned perfmon_derived_probe::probes_allocated; perfmon_derived_probe::perfmon_derived_probe (probe* p, probe_point* l, systemtap_session &s, string e, perfmon_mode m) : derived_probe (p, l), sess(s), event(e), mode(m) { ++probes_allocated; // Now expand the local variables in the probe body perfmon_var_expanding_visitor v (sess, probes_allocated-1); this->body = v.require (this->body); if (sess.verbose > 1) clog << "perfmon-based probe" << endl; } void perfmon_derived_probe::join_group (systemtap_session& s) { throw semantic_error ("incomplete", this->tok); if (! s.perfmon_derived_probes) s.perfmon_derived_probes = new perfmon_derived_probe_group (); s.perfmon_derived_probes->enroll (this); } #if 0 void perfmon_derived_probe::emit_registrations_start (translator_output* o, unsigned index) { for (unsigned i=0; inewline() << "enter_" << name << "_" << i << " ();"; } void perfmon_derived_probe::emit_registrations_end (translator_output * o, unsigned index) { } void perfmon_derived_probe::emit_deregistrations (translator_output * o) { } void perfmon_derived_probe::emit_probe_entries (translator_output * o) { o->newline() << "#ifdef STP_TIMING"; // NB: This variable may be multiply (but identically) defined. o->newline() << "static __cacheline_aligned Stat " << "time_" << basest()->name << ";"; o->newline() << "#endif"; for (unsigned i=0; inewline() << "/* location " << i << ": " << *l << " */"; o->newline() << "static void enter_" << name << "_" << i << " (void) {"; o->indent(1); o->newline() << "const char* probe_point = " << lex_cast_qstring(*l) << ";"; emit_probe_prologue (o, (mode == perfmon_count ? "STAP_SESSION_STARTING" : "STAP_SESSION_RUNNING"), "probe_point"); // NB: locals are initialized by probe function itself o->newline() << name << " (c);"; emit_probe_epilogue (o); o->newline(-1) << "}\n"; } } #endif #if 0 void no_pfm_event_error (string s) { string msg(string("Cannot find event:" + s)); throw semantic_error(msg); } void no_pfm_mask_error (string s) { string msg(string("Cannot find mask:" + s)); throw semantic_error(msg); } void split(const string& s, vector& v, const string & separator) { string::size_type last_pos = s.find_first_not_of(separator, 0); string::size_type pos = s.find_first_of(separator, last_pos); while (string::npos != pos || string::npos != last_pos) { v.push_back(s.substr(last_pos, pos - last_pos)); last_pos = s.find_first_not_of(separator, pos); pos = s.find_first_of(separator, last_pos); } } void perfmon_derived_probe_group::emit_probes (translator_output* op, unparser* up) { for (unsigned i=0; i < probes.size(); i++) { op->newline (); up->emit_probe (probes[i]); } } void perfmon_derived_probe_group::emit_module_init (translator_output* o) { int ret; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmd_t pd[PFMLIB_MAX_PMDS]; pfarg_pmc_t pc[PFMLIB_MAX_PMCS]; pfarg_ctx_t ctx; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; unsigned int max_counters; if ( probes.size() == 0) return; ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) throw semantic_error("Unable to generate performance monitoring events (no libpfm)"); pfm_get_num_counters(&max_counters); memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* figure out the events */ for (unsigned i=0; ievent == "cycles") { if (pfm_get_cycle_event( &inp.pfp_events[i].event) != PFMLIB_SUCCESS) no_pfm_event_error(probes[i]->event); } else if (probes[i]->event == "instructions") { if (pfm_get_inst_retired_event( &inp.pfp_events[i].event) != PFMLIB_SUCCESS) no_pfm_event_error(probes[i]->event); } else { unsigned int event_id = 0; unsigned int mask_id = 0; vector event_spec; split(probes[i]->event, event_spec, ":"); int num = event_spec.size(); int masks = num - 1; if (num == 0) throw semantic_error("No events found"); /* setup event */ if (pfm_find_event(event_spec[0].c_str(), &event_id) != PFMLIB_SUCCESS) no_pfm_event_error(event_spec[0]); inp.pfp_events[i].event = event_id; /* set up masks */ if (masks > PFMLIB_MAX_MASKS_PER_EVENT) throw semantic_error("Too many unit masks specified"); for (int j=0; j < masks; j++) { if (pfm_find_event_mask(event_id, event_spec[j+1].c_str(), &mask_id) != PFMLIB_SUCCESS) no_pfm_mask_error(string(event_spec[j+1])); inp.pfp_events[i].unit_masks[j] = mask_id; } inp.pfp_events[i].num_masks = masks; } } /* number of counters in use */ inp.pfp_event_count = probes.size(); // XXX: no elimination of duplicated counters if (inp.pfp_event_count>max_counters) throw semantic_error("Too many performance monitoring events."); /* count events both in kernel and user-space */ inp.pfp_dfl_plm = PFM_PLM0 | PFM_PLM3; /* XXX: some cases a perfmon register might be used of watch dog this code doesn't handle that case */ /* figure out the pmcs for the events */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) throw semantic_error("Cannot configure events"); for (unsigned i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * There could be more pmc settings than pmd. * Figure out the actual pmds to use. */ for (unsigned i=0, j=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = outp.pfp_pmcs[j].reg_pmd_num; for(; j < outp.pfp_pmc_count; j++) if (outp.pfp_pmcs[j].reg_evt_idx != i) break; } // Output the be probes create function o->newline() << "static int register_perfmon_probes (void) {"; o->newline(1) << "int rc = 0;"; o->newline() << "/* data for perfmon */"; o->newline() << "static int _pfm_num_pmc = " << outp.pfp_pmc_count << ";"; o->newline() << "static struct pfarg_pmc _pfm_pmc[" << outp.pfp_pmc_count << "] = {"; /* output the needed bits for pmc here */ for (unsigned i=0; i < outp.pfp_pmc_count; i++) { o->newline() << "{.reg_num=" << pc[i].reg_num << ", " << ".reg_value=" << lex_cast_hex(pc[i].reg_value) << "},"; } o->newline() << "};"; o->newline() << "static int _pfm_num_pmd = " << inp.pfp_event_count << ";"; o->newline() << "static struct pfarg_pmd _pfm_pmd[" << inp.pfp_event_count << "] = {"; /* output the needed bits for pmd here */ for (unsigned i=0; i < inp.pfp_event_count; i++) { o->newline() << "{.reg_num=" << pd[i].reg_num << ", " << ".reg_value=" << pd[i].reg_value << "},"; } o->newline() << "};"; o->newline(); o->newline() << "_pfm_pmc_x=_pfm_pmc;"; o->newline() << "_pfm_num_pmc_x=_pfm_num_pmc;"; o->newline() << "_pfm_pmd_x=_pfm_pmd;"; o->newline() << "_pfm_num_pmd_x=_pfm_num_pmd;"; // call all the function bodies associated with perfcounters for (unsigned i=0; i < probes.size (); i++) probes[i]->emit_registrations_start (o,i); /* generate call to turn on instrumentation */ o->newline() << "_pfm_context.ctx_flags |= PFM_FL_SYSTEM_WIDE;"; o->newline() << "rc = rc || _stp_perfmon_setup(&_pfm_desc, &_pfm_context,"; o->newline(1) << "_pfm_pmc, _pfm_num_pmc,"; o->newline() << "_pfm_pmd, _pfm_num_pmd);"; o->newline(-1); o->newline() << "return rc;"; o->newline(-1) << "}\n"; // Output the be probes destroy function o->newline() << "static void unregister_perfmon_probes (void) {"; o->newline(1) << "_stp_perfmon_shutdown(_pfm_desc);"; o->newline(-1) << "}\n"; } #endif // ------------------------------------------------------------------------ // Standard tapset registry. // ------------------------------------------------------------------------ void register_standard_tapsets(systemtap_session & s) { s.pattern_root->bind(TOK_BEGIN)->bind(new be_builder(BEGIN)); s.pattern_root->bind_num(TOK_BEGIN)->bind(new be_builder(BEGIN)); s.pattern_root->bind(TOK_END)->bind(new be_builder(END)); s.pattern_root->bind_num(TOK_END)->bind(new be_builder(END)); s.pattern_root->bind(TOK_ERROR)->bind(new be_builder(ERROR)); s.pattern_root->bind_num(TOK_ERROR)->bind(new be_builder(ERROR)); s.pattern_root->bind(TOK_NEVER)->bind(new never_builder()); timer_builder::register_patterns(s); s.pattern_root->bind(TOK_TIMER)->bind("profile")->bind(new profile_builder()); s.pattern_root->bind("perfmon")->bind_str("counter") ->bind(new perfmon_builder()); // dwarf-based kprobe/uprobe parts dwarf_derived_probe::register_patterns(s); // XXX: user-space starter set s.pattern_root->bind_num(TOK_PROCESS) ->bind_num(TOK_STATEMENT)->bind(TOK_ABSOLUTE) ->bind(new uprobe_builder ()); s.pattern_root->bind_num(TOK_PROCESS) ->bind_num(TOK_STATEMENT)->bind(TOK_ABSOLUTE)->bind(TOK_RETURN) ->bind(new uprobe_builder ()); // utrace user-space probes s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_BEGIN) ->bind(new utrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_BEGIN) ->bind(new utrace_builder ()); s.pattern_root->bind(TOK_PROCESS)->bind(TOK_BEGIN) ->bind(new utrace_builder ()); s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_END) ->bind(new utrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_END) ->bind(new utrace_builder ()); s.pattern_root->bind(TOK_PROCESS)->bind(TOK_END) ->bind(new utrace_builder ()); s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_THREAD)->bind(TOK_BEGIN) ->bind(new utrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_THREAD)->bind(TOK_BEGIN) ->bind(new utrace_builder ()); s.pattern_root->bind(TOK_PROCESS)->bind(TOK_THREAD)->bind(TOK_BEGIN) ->bind(new utrace_builder ()); s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_THREAD)->bind(TOK_END) ->bind(new utrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_THREAD)->bind(TOK_END) ->bind(new utrace_builder ()); s.pattern_root->bind(TOK_PROCESS)->bind(TOK_THREAD)->bind(TOK_END) ->bind(new utrace_builder ()); s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_SYSCALL) ->bind(new utrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_SYSCALL) ->bind(new utrace_builder ()); s.pattern_root->bind(TOK_PROCESS)->bind(TOK_SYSCALL) ->bind(new utrace_builder ()); s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_SYSCALL)->bind(TOK_RETURN) ->bind(new utrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_SYSCALL)->bind(TOK_RETURN) ->bind(new utrace_builder ()); s.pattern_root->bind(TOK_PROCESS)->bind(TOK_SYSCALL)->bind(TOK_RETURN) ->bind(new utrace_builder ()); // itrace user-space probes s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_INSN) ->bind(new itrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_INSN) ->bind(new itrace_builder ()); s.pattern_root->bind_str(TOK_PROCESS)->bind(TOK_INSN)->bind(TOK_BLOCK) ->bind(new itrace_builder ()); s.pattern_root->bind_num(TOK_PROCESS)->bind(TOK_INSN)->bind(TOK_BLOCK) ->bind(new itrace_builder ()); // marker-based parts s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_MARK) ->bind(new mark_builder()); s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_MARK)->bind_str(TOK_FORMAT) ->bind(new mark_builder()); // kernel tracepoint probes s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_TRACE) ->bind(new tracepoint_builder()); // procfs parts s.pattern_root->bind(TOK_PROCFS)->bind(TOK_READ)->bind(new procfs_builder()); s.pattern_root->bind_str(TOK_PROCFS)->bind(TOK_READ) ->bind(new procfs_builder()); s.pattern_root->bind(TOK_PROCFS)->bind(TOK_WRITE)->bind(new procfs_builder()); s.pattern_root->bind_str(TOK_PROCFS)->bind(TOK_WRITE) ->bind(new procfs_builder()); } vector all_session_groups(systemtap_session& s) { vector g; #define DOONE(x) if (s. x##_derived_probes) g.push_back (s. x##_derived_probes) // Note that order *is* important here. We want to make sure we // register (actually run) begin probes before any other probe type // is run. Similarly, when unregistering probes, we want to // unregister (actually run) end probes after every other probe type // has be unregistered. To do the latter, // c_unparser::emit_module_exit() will run this list backwards. DOONE(be); DOONE(dwarf); DOONE(uprobe); DOONE(timer); DOONE(profile); DOONE(mark); DOONE(tracepoint); DOONE(hrtimer); DOONE(perfmon); DOONE(procfs); // Another "order is important" item. We want to make sure we // "register" the dummy task_finder probe group after all probe // groups that use the task_finder. DOONE(utrace); DOONE(itrace); DOONE(task_finder); #undef DOONE return g; } /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */