summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--README4
-rw-r--r--buildrun.cxx29
-rwxr-xr-xdtrace58
-rw-r--r--main.cxx30
-rw-r--r--runtime/print.c28
-rw-r--r--runtime/runtime.h14
-rw-r--r--runtime/sym.c27
-rw-r--r--runtime/sym.h1
-rw-r--r--runtime/syscall.h25
-rw-r--r--runtime/unwind.c4
-rw-r--r--runtime/unwind/unwind.h4
-rw-r--r--runtime/uprobes/uprobes.c34
-rw-r--r--runtime/uprobes/uprobes.h6
-rw-r--r--runtime/uprobes/uprobes_i386.c3
-rw-r--r--runtime/uprobes/uprobes_x86.c9
-rw-r--r--runtime/uprobes/uprobes_x86_64.c7
-rw-r--r--runtime/uprobes2/uprobes.c43
-rw-r--r--runtime/uprobes2/uprobes.h6
-rw-r--r--runtime/uprobes2/uprobes_x86.c9
-rw-r--r--runtime/vsprintf.c355
-rw-r--r--stap.1.in9
-rw-r--r--tapset/utrace.stp24
-rw-r--r--tapsets.cxx270
-rw-r--r--testsuite/systemtap.base/labels.exp31
-rw-r--r--testsuite/systemtap.base/static_uprobes.exp1
-rw-r--r--testsuite/systemtap.base/utrace_syscall_args.c67
-rw-r--r--testsuite/systemtap.base/utrace_syscall_args.exp82
-rw-r--r--testsuite/systemtap.base/utrace_syscall_args.stp366
-rw-r--r--testsuite/systemtap.examples/index.html3
-rw-r--r--testsuite/systemtap.examples/index.txt7
-rw-r--r--testsuite/systemtap.examples/keyword-index.html23
-rw-r--r--testsuite/systemtap.examples/keyword-index.txt34
-rw-r--r--testsuite/systemtap.examples/network/dropwatch.meta13
-rwxr-xr-xtestsuite/systemtap.examples/network/dropwatch.stp30
-rw-r--r--translate.cxx88
36 files changed, 1455 insertions, 290 deletions
diff --git a/.gitignore b/.gitignore
index cd63142e..5869f401 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ config.status
.deps
loc2c-test
run-stap
+run-staprun
stamp-h1
stap
staprun
diff --git a/README b/README
index 7c1ab412..ac6f0a97 100644
--- a/README
+++ b/README
@@ -106,3 +106,7 @@ Building a kernel.org kernel:
- Or else, if you wish to install the kernel build/debuginfo data into
a place where systemtap will find it without the "-r" option:
% ln -s /path/to/kernel/build/tree /lib/modules/RELEASE/build
+
+- Instead of using the "-r" option, you can also use the environment
+ variable SYSTEMTAP_RELEASE to direct systemtap to the kernel data.
+
diff --git a/buildrun.cxx b/buildrun.cxx
index 82ac9d4e..e0f22f29 100644
--- a/buildrun.cxx
+++ b/buildrun.cxx
@@ -377,7 +377,6 @@ make_tracequery(systemtap_session& s, string& name, const vector<string>& extra_
// create our source file
string source(dir + "/tracequery.c");
ofstream osrc(source.c_str());
- osrc << "#include <linux/module.h>" << endl;
osrc << "#ifdef CONFIG_TRACEPOINTS" << endl;
osrc << "#include <linux/tracepoint.h>" << endl;
@@ -391,6 +390,10 @@ make_tracequery(systemtap_session& s, string& name, const vector<string>& extra_
osrc << "#define DEFINE_TRACE(name, proto, args) \\" << endl;
osrc << " DECLARE_TRACE(name, TPPROTO(proto), TPARGS(args))" << endl;
+ // some headers may have been pulled in already indirectly, so we need this
+ // to ensure that they still use our definition
+ osrc << "#define TRACE_HEADER_MULTI_READ 1" << endl;
+
// PR9993: Add extra headers to work around undeclared types in individual
// include/trace/foo.h files
for (unsigned z=0; z<extra_headers.size(); z++)
@@ -398,32 +401,38 @@ make_tracequery(systemtap_session& s, string& name, const vector<string>& extra_
// dynamically pull in all tracepoint headers from include/trace/
glob_t trace_glob;
- string globs[2] = { "/include/trace/*.h", "/source/include/trace/*.h" };
- for (unsigned z=0; z<2; z++)
+ string globs[] = {
+ "/include/trace/*.h",
+ "/include/trace/events/*.h",
+ "/source/include/trace/*.h",
+ "/source/include/trace/events/*.h",
+ };
+ for (unsigned z = 0; z < sizeof(globs) / sizeof(globs[0]); z++)
{
string glob_str(s.kernel_build_tree + globs[z]);
glob(glob_str.c_str(), 0, NULL, &trace_glob);
for (unsigned i = 0; i < trace_glob.gl_pathc; ++i)
{
- string header(basename(trace_glob.gl_pathv[i]));
+ string header(trace_glob.gl_pathv[i]);
+ size_t root_pos = header.rfind("/include/");
+ assert(root_pos != string::npos);
+ header.erase(0, root_pos + 9);
// filter out a few known "internal-only" headers
- if (header == "trace_events.h")
+ if (header.find("/ftrace.h") != string::npos)
+ continue;
+ if (header.find("/trace_events.h") != string::npos)
continue;
if (header.find("_event_types.h") != string::npos)
continue;
- osrc << "#include <trace/" << header << ">" << endl;
+ osrc << "#include <" << header << ">" << endl;
}
globfree(&trace_glob);
}
// finish up the module source
osrc << "#endif /* CONFIG_TRACEPOINTS */" << endl;
- osrc << "int init_module(void) { return 0; }" << endl;
- osrc << "void cleanup_module(void) {}" << endl;
- osrc << "MODULE_DESCRIPTION(\"tracepoint query\");" << endl;
- osrc << "MODULE_LICENSE(\"GPL\");" << endl;
osrc.close();
// make the module
diff --git a/dtrace b/dtrace
index ca95b678..a2b495b2 100755
--- a/dtrace
+++ b/dtrace
@@ -17,12 +17,11 @@ from subprocess import call
from tempfile import mkstemp
class provider:
- arglist = dict()
def open(self, provider, header):
have_provider = False
self.f = open(provider)
self.h = open(header,mode='w')
- self.h.write("// Generated by the Systemtap dtrace wrapper\n")
+ self.h.write("/* Generated by the Systemtap dtrace wrapper */\n")
self.h.write("\n#include <sys/sdt.h>\n\n")
in_comment = False
while (True):
@@ -49,7 +48,6 @@ class provider:
new_args = ""
i = 0
c = 0
- self.arglist[this_probe] = ""
while (i < len(args)):
if (args[i:i+1] == ","):
new_args = ('%s%s' % (new_args, args[i]))
@@ -57,41 +55,24 @@ class provider:
else:
new_args = new_args + args[i]
i += 1
- if (len(new_args) > 0):
- self.arglist[this_probe] = ('%s arg%d' % (new_args, c))
if (len(new_args) == 0):
- self.h.write ('#define %s() STAP_PROBE(provider,%s)\n' % (this_probe_canon, this_probe))
- elif (c == 0):
- self.h.write ('#define %s(arg1) STAP_PROBE%d(provider,%s,arg1)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 1):
- self.h.write ('#define %s(arg1,arg2) STAP_PROBE%d(provider,%s,arg1,arg2)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 2):
- self.h.write ('#define %s(arg1,arg2,arg3) STAP_PROBE%d(provider,%s,arg1,arg2,arg3)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 3):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 4):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4,arg5) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4,arg5)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 5):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4,arg5,arg6) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4,arg5,arg6)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 6):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4,arg5,arg6,arg7) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4,arg5,arg6,arg7)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 7):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 8):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\n' % (this_probe_canon, c+1, this_probe))
- elif (c == 9):
- self.h.write ('#define %s(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) STAP_PROBE%d(provider,%s,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10)\n' % (this_probe_canon, c+1, this_probe))
+ c = 0
+ stap_str = "STAP_PROBE(provider,%s" % (this_probe)
+ else:
+ c += 1
+ stap_str = "STAP_PROBE%d(provider,%s" % (c,this_probe)
+ define_str = "#define %s(" % (this_probe_canon)
+ i = 1
+ while (i <= c):
+ if (i != 1):
+ define_str += ","
+ define_str = define_str + "arg%s" % (i);
+ stap_str = stap_str + ",arg%s" % (i);
+ i += 1
+ self.h.write ('/* %s (%s) */\n' % (this_probe_canon,new_args))
self.h.write ('#define %s_ENABLED() 1\n' % this_probe_canon)
-
- def get(self, arg):
- print arg
- if (arg in self.arglist):
- return self.arglist[arg]
- else:
- return ""
-########################################################################
-# main
-########################################################################
+ self.h.write (define_str + ") \\\n")
+ self.h.write (stap_str + ")\n\n")
def usage ():
print "Usage " + sys.argv[0] + " [-h | -G] -s File.d -o File {Files}"
@@ -107,6 +88,11 @@ def open_file (arg):
sys.exit(1)
return file
+
+########################################################################
+# main
+########################################################################
+
if (len (sys.argv) < 2):
usage()
diff --git a/main.cxx b/main.cxx
index 794a5891..37c5b135 100644
--- a/main.cxx
+++ b/main.cxx
@@ -425,6 +425,36 @@ main (int argc, char * const argv [])
if (s_tc != NULL)
s.tapset_compile_coverage = true;
+ const char* s_kr = getenv ("SYSTEMTAP_RELEASE");
+ if (s_kr != NULL) {
+ if (s_kr[0] == '/') // fully specified path
+ {
+ s.kernel_build_tree = s_kr;
+ string version_file_name = s.kernel_build_tree + "/include/config/kernel.release";
+ // The file include/config/kernel.release within the
+ // build tree is used to pull out the version information
+ ifstream version_file (version_file_name.c_str());
+ if (version_file.fail ())
+ {
+ cerr << "Missing " << version_file_name << endl;
+ exit(1);
+ }
+ else
+ {
+ char c;
+ s.kernel_release = "";
+ while (version_file.get(c) && c != '\n')
+ s.kernel_release.push_back(c);
+ }
+ }
+ else
+ {
+ s.kernel_release = string (s_kr);
+ s.kernel_build_tree = "/lib/modules/" + s.kernel_release + "/build";
+ }
+ }
+
+
while (true)
{
int long_opt;
diff --git a/runtime/print.c b/runtime/print.c
index 2c84d3c9..964a73c2 100644
--- a/runtime/print.c
+++ b/runtime/print.c
@@ -13,8 +13,8 @@
#include "string.h"
-#include "vsprintf.c"
#include "transport/transport.c"
+#include "vsprintf.c"
/** @file print.c
* Printing Functions.
@@ -173,34 +173,10 @@ static void _stp_print_binary (int num, ...)
*/
static void _stp_printf (const char *fmt, ...)
{
- int num;
va_list args;
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id());
- char *buf = pb->buf + pb->len;
- int size = STP_BUFFER_SIZE - pb->len;
-
va_start(args, fmt);
- num = _stp_vsnprintf(buf, size, fmt, args);
+ _stp_vsnprintf(NULL, 0, fmt, args);
va_end(args);
- if (unlikely(num >= size)) {
- /* overflowed the buffer */
- if (pb->len == 0) {
- /* A single print request exceeded the buffer size. */
- /* Should not be possible with Systemtap-generated code. */
- pb->len = STP_BUFFER_SIZE;
- _stp_print_flush();
- num = 0;
- } else {
- /* Need more space. Flush the previous contents */
- _stp_print_flush();
-
- /* try again */
- va_start(args, fmt);
- num = _stp_vsnprintf(pb->buf, STP_BUFFER_SIZE, fmt, args);
- va_end(args);
- }
- }
- pb->len += num;
}
/** Write a string into the print buffer.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 0a656b78..c2e927cc 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -66,6 +66,13 @@ static struct
#define MAXTRACE 20
#endif
+/* dwarf unwinder only tested so far on i386 and x86_64. */
+#if (defined(__i386__) || defined(__x86_64__))
+#ifndef STP_USE_DWARF_UNWINDER
+#define STP_USE_DWARF_UNWINDER
+#endif
+#endif
+
#ifdef CONFIG_FRAME_POINTER
/* Just because frame pointers are available does not mean we can trust them. */
#ifndef STP_USE_DWARF_UNWINDER
@@ -73,13 +80,6 @@ static struct
#endif
#endif
-/* dwarf unwinder only tested so far on i386 and x86_64,
- but globally disabled for now */
-#if 0
-// !defined(STP_USE_FRAME_BUFFER) && (defined(__i386__) || defined(__x86_64__))
-#define STP_USE_DWARF_UNWINDER
-#endif
-
#include "alloc.c"
#include "print.c"
#include "string.c"
diff --git a/runtime/sym.c b/runtime/sym.c
index a2cdd0ff..f6f97ac2 100644
--- a/runtime/sym.c
+++ b/runtime/sym.c
@@ -136,9 +136,7 @@ static struct _stp_module *_stp_mod_sec_lookup(unsigned long addr,
struct _stp_section **sec)
{
void *user = NULL;
- struct _stp_module *m = NULL;
unsigned midx = 0;
- unsigned long closest_section_offset = ~0;
// Try vma matching first if task given.
if (task)
@@ -149,8 +147,9 @@ static struct _stp_module *_stp_mod_sec_lookup(unsigned long addr,
NULL, &user) == 0)
if (user != NULL)
{
- m = (struct _stp_module *)user;
- *sec = &m->sections[0]; // XXX check actual section and relocate
+ struct _stp_module *m = (struct _stp_module *)user;
+ if (sec)
+ *sec = &m->sections[0]; // XXX check actual section and relocate
dbug_sym(1, "found section %s in module %s at 0x%lx\n",
m->sections[0].name, m->name, vm_start);
if (strcmp(".dynamic", m->sections[0].name) == 0)
@@ -164,21 +163,19 @@ static struct _stp_module *_stp_mod_sec_lookup(unsigned long addr,
unsigned secidx;
for (secidx = 0; secidx < _stp_modules[midx]->num_sections; secidx++)
{
- unsigned long this_section_addr;
- unsigned long this_section_offset;
- this_section_addr = _stp_modules[midx]->sections[secidx].addr;
- if (addr < this_section_addr) continue;
- this_section_offset = addr - this_section_addr;
- if (this_section_offset < closest_section_offset)
- {
- closest_section_offset = this_section_offset;
- m = _stp_modules[midx];
+ unsigned long sec_addr;
+ unsigned long sec_size;
+ sec_addr = _stp_modules[midx]->sections[secidx].addr;
+ sec_size = _stp_modules[midx]->sections[secidx].size;
+ if (addr >= sec_addr && addr < sec_addr + sec_size)
+ {
if (sec)
- *sec = & m->sections[secidx];
+ *sec = & _stp_modules[midx]->sections[secidx];
+ return _stp_modules[midx];
}
}
}
- return m;
+ return NULL;
}
diff --git a/runtime/sym.h b/runtime/sym.h
index 586b10ca..80c334fb 100644
--- a/runtime/sym.h
+++ b/runtime/sym.h
@@ -18,6 +18,7 @@ struct _stp_symbol {
struct _stp_section {
const char *name;
unsigned long addr; /* XXX: belongs in per-address-space tables */
+ unsigned long size; /* length of the address space module covers. */
struct _stp_symbol *symbols; /* ordered by address */
unsigned num_symbols;
};
diff --git a/runtime/syscall.h b/runtime/syscall.h
index 6d22ba83..5e538389 100644
--- a/runtime/syscall.h
+++ b/runtime/syscall.h
@@ -124,7 +124,7 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
static inline long
syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
- return regs->r15;
+ return regs->r15;
}
#endif
@@ -304,6 +304,17 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
_stp_error("invalid syscall arg request");
return;
}
+#ifdef CONFIG_PPC64
+ if (test_tsk_thread_flag(task, TIF_32BIT)) {
+ /*
+ * Zero-extend 32-bit argument values. The high bits are
+ * garbage ignored by the actual syscall dispatch.
+ */
+ while (n-- > 0)
+ args[n] = (u32) regs->gpr[3 + i + n];
+ return;
+ }
+#endif
memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
}
#endif
@@ -324,22 +335,22 @@ __ia64_syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
switch (i) {
case 0:
if (!n--) break;
- *args++ = *__ia64_fetch_register(i + 32, regs, cache);
+ *args++ = ia64_fetch_register(32, regs, cache);
case 1:
if (!n--) break;
- *args++ = *__ia64_fetch_register(i + 33, regs, cache);
+ *args++ = ia64_fetch_register(33, regs, cache);
case 2:
if (!n--) break;
- *args++ = *__ia64_fetch_register(i + 34, regs, cache);
+ *args++ = ia64_fetch_register(34, regs, cache);
case 3:
if (!n--) break;
- *args++ = *__ia64_fetch_register(i + 35, regs, cache);
+ *args++ = ia64_fetch_register(35, regs, cache);
case 4:
if (!n--) break;
- *args++ = *__ia64_fetch_register(i + 36, regs, cache);
+ *args++ = ia64_fetch_register(36, regs, cache);
case 5:
if (!n--) break;
- *args++ = *__ia64_fetch_register(i + 37, regs, cache);
+ *args++ = ia64_fetch_register(37, regs, cache);
}
}
#endif
diff --git a/runtime/unwind.c b/runtime/unwind.c
index 9c704e28..41af72a7 100644
--- a/runtime/unwind.c
+++ b/runtime/unwind.c
@@ -345,7 +345,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, s
state->label = NULL;
return 1;
}
- if (state->stackDepth >= MAX_STACK_DEPTH)
+ if (state->stackDepth >= STP_MAX_STACK_DEPTH)
return 0;
state->stack[state->stackDepth++] = ptr.p8;
break;
@@ -581,7 +581,7 @@ static int unwind(struct unwind_frame_info *frame)
if (UNW_PC(frame) == 0)
return -EINVAL;
- m = _stp_mod_sec_lookup (pc, &s);
+ m = _stp_mod_sec_lookup (pc, current, &s);
if (unlikely(m == NULL)) {
dbug_unwind(1, "No module found for pc=%lx", pc);
return -EINVAL;
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
index 78a4bfef..3b6d0de0 100644
--- a/runtime/unwind/unwind.h
+++ b/runtime/unwind/unwind.h
@@ -23,7 +23,7 @@
#error "Unsupported dwarf unwind architecture"
#endif
-#define MAX_STACK_DEPTH 8
+#define STP_MAX_STACK_DEPTH 8
#ifndef BUILD_BUG_ON_ZERO
#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
@@ -135,7 +135,7 @@ struct unwind_state {
unsigned stackDepth:8;
unsigned version:8;
const u8 *label;
- const u8 *stack[MAX_STACK_DEPTH];
+ const u8 *stack[STP_MAX_STACK_DEPTH];
};
static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c
index 9dfb82b9..27e923b8 100644
--- a/runtime/uprobes/uprobes.c
+++ b/runtime/uprobes/uprobes.c
@@ -1049,8 +1049,7 @@ fail_tsk:
}
EXPORT_SYMBOL_GPL(register_uprobe);
-/* See Documentation/uprobes.txt. */
-void unregister_uprobe(struct uprobe *u)
+void __unregister_uprobe(struct uprobe *u, bool remove_bkpt)
{
struct task_struct *p;
struct uprobe_process *uproc;
@@ -1104,10 +1103,13 @@ void unregister_uprobe(struct uprobe *u)
if (!list_empty(&ppt->uprobe_list))
goto done;
- /*
- * The last uprobe at ppt's probepoint is being unregistered.
- * Queue the breakpoint for removal.
- */
+ /* The last uprobe at ppt's probepoint is being unregistered. */
+ if (!remove_bkpt) {
+ uprobe_free_probept(ppt);
+ goto done;
+ }
+
+ /* Queue the breakpoint for removal. */
ppt->state = UPROBE_REMOVING;
list_add_tail(&ppt->pd_node, &uproc->pending_uprobes);
@@ -1132,8 +1134,20 @@ done:
up_write(&uproc->rwsem);
uprobe_put_process(uproc);
}
+
+/* See Documentation/uprobes.txt. */
+void unregister_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, true);
+}
EXPORT_SYMBOL_GPL(unregister_uprobe);
+void unmap_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, false);
+}
+EXPORT_SYMBOL_GPL(unmap_uprobe);
+
/* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */
static struct task_struct *find_surviving_thread(struct uprobe_process *uproc)
{
@@ -2540,6 +2554,14 @@ void unregister_uretprobe(struct uretprobe *rp)
}
EXPORT_SYMBOL_GPL(unregister_uretprobe);
+void unmap_uretprobe(struct uretprobe *rp)
+{
+ if (!rp)
+ return;
+ unmap_uprobe(&rp->u);
+}
+EXPORT_SYMBOL_GPL(unmap_uretprobe);
+
/*
* uproc->ssol_area has been successfully set up. Establish the
* uretprobe trampoline in slot 0.
diff --git a/runtime/uprobes/uprobes.h b/runtime/uprobes/uprobes.h
index 0266cb7d..d542420d 100644
--- a/runtime/uprobes/uprobes.h
+++ b/runtime/uprobes/uprobes.h
@@ -35,6 +35,9 @@
#include <linux/types.h>
#include <linux/list.h>
+/* Version 2 includes unmap_u[ret]probe(). */
+#define UPROBES_API_VERSION 2
+
struct pt_regs;
enum uprobe_type {
@@ -89,6 +92,9 @@ extern void unregister_uprobe(struct uprobe *u);
/* For runtime, assume uprobes support includes uretprobes. */
extern int register_uretprobe(struct uretprobe *rp);
extern void unregister_uretprobe(struct uretprobe *rp);
+/* For PRs 9940, 6852... */
+extern void unmap_uprobe(struct uprobe *u);
+extern void unmap_uretprobe(struct uretprobe *rp);
#ifdef UPROBES_IMPLEMENTATION
diff --git a/runtime/uprobes/uprobes_i386.c b/runtime/uprobes/uprobes_i386.c
index ffa088ed..c43f87bf 100644
--- a/runtime/uprobes/uprobes_i386.c
+++ b/runtime/uprobes/uprobes_i386.c
@@ -44,7 +44,7 @@
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1), /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -114,7 +114,6 @@
* 26, 2e, 36, 3e, - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seems to be used, so we support them.
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
* f2, f3 - repnz, repz prefixes
diff --git a/runtime/uprobes/uprobes_x86.c b/runtime/uprobes/uprobes_x86.c
index e3bdf8ff..404c9518 100644
--- a/runtime/uprobes/uprobes_x86.c
+++ b/runtime/uprobes/uprobes_x86.c
@@ -45,8 +45,8 @@ static const unsigned long long good_insns_64[256 / 64] = {
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -71,7 +71,7 @@ static const unsigned long long good_insns_32[256 / 64] = {
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -119,7 +119,7 @@ static const unsigned long long good_2byte_insns[256 / 64] = {
* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
*
* invalid opcodes in 64-bit mode:
- * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5
+ * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
*
* 63 - we support this opcode in x86_64 but not in i386.
* opcodes we may need to refine support for:
@@ -141,7 +141,6 @@ static const unsigned long long good_2byte_insns[256 / 64] = {
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seems to be used, so we support them.
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
*/
diff --git a/runtime/uprobes/uprobes_x86_64.c b/runtime/uprobes/uprobes_x86_64.c
index 8cf36623..56ebe2e1 100644
--- a/runtime/uprobes/uprobes_x86_64.c
+++ b/runtime/uprobes/uprobes_x86_64.c
@@ -45,8 +45,8 @@ static const unsigned long good_insns_64[256 / 64] = {
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -71,7 +71,7 @@ static const unsigned long good_insns_32[256 / 64] = {
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -141,7 +141,6 @@ static const unsigned long good_2byte_insns[256 / 64] = {
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seems to be used, so we support them.
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
*/
diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c
index a0e9f2fe..9ea05349 100644
--- a/runtime/uprobes2/uprobes.c
+++ b/runtime/uprobes2/uprobes.c
@@ -955,10 +955,15 @@ static int defer_registration(struct uprobe *u, int regflag,
*/
static struct pid *uprobe_get_tg_leader(pid_t p)
{
- struct pid *pid;
+ struct pid *pid = NULL;
rcu_read_lock();
- pid = find_vpid(p);
+ /*
+ * We need this check because unmap_u[ret]probe() can be called
+ * from a report_death callback, where current->nsproxy is NULL.
+ */
+ if (current->nsproxy)
+ pid = find_vpid(p);
if (pid) {
struct task_struct *t = pid_task(pid, PIDTYPE_PID);
if (t)
@@ -1138,8 +1143,7 @@ fail_tsk:
}
EXPORT_SYMBOL_GPL(register_uprobe);
-/* See Documentation/uprobes.txt. */
-void unregister_uprobe(struct uprobe *u)
+void __unregister_uprobe(struct uprobe *u, bool remove_bkpt)
{
struct pid *p;
struct uprobe_process *uproc;
@@ -1193,10 +1197,13 @@ void unregister_uprobe(struct uprobe *u)
if (!list_empty(&ppt->uprobe_list))
goto done;
- /*
- * The last uprobe at ppt's probepoint is being unregistered.
- * Queue the breakpoint for removal.
- */
+ /* The last uprobe at ppt's probepoint is being unregistered. */
+ if (!remove_bkpt) {
+ uprobe_free_probept(ppt);
+ goto done;
+ }
+
+ /* Queue the breakpoint for removal. */
ppt->state = UPROBE_REMOVING;
list_add_tail(&ppt->pd_node, &uproc->pending_uprobes);
@@ -1221,8 +1228,20 @@ done:
up_write(&uproc->rwsem);
uprobe_put_process(uproc, false);
}
+
+/* See Documentation/uprobes.txt. */
+void unregister_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, true);
+}
EXPORT_SYMBOL_GPL(unregister_uprobe);
+void unmap_uprobe(struct uprobe *u)
+{
+ __unregister_uprobe(u, false);
+}
+EXPORT_SYMBOL_GPL(unmap_uprobe);
+
/* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */
static struct task_struct *find_surviving_thread(struct uprobe_process *uproc)
{
@@ -2718,6 +2737,14 @@ void unregister_uretprobe(struct uretprobe *rp)
}
EXPORT_SYMBOL_GPL(unregister_uretprobe);
+void unmap_uretprobe(struct uretprobe *rp)
+{
+ if (!rp)
+ return;
+ unmap_uprobe(&rp->u);
+}
+EXPORT_SYMBOL_GPL(unmap_uretprobe);
+
/*
* uproc->ssol_area has been successfully set up. Establish the
* uretprobe trampoline in the next available slot following the
diff --git a/runtime/uprobes2/uprobes.h b/runtime/uprobes2/uprobes.h
index 112e29e2..ae0692f0 100644
--- a/runtime/uprobes2/uprobes.h
+++ b/runtime/uprobes2/uprobes.h
@@ -28,6 +28,9 @@
#define utrace_attached_engine utrace_engine
#endif
+/* Version 2 includes unmap_u[ret]probe(). */
+#define UPROBES_API_VERSION 2
+
struct pt_regs;
enum uprobe_type {
@@ -82,6 +85,9 @@ extern void unregister_uprobe(struct uprobe *u);
/* For runtime, assume uprobes support includes uretprobes. */
extern int register_uretprobe(struct uretprobe *rp);
extern void unregister_uretprobe(struct uretprobe *rp);
+/* For PRs 9940, 6852... */
+extern void unmap_uprobe(struct uprobe *u);
+extern void unmap_uretprobe(struct uretprobe *rp);
#ifdef UPROBES_IMPLEMENTATION
diff --git a/runtime/uprobes2/uprobes_x86.c b/runtime/uprobes2/uprobes_x86.c
index effb7444..8c80293d 100644
--- a/runtime/uprobes2/uprobes_x86.c
+++ b/runtime/uprobes2/uprobes_x86.c
@@ -50,8 +50,8 @@ static const u64 good_insns_64[256 / 64] = {
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 50 */
W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -76,7 +76,7 @@ static const u64 good_insns_32[256 / 64] = {
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1)| /* 90 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
@@ -124,7 +124,7 @@ static const u64 good_2byte_insns[256 / 64] = {
* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
*
* invalid opcodes in 64-bit mode:
- * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, c4-c5, d4-d5
+ * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
*
* 63 - we support this opcode in x86_64 but not in i386.
* opcodes we may need to refine support for:
@@ -146,7 +146,6 @@ static const u64 good_2byte_insns[256 / 64] = {
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
* but 64 and 65 (fs: and gs:) seem to be used, so we support them
* 67 - addr16 prefix
- * 9b - wait/fwait
* ce - into
* f0 - lock prefix
*/
diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c
index bd58d760..38ab0e2d 100644
--- a/runtime/vsprintf.c
+++ b/runtime/vsprintf.c
@@ -12,6 +12,9 @@
#ifndef _VSPRINTF_C_
#define _VSPRINTF_C_
+//forward declaration for _stp_vsnprintf
+static void * _stp_reserve_bytes (int);
+
static int skip_atoi(const char **s)
{
int i=0;
@@ -22,6 +25,10 @@ static int skip_atoi(const char **s)
enum print_flag {STP_ZEROPAD=1, STP_SIGN=2, STP_PLUS=4, STP_SPACE=8, STP_LEFT=16, STP_SPECIAL=32, STP_LARGE=64};
+/*
+ * Changes to number() will require a corresponding change to number_size below,
+ * to ensure proper buffer allocation for _stp_printf.
+ */
static char * number(char * buf, char * end, uint64_t num, int base, int size, int precision, enum print_flag type)
{
char c,sign,tmp[66];
@@ -115,6 +122,85 @@ static char * number(char * buf, char * end, uint64_t num, int base, int size, i
return buf;
}
+/*
+ * Calculates the number of bytes required to print the parameter num. A change to
+ * number() requires a corresponding change here, and vice versa, to ensure the
+ * calculated size and printed size match.
+ */
+static int number_size(uint64_t num, int base, int size, int precision, enum print_flag type) {
+ char c,sign,tmp[66];
+ const char *digits;
+ static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i, num_bytes = 0;
+
+ digits = (type & STP_LARGE) ? large_digits : small_digits;
+ if (type & STP_LEFT)
+ type &= ~STP_ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & STP_ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & STP_SIGN) {
+ if ((int64_t) num < 0) {
+ sign = '-';
+ num = - (int64_t) num;
+ size--;
+ } else if (type & STP_PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & STP_SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & STP_SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(STP_ZEROPAD+STP_LEFT))) {
+ while(size-->0) {
+ num_bytes++;
+ }
+ }
+ if (sign) {
+ num_bytes++;
+ }
+ if (type & STP_SPECIAL) {
+ if (base==8) {
+ num_bytes++;
+ } else if (base==16) {
+ num_bytes+=2;
+ }
+ }
+ if (!(type & STP_LEFT)) {
+ while (size-- > 0) {
+ num_bytes++;
+ }
+ }
+ while (i < precision--) {
+ num_bytes++;
+ }
+ while (i-- > 0) {
+ num_bytes++;
+ }
+ while (size-- > 0) {
+ num_bytes++;
+ }
+ return num_bytes;
+
+}
+
static int check_binary_precision (int precision) {
/* precision can be unspecified (-1) or one of 1, 2, 4 or 8. */
switch (precision) {
@@ -148,9 +234,262 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
if (unlikely((int) size < 0))
return 0;
- str = buf;
- end = buf + size - 1;
+ /*
+ * buf will be NULL when this function is called from _stp_printf.
+ * This branch calculates the exact size print buffer required for
+ * the string and allocates it with _stp_reserve_bytes. A change
+ * to this branch requires a corresponding change to the same
+ * section of code below.
+ */
+ if (buf == NULL) {
+ const char* fmt_copy = fmt;
+ int num_bytes = 0;
+ va_list args_copy;
+
+ va_copy(args_copy, args);
+
+ for (; *fmt_copy ; ++fmt_copy) {
+ if (*fmt_copy != '%') {
+ num_bytes++;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat_copy:
+ ++fmt_copy; /* this also skips first '%' */
+ switch (*fmt_copy) {
+ case '-': flags |= STP_LEFT; goto repeat_copy;
+ case '+': flags |= STP_PLUS; goto repeat_copy;
+ case ' ': flags |= STP_SPACE; goto repeat_copy;
+ case '#': flags |= STP_SPECIAL; goto repeat_copy;
+ case '0': flags |= STP_ZEROPAD; goto repeat_copy;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt_copy))
+ field_width = skip_atoi(&fmt_copy);
+ else if (*fmt_copy == '*') {
+ ++fmt_copy;
+ /* it's the next argument */
+ field_width = va_arg(args_copy, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= STP_LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt_copy == '.') {
+ ++fmt_copy;
+ if (isdigit(*fmt_copy))
+ precision = skip_atoi(&fmt_copy);
+ else if (*fmt_copy == '*') {
+ ++fmt_copy;
+ /* it's the next argument */
+ precision = va_arg(args_copy, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt_copy == 'h' || *fmt_copy == 'l' || *fmt_copy == 'L') {
+ qualifier = *fmt_copy;
+ ++fmt_copy;
+ if (qualifier == 'l' && *fmt_copy == 'l') {
+ qualifier = 'L';
+ ++fmt_copy;
+ }
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt_copy) {
+ case 'b':
+ num = va_arg(args_copy, int64_t);
+
+ /* Only certain values are valid for the precision. */
+ precision = check_binary_precision (precision);
+
+ /* Unspecified field width defaults to the specified
+ precision and vice versa. If neither is specified,
+ then both default to 8. */
+ if (field_width == -1) {
+ if (precision == -1) {
+ field_width = 8;
+ precision = 8;
+ }
+ else
+ field_width = precision;
+ }
+ else if (precision == -1) {
+ precision = check_binary_precision (field_width);
+ if (precision == -1)
+ precision = 8;
+ }
+
+ len = precision;
+ if (!(flags & STP_LEFT)) {
+ while (len < field_width--) {
+ num_bytes++;
+ }
+ }
+
+ num_bytes += precision;
+
+ while (len < field_width--)
+ num_bytes++;
+
+ continue;
+
+ case 's':
+ case 'M':
+ case 'm':
+ s = va_arg(args_copy, char *);
+ if ((unsigned long)s < PAGE_SIZE)
+ s = "<NULL>";
+
+ if (*fmt_copy == 's')
+ len = strnlen(s, precision);
+ else if (precision > 0)
+ len = precision;
+ else
+ len = 1;
+
+ if (!(flags & STP_LEFT)) {
+ while (len < field_width--) {
+ num_bytes++;
+ }
+ }
+ if (*fmt_copy == 'M') {
+ num_bytes += number_size((unsigned long) *(uint64_t *) s,
+ 16, field_width, len, flags);
+ }
+ else {
+ num_bytes += len;
+ }
+
+ while (len < field_width--) {
+ num_bytes++;
+ }
+ if(flags & STP_ZEROPAD) {
+ num_bytes++;
+ }
+ continue;
+ case 'X':
+ flags |= STP_LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= STP_SIGN;
+ case 'u':
+ break;
+
+ case 'p':
+ /* Note that %p takes an int64_t argument. */
+ len = 2*sizeof(void *) + 2;
+ flags |= STP_ZEROPAD;
+
+ if (field_width == -1)
+ field_width = len;
+
+ if (!(flags & STP_LEFT)) {
+ while (len < field_width) {
+ field_width--;
+ num_bytes++;
+ }
+ }
+
+ //account for "0x"
+ num_bytes+=2;
+ field_width-=2;
+
+ num_bytes += number_size((unsigned long) va_arg(args_copy, int64_t),
+ 16, field_width, field_width, flags);
+ continue;
+
+ case '%':
+ num_bytes++;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'c':
+ if (!(flags & STP_LEFT)) {
+ while (--field_width > 0) {
+ num_bytes++;
+ }
+ }
+ c = (unsigned char) va_arg(args_copy, int);
+ num_bytes++;
+ while (--field_width > 0) {
+ num_bytes++;
+ }
+ continue;
+
+ default:
+ num_bytes++;
+ if (*fmt_copy) {
+ num_bytes++;
+ } else {
+ --fmt_copy;
+ }
+ continue;
+ }
+
+ if (qualifier == 'L')
+ num = va_arg(args_copy, int64_t);
+ else if (qualifier == 'l') {
+ num = va_arg(args_copy, unsigned long);
+ if (flags & STP_SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args_copy, int);
+ if (flags & STP_SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args_copy, unsigned int);
+ if (flags & STP_SIGN)
+ num = (signed int) num;
+ }
+ num_bytes += number_size(num, base, field_width, precision, flags);
+ }
+
+ va_end(args_copy);
+
+ if (num_bytes == 0)
+ return 0;
+
+ //max print buffer size
+ if (num_bytes > STP_BUFFER_SIZE) {
+ num_bytes = STP_BUFFER_SIZE;
+ }
+
+ str = (char*)_stp_reserve_bytes(num_bytes);
+ size = num_bytes;
+ end = str + size - 1;
+
+ } else {
+ str = buf;
+ end = buf + size - 1;
+ }
+ /*
+ * Note that a change to code below requires a corresponding
+ * change in the code above to properly calculate the bytes
+ * required in the output buffer.
+ */
for (; *fmt ; ++fmt) {
if (*fmt != '%') {
if (str <= end)
@@ -433,11 +772,13 @@ static int _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
field_width, precision, flags);
}
- if (likely(str <= end))
- *str = '\0';
- else if (size > 0)
- /* don't write out a null byte if the buf size is zero */
- *end = '\0';
+ if (buf != NULL) {
+ if (likely(str <= end))
+ *str = '\0';
+ else if (size > 0)
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ }
return str-buf;
}
diff --git a/stap.1.in b/stap.1.in
index c664962c..a5a8ab84 100644
--- a/stap.1.in
+++ b/stap.1.in
@@ -157,11 +157,16 @@ be used to override limit parameters described below.
Look for the systemtap runtime sources in the given directory.
.TP
.BI \-r " /DIR"
-Build for kernel in given build tree.
+Build for kernel in given build tree. Can also be set with the
+.I SYSTEMTAP_RELEASE
+environment variable.
.TP
.BI \-r " RELEASE"
Build for kernel in build tree
-.BR /lib/modules/RELEASE/build .
+.BR /lib/modules/RELEASE/build .
+Can also be set with the
+.I SYSTEMTAP_RELEASE
+environment variable.
.TP
.BI \-m " MODULE"
Use the given name for the generated kernel object module, instead
diff --git a/tapset/utrace.stp b/tapset/utrace.stp
index 00f427e6..0d26ed5f 100644
--- a/tapset/utrace.stp
+++ b/tapset/utrace.stp
@@ -1,20 +1,26 @@
/* utrace-only subset of register accessors */
-
%{
#include "syscall.h"
%}
-function _utrace_syscall_nr:long () %{
- THIS->__retvalue = syscall_get_nr(current, CONTEXT->regs); /* pure */
+function _utrace_syscall_nr:long () %{ /* pure */
+ THIS->__retvalue = syscall_get_nr(current, CONTEXT->regs);
%}
-function _utrace_syscall_arg:long (n:long) %{
- unsigned long arg = 0; /* pure */
- syscall_get_arguments(current, CONTEXT->regs, (int)THIS->n, 1, &arg);
- THIS->__retvalue = arg;
+function _utrace_syscall_arg:long (n:long) %{ /* pure */
+ unsigned long arg = 0;
+ syscall_get_arguments(current, CONTEXT->regs, (int)THIS->n, 1, &arg);
+ THIS->__retvalue = arg;
%}
-function _utrace_syscall_return:long () %{
- THIS->__retvalue = syscall_get_return_value(current, CONTEXT->regs); /* pure */
+function _utrace_syscall_return:long () %{ /* pure */
+ /*
+ * Here's the reason for the "unsigned long" cast. Since all
+ * values inside systemtap are 64-bit numbers, return values were
+ * getting sign extended. This caused return values to not match
+ * up with the same values passed as arguments.
+ */
+ THIS->__retvalue = (unsigned long)syscall_get_return_value(current,
+ CONTEXT->regs);
%}
diff --git a/tapsets.cxx b/tapsets.cxx
index c63151e1..01c838d9 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -1339,102 +1339,14 @@ struct dwflpp
}
void
- iterate_over_cu_labels (string label_val,
- string function,
- Dwarf_Die *cu,
- vector<derived_probe *> & results,
- probe_point *base_loc,
- void *data,
- void (* callback)(const string &,
- const char *,
- int,
- Dwarf_Die *,
- Dwarf_Addr,
- dwarf_query *))
- {
- dwarf_query * q __attribute__ ((unused)) = static_cast<dwarf_query *>(data) ;
-
- get_module_dwarf();
-
- const char * sym = label_val.c_str();
- Dwarf_Die die;
- int res = dwarf_child (cu, &die);
- if (res != 0)
- return; // die without children, bail out.
-
- static string function_name;
- do
- {
- Dwarf_Attribute attr_mem;
- Dwarf_Attribute *attr = dwarf_attr (&die, DW_AT_name, &attr_mem);
- int tag = dwarf_tag(&die);
- const char *name = dwarf_formstring (attr);
- if (name == 0)
- continue;
- switch (tag)
- {
- case DW_TAG_label:
- break;
- case DW_TAG_subprogram:
- function_name = name;
- default:
- if (dwarf_haschildren (&die))
- iterate_over_cu_labels (label_val, function, &die, results, base_loc, q, callback);
- continue;
- }
-
- if (strcmp(function_name.c_str(), function.c_str()) == 0
- || (name_has_wildcard(function)
- && function_name_matches_pattern (function_name, function)))
- {
- }
- else
- continue;
- if (strcmp(name, sym) == 0
- || (name_has_wildcard(sym)
- && function_name_matches_pattern (name, sym)))
- {
- const char *file = dwarf_decl_file (&die);
- // Get the line number for this label
- Dwarf_Attribute attr;
- dwarf_attr (&die,DW_AT_decl_line, &attr);
- Dwarf_Sword dline;
- dwarf_formsdata (&attr, &dline);
- Dwarf_Addr stmt_addr;
- if (dwarf_lowpc (&die, &stmt_addr) != 0)
- {
- // There is no lowpc so figure out the address
- // Get the real die for this cu
- Dwarf_Die cudie;
- dwarf_diecu (cu, &cudie, NULL, NULL);
- size_t nlines = 0;
- // Get the line for this label
- Dwarf_Line **aline;
- dwarf_getsrc_file (module_dwarf, file, (int)dline, 0, &aline, &nlines);
- // Get the address
- for (size_t i = 0; i < nlines; i++)
- {
- dwarf_lineaddr (*aline, &stmt_addr);
- if ((dwarf_haspc (&die, stmt_addr)))
- break;
- }
- }
-
- Dwarf_Die *scopes;
- int nscopes = 0;
- nscopes = dwarf_getscopes_die (&die, &scopes);
- if (nscopes > 1)
- {
- callback(function_name.c_str(), file,
- (int)dline, &scopes[1], stmt_addr, q);
- if (sess.listing_mode)
- results.back()->locations[0]->components.push_back
- (new probe_point::component(TOK_LABEL, new literal_string (name)));
- }
- }
- }
- while (dwarf_siblingof (&die, &die) == 0);
- }
+ iterate_over_labels (Dwarf_Die *begin_die,
+ void *data,
+ void (* callback)(const string &,
+ const char *,
+ int,
+ Dwarf_Die *,
+ Dwarf_Addr,
+ dwarf_query *));
void collect_srcfiles_matching (string const & pattern,
set<char const *> & filtered_srcfiles)
@@ -3072,6 +2984,100 @@ dwflpp::iterate_over_functions (int (* callback)(Dwarf_Die * func, base_query *
}
+// Walk the children of BEGIN_DIE (recursively) looking for DW_TAG_label
+// DIEs whose enclosing function matches q->function and whose own name
+// matches q->label_val (plain names or wildcard patterns).  For each
+// match, CALLBACK is invoked with the function name, the label's
+// declaration file and line, the enclosing scope DIE, the statement
+// address and the query.  DATA must point to a dwarf_query.
+void
+dwflpp::iterate_over_labels (Dwarf_Die *begin_die,
+                             void *data,
+                             void (* callback)(const string &,
+                                               const char *,
+                                               int,
+                                               Dwarf_Die *,
+                                               Dwarf_Addr,
+                                               dwarf_query *))
+{
+  dwarf_query * q = static_cast<dwarf_query *>(data);
+
+  get_module_dwarf();
+
+  const char * sym = q->label_val.c_str();
+  Dwarf_Die die;
+  int res = dwarf_child (begin_die, &die);
+  if (res != 0)
+    return;  // die without children, bail out.
+
+  // Name of the function enclosing the labels being visited.  It is static
+  // so that it survives the recursion into nested DIEs below.  It must be
+  // refreshed on every entry: a static initializer would run only on the
+  // very first call and leave a stale name for every later function.
+  static string function_name;
+  if (dwarf_tag (begin_die) == DW_TAG_subprogram)
+    {
+      const char *begin_name = dwarf_diename (begin_die);
+      if (begin_name != 0)
+        function_name = begin_name;
+    }
+
+  do
+    {
+      Dwarf_Attribute attr_mem;
+      Dwarf_Attribute *attr = dwarf_attr (&die, DW_AT_name, &attr_mem);
+      int tag = dwarf_tag(&die);
+      const char *name = dwarf_formstring (attr);
+      if (name == 0)
+        continue;
+      switch (tag)
+        {
+        case DW_TAG_label:
+          break;
+        case DW_TAG_subprogram:
+          function_name = name;
+          // fall through: descend into the function's children
+        default:
+          if (dwarf_haschildren (&die))
+            iterate_over_labels (&die, q, callback);
+          continue;
+        }
+
+      // Only labels inside the requested function(s) are of interest.
+      if (!(strcmp(function_name.c_str(), q->function.c_str()) == 0
+            || (name_has_wildcard(q->function)
+                && function_name_matches_pattern (function_name, q->function))))
+        continue;
+      // ... and only those whose name matches the requested label.
+      if (!(strcmp(name, sym) == 0
+            || (name_has_wildcard(sym)
+                && function_name_matches_pattern (name, sym))))
+        continue;
+
+      const char *file = dwarf_decl_file (&die);
+      // Get the line number for this label
+      Dwarf_Sword dline = 0;
+      Dwarf_Attribute line_attr;
+      if (dwarf_attr (&die, DW_AT_decl_line, &line_attr) != 0)
+        dwarf_formsdata (&line_attr, &dline);
+
+      Dwarf_Addr stmt_addr = 0;
+      bool have_addr = (dwarf_lowpc (&die, &stmt_addr) == 0);
+      if (!have_addr && file != 0)
+        {
+          // There is no lowpc, so take the address from the line table
+          // records for the label's declaration line.
+          Dwarf_Line **lines = 0;
+          size_t nlines = 0;
+          if (dwarf_getsrc_file (module_dwarf, file, (int)dline, 0,
+                                 &lines, &nlines) != 0)
+            nlines = 0;
+          for (size_t i = 0; i < nlines; i++)
+            {
+              Dwarf_Addr line_addr;
+              if (dwarf_lineaddr (lines[i], &line_addr) != 0)
+                continue;
+              if (!have_addr)
+                {
+                  // Default to the first usable record ...
+                  stmt_addr = line_addr;
+                  have_addr = true;
+                }
+              if (dwarf_haspc (&die, line_addr) > 0)
+                {
+                  // ... unless a record is known to lie inside the DIE.
+                  stmt_addr = line_addr;
+                  break;
+                }
+            }
+          free (lines);  // array is allocated by dwarf_getsrc_file
+        }
+      if (!have_addr)
+        continue;  // no address could be determined for this label
+
+      Dwarf_Die *scopes = 0;
+      int nscopes = dwarf_getscopes_die (&die, &scopes);
+      if (nscopes > 1)
+        {
+          callback(function_name.c_str(), file,
+                   (int)dline, &scopes[1], stmt_addr, q);
+          if (sess.listing_mode)
+            q->results.back()->locations[0]->components.push_back
+              (new probe_point::component(TOK_LABEL, new literal_string (name)));
+        }
+      free (scopes);  // array is allocated by dwarf_getscopes_die
+    }
+  while (dwarf_siblingof (&die, &die) == 0);
+}
+
struct dwarf_builder: public derived_probe_builder
{
@@ -3877,6 +3883,19 @@ query_func_info (Dwarf_Addr entrypc,
static void
+query_srcfile_label (const dwarf_line_t& line, void * arg)
+{
+  // Line-iteration callback: for the address of the matched source line,
+  // scan the labels of every filtered function whose DIE covers it.
+  dwarf_query * query = static_cast<dwarf_query *>(arg);
+  Dwarf_Addr line_addr = line.addr();
+
+  func_info_map_t::iterator fi = query->filtered_functions.begin();
+  while (fi != query->filtered_functions.end())
+    {
+      if (query->dw.die_has_pc (fi->die, line_addr))
+        query->dw.iterate_over_labels (&fi->die, query, query_statement);
+      ++fi;
+    }
+}
+
+static void
query_srcfile_line (const dwarf_line_t& line, void * arg)
{
dwarf_query * q = static_cast<dwarf_query *>(arg);
@@ -4120,7 +4139,17 @@ query_cu (Dwarf_Die * cudie, void * arg)
if (! q->filtered_functions.empty())
q->dw.resolve_prologue_endings (q->filtered_functions);
- if ((q->has_statement_str || q->has_function_str)
+ if (q->has_label)
+ {
+ if (q->line[0] == 0) // No line number specified
+ q->dw.iterate_over_labels (q->dw.cu, q, query_statement);
+ else
+ for (set<char const *>::const_iterator i = q->filtered_srcfiles.begin();
+ i != q->filtered_srcfiles.end(); ++i)
+ q->dw.iterate_over_srcfile_lines (*i, q->line, q->has_statement_str,
+ q->line_type, query_srcfile_label, q);
+ }
+ else if ((q->has_statement_str || q->has_function_str)
&& (q->spec_type == function_file_and_line))
{
// If we have a pattern string with target *line*, we
@@ -4130,12 +4159,6 @@ query_cu (Dwarf_Die * cudie, void * arg)
q->dw.iterate_over_srcfile_lines (*i, q->line, q->has_statement_str,
q->line_type, query_srcfile_line, q);
}
- else if (q->has_label)
- {
- // If we have a pattern string with target *label*, we
- // have to look at labels in all the matched srcfiles.
- q->dw.iterate_over_cu_labels (q->label_val, q->function, q->dw.cu, q->results, q->base_loc, q, query_statement);
- }
else
{
// Otherwise, simply probe all resolved functions.
@@ -7590,6 +7613,9 @@ uprobe_derived_probe_group::emit_module_decls (systemtap_session& s)
s.op->newline() << "#else";
s.op->newline() << "#include \"uprobes/uprobes.h\"";
s.op->newline() << "#endif";
+ s.op->newline() << "#ifndef UPROBES_API_VERSION";
+ s.op->newline() << "#define UPROBES_API_VERSION 1";
+ s.op->newline() << "#endif";
s.op->newline() << "#ifndef MULTIPLE_UPROBES";
s.op->newline() << "#define MULTIPLE_UPROBES 256"; // maximum possible armed uprobes per process() probe point
@@ -7701,10 +7727,11 @@ uprobe_derived_probe_group::emit_module_decls (systemtap_session& s)
// register new uprobe
s.op->newline() << "if (register_p && sup->spec_index < 0) {";
- // PR6829: we need to check that the sup we're about to reuse is really completely free.
- // See PR6829 notes below.
- s.op->newline(1) << "if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;";
+ s.op->newline(1) << "#if (UPROBES_API_VERSION < 2)";
+ // See PR6829 comment.
+ s.op->newline() << "if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;";
s.op->newline() << "else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;";
+ s.op->newline() << "#endif";
s.op->newline() << "sup->spec_index = spec_index;";
s.op->newline() << "slotted_p = 1;";
s.op->newline() << "break;";
@@ -7755,13 +7782,32 @@ uprobe_derived_probe_group::emit_module_decls (systemtap_session& s)
s.op->newline(-1) << "} else if (!register_p && slotted_p) {";
s.op->newline(1) << "struct stap_uprobe *sup = & stap_uprobes[i];";
- // NB: we need to release this slot, so we need to borrow the mutex temporarily.
+ s.op->newline() << "int unregistered_flag;";
+ // PR6829, PR9940:
+ // Here we're unregistering for one of two reasons:
+ // 1. the process image is going away (or gone) due to exit or exec; or
+ // 2. the vma containing the probepoint has been unmapped.
+ // In case 1, it's sort of a nop, because uprobes will notice the event
+ // and dispose of the probes eventually, if it hasn't already. But by
+ // calling unmap_u[ret]probe() ourselves, we free up sup right away.
+ //
+ // In both cases, we must use unmap_u[ret]probe instead of
+ // unregister_u[ret]probe, so uprobes knows not to try to restore the
+ // original opcode.
+ s.op->newline() << "#if (UPROBES_API_VERSION >= 2)";
+ s.op->newline() << "if (sups->return_p)";
+ s.op->newline(1) << "unmap_uretprobe (& sup->urp);";
+ s.op->newline(-1) << "else";
+ s.op->newline(1) << "unmap_uprobe (& sup->up);";
+ s.op->newline(-1) << "unregistered_flag = -1;";
+ s.op->newline() << "#else";
+ // Uprobes lacks unmap_u[ret]probe. Before reusing sup, we must wait
+ // until uprobes turns loose of the u[ret]probe on its own, as indicated
+ // by uprobe.kdata = NULL.
+ s.op->newline() << "unregistered_flag = (sups->return_p ? -2 : -1);";
+ s.op->newline() << "#endif";
s.op->newline() << "mutex_lock (& stap_uprobes_lock);";
- // NB: We must not actually uregister u[ret]probes when a target process execs or exits;
- // uprobes does that by itself asynchronously. We can reuse the up/urp struct after
- // uprobes clears the sup->{up,urp}->kdata pointer. PR6829. To tell the two
- // cases apart, we use spec_index -2 vs -1.
- s.op->newline() << "sup->spec_index = (sups->return_p ? -2 : -1);";
+ s.op->newline() << "sup->spec_index = unregistered_flag;";
s.op->newline() << "mutex_unlock (& stap_uprobes_lock);";
s.op->newline() << "handled_p = 1;";
s.op->newline(-1) << "}"; // if slotted_p
diff --git a/testsuite/systemtap.base/labels.exp b/testsuite/systemtap.base/labels.exp
index 88ed4619..79e3f483 100644
--- a/testsuite/systemtap.base/labels.exp
+++ b/testsuite/systemtap.base/labels.exp
@@ -55,11 +55,42 @@ if { $res != "" } {
pass "compiling labels.c -g"
}
+# line number error
+
+set ok 0
+spawn stap -l "process(\"$label_exepath\").function(\"foo@${label_srcpath}:10\").label(\"*\")"
+
+wait
+expect {
+ -timeout 180
+ -re {no match while resolving probe point} { incr ok; exp_continue }
+ timeout { fail "$test (timeout)" }
+ eof { }
+}
+
+if {$ok == 1} { pass "$test :N .label" } { fail "$test :N .label $ok" }
+
+# line number
+
+set ok 0
+spawn stap -l "process(\"$label_exepath\").function(\"foo@${label_srcpath}:4\").label(\"*\")"
+
+wait
+expect {
+ -timeout 180
+ -re {process.*function.*labels.c:5...label..init_an_int} { incr ok; exp_continue }
+ timeout { fail "$test (timeout)" }
+ eof { }
+}
+
+if {$ok == 1} { pass "$test :N .label" } { fail "$test :N .label $ok" }
+
# list of labels
spawn stap -l "process(\"$label_exepath\").function(\"*\").label(\"*\")"
wait
+set ok 0
expect {
-timeout 180
-re {process.*function.*labels.c:5...label..init_an_int.*process.*function.*labels.c:16...label..init_an_int.*process.*function.*labels.c:18...label..init_an_int_again} { incr ok; exp_continue }
diff --git a/testsuite/systemtap.base/static_uprobes.exp b/testsuite/systemtap.base/static_uprobes.exp
index 07ff83e9..1e53d5d3 100644
--- a/testsuite/systemtap.base/static_uprobes.exp
+++ b/testsuite/systemtap.base/static_uprobes.exp
@@ -93,7 +93,6 @@ if {[installtest_p]} {
if {[catch {exec $dtrace -h -s $sup_dpath} res]} {
verbose -log "unable to run $dtrace: $res"
}
-catch {exec rm -f $sup_dpath}
if {[file exists $sup_hpath]} then {
pass "$test dtrace"
} else {
diff --git a/testsuite/systemtap.base/utrace_syscall_args.c b/testsuite/systemtap.base/utrace_syscall_args.c
new file mode 100644
index 00000000..2d3da838
--- /dev/null
+++ b/testsuite/systemtap.base/utrace_syscall_args.c
@@ -0,0 +1,67 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+
+int main()
+{
+ int fd, ret;
+ struct stat fs;
+ void *r;
+ int rc;
+
+ /* Create a file with something in it: seeking to offset 1024 and
+  * then writing 6 bytes leaves a 1030-byte (0x406) file. */
+ fd = open("foobar", O_WRONLY|O_CREAT|O_TRUNC, 0600);
+ lseek(fd, 1024, SEEK_SET);
+ write(fd, "abcdef", 6);
+ close(fd);
+
+ fd = open("foobar", O_RDONLY);
+
+ /* stat for file size; fs.st_size is the length used by the
+  * mmap()/munmap() calls below */
+ ret = fstat(fd, &fs);
+
+ /* mmap the file, then unmap it (the unmap is skipped if the
+  * mapping failed). */
+ r = mmap(NULL, fs.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (r != MAP_FAILED)
+ munmap(r, fs.st_size);
+ close(fd);
+
+ /* OK, try some system calls to see if we get the arguments
+ * correctly. Both calls go through syscall() with recognizable
+ * bit patterns, 64 or 32 bits wide depending on whether long is
+ * wider than int. The first is dup with -12345 as its argument
+ * followed by five extra pattern values; the second uses -1 as
+ * the system call number with six pattern values. The results
+ * stored in rc are not examined. */
+#if (__LONG_MAX__ > __INT_MAX__)
+ rc = syscall (__NR_dup, (unsigned long)-12345,
+ (unsigned long)0xffffffffffffffff,
+ (unsigned long)0xa5a5a5a5a5a5a5a5,
+ (unsigned long)0xf0f0f0f0f0f0f0f0,
+ (unsigned long)0x5a5a5a5a5a5a5a5a,
+ (unsigned long)0xe38e38e38e38e38e);
+#else
+ rc = syscall (__NR_dup, (unsigned long)-12345,
+ (unsigned long)0xffffffff,
+ (unsigned long)0xa5a5a5a5,
+ (unsigned long)0xf0f0f0f0,
+ (unsigned long)0x5a5a5a5a,
+ (unsigned long)0xe38e38e3);
+#endif
+#if (__LONG_MAX__ > __INT_MAX__)
+ rc = syscall ((unsigned long)-1,
+ (unsigned long)0x1c71c71c71c71c71,
+ (unsigned long)0x0f0f0f0f0f0f0f0f,
+ (unsigned long)0xdb6db6db6db6db6d,
+ (unsigned long)0x2492492492492492,
+ (unsigned long)0xad6b5ad6b5ad6b5a,
+ (unsigned long)0xdef7ddef7ddef7dd);
+#else
+ rc = syscall ((unsigned long)-1,
+ (unsigned long)0x1c71c71c,
+ (unsigned long)0x0f0f0f0f,
+ (unsigned long)0xdb6db6db,
+ (unsigned long)0x24924924,
+ (unsigned long)0xad6b5ad6,
+ (unsigned long)0xdef7ddef);
+#endif
+ return 0;
+}
diff --git a/testsuite/systemtap.base/utrace_syscall_args.exp b/testsuite/systemtap.base/utrace_syscall_args.exp
new file mode 100644
index 00000000..98bc457e
--- /dev/null
+++ b/testsuite/systemtap.base/utrace_syscall_args.exp
@@ -0,0 +1,82 @@
+# Utrace system call argument tests.
+
+set flags ""
+set srcpath "$srcdir/$subdir/utrace_syscall_args.c"
+set exepath "[pwd]/utrace_syscall_args"
+set stppath "$srcdir/$subdir/utrace_syscall_args.stp"
+
+set output_string "mmap\\(\[0-9\]+\\)\\(0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+\\) = 0x\[0-9a-f]+\r\nmunmap\\(\[0-9\]+\\)\\(0x\[0-9a-f]+, 0x\[0-9a-f]+\\) = 0x\[0-9a-f]+\r\nclose\\(\[0-9\]+\\)\\(0x\[0-9a-f]+\\) = 0x\[0-9a-f]+\r\ndup\\(\[0-9\]+\\)\\(0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+\\) = 0x\[0-9a-f]+\r\nbad_syscall\\(-?\[0-9\]+\\)\\(0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+, 0x\[0-9a-f]+\\) = 0x\[0-9a-f]+\r\nsystemtap test success\r\n"
+
+# For first pass, force 64-bit compilation for 64-bit systems. Add
+# any other 64-bit architecture you want tested below.
+#
+# To find tcl's platform name for your machine, run the following:
+# echo "puts $::tcl_platform(machine)" | tclsh
+
+switch -regexp $::tcl_platform(machine) {
+ ^ia64$ {
+ set do_64_bit_pass 1
+ set flags ""
+ }
+ ^(x86_64|ppc64|s390x)$ {
+ set do_64_bit_pass 1
+ set flags "additional_flags=-m64"
+ }
+ default {
+ set do_64_bit_pass 0
+ }
+}
+
+if {$do_64_bit_pass} {
+ set testname "64_BIT_UTRACE_SYSCALL_ARGS"
+ if {![installtest_p]} { untested $testname; continue }
+ if {![utrace_p]} { untested $testname; continue }
+ send_log "Testing ${testname}\n"
+
+ # Compile our test program.
+ set res [target_compile $srcpath $exepath executable $flags]
+ if { $res != "" } {
+ verbose "target_compile for $exepath failed: $res" 2
+ fail "$testname: unable to compile $srcpath"
+ return
+ }
+
+ # Run the test.
+ stap_run $testname no_load $output_string -g $stppath -c $exepath
+
+ catch {exec rm -f $exepath foobar}
+}
+
+# The second pass is for systems that support 32-bit executables
+# (either exclusively or in addition to 64-bit executables).
+set do_32_bit_pass 1
+switch -regexp $::tcl_platform(machine) {
+ ^(x86_64|ppc64)$ {
+ set flags "additional_flags=-m32"
+ }
+ ^s390x$ {
+ set flags "additional_flags=-m31"
+ }
+ ^ia64$ {
+ set do_32_bit_pass 0
+ }
+}
+
+if {$do_32_bit_pass} {
+ set testname "32_BIT_UTRACE_SYSCALL_ARGS"
+ if {![installtest_p]} { untested $testname; continue }
+ if {![utrace_p]} { untested $testname; continue }
+ send_log "Testing ${testname}\n"
+
+ # Compile our test program
+ set res [target_compile $srcpath $exepath executable $flags]
+ if { $res != "" } {
+ verbose "target_compile for $exepath failed: $res" 2
+ fail "$testname: unable to compile $srcpath"
+ return
+ }
+
+ stap_run $testname no_load $output_string -g $stppath -c $exepath
+
+ catch {exec rm -f $exepath foobar}
+}
diff --git a/testsuite/systemtap.base/utrace_syscall_args.stp b/testsuite/systemtap.base/utrace_syscall_args.stp
new file mode 100644
index 00000000..166e1ace
--- /dev/null
+++ b/testsuite/systemtap.base/utrace_syscall_args.stp
@@ -0,0 +1,366 @@
+%{
+#include "syscall.h"
+%}
+
+function mmap_syscall_no:long () %{
+ THIS->__retvalue = MMAP_SYSCALL_NO(current); /* pure */
+%}
+function mmap2_syscall_no:long () %{
+ THIS->__retvalue = MMAP2_SYSCALL_NO(current); /* pure */
+%}
+function munmap_syscall_no:long () %{
+ THIS->__retvalue = MUNMAP_SYSCALL_NO(current); /* pure */
+%}
+
+global syscalls_seen = 0
+global failures = 0
+
+global mmap_found = 0
+global mmap_args[10]
+
+global munmap_found = 0
+global munmap_args[10]
+
+global close_found = 0
+global close_args[10]
+
+global dup_found = 0
+global dup_args[10]
+
+global bad_syscall_found = 0
+global bad_syscall_args[10]
+
+probe begin
+{
+ printf("systemtap starting probe\n")
+}
+
+probe syscall.open {
+ if (filename == "foobar") {
+ syscalls_seen += 1
+ }
+}
+
+probe process("utrace_syscall_args").syscall {
+ if (syscalls_seen >= 2) {
+ syscalls_seen += 1
+
+ # We skip the fstat() syscall, which is the 1st syscall after
+ # the open() by not looking at 'syscalls_seen == 3'.
+
+ if (syscalls_seen == 4 && ($syscall == mmap_syscall_no()
+ || $syscall == mmap2_syscall_no())) {
+ mmap_found = 1
+ mmap_args[0] = $syscall
+ mmap_args[1] = $arg1
+ mmap_args[2] = $arg2
+ mmap_args[3] = $arg3
+ mmap_args[4] = $arg4
+ mmap_args[5] = $arg5
+ mmap_args[6] = $arg6
+
+%(arch == "s390x" %?
+ # s390 requires this for mmap. Verified by running:
+ # # strace strace utrace_syscall_args
+ addr = mmap_args[1]
+ mmap_args[1] = user_long(addr)
+ addr += 8
+ mmap_args[2] = user_long(addr)
+ addr += 8
+ mmap_args[3] = user_long(addr)
+ addr += 8
+ mmap_args[4] = user_long(addr)
+ addr += 8
+ mmap_args[5] = user_long(addr)
+ addr += 8
+ mmap_args[6] = user_long(addr)
+%)
+ }
+ else if (syscalls_seen == 5 && $syscall == munmap_syscall_no()) {
+ munmap_found = 1
+ munmap_args[0] = $syscall
+ munmap_args[1] = $arg1
+ munmap_args[2] = $arg2
+ }
+ else if (syscalls_seen == 6) {
+ close_found = 1
+ close_args[0] = $syscall
+ close_args[1] = $arg1
+ }
+ else if (syscalls_seen == 7) {
+ dup_found = 1
+ dup_args[0] = $syscall
+ dup_args[1] = $arg1
+ dup_args[2] = $arg2
+ dup_args[3] = $arg3
+ dup_args[4] = $arg4
+ dup_args[5] = $arg5
+ dup_args[6] = $arg6
+ }
+ else if (syscalls_seen == 8) {
+ bad_syscall_found = 1
+ bad_syscall_args[0] = $syscall
+ bad_syscall_args[1] = $arg1
+ bad_syscall_args[2] = $arg2
+ bad_syscall_args[3] = $arg3
+ bad_syscall_args[4] = $arg4
+ bad_syscall_args[5] = $arg5
+ bad_syscall_args[6] = $arg6
+ }
+ }
+}
+probe process("utrace_syscall_args").syscall.return {
+ if (syscalls_seen >= 4) {
+ if (syscalls_seen == 4) {
+ mmap_args[7] = $return
+ }
+ else if (syscalls_seen == 5) {
+ munmap_args[3] = $return
+ }
+ else if (syscalls_seen == 6) {
+ close_args[2] = $return
+ }
+ else if (syscalls_seen == 7) {
+ dup_args[7] = $return
+ }
+ else if (syscalls_seen == 8) {
+ bad_syscall_args[7] = $return
+ syscalls_seen = 0
+ }
+ }
+}
+
+# Final report: print every recorded system call (number, arguments and
+# return value), validate the values that are predictable, and finish
+# with a success or failure line depending on the 'failures' count.
+probe end
+{
+    printf("systemtap ending probe\n")
+
+    # print mmap info
+    if (mmap_found == 0) {
+        printf("error: no mmap system call found\n")
+        failures += 1
+    }
+    else {
+        printf("mmap(%d)(0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x) = 0x%x\n",
+               mmap_args[0], mmap_args[1], mmap_args[2], mmap_args[3],
+               mmap_args[4], mmap_args[5], mmap_args[6], mmap_args[7])
+
+        # Validate arguments.  We can only check certain arguments.
+        # It is possible that mmap's 'prot' and 'flags' arguments
+        # could vary per platform, so we'll ignore them.
+        if (mmap_args[1] != 0) {
+            failures += 1
+            printf("mmap bad arg 1: 0x%x vs 0x0\n", mmap_args[1])
+        }
+        if (mmap_args[2] != 0x406) {
+            failures += 1
+            printf("mmap bad arg 2: 0x%x vs 0x406\n", mmap_args[2])
+        }
+        if (mmap_args[6] != 0) {
+            failures += 1
+            printf("mmap bad arg 6: 0x%x vs 0x0\n", mmap_args[6])
+        }
+    }
+
+    # print munmap info
+    if (munmap_found == 0) {
+        printf("error: no munmap system call found\n")
+        failures += 1
+    }
+    else if (mmap_found == 0) {
+        # The munmap checks below compare against the mmap values, so
+        # they are meaningless without a recorded mmap.
+        printf("error: no munmap/mmap system call found\n")
+        failures += 1
+    }
+    else {
+        printf("munmap(%d)(0x%x, 0x%x) = 0x%x\n",
+               munmap_args[0], munmap_args[1], munmap_args[2], munmap_args[3])
+
+        # Validate arguments.  munmap()'s first argument should be the
+        # same as the mmap() return value.
+        if (munmap_args[1] != mmap_args[7]) {
+            failures += 1
+            printf("munmap bad arg 1: 0x%x vs 0x%x\n", munmap_args[1],
+                   mmap_args[7])
+        }
+        if (munmap_args[2] != mmap_args[2]) {
+            failures += 1
+            printf("munmap bad arg 2: 0x%x vs 0x%x\n", munmap_args[2],
+                   mmap_args[2])
+        }
+        # Validate return value.  The return probe stores it in slot 3
+        # (slot 7 is never written for munmap).
+        if (munmap_args[3] != 0) {
+            failures += 1
+            printf("munmap bad return value: 0x%x vs 0x0\n", munmap_args[3])
+        }
+    }
+
+    # print close info
+    if (close_found == 0) {
+        printf("error: no close system call found\n")
+        failures += 1
+    }
+    else if (close_found == 1) {
+        printf("close(%d)(0x%x) = 0x%x\n",
+               close_args[0], close_args[1], close_args[2])
+
+        # close()'s argument should be the descriptor passed to mmap().
+        if (mmap_args[5] != close_args[1]) {
+            failures += 1
+            printf("close bad arg 1: 0x%x vs 0x%x\n",
+                   close_args[1], mmap_args[5])
+        }
+    }
+
+    # print dup info
+    if (dup_found == 0) {
+        printf("error: no dup system call found\n")
+        failures += 1
+    }
+    else {
+        printf("dup(%d)(0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x) = 0x%x\n",
+               dup_args[0], dup_args[1], dup_args[2], dup_args[3],
+               dup_args[4], dup_args[5], dup_args[6], dup_args[7])
+
+        # Validate arguments - handle 32-bit vs. 64-bit.
+        if ((dup_args[1] & 0xffffffff00000000) != 0) {
+            if (dup_args[1] != 0xffffffffffffcfc7) {
+                failures += 1
+                printf("dup bad arg 1: 0x%x vs 0xffffffffffffcfc7\n",
+                       dup_args[1])
+            }
+            if (dup_args[2] != 0xffffffffffffffff) {
+                failures += 1
+                printf("dup bad arg 2: 0x%x vs 0xffffffffffffffff\n",
+                       dup_args[2])
+            }
+            if (dup_args[3] != 0xa5a5a5a5a5a5a5a5) {
+                failures += 1
+                printf("dup bad arg 3: 0x%x vs 0xa5a5a5a5a5a5a5a5\n",
+                       dup_args[3])
+            }
+            if (dup_args[4] != 0xf0f0f0f0f0f0f0f0) {
+                failures += 1
+                printf("dup bad arg 4: 0x%x vs 0xf0f0f0f0f0f0f0f0\n",
+                       dup_args[4])
+            }
+            if (dup_args[5] != 0x5a5a5a5a5a5a5a5a) {
+                failures += 1
+                printf("dup bad arg 5: 0x%x vs 0x5a5a5a5a5a5a5a5a\n",
+                       dup_args[5])
+            }
+            if (dup_args[6] != 0xe38e38e38e38e38e) {
+                failures += 1
+                printf("dup bad arg 6: 0x%x vs 0xe38e38e38e38e38e\n",
+                       dup_args[6])
+            }
+        }
+        else {
+            if (dup_args[1] != 0xffffcfc7) {
+                failures += 1
+                printf("dup bad arg 1: 0x%x vs 0xffffcfc7\n", dup_args[1])
+            }
+            if (dup_args[2] != 0xffffffff) {
+                failures += 1
+                printf("dup bad arg 2: 0x%x vs 0xffffffff\n", dup_args[2])
+            }
+            if (dup_args[3] != 0xa5a5a5a5) {
+                failures += 1
+                printf("dup bad arg 3: 0x%x vs 0xa5a5a5a5\n", dup_args[3])
+            }
+            if (dup_args[4] != 0xf0f0f0f0) {
+                failures += 1
+                printf("dup bad arg 4: 0x%x vs 0xf0f0f0f0\n", dup_args[4])
+            }
+            if (dup_args[5] != 0x5a5a5a5a) {
+                failures += 1
+                printf("dup bad arg 5: 0x%x vs 0x5a5a5a5a\n", dup_args[5])
+            }
+            if (dup_args[6] != 0xe38e38e3) {
+                failures += 1
+                printf("dup bad arg 6: 0x%x vs 0xe38e38e3\n", dup_args[6])
+            }
+        }
+    }
+
+    # print bad_syscall info
+    if (bad_syscall_found == 0) {
+        printf("error: no bad_syscall system call found\n")
+        failures += 1
+    }
+    else {
+        printf("bad_syscall(%d)(0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x) = 0x%x\n",
+               bad_syscall_args[0], bad_syscall_args[1], bad_syscall_args[2], bad_syscall_args[3],
+               bad_syscall_args[4], bad_syscall_args[5], bad_syscall_args[6], bad_syscall_args[7])
+
+        # Validate arguments - handle 32-bit vs. 64-bit.
+        if (bad_syscall_args[1] > 0xffffffff) {
+            if (bad_syscall_args[1] != 0x1c71c71c71c71c71) {
+                failures += 1
+                printf("bad_syscall bad arg 1: 0x%x vs 0x1c71c71c71c71c71\n",
+                       bad_syscall_args[1])
+            }
+            if (bad_syscall_args[2] != 0x0f0f0f0f0f0f0f0f) {
+                failures += 1
+                printf("bad_syscall bad arg 2: 0x%x vs 0x0f0f0f0f0f0f0f0f\n",
+                       bad_syscall_args[2])
+            }
+            if (bad_syscall_args[3] != 0xdb6db6db6db6db6d) {
+                failures += 1
+                printf("bad_syscall bad arg 3: 0x%x vs 0xdb6db6db6db6db6d\n",
+                       bad_syscall_args[3])
+            }
+            if (bad_syscall_args[4] != 0x2492492492492492) {
+                failures += 1
+                printf("bad_syscall bad arg 4: 0x%x vs 0x2492492492492492\n",
+                       bad_syscall_args[4])
+            }
+            if (bad_syscall_args[5] != 0xad6b5ad6b5ad6b5a) {
+                failures += 1
+                printf("bad_syscall bad arg 5: 0x%x vs 0xad6b5ad6b5ad6b5a\n",
+                       bad_syscall_args[5])
+            }
+            if (bad_syscall_args[6] != 0xdef7ddef7ddef7dd) {
+                failures += 1
+                printf("bad_syscall bad arg 6: 0x%x vs 0xdef7ddef7ddef7dd\n",
+                       bad_syscall_args[6])
+            }
+        }
+        else {
+            if (bad_syscall_args[1] != 0x1c71c71c) {
+                failures += 1
+                printf("bad_syscall bad arg 1: 0x%x vs 0x1c71c71c\n",
+                       bad_syscall_args[1])
+            }
+            if (bad_syscall_args[2] != 0x0f0f0f0f) {
+                failures += 1
+                printf("bad_syscall bad arg 2: 0x%x vs 0x0f0f0f0f\n",
+                       bad_syscall_args[2])
+            }
+            if (bad_syscall_args[3] != 0xdb6db6db) {
+                failures += 1
+                printf("bad_syscall bad arg 3: 0x%x vs 0xdb6db6db\n",
+                       bad_syscall_args[3])
+            }
+            if (bad_syscall_args[4] != 0x24924924) {
+                failures += 1
+                printf("bad_syscall bad arg 4: 0x%x vs 0x24924924\n",
+                       bad_syscall_args[4])
+            }
+            if (bad_syscall_args[5] != 0xad6b5ad6) {
+                failures += 1
+                printf("bad_syscall bad arg 5: 0x%x vs 0xad6b5ad6\n",
+                       bad_syscall_args[5])
+            }
+            if (bad_syscall_args[6] != 0xdef7ddef) {
+                failures += 1
+                printf("bad_syscall bad arg 6: 0x%x vs 0xdef7ddef\n",
+                       bad_syscall_args[6])
+            }
+        }
+    }
+
+    if (failures == 0) {
+        printf("systemtap test success\n")
+    }
+    else {
+        printf("systemtap test failure\n")
+    }
+}
diff --git a/testsuite/systemtap.examples/index.html b/testsuite/systemtap.examples/index.html
index a03b8dcc..a2dc7d5c 100644
--- a/testsuite/systemtap.examples/index.html
+++ b/testsuite/systemtap.examples/index.html
@@ -88,6 +88,9 @@ keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
<li><a href="memory/pfaults.stp">memory/pfaults.stp</a> - Generate Log of Major and Minor Page Faults<br>
keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
<p>The pfaults.stp script generates a simple log for each major and minor page fault that occurs on the system. Each line contains a timestamp (in microseconds) when the page fault servicing was completed, the pid of the process, the address of the page fault, the type of access (read or write), the type of fault (major or minor), and the elapsed time for page fault. This log can be examined to determine where the page faults are occurring.</p></li>
+<li><a href="network/dropwatch.stp">network/dropwatch.stp</a> - Watch Where Socket Buffers are Freed in the Kernel<br>
+keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRACEPOINT">TRACEPOINT</a> <a href="keyword-index.html#BUFFER">BUFFER</a> <a href="keyword-index.html#FREE">FREE</a> <br>
+<p>Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.</p></li>
<li><a href="network/nettop.stp">network/nettop.stp</a> - Periodic Listing of Processes Using Network Interfaces<br>
keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRAFFIC">TRAFFIC</a> <a href="keyword-index.html#PER-PROCESS">PER-PROCESS</a> <br>
<p>Every five seconds the nettop.stp script prints out a list of processes (PID and command) with the number of packets sent/received and the amount of data sent/received by the process during that interval.</p></li>
diff --git a/testsuite/systemtap.examples/index.txt b/testsuite/systemtap.examples/index.txt
index d24232e7..2f85628a 100644
--- a/testsuite/systemtap.examples/index.txt
+++ b/testsuite/systemtap.examples/index.txt
@@ -152,6 +152,13 @@ keywords: memory
determine where the page faults are occurring.
+network/dropwatch.stp - Watch Where Socket Buffers are Freed in the Kernel
+keywords: network tracepoint buffer free
+
+ Every five seconds the dropwatch.stp script lists the number of
+ socket buffers freed at locations in the kernel.
+
+
network/nettop.stp - Periodic Listing of Processes Using Network Interfaces
keywords: network traffic per-process
diff --git a/testsuite/systemtap.examples/keyword-index.html b/testsuite/systemtap.examples/keyword-index.html
index e65ed19d..473c0091 100644
--- a/testsuite/systemtap.examples/keyword-index.html
+++ b/testsuite/systemtap.examples/keyword-index.html
@@ -39,7 +39,7 @@
</ul>
<h2>Examples by Keyword</h2>
-<p><tt><a href="#BACKTRACE">BACKTRACE</a> <a href="#CALLGRAPH">CALLGRAPH</a> <a href="#CPU">CPU</a> <a href="#DISK">DISK</a> <a href="#FORMAT">FORMAT</a> <a href="#FUNCTIONS">FUNCTIONS</a> <a href="#FUTEX">FUTEX</a> <a href="#GRAPH">GRAPH</a> <a href="#INTERRUPT">INTERRUPT</a> <a href="#IO">IO</a> <a href="#LOCKING">LOCKING</a> <a href="#MEMORY">MEMORY</a> <a href="#NETWORK">NETWORK</a> <a href="#PER-PROCESS">PER-PROCESS</a> <a href="#PROCESS">PROCESS</a> <a href="#PROFILING">PROFILING</a> <a href="#READ">READ</a> <a href="#SCHEDULER">SCHEDULER</a> <a href="#SIGNALS">SIGNALS</a> <a href="#SIMPLE">SIMPLE</a> <a href="#SLEEP">SLEEP</a> <a href="#SOCKET">SOCKET</a> <a href="#SYSCALL">SYSCALL</a> <a href="#TCP">TCP</a> <a href="#TIME">TIME</a> <a href="#TRACE">TRACE</a> <a href="#TRAFFIC">TRAFFIC</a> <a href="#USE">USE</a> <a href="#WAIT4">WAIT4</a> <a href="#WRITE">WRITE</a> </tt></p>
+<p><tt><a href="#BACKTRACE">BACKTRACE</a> <a href="#BUFFER">BUFFER</a> <a href="#CALLGRAPH">CALLGRAPH</a> <a href="#CPU">CPU</a> <a href="#DISK">DISK</a> <a href="#FORMAT">FORMAT</a> <a href="#FREE">FREE</a> <a href="#FUNCTIONS">FUNCTIONS</a> <a href="#FUTEX">FUTEX</a> <a href="#GRAPH">GRAPH</a> <a href="#INTERRUPT">INTERRUPT</a> <a href="#IO">IO</a> <a href="#LOCKING">LOCKING</a> <a href="#MEMORY">MEMORY</a> <a href="#NETWORK">NETWORK</a> <a href="#PER-PROCESS">PER-PROCESS</a> <a href="#PROCESS">PROCESS</a> <a href="#PROFILING">PROFILING</a> <a href="#READ">READ</a> <a href="#SCHEDULER">SCHEDULER</a> <a href="#SIGNALS">SIGNALS</a> <a href="#SIMPLE">SIMPLE</a> <a href="#SLEEP">SLEEP</a> <a href="#SOCKET">SOCKET</a> <a href="#SYSCALL">SYSCALL</a> <a href="#TCP">TCP</a> <a href="#TIME">TIME</a> <a href="#TRACE">TRACE</a> <a href="#TRACEPOINT">TRACEPOINT</a> <a href="#TRAFFIC">TRAFFIC</a> <a href="#USE">USE</a> <a href="#WAIT4">WAIT4</a> <a href="#WRITE">WRITE</a> </tt></p>
<h3><a name="BACKTRACE">BACKTRACE</a></h3>
<ul>
<li><a href="interrupt/scf.stp">interrupt/scf.stp</a> - Tally Backtraces for Inter-Processor Interrupt (IPI)<br>
@@ -52,6 +52,12 @@ keywords: <a href="keyword-index.html#IO">IO</a> <a href="keyword-index.html#BAC
keywords: <a href="keyword-index.html#IO">IO</a> <a href="keyword-index.html#SCHEDULER">SCHEDULER</a> <a href="keyword-index.html#BACKTRACE">BACKTRACE</a> <br>
<p>The script monitors the time that threads spend waiting for IO operations (in "D" state) in the wait_for_completion function. If a thread spends over 10ms, its name and backtrace is printed, and later so is the total delay.</p></li>
</ul>
+<h3><a name="BUFFER">BUFFER</a></h3>
+<ul>
+<li><a href="network/dropwatch.stp">network/dropwatch.stp</a> - Watch Where Socket Buffers are Freed in the Kernel<br>
+keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRACEPOINT">TRACEPOINT</a> <a href="keyword-index.html#BUFFER">BUFFER</a> <a href="keyword-index.html#FREE">FREE</a> <br>
+<p>Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.</p></li>
+</ul>
<h3><a name="CALLGRAPH">CALLGRAPH</a></h3>
<ul>
<li><a href="general/para-callgraph.stp">general/para-callgraph.stp</a> - Callgraph tracing with arguments<br>
@@ -82,6 +88,12 @@ keywords: <a href="keyword-index.html#FORMAT">FORMAT</a> <br>
keywords: <a href="keyword-index.html#FORMAT">FORMAT</a> <br>
<p>The script prints a table showing the available attributes (bold, underline, and inverse) with color combinations for the ans_set_color3() function in the ansi.stp tapset.</p></li>
</ul>
+<h3><a name="FREE">FREE</a></h3>
+<ul>
+<li><a href="network/dropwatch.stp">network/dropwatch.stp</a> - Watch Where Socket Buffers are Freed in the Kernel<br>
+keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRACEPOINT">TRACEPOINT</a> <a href="keyword-index.html#BUFFER">BUFFER</a> <a href="keyword-index.html#FREE">FREE</a> <br>
+<p>Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.</p></li>
+</ul>
<h3><a name="FUNCTIONS">FUNCTIONS</a></h3>
<ul>
<li><a href="profiling/functioncallcount.stp">profiling/functioncallcount.stp</a> - Count Times Functions Called<br>
@@ -150,6 +162,9 @@ keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
</ul>
<h3><a name="NETWORK">NETWORK</a></h3>
<ul>
+<li><a href="network/dropwatch.stp">network/dropwatch.stp</a> - Watch Where Socket Buffers are Freed in the Kernel<br>
+keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRACEPOINT">TRACEPOINT</a> <a href="keyword-index.html#BUFFER">BUFFER</a> <a href="keyword-index.html#FREE">FREE</a> <br>
+<p>Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.</p></li>
<li><a href="network/nettop.stp">network/nettop.stp</a> - Periodic Listing of Processes Using Network Interfaces<br>
keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRAFFIC">TRAFFIC</a> <a href="keyword-index.html#PER-PROCESS">PER-PROCESS</a> <br>
<p>Every five seconds the nettop.stp script prints out a list of processes (PID and command) with the number of packets sent/received and the amount of data sent/received by the process during that interval.</p></li>
@@ -286,6 +301,12 @@ keywords: <a href="keyword-index.html#SYSCALL">SYSCALL</a> <a href="keyword-inde
keywords: <a href="keyword-index.html#TRACE">TRACE</a> <a href="keyword-index.html#CALLGRAPH">CALLGRAPH</a> <br>
<p>Print a timed per-thread callgraph, complete with function parameters and return values. The first parameter names the function probe points to trace. The optional second parameter names the probe points for trigger functions, which acts to enable tracing for only those functions that occur while the current thread is nested within the trigger.</p></li>
</ul>
+<h3><a name="TRACEPOINT">TRACEPOINT</a></h3>
+<ul>
+<li><a href="network/dropwatch.stp">network/dropwatch.stp</a> - Watch Where Socket Buffers are Freed in the Kernel<br>
+keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRACEPOINT">TRACEPOINT</a> <a href="keyword-index.html#BUFFER">BUFFER</a> <a href="keyword-index.html#FREE">FREE</a> <br>
+<p>Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.</p></li>
+</ul>
<h3><a name="TRAFFIC">TRAFFIC</a></h3>
<ul>
<li><a href="network/nettop.stp">network/nettop.stp</a> - Periodic Listing of Processes Using Network Interfaces<br>
diff --git a/testsuite/systemtap.examples/keyword-index.txt b/testsuite/systemtap.examples/keyword-index.txt
index 40b5276f..1d5add5f 100644
--- a/testsuite/systemtap.examples/keyword-index.txt
+++ b/testsuite/systemtap.examples/keyword-index.txt
@@ -30,6 +30,15 @@ keywords: io scheduler backtrace
so is the total delay.
+= BUFFER =
+
+network/dropwatch.stp - Watch Where Socket Buffers are Freed in the Kernel
+keywords: network tracepoint buffer free
+
+ Every five seconds the dropwatch.stp script lists the number of
+ socket buffers freed at locations in the kernel.
+
+
= CALLGRAPH =
general/para-callgraph.stp - Callgraph tracing with arguments
@@ -88,6 +97,15 @@ keywords: format
ans_set_color3() function in the ansi.stp tapset.
+= FREE =
+
+network/dropwatch.stp - Watch Where Socket Buffers are Freed in the Kernel
+keywords: network tracepoint buffer free
+
+ Every five seconds the dropwatch.stp script lists the number of
+ socket buffers freed at locations in the kernel.
+
+
= FUNCTIONS =
profiling/functioncallcount.stp - Count Times Functions Called
@@ -251,6 +269,13 @@ keywords: memory
= NETWORK =
+network/dropwatch.stp - Watch Where Socket Buffers are Freed in the Kernel
+keywords: network tracepoint buffer free
+
+ Every five seconds the dropwatch.stp script lists the number of
+ socket buffers freed at locations in the kernel.
+
+
network/nettop.stp - Periodic Listing of Processes Using Network Interfaces
keywords: network traffic per-process
@@ -594,6 +619,15 @@ keywords: trace callgraph
the trigger.
+= TRACEPOINT =
+
+network/dropwatch.stp - Watch Where Socket Buffers are Freed in the Kernel
+keywords: network tracepoint buffer free
+
+ Every five seconds the dropwatch.stp script lists the number of
+ socket buffers freed at locations in the kernel.
+
+
= TRAFFIC =
network/nettop.stp - Periodic Listing of Processes Using Network Interfaces
diff --git a/testsuite/systemtap.examples/network/dropwatch.meta b/testsuite/systemtap.examples/network/dropwatch.meta
new file mode 100644
index 00000000..176ba236
--- /dev/null
+++ b/testsuite/systemtap.examples/network/dropwatch.meta
@@ -0,0 +1,13 @@
+title: Watch Where Socket Buffers are Freed in the Kernel
+name: dropwatch.stp
+version: 1.0
+author: Neil Horman
+keywords: network tracepoint buffer free
+subsystem: network
+status: production
+exit: user-controlled
+output: timed
+scope: system-wide
+description: Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.
+test_check: stap -p4 dropwatch.stp
+test_installcheck: stap dropwatch.stp -c "sleep 1"
diff --git a/testsuite/systemtap.examples/network/dropwatch.stp b/testsuite/systemtap.examples/network/dropwatch.stp
new file mode 100755
index 00000000..79d50a4e
--- /dev/null
+++ b/testsuite/systemtap.examples/network/dropwatch.stp
@@ -0,0 +1,30 @@
+#! /usr/bin/env stap
+
+############################################################
+# dropwatch.stp -- example script mimicking the dropwatch utility
+# (http://fedorahosted.org/dropwatch)
+# Author: Neil Horman <nhorman@redhat.com>
+############################################################
+
+# Statistics aggregate indexed by the $location of each kfree_skb hit
+global locations
+
+# Announce when monitoring starts and stops
+probe begin { printf("Monitoring for dropped packets\n") }
+probe end { printf("Stopping dropped packet monitor\n") }
+
+# Add one sample for the location reported by every tracepoint hit
+probe kernel.trace("kfree_skb")
+{
+  locations[$location] <<< 1
+}
+
+# Every 5 seconds: print the per-location counts, then reset them
+probe timer.sec(5)
+{
+  printf("\n")
+  foreach (site in locations-)
+    printf("%d packets dropped at location %p\n", @count(locations[site]), site)
+  delete locations
+}
+
diff --git a/translate.cxx b/translate.cxx
index a22e9a5b..46fea6e7 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -4532,9 +4532,14 @@ dump_unwindsyms (Dwfl_Module *m,
}
}
- // Use end as sanity check when resolving symbol addresses.
- Dwarf_Addr end;
- dwfl_module_info (m, NULL, NULL, &end, NULL, NULL, NULL, NULL);
+ // Get the canonical path of the main file for comparison at runtime.
+ // When given directly by the user through -d or in case of the kernel
+ // name and path might differ. path should be used for matching.
+ // Use end as sanity check when resolving symbol addresses and to
+ // calculate size for .dynamic and .absolute sections.
+ const char *mainfile;
+ Dwarf_Addr start, end;
+ dwfl_module_info (m, NULL, &start, &end, NULL, NULL, &mainfile, NULL);
// Look up the relocation basis for symbols
int n = dwfl_module_relocations (m);
@@ -4545,7 +4550,8 @@ dump_unwindsyms (Dwfl_Module *m,
// XXX: unfortunate duplication with tapsets.cxx:emit_address()
typedef map<Dwarf_Addr,const char*> addrmap_t; // NB: plain map, sorted by address
- vector<string> seclist; // encountered relocation bases (section names)
+ vector<pair<string,unsigned> > seclist; // encountered relocation bases
+ // (section names and sizes)
map<unsigned, addrmap_t> addrmap; // per-relocation-base sorted addrmap
Dwarf_Addr extra_offset = 0;
@@ -4588,11 +4594,11 @@ dump_unwindsyms (Dwfl_Module *m,
|| sym.st_value < base)) // before first section.
{
Dwarf_Addr sym_addr = sym.st_value;
+ Dwarf_Addr save_addr = sym_addr;
const char *secname = NULL;
if (n > 0) // only try to relocate if there exist relocation bases
{
- Dwarf_Addr save_addr = sym_addr;
int ki = dwfl_module_relocate_address (m, &sym_addr);
dwfl_assert ("dwfl_module_relocate_address", ki >= 0);
secname = dwfl_module_relocation_info (m, ki, NULL);
@@ -4645,10 +4651,31 @@ dump_unwindsyms (Dwfl_Module *m,
// Compute our section number
unsigned secidx;
for (secidx=0; secidx<seclist.size(); secidx++)
- if (seclist[secidx]==secname) break;
+ if (seclist[secidx].first==secname) break;
if (secidx == seclist.size()) // new section name
- seclist.push_back (secname);
+ {
+ // absolute, dynamic or kernel have just one relocation
+ // section, which covers the whole module address range.
+ unsigned size;
+ if (secidx == 0
+ && (n == 0
+ || (n == 1
+ && (strcmp(secname, ".dynamic") == 0
+ || strcmp(secname, "_stext") == 0))))
+ size = end - start;
+ else
+ {
+ Dwarf_Addr b;
+ Elf_Scn *scn;
+ GElf_Shdr *shdr, shdr_mem;
+ scn = dwfl_module_address_section (m, &save_addr, &b);
+ assert (scn != NULL);
+ shdr = gelf_getshdr(scn, &shdr_mem);
+ size = shdr->sh_size;
+ }
+ seclist.push_back (make_pair(secname,size));
+ }
(addrmap[secidx])[sym_addr] = name;
}
@@ -4709,12 +4736,17 @@ dump_unwindsyms (Dwfl_Module *m,
}
c->output << "static struct _stp_section _stp_module_" << stpmod_idx<< "_sections[] = {\n";
+ // For the kernel, executables (ET_EXEC) or shared libraries (ET_DYN)
+ // there is just one section that covers the whole address space of
+ // the module. For kernel modules (ET_REL) there can be multiple
+ // sections that get relocated separately.
for (unsigned secidx = 0; secidx < seclist.size(); secidx++)
{
c->output << "{\n"
- << ".name = " << lex_cast_qstring(seclist[secidx]) << ",\n"
+ << ".name = " << lex_cast_qstring(seclist[secidx].first) << ",\n"
+ << ".size = 0x" << hex << seclist[secidx].second << dec << ",\n"
<< ".symbols = _stp_module_" << stpmod_idx << "_symbols_" << secidx << ",\n"
- << ".num_symbols = sizeof(_stp_module_" << stpmod_idx << "_symbols_" << secidx << ")/sizeof(struct _stp_symbol)\n"
+ << ".num_symbols = " << addrmap[secidx].size() << "\n"
<< "},\n";
}
c->output << "};\n";
@@ -4722,11 +4754,6 @@ dump_unwindsyms (Dwfl_Module *m,
c->output << "static struct _stp_module _stp_module_" << stpmod_idx << " = {\n";
c->output << ".name = " << lex_cast_qstring (modname) << ", \n";
- // Get the canonical path of the main file for comparison at runtime.
- // When given directly by the user through -d or in case of the kernel
- // name and path might differ. path should be used for matching.
- const char *mainfile;
- dwfl_module_info (m, NULL, NULL, NULL, NULL, NULL, &mainfile, NULL);
mainfile = canonicalize_file_name(mainfile);
c->output << ".path = " << lex_cast_qstring (mainfile) << ",\n";
@@ -4791,6 +4818,7 @@ dump_unwindsyms (Dwfl_Module *m,
// Emit symbol table & unwind data, plus any calls needed to register
// them with the runtime.
+void emit_symbol_data_done (unwindsym_dump_context*, systemtap_session&);
void
emit_symbol_data (systemtap_session& s)
@@ -4803,6 +4831,14 @@ emit_symbol_data (systemtap_session& s)
unwindsym_dump_context ctx = { s, kallsyms_out, 0, s.unwindsym_modules };
+ // Micro optimization, mainly to speed up tiny regression tests
+ // using just begin probe.
+ if (s.unwindsym_modules.size () == 0)
+ {
+ emit_symbol_data_done(&ctx, s);
+ return;
+ }
+
// XXX: copied from tapsets.cxx dwflpp::, sadly
static const char *debuginfo_path_arr = "+:.debug:/usr/lib/debug:build";
static const char *debuginfo_env_arr = getenv("SYSTEMTAP_DEBUGINFO_PATH");
@@ -4861,7 +4897,8 @@ emit_symbol_data (systemtap_session& s)
{
NULL, /* dwfl_linux_kernel_find_elf, */
dwfl_standard_find_debuginfo,
- dwfl_offline_section_address,
+ NULL, /* ET_REL not supported for user space, only ET_EXEC and ET_DYN.
+ dwfl_offline_section_address, */
(char **) & debuginfo_path
};
@@ -4894,20 +4931,25 @@ emit_symbol_data (systemtap_session& s)
dwfl_end(dwfl);
}
+ emit_symbol_data_done (&ctx, s);
+}
+void
+emit_symbol_data_done (unwindsym_dump_context *ctx, systemtap_session& s)
+{
// Print out a definition of the runtime's _stp_modules[] globals.
- kallsyms_out << "\n";
- kallsyms_out << "static struct _stp_module *_stp_modules [] = {\n";
- for (unsigned i=0; i<ctx.stp_module_index; i++)
+ ctx->output << "\n";
+ ctx->output << "static struct _stp_module *_stp_modules [] = {\n";
+ for (unsigned i=0; i<ctx->stp_module_index; i++)
{
- kallsyms_out << "& _stp_module_" << i << ",\n";
+ ctx->output << "& _stp_module_" << i << ",\n";
}
- kallsyms_out << "};\n";
- kallsyms_out << "static unsigned _stp_num_modules = " << ctx.stp_module_index << ";\n";
+ ctx->output << "};\n";
+ ctx->output << "static unsigned _stp_num_modules = " << ctx->stp_module_index << ";\n";
// Some nonexistent modules may have been identified with "-d". Note them.
- for (set<string>::iterator it = ctx.undone_unwindsym_modules.begin();
- it != ctx.undone_unwindsym_modules.end();
+ for (set<string>::iterator it = ctx->undone_unwindsym_modules.begin();
+ it != ctx->undone_unwindsym_modules.end();
it ++)
{
s.print_warning ("missing unwind/symbol data for module '" + (*it) + "'");