diff options
author | Dave Brolley <brolley@redhat.com> | 2009-07-16 11:29:59 -0400 |
---|---|---|
committer | Dave Brolley <brolley@redhat.com> | 2009-07-16 11:29:59 -0400 |
commit | 451535e8b0b1018a51206283d89d233d37ea8621 (patch) | |
tree | 766a67ed307a252e20b0fe077cb7f52505b54571 | |
parent | aa3ed413744f9830c272dbcedc3fffd1974b53ea (diff) | |
parent | f190c8d7aab46fbd15e33493cec7933c93d3c912 (diff) | |
download | systemtap-steved-451535e8b0b1018a51206283d89d233d37ea8621.tar.gz systemtap-steved-451535e8b0b1018a51206283d89d233d37ea8621.tar.xz systemtap-steved-451535e8b0b1018a51206283d89d233d37ea8621.zip |
Merge branch 'master' of git://sources.redhat.com/git/systemtap
33 files changed, 515 insertions, 52 deletions
@@ -8866,6 +8866,8 @@ if test $enable_translator == "yes"; then fi ac_config_files="$ac_config_files run-staprun" +ac_config_files="$ac_config_files dtrace" + cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -9597,6 +9599,7 @@ do "initscript/systemtap") CONFIG_FILES="$CONFIG_FILES initscript/systemtap" ;; "run-stap") CONFIG_FILES="$CONFIG_FILES run-stap" ;; "run-staprun") CONFIG_FILES="$CONFIG_FILES run-staprun" ;; + "dtrace") CONFIG_FILES="$CONFIG_FILES dtrace" ;; *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 $as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;} @@ -10386,6 +10389,7 @@ done ;; "run-stap":F) chmod +x run-stap ;; "run-staprun":F) chmod +x run-staprun ;; + "dtrace":F) chmod +x dtrace ;; esac done # for ac_tag diff --git a/configure.ac b/configure.ac index 747c549b..747aac97 100644 --- a/configure.ac +++ b/configure.ac @@ -586,6 +586,7 @@ if test $enable_translator == "yes"; then AC_CONFIG_FILES([run-stap], [chmod +x run-stap]) fi AC_CONFIG_FILES([run-staprun], [chmod +x run-staprun]) +AC_CONFIG_FILES([dtrace], [chmod +x dtrace]) AC_OUTPUT if test "${prefix}" = "/usr/local"; then diff --git a/doc/SystemTap_Beginners_Guide/en-US/Book_Info.xml b/doc/SystemTap_Beginners_Guide/en-US/Book_Info.xml index 8128fb07..5dfeeee2 100644 --- a/doc/SystemTap_Beginners_Guide/en-US/Book_Info.xml +++ b/doc/SystemTap_Beginners_Guide/en-US/Book_Info.xml @@ -4,7 +4,7 @@ <bookinfo id="SystemTap_Beginners_Guide"> <title>SystemTap Beginners Guide</title> - <subtitle condition="RedHat">Introduction to SystemTap (for Red Hat Enterprise Linux 5.3)</subtitle> + <subtitle condition="RedHat">Introduction to SystemTap (for Red Hat Enterprise Linux 5.4)</subtitle> <subtitle condition="fedora">Introduction to SystemTap (for Fedora 10)</subtitle> <edition>2.0</edition> diff --git 
a/doc/SystemTap_Beginners_Guide/en-US/Scripts.xml b/doc/SystemTap_Beginners_Guide/en-US/Scripts.xml index 88aa42ab..d6f7733f 100644 --- a/doc/SystemTap_Beginners_Guide/en-US/Scripts.xml +++ b/doc/SystemTap_Beginners_Guide/en-US/Scripts.xml @@ -377,6 +377,32 @@ probe kernel.function("*@net/socket.c").return { } </varlistentry> <varlistentry> + <term>kernel.trace("<replaceable>tracepoint</replaceable>")</term> + <listitem> +<indexterm><primary>tracepoint</primary></indexterm> +<indexterm> +<primary>Events</primary> +<secondary><command>kernel.trace("<replaceable>tracepoint</replaceable>")</command></secondary> +</indexterm> + +<indexterm> +<primary><command>kernel.trace("<replaceable>tracepoint</replaceable>")</command></primary> +<secondary>Events</secondary> +</indexterm> + <para> + The static probe for <replaceable>tracepoint</replaceable>. + Recent kernels (2.6.30 and newer) + include instrumentation for specific events in the kernel. These + events are statically marked with tracepoints. One example of a + tracepoint available in systemtap is + <command>kernel.trace("kfree_skb")</command> which indicates each + time a network buffer is freed in the kernel. 
+ </para> + </listitem> + + </varlistentry> + + <varlistentry> <term>module("<replaceable>module</replaceable>").function("<replaceable>function</replaceable>")</term> <listitem> <indexterm> diff --git a/doc/SystemTap_Beginners_Guide/en-US/Useful_Scripts-dropwatch.xml b/doc/SystemTap_Beginners_Guide/en-US/Useful_Scripts-dropwatch.xml new file mode 100644 index 00000000..c7bee988 --- /dev/null +++ b/doc/SystemTap_Beginners_Guide/en-US/Useful_Scripts-dropwatch.xml @@ -0,0 +1,114 @@ +<?xml version='1.0'?> +<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +]> + + + <section id="dropwatchsect"> + <title>Monitoring Network Packets Drops in Kernel</title> +<indexterm> +<primary>script examples</primary> +<secondary>network profiling</secondary> +</indexterm> + +<indexterm> +<primary>examples of SystemTap scripts</primary> +<secondary>network profiling</secondary> +</indexterm> + +<indexterm> +<primary>network profiling</primary> +<secondary>examples of SystemTap scripts</secondary> +</indexterm> + + +<remark> + probably http://sourceware.org/systemtap/examples/network/nettop.stp +</remark> + +<indexterm> +<primary>profiling the network</primary> +<secondary>examples of SystemTap scripts</secondary> +</indexterm> + +<indexterm> +<primary>network traffic, monitoring</primary> +<secondary>examples of SystemTap scripts</secondary> +</indexterm> + +<para> +<indexterm><primary>tracepoint</primary></indexterm> +The network stack in Linux +can discard packets for various reasons. Some Linux kernels include a +tracepoint, <command>kernel.trace("kfree_skb")</command>, to allow easy probing +to determine where the packets are discarded. The <xref linkend="dropwatch"/> +script uses that tracepoint to trace packet discards. The script summarizes the +locations discarding packets every five seconds as the total number of packets +discarded for each location. 
+</para> + +<formalpara id="dropwatch"> + <title>dropwatch.stp</title> +<para> +<programlisting> +<xi:include parse="text" href="extras/testsuite/systemtap.examples/network/dropwatch.stp" xmlns:xi="http://www.w3.org/2001/XInclude" /> +</programlisting> +</para> +</formalpara> + +<para> +The <command>kernel.trace("kfree_skb")</command> instruments each of the places +in the kernel that drops network packets. Like probes for functions, the +tracepoint probes also have arguments. The +<command>kernel.trace("kfree_skb")</command> has two arguments: a pointer to the +buffer being freed (<command>$skb</command>) and the location in kernel code where the +buffer is being freed (<command>$location</command>). +</para> + +<para> +Running the dropwatch.stp script for 15 seconds would result in output similar to that in +<xref linkend="dropwatchoutput"/>. The output lists the number of misses for +each tracepoint address and the actual address. +</para> + +<example id="dropwatchoutput"> + <title><xref linkend="dropwatch"/> Sample Output</title> +<screen> +Monitoring for dropped packets + +51 packets dropped at location 0xffffffff8024cd0f +2 packets dropped at location 0xffffffff8044b472 + +51 packets dropped at location 0xffffffff8024cd0f +1 packets dropped at location 0xffffffff8044b472 + +97 packets dropped at location 0xffffffff8024cd0f +1 packets dropped at location 0xffffffff8044b472 +Stopping dropped packet monitor +</screen> +</example> + +<para> +The functions containing the locations of the packet drops are determined using the +<command>/boot/System.map-`uname -r`</command> file. The System.map file lists +the starting addresses for each function. Below are the sections of the +System.map file containing the addresses in the dropwatch.stp output. The +address 0xffffffff8024cd0f maps to the function +<command>unix_stream_recvmsg</command> and the address 0xffffffff8044b472 maps +to the function <command>arp_rcv</command>. +</para> + +<screen> +[...] 
+ffffffff8024c5cd T unlock_new_inode +ffffffff8024c5da t unix_stream_sendmsg +ffffffff8024c920 t unix_stream_recvmsg +ffffffff8024cea1 t udp_v4_lookup_longway +[...] +ffffffff8044addc t arp_process +ffffffff8044b360 t arp_rcv +ffffffff8044b487 t parp_redo +ffffffff8044b48c t arp_solicit +[...] +</screen> + </section> + diff --git a/doc/SystemTap_Beginners_Guide/en-US/Useful_SystemTap_Scripts.xml b/doc/SystemTap_Beginners_Guide/en-US/Useful_SystemTap_Scripts.xml index eeab9b27..f9ba4290 100644 --- a/doc/SystemTap_Beginners_Guide/en-US/Useful_SystemTap_Scripts.xml +++ b/doc/SystemTap_Beginners_Guide/en-US/Useful_SystemTap_Scripts.xml @@ -43,6 +43,7 @@ <xi:include href="Useful_Scripts-sockettrace.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> <xi:include href="Useful_Scripts-tcp_connections.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> <xi:include condition="fedora" href="Useful_Scripts-tcpdumplike.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="Useful_Scripts-dropwatch.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> </section> <section id="mainsect-disk"> <title>Disk</title> @@ -179,5 +179,5 @@ if (build_source): f = open(fn,mode='w') f.write("#include \"" + filename + ".h\"\nstatic __dtrace () {}\n") f.close() - call(["gcc", "-fPIC", "-I.", "-g", "-c", fn, "-o", filename + ".o"], shell=False) + call(["gcc", "-fPIC", "-I.", "-I@prefix@/include", "-g", "-c", fn, "-o", filename + ".o"], shell=False) os.remove(fn) @@ -41,6 +41,9 @@ extern "C" { #include <fcntl.h> #include <elfutils/libdwfl.h> #include <elfutils/libdw.h> +#ifdef HAVE_ELFUTILS_VERSION_H +#include <elfutils/version.h> +#endif #include <dwarf.h> #include <elf.h> #include <obstack.h> @@ -1497,10 +1500,11 @@ dwflpp::translate_location(struct obstack *pool, e->tok); } + Dwarf_Op *cfa_ops = get_cfa_ops (pc); return c_translate_location (pool, &loc2c_error, this, &loc2c_emit_address, 1, 0 /* PR9768 */, - pc, expr, len, tail, fb_attr); + pc, expr, len, tail, fb_attr, cfa_ops); } 
@@ -1684,13 +1688,10 @@ dwflpp::translate_components(struct obstack *pool, break; case DW_TAG_pointer_type: - if (e->components[i].first == target_symbol::comp_literal_array_index) - throw semantic_error ("cannot index pointer", e->tok); - // XXX: of course, we should support this the same way C does, - // by explicit pointer arithmetic etc. PR4166. - c_translate_pointer (pool, 1, 0 /* PR9768*/, die, tail); - break; + if (e->components[i].first != target_symbol::comp_literal_array_index) + break; + /* else fall through as an array access */ case DW_TAG_array_type: if (e->components[i].first == target_symbol::comp_literal_array_index) @@ -1772,6 +1773,11 @@ dwflpp::translate_components(struct obstack *pool, if (dwarf_attr_integrate (die, DW_AT_type, attr_mem) == NULL) throw semantic_error ("cannot get type of field: " + string(dwarf_errmsg (-1)), e->tok); } + + /* For an array index, we need to dereference the final DIE */ + if (e->components.back().first == target_symbol::comp_literal_array_index) + die = dwarf_formref_die (attr_mem, die_mem); + return die; } @@ -1824,6 +1830,21 @@ dwflpp::translate_final_fetch_or_store (struct obstack *pool, typedie = resolve_unqualified_inner_typedie (&typedie_mem, attr_mem, e); typetag = dwarf_tag (typedie); + /* If we're looking for an address, then we can just provide what + we computed to this point, without using a fetch/store. */ + if (e->addressof) + { + if (lvalue) + throw semantic_error ("cannot write to member address", e->tok); + + if (dwarf_hasattr_integrate (die, DW_AT_bit_offset)) + throw semantic_error ("cannot take address of bit-field", e->tok); + + c_translate_addressof (pool, 1, 0, 0, die, tail, "THIS->__retvalue"); + ty = pe_long; + return; + } + /* Then switch behavior depending on the type of fetch/store we want, and the type and pointer-ness of the final location. 
*/ @@ -2083,7 +2104,7 @@ dwflpp::literal_stmt_for_return (Dwarf_Die *scope_die, &loc2c_emit_address, 1, 0 /* PR9768 */, pc, locops, nlocops, - &tail, NULL); + &tail, NULL, NULL); /* Translate the ->bar->baz[NN] parts. */ @@ -2487,5 +2508,36 @@ dwflpp::dwarf_getscopes_cached (Dwarf_Addr pc, Dwarf_Die **scopes) return num_cached_scopes; } +Dwarf_Op * +dwflpp::get_cfa_ops (Dwarf_Addr pc) +{ + Dwarf_Op *cfa_ops = NULL; + +#ifdef _ELFUTILS_PREREQ +#if _ELFUTILS_PREREQ(0,142) + // Try debug_frame first, then fall back on eh_frame. + Dwarf_Addr bias; + Dwarf_CFI *cfi = dwfl_module_dwarf_cfi (module, &bias); + if (cfi != NULL) + { + Dwarf_Frame *frame = NULL; + if (dwarf_cfi_addrframe (cfi, pc, &frame) == 0) + dwarf_frame_cfa (frame, &cfa_ops); + } + if (cfa_ops == NULL) + { + cfi = dwfl_module_eh_cfi (module, &bias); + if (cfi != NULL) + { + Dwarf_Frame *frame = NULL; + if (dwarf_cfi_addrframe (cfi, pc, &frame) == 0) + dwarf_frame_cfa (frame, &cfa_ops); + } + } +#endif +#endif + + return cfa_ops; +} /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ @@ -376,6 +376,10 @@ private: int num_cached_scopes; Dwarf_Die *cached_scopes; int dwarf_getscopes_cached (Dwarf_Addr pc, Dwarf_Die **scopes); + + // Returns the call frame address operations for the given program counter. 
+ Dwarf_Op *get_cfa_ops (Dwarf_Addr pc); + }; #endif // DWFLPP_H diff --git a/loc2c-test.c b/loc2c-test.c index 688f4a8b..0c992c76 100644 --- a/loc2c-test.c +++ b/loc2c-test.c @@ -75,7 +75,7 @@ get_location (Dwarf_Addr dwbias, Dwarf_Addr pc, Dwarf_Attribute *loc_attr, static void handle_variable (Dwarf_Die *scopes, int nscopes, int out, Dwarf_Addr cubias, Dwarf_Die *vardie, Dwarf_Addr pc, - char **fields) + Dwarf_Op *cfa_ops, char **fields) { #define obstack_chunk_alloc malloc #define obstack_chunk_free free @@ -121,7 +121,7 @@ handle_variable (Dwarf_Die *scopes, int nscopes, int out, struct location *head, *tail = NULL; head = c_translate_location (&pool, &fail, NULL, NULL, 1, cubias, pc, locexpr, locexpr_len, - &tail, fb_attr); + &tail, fb_attr, cfa_ops); if (dwarf_attr_integrate (vardie, DW_AT_type, &attr_mem) == NULL) error (2, 0, _("cannot get type of variable: %s"), @@ -240,7 +240,7 @@ handle_variable (Dwarf_Die *scopes, int nscopes, int out, &locexpr_len); c_translate_location (&pool, NULL, NULL, NULL, 1, cubias, pc, locexpr, locexpr_len, - &tail, NULL); + &tail, NULL, NULL); break; } } @@ -521,7 +521,40 @@ main (int argc, char **argv) error (0, 0, "dwarf_getscopevar: %s (+%d, %s:%d:%d): %s", spec, shadow, at, lineno, colno, dwarf_errmsg (-1)); else - handle_variable (scopes, n, out, cubias, &vardie, pc, &argv[argi]); + { + Dwarf_Op *cfa_ops = NULL; + +#ifdef _ELFUTILS_PREREQ +#if _ELFUTILS_PREREQ(0,142) + Dwarf_Addr bias; + Dwfl_Module *module = dwfl_addrmodule (dwfl, pc); + if (module != NULL) + { + // Try debug_frame first, then fall back on eh_frame. 
+ Dwarf_CFI *cfi = dwfl_module_dwarf_cfi (module, &bias); + if (cfi != NULL) + { + Dwarf_Frame *frame = NULL; + if (dwarf_cfi_addrframe (cfi, pc, &frame) == 0) + dwarf_frame_cfa (frame, &cfa_ops); + } + if (cfa_ops == NULL) + { + cfi = dwfl_module_eh_cfi (module, &bias); + if (cfi != NULL) + { + Dwarf_Frame *frame = NULL; + if (dwarf_cfi_addrframe (cfi, pc, &frame) == 0) + dwarf_frame_cfa (frame, &cfa_ops); + } + } + } +#endif +#endif + + handle_variable (scopes, n, out, cubias, &vardie, pc, cfa_ops, + &argv[argi]); + } } free (scopes); @@ -509,6 +509,15 @@ translate (struct obstack *pool, int indent, Dwarf_Addr addrbias, } break; + case DW_OP_call_frame_cfa: + // We pick this out when processing DW_AT_frame_base in + // so it really shouldn't turn up here. + if (need_fb == NULL) + DIE ("DW_OP_call_frame_cfa while processing frame base"); + else + DIE ("DW_OP_call_frame_cfa not expected outside DW_AT_frame_base"); + break; + case DW_OP_push_object_address: DIE ("XXX DW_OP_push_object_address"); break; @@ -552,7 +561,8 @@ location_from_address (struct obstack *pool, struct obstack *, Dwarf_Addr), int indent, Dwarf_Addr dwbias, const Dwarf_Op *expr, size_t len, Dwarf_Addr address, - struct location **input, Dwarf_Attribute *fb_attr) + struct location **input, Dwarf_Attribute *fb_attr, + const Dwarf_Op *cfa_ops) { struct location *loc = obstack_alloc (pool, sizeof *loc); loc->fail = *input == NULL ? fail : (*input)->fail; @@ -599,8 +609,19 @@ location_from_address (struct obstack *pool, return NULL; } + // If it is DW_OP_call_frame_cfa then get cfi cfa ops. 
+ const Dwarf_Op * fb_ops; + if (fb_len == 1 && fb_expr[0].atom == DW_OP_call_frame_cfa) + { + if (cfa_ops == NULL) + FAIL (loc, N_("No cfa_ops supplied, but needed by DW_OP_call_frame_cfa")); + fb_ops = cfa_ops; + } + else + fb_ops = fb_expr; + loc->frame_base = alloc_location (pool, loc); - failure = translate (pool, indent + 1, dwbias, fb_expr, fb_len, NULL, + failure = translate (pool, indent + 1, dwbias, fb_ops, fb_len, NULL, NULL, &loser, loc->frame_base); if (failure != NULL) return lose (loc, failure, fb_expr, loser); @@ -622,7 +643,8 @@ static struct location * location_relative (struct obstack *pool, int indent, Dwarf_Addr dwbias, const Dwarf_Op *expr, size_t len, Dwarf_Addr address, - struct location **input, Dwarf_Attribute *fb_attr) + struct location **input, Dwarf_Attribute *fb_attr, + const Dwarf_Op *cfa_ops) { Dwarf_Sword *stack = NULL; unsigned int stack_depth = 0, max_stack = 0; @@ -752,7 +774,8 @@ location_relative (struct obstack *pool, /* This started from a register, but now it's following a pointer. So we can do the translation starting from address here. */ return location_from_address (pool, NULL, NULL, NULL, indent, dwbias, - expr, len, address, input, fb_attr); + expr, len, address, input, fb_attr, + cfa_ops); /* Constant-value operations. 
*/ @@ -911,7 +934,8 @@ location_relative (struct obstack *pool, loc = location_from_address (pool, NULL, NULL, NULL, indent, dwbias, &expr[i + 1], len - i - 1, - address, input, fb_attr); + address, input, fb_attr, + cfa_ops); if (loc == NULL) return NULL; } @@ -994,7 +1018,8 @@ c_translate_location (struct obstack *pool, struct obstack *, Dwarf_Addr), int indent, Dwarf_Addr dwbias, Dwarf_Addr pc_address, const Dwarf_Op *expr, size_t len, - struct location **input, Dwarf_Attribute *fb_attr) + struct location **input, Dwarf_Attribute *fb_attr, + const Dwarf_Op *cfa_ops) { indent += 2; @@ -1006,14 +1031,14 @@ c_translate_location (struct obstack *pool, return location_from_address (pool, fail, fail_arg, emit_address ?: &default_emit_address, indent, dwbias, expr, len, pc_address, - input, fb_attr); + input, fb_attr, cfa_ops); case loc_noncontiguous: case loc_register: /* The starting point is not an address computation, but a register. We can only handle limited computations from here. */ return location_relative (pool, indent, dwbias, expr, len, pc_address, - input, fb_attr); + input, fb_attr, cfa_ops); default: abort (); @@ -1745,7 +1770,8 @@ c_translate_array (struct obstack *pool, int indent, Dwarf_Die *typedie, struct location **input, const char *idx, Dwarf_Word const_idx) { - assert (dwarf_tag (typedie) == DW_TAG_array_type); + assert (dwarf_tag (typedie) == DW_TAG_array_type || + dwarf_tag (typedie) == DW_TAG_pointer_type); ++indent; @@ -11,7 +11,8 @@ struct location; /* Opaque */ /* Translate a C fragment for the location expression, using *INPUT as the starting location, begin from scratch if *INPUT is null. If DW_OP_fbreg is used, it may have a subfragment computing from - the FB_ATTR location expression. + the FB_ATTR location expression. The call_frame might need to be + calculated by the cfa_ops for the given pc_address. On errors, call FAIL, which should not return. Any later errors will use FAIL and FAIL_ARG from the first c_translate_location call. 
@@ -34,7 +35,8 @@ struct location *c_translate_location (struct obstack *, const Dwarf_Op *locexpr, size_t locexprlen, struct location **input, - Dwarf_Attribute *fb_attr); + Dwarf_Attribute *fb_attr, + const Dwarf_Op *cfa_ops); /* Translate a fragment to dereference the given DW_TAG_pointer_type DIE, where *INPUT is the location of the pointer with that type. */ @@ -2239,6 +2239,18 @@ parser::parse_value () throw parse_error ("expected ')'"); return e; } + else if (t->type == tok_operator && t->content == "&") + { + next (); + t = peek (); + if (t->type != tok_identifier || + (t->content != "@cast" && t->content[0] != '$')) + throw parse_error ("expected @cast or $var"); + + target_symbol *ts = static_cast<target_symbol*>(parse_symbol()); + ts->addressof = true; + return ts; + } else if (t->type == tok_identifier) return parse_symbol (); else diff --git a/runtime/transport/control.c b/runtime/transport/control.c index 35130f0f..925a6768 100644 --- a/runtime/transport/control.c +++ b/runtime/transport/control.c @@ -34,8 +34,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz count -= sizeof(u32); buf += sizeof(u32); - -#ifdef DEBUG_TRANS +#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2) if (type < STP_MAX_CMD) dbug_trans2("Got %s. 
len=%d\n", _stp_command_name[type], (int)count); diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c index fe63bc83..0c37621f 100644 --- a/runtime/transport/ring_buffer.c +++ b/runtime/transport/ring_buffer.c @@ -19,8 +19,29 @@ struct _stp_data_entry { * results to users and which routines might sleep, etc: */ struct _stp_ring_buffer_data { +#if 0 + struct trace_array *tr; + struct tracer *trace; + void *private; + int cpu_file; + struct mutex mutex; +#endif + struct ring_buffer_iter *buffer_iter[NR_CPUS]; +#if 0 + unsigned long iter_flags; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; +#endif int cpu; u64 ts; +#if 0 + loff_t pos; + long idx; + + cpumask_var_t started; +#endif }; struct _stp_relay_data_type { @@ -68,7 +89,7 @@ static int __stp_alloc_ring_buffer(void) if (!_stp_relay_data.rb) goto fail; - dbug_trans(1, "size = %lu\n", ring_buffer_size(_stp_relay_data.rb)); + dbug_trans(0, "size = %lu\n", ring_buffer_size(_stp_relay_data.rb)); return 0; fail: @@ -78,7 +99,7 @@ fail: static int _stp_data_open_trace(struct inode *inode, struct file *file) { - long cpu_file = (long) inode->i_private; + int cpu_file = (int)(long) inode->i_private; /* We only allow for one reader per cpu */ dbug_trans(1, "trace attach\n"); @@ -102,14 +123,13 @@ static int _stp_data_open_trace(struct inode *inode, struct file *file) static int _stp_data_release_trace(struct inode *inode, struct file *file) { - long cpu_file = (long) inode->i_private; + int cpu_file = (int)(long) inode->i_private; dbug_trans(1, "trace detach\n"); #ifdef STP_BULKMODE cpumask_clear_cpu(cpu_file, _stp_relay_data.trace_reader_cpumask); #else cpumask_clear(_stp_relay_data.trace_reader_cpumask); #endif - return 0; } @@ -141,9 +161,43 @@ _stp_event_to_user(struct ring_buffer_event *event, char __user *ubuf, return cnt; } +static int _stp_ring_buffer_empty_cpu(int cpu) +{ + if (_stp_relay_data.rb_data.buffer_iter[cpu]) { + if 
(ring_buffer_iter_empty(_stp_relay_data.rb_data.buffer_iter[cpu])) + return 1; + } + else { + if (ring_buffer_empty_cpu(_stp_relay_data.rb, cpu)) + return 1; + } + return 0; +} + +static int _stp_ring_buffer_empty(void) +{ +#ifdef STP_BULKMODE + return _stp_ring_buffer_empty_cpu(_stp_relay_data.rb_data.cpu); +#else + int cpu; + for_each_possible_cpu(cpu) { + if (! _stp_ring_buffer_empty_cpu(cpu)) + return 0; + } + return 1; +#endif +} + +static void _stp_ring_buffer_iterator_increment(void) +{ + if (_stp_relay_data.rb_data.buffer_iter[_stp_relay_data.rb_data.cpu]) { + ring_buffer_read(_stp_relay_data.rb_data.buffer_iter[_stp_relay_data.rb_data.cpu], NULL); + } +} + static ssize_t _stp_tracing_wait_pipe(struct file *filp) { - if (ring_buffer_empty(_stp_relay_data.rb)) { + if (_stp_ring_buffer_empty()) { if ((filp->f_flags & O_NONBLOCK)) { dbug_trans(1, "returning -EAGAIN\n"); return -EAGAIN; @@ -163,11 +217,14 @@ static ssize_t _stp_tracing_wait_pipe(struct file *filp) static struct ring_buffer_event *_stp_peek_next_event(int cpu, u64 *ts) { - return ring_buffer_peek(_stp_relay_data.rb, cpu, ts); + if (_stp_relay_data.rb_data.buffer_iter[cpu]) + return ring_buffer_iter_peek(_stp_relay_data.rb_data.buffer_iter[cpu], ts); + else + return ring_buffer_peek(_stp_relay_data.rb, cpu, ts); } /* Find the next real event */ -static struct ring_buffer_event *_stp_find_next_event(long cpu_file) +static struct ring_buffer_event *_stp_find_next_event(int cpu_file) { struct ring_buffer_event *event; @@ -176,11 +233,13 @@ static struct ring_buffer_event *_stp_find_next_event(long cpu_file) * If we are in a per_cpu trace file, don't bother by iterating over * all cpus and peek directly. 
*/ - if (ring_buffer_empty_cpu(_stp_relay_data.rb, (int)cpu_file)) + if (_stp_ring_buffer_empty_cpu(cpu_file)) return NULL; event = _stp_peek_next_event(cpu_file, &_stp_relay_data.rb_data.ts); _stp_relay_data.rb_data.cpu = cpu_file; + if (event) + _stp_ring_buffer_iterator_increment(); return event; #else struct ring_buffer_event *next = NULL; @@ -189,8 +248,7 @@ static struct ring_buffer_event *_stp_find_next_event(long cpu_file) int cpu; for_each_possible_cpu(cpu) { - - if (ring_buffer_empty_cpu(_stp_relay_data.rb, cpu)) + if (_stp_ring_buffer_empty_cpu(cpu)) continue; event = _stp_peek_next_event(cpu, &ts); @@ -207,6 +265,8 @@ static struct ring_buffer_event *_stp_find_next_event(long cpu_file) _stp_relay_data.rb_data.cpu = next_cpu; _stp_relay_data.rb_data.ts = next_ts; + if (next) + _stp_ring_buffer_iterator_increment(); return next; #endif @@ -222,7 +282,10 @@ _stp_data_read_trace(struct file *filp, char __user *ubuf, { ssize_t sret; struct ring_buffer_event *event; - long cpu_file = (long) filp->private_data; + int cpu_file = (int)(long) filp->private_data; +#ifndef STP_BULKMODE + int cpu; +#endif dbug_trans(1, "%lu\n", (unsigned long)cnt); @@ -231,8 +294,19 @@ _stp_data_read_trace(struct file *filp, char __user *ubuf, if (sret <= 0) goto out; +#ifdef STP_BULKMODE + _stp_relay_data.rb_data.buffer_iter[cpu_file] + = ring_buffer_read_start(_stp_relay_data.rb, cpu_file); +#else + for_each_possible_cpu(cpu) { + _stp_relay_data.rb_data.buffer_iter[cpu] + = ring_buffer_read_start(_stp_relay_data.rb, cpu); + } +#endif + dbug_trans(0, "iterator(s) started\n"); + /* stop when tracing is finished */ - if (ring_buffer_empty(_stp_relay_data.rb)) { + if (_stp_ring_buffer_empty()) { sret = 0; goto out; } @@ -259,6 +333,22 @@ _stp_data_read_trace(struct file *filp, char __user *ubuf, break; } out: +#ifdef STP_BULKMODE + if (_stp_relay_data.rb_data.buffer_iter[cpu_file]) { + ring_buffer_read_finish(_stp_relay_data.rb_data.buffer_iter[cpu_file]); + 
_stp_relay_data.rb_data.buffer_iter[cpu_file] = NULL; + dbug_trans(0, "iterator finished\n"); + } +#else + for_each_possible_cpu(cpu) { + if (_stp_relay_data.rb_data.buffer_iter[cpu]) { + ring_buffer_read_finish(_stp_relay_data.rb_data.buffer_iter[cpu]); + _stp_relay_data.rb_data.buffer_iter[cpu] = NULL; + } + } +#endif + dbug_trans(0, "iterator(s) finished\n"); + return sret; } @@ -267,10 +357,10 @@ static unsigned int _stp_data_poll_trace(struct file *filp, poll_table *poll_table) { dbug_trans(1, "entry\n"); - if (! ring_buffer_empty(_stp_relay_data.rb)) + if (! _stp_ring_buffer_empty()) return POLLIN | POLLRDNORM; poll_wait(filp, &_stp_poll_wait, poll_table); - if (! ring_buffer_empty(_stp_relay_data.rb)) + if (! _stp_ring_buffer_empty()) return POLLIN | POLLRDNORM; dbug_trans(1, "exit\n"); @@ -410,7 +500,7 @@ static int _stp_data_write_commit(void *entry) static void __stp_relay_wakeup_timer(unsigned long val) { if (waitqueue_active(&_stp_poll_wait) - && ! ring_buffer_empty(_stp_relay_data.rb)) + && ! 
_stp_ring_buffer_empty()) wake_up_interruptible(&_stp_poll_wait); mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL); } @@ -435,7 +525,7 @@ static struct dentry *__stp_entry[NR_CPUS] = { NULL }; static int _stp_transport_data_fs_init(void) { int rc; - long cpu; + int cpu; _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; _stp_relay_data.rb = NULL; @@ -454,10 +544,10 @@ static int _stp_transport_data_fs_init(void) _stp_transport_data_fs_close(); return -EINVAL; } - sprintf(cpu_file, "trace%ld", cpu); + sprintf(cpu_file, "trace%d", cpu); __stp_entry[cpu] = debugfs_create_file(cpu_file, 0600, _stp_get_module_dir(), - (void *)cpu, + (void *)(long)cpu, &__stp_data_fops); if (!__stp_entry[cpu]) { @@ -474,7 +564,7 @@ static int _stp_transport_data_fs_init(void) __stp_entry[cpu]->d_inode->i_uid = _stp_uid; __stp_entry[cpu]->d_inode->i_gid = _stp_gid; - __stp_entry[cpu]->d_inode->i_private = (void *)cpu; + __stp_entry[cpu]->d_inode->i_private = (void *)(long)cpu; #ifndef STP_BULKMODE if (cpu != 0) diff --git a/staptree.cxx b/staptree.cxx index df075f44..a762cf47 100644 --- a/staptree.cxx +++ b/staptree.cxx @@ -262,6 +262,8 @@ void symbol::print (ostream& o) const void target_symbol::print (std::ostream& o) const { + if (addressof) + o << "&"; o << base_name; for (unsigned i = 0; i < components.size(); ++i) { @@ -280,6 +282,8 @@ void target_symbol::print (std::ostream& o) const void cast_op::print (std::ostream& o) const { + if (addressof) + o << "&"; o << base_name << '(' << *operand; o << ", " << lex_cast_qstring (type); if (module.length() > 0) @@ -231,11 +231,12 @@ struct target_symbol: public symbol comp_struct_member, comp_literal_array_index }; + bool addressof; std::string base_name; std::vector<std::pair<component_type, std::string> > components; std::string probe_context_var; semantic_error* saved_conversion_error; - target_symbol(): saved_conversion_error (0) {} + target_symbol(): addressof(false), saved_conversion_error (0) {} void print 
(std::ostream& o) const; void visit (visitor* u); }; diff --git a/systemtap.spec b/systemtap.spec index d679d969..540fe764 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -80,7 +80,7 @@ Summary: Instrumentation System Testsuite Group: Development/System License: GPLv2+ URL: http://sourceware.org/systemtap/ -Requires: systemtap dejagnu +Requires: systemtap systemtap-sdt-devel dejagnu %description testsuite The testsuite allows testing of the entire SystemTap toolchain diff --git a/tapset-mark.cxx b/tapset-mark.cxx index 7544d7bb..4d8679d2 100644 --- a/tapset-mark.cxx +++ b/tapset-mark.cxx @@ -175,6 +175,9 @@ mark_var_expanding_visitor::visit_target_symbol (target_symbol* e) { assert(e->base_name.size() > 0 && e->base_name[0] == '$'); + if (e->addressof) + throw semantic_error("cannot take address of marker variable", e->tok); + if (e->base_name.substr(0,4) == "$arg") visit_target_symbol_arg (e); else if (e->base_name == "$format" || e->base_name == "$name") diff --git a/tapset-perfmon.cxx b/tapset-perfmon.cxx index e3f30ece..0fb567f7 100644 --- a/tapset-perfmon.cxx +++ b/tapset-perfmon.cxx @@ -64,6 +64,9 @@ perfmon_var_expanding_visitor::visit_target_symbol (target_symbol *e) if (e->base_name != "$counter") throw semantic_error ("target variables not available to perfmon probes"); + if (e->addressof) + throw semantic_error("cannot take address of perfmon variable", e->tok); + if (e->components.size() > 0) { switch (e->components[0].first) diff --git a/tapset-procfs.cxx b/tapset-procfs.cxx index 0c33857b..a996ee32 100644 --- a/tapset-procfs.cxx +++ b/tapset-procfs.cxx @@ -383,6 +383,9 @@ procfs_var_expanding_visitor::visit_target_symbol (target_symbol* e) else if (! write_probe && ! lvalue) throw semantic_error("procfs $value variable cannot be read in a procfs read probe", e->tok); + if (e->addressof) + throw semantic_error("cannot take address of procfs variable", e->tok); + // Remember that we've seen a target variable. 
target_symbol_seen = true; diff --git a/tapset-utrace.cxx b/tapset-utrace.cxx index 64f546e6..ec20282a 100644 --- a/tapset-utrace.cxx +++ b/tapset-utrace.cxx @@ -539,6 +539,9 @@ utrace_var_expanding_visitor::visit_target_symbol (target_symbol* e) throw semantic_error ("only \"process(PATH_OR_PID).syscall\" and \"process(PATH_OR_PID).syscall.return\" probes support target symbols", e->tok); + if (e->addressof) + throw semantic_error("cannot take address of utrace variable", e->tok); + if (e->base_name.substr(0,4) == "$arg") visit_target_symbol_arg(e); else if (e->base_name == "$syscall" || e->base_name == "$return") diff --git a/tapsets.cxx b/tapsets.cxx index 51e2917f..daf6cb2b 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -48,7 +48,6 @@ extern "C" { #include <stdio.h> #include <sys/types.h> -#include "loc2c.h" #define __STDC_FORMAT_MACROS #include <inttypes.h> } @@ -2335,6 +2334,9 @@ dwarf_var_expanding_visitor::visit_target_symbol (target_symbol *e) if (dwarf_getscopes_die (scope_die, &scopes) == 0) return; + if (e->addressof) + throw semantic_error("cannot take address of context variable", e->tok); + target_symbol *tsym = new target_symbol; print_format* pf = new print_format; @@ -3365,6 +3367,9 @@ sdt_var_expanding_visitor::visit_target_symbol (target_symbol *e) { if (e->base_name == "$$name") { + if (e->addressof) + throw semantic_error("cannot take address of sdt variable", e->tok); + literal_string *myname = new literal_string (probe_name); myname->tok = e->tok; provide(myname); @@ -3418,6 +3423,9 @@ sdt_var_expanding_visitor::visit_target_symbol (target_symbol *e) if (e->components.empty()) { + if (e->addressof) + throw semantic_error("cannot take address of sdt variable", e->tok); + provide(fc); return; } @@ -5050,6 +5058,9 @@ tracepoint_var_expanding_visitor::visit_target_symbol_arg (target_symbol* e) if (e->components.empty()) { + if (e->addressof) + throw semantic_error("cannot take address of tracepoint variable", e->tok); + // Just grab the value 
from the probe locals e->probe_context_var = "__tracepoint_arg_" + arg->name; e->type = pe_long; @@ -5149,6 +5160,9 @@ tracepoint_var_expanding_visitor::visit_target_symbol_arg (target_symbol* e) void tracepoint_var_expanding_visitor::visit_target_symbol_context (target_symbol* e) { + if (e->addressof) + throw semantic_error("cannot take address of context variable", e->tok); + if (is_active_lvalue (e)) throw semantic_error("write to tracepoint '" + e->base_name + "' not permitted", e->tok); diff --git a/testsuite/semko/target_addr1.stp b/testsuite/semko/target_addr1.stp new file mode 100755 index 00000000..cac3aab2 --- /dev/null +++ b/testsuite/semko/target_addr1.stp @@ -0,0 +1,6 @@ +#! stap -p2 + +// can't take the address of bitfields +probe kernel.function("release_task") { + println(& $p->did_exec) // unsigned:1 +} diff --git a/testsuite/semko/target_addr2.stp b/testsuite/semko/target_addr2.stp new file mode 100755 index 00000000..36133e3e --- /dev/null +++ b/testsuite/semko/target_addr2.stp @@ -0,0 +1,6 @@ +#! stap -p2 + +// can't take the address of register parameters +probe kernel.function("do_sys_open") { + println(& $dfd) +} diff --git a/testsuite/semko/target_addr3.stp b/testsuite/semko/target_addr3.stp new file mode 100755 index 00000000..fe072adb --- /dev/null +++ b/testsuite/semko/target_addr3.stp @@ -0,0 +1,6 @@ +#! stap -p2 + +// can't take the address of register return values +probe kernel.function("do_sys_open").return { + println(& $return) +} diff --git a/testsuite/semok/target_addr.stp b/testsuite/semok/target_addr.stp new file mode 100755 index 00000000..dfbc2606 --- /dev/null +++ b/testsuite/semok/target_addr.stp @@ -0,0 +1,11 @@ +#! stap -p2 + +// read the address of various task_struct members. 
+// all should roughly be $p + offsetof(foo) +probe kernel.function("release_task") { + println(& $p->state) // long + println(& $p->usage) // atomic_t + println(& $p->comm) // comm[TASK_COMM_LEN] + println(& $p->comm[1]) + println(& $p->parent) // task_struct* +} diff --git a/testsuite/systemtap.base/cast.exp b/testsuite/systemtap.base/cast.exp index 374132f0..38ef67b9 100644 --- a/testsuite/systemtap.base/cast.exp +++ b/testsuite/systemtap.base/cast.exp @@ -2,5 +2,6 @@ set test "cast" set ::result_string {PID OK PID2 OK execname OK -sa_data OK} +sa_data OK +usage OK} stap_run2 $srcdir/$subdir/$test.stp -g diff --git a/testsuite/systemtap.base/cast.stp b/testsuite/systemtap.base/cast.stp index e2505000..bb889bb8 100644 --- a/testsuite/systemtap.base/cast.stp +++ b/testsuite/systemtap.base/cast.stp @@ -33,6 +33,15 @@ probe begin else printf("sa_data %d != %d\n", data, cast_data) + // Compare usage counter values through a struct address + usage = @cast(curr, "task_struct")->usage->counter + pusage = & @cast(curr, "task_struct")->usage + cast_usage = @cast(pusage, "atomic_t")->counter + if (usage == cast_usage) + println("usage OK") + else + printf("usage %d != %d\n", usage, cast_usage) + exit() } diff --git a/testsuite/systemtap.base/pointer_array.exp b/testsuite/systemtap.base/pointer_array.exp new file mode 100644 index 00000000..0e3af213 --- /dev/null +++ b/testsuite/systemtap.base/pointer_array.exp @@ -0,0 +1,13 @@ +set test "pointer_array" +set ::result_string {/bin/true +/ +b +i +n +/ +t +r +u +e +0} +stap_run2 $srcdir/$subdir/$test.stp -c/bin/true diff --git a/testsuite/systemtap.base/pointer_array.stp b/testsuite/systemtap.base/pointer_array.stp new file mode 100644 index 00000000..1d15ebf4 --- /dev/null +++ b/testsuite/systemtap.base/pointer_array.stp @@ -0,0 +1,16 @@ +probe syscall.execve +{ + if (pid() == target()) { + println(user_string($argv[0])) + printf("%c\n", $argv[0][0]) + printf("%c\n", $argv[0][1]) + printf("%c\n", $argv[0][2]) + printf("%c\n", 
$argv[0][3]) + printf("%c\n", $argv[0][4]) + printf("%c\n", $argv[0][5]) + printf("%c\n", $argv[0][6]) + printf("%c\n", $argv[0][7]) + printf("%c\n", $argv[0][8]) + println($argv[0][9]) + } +} diff --git a/testsuite/systemtap.base/utrace_syscall_args.stp b/testsuite/systemtap.base/utrace_syscall_args.stp index 6c9e14fc..5c6ca451 100644 --- a/testsuite/systemtap.base/utrace_syscall_args.stp +++ b/testsuite/systemtap.base/utrace_syscall_args.stp @@ -36,7 +36,7 @@ probe begin } probe syscall.open { - if (filename == "foobar") { + if (pid() == target() && filename == "foobar") { syscalls_seen += 1 } } diff --git a/testsuite/systemtap.exelib/exelib.exp b/testsuite/systemtap.exelib/exelib.exp index c4ca8fc0..bd9c687e 100644 --- a/testsuite/systemtap.exelib/exelib.exp +++ b/testsuite/systemtap.exelib/exelib.exp @@ -48,7 +48,7 @@ foreach arch $arches { # Adding -O, -O2, -Os and mixing lib/exe is a bit overdone foreach opt {-O0 -O3} { - foreach libprelink {no yes} { # BUG! "yes" breaks ustack PR10323 + foreach libprelink {no yes} { # not done yet, "no" lib debug. # seperate debuginfo can be done before or after prelinking |