From dbb686641c390c199e53ab94c1b365df1a62e2b9 Mon Sep 17 00:00:00 2001 From: fche Date: Sat, 30 Dec 2006 03:54:51 +0000 Subject: 2006-12-29 Frank Ch. Eigler PR 3523. * tapsets.cxx (common_probe_entryfn_prologue,epilogue): Restore support for -t (benchmarking) mode. * translate.cxx (emit_common_header,emit_module_init):Ditto. (emit_module_exit,emit_probe): Ditto, with most meat here. 2006-12-29 Frank Ch. Eigler PR 3523. * buildok/fourteen.stp, fourteen-plus.stp: Tweak & add a test. * systemtap.base/bench.stp: Work around randomized-ordered probes. * systemtap.base/bench.exp: Tighten output requirements. --- ChangeLog | 8 +++ tapsets.cxx | 81 ++++++++---------------------- testsuite/ChangeLog | 7 +++ testsuite/buildok/fourteen-plus.stp | 27 ++++++++++ testsuite/buildok/fourteen.stp | 3 ++ testsuite/systemtap.base/bench.exp | 2 +- testsuite/systemtap.base/bench.stp | 2 +- translate.cxx | 99 +++++++++++++++++++++++++++++-------- 8 files changed, 147 insertions(+), 82 deletions(-) create mode 100755 testsuite/buildok/fourteen-plus.stp diff --git a/ChangeLog b/ChangeLog index 6fbcfbcb..c8f93cf0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2006-12-29 Frank Ch. Eigler + + PR 3523. + * tapsets.cxx (common_probe_entryfn_prologue,epilogue): Restore + support for -t (benchmarking) mode. + * translate.cxx (emit_common_header,emit_module_init):Ditto. + (emit_module_exit,emit_probe): Ditto, with most meat here. + 2006-12-22 Josh Stone * stap.1.in: Document how to specify the size of global arrays. diff --git a/tapsets.cxx b/tapsets.cxx index cc078779..968ba624 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -52,42 +52,6 @@ extern "C" { using namespace std; -#if 0 -static void -emit_probe_timing(derived_probe* p, translator_output* o) -{ - static set basest_names; - - string nm = p->basest()->name; - if (basest_names.find(nm) == basest_names.end()) - { - basest_names.insert (nm); - - o->newline() << "#ifdef STP_TIMING"; - o->newline() << "{"; - o->newline(1) << "const char *probe_point = " - << lex_cast_qstring (*p->basest()->locations[0]) - << ";"; - o->newline() << "const char *decl_location = " - << lex_cast_qstring (p->basest()->tok->location) - << ";"; - o->newline() << "struct stat_data *stats = _stp_stat_get (time_" - << p->basest()->name - << ", 0);"; - o->newline() << "const char *error;"; - o->newline() << "if (stats->count) {"; - o->newline(1) << "int64_t avg = _stp_div64 (&error, stats->sum, stats->count);"; - o->newline() << "_stp_printf (\"probe %s (%s), %lld hits taking %lldmin/%lldavg/%lldmax cycles.\\n\","; - o->newline() << "probe_point, decl_location, (long long) stats->count, (long long) stats->min, (long long) avg, (long long) stats->max);"; - o->newline() << "_stp_print_flush();"; - o->newline(-1) << "}"; - o->newline(-1) << "}"; - o->newline() << "#endif"; - } -} -#endif - - // ------------------------------------------------------------------------ // Generic derived_probe_group: contains an ordinary vector of the // given type. It provides only the enrollment function. @@ -170,11 +134,12 @@ common_probe_entryfn_prologue (translator_output* o, string statestr) o->newline() << "struct context* __restrict__ c;"; o->newline() << "unsigned long flags;"; -#if 0 o->newline() << "#ifdef STP_TIMING"; - o->newline() << "cycles_t cycles_atstart = get_cycles ();"; + // NB: we truncate cycles counts to 32 bits. Perhaps it should be + // fewer, if the hardware counter rolls over really quickly. See + // also ...epilogue(). + o->newline() << "int32_t cycles_atstart = (int32_t) get_cycles ();"; o->newline() << "#endif"; -#endif #if 0 /* XXX: PERFMON */ o->newline() << "static struct pfarg_ctx _pfm_context;"; @@ -221,12 +186,28 @@ common_probe_entryfn_prologue (translator_output* o, string statestr) o->newline() << "c->pi = 0;"; o->newline() << "c->probe_point = 0;"; o->newline() << "c->actioncount = 0;"; + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "c->statp = 0;"; + o->newline() << "#endif"; } void common_probe_entryfn_epilogue (translator_output* o) { + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "{"; + o->newline(1) << "int32_t cycles_atend = (int32_t) get_cycles ();"; + // Handle 32-bit wraparound. + o->newline() << "int32_t cycles_elapsed = (cycles_atend > cycles_atstart)"; + o->newline(1) << "? (cycles_atend - cycles_atstart)"; + o->newline() << ": (~(int32_t)0) - cycles_atstart + cycles_atend + 1;"; + + o->newline() << "if (likely (c->statp)) _stp_stat_add(*c->statp, cycles_elapsed);"; + o->indent(-1); + o->newline(-1) << "}"; + o->newline() << "#endif"; + o->newline() << "if (unlikely (c->last_error && c->last_error[0])) {"; o->newline(1) << "if (c->last_stmt != NULL)"; o->newline(1) << "_stp_softerror (\"%s near %s\", c->last_error, c->last_stmt);"; @@ -244,27 +225,6 @@ common_probe_entryfn_epilogue (translator_output* o) o->newline(-1) << "probe_epilogue:"; // context is free o->indent(1); -#if 0 - o->newline() << "#ifdef STP_TIMING"; - o->newline() << "{"; - o->newline(1) << "cycles_t cycles_atend = get_cycles ();"; - - // Handle wraparound. - // XXX: get_cycles() may return fewer significant digits than - // cycles_t can carry. On some machines, cycles_t is 64 bits wide - // but get_cycles() is only 52. So we should investigate truncating - // these get_cycles() return values to some reasonable smaller - // number of bits, perhaps 32 or even 24. - o->newline() << "int64_t cycles_elapsed = (cycles_atend > cycles_atstart)"; - o->newline(1) << "? (int64_t) (cycles_atend - cycles_atstart)"; - o->newline() << ": (int64_t) (~(cycles_t)0) - cycles_atstart + cycles_atend + 1;"; - - o->newline() << "_stp_stat_add(time_" << basest()->name << ",cycles_elapsed);"; - o->indent(-1); - o->newline(-1) << "}"; - o->newline() << "#endif"; -#endif - o->newline() << "local_irq_restore (flags);"; } @@ -5021,6 +4981,7 @@ void perfmon_derived_probe::emit_probe_entries (translator_output * o) { o->newline() << "#ifdef STP_TIMING"; + // NB: This variable may be multiply (but identically) defined. o->newline() << "static __cacheline_aligned Stat " << "time_" << basest()->name << ";"; o->newline() << "#endif"; diff --git a/testsuite/ChangeLog b/testsuite/ChangeLog index 6a64e636..f0f0f0c4 100644 --- a/testsuite/ChangeLog +++ b/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2006-12-29 Frank Ch. Eigler + + PR 3523. + * buildok/fourteen.stp, fourteen-plus.stp: Tweak & add a test. + * systemtap.base/bench.stp: Work around randomized-ordered probes. + * systemtap.base/bench.exp: Tighten output requirements. + 2006-12-22 Josh Stone * buildok/array_size.stp, parseko/array01.stp, parseko/array02.stp, diff --git a/testsuite/buildok/fourteen-plus.stp b/testsuite/buildok/fourteen-plus.stp new file mode 100755 index 00000000..781cc41d --- /dev/null +++ b/testsuite/buildok/fourteen-plus.stp @@ -0,0 +1,27 @@ +#! stap -tp4 + +# NB: beyond just checking for timer compilability, this test also plays +# with duplicate probe elimination logic. See also fourteen+.stp + +global i, j +probe timer.jiffies(100) { i++ } +probe timer.jiffies(100).randomize(100) { j++ } +probe timer.s(100) { i++ } +probe timer.s(100).randomize(100) { j++ } +probe timer.sec(100) { i++ } +probe timer.sec(100).randomize(100) { j++ } +probe timer.ms(100) { i++ } +probe timer.ms(100).randomize(100) { j++ } +probe timer.msec(100) { i++ } +probe timer.msec(100).randomize(100) { j++ } +probe timer.us(100) { i++ } +probe timer.us(100).randomize(100) { j++ } +probe timer.usec(100) { i++ } +probe timer.usec(100).randomize(100) { j++ } +probe timer.ns(100000) { i++ } +probe timer.ns(100000).randomize(100) { j++ } +probe timer.nsec(100000) { i++ } +probe timer.nsec(100000).randomize(100) { j++ } +probe timer.hz(100) { i++ } +probe timer.profile { i++ } +probe end { printf("i=%d j=%d\n", i, j) } diff --git a/testsuite/buildok/fourteen.stp b/testsuite/buildok/fourteen.stp index 86b3dad2..2a7b6001 100755 --- a/testsuite/buildok/fourteen.stp +++ b/testsuite/buildok/fourteen.stp @@ -1,5 +1,8 @@ #! stap -p4 +# NB: beyond just checking for timer compilability, this test also plays +# with duplicate probe elimination logic. See also fourteen+.stp + global i, j probe timer.jiffies(100) { i++ } probe timer.jiffies(100).randomize(100) { j++ } diff --git a/testsuite/systemtap.base/bench.exp b/testsuite/systemtap.base/bench.exp index bc3b28bb..4863a00c 100644 --- a/testsuite/systemtap.base/bench.exp +++ b/testsuite/systemtap.base/bench.exp @@ -11,4 +11,4 @@ expect { eof { } } wait -if {$ok > 10} { pass "$test ($ok)" } { fail "$test ($ok)" } +if {$ok == 15} { pass "$test ($ok)" } { fail "$test ($ok)" } diff --git a/testsuite/systemtap.base/bench.stp b/testsuite/systemtap.base/bench.stp index 0568bc97..915b8a5e 100644 --- a/testsuite/systemtap.base/bench.stp +++ b/testsuite/systemtap.base/bench.stp @@ -28,4 +28,4 @@ probe test.statcount = repeat {@count(gS)} probe test.statcount {} probe test.nnarrassmt = repeat {gnn[1] = 1} probe test.nnarrassmt {} global gnn probe test.ssarrassmt = repeat {gss["foo"] = "bar"} probe test.ssarrassmt {} global gss -probe begin { exit () } +probe begin(9999 /* ... after all the others! */) { exit () } diff --git a/translate.cxx b/translate.cxx index c20cde8d..4d4c83ac 100644 --- a/translate.cxx +++ b/translate.cxx @@ -813,6 +813,9 @@ c_unparser::emit_common_header () o->newline() << "const char *last_stmt;"; o->newline() << "struct pt_regs *regs;"; o->newline() << "struct kretprobe_instance *pi;"; + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "Stat *statp;"; + o->newline() << "#endif"; o->newline() << "union {"; o->indent(1); @@ -828,8 +831,11 @@ c_unparser::emit_common_header () { derived_probe* dp = session->probes[i]; + // NB: see c_unparser::emit_probe() for original copy of duplicate-hashing logic. ostringstream oss; + oss << "c->statp = & time_" << dp->basest()->name << ";" << endl; // -t anti-dupe dp->body->print(oss); + if (tmp_probe_contents.count(oss.str()) == 0) // unique { tmp_probe_contents[oss.str()] = dp->name; // save it @@ -909,12 +915,7 @@ c_unparser::emit_common_header () if (!session->stat_decls.empty()) o->newline() << "#include \"stat.c\"\n"; - // XXX: Cannot tell if statistics are being used for the timing collection. o->newline(); - o->newline() << "#ifdef STP_TIMING"; - o->newline() << "#include \"stat.c\""; - o->newline() << "#include \"arith.c\""; - o->newline() << "#endif"; } @@ -1075,20 +1076,18 @@ c_unparser::emit_module_init () } // initialize each Stat used for timing information - { - o->newline() << "#ifdef STP_TIMING"; - set basest_names; - for (unsigned i=0; iprobes.size(); i++) - { - string nm = session->probes[i]->basest()->name; - if (basest_names.find(nm) == basest_names.end()) - { - o->newline() << "time_" << nm << " = _stp_stat_init (HIST_NONE);"; - basest_names.insert (nm); - } - } - o->newline() << "#endif"; - } + o->newline() << "#ifdef STP_TIMING"; + set basest_names; + for (unsigned i=0; iprobes.size(); i++) + { + string nm = session->probes[i]->basest()->name; + if (basest_names.find(nm) == basest_names.end()) + { + o->newline() << "time_" << nm << " = _stp_stat_init (HIST_NONE);"; + basest_names.insert (nm); + } + } + o->newline() << "#endif"; for (unsigned i=0; inewline() << "free_percpu (contexts);"; + // print probe timing statistics + { + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "{"; + o->indent(1); + set basest_names; + for (unsigned i=0; iprobes.size(); i++) + { + probe* p = session->probes[i]->basest(); + string nm = p->name; + if (basest_names.find(nm) == basest_names.end()) + { + basest_names.insert (nm); + o->newline() << "{"; + o->newline(1) << "const char *probe_point = " + << lex_cast_qstring (* p->locations[0]) + << (p->locations.size() > 1 ? "\"+\"" : "") + << (p->locations.size() > 1 ? lex_cast_qstring(p->locations.size()-1) : "") + << ";"; + o->newline() << "const char *decl_location = " + << lex_cast_qstring (p->tok->location) + << ";"; + o->newline() << "struct stat_data *stats = _stp_stat_get (time_" + << p->name + << ", 0);"; + o->newline() << "const char *error;"; + o->newline() << "if (stats->count) {"; + o->newline(1) << "int64_t avg = _stp_div64 (&error, stats->sum, stats->count);"; + o->newline() << "_stp_printf (\"probe %s (%s), hits: %lld, cycles: %lldmin/%lldavg/%lldmax\\n\","; + o->newline() << "probe_point, decl_location, (long long) stats->count, (long long) stats->min, (long long) avg, (long long) stats->max);"; + o->newline() << "_stp_print_flush();"; + o->newline(-1) << "}"; + o->newline(-1) << "}"; + } + } + o->newline(-1) << "}"; + o->newline() << "#endif"; + } + // print final error/reentrancy counts if non-zero o->newline() << "if (atomic_read (& skipped_count) || " << "atomic_read (& error_count))"; @@ -1270,9 +1308,20 @@ c_unparser::emit_probe (derived_probe* v) // c->last_stmt = "identifier 'printf' at foo.stp::"; // // which would make comparisons impossible. + // + // NB: see also c_unparser:emit_common_header(), which duplicates + // this calculation. ostringstream oss; + + // NB: statp is just for avoiding designation as duplicate. It need not be C. + // NB: This code *could* be enclosed in an "if (session->timing)". That would + // recognize more duplicate probe handlers, but then the generated code could + // be very different with or without -t. + oss << "c->statp = & time_" << v->basest()->name << ";" << endl; + v->body->print(oss); + // If an identical probe has already been emitted, just call that // one. if (probe_contents.count(oss.str()) != 0) @@ -1308,6 +1357,10 @@ c_unparser::emit_probe (derived_probe* v) } else // This probe is unique. Remember it and output it. { + o->newline(); + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "static __cacheline_aligned Stat " << "time_" << v->basest()->name << ";"; + o->newline() << "#endif"; o->newline(); o->newline() << "static void " << v->name << " (struct context * __restrict__ c) "; o->line () << "{"; @@ -1319,6 +1372,10 @@ c_unparser::emit_probe (derived_probe* v) o->newline() << "struct " << v->name << "_locals * __restrict__ l ="; o->newline(1) << "& c->locals[0]." << v->name << ";"; o->newline(-1) << "(void) l;"; // make sure "l" is marked used + + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "c->statp = & time_" << v->basest()->name << ";"; + o->newline() << "#endif"; // emit all read/write locks for global variables varuse_collecting_visitor vut; @@ -4004,7 +4061,7 @@ translate_pass (systemtap_session& s) s.op->newline() << "#define MINSTACKSPACE 1024"; s.op->newline() << "#endif"; - // impedance mismatch + // XXX: impedance mismatch // STP_STRING_SIZE defines the size of the buffer // used in stack traces. s.op->newline() << "#ifndef STP_STRING_SIZE"; @@ -4030,6 +4087,8 @@ translate_pass (systemtap_session& s) s.op->newline() << "#include \"current.c\""; s.op->newline() << "#include \"stack.c\""; s.op->newline() << "#include \"regs.c\""; + s.op->newline() << "#include \"stat.c\""; + s.op->newline() << "#include \"arith.c\""; s.op->newline() << "#include "; s.op->newline() << "#include "; s.op->newline() << "#include "; -- cgit