diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | main.cxx | 8 | ||||
-rw-r--r-- | session.h | 1 | ||||
-rw-r--r-- | stap.1.in | 4 | ||||
-rw-r--r-- | tapsets.cxx | 26 | ||||
-rw-r--r-- | translate.cxx | 32 |
6 files changed, 81 insertions, 2 deletions
@@ -1,3 +1,15 @@ +2006-05-02 Will Cohen <wcohen@redhat.com> + + PR 2228 + * stap.1.in: Document "-t" option. + * main.cxx (main): Add "-t" option for collecting timing information. + * session.h (systemtap_session): Corresponding changes. + * tapsets.cxx (emit_probe_prologue, emit_probe_epilogue, + emit_probe_entries): + * translate.cxx: (emit_common_header, emit_module_init, + translate_pass): Add time collection of timing information. + + 2006-05-01 Frank Ch. Eigler <fche@elastic.org> * translate.cxx (visit_print_format): Fix regression in @@ -91,6 +91,7 @@ usage (systemtap_session& s) << " -c CMD start the probes, run CMD, and exit when it finishes" << endl << " -x PID sets target() to PID" << endl + << " -t benchmarking timing information generated" << endl ; // -d: dump safety-related external references @@ -124,6 +125,7 @@ main (int argc, char * const argv []) s.kernel_release = string (buf.release); s.architecture = string (buf.machine); s.verbose = 0; + s.timing = 0; s.guru_mode = false; s.bulk_mode = false; s.unoptimized = false; @@ -149,7 +151,7 @@ main (int argc, char * const argv []) while (true) { - int grc = getopt (argc, argv, "hVvp:I:e:o:R:r:m:kgc:x:D:bs:u"); + int grc = getopt (argc, argv, "hVvtp:I:e:o:R:r:m:kgc:x:D:bs:u"); if (grc < 0) break; switch (grc) @@ -162,6 +164,10 @@ main (int argc, char * const argv []) s.verbose ++; break; + case 't': + s.timing ++; + break; + case 'p': s.last_pass = atoi (optarg); if (s.last_pass < 1 || s.last_pass > 5) @@ -70,6 +70,7 @@ struct systemtap_session int target_pid; int last_pass; unsigned verbose; + unsigned timing; bool keep_tmpdir; bool guru_mode; bool bulk_mode; @@ -105,6 +105,10 @@ Unoptimized mode. Disable unused code elision during elaboration. .BI \-b Use relayfs-based bulk mode for kernel-to-user data transfer. .TP +.B \-t +Collect timing information on the number of times probe executes +and average amount of time spent in each probe. +.TP .BI \-s NUM Use NUM megabyte buffers for kernel-to-user data transfer. On a multiprocessor in bulk mode, this is a per-processor amount. diff --git a/tapsets.cxx b/tapsets.cxx index 50386480..278cc60c 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -94,7 +94,13 @@ derived_probe::emit_probe_prologue (translator_output* o, { o->newline() << "struct context* c;"; o->newline() << "unsigned long flags;"; + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "cycles_t cycles_atstart;"; + o->newline() << "#endif"; o->newline() << "local_irq_save (flags);"; + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "cycles_atstart = get_cycles ();"; + o->newline() << "#endif"; o->newline() << "c = per_cpu_ptr (contexts, smp_processor_id());"; o->newline() << "if (atomic_read (&session_state) != " << statereq << ")"; o->newline(1) << "goto probe_epilogue;"; @@ -139,7 +145,17 @@ derived_probe::emit_probe_epilogue (translator_output* o) o->newline() << "atomic_dec (&c->busy);"; o->newline(-1) << "probe_epilogue:"; - o->newline(1) << "local_irq_restore (flags);"; + o->newline(1) << "#ifdef STP_TIMING"; + o->newline() << "{"; + o->newline(1) << "cycles_t cycles_atend = get_cycles ();"; + o->newline() << "int64_t cycles_elapsed = (cycles_atend > cycles_atstart)"; + o->newline(1) << "? (int64_t) (cycles_atend - cycles_atstart)"; + o->newline() << ": (int64_t) (~(cycles_t)0) - cycles_atstart + cycles_atend + 1;"; + o->newline() << "_stp_stat_add(time_" << name << ",cycles_elapsed);"; + o->indent(-1); + o->newline(-1) << "}"; + o->newline() << "#endif"; + o->newline() << "local_irq_restore (flags);"; } @@ -198,6 +214,10 @@ be_derived_probe::emit_deregistrations (translator_output* o) void be_derived_probe::emit_probe_entries (translator_output* o) { + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "static __cacheline_aligned Stat " << "time_" << name << ";"; + o->newline() << "#endif"; + for (unsigned i=0; i<locations.size(); i++) { probe_point *l = locations[i]; @@ -3056,6 +3076,10 @@ dwarf_derived_probe::emit_probe_entries (translator_output* o) } o->newline(-1) << "};"; + o->newline(); + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "static __cacheline_aligned Stat " << "time_" << name << ";"; + o->newline() << "#endif"; // Construct a single entry function, and a struct kprobe pointing into // the entry function. The entry function will call the probe function. diff --git a/translate.cxx b/translate.cxx index b4c54a8d..e9a297e7 100644 --- a/translate.cxx +++ b/translate.cxx @@ -895,6 +895,14 @@ c_unparser::emit_common_header () if (!session->stat_decls.empty()) o->newline() << "#include \"stat.c\"\n"; + + // XXX: Cannot tell if statistics are being used for the timing collection. + o->newline(); + o->newline() << "#ifdef STP_TIMING"; + o->newline() << "#include \"stat.c\""; + o->newline() << "#include \"arith.c\""; + o->newline() << "#endif"; + } @@ -1006,6 +1014,18 @@ c_unparser::emit_module_init () o->newline() << "noinline void unregister_probe_" << i << " (void) {"; o->indent(1); session->probes[i]->emit_deregistrations (o); + o->newline() << "#ifdef STP_TIMING"; + o->newline(1) << "{"; + o->newline() << "struct stat_data *stats = _stp_stat_get (time_" + << session->probes[i]->name << ", 0);"; + o->newline() << "int64_t avg = 0;"; + o->newline() << "const char *error;"; + o->newline() << "if (stats->count) avg = _stp_div64(&error, stats->sum, stats->count);"; + o->newline() << "_stp_printf (\"time_" << session->probes[i]->name + << " %lld@%lld\\n\"," << "stats->count, avg);"; + o->newline() << "_stp_print_flush();"; + o->newline() << "#endif"; + o->newline(-1) << "}"; o->newline(-1) << "}"; } @@ -1040,6 +1060,15 @@ c_unparser::emit_module_init () o->newline() << "rwlock_init (& global_" << c_varname (v->name) << "_lock);"; } + // initialize each Stat used for timing information + o->newline() << "#ifdef STP_TIMING"; + for (unsigned i=0; i<session->probes.size(); i++) + { + o->newline() << "time_" << session->probes[i]->name + << " = _stp_stat_init (HIST_NONE);"; + } + o->newline() << "#endif"; + for (unsigned i=0; i<session->probes.size(); i++) { o->newline() << "rc = register_probe_" << i << "();"; @@ -3802,6 +3831,9 @@ translate_pass (systemtap_session& s) if (s.bulk_mode) s.op->newline() << "#define STP_RELAYFS"; + if (s.timing) + s.op->newline() << "#define STP_TIMING" << " " << s.timing ; + s.op->newline() << "#include \"runtime.h\""; s.op->newline() << "#include \"current.c\""; s.op->newline() << "#include \"stack.c\""; |