summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog21
-rw-r--r--elaborate.cxx1
-rw-r--r--staptree.cxx48
-rw-r--r--staptree.h4
-rw-r--r--tapsets.cxx6
-rw-r--r--translate.cxx141
6 files changed, 209 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 9c21e000..c3134b49 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2006-01-26 Frank Ch. Eigler <fche@elastic.org>
+
+ PR 2060: lock elevation, mop-up
+ * staptree.cxx (functioncall_traversing_visitor): Store a
+ current_function pointer during traversal.
+ (visit_embeddedcode): Use it to handle $target-synthesized functions.
+ (varuse_collecting_visitor::visit_assignment): Correct l-lr typo.
+ (visit_foreach_loop): Note added write on sorted foreach.
+ (visit_delete_statement): Note as read+write.
+ * staptree.h: Corresponding changes.
+ * elaborate.cxx (dead_assignment_remover::visit_expr_statement):
+ Correct stmt token after possible expression rewriting.
+ * tapsets.cxx (visit_target_symbol): Create naming convention
+ to recognize $target-synthesized functions.
+ * translate.cxx (emit_locks, emit_unlocks): New functions to
+ emit lock/unlock sequences at the outermost level of a probe.
+ (emit_probe): Call them.
+ (varlock_*): #if-0 out the lock code generation. Later, these
+ classes should be removed.
+ (translate_pass): Emit read_trylock() kludge macro for old kernels.
+
2006-01-25 Frank Ch. Eigler <fche@elastic.org>
PR 2205, patch from <hiramatu@sdl.hitachi.co.jp>:
diff --git a/elaborate.cxx b/elaborate.cxx
index 613d85ae..b0f784d4 100644
--- a/elaborate.cxx
+++ b/elaborate.cxx
@@ -1303,6 +1303,7 @@ dead_assignment_remover::visit_expr_statement (expr_statement* s)
expression** last_expr = current_expr;
current_expr = & s->value;
s->value->visit (this);
+ s->tok = s->value->tok; // in case it was replaced
current_expr = last_expr;
}
diff --git a/staptree.cxx b/staptree.cxx
index 97b4bcc1..b4398312 100644
--- a/staptree.cxx
+++ b/staptree.cxx
@@ -1460,7 +1460,10 @@ functioncall_traversing_visitor::visit_functioncall (functioncall* e)
{
traversed.insert (e->referent);
// recurse
+ functiondecl* last_current_function = current_function;
+ current_function = e->referent;
e->referent->body->visit (this);
+ current_function = last_current_function;
}
}
@@ -1468,6 +1471,19 @@ functioncall_traversing_visitor::visit_functioncall (functioncall* e)
void
varuse_collecting_visitor::visit_embeddedcode (embeddedcode *s)
{
+ // In order to elide unused but correct functions generated to
+ // get/set $target variables, we encode our knowledge that such
+ // functions are side-effect-free. We tell them apart from ordinary
+ // tapset embedded-C functions by the naming prefix. XXX Something
+ // apart from this heuristic would be nice. XXX Similarly, some
+ // tapset embedded-C functions are pure and disposable, like
+ // substr().
+
+ assert (current_function); // only they get embedded code
+ string name = current_function->name;
+ if (name.length() > 6 && name.substr(0, 6) == "_tvar_")
+ return;
+
embedded_seen = true;
}
@@ -1489,7 +1505,7 @@ varuse_collecting_visitor::visit_assignment (assignment *e)
{
if (e->op == "=" || e->op == "<<<") // pure writes
{
- expression* last_lvalue = current_lrvalue;
+ expression* last_lvalue = current_lvalue;
current_lvalue = e->left; // leave a mark for ::visit_symbol
functioncall_traversing_visitor::visit_assignment (e);
current_lvalue = last_lvalue;
@@ -1581,9 +1597,37 @@ varuse_collecting_visitor::visit_post_crement (post_crement *e)
current_lrvalue = last_lrvalue;
}
+void
+varuse_collecting_visitor::visit_foreach_loop (foreach_loop* s)
+{
+ functioncall_traversing_visitor::visit_foreach_loop (s);
+ // If the collection is sorted, imply a "write" access to the
+ // array in addition to the "read" one already noted in the
+ // base class call above.
+ if (s->sort_direction)
+ {
+ symbol *array = NULL;
+ hist_op *hist = NULL;
+ classify_indexable (s->base, array, hist);
+ if (array) this->written.insert (array->referent);
+ // XXX: Can hist_op iterations be sorted?
+ }
+}
-
+void
+varuse_collecting_visitor::visit_delete_statement (delete_statement* s)
+{
+ // Ideally, this would be treated like an assignment: a plain write
+ // to the underlying value ("lvalue"). XXX: However, the
+ // optimization pass is not smart enough to remove an unneeded
+ // "delete" yet, so we pose more like a *crement ("lrvalue"). This
+ // should protect the underlying value from optimizional mischief.
+ expression* last_lrvalue = current_lrvalue;
+ current_lrvalue = s->value; // leave a mark for ::visit_symbol
+ functioncall_traversing_visitor::visit_delete_statement (s);
+ current_lrvalue = last_lrvalue;
+}
// ------------------------------------------------------------------------
diff --git a/staptree.h b/staptree.h
index fecd6aaf..cde87372 100644
--- a/staptree.h
+++ b/staptree.h
@@ -676,6 +676,8 @@ struct traversing_visitor: public visitor
struct functioncall_traversing_visitor: public traversing_visitor
{
std::set<functiondecl*> traversed;
+ functiondecl* current_function;
+ functioncall_traversing_visitor(): current_function(0) {}
void visit_functioncall (functioncall* e);
};
@@ -696,12 +698,14 @@ struct varuse_collecting_visitor: public functioncall_traversing_visitor
current_lvalue(0),
current_lrvalue(0) {}
void visit_embeddedcode (embeddedcode *s);
+ void visit_delete_statement (delete_statement *s);
void visit_print_format (print_format *e);
void visit_assignment (assignment *e);
void visit_arrayindex (arrayindex *e);
void visit_symbol (symbol *e);
void visit_pre_crement (pre_crement *e);
void visit_post_crement (post_crement *e);
+ void visit_foreach_loop (foreach_loop *s);
};
diff --git a/tapsets.cxx b/tapsets.cxx
index 835c426f..91740520 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -1040,7 +1040,7 @@ dwflpp
clog << "finding prologue for '"
<< last_function->name
<< "' entrypc=0x" << hex << addr
- << " highpc=0x" << last_function_highpc
+ << " highpc=0x" << last_function_highpc << dec
<< "\n";
}
}
@@ -2619,7 +2619,9 @@ var_expanding_copy_visitor::visit_target_symbol (target_symbol *e)
if (lvalue && !q.sess.guru_mode)
throw semantic_error("write to target variable not permitted", e->tok);
- string fname = (string(lvalue ? "set" : "get")
+ // NB: This naming convention is used by varuse_collecting_visitor
+ // to make elision of these functions possible.
+ string fname = (string(lvalue ? "_tvar_set" : "_tvar_get")
+ "_" + e->base_name.substr(1)
+ "_" + lex_cast<string>(tick++));
diff --git a/translate.cxx b/translate.cxx
index 433cf23a..36504a2d 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -85,7 +85,9 @@ struct c_unparser: public unparser, public visitor
void emit_module_init ();
void emit_module_exit ();
void emit_function (functiondecl* v);
+ void emit_locks (const varuse_collecting_visitor& v);
void emit_probe (derived_probe* v, unsigned i);
+ void emit_unlocks (const varuse_collecting_visitor& v);
// for use by looping constructs
vector<string> loop_break_labels;
@@ -442,7 +444,7 @@ struct varlock
if (c.obtained_locks.count(make_pair(v.qname(), true))) // write lock
throw semantic_error("incompatible nested locks obtained for "+v.qname());
-
+#if 0
if (!w) // read lock - no need to try, just do it
c.o->newline() << "read_lock (& " << v << "_lock);";
else
@@ -473,12 +475,14 @@ struct varlock
c.o->newline(-1) << "}";
c.o->newline(-1) << "}";
}
+#endif
c.obtained_locks.insert(make_pair(v.qname(), w));
}
~varlock()
{
if (v.is_local()) return;
+#if 0
c.o->newline() << (w ? "write_unlock" : "read_unlock")
<< " (& " << v << "_lock);";
if (w)
@@ -486,7 +490,7 @@ struct varlock
c.o->newline(-1) << post_unlock_label << ": ;";
c.o->indent(1);
}
-
+#endif
assert(c.obtained_locks.count(make_pair(v.qname(), w)) > 0);
c.obtained_locks.erase(c.obtained_locks.find(make_pair(v.qname(), w)));
}
@@ -1230,6 +1234,11 @@ c_unparser::emit_function (functiondecl* v)
void
c_unparser::emit_probe (derived_probe* v, unsigned i)
{
+ this->current_function = 0;
+ this->current_probe = v;
+ this->current_probenum = i;
+ this->tmpvar_counter = 0;
+
// o->newline() << "static void probe_" << i << " (struct context *c);";
o->newline() << "void probe_" << i << " (struct context * __restrict__ c) {";
o->indent(1);
@@ -1239,6 +1248,11 @@ c_unparser::emit_probe (derived_probe* v, unsigned i)
o->newline(1) << "& c->locals[c->nesting].probe_" << i << ";";
o->newline(-1) << "(void) l;"; // make sure "l" is marked used
+ // emit all read/write locks for global variables
+ varuse_collecting_visitor vut;
+ v->body->visit (& vut);
+ emit_locks (vut);
+
// initialize locals
for (unsigned j=0; j<v->locals.size(); j++)
{
@@ -1255,25 +1269,131 @@ c_unparser::emit_probe (derived_probe* v, unsigned i)
v->locals[j]->tok);
}
- this->current_function = 0;
- this->current_probe = v;
- this->current_probenum = i;
- this->tmpvar_counter = 0;
v->body->visit (this);
- this->current_probe = 0;
- this->current_probenum = 0; // not essential
o->newline(-1) << "out:";
// NB: no need to uninitialize locals, except if arrays can somedays be local
o->newline(1) << "_stp_print_flush();";
+ emit_unlocks (vut);
+
o->newline(-1) << "}\n";
+ this->current_probe = 0;
+ this->current_probenum = 0; // not essential
+
v->emit_probe_entries (o, i);
}
void
+c_unparser::emit_locks(const varuse_collecting_visitor& vut)
+{
+ o->newline() << "{";
+ o->newline(1) << "unsigned numtrylock = 0;";
+ o->newline() << "(void) numtrylock;";
+
+ string last_locked_var;
+ for (unsigned i = 0; i < session->globals.size(); i++)
+ {
+ vardecl* v = session->globals[i];
+ bool read_p = vut.read.find(v) != vut.read.end();
+ bool write_p = vut.written.find(v) != vut.written.end();
+ if (!read_p && !write_p) continue;
+
+ if (v->type == pe_stats) // read and write locks are flipped
+ // Specifically, a "<<<" to a stats object is considered a
+ // "shared-lock" operation, since it's implicitly done
+ // per-cpu. But a "@op(x)" extraction is an "exclusive-lock"
+ // one, as is a (sorted or unsorted) foreach, so those cases
+ // are excluded by the w & !r condition below.
+ {
+ if (write_p && !read_p) { read_p = true; write_p = false; }
+ else if (read_p && !write_p) { read_p = false; write_p = true; }
+ }
+
+ string lockcall =
+ string (write_p ? "write" : "read") +
+ "_trylock (& global_" + v->name + "_lock)";
+
+ o->newline() << "while (! " << lockcall
+ << "&& (++numtrylock < MAXTRYLOCK))";
+ o->newline(1) << "ndelay (TRYLOCKDELAY);";
+ o->newline(-1) << "if (unlikely (numtrylock >= MAXTRYLOCK)) {";
+ o->newline(1) << "atomic_inc (& skipped_count);";
+ // The following works even if i==0. Note that using
+ // globals[i-1]->name is wrong since that global may not have
+ // been lockworthy by this probe.
+ o->newline() << "goto unlock_" << last_locked_var << ";";
+ o->newline(-1) << "}";
+
+ last_locked_var = v->name;
+ }
+
+ o->newline(-1) << "}";
+}
+
+
+void
+c_unparser::emit_unlocks(const varuse_collecting_visitor& vut)
+{
+ unsigned numvars = 0;
+
+ if (session->verbose)
+ clog << "Probe #" << current_probenum << " locks ";
+
+ for (int i = session->globals.size()-1; i>=0; i--) // in reverse order!
+ {
+ vardecl* v = session->globals[i];
+ bool read_p = vut.read.find(v) != vut.read.end();
+ bool write_p = vut.written.find(v) != vut.written.end();
+ if (!read_p && !write_p) continue;
+
+ numvars ++;
+ o->newline(-1) << "unlock_" << v->name << ":";
+ o->indent(1);
+
+ // Duplicate lock flipping logic from above
+ if (v->type == pe_stats)
+ {
+ if (write_p && !read_p) { read_p = true; write_p = false; }
+ else if (read_p && !write_p) { read_p = false; write_p = true; }
+ }
+
+ if (session->verbose)
+ clog << v->name << "[" << (read_p ? "r" : "")
+ << (write_p ? "w" : "") << "] ";
+
+ if (write_p) // emit write lock
+ o->newline() << "write_unlock (& global_" << v->name << "_lock);";
+ else // (read_p && !write_p) : emit read lock
+ o->newline() << "read_unlock (& global_" << v->name << "_lock);";
+
+ // fall through to next variable; thus the reverse ordering
+ }
+
+ // emit plain "unlock" label, used if the very first lock failed.
+ o->newline(-1) << "unlock_: ;";
+ o->indent(1);
+
+ if (numvars) // is there a chance that any lock attempt failed?
+ {
+ o->newline() << "if (atomic_read (& skipped_count) > MAXSKIPPED) {";
+ // XXX: In this known non-reentrant context, we could print a more
+ // informative error.
+ o->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);";
+ o->newline() << "_stp_exit();";
+ o->newline(-1) << "}";
+
+ if (session->verbose)
+ clog << endl;
+ }
+ else if (session->verbose)
+ clog << "nothing" << endl;
+}
+
+
+void
c_unparser::collect_map_index_types(vector<vardecl *> const & vars,
set< pair<vector<exp_type>, exp_type> > & types)
{
@@ -3742,6 +3862,11 @@ translate_pass (systemtap_session& s)
s.op->newline() << "#include <linux/delay.h>";
s.op->newline() << "#include <linux/profile.h>";
s.op->newline() << "#include \"loc2c-runtime.h\" ";
+
+ // XXX: old 2.6 kernel hack
+ s.op->newline() << "#ifndef read_trylock";
+ s.op->newline() << "#define read_trylock(x) (read_lock(x),1)";
+ s.op->newline() << "#endif";
s.up->emit_common_header ();