diff options
-rw-r--r-- | ChangeLog | 21 | ||||
-rw-r--r-- | elaborate.cxx | 1 | ||||
-rw-r--r-- | staptree.cxx | 48 | ||||
-rw-r--r-- | staptree.h | 4 | ||||
-rw-r--r-- | tapsets.cxx | 6 | ||||
-rw-r--r-- | translate.cxx | 141 |
6 files changed, 209 insertions, 12 deletions
@@ -1,3 +1,24 @@ +2006-01-26 Frank Ch. Eigler <fche@elastic.org> + + PR 2060: lock elevation, mop-up + * staptree.cxx (functioncall_traversing_visitor): Store a + current_function pointer during traversal. + (visit_embeddedcode): Use it to handle $target-synthesized functions. + (varuse_collecting_visitor::visit_assignment): Correct l-lr typo. + (visit_foreach_loop): Note added write on sorted foreach. + (visit_delete_statement): Note as read+write. + * staptree.h: Corresponding changes. + * elaborate.cxx (dead_assignment_remover::visit_expr_statement): + Correct stmt token after possible expression rewriting. + * tapsets.cxx (visit_target_symbol): Create naming convention + to recognize $target-synthesized functions. + * translate.cxx (emit_locks, emit_unlocks): New functions to + emit lock/unlock sequences at the outermost level of a probe. + (emit_probe): Call them. + (varlock_*): #if-0 out the lock code generation. Later, these + classes should be removed. + (translate_pass): Emit read_trylock() kludge macro for old kernels. + 2006-01-25 Frank Ch. Eigler <fche@elastic.org> PR 2205, patch from <hiramatu@sdl.hitachi.co.jp>: diff --git a/elaborate.cxx b/elaborate.cxx index 613d85ae..b0f784d4 100644 --- a/elaborate.cxx +++ b/elaborate.cxx @@ -1303,6 +1303,7 @@ dead_assignment_remover::visit_expr_statement (expr_statement* s) expression** last_expr = current_expr; current_expr = & s->value; s->value->visit (this); + s->tok = s->value->tok; // in case it was replaced current_expr = last_expr; } diff --git a/staptree.cxx b/staptree.cxx index 97b4bcc1..b4398312 100644 --- a/staptree.cxx +++ b/staptree.cxx @@ -1460,7 +1460,10 @@ functioncall_traversing_visitor::visit_functioncall (functioncall* e) { traversed.insert (e->referent); // recurse + functiondecl* last_current_function = current_function; + current_function = e->referent; e->referent->body->visit (this); + current_function = last_current_function; } } @@ -1468,6 +1471,19 @@ functioncall_traversing_visitor::visit_functioncall (functioncall* e) void varuse_collecting_visitor::visit_embeddedcode (embeddedcode *s) { + // In order to elide unused but correct functions generated to + // get/set $target variables, we encode our knowledge that such + // functions are side-effect-free. We tell them apart from ordinary + // tapset embedded-C functions by the naming prefix. XXX Something + // apart from this heuristic would be nice. XXX Similarly, some + // tapset embedded-C functions are pure and disposable, like + // substr(). + + assert (current_function); // only they get embedded code + string name = current_function->name; + if (name.length() > 6 && name.substr(0, 6) == "_tvar_") + return; + embedded_seen = true; } @@ -1489,7 +1505,7 @@ varuse_collecting_visitor::visit_assignment (assignment *e) { if (e->op == "=" || e->op == "<<<") // pure writes { - expression* last_lvalue = current_lrvalue; + expression* last_lvalue = current_lvalue; current_lvalue = e->left; // leave a mark for ::visit_symbol functioncall_traversing_visitor::visit_assignment (e); current_lvalue = last_lvalue; @@ -1581,9 +1597,37 @@ varuse_collecting_visitor::visit_post_crement (post_crement *e) current_lrvalue = last_lrvalue; } +void +varuse_collecting_visitor::visit_foreach_loop (foreach_loop* s) +{ + functioncall_traversing_visitor::visit_foreach_loop (s); + // If the collection is sorted, imply a "write" access to the + // array in addition to the "read" one already noted in the + // base class call above. + if (s->sort_direction) + { + symbol *array = NULL; + hist_op *hist = NULL; + classify_indexable (s->base, array, hist); + if (array) this->written.insert (array->referent); + // XXX: Can hist_op iterations be sorted? + } +} - +void +varuse_collecting_visitor::visit_delete_statement (delete_statement* s) +{ + // Ideally, this would be treated like an assignment: a plain write + // to the underlying value ("lvalue"). XXX: However, the + // optimization pass is not smart enough to remove an unneeded + // "delete" yet, so we pose more like a *crement ("lrvalue"). This + // should protect the underlying value from optimizional mischief. + expression* last_lrvalue = current_lrvalue; + current_lrvalue = s->value; // leave a mark for ::visit_symbol + functioncall_traversing_visitor::visit_delete_statement (s); + current_lrvalue = last_lrvalue; +} // ------------------------------------------------------------------------ @@ -676,6 +676,8 @@ struct traversing_visitor: public visitor struct functioncall_traversing_visitor: public traversing_visitor { std::set<functiondecl*> traversed; + functiondecl* current_function; + functioncall_traversing_visitor(): current_function(0) {} void visit_functioncall (functioncall* e); }; @@ -696,12 +698,14 @@ struct varuse_collecting_visitor: public functioncall_traversing_visitor current_lvalue(0), current_lrvalue(0) {} void visit_embeddedcode (embeddedcode *s); + void visit_delete_statement (delete_statement *s); void visit_print_format (print_format *e); void visit_assignment (assignment *e); void visit_arrayindex (arrayindex *e); void visit_symbol (symbol *e); void visit_pre_crement (pre_crement *e); void visit_post_crement (post_crement *e); + void visit_foreach_loop (foreach_loop *s); }; diff --git a/tapsets.cxx b/tapsets.cxx index 835c426f..91740520 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -1040,7 +1040,7 @@ dwflpp clog << "finding prologue for '" << last_function->name << "' entrypc=0x" << hex << addr - << " highpc=0x" << last_function_highpc + << " highpc=0x" << last_function_highpc << dec << "\n"; } } @@ -2619,7 +2619,9 @@ var_expanding_copy_visitor::visit_target_symbol (target_symbol *e) if (lvalue && !q.sess.guru_mode) throw semantic_error("write to target variable not permitted", e->tok); - string fname = (string(lvalue ? "set" : "get") + // NB: This naming convention is used by varuse_collecting_visitor + // to make elision of these functions possible. + string fname = (string(lvalue ? "_tvar_set" : "_tvar_get") + "_" + e->base_name.substr(1) + "_" + lex_cast<string>(tick++)); diff --git a/translate.cxx b/translate.cxx index 433cf23a..36504a2d 100644 --- a/translate.cxx +++ b/translate.cxx @@ -85,7 +85,9 @@ struct c_unparser: public unparser, public visitor void emit_module_init (); void emit_module_exit (); void emit_function (functiondecl* v); + void emit_locks (const varuse_collecting_visitor& v); void emit_probe (derived_probe* v, unsigned i); + void emit_unlocks (const varuse_collecting_visitor& v); // for use by looping constructs vector<string> loop_break_labels; @@ -442,7 +444,7 @@ struct varlock if (c.obtained_locks.count(make_pair(v.qname(), true))) // write lock throw semantic_error("incompatible nested locks obtained for "+v.qname()); - +#if 0 if (!w) // read lock - no need to try, just do it c.o->newline() << "read_lock (& " << v << "_lock);"; else @@ -473,12 +475,14 @@ struct varlock c.o->newline(-1) << "}"; c.o->newline(-1) << "}"; } +#endif c.obtained_locks.insert(make_pair(v.qname(), w)); } ~varlock() { if (v.is_local()) return; +#if 0 c.o->newline() << (w ? "write_unlock" : "read_unlock") << " (& " << v << "_lock);"; if (w) @@ -486,7 +490,7 @@ struct varlock c.o->newline(-1) << post_unlock_label << ": ;"; c.o->indent(1); } - +#endif assert(c.obtained_locks.count(make_pair(v.qname(), w)) > 0); c.obtained_locks.erase(c.obtained_locks.find(make_pair(v.qname(), w))); } @@ -1230,6 +1234,11 @@ c_unparser::emit_function (functiondecl* v) void c_unparser::emit_probe (derived_probe* v, unsigned i) { + this->current_function = 0; + this->current_probe = v; + this->current_probenum = i; + this->tmpvar_counter = 0; + // o->newline() << "static void probe_" << i << " (struct context *c);"; o->newline() << "void probe_" << i << " (struct context * __restrict__ c) {"; o->indent(1); @@ -1239,6 +1248,11 @@ c_unparser::emit_probe (derived_probe* v, unsigned i) o->newline(1) << "& c->locals[c->nesting].probe_" << i << ";"; o->newline(-1) << "(void) l;"; // make sure "l" is marked used + // emit all read/write locks for global variables + varuse_collecting_visitor vut; + v->body->visit (& vut); + emit_locks (vut); + // initialize locals for (unsigned j=0; j<v->locals.size(); j++) { @@ -1255,25 +1269,131 @@ c_unparser::emit_probe (derived_probe* v, unsigned i) v->locals[j]->tok); } - this->current_function = 0; - this->current_probe = v; - this->current_probenum = i; - this->tmpvar_counter = 0; v->body->visit (this); - this->current_probe = 0; - this->current_probenum = 0; // not essential o->newline(-1) << "out:"; // NB: no need to uninitialize locals, except if arrays can somedays be local o->newline(1) << "_stp_print_flush();"; + emit_unlocks (vut); + o->newline(-1) << "}\n"; + this->current_probe = 0; + this->current_probenum = 0; // not essential + v->emit_probe_entries (o, i); } void +c_unparser::emit_locks(const varuse_collecting_visitor& vut) +{ + o->newline() << "{"; + o->newline(1) << "unsigned numtrylock = 0;"; + o->newline() << "(void) numtrylock;"; + + string last_locked_var; + for (unsigned i = 0; i < session->globals.size(); i++) + { + vardecl* v = session->globals[i]; + bool read_p = vut.read.find(v) != vut.read.end(); + bool write_p = vut.written.find(v) != vut.written.end(); + if (!read_p && !write_p) continue; + + if (v->type == pe_stats) // read and write locks are flipped + // Specifically, a "<<<" to a stats object is considered a + // "shared-lock" operation, since it's implicitly done + // per-cpu. But a "@op(x)" extraction is an "exclusive-lock" + // one, as is a (sorted or unsorted) foreach, so those cases + // are excluded by the w & !r condition below. + { + if (write_p && !read_p) { read_p = true; write_p = false; } + else if (read_p && !write_p) { read_p = false; write_p = true; } + } + + string lockcall = + string (write_p ? "write" : "read") + + "_trylock (& global_" + v->name + "_lock)"; + + o->newline() << "while (! " << lockcall + << "&& (++numtrylock < MAXTRYLOCK))"; + o->newline(1) << "ndelay (TRYLOCKDELAY);"; + o->newline(-1) << "if (unlikely (numtrylock >= MAXTRYLOCK)) {"; + o->newline(1) << "atomic_inc (& skipped_count);"; + // The following works even if i==0. Note that using + // globals[i-1]->name is wrong since that global may not have + // been lockworthy by this probe. + o->newline() << "goto unlock_" << last_locked_var << ";"; + o->newline(-1) << "}"; + + last_locked_var = v->name; + } + + o->newline(-1) << "}"; +} + + +void +c_unparser::emit_unlocks(const varuse_collecting_visitor& vut) +{ + unsigned numvars = 0; + + if (session->verbose) + clog << "Probe #" << current_probenum << " locks "; + + for (int i = session->globals.size()-1; i>=0; i--) // in reverse order! + { + vardecl* v = session->globals[i]; + bool read_p = vut.read.find(v) != vut.read.end(); + bool write_p = vut.written.find(v) != vut.written.end(); + if (!read_p && !write_p) continue; + + numvars ++; + o->newline(-1) << "unlock_" << v->name << ":"; + o->indent(1); + + // Duplicate lock flipping logic from above + if (v->type == pe_stats) + { + if (write_p && !read_p) { read_p = true; write_p = false; } + else if (read_p && !write_p) { read_p = false; write_p = true; } + } + + if (session->verbose) + clog << v->name << "[" << (read_p ? "r" : "") + << (write_p ? "w" : "") << "] "; + + if (write_p) // emit write lock + o->newline() << "write_unlock (& global_" << v->name << "_lock);"; + else // (read_p && !write_p) : emit read lock + o->newline() << "read_unlock (& global_" << v->name << "_lock);"; + + // fall through to next variable; thus the reverse ordering + } + + // emit plain "unlock" label, used if the very first lock failed. + o->newline(-1) << "unlock_: ;"; + o->indent(1); + + if (numvars) // is there a chance that any lock attempt failed? + { + o->newline() << "if (atomic_read (& skipped_count) > MAXSKIPPED) {"; + // XXX: In this known non-reentrant context, we could print a more + // informative error. + o->newline(1) << "atomic_set (& session_state, STAP_SESSION_ERROR);"; + o->newline() << "_stp_exit();"; + o->newline(-1) << "}"; + + if (session->verbose) + clog << endl; + } + else if (session->verbose) + clog << "nothing" << endl; +} + + +void c_unparser::collect_map_index_types(vector<vardecl *> const & vars, set< pair<vector<exp_type>, exp_type> > & types) { @@ -3742,6 +3862,11 @@ translate_pass (systemtap_session& s) s.op->newline() << "#include <linux/delay.h>"; s.op->newline() << "#include <linux/profile.h>"; s.op->newline() << "#include \"loc2c-runtime.h\" "; + + // XXX: old 2.6 kernel hack + s.op->newline() << "#ifndef read_trylock"; + s.op->newline() << "#define read_trylock(x) (read_lock(x),1)"; + s.op->newline() << "#endif"; s.up->emit_common_header (); |