summaryrefslogtreecommitdiffstats
path: root/staptree.cxx
diff options
context:
space:
mode:
author fche <fche> 2005-03-02 01:28:50 +0000
committer fche <fche> 2005-03-02 01:28:50 +0000
commit 56099f083d7a68722ace316be4d288d21caabaee (patch)
tree 3e67ec78134a358c1f90f701c165c4c577d62177 /staptree.cxx
parent 2f1a1aead38c1dcd329a694dd8d3290b37320466 (diff)
download systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.gz
systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.xz
systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.zip
* some semantic analysis
2005-03-01 Frank Ch. Eigler <fche@redhat.com> * parse.cxx: Implement left-associativity for several types of operators. Add some more statement types. Parse functions. Be able to print tokens. Simplify error generating functions. Save tokens in all parse tree nodes. * parse.h: Corresponding changes. * staptree.cxx: Move tree-printing functions here. Add many new functions for symbol and type resolution. * staptree.h: Corresponding changes. * semtest.cxx: New semantic analysis pass & test driver. * testsuite/sem*/*: New tests. * parsetest.cxx: Separated parse test driver. * testsuite/parse*/*: Adapt tests to parsetest driver. * Makefile.am: Build semtest. Run its tests. * Makefile.in: Regenerated. * parse.cxx, parse.h: New files: parser.
Diffstat (limited to 'staptree.cxx')
-rw-r--r-- staptree.cxx 901
1 files changed, 880 insertions, 21 deletions
diff --git a/staptree.cxx b/staptree.cxx
index 88067765..34a1d0d3 100644
--- a/staptree.cxx
+++ b/staptree.cxx
@@ -1,44 +1,903 @@
-// toy driver
+// parse tree functions
// Copyright 2005 Red Hat Inc.
// GPL
#include "staptree.h"
#include "parse.h"
#include <iostream>
+#include <typeinfo>
+#include <cassert>
+// Default-construct an expression: type not yet known, no source token.
+expression::expression ()
+  : type (pe_unknown),
+    tok (0)
+{
+}
+
+
+// Out-of-line destructor with an empty body.
+expression::~expression () {}
+
+
+// Default-construct a statement with no source token attached.
+statement::statement ()
+  : tok (0)
+{
+}
+
+
+// Out-of-line destructor with an empty body.
+statement::~statement () {}
+
+
+// A freshly built symbol is not yet bound to any declaration.
+symbol::symbol ()
+{
+  referent = 0;
+}
+
+
+// A freshly built array reference is not yet bound to any declaration.
+arrayindex::arrayindex ()
+{
+  referent = 0;
+}
+
+
+// A freshly built call is not yet bound to any function declaration.
+functioncall::functioncall ()
+{
+  referent = 0;
+}
+
+
+// Default-construct a declaration: no source token, type not yet known.
+symboldecl::symboldecl ()
+{
+  tok = 0;
+  type = pe_unknown;
+}
+
+
+// Out-of-line destructor with an empty body.
+symboldecl::~symboldecl () {}
+
+
+// Default constructor: leaves index_types empty.
+vardecl::vardecl () {}
+
+
+// Construct a declaration with `arity' index slots, each of them
+// starting out as pe_unknown.
+vardecl::vardecl (unsigned arity)
+{
+  index_types.assign (arity, pe_unknown);
+}
+
+
+// Default-construct a function declaration with no body attached.
+functiondecl::functiondecl ()
+  : body (0)
+{
+}
+
+
+// Build a numeric literal; its type is fixed to pe_long.
+literal_number::literal_number (long v)
+{
+  type = pe_long;
+  value = v;
+}
+
-expression::~expression () {}
-statement::~statement () {}
+// Build a string literal; its type is fixed to pe_string.
+literal_string::literal_string (const string& v)
+{
+  type = pe_string;
+  value = v;
+}
+
+
+// Print the symbolic name of an expression type; values outside the
+// four known enumerators print as "???".
+ostream&
+operator << (ostream& o, const exp_type& e)
+{
+  const char* label = "???";
+
+  if (e == pe_unknown)
+    label = "unknown";
+  else if (e == pe_long)
+    label = "long";
+  else if (e == pe_string)
+    label = "string";
+  else if (e == pe_stats)
+    label = "stats";
+
+  o << label;
+  return o;
+}
+
+
+// ------------------------------------------------------------------------
+// parse tree printing
+
+// Stream an expression by delegating to its print() member.
+ostream&
+operator << (ostream& o, expression& k)
+{
+  k.print (o);
+  return o;
+}
+
+
+// Print the string value wrapped in double quotes (no escaping is done).
+void
+literal_string::print (ostream& o)
+{
+  o << '"';
+  o << value;
+  o << '"';
+}
+
+// Print the numeric value as-is.
+void
+literal_number::print (ostream& o)
+{
+  o << value;
+}
+
+
+// Print as "(left)op(right)", fully parenthesizing both operands.
+void
+binary_expression::print (ostream& o)
+{
+  o << '(' << *left << ")";
+  o << op;
+  o << '(' << *right << ")";
+}
-int main (int argc, char *argv [])
+// Print the operator followed by its parenthesized operand: "op(operand)".
+void unary_expression::print (ostream& o)
{
- int rc = 0;
+ o << op << '(' << *operand << ")";
+}
+
+
+// Print the parenthesized operand followed by the operator: "(operand)op".
+void
+post_crement::print (ostream& o)
+{
+  o << '(' << *operand << ")";
+  o << op;
+}
+
+
+// Print as "(cond) ? (truevalue) : (falsevalue)".
+void
+ternary_expression::print (ostream& o)
+{
+  o << "(" << *cond << ") ? (";
+  o << *truevalue << ") : (";
+  o << *falsevalue << ")";
+}
+
+
+// Print just the symbol's name.
+void
+symbol::print (ostream& o)
+{
+  o << name;
+}
+
+
+// Print the variable name, with a "[...]" suffix when it has index types.
+void
+vardecl::print (ostream& o)
+{
+  o << name;
+  if (! index_types.empty ())
+    o << "[...]";
+}
+
- if (argc > 1)
+// Print the variable's signature as "name:type", followed by a
+// bracketed, comma-separated list of index types when there are any.
+void vardecl::printsig (ostream& o)
+{
+ o << name << ":" << type;
+ if (index_types.size() > 0)
{
- // quietly parse all listed input files
- for (int i = 1; i < argc; i ++)
+ o << " [";
+ for (unsigned i=0; i<index_types.size(); i++)
+ o << (i>0 ? ", " : "") << index_types[i];
+ o << "]";
+ }
+}
+
+
+// Print "function name (arg, arg, ...)" on one line, then the body.
+// NOTE(review): body is dereferenced unconditionally; presumably the
+// parser always sets it before printing -- confirm.
+void
+functiondecl::print (ostream& o)
+{
+  o << "function " << name << " (";
+  const char* sep = "";
+  for (unsigned k = 0; k < formal_args.size (); k++)
+    {
+      o << sep << *formal_args[k];
+      sep = ", ";
+    }
+  o << ")" << endl;
+  body->print (o);
+}
+
+
+// Print the function signature as "name:type (arg:type, arg:type, ...)".
+void
+functiondecl::printsig (ostream& o)
+{
+  o << name << ":" << type << " (";
+  const char* sep = "";
+  for (unsigned k = 0; k < formal_args.size (); k++)
+    {
+      o << sep << *formal_args[k] << ":" << formal_args[k]->type;
+      sep = ", ";
+    }
+  o << ")";
+}
+
+
+// Print as "base[index, index, ...]".
+void
+arrayindex::print (ostream& o)
+{
+  o << base << "[";
+  const char* sep = "";
+  for (unsigned k = 0; k < indexes.size (); k++)
+    {
+      o << sep << *indexes[k];
+      sep = ", ";
+    }
+  o << "]";
+}
+
+
+// Print as "function(arg, arg, ...)".
+void
+functioncall::print (ostream& o)
+{
+  o << function << "(";
+  const char* sep = "";
+  for (unsigned k = 0; k < args.size (); k++)
+    {
+      o << sep << *args[k];
+      sep = ", ";
+    }
+  o << ")";
+}
+
+
+// Stream a statement by delegating to its print() member.
+ostream&
+operator << (ostream& o, statement& k)
+{
+  k.print (o);
+  return o;
+}
+
+
+// Print the block in braces, one statement per line, each followed by ";".
+void
+block::print (ostream& o)
+{
+  o << "{" << endl;
+  for (unsigned k = 0; k < statements.size (); k++)
+    {
+      o << *statements[k];
+      o << ";" << endl;
+    }
+  o << "}" << endl;
+}
+
+
+// Placeholder output: prints only a "<for_loop>" marker line, none of
+// the loop's components.
+void
+for_loop::print (ostream& o)
+{
+  o << "<for_loop>";
+  o << endl;
+}
+
+
+// An empty statement prints as a lone semicolon.
+void
+null_statement::print (ostream& o)
+{
+  o << ";";
+}
+
+
+// Print the wrapped expression (no trailing punctuation is added here).
+void
+expr_statement::print (ostream& o)
+{
+  o << *value;
+}
+
+
+// Print as "return <value>".
+void
+return_statement::print (ostream& o)
+{
+  o << "return ";
+  o << *value;
+}
+
+
+// Print as "delete <value>".
+void
+delete_statement::print (ostream& o)
+{
+  o << "delete ";
+  o << *value;
+}
+
+
+// Print "if (condition) " and the then-branch on following lines; the
+// "else" part is printed only when an else-branch is present.
+void
+if_statement::print (ostream& o)
+{
+  o << "if (" << *condition << ") " << endl;
+  o << *thenblock << endl;
+  if (! elseblock)
+    return;
+  o << "else " << *elseblock << endl;
+}
+
+
+// Print a "# file <name>" header, then every probe, then every function,
+// each followed by a newline.
+void
+stapfile::print (ostream& o)
+{
+  o << "# file " << name << endl;
+
+  const unsigned nprobes = probes.size ();
+  for (unsigned p = 0; p < nprobes; p++)
+    {
+      probes[p]->print (o);
+      o << endl;
+    }
+
+  const unsigned nfuncs = functions.size ();
+  for (unsigned f = 0; f < nfuncs; f++)
+    {
+      functions[f]->print (o);
+      o << endl;
+    }
+}
+
+
+// Print "probe " followed by the colon-separated location components,
+// a newline, and then the probe body.
+void
+probe::print (ostream& o)
+{
+  o << "probe ";
+  const char* sep = "";
+  for (unsigned k = 0; k < location.size (); k++)
+    {
+      o << sep;
+      location[k]->print (o);
+      sep = ":";
+    }
+  o << endl;
+  o << *body;
+}
+
+
+// Print the functor, followed by "(arg)" when an argument is present.
+void
+probe_point_spec::print (ostream& o)
+{
+  o << functor;
+  if (! arg)
+    return;
+  o << "(" << *arg << ")";
+}
+
+
+// Stream a declaration by delegating to its print() member.
+ostream&
+operator << (ostream& o, symboldecl& k)
+{
+  k.print (o);
+  return o;
+}
+
+
+// ------------------------------------------------------------------------
+// semantic processing: symbol resolution
+
+
+// Set up symbol resolution outside of any function (current_function
+// is null).  The unresolved-symbol counter starts at zero.
+symresolution_info::symresolution_info (vector<vardecl*>& l,
+                                        vector<vardecl*>& g,
+                                        vector<stapfile*>& f,
+                                        stapfile* tf)
+  : locals (l),
+    globals (g),
+    files (f),
+    current_file (tf),
+    current_function (0)
+{
+  num_unresolved = 0;
+}
+
+
+// Set up symbol resolution inside function `cf'.  The
+// unresolved-symbol counter starts at zero.
+symresolution_info::symresolution_info (vector<vardecl*>& l,
+                                        vector<vardecl*>& g,
+                                        vector<stapfile*>& f,
+                                        stapfile* tf,
+                                        functiondecl* cf)
+  : locals (l),
+    globals (g),
+    files (f),
+    current_file (tf),
+    current_function (cf)
+{
+  num_unresolved = 0;
+}
+
+
+// Literals contain no symbols, so there is nothing to resolve.
+void
+literal::resolve_symbols (symresolution_info& r) {}
+
+
+// Resolve symbols in the left operand, then in the right operand.
+void
+binary_expression::resolve_symbols (symresolution_info& r)
+{
+  left->resolve_symbols (r);    // left first: new locals are created in
+  right->resolve_symbols (r);   // traversal order
+}
+
+
+// Resolve symbols in the single operand.
+void
+unary_expression::resolve_symbols (symresolution_info& r)
+{
+  operand->resolve_symbols (r);
+}
+
+
+// Resolve symbols in the condition and then in both result branches,
+// in source order.
+void
+ternary_expression::resolve_symbols (symresolution_info& r)
+{
+  cond->resolve_symbols (r);
+  truevalue->resolve_symbols (r);
+  falsevalue->resolve_symbols (r);
+}
+
+
+// Bind this symbol to a declaration.  An already-bound symbol is left
+// alone; otherwise the name is looked up via r.find(), and if nothing
+// matches a new local scalar declaration is created and registered.
+void
+symbol::resolve_symbols (symresolution_info& r)
+{
+  if (referent)
+    return;
+
+  referent = r.find (name);
+  if (referent)
+    return;
+
+  // new local
+  vardecl* fresh = new vardecl;
+  fresh->name = name;
+  fresh->tok = tok;
+  r.locals.push_back (fresh);
+  referent = fresh;
+  // XXX: check for conflicting function name
+}
+
+
+// Resolve symbols in every index expression, then bind the array base
+// name to a declaration.  If the name is not found, a new local
+// declaration with one index slot per index expression is created.
+void
+arrayindex::resolve_symbols (symresolution_info& r)
+{
+  for (unsigned k = 0; k < indexes.size (); k++)
+    indexes[k]->resolve_symbols (r);
+
+  if (referent)
+    return;
+
+  referent = r.find (base);
+  if (referent)
+    return;
+
+  // new local
+  vardecl* fresh = new vardecl (indexes.size ());
+  fresh->name = base;
+  fresh->tok = tok;
+  r.locals.push_back (fresh);
+  referent = fresh;
+  // XXX: check for conflicting function name
+}
+
+
+// Resolve symbols in every argument expression, then bind the call to
+// the function of the same name declared in the current file.  A call
+// that is already bound is left alone; a call with no matching
+// declaration is reported through r.unresolved().
+void
+functioncall::resolve_symbols (symresolution_info& r)
+{
+ for (unsigned i=0; i<args.size(); i++)
+ args[i]->resolve_symbols (r);
+
+ if (referent)
+ return;
+
+ // find global functiondecl
+ // (only r.current_file->functions is searched here, not r.files)
+ functiondecl* d = 0;
+ for (unsigned j = 0; j < r.current_file->functions.size(); j++)
+ {
+ functiondecl* fd = r.current_file->functions[j];
+ if (fd->name == function)
{
- parser p (argv[i]);
- stapfile* f = p.parse ();
- if (f)
- cout << "file '" << argv[i] << "' parsed ok." << endl;
- else
- rc = 1;
+ d = fd;
+ break;
}
}
+ // XXX: check for conflicting variable name
+
+ if (d)
+ referent = d;
+ else
+ r.unresolved (tok);
+}
+
+
+// Resolve symbols in each contained statement, in order.
+void
+block::resolve_symbols (symresolution_info& r)
+{
+  const unsigned count = statements.size ();
+  for (unsigned k = 0; k < count; k++)
+    statements[k]->resolve_symbols (r);
+}
+
+
+// Resolve symbols in the condition, the then-branch, and -- when one
+// exists -- the else-branch.
+void
+if_statement::resolve_symbols (symresolution_info& r)
+{
+  condition->resolve_symbols (r);
+  thenblock->resolve_symbols (r);
+  // elseblock is optional (if_statement::print tests it before use), so
+  // an "if" without "else" must not dereference it here either.
+  if (elseblock)
+    elseblock->resolve_symbols (r);
+}
+
+
+// Resolve symbols in the loop header parts and then the loop body.
+// NOTE(review): all four members are dereferenced unconditionally;
+// presumably the parser never leaves any of them null -- confirm.
+void
+for_loop::resolve_symbols (symresolution_info& r)
+{
+  init->resolve_symbols (r);
+  cond->resolve_symbols (r);
+  incr->resolve_symbols (r);
+  block->resolve_symbols (r);
+}
+
+
+// Resolve symbols in the wrapped expression.
+void
+expr_statement::resolve_symbols (symresolution_info& r)
+{
+  value->resolve_symbols (r);
+}
+
+
+// Look a variable up by name.  Search order: locals, then the formal
+// arguments of the current function (if any), then globals.  Returns
+// the first match, or 0 when the name is not declared anywhere.
+vardecl*
+symresolution_info::find (const string& name)
+{
+  // search locals
+  for (unsigned k = 0; k < locals.size (); k++)
+    {
+      vardecl* cand = locals[k];
+      if (cand->name == name)
+        return cand;
+    }
+
+  // search function formal parameters (if any)
+  if (current_function)
+    for (unsigned k = 0; k < current_function->formal_args.size (); k++)
+      {
+        vardecl* cand = current_function->formal_args[k];
+        if (cand->name == name)
+          return cand;
+      }
+
+  // search globals
+  for (unsigned k = 0; k < globals.size (); k++)
+    {
+      vardecl* cand = globals[k];
+      if (cand->name == name)
+        return cand;
+    }
+
+  return 0;
+}
+
+
+// Count one unresolved symbol and report it on cerr, naming the
+// offending token when one is available.
+void
+symresolution_info::unresolved (const token* tok)
+{
+  ++num_unresolved;
+
+  cerr << "error: unresolved symbol for ";
+  if (! tok)
+    cerr << "a token";
+  else
+    cerr << *tok;
+  cerr << endl;
+}
+
+
+// ------------------------------------------------------------------------
+// semantic processing: type resolution
+
+
+// A literal already knows its own type (long or string).  Report a
+// mismatch when the requested type `t' is known and differs from it.
+void
+literal::resolve_types (typeresolution_info& r, exp_type t)
+{
+  assert (type == pe_long || type == pe_string);
+
+  if (t != type && t != pe_unknown)
+    r.mismatch (tok, type, t);
+}
+
+
+// Resolve the type of a binary expression and of its operands.
+// `t' is the type requested by the caller, or pe_unknown when the
+// caller imposes none.  Three operators have fixed typing rules; every
+// other operator takes its type from the request or from its operands.
+// Problems are reported through r.mismatch(); each newly assigned type
+// is reported through r.resolved().
+void
+binary_expression::resolve_types (typeresolution_info& r, exp_type t)
+{
+ if (op == "<<")
+ {
+ // stats << long yields stats
+ left->resolve_types (r, pe_stats);
+ right->resolve_types (r, pe_long);
+ if (t == pe_long || t == pe_string)
+ r.mismatch (tok, t, pe_stats);
+ else if (type == pe_unknown)
+ {
+ type = pe_stats;
+ r.resolved (tok, type);
+ }
+ }
+ else if (op == ".") // string concatenation
+ {
+ // string . string yields string
+ left->resolve_types (r, pe_string);
+ right->resolve_types (r, pe_string);
+ if (t == pe_long || t == pe_stats)
+ r.mismatch (tok, t, pe_string);
+ else if (type == pe_unknown)
+ {
+ type = pe_string;
+ r.resolved (tok, type);
+ }
+ }
+ else if (op == "==") // XXX: other comparison operators
+ {
+ // operands are resolved with no requested type; the result is long
+ left->resolve_types (r, pe_unknown);
+ right->resolve_types (r, pe_unknown);
+ if (t == pe_string || t == pe_stats)
+ r.mismatch (tok, t, pe_long);
+ else if (type == pe_unknown)
+ {
+ type = pe_long;
+ r.resolved (tok, type);
+ }
+ }
+ else // general arithmetic operators?
+ {
+ // propagate type downward
+ // (prefer the caller's request; else fall back to our own known type)
+ exp_type subtype = t;
+ if ((t == pe_unknown) && (type != pe_unknown))
+ subtype = type;
+ left->resolve_types (r, subtype);
+ right->resolve_types (r, subtype);
+
+ // The checks below are tried in order; only the first one that
+ // applies takes effect.
+ if ((t == pe_unknown) && (type != pe_unknown))
+ ; // already resolved
+ else if ((t != pe_unknown) && (type == pe_unknown))
+ {
+ type = t;
+ r.resolved (tok, type);
+ }
+ else if ((t == pe_unknown) && (left->type != pe_unknown))
+ {
+ type = left->type;
+ r.resolved (tok, type);
+ }
+ else if ((t == pe_unknown) && (right->type != pe_unknown))
+ {
+ type = right->type;
+ r.resolved (tok, type);
+ }
+ else if (type != t)
+ r.mismatch (tok, t, type);
+ // NB: when t, type and both operand types are all pe_unknown,
+ // nothing is reported here.
+ }
+}
+
+
+// Unary operators apply to numbers only: the operand is always
+// resolved as pe_long, and so is the expression itself unless the
+// caller asked for a string or stats value (which is a mismatch).
+void
+unary_expression::resolve_types (typeresolution_info& r, exp_type t)
+{
+  operand->resolve_types (r, pe_long);
+
+  // already resolved and nothing specific requested
+  if (t == pe_unknown && type != pe_unknown)
+    return;
+
+  if (t == pe_string || t == pe_stats)
+    {
+      r.mismatch (tok, t, pe_long);
+      return;
+    }
+
+  if (type == pe_unknown)
+    {
+      type = pe_long;
+      r.resolved (tok, type);
+    }
+}
+
+
+// The condition is resolved as pe_long; both result branches are
+// resolved against the caller's requested type `t'.
+// NB: this expression's own `type' member is not assigned here.
+void
+ternary_expression::resolve_types (typeresolution_info& r, exp_type t)
+{
+  cond->resolve_types (r, pe_long);
+
+  truevalue->resolve_types (r, t);
+  falsevalue->resolve_types (r, t);
+}
+
+
+// Reconcile three types: the caller's requested type `t', the type of
+// the referring expression (referrer->type), and the type of the
+// declaration it refers to (referent->type).  Both Referrer and
+// Referent must expose `type' and `tok' members.
+//
+// The conditions are tried in order and exactly one branch runs per
+// call: either nothing happens (the types already agree), a mismatch is
+// reported, one unknown type is filled in from a known one, or -- when
+// nothing at all is known -- the referrer is reported as unresolved.
+// Because only one propagation happens per call, a remaining
+// disagreement is left for a later call to detect (see the "catch ...
+// later" notes below).
+template <class Referrer, class Referent>
+void resolve_2types (Referrer* referrer, Referent* referent,
+ typeresolution_info& r, exp_type t)
+{
+ // references: the propagation branches below write through these
+ exp_type& rtype = referrer->type;
+ const token* rtok = referrer->tok;
+ exp_type& ttype = referent->type;
+ const token* ttok = referent->tok;
+
+ if (t != pe_unknown && rtype == t && rtype == ttype)
+ ; // do nothing: all three types in agreement
+ else if (t == pe_unknown && rtype != pe_unknown && rtype == ttype)
+ ; // do nothing: two known types in agreement
+ else if (rtype != pe_unknown && ttype != pe_unknown && rtype != ttype)
+ r.mismatch (rtok, rtype, ttype);
+ else if (rtype != pe_unknown && t != pe_unknown && rtype != t)
+ r.mismatch (rtok, rtype, t);
+ else if (ttype != pe_unknown && t != pe_unknown && ttype != t)
+ r.mismatch (ttok, ttype, t);
+ else if (rtype == pe_unknown && t != pe_unknown)
+ {
+ // propagate from upstream
+ rtype = t;
+ r.resolved (rtok, rtype);
+ // catch rtype/ttype mismatch later
+ }
+ else if (rtype == pe_unknown && ttype != pe_unknown)
+ {
+ // propagate from referent
+ rtype = ttype;
+ r.resolved (rtok, rtype);
+ // catch rtype/t mismatch later
+ }
+ else if (rtype != pe_unknown && ttype == pe_unknown)
+ {
+ // propagate to referent
+ ttype = rtype;
+ r.resolved (ttok, ttype);
+ // catch rtype/t mismatch later
+ }
+ else
+ r.unresolved (rtok);
+}
+
+
+// Resolve the type of a scalar variable reference against its
+// declaration; symbol resolution must already have set `referent'.
+// A referent that carries index types (i.e. is an array) cannot be
+// used as a scalar and is reported as unresolved instead.
+void
+symbol::resolve_types (typeresolution_info& r, exp_type t)
+{
+ assert (referent != 0);
+
+ if (referent->index_types.size() > 0)
+ r.unresolved (tok); // array
else
+ resolve_2types (this, referent, r, t);
+}
+
+
+// Resolve the element type of an array reference against its
+// declaration, then resolve each index expression against the
+// declaration's recorded index types.  Symbol resolution must already
+// have set `referent'.
+void
+arrayindex::resolve_types (typeresolution_info& r, exp_type t)
+{
+ assert (referent != 0);
+
+ resolve_2types (this, referent, r, t);
+
+ // now resolve the array indexes
+ if (referent->index_types.size() == 0)
+ {
+ // designate referent as array
+ referent->index_types.resize (indexes.size());
+ for (unsigned i=0; i<indexes.size(); i++)
+ referent->index_types[i] = pe_unknown;
+ // NB: we "fall through" to for loop
+ }
+
+ // an arity disagreement with the declaration is reported as unresolved
+ if (indexes.size() != referent->index_types.size())
+ r.unresolved (tok);
+ else for (unsigned i=0; i<indexes.size(); i++)
+ {
+ expression* e = indexes[i];
+ e->resolve_types (r, referent->index_types[i]);
+ // record whatever type the index expression ended up with
+ exp_type it = e->type;
+ referent->index_types[i] = it;
+
+ // only long and string are accepted as index types
+ if (it == pe_string || it == pe_long)
+ ; // do nothing
+ else if (it == pe_stats)
+ r.invalid (e->tok, it);
+ else // pe_unknown
+ r.unresolved (e->tok);
+ }
+}
+
+
+// Resolve the return type of a function call against its declaration,
+// then resolve each actual argument against the matching formal
+// argument.  Symbol resolution must already have set `referent'.
+// A stats-typed call, an arity mismatch, and stats-typed or unresolved
+// arguments are all reported through `r'.
+void
+functioncall::resolve_types (typeresolution_info& r, exp_type t)
+{
+ assert (referent != 0);
+
+ resolve_2types (this, referent, r, t);
+
+ if (type == pe_stats)
+ r.mismatch (tok, pe_unknown, type);
+
+ // XXX: but what about functions that return no value,
+ // and are used only as an expression-statement for side effects?
+
+ // now resolve the function parameters
+ if (args.size() != referent->formal_args.size())
+ r.unresolved (tok);
+ // Only walk the positions that exist on both sides: after an arity
+ // mismatch (already reported above) indexing formal_args[i] for every
+ // actual argument would read past the end of formal_args.
+ for (unsigned i=0; i<args.size() && i<referent->formal_args.size(); i++)
{
- // parse then print just stdin
- parser p (cin);
- stapfile* f = p.parse ();
- if (f)
- f->print (cout);
+ expression* e = args[i];
+ exp_type& ft = referent->formal_args[i]->type;
+ const token* ftok = referent->formal_args[i]->tok;
+ e->resolve_types (r, ft);
+ exp_type at = e->type;
+
+ if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown)
+ {
+ // propagate to formal arg
+ ft = at;
+ r.resolved (referent->formal_args[i]->tok, ft);
+ }
+ if (at == pe_stats)
+ r.invalid (e->tok, at);
+ if (ft == pe_stats)
+ r.invalid (ftok, ft);
+ if (at != pe_unknown && ft != pe_unknown && ft != at)
+ r.mismatch (e->tok, at, ft);
+ if (at == pe_unknown)
+ r.unresolved (e->tok);
+ }
+}
+
+
+// Resolve types in each contained statement, in order.
+void
+block::resolve_types (typeresolution_info& r)
+{
+  const unsigned count = statements.size ();
+  for (unsigned k = 0; k < count; k++)
+    statements[k]->resolve_types (r);
+}
+
+
+// The condition is resolved as pe_long; then types are resolved in the
+// then-branch and -- when one exists -- the else-branch.
+void
+if_statement::resolve_types (typeresolution_info& r)
+{
+  condition->resolve_types (r, pe_long);
+  thenblock->resolve_types (r);
+  // elseblock is optional (if_statement::print tests it before use), so
+  // an "if" without "else" must not dereference it here either.
+  if (elseblock)
+    elseblock->resolve_types (r);
+}
+
+
+// The loop condition is resolved as pe_long; the init and increment
+// expressions are resolved with no requested type; then the body.
+// NOTE(review): all four members are dereferenced unconditionally;
+// presumably the parser never leaves any of them null -- confirm.
+void
+for_loop::resolve_types (typeresolution_info& r)
+{
+  init->resolve_types (r, pe_unknown);
+  cond->resolve_types (r, pe_long);
+  incr->resolve_types (r, pe_unknown);
+  block->resolve_types (r);
+}
+
+
+// An expression statement places no type requirement on its value.
+void
+expr_statement::resolve_types (typeresolution_info& r)
+{
+  value->resolve_types (r, pe_unknown);
+}
+
+
+// Resolve the returned value against the enclosing function's return
+// type.  This works like symbol::resolve_types, with the function's
+// return type playing the role of the referent.  A long or string
+// value type is propagated to a function whose type is still unknown;
+// a stats value is rejected.
+void
+return_statement::resolve_types (typeresolution_info& r)
+{
+  // XXX: need control flow semantic checking; until then:
+  if (! r.current_function)
+    {
+      r.unresolved (tok);
+      return;
+    }
+
+  exp_type& rettype = r.current_function->type;
+  value->resolve_types (r, rettype);
+  const exp_type valtype = value->type;
+
+  if (rettype != pe_unknown && valtype != pe_unknown && rettype != valtype)
+    r.mismatch (r.current_function->tok, rettype, valtype);
+
+  if (rettype == pe_unknown && (valtype == pe_long || valtype == pe_string))
+    {
+      // propagate non-statistics from value
+      rettype = valtype;
+      r.resolved (r.current_function->tok, valtype);
+    }
+
+  if (valtype == pe_stats)
+    r.invalid (value->tok, valtype);
+}
+
+
+// Count one still-unresolved type.  The error message on cerr is
+// printed only when assert_resolvability is set; the counter is
+// bumped either way.
+void
+typeresolution_info::unresolved (const token* tok)
+{
+ num_still_unresolved ++;
+
+ if (assert_resolvability)
+ {
+ cerr << "error: unresolved type for ";
+ if (tok)
+ cerr << *tok;
else
- rc = 1;
+ cerr << "a token";
+ cerr << endl;
}
+}
+
- return rc;
+// Count one invalid-type occurrence (it shares the still-unresolved
+// counter).  The error message on cerr is printed only when
+// assert_resolvability is set.
+void
+typeresolution_info::invalid (const token* tok, exp_type pe)
+{
+  ++num_still_unresolved;
+
+  if (! assert_resolvability)
+    return;
+
+  cerr << "error: invalid type " << pe << " for ";
+  if (! tok)
+    cerr << "a token";
+  else
+    cerr << *tok;
+  cerr << endl;
+}
+
+
+// Count one type mismatch (it shares the still-unresolved counter).
+// The error message on cerr, naming both conflicting types, is printed
+// only when assert_resolvability is set.
+void
+typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2)
+{
+  ++num_still_unresolved;
+
+  if (! assert_resolvability)
+    return;
+
+  cerr << "error: type mismatch for ";
+  if (! tok)
+    cerr << "a token";
+  else
+    cerr << *tok;
+  cerr << ": " << t1 << " vs. " << t2 << endl;
+}
+
+
+// Count one newly resolved type.  Neither argument is used at present;
+// the only reference to them is the disabled trace line below.
+void
+typeresolution_info::resolved (const token* tok, exp_type t)
+{
+ num_newly_resolved ++;
+ // cerr << "resolved " << *tok << " type " << t << endl;
}