From 56099f083d7a68722ace316be4d288d21caabaee Mon Sep 17 00:00:00 2001 From: fche Date: Wed, 2 Mar 2005 01:28:50 +0000 Subject: * some semantic analysis 2005-03-01 Frank Ch. Eigler * parse.cxx: Implement left-associativity for several types of operators. Add some more statement types. Parse functions. Be able to print tokens. Simplify error generating functions. Save tokens in all parse tree nodes. * parse.h: Corresponding changes. * staptree.cxx: Move tree-printing functions here. Add many new functions for symbol and type resolution. * staptree.h: Corresponding changes. * semtest.cxx: New semantic analysis pass & test driver. * testsuite/sem*/*: New tests. * parsetest.cxx: Separated parse test driver. * testsuite/parse*/*: Adapt tests to parsetest driver. * Makefile.am: Build semtest. Run its tests. * Makefile.in: Regenerated. * parse.cxx, parse.h: New files: parser. --- staptree.cxx | 901 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 880 insertions(+), 21 deletions(-) (limited to 'staptree.cxx') diff --git a/staptree.cxx b/staptree.cxx index 88067765..34a1d0d3 100644 --- a/staptree.cxx +++ b/staptree.cxx @@ -1,44 +1,903 @@ -// toy driver +// parse tree functions // Copyright 2005 Red Hat Inc. // GPL #include "staptree.h" #include "parse.h" #include +#include +#include +expression::expression (): + type (pe_unknown), tok (0) +{ +} + + +expression::~expression () +{ +} + + +statement::statement (): + tok (0) +{ +} + + +statement::~statement () +{ +} + + +symbol::symbol (): + referent (0) +{ +} + + +arrayindex::arrayindex (): + referent (0) +{ +} + + +functioncall::functioncall (): + referent (0) +{ +} + + +symboldecl::symboldecl (): + tok (0), + type (pe_unknown) +{ +} + + +symboldecl::~symboldecl () +{ +} + + +vardecl::vardecl () +{ +} + + +vardecl::vardecl (unsigned arity) +{ + index_types.resize (arity); + for (unsigned i=0; i 0) + o << "[...]"; +} + - if (argc > 1) +void vardecl::printsig (ostream& o) +{ + o << name << ":" << type; + if (index_types.size() > 0) { - // quietly parse all listed input files - for (int i = 1; i < argc; i ++) + o << " ["; + for (unsigned i=0; i0 ? ", " : "") << index_types[i]; + o << "]"; + } +} + + +void functiondecl::print (ostream& o) +{ + o << "function " << name << " ("; + for (unsigned i=0; i0 ? ", " : "") << *formal_args[i]; + o << ")" << endl; + body->print(o); +} + + +void functiondecl::printsig (ostream& o) +{ + o << name << ":" << type << " ("; + for (unsigned i=0; i0 ? ", " : "") + << *formal_args[i] + << ":" + << formal_args[i]->type; + o << ")"; +} + + +void arrayindex::print (ostream& o) +{ + o << base << "["; + for (unsigned i=0; i0 ? ", " : "") << *indexes[i]; + o << "]"; +} + + +void functioncall::print (ostream& o) +{ + o << function << "("; + for (unsigned i=0; i0 ? ", " : "") << *args[i]; + o << ")"; +} + + +ostream& operator << (ostream& o, statement& k) +{ + k.print (o); + return o; +} + + +void block::print (ostream& o) +{ + o << "{" << endl; + for (unsigned i=0; i" << endl; +} + + +void null_statement::print (ostream& o) +{ + o << ";"; +} + + +void expr_statement::print (ostream& o) +{ + o << *value; +} + + +void return_statement::print (ostream& o) +{ + o << "return " << *value; +} + + +void delete_statement::print (ostream& o) +{ + o << "delete " << *value; +} + + +void if_statement::print (ostream& o) +{ + o << "if (" << *condition << ") " << endl + << *thenblock << endl; + if (elseblock) + o << "else " << *elseblock << endl; +} + + +void stapfile::print (ostream& o) +{ + o << "# file " << name << endl; + + for(unsigned i=0; iprint (o); + o << endl; + } + + for (unsigned j = 0; j < functions.size(); j++) + { + functions[j]->print (o); + o << endl; + } +} + + +void probe::print (ostream& o) +{ + o << "probe "; + for (unsigned i=0; i0 ? ":" : ""); + location[i]->print (o); + } + o << endl; + o << *body; +} + + +void probe_point_spec::print (ostream& o) +{ + o << functor; + if (arg) + o << "(" << *arg << ")"; +} + + +ostream& operator << (ostream& o, symboldecl& k) +{ + k.print (o); + return o; +} + + +// ------------------------------------------------------------------------ +// semantic processing: symbol resolution + + +symresolution_info::symresolution_info (vector& l, + vector& g, + vector& f, + stapfile* tf): + locals (l), globals (g), files (f), current_file (tf), current_function (0) +{ + num_unresolved = 0; +} + + +symresolution_info::symresolution_info (vector& l, + vector& g, + vector& f, + stapfile* tf, + functiondecl* cf): + locals (l), globals (g), files (f), current_file (tf), current_function (cf) +{ + num_unresolved = 0; +} + + +void +literal::resolve_symbols (symresolution_info& r) +{ +} + + +void +binary_expression::resolve_symbols (symresolution_info& r) +{ + left->resolve_symbols (r); + right->resolve_symbols (r); +} + + +void +unary_expression::resolve_symbols (symresolution_info& r) +{ + operand->resolve_symbols (r); +} + + +void +ternary_expression::resolve_symbols (symresolution_info& r) +{ + cond->resolve_symbols (r); + truevalue->resolve_symbols (r); + falsevalue->resolve_symbols (r); +} + + +void +symbol::resolve_symbols (symresolution_info& r) +{ + if (referent) + return; + + vardecl* d = r.find (name); + if (d) + referent = d; + else + { + // new local + vardecl* v = new vardecl; + v->name = name; + v->tok = tok; + r.locals.push_back (v); + referent = v; + // XXX: check for conflicting function name + } +} + + +void +arrayindex::resolve_symbols (symresolution_info& r) +{ + for (unsigned i=0; iresolve_symbols (r); + + if (referent) + return; + + vardecl* d = r.find (base); + if (d) + referent = d; + else + { + // new local + vardecl* v = new vardecl (indexes.size()); + v->name = base; + v->tok = tok; + r.locals.push_back (v); + referent = v; + // XXX: check for conflicting function name + } +} + + +void +functioncall::resolve_symbols (symresolution_info& r) +{ + for (unsigned i=0; iresolve_symbols (r); + + if (referent) + return; + + // find global functiondecl + functiondecl* d = 0; + for (unsigned j = 0; j < r.current_file->functions.size(); j++) + { + functiondecl* fd = r.current_file->functions[j]; + if (fd->name == function) { - parser p (argv[i]); - stapfile* f = p.parse (); - if (f) - cout << "file '" << argv[i] << "' parsed ok." << endl; - else - rc = 1; + d = fd; + break; } } + // XXX: check for conflicting variable name + + if (d) + referent = d; + else + r.unresolved (tok); +} + + +void +block::resolve_symbols (symresolution_info& r) +{ + for (unsigned i=0; iresolve_symbols (r); +} + + +void +if_statement::resolve_symbols (symresolution_info& r) +{ + condition->resolve_symbols (r); + thenblock->resolve_symbols (r); + elseblock->resolve_symbols (r); +} + + +void +for_loop::resolve_symbols (symresolution_info& r) +{ + init->resolve_symbols (r); + cond->resolve_symbols (r); + incr->resolve_symbols (r); + block->resolve_symbols (r); +} + + +void +expr_statement::resolve_symbols (symresolution_info& r) +{ + value->resolve_symbols (r); +} + + +vardecl* +symresolution_info::find (const string& name) +{ + // search locals + for (unsigned i=0; iname == name) + return locals[i]; + + // search function formal parameters (if any) + if (current_function) + { + for (unsigned i=0; iformal_args.size(); i++) + if (current_function->formal_args [i]->name == name) + return current_function->formal_args [i]; + } + + // search globals + for (unsigned i=0; iname == name) + return globals[i]; + + return 0; +} + + +void +symresolution_info::unresolved (const token* tok) +{ + num_unresolved ++; + + cerr << "error: unresolved symbol for "; + if (tok) + cerr << *tok; + else + cerr << "a token"; + cerr << endl; +} + + +// ------------------------------------------------------------------------ +// semantic processing: type resolution + + +void +literal::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (type == pe_long || type == pe_string); + if ((t == type) || (t == pe_unknown)) + return; + + r.mismatch (tok, type, t); +} + + +void +binary_expression::resolve_types (typeresolution_info& r, exp_type t) +{ + if (op == "<<") + { + left->resolve_types (r, pe_stats); + right->resolve_types (r, pe_long); + if (t == pe_long || t == pe_string) + r.mismatch (tok, t, pe_stats); + else if (type == pe_unknown) + { + type = pe_stats; + r.resolved (tok, type); + } + } + else if (op == ".") // string concatenation + { + left->resolve_types (r, pe_string); + right->resolve_types (r, pe_string); + if (t == pe_long || t == pe_stats) + r.mismatch (tok, t, pe_string); + else if (type == pe_unknown) + { + type = pe_string; + r.resolved (tok, type); + } + } + else if (op == "==") // XXX: other comparison operators + { + left->resolve_types (r, pe_unknown); + right->resolve_types (r, pe_unknown); + if (t == pe_string || t == pe_stats) + r.mismatch (tok, t, pe_long); + else if (type == pe_unknown) + { + type = pe_long; + r.resolved (tok, type); + } + } + else // general arithmetic operators? + { + // propagate type downward + exp_type subtype = t; + if ((t == pe_unknown) && (type != pe_unknown)) + subtype = type; + left->resolve_types (r, subtype); + right->resolve_types (r, subtype); + + if ((t == pe_unknown) && (type != pe_unknown)) + ; // already resolved + else if ((t != pe_unknown) && (type == pe_unknown)) + { + type = t; + r.resolved (tok, type); + } + else if ((t == pe_unknown) && (left->type != pe_unknown)) + { + type = left->type; + r.resolved (tok, type); + } + else if ((t == pe_unknown) && (right->type != pe_unknown)) + { + type = right->type; + r.resolved (tok, type); + } + else if (type != t) + r.mismatch (tok, t, type); + } +} + + +void +unary_expression::resolve_types (typeresolution_info& r, exp_type t) +{ + // all unary operators only work on numerics + + operand->resolve_types (r, pe_long); + + if (t == pe_unknown && type != pe_unknown) + ; // already resolved + else if (t == pe_string || t == pe_stats) + r.mismatch (tok, t, pe_long); + else if (type == pe_unknown) + { + type = pe_long; + r.resolved (tok, type); + } +} + + +void +ternary_expression::resolve_types (typeresolution_info& r, exp_type t) +{ + cond->resolve_types (r, pe_long); + truevalue->resolve_types (r, t); + falsevalue->resolve_types (r, t); +} + + +template +void resolve_2types (Referrer* referrer, Referent* referent, + typeresolution_info& r, exp_type t) +{ + exp_type& rtype = referrer->type; + const token* rtok = referrer->tok; + exp_type& ttype = referent->type; + const token* ttok = referent->tok; + + if (t != pe_unknown && rtype == t && rtype == ttype) + ; // do nothing: all three types in agreement + else if (t == pe_unknown && rtype != pe_unknown && rtype == ttype) + ; // do nothing: two known types in agreement + else if (rtype != pe_unknown && ttype != pe_unknown && rtype != ttype) + r.mismatch (rtok, rtype, ttype); + else if (rtype != pe_unknown && t != pe_unknown && rtype != t) + r.mismatch (rtok, rtype, t); + else if (ttype != pe_unknown && t != pe_unknown && ttype != t) + r.mismatch (ttok, ttype, t); + else if (rtype == pe_unknown && t != pe_unknown) + { + // propagate from upstream + rtype = t; + r.resolved (rtok, rtype); + // catch rtype/ttype mismatch later + } + else if (rtype == pe_unknown && ttype != pe_unknown) + { + // propagate from referent + rtype = ttype; + r.resolved (rtok, rtype); + // catch rtype/t mismatch later + } + else if (rtype != pe_unknown && ttype == pe_unknown) + { + // propagate to referent + ttype = rtype; + r.resolved (ttok, ttype); + // catch rtype/t mismatch later + } + else + r.unresolved (rtok); +} + + +void +symbol::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (referent != 0); + + if (referent->index_types.size() > 0) + r.unresolved (tok); // array else + resolve_2types (this, referent, r, t); +} + + +void +arrayindex::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (referent != 0); + + resolve_2types (this, referent, r, t); + + // now resolve the array indexes + if (referent->index_types.size() == 0) + { + // designate referent as array + referent->index_types.resize (indexes.size()); + for (unsigned i=0; iindex_types[i] = pe_unknown; + // NB: we "fall through" to for loop + } + + if (indexes.size() != referent->index_types.size()) + r.unresolved (tok); + else for (unsigned i=0; iresolve_types (r, referent->index_types[i]); + exp_type it = e->type; + referent->index_types[i] = it; + + if (it == pe_string || it == pe_long) + ; // do nothing + else if (it == pe_stats) + r.invalid (e->tok, it); + else // pe_unknown + r.unresolved (e->tok); + } +} + + +void +functioncall::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (referent != 0); + + resolve_2types (this, referent, r, t); + + if (type == pe_stats) + r.mismatch (tok, pe_unknown, type); + + // XXX: but what about functions that return no value, + // and are used only as an expression-statement for side effects? + + // now resolve the function parameters + if (args.size() != referent->formal_args.size()) + r.unresolved (tok); + for (unsigned i=0; iprint (cout); + expression* e = args[i]; + exp_type& ft = referent->formal_args[i]->type; + const token* ftok = referent->formal_args[i]->tok; + e->resolve_types (r, ft); + exp_type at = e->type; + + if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown) + { + // propagate to formal arg + ft = at; + r.resolved (referent->formal_args[i]->tok, ft); + } + if (at == pe_stats) + r.invalid (e->tok, at); + if (ft == pe_stats) + r.invalid (ftok, ft); + if (at != pe_unknown && ft != pe_unknown && ft != at) + r.mismatch (e->tok, at, ft); + if (at == pe_unknown) + r.unresolved (e->tok); + } +} + + +void +block::resolve_types (typeresolution_info& r) +{ + for (unsigned i=0; iresolve_types (r); +} + + +void +if_statement::resolve_types (typeresolution_info& r) +{ + condition->resolve_types (r, pe_long); + thenblock->resolve_types (r); + elseblock->resolve_types (r); +} + + +void +for_loop::resolve_types (typeresolution_info& r) +{ + init->resolve_types (r, pe_unknown); + cond->resolve_types (r, pe_long); + incr->resolve_types (r, pe_unknown); + block->resolve_types (r); +} + + +void +expr_statement::resolve_types (typeresolution_info& r) +{ + value->resolve_types (r, pe_unknown); +} + + +void +return_statement::resolve_types (typeresolution_info& r) +{ + // This is like symbol::resolve_types, where the referent is + // the return value of the function. + + // XXX: need control flow semantic checking; until then: + if (r.current_function == 0) + { + r.unresolved (tok); + return; + } + + exp_type& type = r.current_function->type; + value->resolve_types (r, type); + + if (type != pe_unknown && value->type != pe_unknown + && type != value->type) + r.mismatch (r.current_function->tok, type, value->type); + if (type == pe_unknown && + (value->type == pe_long || value->type == pe_string)) + { + // propagate non-statistics from value + type = value->type; + r.resolved (r.current_function->tok, value->type); + } + if (value->type == pe_stats) + r.invalid (value->tok, value->type); +} + + +void +typeresolution_info::unresolved (const token* tok) +{ + num_still_unresolved ++; + + if (assert_resolvability) + { + cerr << "error: unresolved type for "; + if (tok) + cerr << *tok; else - rc = 1; + cerr << "a token"; + cerr << endl; } +} + - return rc; +void +typeresolution_info::invalid (const token* tok, exp_type pe) +{ + num_still_unresolved ++; + + if (assert_resolvability) + { + cerr << "error: invalid type " << pe << " for "; + if (tok) + cerr << *tok; + else + cerr << "a token"; + cerr << endl; + } +} + + +void +typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2) +{ + num_still_unresolved ++; + + if (assert_resolvability) + { + cerr << "error: type mismatch for "; + if (tok) + cerr << *tok; + else + cerr << "a token"; + cerr << ": " << t1 << " vs. " << t2 << endl; + } +} + + +void +typeresolution_info::resolved (const token* tok, exp_type t) +{ + num_newly_resolved ++; + // cerr << "resolved " << *tok << " type " << t << endl; } -- cgit