diff options
author | fche <fche> | 2005-03-02 01:28:50 +0000 |
---|---|---|
committer | fche <fche> | 2005-03-02 01:28:50 +0000 |
commit | 56099f083d7a68722ace316be4d288d21caabaee (patch) | |
tree | 3e67ec78134a358c1f90f701c165c4c577d62177 | |
parent | 2f1a1aead38c1dcd329a694dd8d3290b37320466 (diff) | |
download | systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.gz systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.xz systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.zip |
* some semantic analysis
2005-03-01 Frank Ch. Eigler <fche@redhat.com>
* parse.cxx: Implement left-associativity for several types of
operators. Add some more statement types. Parse functions.
Be able to print tokens. Simplify error generating functions.
Save tokens in all parse tree nodes.
* parse.h: Corresponding changes.
* staptree.cxx: Move tree-printing functions here. Add many
new functions for symbol and type resolution.
* staptree.h: Corresponding changes.
* semtest.cxx: New semantic analysis pass & test driver.
* testsuite/sem*/*: New tests.
* parsetest.cxx: Separated parse test driver.
* testsuite/parse*/*: Adapt tests to parsetest driver.
* Makefile.am: Build semtest. Run its tests.
* Makefile.in: Regenerated.
* parse.cxx, parse.h: New files: parser.
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | Makefile.am | 15 | ||||
-rw-r--r-- | Makefile.in | 79 | ||||
-rw-r--r-- | parse.cxx | 374 | ||||
-rw-r--r-- | parse.h | 8 | ||||
-rw-r--r-- | parsetest.cxx | 40 | ||||
-rw-r--r-- | semtest.cxx | 185 | ||||
-rw-r--r-- | staptree.cxx | 901 | ||||
-rw-r--r-- | staptree.h | 316 | ||||
-rwxr-xr-x | testsuite/parseko/one.stp | 2 | ||||
-rwxr-xr-x | testsuite/parseko/two.stp | 2 | ||||
-rwxr-xr-x | testsuite/parseok/one.stp | 3 | ||||
-rwxr-xr-x | testsuite/parseok/two.stp | 9 | ||||
-rwxr-xr-x | testsuite/semko/four.stp | 12 | ||||
-rwxr-xr-x | testsuite/semko/one.stp | 8 | ||||
-rwxr-xr-x | testsuite/semko/three.stp | 6 | ||||
-rwxr-xr-x | testsuite/semko/two.stp | 8 | ||||
-rwxr-xr-x | testsuite/semok/four.stp | 23 | ||||
-rwxr-xr-x | testsuite/semok/one.stp | 25 | ||||
-rwxr-xr-x | testsuite/semok/three.stp | 6 | ||||
-rwxr-xr-x | testsuite/semok/two.stp | 13 |
21 files changed, 1774 insertions, 278 deletions
@@ -1,3 +1,20 @@ +2005-03-01 Frank Ch. Eigler <fche@redhat.com> + + * parse.cxx: Implement left-associativity for several types of + operators. Add some more statement types. Parse functions. + Be able to print tokens. Simplify error generating functions. + Save tokens in all parse tree nodes. + * parse.h: Corresponding changes. + * staptree.cxx: Move tree-printing functions here. Add many + new functions for symbol and type resolution. + * staptree.h: Corresponding changes. + * semtest.cxx: New semantic analysis pass & test driver. + * testsuite/sem*/*: New tests. + * parsetest.cxx: Separated parse test driver. + * testsuite/parse*/*: Adapt tests to parsetest driver. + * Makefile.am: Build semtest. Run its tests. + * Makefile.in: Regenerated. + 2005-02-11 Frank Ch. Eigler <fche@redhat.com> * parse.cxx, parse.h: New files: parser. diff --git a/Makefile.am b/Makefile.am index 3771800c..1ba8071a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,8 +3,10 @@ AM_MAKEFLAGS = 'CXXFLAGS=$(CXXFLAGS)' 'LDFLAGS=$(LDFLAGS)' -bin_PROGRAMS = stap -stap_SOURCES = parse.cxx staptree.cxx +bin_PROGRAMS = +noinst_PROGRAMS = parsetest semtest +parsetest_SOURCES = parse.cxx staptree.cxx parsetest.cxx +semtest_SOURCES = parse.cxx staptree.cxx semtest.cxx AM_CXXFLAGS = -Wall # Get extra libs as needed @@ -18,7 +20,8 @@ stapdatadir = @datadir@/systemtap DEFPATH="\".$(PATH_SEPARATOR)$(stapdatadir)\"" DEFS= -DDEFPATH=$(DEFPATH) -DHAVE_CONFIG_H -pok=$(srcdir)/testsuite/parseok -pko=$(srcdir)/testsuite/parseko -TESTS = $(wildcard $(pok)/*.stp) $(wildcard $(pko)/*.stp) -XFAIL_TESTS = $(wildcard $(pko)/*.stp) +p=$(srcdir)/testsuite/parse +s=$(srcdir)/testsuite/sem +TESTS = $(wildcard $(p)ok/*.stp) $(wildcard $(p)ko/*.stp) \ + $(wildcard $(s)ok/*.stp) $(wildcard $(s)ko/*.stp) +XFAIL_TESTS = $(wildcard $(p)ko/*.stp) $(wildcard $(s)ko/*.stp) diff --git a/Makefile.in b/Makefile.in index b3f4828d..c93c7757 100644 --- a/Makefile.in +++ b/Makefile.in @@ -16,7 +16,7 @@ # Makefile.am --- automake input 
file for systemtap -SOURCES = $(stap_SOURCES) +SOURCES = $(parsetest_SOURCES) $(semtest_SOURCES) srcdir = @srcdir@ top_srcdir = @top_srcdir@ @@ -38,7 +38,8 @@ POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : -bin_PROGRAMS = stap$(EXEEXT) +bin_PROGRAMS = +noinst_PROGRAMS = parsetest$(EXEEXT) semtest$(EXEEXT) subdir = . DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(srcdir)/config.in \ @@ -55,11 +56,17 @@ CONFIG_HEADER = config.h CONFIG_CLEAN_FILES = am__installdirs = "$(DESTDIR)$(bindir)" binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) -PROGRAMS = $(bin_PROGRAMS) -am_stap_OBJECTS = parse.$(OBJEXT) staptree.$(OBJEXT) -stap_OBJECTS = $(am_stap_OBJECTS) -stap_LDADD = $(LDADD) -stap_DEPENDENCIES = +PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) +am_parsetest_OBJECTS = parse.$(OBJEXT) staptree.$(OBJEXT) \ + parsetest.$(OBJEXT) +parsetest_OBJECTS = $(am_parsetest_OBJECTS) +parsetest_LDADD = $(LDADD) +parsetest_DEPENDENCIES = +am_semtest_OBJECTS = parse.$(OBJEXT) staptree.$(OBJEXT) \ + semtest.$(OBJEXT) +semtest_OBJECTS = $(am_semtest_OBJECTS) +semtest_LDADD = $(LDADD) +semtest_DEPENDENCIES = DEFAULT_INCLUDES = -I. -I$(srcdir) -I. 
depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles @@ -68,8 +75,8 @@ CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ CXXLD = $(CXX) CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ -o $@ -SOURCES = $(stap_SOURCES) -DIST_SOURCES = $(stap_SOURCES) +SOURCES = $(parsetest_SOURCES) $(semtest_SOURCES) +DIST_SOURCES = $(parsetest_SOURCES) $(semtest_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -170,7 +177,8 @@ sharedstatedir = @sharedstatedir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ AM_MAKEFLAGS = 'CXXFLAGS=$(CXXFLAGS)' 'LDFLAGS=$(LDFLAGS)' -stap_SOURCES = parse.cxx staptree.cxx +parsetest_SOURCES = parse.cxx staptree.cxx parsetest.cxx +semtest_SOURCES = parse.cxx staptree.cxx semtest.cxx AM_CXXFLAGS = -Wall # Get extra libs as needed @@ -179,10 +187,12 @@ stapdatadir = @datadir@/systemtap # stuff for compiling gawk/pgawk DEFPATH = "\".$(PATH_SEPARATOR)$(stapdatadir)\"" -pok = $(srcdir)/testsuite/parseok -pko = $(srcdir)/testsuite/parseko -TESTS = $(wildcard $(pok)/*.stp) $(wildcard $(pko)/*.stp) -XFAIL_TESTS = $(wildcard $(pko)/*.stp) +p = $(srcdir)/testsuite/parse +s = $(srcdir)/testsuite/sem +TESTS = $(wildcard $(p)ok/*.stp) $(wildcard $(p)ko/*.stp) \ + $(wildcard $(s)ok/*.stp) $(wildcard $(s)ko/*.stp) + +XFAIL_TESTS = $(wildcard $(p)ko/*.stp) $(wildcard $(s)ko/*.stp) all: config.h $(MAKE) $(AM_MAKEFLAGS) all-am @@ -261,9 +271,15 @@ uninstall-binPROGRAMS: clean-binPROGRAMS: -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) -stap$(EXEEXT): $(stap_OBJECTS) $(stap_DEPENDENCIES) - @rm -f stap$(EXEEXT) - $(CXXLINK) $(stap_LDFLAGS) $(stap_OBJECTS) $(stap_LDADD) $(LIBS) + +clean-noinstPROGRAMS: + -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) +parsetest$(EXEEXT): $(parsetest_OBJECTS) $(parsetest_DEPENDENCIES) + @rm -f parsetest$(EXEEXT) + $(CXXLINK) $(parsetest_LDFLAGS) $(parsetest_OBJECTS) $(parsetest_LDADD) $(LIBS) 
+semtest$(EXEEXT): $(semtest_OBJECTS) $(semtest_DEPENDENCIES) + @rm -f semtest$(EXEEXT) + $(CXXLINK) $(semtest_LDFLAGS) $(semtest_OBJECTS) $(semtest_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -272,6 +288,8 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parse.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parsetest.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/semtest.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/staptree.Po@am__quote@ .cxx.o: @@ -570,7 +588,8 @@ maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." clean: clean-am -clean-am: clean-binPROGRAMS clean-generic mostlyclean-am +clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \ + mostlyclean-am distclean: distclean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) @@ -621,18 +640,18 @@ ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-info-am .PHONY: CTAGS GTAGS all all-am am--refresh check check-TESTS check-am \ - clean clean-binPROGRAMS clean-generic ctags dist dist-all \ - dist-bzip2 dist-gzip dist-shar dist-tarZ dist-zip distcheck \ - distclean distclean-compile distclean-generic distclean-hdr \ - distclean-tags distcleancheck distdir distuninstallcheck dvi \ - dvi-am html html-am info info-am install install-am \ - install-binPROGRAMS install-data install-data-am install-exec \ - install-exec-am install-info install-info-am install-man \ - install-strip installcheck installcheck-am installdirs \ - maintainer-clean maintainer-clean-generic mostlyclean \ - mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-binPROGRAMS \ - uninstall-info-am + clean clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \ + ctags dist dist-all dist-bzip2 dist-gzip dist-shar dist-tarZ \ + dist-zip distcheck distclean distclean-compile \ + distclean-generic distclean-hdr distclean-tags distcleancheck \ + distdir 
distuninstallcheck dvi dvi-am html html-am info \ + info-am install install-am install-binPROGRAMS install-data \ + install-data-am install-exec install-exec-am install-info \ + install-info-am install-man install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-binPROGRAMS uninstall-info-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. @@ -29,6 +29,33 @@ parser::~parser() } +ostream& +operator << (ostream& o, const token& t) +{ + o << (t.type == tok_junk ? "junk" : + t.type == tok_identifier ? "identifier" : + t.type == tok_operator ? "operator" : + t.type == tok_string ? "string" : + t.type == tok_number ? "number" : + "unknown token"); + + o << " '"; + for (unsigned i=0; i<t.content.length(); i++) + { + char c = t.content[i]; + o << (isprint (c) ? c : '?'); + } + o << "'"; + + o << " at " + << t.location.file << ":" + << t.location.line << ":" + << t.location.column; + + return o; +} + + void parser::print_error (const parse_error &pe) { @@ -36,27 +63,9 @@ parser::print_error (const parse_error &pe) const token* t = last_t; if (t) - { - cerr << "\tsaw " - << (t->type == tok_junk ? "junk" : - t->type == tok_identifier ? "identifier" : - t->type == tok_operator ? "operator" : - t->type == tok_string ? "string" : - t->type == tok_number ? "number" : - "unknown token") << " '"; - for (unsigned i=0; i<t->content.length(); i++) - { - char c = t->content[i]; - cerr << (isprint (c) ? 
c : '?'); - } - cerr << "'" - << " at " - << t->location.file << ":" - << t->location.line << ":" - << t->location.column << endl; - } + cerr << "\tsaw: " << *t << endl; else - cerr << "\tsaw " << input_name << " EOF" << endl; + cerr << "\tsaw: " << input_name << " EOF" << endl; // XXX: make it possible to print the last input line, // so as to line up an arrow with the specific error column @@ -237,6 +246,7 @@ lexer::scan () (c == '<' && c2 == '<') || (c == '+' && c2 == '=') || (c == '-' && c2 == '=') || + (c == ':' && c2 == ':') || false) // XXX: etc. n->content.push_back((char) input_get ()); @@ -259,27 +269,36 @@ parser::parse () { stapfile* f = new stapfile; f->name = input_name; - + + bool empty = true; + while (1) { try { const token* t = peek (); - if (! t) // EOF + if (! t) // nice clean EOF break; + empty = false; if (t->type == tok_identifier && t->content == "probe") { - next (); // advance + next (); f->probes.push_back (parse_probe ()); } else if (t->type == tok_identifier && t->content == "global") { - next (); // advance - f->globals.push_back (parse_global ()); + next (); + parse_global (f->globals); + } + else if (t->type == tok_identifier && t->content == "function") + { + next (); + f->functions.push_back (parse_functiondecl ()); + // XXX: check for duplicate function decl } else - throw parse_error ("expected 'probe' or 'global'"); + throw parse_error ("expected 'probe', 'global', or 'function'"); } catch (parse_error& pe) { @@ -297,11 +316,17 @@ parser::parse () } } - if (num_errors > 0) + if (empty) + { + cerr << "Input file '" << input_name << "' is empty or missing." << endl; + delete f; + return 0; + } + else if (num_errors > 0) { cerr << num_errors << " parse error(s)." 
<< endl; delete f; - f = 0; + return 0; } return f; @@ -317,12 +342,16 @@ parser::parse_probe () const token *t = peek (); if (t && t->type == tok_identifier) { + p->tok = t; p->location.push_back (parse_probe_point_spec ()); - t = next (); - if (t->type == tok_operator && t->content == ":") - continue; - else if (t->type == tok_operator && t->content == "{") + t = peek (); + if (t && t->type == tok_operator && t->content == ":") + { + next (); + continue; + } + else if (t && t->type == tok_operator && t->content == "{") break; else throw parse_error ("expected ':' or '{'"); @@ -339,16 +368,21 @@ parser::parse_probe () block* -parser::parse_stmt_block () // "{" already consumed +parser::parse_stmt_block () { block* pb = new block; + const token* t = next (); + if (! (t->type == tok_operator && t->content == "{")) + throw parse_error ("expected '{'"); + + pb->tok = t; while (1) { try { // handle empty blocks - const token* t = peek (); + t = peek (); if (t && t->type == tok_operator && t->content == "}") { next (); @@ -397,23 +431,22 @@ parser::parse_statement () return new null_statement (); } else if (t && t->type == tok_operator && t->content == "{") - { - next (); - return parse_stmt_block (); - } + return parse_stmt_block (); else if (t && t->type == tok_identifier && t->content == "if") - { - next (); - return parse_if_statement (); - } + return parse_if_statement (); + else if (t && t->type == tok_identifier && t->content == "return") + return parse_return_statement (); + else if (t && t->type == tok_identifier && t->content == "delete") + return parse_delete_statement (); // XXX: other control constructs ("for", "delete", "while", "do", - // "break", "continue", "exit") + // "break", "continue", "exit", "return") else if (t && (t->type == tok_operator || // expressions are flexible t->type == tok_identifier || t->type == tok_number || t->type == tok_string)) { expr_statement *es = new expr_statement; + es->tok = t; es->value = parse_expression (); return es; 
} @@ -422,10 +455,71 @@ parser::parse_statement () } -symbol* -parser::parse_global () +void +parser::parse_global (vector <vardecl*>& globals) { - throw parse_error ("cannot parse global block yet"); + while (1) + { + const token* t = next (); + if (! (t->type == tok_identifier)) + throw parse_error ("expected identifier"); + + vardecl* d = new vardecl; + d->name = t->content; + d->tok = t; + globals.push_back (d); // XXX: check for duplicates + + t = next (); + if (t->type == tok_operator && t->content == ";") + break; + else if (t->type == tok_operator && t->content == ",") + continue; + else + throw parse_error ("expected ';' or ','"); + } +} + + +functiondecl* +parser::parse_functiondecl () +{ + functiondecl *fd = new functiondecl (); + + const token* t = next (); + if (! (t->type == tok_identifier)) + throw parse_error ("expected identifier"); + fd->name = t->content; + fd->tok = t; + + t = next (); + if (! (t->type == tok_operator && t->content == "(")) + throw parse_error ("expected '('"); + + while (1) + { + t = next (); + + // permit zero-argument functions + if (t->type == tok_operator && t->content == ")") + break; + else if (! 
(t->type == tok_identifier)) + throw parse_error ("expected identifier"); + vardecl* vd = new vardecl; + vd->name = t->content; + vd->tok = t; + fd->formal_args.push_back (vd); + + t = next (); + if (t->type == tok_operator && t->content == ")") + break; + if (t->type == tok_operator && t->content == ",") + continue; + else + throw parse_error ("expected ',' or ')'"); + } + + fd->body = parse_stmt_block (); + return fd; } @@ -438,6 +532,7 @@ parser::parse_probe_point_spec () if (t->type != tok_identifier) throw parse_error ("expected identifier"); pl->functor = t->content; + pl->tok = t; t = peek (); if (t && t->type == tok_operator && t->content == "(") @@ -457,12 +552,16 @@ literal* parser::parse_literal () { const token* t = next (); + literal* l; if (t->type == tok_string) - return new literal_string (t->content); + l = new literal_string (t->content); else if (t->type == tok_number) - return new literal_number (atol (t->content.c_str ())); + l = new literal_number (atol (t->content.c_str ())); else throw parse_error ("expected literal string or number"); + + l->tok = t; + return l; } @@ -470,10 +569,15 @@ if_statement* parser::parse_if_statement () { const token* t = next (); + if (! (t->type == tok_identifier && t->content == "if")) + throw parse_error ("expected 'if'"); + if_statement* s = new if_statement; + s->tok = t; + + t = next (); if (! (t->type == tok_operator && t->content == "(")) throw parse_error ("expected '('"); - if_statement* s = new if_statement; s->condition = parse_expression (); t = next (); @@ -493,6 +597,32 @@ parser::parse_if_statement () } +return_statement* +parser::parse_return_statement () +{ + const token* t = next (); + if (! (t->type == tok_identifier && t->content == "return")) + throw parse_error ("expected 'return'"); + return_statement* s = new return_statement; + s->tok = t; + s->value = parse_expression (); + return s; +} + + +delete_statement* +parser::parse_delete_statement () +{ + const token* t = next (); + if (! 
(t->type == tok_identifier && t->content == "delete")) + throw parse_error ("expected 'delete'"); + delete_statement* s = new delete_statement; + s->tok = t; + s->value = parse_expression (); + return s; +} + + expression* parser::parse_expression () { @@ -511,21 +641,24 @@ parser::parse_assignment () expression* op1 = parse_ternary (); const token* t = peek (); - if (t && t->type == tok_operator + // left-associative operators + while (t && t->type == tok_operator && (t->content == "=" || t->content == "<<" || t->content == "+=" || false)) // XXX: add /= etc. { assignment* e = new assignment; - e->lvalue = op1; + e->left = op1; e->op = t->content; + e->tok = t; next (); - e->rvalue = parse_expression (); - return e; + e->right = parse_ternary (); + op1 = e; + t = peek (); } - else - return op1; + + return op1; } @@ -537,16 +670,17 @@ parser::parse_ternary () const token* t = peek (); if (t && t->type == tok_operator && t->content == "?") { - next (); ternary_expression* e = new ternary_expression; + e->tok = t; e->cond = op1; - e->truevalue = parse_expression (); + next (); + e->truevalue = parse_expression (); // XXX t = next (); if (! 
(t->type == tok_operator && t->content == ":")) throw parse_error ("expected ':'"); - e->falsevalue = parse_expression (); + e->falsevalue = parse_expression (); // XXX return e; } else @@ -560,16 +694,19 @@ parser::parse_logical_or () expression* op1 = parse_logical_and (); const token* t = peek (); - if (t && t->type == tok_operator && t->content == "||") + while (t && t->type == tok_operator && t->content == "||") { - next (); logical_or_expr* e = new logical_or_expr; + e->tok = t; + e->op = t->content; e->left = op1; - e->right = parse_expression (); - return e; + next (); + e->right = parse_logical_and (); + op1 = e; + t = peek (); } - else - return op1; + + return op1; } @@ -579,16 +716,19 @@ parser::parse_logical_and () expression* op1 = parse_array_in (); const token* t = peek (); - if (t && t->type == tok_operator && t->content == "&&") + while (t && t->type == tok_operator && t->content == "&&") { - next (); logical_and_expr *e = new logical_and_expr; e->left = op1; - e->right = parse_expression (); - return e; + e->op = t->content; + e->tok = t; + next (); + e->right = parse_array_in (); + op1 = e; + t = peek (); } - else - return op1; + + return op1; } @@ -600,10 +740,12 @@ parser::parse_array_in () const token* t = peek (); if (t && t->type == tok_identifier && t->content == "in") { - next (); array_in *e = new array_in; e->left = op1; - e->right = parse_symbol (); // XXX: restrict to identifiers + e->op = t->content; + e->tok = t; + next (); + e->right = parse_symbol_plain (); return e; } else @@ -617,18 +759,20 @@ parser::parse_comparison () expression* op1 = parse_concatenation (); const token* t = peek (); - if (t && t->type == tok_operator + while (t && t->type == tok_operator && (t->content == ">" || t->content == "==")) // xxx: more { comparison* e = new comparison; e->left = op1; e->op = t->content; + e->tok = t; next (); - e->right = parse_expression (); - return e; + e->right = parse_concatenation (); + op1 = e; + t = peek (); } - else - 
return op1; + + return op1; } @@ -640,17 +784,19 @@ parser::parse_concatenation () const token* t = peek (); // XXX: the actual awk string-concatenation operator is *whitespace*. // I don't know how to easily model that here. - if (t && t->type == tok_operator && t->content == ".") + while (t && t->type == tok_operator && t->content == ".") { concatenation* e = new concatenation; e->left = op1; e->op = t->content; + e->tok = t; next (); - e->right = parse_expression (); - return e; + e->right = parse_additive (); + op1 = e; + t = peek (); } - else - return op1; + + return op1; } @@ -660,18 +806,20 @@ parser::parse_additive () expression* op1 = parse_multiplicative (); const token* t = peek (); - if (t && t->type == tok_operator + while (t && t->type == tok_operator && (t->content == "+" || t->content == "-")) { binary_expression* e = new binary_expression; e->op = t->content; e->left = op1; + e->tok = t; next (); - e->right = parse_expression (); - return e; + e->right = parse_multiplicative (); + op1 = e; + t = peek (); } - else - return op1; + + return op1; } @@ -681,18 +829,20 @@ parser::parse_multiplicative () expression* op1 = parse_unary (); const token* t = peek (); - if (t && t->type == tok_operator + while (t && t->type == tok_operator && (t->content == "*" || t->content == "/" || t->content == "%")) { binary_expression* e = new binary_expression; e->op = t->content; e->left = op1; + e->tok = t; next (); - e->right = parse_expression (); - return e; + e->right = parse_unary (); + op1 = e; + t = peek (); } - else - return op1; + + return op1; } @@ -705,6 +855,7 @@ parser::parse_unary () { unary_expression* e = new unary_expression; e->op = t->content; + e->tok = t; next (); e->operand = parse_expression (); return e; @@ -720,18 +871,20 @@ parser::parse_exponentiation () expression* op1 = parse_crement (); const token* t = peek (); + // right associative: no loop if (t && t->type == tok_operator && (t->content == "^" || t->content == "**")) { 
exponentiation* e = new exponentiation; e->op = t->content; e->left = op1; + e->tok = t; next (); e->right = parse_expression (); - return e; + op1 = e; } - else - return op1; + + return op1; } @@ -744,6 +897,7 @@ parser::parse_crement () // as in "increment" / "decrement" { pre_crement* e = new pre_crement; e->op = t->content; + e->tok = t; next (); e->operand = parse_value (); return e; @@ -758,6 +912,7 @@ parser::parse_crement () // as in "increment" / "decrement" { post_crement* e = new post_crement; e->op = t->content; + e->tok = t; next (); e->operand = op1; return e; @@ -796,14 +951,16 @@ parser::parse_symbol () // var, var[index], func(parms) const token* t = next (); if (t->type != tok_identifier) throw parse_error ("expected identifier"); + const token* t2 = t; string name = t->content; - + t = peek (); if (t && t->type == tok_operator && t->content == "[") // array { next (); struct arrayindex* ai = new arrayindex; - ai->name = name; + ai->tok = t2; + ai->base = name; while (1) { ai->indexes.push_back (parse_expression ()); @@ -821,7 +978,8 @@ parser::parse_symbol () // var, var[index], func(parms) { next (); struct functioncall* f = new functioncall; - f->name = name; + f->tok = t2; + f->function = name; while (1) { f->args.push_back (parse_expression ()); @@ -837,8 +995,22 @@ parser::parse_symbol () // var, var[index], func(parms) } else { - symbol *s = new symbol; - s->name = name; - return s; + symbol* sym = new symbol; + sym->name = name; + sym->tok = t2; + return sym; } } + + +symbol* +parser::parse_symbol_plain () // var only +{ + symbol *s = new symbol; + const token* t = next (); + if (t->type != tok_identifier) + throw parse_error ("expected identifier"); + s->name = t->content; + s->tok = t; + return s; +} @@ -29,6 +29,8 @@ struct token std::string content; }; +std::ostream& operator << (std::ostream& o, const token& t); + struct parse_error: public std::runtime_error { @@ -80,10 +82,13 @@ private: // nonterminals probe* parse_probe (); 
probe_point_spec* parse_probe_point_spec (); literal* parse_literal (); - symbol* parse_global (); + void parse_global (vector<vardecl*>&); + functiondecl* parse_functiondecl (); block* parse_stmt_block (); statement* parse_statement (); if_statement* parse_if_statement (); + return_statement* parse_return_statement (); + delete_statement* parse_delete_statement (); expression* parse_expression (); expression* parse_assignment (); expression* parse_ternary (); @@ -99,4 +104,5 @@ private: // nonterminals expression* parse_crement (); expression* parse_value (); expression* parse_symbol (); + symbol* parse_symbol_plain (); }; diff --git a/parsetest.cxx b/parsetest.cxx new file mode 100644 index 00000000..ab1d53c8 --- /dev/null +++ b/parsetest.cxx @@ -0,0 +1,40 @@ +// toy driver +// Copyright 2005 Red Hat Inc. +// GPL + + +#include "staptree.h" +#include "parse.h" +#include <iostream> + + +int main (int argc, char *argv []) +{ + int rc = 0; + + if (argc > 1) + { + // quietly parse all listed input files + for (int i = 1; i < argc; i ++) + { + parser p (argv[i]); + stapfile* f = p.parse (); + if (f) + cout << "file '" << argv[i] << "' parsed ok." << endl; + else + rc = 1; + } + } + else + { + // parse then print just stdin + parser p (cin); + stapfile* f = p.parse (); + if (f) + f->print (cout); + else + rc = 1; + } + + return rc; +} diff --git a/semtest.cxx b/semtest.cxx new file mode 100644 index 00000000..f86d17ca --- /dev/null +++ b/semtest.cxx @@ -0,0 +1,185 @@ +// semantic analysis pass, beginnings of elaboration +// Copyright 2005 Red Hat Inc. +// GPL + +#include "staptree.h" +#include "parse.h" +#include <iostream> + + +int +semantic_pass_1 (vector<stapfile*>& files) +{ + int rc = 0; + + // link up symbols to their declarations + for (unsigned i=0; i<files.size(); i++) + { + stapfile* f = files[i]; + + // ... 
on functions + for (unsigned j=0; j<f->functions.size(); j++) + { + functiondecl* fn = f->functions[j]; + symresolution_info ri (fn->locals, f->globals, files, f, fn); + + fn->body->resolve_symbols (ri); + if (ri.num_unresolved) + rc ++; + } + + // ... and on probes + for (unsigned j=0; j<f->probes.size(); j++) + { + probe* pn = f->probes[j]; + symresolution_info ri (pn->locals, f->globals, files, f); + + pn->body->resolve_symbols (ri); + if (ri.num_unresolved) + rc ++; + } + } + + return rc; +} + + +int +semantic_pass_2 (vector<stapfile*>& files) +{ + int rc = 0; + + // next pass: type inference + unsigned iterations = 0; + typeresolution_info ti; + + ti.assert_resolvability = false; + while (1) + { + iterations ++; + // cerr << "Type resolution, iteration " << iterations << endl; + ti.num_newly_resolved = 0; + ti.num_still_unresolved = 0; + + for (unsigned i=0; i<files.size(); i++) + { + stapfile* f = files[i]; + + for (unsigned j=0; j<f->functions.size(); j++) + { + functiondecl* fn = f->functions[j]; + ti.current_function = fn; + fn->body->resolve_types (ti); + if (fn->type == pe_unknown) + ti.unresolved (fn->tok); + } + + for (unsigned j=0; j<f->probes.size(); j++) + { + probe* pn = f->probes[j]; + ti.current_function = 0; + pn->body->resolve_types (ti); + } + + for (unsigned j=0; j<f->globals.size(); j++) + { + vardecl* gd = f->globals[j]; + if (gd->type == pe_unknown) + ti.unresolved (gd->tok); + } + } + + if (ti.num_newly_resolved == 0) // converged + if (ti.num_still_unresolved == 0) + break; // successfully + else if (! 
ti.assert_resolvability) + ti.assert_resolvability = true; // last pass, with error msgs + else + { // unsuccessful conclusion + rc ++; + break; + } + } + + return rc; +} + + +int +main (int argc, char *argv []) +{ + int rc = 0; + + vector<stapfile*> files; + if (argc == 1) + { + parser p (cin); + stapfile* f = p.parse (); + if (f) + files.push_back (f); + else + rc ++; + } + else for (int i = 1; i < argc; i ++) + { + parser p (argv[i]); + stapfile* f = p.parse (); + if (f) + files.push_back (f); + else + rc ++; + } + + rc += semantic_pass_1 (files); + rc += semantic_pass_2 (files); + + if (argc == 1) // processed stdin only + { + for (unsigned i=0; i<files.size(); i++) + { + stapfile* f = files[i]; + for (unsigned j=0; j<f->functions.size(); j++) + { + functiondecl* fn = f->functions[j]; + cerr << "Function "; + fn->printsig (cerr); + cerr << endl << "locals:" << endl; + for (unsigned k=0; k<fn->locals.size(); k++) + { + vardecl* fa = fn->locals[k]; + cerr << "\t"; + fa->printsig (cerr); + cerr << endl; + } + cerr << endl; + } + + for (unsigned j=0; j<f->probes.size(); j++) + { + probe* pn = f->probes[j]; + cerr << "Probe " << *pn->tok << endl; // XXX: print probespec + cerr << "locals:" << endl; + for (unsigned k=0; k<pn->locals.size(); k++) + { + vardecl* fa = pn->locals[k]; + cerr << "\t"; + fa->printsig (cerr); + cerr << endl; + } + cerr << endl; + } + + cerr << "globals:" << endl; + for (unsigned k=0; k<f->globals.size(); k++) + { + vardecl* fa = f->globals[k]; + cerr << "\t"; + fa->printsig (cerr); + cerr << endl; + } + cerr << endl; + } + } + + return rc; +} diff --git a/staptree.cxx b/staptree.cxx index 88067765..34a1d0d3 100644 --- a/staptree.cxx +++ b/staptree.cxx @@ -1,44 +1,903 @@ -// toy driver +// parse tree functions // Copyright 2005 Red Hat Inc. 
// GPL #include "staptree.h" #include "parse.h" #include <iostream> +#include <typeinfo> +#include <cassert> +expression::expression (): + type (pe_unknown), tok (0) +{ +} + + +expression::~expression () +{ +} + + +statement::statement (): + tok (0) +{ +} + + +statement::~statement () +{ +} + + +symbol::symbol (): + referent (0) +{ +} + + +arrayindex::arrayindex (): + referent (0) +{ +} + + +functioncall::functioncall (): + referent (0) +{ +} + + +symboldecl::symboldecl (): + tok (0), + type (pe_unknown) +{ +} + + +symboldecl::~symboldecl () +{ +} + + +vardecl::vardecl () +{ +} + + +vardecl::vardecl (unsigned arity) +{ + index_types.resize (arity); + for (unsigned i=0; i<arity; i++) + index_types[i] = pe_unknown; +} + + +functiondecl::functiondecl (): + body (0) +{ +} + + +literal_number::literal_number (long v) +{ + value = v; + type = pe_long; +} + -expression::~expression () {} -statement::~statement () {} +literal_string::literal_string (const string& v) +{ + value = v; + type = pe_string; +} + + +ostream& +operator << (ostream& o, const exp_type& e) +{ + switch (e) + { + case pe_unknown: o << "unknown"; break; + case pe_long: o << "long"; break; + case pe_string: o << "string"; break; + case pe_stats: o << "stats"; break; + default: o << "???"; break; + } + return o; +} + + +// ------------------------------------------------------------------------ +// parse tree printing + +ostream& operator << (ostream& o, expression& k) +{ + k.print (o); + return o; +} + + +void literal_string::print (ostream& o) +{ + o << '"' << value << '"'; +} + +void literal_number::print (ostream& o) +{ + o << value; +} + + +void binary_expression::print (ostream& o) +{ + o << '(' << *left << ")" + << op + << '(' << *right << ")"; +} -int main (int argc, char *argv []) +void unary_expression::print (ostream& o) { - int rc = 0; + o << op << '(' << *operand << ")"; +} + + +void post_crement::print (ostream& o) +{ + o << '(' << *operand << ")" << op; +} + + +void 
ternary_expression::print (ostream& o) +{ + o << "(" << *cond << ") ? (" + << *truevalue << ") : (" + << *falsevalue << ")"; +} + + +void symbol::print (ostream& o) +{ + o << name; +} + + +void vardecl::print (ostream& o) +{ + o << name; + if (index_types.size() > 0) + o << "[...]"; +} + - if (argc > 1) +void vardecl::printsig (ostream& o) +{ + o << name << ":" << type; + if (index_types.size() > 0) { - // quietly parse all listed input files - for (int i = 1; i < argc; i ++) + o << " ["; + for (unsigned i=0; i<index_types.size(); i++) + o << (i>0 ? ", " : "") << index_types[i]; + o << "]"; + } +} + + +void functiondecl::print (ostream& o) +{ + o << "function " << name << " ("; + for (unsigned i=0; i<formal_args.size(); i++) + o << (i>0 ? ", " : "") << *formal_args[i]; + o << ")" << endl; + body->print(o); +} + + +void functiondecl::printsig (ostream& o) +{ + o << name << ":" << type << " ("; + for (unsigned i=0; i<formal_args.size(); i++) + o << (i>0 ? ", " : "") + << *formal_args[i] + << ":" + << formal_args[i]->type; + o << ")"; +} + + +void arrayindex::print (ostream& o) +{ + o << base << "["; + for (unsigned i=0; i<indexes.size(); i++) + o << (i>0 ? ", " : "") << *indexes[i]; + o << "]"; +} + + +void functioncall::print (ostream& o) +{ + o << function << "("; + for (unsigned i=0; i<args.size(); i++) + o << (i>0 ? 
", " : "") << *args[i]; + o << ")"; +} + + +ostream& operator << (ostream& o, statement& k) +{ + k.print (o); + return o; +} + + +void block::print (ostream& o) +{ + o << "{" << endl; + for (unsigned i=0; i<statements.size(); i++) + o << *statements [i] << ";" << endl; + o << "}" << endl; +} + + +void for_loop::print (ostream& o) +{ + o << "<for_loop>" << endl; +} + + +void null_statement::print (ostream& o) +{ + o << ";"; +} + + +void expr_statement::print (ostream& o) +{ + o << *value; +} + + +void return_statement::print (ostream& o) +{ + o << "return " << *value; +} + + +void delete_statement::print (ostream& o) +{ + o << "delete " << *value; +} + + +void if_statement::print (ostream& o) +{ + o << "if (" << *condition << ") " << endl + << *thenblock << endl; + if (elseblock) + o << "else " << *elseblock << endl; +} + + +void stapfile::print (ostream& o) +{ + o << "# file " << name << endl; + + for(unsigned i=0; i<probes.size(); i++) + { + probes[i]->print (o); + o << endl; + } + + for (unsigned j = 0; j < functions.size(); j++) + { + functions[j]->print (o); + o << endl; + } +} + + +void probe::print (ostream& o) +{ + o << "probe "; + for (unsigned i=0; i<location.size(); i++) + { + o << (i>0 ? 
":" : ""); + location[i]->print (o); + } + o << endl; + o << *body; +} + + +void probe_point_spec::print (ostream& o) +{ + o << functor; + if (arg) + o << "(" << *arg << ")"; +} + + +ostream& operator << (ostream& o, symboldecl& k) +{ + k.print (o); + return o; +} + + +// ------------------------------------------------------------------------ +// semantic processing: symbol resolution + + +symresolution_info::symresolution_info (vector<vardecl*>& l, + vector<vardecl*>& g, + vector<stapfile*>& f, + stapfile* tf): + locals (l), globals (g), files (f), current_file (tf), current_function (0) +{ + num_unresolved = 0; +} + + +symresolution_info::symresolution_info (vector<vardecl*>& l, + vector<vardecl*>& g, + vector<stapfile*>& f, + stapfile* tf, + functiondecl* cf): + locals (l), globals (g), files (f), current_file (tf), current_function (cf) +{ + num_unresolved = 0; +} + + +void +literal::resolve_symbols (symresolution_info& r) +{ +} + + +void +binary_expression::resolve_symbols (symresolution_info& r) +{ + left->resolve_symbols (r); + right->resolve_symbols (r); +} + + +void +unary_expression::resolve_symbols (symresolution_info& r) +{ + operand->resolve_symbols (r); +} + + +void +ternary_expression::resolve_symbols (symresolution_info& r) +{ + cond->resolve_symbols (r); + truevalue->resolve_symbols (r); + falsevalue->resolve_symbols (r); +} + + +void +symbol::resolve_symbols (symresolution_info& r) +{ + if (referent) + return; + + vardecl* d = r.find (name); + if (d) + referent = d; + else + { + // new local + vardecl* v = new vardecl; + v->name = name; + v->tok = tok; + r.locals.push_back (v); + referent = v; + // XXX: check for conflicting function name + } +} + + +void +arrayindex::resolve_symbols (symresolution_info& r) +{ + for (unsigned i=0; i<indexes.size(); i++) + indexes[i]->resolve_symbols (r); + + if (referent) + return; + + vardecl* d = r.find (base); + if (d) + referent = d; + else + { + // new local + vardecl* v = new vardecl (indexes.size()); + 
v->name = base; + v->tok = tok; + r.locals.push_back (v); + referent = v; + // XXX: check for conflicting function name + } +} + + +void +functioncall::resolve_symbols (symresolution_info& r) +{ + for (unsigned i=0; i<args.size(); i++) + args[i]->resolve_symbols (r); + + if (referent) + return; + + // find global functiondecl + functiondecl* d = 0; + for (unsigned j = 0; j < r.current_file->functions.size(); j++) + { + functiondecl* fd = r.current_file->functions[j]; + if (fd->name == function) { - parser p (argv[i]); - stapfile* f = p.parse (); - if (f) - cout << "file '" << argv[i] << "' parsed ok." << endl; - else - rc = 1; + d = fd; + break; } } + // XXX: check for conflicting variable name + + if (d) + referent = d; + else + r.unresolved (tok); +} + + +void +block::resolve_symbols (symresolution_info& r) +{ + for (unsigned i=0; i<statements.size(); i++) + statements[i]->resolve_symbols (r); +} + + +void +if_statement::resolve_symbols (symresolution_info& r) +{ + condition->resolve_symbols (r); + thenblock->resolve_symbols (r); + elseblock->resolve_symbols (r); +} + + +void +for_loop::resolve_symbols (symresolution_info& r) +{ + init->resolve_symbols (r); + cond->resolve_symbols (r); + incr->resolve_symbols (r); + block->resolve_symbols (r); +} + + +void +expr_statement::resolve_symbols (symresolution_info& r) +{ + value->resolve_symbols (r); +} + + +vardecl* +symresolution_info::find (const string& name) +{ + // search locals + for (unsigned i=0; i<locals.size(); i++) + if (locals[i]->name == name) + return locals[i]; + + // search function formal parameters (if any) + if (current_function) + { + for (unsigned i=0; i<current_function->formal_args.size(); i++) + if (current_function->formal_args [i]->name == name) + return current_function->formal_args [i]; + } + + // search globals + for (unsigned i=0; i<globals.size(); i++) + if (globals[i]->name == name) + return globals[i]; + + return 0; +} + + +void +symresolution_info::unresolved (const token* tok) +{ + 
num_unresolved ++; + + cerr << "error: unresolved symbol for "; + if (tok) + cerr << *tok; + else + cerr << "a token"; + cerr << endl; +} + + +// ------------------------------------------------------------------------ +// semantic processing: type resolution + + +void +literal::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (type == pe_long || type == pe_string); + if ((t == type) || (t == pe_unknown)) + return; + + r.mismatch (tok, type, t); +} + + +void +binary_expression::resolve_types (typeresolution_info& r, exp_type t) +{ + if (op == "<<") + { + left->resolve_types (r, pe_stats); + right->resolve_types (r, pe_long); + if (t == pe_long || t == pe_string) + r.mismatch (tok, t, pe_stats); + else if (type == pe_unknown) + { + type = pe_stats; + r.resolved (tok, type); + } + } + else if (op == ".") // string concatenation + { + left->resolve_types (r, pe_string); + right->resolve_types (r, pe_string); + if (t == pe_long || t == pe_stats) + r.mismatch (tok, t, pe_string); + else if (type == pe_unknown) + { + type = pe_string; + r.resolved (tok, type); + } + } + else if (op == "==") // XXX: other comparison operators + { + left->resolve_types (r, pe_unknown); + right->resolve_types (r, pe_unknown); + if (t == pe_string || t == pe_stats) + r.mismatch (tok, t, pe_long); + else if (type == pe_unknown) + { + type = pe_long; + r.resolved (tok, type); + } + } + else // general arithmetic operators? 
+ { + // propagate type downward + exp_type subtype = t; + if ((t == pe_unknown) && (type != pe_unknown)) + subtype = type; + left->resolve_types (r, subtype); + right->resolve_types (r, subtype); + + if ((t == pe_unknown) && (type != pe_unknown)) + ; // already resolved + else if ((t != pe_unknown) && (type == pe_unknown)) + { + type = t; + r.resolved (tok, type); + } + else if ((t == pe_unknown) && (left->type != pe_unknown)) + { + type = left->type; + r.resolved (tok, type); + } + else if ((t == pe_unknown) && (right->type != pe_unknown)) + { + type = right->type; + r.resolved (tok, type); + } + else if (type != t) + r.mismatch (tok, t, type); + } +} + + +void +unary_expression::resolve_types (typeresolution_info& r, exp_type t) +{ + // all unary operators only work on numerics + + operand->resolve_types (r, pe_long); + + if (t == pe_unknown && type != pe_unknown) + ; // already resolved + else if (t == pe_string || t == pe_stats) + r.mismatch (tok, t, pe_long); + else if (type == pe_unknown) + { + type = pe_long; + r.resolved (tok, type); + } +} + + +void +ternary_expression::resolve_types (typeresolution_info& r, exp_type t) +{ + cond->resolve_types (r, pe_long); + truevalue->resolve_types (r, t); + falsevalue->resolve_types (r, t); +} + + +template <class Referrer, class Referent> +void resolve_2types (Referrer* referrer, Referent* referent, + typeresolution_info& r, exp_type t) +{ + exp_type& rtype = referrer->type; + const token* rtok = referrer->tok; + exp_type& ttype = referent->type; + const token* ttok = referent->tok; + + if (t != pe_unknown && rtype == t && rtype == ttype) + ; // do nothing: all three types in agreement + else if (t == pe_unknown && rtype != pe_unknown && rtype == ttype) + ; // do nothing: two known types in agreement + else if (rtype != pe_unknown && ttype != pe_unknown && rtype != ttype) + r.mismatch (rtok, rtype, ttype); + else if (rtype != pe_unknown && t != pe_unknown && rtype != t) + r.mismatch (rtok, rtype, t); + else if (ttype 
!= pe_unknown && t != pe_unknown && ttype != t) + r.mismatch (ttok, ttype, t); + else if (rtype == pe_unknown && t != pe_unknown) + { + // propagate from upstream + rtype = t; + r.resolved (rtok, rtype); + // catch rtype/ttype mismatch later + } + else if (rtype == pe_unknown && ttype != pe_unknown) + { + // propagate from referent + rtype = ttype; + r.resolved (rtok, rtype); + // catch rtype/t mismatch later + } + else if (rtype != pe_unknown && ttype == pe_unknown) + { + // propagate to referent + ttype = rtype; + r.resolved (ttok, ttype); + // catch rtype/t mismatch later + } + else + r.unresolved (rtok); +} + + +void +symbol::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (referent != 0); + + if (referent->index_types.size() > 0) + r.unresolved (tok); // array else + resolve_2types (this, referent, r, t); +} + + +void +arrayindex::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (referent != 0); + + resolve_2types (this, referent, r, t); + + // now resolve the array indexes + if (referent->index_types.size() == 0) + { + // designate referent as array + referent->index_types.resize (indexes.size()); + for (unsigned i=0; i<indexes.size(); i++) + referent->index_types[i] = pe_unknown; + // NB: we "fall through" to for loop + } + + if (indexes.size() != referent->index_types.size()) + r.unresolved (tok); + else for (unsigned i=0; i<indexes.size(); i++) + { + expression* e = indexes[i]; + e->resolve_types (r, referent->index_types[i]); + exp_type it = e->type; + referent->index_types[i] = it; + + if (it == pe_string || it == pe_long) + ; // do nothing + else if (it == pe_stats) + r.invalid (e->tok, it); + else // pe_unknown + r.unresolved (e->tok); + } +} + + +void +functioncall::resolve_types (typeresolution_info& r, exp_type t) +{ + assert (referent != 0); + + resolve_2types (this, referent, r, t); + + if (type == pe_stats) + r.mismatch (tok, pe_unknown, type); + + // XXX: but what about functions that return no value, + // and are 
used only as an expression-statement for side effects? + + // now resolve the function parameters + if (args.size() != referent->formal_args.size()) + r.unresolved (tok); + for (unsigned i=0; i<args.size(); i++) { - // parse then print just stdin - parser p (cin); - stapfile* f = p.parse (); - if (f) - f->print (cout); + expression* e = args[i]; + exp_type& ft = referent->formal_args[i]->type; + const token* ftok = referent->formal_args[i]->tok; + e->resolve_types (r, ft); + exp_type at = e->type; + + if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown) + { + // propagate to formal arg + ft = at; + r.resolved (referent->formal_args[i]->tok, ft); + } + if (at == pe_stats) + r.invalid (e->tok, at); + if (ft == pe_stats) + r.invalid (ftok, ft); + if (at != pe_unknown && ft != pe_unknown && ft != at) + r.mismatch (e->tok, at, ft); + if (at == pe_unknown) + r.unresolved (e->tok); + } +} + + +void +block::resolve_types (typeresolution_info& r) +{ + for (unsigned i=0; i<statements.size(); i++) + statements[i]->resolve_types (r); +} + + +void +if_statement::resolve_types (typeresolution_info& r) +{ + condition->resolve_types (r, pe_long); + thenblock->resolve_types (r); + elseblock->resolve_types (r); +} + + +void +for_loop::resolve_types (typeresolution_info& r) +{ + init->resolve_types (r, pe_unknown); + cond->resolve_types (r, pe_long); + incr->resolve_types (r, pe_unknown); + block->resolve_types (r); +} + + +void +expr_statement::resolve_types (typeresolution_info& r) +{ + value->resolve_types (r, pe_unknown); +} + + +void +return_statement::resolve_types (typeresolution_info& r) +{ + // This is like symbol::resolve_types, where the referent is + // the return value of the function. 
+ + // XXX: need control flow semantic checking; until then: + if (r.current_function == 0) + { + r.unresolved (tok); + return; + } + + exp_type& type = r.current_function->type; + value->resolve_types (r, type); + + if (type != pe_unknown && value->type != pe_unknown + && type != value->type) + r.mismatch (r.current_function->tok, type, value->type); + if (type == pe_unknown && + (value->type == pe_long || value->type == pe_string)) + { + // propagate non-statistics from value + type = value->type; + r.resolved (r.current_function->tok, value->type); + } + if (value->type == pe_stats) + r.invalid (value->tok, value->type); +} + + +void +typeresolution_info::unresolved (const token* tok) +{ + num_still_unresolved ++; + + if (assert_resolvability) + { + cerr << "error: unresolved type for "; + if (tok) + cerr << *tok; else - rc = 1; + cerr << "a token"; + cerr << endl; } +} + - return rc; +void +typeresolution_info::invalid (const token* tok, exp_type pe) +{ + num_still_unresolved ++; + + if (assert_resolvability) + { + cerr << "error: invalid type " << pe << " for "; + if (tok) + cerr << *tok; + else + cerr << "a token"; + cerr << endl; + } +} + + +void +typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2) +{ + num_still_unresolved ++; + + if (assert_resolvability) + { + cerr << "error: type mismatch for "; + if (tok) + cerr << *tok; + else + cerr << "a token"; + cerr << ": " << t1 << " vs. 
" << t2 << endl; + } +} + + +void +typeresolution_info::resolved (const token* tok, exp_type t) +{ + num_newly_resolved ++; + // cerr << "resolved " << *tok << " type " << t << endl; } @@ -10,46 +10,53 @@ using namespace std; -struct source_location -{ - // source co-ordinates - string lexeme; - string source_file; - unsigned source_line; -}; +enum exp_type + { + pe_unknown, + pe_long, + pe_string, + pe_stats + }; +ostream& operator << (ostream& o, const exp_type& e); +struct token; +struct symresolution_info; +struct typeresolution_info; struct expression { - enum { pe_void, pe_unknown, pe_long, pe_string } type; - source_location loc; + exp_type type; + const token* tok; virtual void print (ostream& o) = 0; + expression (); virtual ~expression (); + virtual void resolve_symbols (symresolution_info& r) = 0; + virtual void resolve_types (typeresolution_info& r, exp_type t) = 0; }; +ostream& operator << (ostream& o, expression& k); -inline ostream& operator << (ostream& o, expression& k) -{ - k.print (o); - return o; -} struct literal: public expression { + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); }; + struct literal_string: public literal { string value; - literal_string (const string& v): value (v) {} - void print (ostream& o) { o << '"' << value << '"'; } + literal_string (const string& v); + void print (ostream& o); }; + struct literal_number: public literal { long value; - literal_number (long v): value(v) {} - void print (ostream& o) { o << value; } + literal_number (long v); + void print (ostream& o); }; @@ -58,49 +65,58 @@ struct binary_expression: public expression expression* left; string op; expression* right; - void print (ostream& o) { o << '(' << *left << ")" - << op - << '(' << *right << ")"; } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); }; + struct unary_expression: public expression { string op; 
expression* operand; - void print (ostream& o) { o << op << '(' << *operand << ")"; } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); }; + struct pre_crement: public unary_expression { }; + struct post_crement: public unary_expression { - void print (ostream& o) { o << '(' << *operand << ")" << op; } - - + void print (ostream& o); }; + struct logical_or_expr: public binary_expression { }; + struct logical_and_expr: public binary_expression { }; + struct array_in: public binary_expression { }; + struct comparison: public binary_expression { }; + struct concatenation: public binary_expression { }; + struct exponentiation: public binary_expression { }; @@ -111,70 +127,160 @@ struct ternary_expression: public expression expression* cond; expression* truevalue; expression* falsevalue; - void print (ostream& o) { o << "(" << *cond << ") ? (" - << *truevalue << ") : (" - << *falsevalue << ")"; } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); +}; + + +struct assignment: public binary_expression +{ }; +class vardecl; struct symbol: public expression { string name; - void print (ostream& o) { o << name; } + vardecl *referent; + symbol (); + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); }; -struct arrayindex: public symbol + +struct arrayindex: public expression { + string base; vector<expression*> indexes; - void print (ostream& o) - { - symbol::print(o); - o << "["; - for (unsigned i=0; i<indexes.size(); i++) - o << (i>0 ? 
", " : "") << *indexes[i]; - o << "]"; - } + vardecl *referent; + arrayindex (); + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); }; -struct functioncall: public symbol + + +class functiondecl; +struct functioncall: public expression { + string function; vector<expression*> args; - void print (ostream& o) - { - symbol::print(o); - o << "("; - for (unsigned i=0; i<args.size(); i++) - o << (i>0 ? ", " : "") << *args[i]; - o << ")"; - } + functiondecl *referent; + functioncall (); + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r, exp_type t); }; +// ------------------------------------------------------------------------ + + +struct stapfile; +struct symboldecl; +struct symresolution_info +{ + vector<vardecl*>& locals; // includes incoming function parameters + vector<vardecl*>& globals; + vector<stapfile*>& files; + stapfile* current_file; + functiondecl* current_function; + + symresolution_info (vector<vardecl*>& l, + vector<vardecl*>& g, + vector<stapfile*>& f, + stapfile* cfil, + functiondecl* cfun); + symresolution_info (vector<vardecl*>& l, + vector<vardecl*>& g, + vector<stapfile*>& f, + stapfile* cfil); + + vardecl* find (const string& name); + + void unresolved (const token* tok); + unsigned num_unresolved; +}; + + +struct typeresolution_info +{ + unsigned num_newly_resolved; + unsigned num_still_unresolved; + bool assert_resolvability; + functiondecl* current_function; + + void mismatch (const token* tok, exp_type t1, + exp_type t2); + void unresolved (const token* tok); + void resolved (const token* tok, exp_type t); + void invalid (const token* tok, exp_type t); +}; + + +struct symboldecl // unique object per (possibly implicit) + // symbol declaration +{ + const token* tok; + string name; + exp_type type; + symboldecl (); + virtual ~symboldecl (); + virtual void print (ostream &o) = 0; + virtual void 
printsig (ostream &o) = 0; +}; + + +ostream& operator << (ostream& o, symboldecl& k); + + +struct vardecl: public symboldecl +{ + void print (ostream& o); + void printsig (ostream& o); + vardecl (); + vardecl (unsigned arity); + vector<exp_type> index_types; // for arrays only +}; + + +struct block; +struct functiondecl: public symboldecl +{ + vector<vardecl*> formal_args; + vector<vardecl*> locals; + block* body; + functiondecl (); + void print (ostream& o); + void printsig (ostream& o); +}; + + +// ------------------------------------------------------------------------ + + struct statement { - source_location loc; virtual void print (ostream& o) = 0; + const token* tok; + statement (); virtual ~statement (); + virtual void resolve_symbols (symresolution_info& r) = 0; + virtual void resolve_types (typeresolution_info& r) = 0; }; - -inline ostream& operator << (ostream& o, statement& k) -{ - k.print (o); - return o; -} +ostream& operator << (ostream& o, statement& k); struct block: public statement { vector<statement*> statements; - void print (ostream& o) - { - o << "{" << endl; - for (unsigned i=0; i<statements.size(); i++) - o << *statements [i] << ";" << endl; - o << "}" << endl; - } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r); }; struct for_loop: public statement @@ -183,54 +289,60 @@ struct for_loop: public statement expression* cond; expression* incr; statement* block; - void print (ostream& o) - { o << "<for_loop>" << endl; } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r); }; + struct null_statement: public statement { - void print (ostream& o) - { o << ";"; } - + void print (ostream& o); + void resolve_symbols (symresolution_info& r) {} + void resolve_types (typeresolution_info& r) {} }; -struct assignment: public expression -{ - expression* lvalue; // XXX: consider type for lvalues; see parse_variable () - 
string op; - expression* rvalue; - - void print (ostream& o) - { o << *lvalue << " " << op << " " << *rvalue; } -}; struct expr_statement: public statement { expression* value; // executed for side-effects - void print (ostream& o) - { o << *value; } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r); }; + struct if_statement: public statement { expression* condition; statement* thenblock; statement* elseblock; - void print (ostream& o) - { o << "if (" << *condition << ") " << endl - << *thenblock << endl; - if (elseblock) - o << "else " << *elseblock << endl; } + void print (ostream& o); + void resolve_symbols (symresolution_info& r); + void resolve_types (typeresolution_info& r); +}; + + +struct return_statement: public expr_statement +{ + void print (ostream& o); + void resolve_types (typeresolution_info& r); +}; + + +struct delete_statement: public expr_statement +{ + void print (ostream& o); }; -struct probe; +struct probe; struct stapfile { string name; vector<probe*> probes; - vector<symbol*> globals; - + vector<functiondecl*> functions; + vector<vardecl*> globals; void print (ostream& o); }; @@ -238,41 +350,17 @@ struct stapfile struct probe_point_spec // inherit from something or other? { string functor; + const token* tok; literal* arg; - - void print (ostream& o) - { o << functor; - if (arg) - o << "(" << *arg << ")"; - } + void print (ostream& o); }; struct probe { - // map<string,psymbol*> locals; vector<probe_point_spec*> location; + const token* tok; block* body; - - void print (ostream& o) - { o << "probe " << endl; - for(unsigned i=0; i<location.size(); i++) - { - o << (i>0 ? 
":" : ""); - location[i]->print (o); - } - o << endl; - o << *body; - } + vector<vardecl*> locals; + void print (ostream& o); }; - - - -inline void stapfile::print (ostream& o) -{ o << "# file " << name << endl; - for(unsigned i=0; i<probes.size(); i++) - { - probes[i]->print (o); - o << endl; - } - } diff --git a/testsuite/parseko/one.stp b/testsuite/parseko/one.stp index f288e930..149f602e 100755 --- a/testsuite/parseko/one.stp +++ b/testsuite/parseko/one.stp @@ -1,2 +1,2 @@ -#! stap +#! parsetest "not a probe" diff --git a/testsuite/parseko/two.stp b/testsuite/parseko/two.stp index 64a24afe..e17024ff 100755 --- a/testsuite/parseko/two.stp +++ b/testsuite/parseko/two.stp @@ -1,4 +1,4 @@ -#! stap +#! parsetest probe foo { a + } diff --git a/testsuite/parseok/one.stp b/testsuite/parseok/one.stp index b3ca32b2..5b69767d 100755 --- a/testsuite/parseok/one.stp +++ b/testsuite/parseok/one.stp @@ -1,2 +1,3 @@ -#! stap +#! parsetest # test +function k () { } diff --git a/testsuite/parseok/two.stp b/testsuite/parseok/two.stp index 3776633b..6ab3823e 100755 --- a/testsuite/parseok/two.stp +++ b/testsuite/parseok/two.stp @@ -1,4 +1,4 @@ -#! stap +#! parsetest probe kernel:systemcall("foo") { @@ -6,7 +6,12 @@ probe kernel:systemcall("foo") if (global > 5) { global -- } else ; } +function foo () { + delete array[4]; + return 0; +} + probe systemtap:end { - function("value", 4+8); + foo ("value", 4+8); } diff --git a/testsuite/semko/four.stp b/testsuite/semko/four.stp new file mode 100755 index 00000000..e73cc88d --- /dev/null +++ b/testsuite/semko/four.stp @@ -0,0 +1,12 @@ +#! semtest + +global a, b; # types unknown + +function bar () +{ + # no return statement +} + +probe foo { + a = b; +} diff --git a/testsuite/semko/one.stp b/testsuite/semko/one.stp new file mode 100755 index 00000000..994bb451 --- /dev/null +++ b/testsuite/semko/one.stp @@ -0,0 +1,8 @@ +#! 
semtest + +function stamp (syscall) +{ + # no return expression => unknown function type +} + +probe kernel:syscall:read { stamp ("read"); } diff --git a/testsuite/semko/three.stp b/testsuite/semko/three.stp new file mode 100755 index 00000000..bfdeec66 --- /dev/null +++ b/testsuite/semko/three.stp @@ -0,0 +1,6 @@ +#! semtest + +probe foo { + a << 2; + b[a] = 4; # must not index with stats variable +} diff --git a/testsuite/semko/two.stp b/testsuite/semko/two.stp new file mode 100755 index 00000000..39b77f6a --- /dev/null +++ b/testsuite/semko/two.stp @@ -0,0 +1,8 @@ +#! semtest + +function zoo (p) { p << 5; return 0 } # passing stats as function arg + +probe foo { + bar = 2 + "string"; # mixing integer+string arithmetic + zoo (car) +} diff --git a/testsuite/semok/four.stp b/testsuite/semok/four.stp new file mode 100755 index 00000000..e11b644a --- /dev/null +++ b/testsuite/semok/four.stp @@ -0,0 +1,23 @@ +#! semtest + +# these will ultimately be somehow associated with "providers" +# and have a syntax of their own +global kernel_jiffies, kernel_current_comm; + +function kernel_netlink(a, b) { + # this should be a builtin function + return 0 +} + +function stamp (syscall) +{ + return kernel_netlink (4, kernel_jiffies . " " . kernel_current_comm . " " . syscall) +} + +# probe kernel:syscall:read = kernel:function("sys_read"); + + +probe kernel:syscall:read +{ + stamp ("read"); +} diff --git a/testsuite/semok/one.stp b/testsuite/semok/one.stp new file mode 100755 index 00000000..fb7483e2 --- /dev/null +++ b/testsuite/semok/one.stp @@ -0,0 +1,25 @@ +#! semtest + +# these will ultimately be somehow associated with "providers" +# and have a syntax of their own +global kernel_jiffies, kernel_current_comm; + +function kernel_netlink(a, b) { + # this should be a builtin function + return 0 +} + +function stamp (syscall) +{ + kernel_netlink (4, kernel_jiffies . " " . + kernel_current_comm . " " . 
syscall); + return 0 +} + +# probe kernel:syscall:read = kernel:function("sys_read"); + + +probe kernel:syscall:read +{ + stamp ("read"); +} diff --git a/testsuite/semok/three.stp b/testsuite/semok/three.stp new file mode 100755 index 00000000..6ae531a2 --- /dev/null +++ b/testsuite/semok/three.stp @@ -0,0 +1,6 @@ +#! semtest + +probe foo { + a << 2; + b[4] << 4; +} diff --git a/testsuite/semok/two.stp b/testsuite/semok/two.stp new file mode 100755 index 00000000..f3c6046e --- /dev/null +++ b/testsuite/semok/two.stp @@ -0,0 +1,13 @@ +#! semtest + +global bar, baz; + +function koo (p) { + baz [p, "p", p] ++; + return p + 2; +} + +probe foo { + bar = 2 + koo (4); + foo = bar + koo; +} |