summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: fche <fche> 2005-03-02 01:28:50 +0000
committer: fche <fche> 2005-03-02 01:28:50 +0000
commit: 56099f083d7a68722ace316be4d288d21caabaee (patch)
tree: 3e67ec78134a358c1f90f701c165c4c577d62177
parent: 2f1a1aead38c1dcd329a694dd8d3290b37320466 (diff)
download: systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.gz
systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.xz
systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.zip
* some semantic analysis
2005-03-01 Frank Ch. Eigler <fche@redhat.com> * parse.cxx: Implement left-associativity for several types of operators. Add some more statement types. Parse functions. Be able to print tokens. Simplify error generating functions. Save tokens in all parse tree nodes. * parse.h: Corresponding changes. * staptree.cxx: Move tree-printing functions here. Add many new functions for symbol and type resolution. * staptree.h: Corresponding changes. * semtest.cxx: New semantic analysis pass & test driver. * testsuite/sem*/*: New tests. * parsetest.cxx: Separated parse test driver. * testsuite/parse*/*: Adapt tests to parsetest driver. * Makefile.am: Build semtest. Run its tests. * Makefile.in: Regenerated. * parse.cxx, parse.h: New files: parser.
-rw-r--r-- ChangeLog 17
-rw-r--r-- Makefile.am 15
-rw-r--r-- Makefile.in 79
-rw-r--r-- parse.cxx 374
-rw-r--r-- parse.h 8
-rw-r--r-- parsetest.cxx 40
-rw-r--r-- semtest.cxx 185
-rw-r--r-- staptree.cxx 901
-rw-r--r-- staptree.h 316
-rwxr-xr-x testsuite/parseko/one.stp 2
-rwxr-xr-x testsuite/parseko/two.stp 2
-rwxr-xr-x testsuite/parseok/one.stp 3
-rwxr-xr-x testsuite/parseok/two.stp 9
-rwxr-xr-x testsuite/semko/four.stp 12
-rwxr-xr-x testsuite/semko/one.stp 8
-rwxr-xr-x testsuite/semko/three.stp 6
-rwxr-xr-x testsuite/semko/two.stp 8
-rwxr-xr-x testsuite/semok/four.stp 23
-rwxr-xr-x testsuite/semok/one.stp 25
-rwxr-xr-x testsuite/semok/three.stp 6
-rwxr-xr-x testsuite/semok/two.stp 13
21 files changed, 1774 insertions, 278 deletions
diff --git a/ChangeLog b/ChangeLog
index be1ad015..cbc73eb7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2005-03-01 Frank Ch. Eigler <fche@redhat.com>
+
+ * parse.cxx: Implement left-associativity for several types of
+ operators. Add some more statement types. Parse functions.
+ Be able to print tokens. Simplify error generating functions.
+ Save tokens in all parse tree nodes.
+ * parse.h: Corresponding changes.
+ * staptree.cxx: Move tree-printing functions here. Add many
+ new functions for symbol and type resolution.
+ * staptree.h: Corresponding changes.
+ * semtest.cxx: New semantic analysis pass & test driver.
+ * testsuite/sem*/*: New tests.
+ * parsetest.cxx: Separated parse test driver.
+ * testsuite/parse*/*: Adapt tests to parsetest driver.
+ * Makefile.am: Build semtest. Run its tests.
+ * Makefile.in: Regenerated.
+
2005-02-11 Frank Ch. Eigler <fche@redhat.com>
* parse.cxx, parse.h: New files: parser.
diff --git a/Makefile.am b/Makefile.am
index 3771800c..1ba8071a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -3,8 +3,10 @@
AM_MAKEFLAGS = 'CXXFLAGS=$(CXXFLAGS)' 'LDFLAGS=$(LDFLAGS)'
-bin_PROGRAMS = stap
-stap_SOURCES = parse.cxx staptree.cxx
+bin_PROGRAMS =
+noinst_PROGRAMS = parsetest semtest
+parsetest_SOURCES = parse.cxx staptree.cxx parsetest.cxx
+semtest_SOURCES = parse.cxx staptree.cxx semtest.cxx
AM_CXXFLAGS = -Wall
# Get extra libs as needed
@@ -18,7 +20,8 @@ stapdatadir = @datadir@/systemtap
DEFPATH="\".$(PATH_SEPARATOR)$(stapdatadir)\""
DEFS= -DDEFPATH=$(DEFPATH) -DHAVE_CONFIG_H
-pok=$(srcdir)/testsuite/parseok
-pko=$(srcdir)/testsuite/parseko
-TESTS = $(wildcard $(pok)/*.stp) $(wildcard $(pko)/*.stp)
-XFAIL_TESTS = $(wildcard $(pko)/*.stp)
+p=$(srcdir)/testsuite/parse
+s=$(srcdir)/testsuite/sem
+TESTS = $(wildcard $(p)ok/*.stp) $(wildcard $(p)ko/*.stp) \
+ $(wildcard $(s)ok/*.stp) $(wildcard $(s)ko/*.stp)
+XFAIL_TESTS = $(wildcard $(p)ko/*.stp) $(wildcard $(s)ko/*.stp)
diff --git a/Makefile.in b/Makefile.in
index b3f4828d..c93c7757 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -16,7 +16,7 @@
# Makefile.am --- automake input file for systemtap
-SOURCES = $(stap_SOURCES)
+SOURCES = $(parsetest_SOURCES) $(semtest_SOURCES)
srcdir = @srcdir@
top_srcdir = @top_srcdir@
@@ -38,7 +38,8 @@ POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
-bin_PROGRAMS = stap$(EXEEXT)
+bin_PROGRAMS =
+noinst_PROGRAMS = parsetest$(EXEEXT) semtest$(EXEEXT)
subdir = .
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(srcdir)/config.in \
@@ -55,11 +56,17 @@ CONFIG_HEADER = config.h
CONFIG_CLEAN_FILES =
am__installdirs = "$(DESTDIR)$(bindir)"
binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
-PROGRAMS = $(bin_PROGRAMS)
-am_stap_OBJECTS = parse.$(OBJEXT) staptree.$(OBJEXT)
-stap_OBJECTS = $(am_stap_OBJECTS)
-stap_LDADD = $(LDADD)
-stap_DEPENDENCIES =
+PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
+am_parsetest_OBJECTS = parse.$(OBJEXT) staptree.$(OBJEXT) \
+ parsetest.$(OBJEXT)
+parsetest_OBJECTS = $(am_parsetest_OBJECTS)
+parsetest_LDADD = $(LDADD)
+parsetest_DEPENDENCIES =
+am_semtest_OBJECTS = parse.$(OBJEXT) staptree.$(OBJEXT) \
+ semtest.$(OBJEXT)
+semtest_OBJECTS = $(am_semtest_OBJECTS)
+semtest_LDADD = $(LDADD)
+semtest_DEPENDENCIES =
DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
@@ -68,8 +75,8 @@ CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
CXXLD = $(CXX)
CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
-o $@
-SOURCES = $(stap_SOURCES)
-DIST_SOURCES = $(stap_SOURCES)
+SOURCES = $(parsetest_SOURCES) $(semtest_SOURCES)
+DIST_SOURCES = $(parsetest_SOURCES) $(semtest_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -170,7 +177,8 @@ sharedstatedir = @sharedstatedir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
AM_MAKEFLAGS = 'CXXFLAGS=$(CXXFLAGS)' 'LDFLAGS=$(LDFLAGS)'
-stap_SOURCES = parse.cxx staptree.cxx
+parsetest_SOURCES = parse.cxx staptree.cxx parsetest.cxx
+semtest_SOURCES = parse.cxx staptree.cxx semtest.cxx
AM_CXXFLAGS = -Wall
# Get extra libs as needed
@@ -179,10 +187,12 @@ stapdatadir = @datadir@/systemtap
# stuff for compiling gawk/pgawk
DEFPATH = "\".$(PATH_SEPARATOR)$(stapdatadir)\""
-pok = $(srcdir)/testsuite/parseok
-pko = $(srcdir)/testsuite/parseko
-TESTS = $(wildcard $(pok)/*.stp) $(wildcard $(pko)/*.stp)
-XFAIL_TESTS = $(wildcard $(pko)/*.stp)
+p = $(srcdir)/testsuite/parse
+s = $(srcdir)/testsuite/sem
+TESTS = $(wildcard $(p)ok/*.stp) $(wildcard $(p)ko/*.stp) \
+ $(wildcard $(s)ok/*.stp) $(wildcard $(s)ko/*.stp)
+
+XFAIL_TESTS = $(wildcard $(p)ko/*.stp) $(wildcard $(s)ko/*.stp)
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-am
@@ -261,9 +271,15 @@ uninstall-binPROGRAMS:
clean-binPROGRAMS:
-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
-stap$(EXEEXT): $(stap_OBJECTS) $(stap_DEPENDENCIES)
- @rm -f stap$(EXEEXT)
- $(CXXLINK) $(stap_LDFLAGS) $(stap_OBJECTS) $(stap_LDADD) $(LIBS)
+
+clean-noinstPROGRAMS:
+ -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS)
+parsetest$(EXEEXT): $(parsetest_OBJECTS) $(parsetest_DEPENDENCIES)
+ @rm -f parsetest$(EXEEXT)
+ $(CXXLINK) $(parsetest_LDFLAGS) $(parsetest_OBJECTS) $(parsetest_LDADD) $(LIBS)
+semtest$(EXEEXT): $(semtest_OBJECTS) $(semtest_DEPENDENCIES)
+ @rm -f semtest$(EXEEXT)
+ $(CXXLINK) $(semtest_LDFLAGS) $(semtest_OBJECTS) $(semtest_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -272,6 +288,8 @@ distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parse.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parsetest.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/semtest.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/staptree.Po@am__quote@
.cxx.o:
@@ -570,7 +588,8 @@ maintainer-clean-generic:
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
-clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
+clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \
+ mostlyclean-am
distclean: distclean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
@@ -621,18 +640,18 @@ ps-am:
uninstall-am: uninstall-binPROGRAMS uninstall-info-am
.PHONY: CTAGS GTAGS all all-am am--refresh check check-TESTS check-am \
- clean clean-binPROGRAMS clean-generic ctags dist dist-all \
- dist-bzip2 dist-gzip dist-shar dist-tarZ dist-zip distcheck \
- distclean distclean-compile distclean-generic distclean-hdr \
- distclean-tags distcleancheck distdir distuninstallcheck dvi \
- dvi-am html html-am info info-am install install-am \
- install-binPROGRAMS install-data install-data-am install-exec \
- install-exec-am install-info install-info-am install-man \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-binPROGRAMS \
- uninstall-info-am
+ clean clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \
+ ctags dist dist-all dist-bzip2 dist-gzip dist-shar dist-tarZ \
+ dist-zip distcheck distclean distclean-compile \
+ distclean-generic distclean-hdr distclean-tags distcleancheck \
+ distdir distuninstallcheck dvi dvi-am html html-am info \
+ info-am install install-am install-binPROGRAMS install-data \
+ install-data-am install-exec install-exec-am install-info \
+ install-info-am install-man install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
+ uninstall-am uninstall-binPROGRAMS uninstall-info-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/parse.cxx b/parse.cxx
index e33aee04..4238f37e 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -29,6 +29,33 @@ parser::~parser()
}
+ostream&
+operator << (ostream& o, const token& t)
+{
+ o << (t.type == tok_junk ? "junk" :
+ t.type == tok_identifier ? "identifier" :
+ t.type == tok_operator ? "operator" :
+ t.type == tok_string ? "string" :
+ t.type == tok_number ? "number" :
+ "unknown token");
+
+ o << " '";
+ for (unsigned i=0; i<t.content.length(); i++)
+ {
+ char c = t.content[i];
+ o << (isprint (c) ? c : '?');
+ }
+ o << "'";
+
+ o << " at "
+ << t.location.file << ":"
+ << t.location.line << ":"
+ << t.location.column;
+
+ return o;
+}
+
+
void
parser::print_error (const parse_error &pe)
{
@@ -36,27 +63,9 @@ parser::print_error (const parse_error &pe)
const token* t = last_t;
if (t)
- {
- cerr << "\tsaw "
- << (t->type == tok_junk ? "junk" :
- t->type == tok_identifier ? "identifier" :
- t->type == tok_operator ? "operator" :
- t->type == tok_string ? "string" :
- t->type == tok_number ? "number" :
- "unknown token") << " '";
- for (unsigned i=0; i<t->content.length(); i++)
- {
- char c = t->content[i];
- cerr << (isprint (c) ? c : '?');
- }
- cerr << "'"
- << " at "
- << t->location.file << ":"
- << t->location.line << ":"
- << t->location.column << endl;
- }
+ cerr << "\tsaw: " << *t << endl;
else
- cerr << "\tsaw " << input_name << " EOF" << endl;
+ cerr << "\tsaw: " << input_name << " EOF" << endl;
// XXX: make it possible to print the last input line,
// so as to line up an arrow with the specific error column
@@ -237,6 +246,7 @@ lexer::scan ()
(c == '<' && c2 == '<') ||
(c == '+' && c2 == '=') ||
(c == '-' && c2 == '=') ||
+ (c == ':' && c2 == ':') ||
false) // XXX: etc.
n->content.push_back((char) input_get ());
@@ -259,27 +269,36 @@ parser::parse ()
{
stapfile* f = new stapfile;
f->name = input_name;
-
+
+ bool empty = true;
+
while (1)
{
try
{
const token* t = peek ();
- if (! t) // EOF
+ if (! t) // nice clean EOF
break;
+ empty = false;
if (t->type == tok_identifier && t->content == "probe")
{
- next (); // advance
+ next ();
f->probes.push_back (parse_probe ());
}
else if (t->type == tok_identifier && t->content == "global")
{
- next (); // advance
- f->globals.push_back (parse_global ());
+ next ();
+ parse_global (f->globals);
+ }
+ else if (t->type == tok_identifier && t->content == "function")
+ {
+ next ();
+ f->functions.push_back (parse_functiondecl ());
+ // XXX: check for duplicate function decl
}
else
- throw parse_error ("expected 'probe' or 'global'");
+ throw parse_error ("expected 'probe', 'global', or 'function'");
}
catch (parse_error& pe)
{
@@ -297,11 +316,17 @@ parser::parse ()
}
}
- if (num_errors > 0)
+ if (empty)
+ {
+ cerr << "Input file '" << input_name << "' is empty or missing." << endl;
+ delete f;
+ return 0;
+ }
+ else if (num_errors > 0)
{
cerr << num_errors << " parse error(s)." << endl;
delete f;
- f = 0;
+ return 0;
}
return f;
@@ -317,12 +342,16 @@ parser::parse_probe ()
const token *t = peek ();
if (t && t->type == tok_identifier)
{
+ p->tok = t;
p->location.push_back (parse_probe_point_spec ());
- t = next ();
- if (t->type == tok_operator && t->content == ":")
- continue;
- else if (t->type == tok_operator && t->content == "{")
+ t = peek ();
+ if (t && t->type == tok_operator && t->content == ":")
+ {
+ next ();
+ continue;
+ }
+ else if (t && t->type == tok_operator && t->content == "{")
break;
else
throw parse_error ("expected ':' or '{'");
@@ -339,16 +368,21 @@ parser::parse_probe ()
block*
-parser::parse_stmt_block () // "{" already consumed
+parser::parse_stmt_block ()
{
block* pb = new block;
+ const token* t = next ();
+ if (! (t->type == tok_operator && t->content == "{"))
+ throw parse_error ("expected '{'");
+
+ pb->tok = t;
while (1)
{
try
{
// handle empty blocks
- const token* t = peek ();
+ t = peek ();
if (t && t->type == tok_operator && t->content == "}")
{
next ();
@@ -397,23 +431,22 @@ parser::parse_statement ()
return new null_statement ();
}
else if (t && t->type == tok_operator && t->content == "{")
- {
- next ();
- return parse_stmt_block ();
- }
+ return parse_stmt_block ();
else if (t && t->type == tok_identifier && t->content == "if")
- {
- next ();
- return parse_if_statement ();
- }
+ return parse_if_statement ();
+ else if (t && t->type == tok_identifier && t->content == "return")
+ return parse_return_statement ();
+ else if (t && t->type == tok_identifier && t->content == "delete")
+ return parse_delete_statement ();
// XXX: other control constructs ("for", "delete", "while", "do",
- // "break", "continue", "exit")
+ // "break", "continue", "exit", "return")
else if (t && (t->type == tok_operator || // expressions are flexible
t->type == tok_identifier ||
t->type == tok_number ||
t->type == tok_string))
{
expr_statement *es = new expr_statement;
+ es->tok = t;
es->value = parse_expression ();
return es;
}
@@ -422,10 +455,71 @@ parser::parse_statement ()
}
-symbol*
-parser::parse_global ()
+void
+parser::parse_global (vector <vardecl*>& globals)
{
- throw parse_error ("cannot parse global block yet");
+ while (1)
+ {
+ const token* t = next ();
+ if (! (t->type == tok_identifier))
+ throw parse_error ("expected identifier");
+
+ vardecl* d = new vardecl;
+ d->name = t->content;
+ d->tok = t;
+ globals.push_back (d); // XXX: check for duplicates
+
+ t = next ();
+ if (t->type == tok_operator && t->content == ";")
+ break;
+ else if (t->type == tok_operator && t->content == ",")
+ continue;
+ else
+ throw parse_error ("expected ';' or ','");
+ }
+}
+
+
+functiondecl*
+parser::parse_functiondecl ()
+{
+ functiondecl *fd = new functiondecl ();
+
+ const token* t = next ();
+ if (! (t->type == tok_identifier))
+ throw parse_error ("expected identifier");
+ fd->name = t->content;
+ fd->tok = t;
+
+ t = next ();
+ if (! (t->type == tok_operator && t->content == "("))
+ throw parse_error ("expected '('");
+
+ while (1)
+ {
+ t = next ();
+
+ // permit zero-argument functions
+ if (t->type == tok_operator && t->content == ")")
+ break;
+ else if (! (t->type == tok_identifier))
+ throw parse_error ("expected identifier");
+ vardecl* vd = new vardecl;
+ vd->name = t->content;
+ vd->tok = t;
+ fd->formal_args.push_back (vd);
+
+ t = next ();
+ if (t->type == tok_operator && t->content == ")")
+ break;
+ if (t->type == tok_operator && t->content == ",")
+ continue;
+ else
+ throw parse_error ("expected ',' or ')'");
+ }
+
+ fd->body = parse_stmt_block ();
+ return fd;
}
@@ -438,6 +532,7 @@ parser::parse_probe_point_spec ()
if (t->type != tok_identifier)
throw parse_error ("expected identifier");
pl->functor = t->content;
+ pl->tok = t;
t = peek ();
if (t && t->type == tok_operator && t->content == "(")
@@ -457,12 +552,16 @@ literal*
parser::parse_literal ()
{
const token* t = next ();
+ literal* l;
if (t->type == tok_string)
- return new literal_string (t->content);
+ l = new literal_string (t->content);
else if (t->type == tok_number)
- return new literal_number (atol (t->content.c_str ()));
+ l = new literal_number (atol (t->content.c_str ()));
else
throw parse_error ("expected literal string or number");
+
+ l->tok = t;
+ return l;
}
@@ -470,10 +569,15 @@ if_statement*
parser::parse_if_statement ()
{
const token* t = next ();
+ if (! (t->type == tok_identifier && t->content == "if"))
+ throw parse_error ("expected 'if'");
+ if_statement* s = new if_statement;
+ s->tok = t;
+
+ t = next ();
if (! (t->type == tok_operator && t->content == "("))
throw parse_error ("expected '('");
- if_statement* s = new if_statement;
s->condition = parse_expression ();
t = next ();
@@ -493,6 +597,32 @@ parser::parse_if_statement ()
}
+return_statement*
+parser::parse_return_statement ()
+{
+ const token* t = next ();
+ if (! (t->type == tok_identifier && t->content == "return"))
+ throw parse_error ("expected 'return'");
+ return_statement* s = new return_statement;
+ s->tok = t;
+ s->value = parse_expression ();
+ return s;
+}
+
+
+delete_statement*
+parser::parse_delete_statement ()
+{
+ const token* t = next ();
+ if (! (t->type == tok_identifier && t->content == "delete"))
+ throw parse_error ("expected 'delete'");
+ delete_statement* s = new delete_statement;
+ s->tok = t;
+ s->value = parse_expression ();
+ return s;
+}
+
+
expression*
parser::parse_expression ()
{
@@ -511,21 +641,24 @@ parser::parse_assignment ()
expression* op1 = parse_ternary ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ // left-associative operators
+ while (t && t->type == tok_operator
&& (t->content == "=" ||
t->content == "<<" ||
t->content == "+=" ||
false)) // XXX: add /= etc.
{
assignment* e = new assignment;
- e->lvalue = op1;
+ e->left = op1;
e->op = t->content;
+ e->tok = t;
next ();
- e->rvalue = parse_expression ();
- return e;
+ e->right = parse_ternary ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -537,16 +670,17 @@ parser::parse_ternary ()
const token* t = peek ();
if (t && t->type == tok_operator && t->content == "?")
{
- next ();
ternary_expression* e = new ternary_expression;
+ e->tok = t;
e->cond = op1;
- e->truevalue = parse_expression ();
+ next ();
+ e->truevalue = parse_expression (); // XXX
t = next ();
if (! (t->type == tok_operator && t->content == ":"))
throw parse_error ("expected ':'");
- e->falsevalue = parse_expression ();
+ e->falsevalue = parse_expression (); // XXX
return e;
}
else
@@ -560,16 +694,19 @@ parser::parse_logical_or ()
expression* op1 = parse_logical_and ();
const token* t = peek ();
- if (t && t->type == tok_operator && t->content == "||")
+ while (t && t->type == tok_operator && t->content == "||")
{
- next ();
logical_or_expr* e = new logical_or_expr;
+ e->tok = t;
+ e->op = t->content;
e->left = op1;
- e->right = parse_expression ();
- return e;
+ next ();
+ e->right = parse_logical_and ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -579,16 +716,19 @@ parser::parse_logical_and ()
expression* op1 = parse_array_in ();
const token* t = peek ();
- if (t && t->type == tok_operator && t->content == "&&")
+ while (t && t->type == tok_operator && t->content == "&&")
{
- next ();
logical_and_expr *e = new logical_and_expr;
e->left = op1;
- e->right = parse_expression ();
- return e;
+ e->op = t->content;
+ e->tok = t;
+ next ();
+ e->right = parse_array_in ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -600,10 +740,12 @@ parser::parse_array_in ()
const token* t = peek ();
if (t && t->type == tok_identifier && t->content == "in")
{
- next ();
array_in *e = new array_in;
e->left = op1;
- e->right = parse_symbol (); // XXX: restrict to identifiers
+ e->op = t->content;
+ e->tok = t;
+ next ();
+ e->right = parse_symbol_plain ();
return e;
}
else
@@ -617,18 +759,20 @@ parser::parse_comparison ()
expression* op1 = parse_concatenation ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ while (t && t->type == tok_operator
&& (t->content == ">" || t->content == "==")) // xxx: more
{
comparison* e = new comparison;
e->left = op1;
e->op = t->content;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_concatenation ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -640,17 +784,19 @@ parser::parse_concatenation ()
const token* t = peek ();
// XXX: the actual awk string-concatenation operator is *whitespace*.
// I don't know how to easily to model that here.
- if (t && t->type == tok_operator && t->content == ".")
+ while (t && t->type == tok_operator && t->content == ".")
{
concatenation* e = new concatenation;
e->left = op1;
e->op = t->content;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_additive ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -660,18 +806,20 @@ parser::parse_additive ()
expression* op1 = parse_multiplicative ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ while (t && t->type == tok_operator
&& (t->content == "+" || t->content == "-"))
{
binary_expression* e = new binary_expression;
e->op = t->content;
e->left = op1;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_multiplicative ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -681,18 +829,20 @@ parser::parse_multiplicative ()
expression* op1 = parse_unary ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ while (t && t->type == tok_operator
&& (t->content == "*" || t->content == "/" || t->content == "%"))
{
binary_expression* e = new binary_expression;
e->op = t->content;
e->left = op1;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_unary ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -705,6 +855,7 @@ parser::parse_unary ()
{
unary_expression* e = new unary_expression;
e->op = t->content;
+ e->tok = t;
next ();
e->operand = parse_expression ();
return e;
@@ -720,18 +871,20 @@ parser::parse_exponentiation ()
expression* op1 = parse_crement ();
const token* t = peek ();
+ // right associative: no loop
if (t && t->type == tok_operator
&& (t->content == "^" || t->content == "**"))
{
exponentiation* e = new exponentiation;
e->op = t->content;
e->left = op1;
+ e->tok = t;
next ();
e->right = parse_expression ();
- return e;
+ op1 = e;
}
- else
- return op1;
+
+ return op1;
}
@@ -744,6 +897,7 @@ parser::parse_crement () // as in "increment" / "decrement"
{
pre_crement* e = new pre_crement;
e->op = t->content;
+ e->tok = t;
next ();
e->operand = parse_value ();
return e;
@@ -758,6 +912,7 @@ parser::parse_crement () // as in "increment" / "decrement"
{
post_crement* e = new post_crement;
e->op = t->content;
+ e->tok = t;
next ();
e->operand = op1;
return e;
@@ -796,14 +951,16 @@ parser::parse_symbol () // var, var[index], func(parms)
const token* t = next ();
if (t->type != tok_identifier)
throw parse_error ("expected identifier");
+ const token* t2 = t;
string name = t->content;
-
+
t = peek ();
if (t && t->type == tok_operator && t->content == "[") // array
{
next ();
struct arrayindex* ai = new arrayindex;
- ai->name = name;
+ ai->tok = t2;
+ ai->base = name;
while (1)
{
ai->indexes.push_back (parse_expression ());
@@ -821,7 +978,8 @@ parser::parse_symbol () // var, var[index], func(parms)
{
next ();
struct functioncall* f = new functioncall;
- f->name = name;
+ f->tok = t2;
+ f->function = name;
while (1)
{
f->args.push_back (parse_expression ());
@@ -837,8 +995,22 @@ parser::parse_symbol () // var, var[index], func(parms)
}
else
{
- symbol *s = new symbol;
- s->name = name;
- return s;
+ symbol* sym = new symbol;
+ sym->name = name;
+ sym->tok = t2;
+ return sym;
}
}
+
+
+symbol*
+parser::parse_symbol_plain () // var only
+{
+ symbol *s = new symbol;
+ const token* t = next ();
+ if (t->type != tok_identifier)
+ throw parse_error ("expected identifier");
+ s->name = t->content;
+ s->tok = t;
+ return s;
+}
diff --git a/parse.h b/parse.h
index 21178a3f..ddfb19c1 100644
--- a/parse.h
+++ b/parse.h
@@ -29,6 +29,8 @@ struct token
std::string content;
};
+std::ostream& operator << (std::ostream& o, const token& t);
+
struct parse_error: public std::runtime_error
{
@@ -80,10 +82,13 @@ private: // nonterminals
probe* parse_probe ();
probe_point_spec* parse_probe_point_spec ();
literal* parse_literal ();
- symbol* parse_global ();
+ void parse_global (vector<vardecl*>&);
+ functiondecl* parse_functiondecl ();
block* parse_stmt_block ();
statement* parse_statement ();
if_statement* parse_if_statement ();
+ return_statement* parse_return_statement ();
+ delete_statement* parse_delete_statement ();
expression* parse_expression ();
expression* parse_assignment ();
expression* parse_ternary ();
@@ -99,4 +104,5 @@ private: // nonterminals
expression* parse_crement ();
expression* parse_value ();
expression* parse_symbol ();
+ symbol* parse_symbol_plain ();
};
diff --git a/parsetest.cxx b/parsetest.cxx
new file mode 100644
index 00000000..ab1d53c8
--- /dev/null
+++ b/parsetest.cxx
@@ -0,0 +1,40 @@
+// toy driver
+// Copyright 2005 Red Hat Inc.
+// GPL
+
+
+#include "staptree.h"
+#include "parse.h"
+#include <iostream>
+
+
+int main (int argc, char *argv [])
+{
+ int rc = 0;
+
+ if (argc > 1)
+ {
+ // quietly parse all listed input files
+ for (int i = 1; i < argc; i ++)
+ {
+ parser p (argv[i]);
+ stapfile* f = p.parse ();
+ if (f)
+ cout << "file '" << argv[i] << "' parsed ok." << endl;
+ else
+ rc = 1;
+ }
+ }
+ else
+ {
+ // parse then print just stdin
+ parser p (cin);
+ stapfile* f = p.parse ();
+ if (f)
+ f->print (cout);
+ else
+ rc = 1;
+ }
+
+ return rc;
+}
diff --git a/semtest.cxx b/semtest.cxx
new file mode 100644
index 00000000..f86d17ca
--- /dev/null
+++ b/semtest.cxx
@@ -0,0 +1,185 @@
+// semantic analysis pass, beginnings of elaboration
+// Copyright 2005 Red Hat Inc.
+// GPL
+
+#include "staptree.h"
+#include "parse.h"
+#include <iostream>
+
+
+int
+semantic_pass_1 (vector<stapfile*>& files)
+{
+ int rc = 0;
+
+ // link up symbols to their declarations
+ for (unsigned i=0; i<files.size(); i++)
+ {
+ stapfile* f = files[i];
+
+ // ... on functions
+ for (unsigned j=0; j<f->functions.size(); j++)
+ {
+ functiondecl* fn = f->functions[j];
+ symresolution_info ri (fn->locals, f->globals, files, f, fn);
+
+ fn->body->resolve_symbols (ri);
+ if (ri.num_unresolved)
+ rc ++;
+ }
+
+ // ... and on probes
+ for (unsigned j=0; j<f->probes.size(); j++)
+ {
+ probe* pn = f->probes[j];
+ symresolution_info ri (pn->locals, f->globals, files, f);
+
+ pn->body->resolve_symbols (ri);
+ if (ri.num_unresolved)
+ rc ++;
+ }
+ }
+
+ return rc;
+}
+
+
+int
+semantic_pass_2 (vector<stapfile*>& files)
+{
+ int rc = 0;
+
+ // next pass: type inference
+ unsigned iterations = 0;
+ typeresolution_info ti;
+
+ ti.assert_resolvability = false;
+ while (1)
+ {
+ iterations ++;
+ // cerr << "Type resolution, iteration " << iterations << endl;
+ ti.num_newly_resolved = 0;
+ ti.num_still_unresolved = 0;
+
+ for (unsigned i=0; i<files.size(); i++)
+ {
+ stapfile* f = files[i];
+
+ for (unsigned j=0; j<f->functions.size(); j++)
+ {
+ functiondecl* fn = f->functions[j];
+ ti.current_function = fn;
+ fn->body->resolve_types (ti);
+ if (fn->type == pe_unknown)
+ ti.unresolved (fn->tok);
+ }
+
+ for (unsigned j=0; j<f->probes.size(); j++)
+ {
+ probe* pn = f->probes[j];
+ ti.current_function = 0;
+ pn->body->resolve_types (ti);
+ }
+
+ for (unsigned j=0; j<f->globals.size(); j++)
+ {
+ vardecl* gd = f->globals[j];
+ if (gd->type == pe_unknown)
+ ti.unresolved (gd->tok);
+ }
+ }
+
+ if (ti.num_newly_resolved == 0) // converged
+ if (ti.num_still_unresolved == 0)
+ break; // successfully
+ else if (! ti.assert_resolvability)
+ ti.assert_resolvability = true; // last pass, with error msgs
+ else
+ { // unsuccessful conclusion
+ rc ++;
+ break;
+ }
+ }
+
+ return rc;
+}
+
+
+int
+main (int argc, char *argv [])
+{
+ int rc = 0;
+
+ vector<stapfile*> files;
+ if (argc == 1)
+ {
+ parser p (cin);
+ stapfile* f = p.parse ();
+ if (f)
+ files.push_back (f);
+ else
+ rc ++;
+ }
+ else for (int i = 1; i < argc; i ++)
+ {
+ parser p (argv[i]);
+ stapfile* f = p.parse ();
+ if (f)
+ files.push_back (f);
+ else
+ rc ++;
+ }
+
+ rc += semantic_pass_1 (files);
+ rc += semantic_pass_2 (files);
+
+ if (argc == 1) // processed stdin only
+ {
+ for (unsigned i=0; i<files.size(); i++)
+ {
+ stapfile* f = files[i];
+ for (unsigned j=0; j<f->functions.size(); j++)
+ {
+ functiondecl* fn = f->functions[j];
+ cerr << "Function ";
+ fn->printsig (cerr);
+ cerr << endl << "locals:" << endl;
+ for (unsigned k=0; k<fn->locals.size(); k++)
+ {
+ vardecl* fa = fn->locals[k];
+ cerr << "\t";
+ fa->printsig (cerr);
+ cerr << endl;
+ }
+ cerr << endl;
+ }
+
+ for (unsigned j=0; j<f->probes.size(); j++)
+ {
+ probe* pn = f->probes[j];
+ cerr << "Probe " << *pn->tok << endl; // XXX: print probespec
+ cerr << "locals:" << endl;
+ for (unsigned k=0; k<pn->locals.size(); k++)
+ {
+ vardecl* fa = pn->locals[k];
+ cerr << "\t";
+ fa->printsig (cerr);
+ cerr << endl;
+ }
+ cerr << endl;
+ }
+
+ cerr << "globals:" << endl;
+ for (unsigned k=0; k<f->globals.size(); k++)
+ {
+ vardecl* fa = f->globals[k];
+ cerr << "\t";
+ fa->printsig (cerr);
+ cerr << endl;
+ }
+ cerr << endl;
+ }
+ }
+
+ return rc;
+}
diff --git a/staptree.cxx b/staptree.cxx
index 88067765..34a1d0d3 100644
--- a/staptree.cxx
+++ b/staptree.cxx
@@ -1,44 +1,903 @@
-// toy driver
+// parse tree functions
// Copyright 2005 Red Hat Inc.
// GPL
#include "staptree.h"
#include "parse.h"
#include <iostream>
+#include <typeinfo>
+#include <cassert>
+expression::expression ():
+ type (pe_unknown), tok (0)
+{
+}
+
+
+expression::~expression ()
+{
+}
+
+
+statement::statement ():
+ tok (0)
+{
+}
+
+
+statement::~statement ()
+{
+}
+
+
+symbol::symbol ():
+ referent (0)
+{
+}
+
+
+arrayindex::arrayindex ():
+ referent (0)
+{
+}
+
+
+functioncall::functioncall ():
+ referent (0)
+{
+}
+
+
+symboldecl::symboldecl ():
+ tok (0),
+ type (pe_unknown)
+{
+}
+
+
+symboldecl::~symboldecl ()
+{
+}
+
+
+vardecl::vardecl ()
+{
+}
+
+
+// Array-variable constructor: reserve ARITY index slots, each of them
+// starting out with an unknown type.
+vardecl::vardecl (unsigned arity)
+{
+  index_types.assign (arity, pe_unknown);
+}
+
+
+functiondecl::functiondecl ():
+ body (0)
+{
+}
+
+
+// Numeric literal: remember the value; the expression type is fixed at pe_long.
+literal_number::literal_number (long v):
+  value (v)
+{
+  type = pe_long;
+}
+
-expression::~expression () {}
-statement::~statement () {}
+// String literal: remember the text; the expression type is fixed at pe_string.
+literal_string::literal_string (const string& v):
+  value (v)
+{
+  type = pe_string;
+}
+
+
+// Print the human-readable name of an expression type; any value outside
+// the four known enumerators is shown as "???".
+ostream&
+operator << (ostream& o, const exp_type& e)
+{
+  const char* label = "???";
+
+  if (e == pe_unknown)
+    label = "unknown";
+  else if (e == pe_long)
+    label = "long";
+  else if (e == pe_string)
+    label = "string";
+  else if (e == pe_stats)
+    label = "stats";
+
+  o << label;
+  return o;
+}
+
+
+// ------------------------------------------------------------------------
+// parse tree printing
+
+ostream& operator << (ostream& o, expression& k)
+{
+ k.print (o);
+ return o;
+}
+
+
+void literal_string::print (ostream& o)
+{
+ o << '"' << value << '"';
+}
+
+void literal_number::print (ostream& o)
+{
+ o << value;
+}
+
+
+void binary_expression::print (ostream& o)
+{
+ o << '(' << *left << ")"
+ << op
+ << '(' << *right << ")";
+}
-int main (int argc, char *argv [])
+void unary_expression::print (ostream& o)
{
- int rc = 0;
+ o << op << '(' << *operand << ")";
+}
+
+
+void post_crement::print (ostream& o)
+{
+ o << '(' << *operand << ")" << op;
+}
+
+
+void ternary_expression::print (ostream& o)
+{
+ o << "(" << *cond << ") ? ("
+ << *truevalue << ") : ("
+ << *falsevalue << ")";
+}
+
+
+void symbol::print (ostream& o)
+{
+ o << name;
+}
+
+
+void vardecl::print (ostream& o)
+{
+ o << name;
+ if (index_types.size() > 0)
+ o << "[...]";
+}
+
- if (argc > 1)
+void vardecl::printsig (ostream& o)
+{
+ o << name << ":" << type;
+ if (index_types.size() > 0)
{
- // quietly parse all listed input files
- for (int i = 1; i < argc; i ++)
+ o << " [";
+ for (unsigned i=0; i<index_types.size(); i++)
+ o << (i>0 ? ", " : "") << index_types[i];
+ o << "]";
+ }
+}
+
+
+void functiondecl::print (ostream& o)
+{
+ o << "function " << name << " (";
+ for (unsigned i=0; i<formal_args.size(); i++)
+ o << (i>0 ? ", " : "") << *formal_args[i];
+ o << ")" << endl;
+ body->print(o);
+}
+
+
+// Print the typed signature: "name:type (arg:type, arg:type)".
+void functiondecl::printsig (ostream& o)
+{
+  o << name << ":" << type << " (";
+
+  const char* sep = ""; // empty before the first argument, ", " afterwards
+  for (unsigned n = 0; n < formal_args.size(); n++)
+    {
+      vardecl* arg = formal_args[n];
+      o << sep << *arg << ":" << arg->type;
+      sep = ", ";
+    }
+
+  o << ")";
+}
+
+
+void arrayindex::print (ostream& o)
+{
+ o << base << "[";
+ for (unsigned i=0; i<indexes.size(); i++)
+ o << (i>0 ? ", " : "") << *indexes[i];
+ o << "]";
+}
+
+
+void functioncall::print (ostream& o)
+{
+ o << function << "(";
+ for (unsigned i=0; i<args.size(); i++)
+ o << (i>0 ? ", " : "") << *args[i];
+ o << ")";
+}
+
+
+ostream& operator << (ostream& o, statement& k)
+{
+ k.print (o);
+ return o;
+}
+
+
+// Print the block as "{", one ";"-terminated statement per line, then "}".
+void block::print (ostream& o)
+{
+  o << "{" << endl;
+
+  for (vector<statement*>::iterator it = statements.begin();
+       it != statements.end(); ++it)
+    {
+      statement* s = *it;
+      o << *s << ";" << endl;
+    }
+
+  o << "}" << endl;
+}
+
+
+void for_loop::print (ostream& o)
+{
+ o << "<for_loop>" << endl;
+}
+
+
+void null_statement::print (ostream& o)
+{
+ o << ";";
+}
+
+
+void expr_statement::print (ostream& o)
+{
+ o << *value;
+}
+
+
+void return_statement::print (ostream& o)
+{
+ o << "return " << *value;
+}
+
+
+void delete_statement::print (ostream& o)
+{
+ o << "delete " << *value;
+}
+
+
+// Print "if (cond)" and the then-branch; the else-branch is printed only
+// when one is present (elseblock may be null).
+void if_statement::print (ostream& o)
+{
+  o << "if (" << *condition << ") " << endl;
+  o << *thenblock << endl;
+
+  if (! elseblock)
+    return;
+
+  o << "else " << *elseblock << endl;
+}
+
+
+// Print a "# file NAME" header, then every probe, then every function,
+// each followed by a newline.
+void stapfile::print (ostream& o)
+{
+  o << "# file " << name << endl;
+
+  for (vector<probe*>::iterator p = probes.begin(); p != probes.end(); ++p)
+    {
+      (*p)->print (o);
+      o << endl;
+    }
+
+  for (vector<functiondecl*>::iterator f = functions.begin();
+       f != functions.end(); ++f)
+    {
+      (*f)->print (o);
+      o << endl;
+    }
+}
+
+
+// Print "probe ", the ":"-separated probe point specs, a newline, and
+// finally the probe body.
+void probe::print (ostream& o)
+{
+  o << "probe ";
+
+  const char* sep = ""; // empty before the first spec, ":" afterwards
+  for (unsigned n = 0; n < location.size(); n++)
+    {
+      o << sep;
+      location[n]->print (o);
+      sep = ":";
+    }
+
+  o << endl;
+  o << *body;
+}
+
+
+void probe_point_spec::print (ostream& o)
+{
+ o << functor;
+ if (arg)
+ o << "(" << *arg << ")";
+}
+
+
+ostream& operator << (ostream& o, symboldecl& k)
+{
+ k.print (o);
+ return o;
+}
+
+
+// ------------------------------------------------------------------------
+// semantic processing: symbol resolution
+
+
+// Resolution context for code outside any function (e.g. a probe body):
+// current_function is null and the unresolved counter starts at zero.
+symresolution_info::symresolution_info (vector<vardecl*>& l,
+                                        vector<vardecl*>& g,
+                                        vector<stapfile*>& f,
+                                        stapfile* tf):
+  locals (l),
+  globals (g),
+  files (f),
+  current_file (tf),
+  current_function (0),
+  num_unresolved (0)
+{
+}
+
+
+// Resolution context for the body of function CF: its formal arguments
+// become visible to find().  The unresolved counter starts at zero.
+symresolution_info::symresolution_info (vector<vardecl*>& l,
+                                        vector<vardecl*>& g,
+                                        vector<stapfile*>& f,
+                                        stapfile* tf,
+                                        functiondecl* cf):
+  locals (l),
+  globals (g),
+  files (f),
+  current_file (tf),
+  current_function (cf),
+  num_unresolved (0)
+{
+}
+
+
+void
+literal::resolve_symbols (symresolution_info& r)
+{
+}
+
+
+void
+binary_expression::resolve_symbols (symresolution_info& r)
+{
+ left->resolve_symbols (r);
+ right->resolve_symbols (r);
+}
+
+
+void
+unary_expression::resolve_symbols (symresolution_info& r)
+{
+ operand->resolve_symbols (r);
+}
+
+
+void
+ternary_expression::resolve_symbols (symresolution_info& r)
+{
+ cond->resolve_symbols (r);
+ truevalue->resolve_symbols (r);
+ falsevalue->resolve_symbols (r);
+}
+
+
+// Bind this symbol to a declaration.  An existing binding is kept; otherwise
+// the visible scopes are searched, and if nothing matches a new scalar
+// local is implicitly declared and appended to r.locals.
+void
+symbol::resolve_symbols (symresolution_info& r)
+{
+  if (referent != 0)
+    return; // bound on an earlier visit
+
+  referent = r.find (name);
+  if (referent != 0)
+    return;
+
+  // new local
+  vardecl* fresh = new vardecl;
+  fresh->name = name;
+  fresh->tok = tok;
+  r.locals.push_back (fresh);
+  referent = fresh;
+  // XXX: check for conflicting function name
+}
+
+
+// Resolve the index expressions first, then bind the array base name.  An
+// unknown base implicitly declares a new local array whose arity equals
+// the number of indexes used here.
+void
+arrayindex::resolve_symbols (symresolution_info& r)
+{
+  for (unsigned n = 0; n < indexes.size(); n++)
+    indexes[n]->resolve_symbols (r);
+
+  if (referent != 0)
+    return; // bound on an earlier visit
+
+  referent = r.find (base);
+  if (referent != 0)
+    return;
+
+  // new local
+  vardecl* fresh = new vardecl (indexes.size());
+  fresh->name = base;
+  fresh->tok = tok;
+  r.locals.push_back (fresh);
+  referent = fresh;
+  // XXX: check for conflicting function name
+}
+
+
+// Resolve the symbols in every actual argument, then bind this call to the
+// functiondecl whose name matches `function'.  A call that is already bound
+// is left alone; a call with no matching declaration is reported through
+// r.unresolved().
+// NOTE(review): only r.current_file->functions is searched; r.files is not
+// consulted here, so functions defined in other files are not found.
+void
+functioncall::resolve_symbols (symresolution_info& r)
+{
+ for (unsigned i=0; i<args.size(); i++)
+ args[i]->resolve_symbols (r);
+
+ if (referent)
+ return;
+
+ // find global functiondecl
+ functiondecl* d = 0;
+ for (unsigned j = 0; j < r.current_file->functions.size(); j++)
+ {
+ functiondecl* fd = r.current_file->functions[j];
+ if (fd->name == function)
{
- parser p (argv[i]);
- stapfile* f = p.parse ();
- if (f)
- cout << "file '" << argv[i] << "' parsed ok." << endl;
- else
- rc = 1;
+ d = fd;
+ break;
}
}
+ // XXX: check for conflicting variable name
+
+ if (d)
+ referent = d;
+ else
+ r.unresolved (tok);
+}
+
+
+void
+block::resolve_symbols (symresolution_info& r)
+{
+ for (unsigned i=0; i<statements.size(); i++)
+ statements[i]->resolve_symbols (r);
+}
+
+
+// Resolve symbols in the condition, the then-branch and, when present, the
+// else-branch.
+void
+if_statement::resolve_symbols (symresolution_info& r)
+{
+  condition->resolve_symbols (r);
+  thenblock->resolve_symbols (r);
+  // elseblock is optional (if_statement::print guards it the same way), so
+  // it must not be dereferenced unconditionally.
+  if (elseblock)
+    elseblock->resolve_symbols (r);
+}
+
+
+// Resolve symbols in the init, condition and increment expressions and in
+// the loop body, in that order.
+// NOTE(review): all four members are dereferenced unconditionally; confirm
+// the parser never leaves any of them null (e.g. for an empty clause).
+void
+for_loop::resolve_symbols (symresolution_info& r)
+{
+ init->resolve_symbols (r);
+ cond->resolve_symbols (r);
+ incr->resolve_symbols (r);
+ block->resolve_symbols (r);
+}
+
+
+void
+expr_statement::resolve_symbols (symresolution_info& r)
+{
+ value->resolve_symbols (r);
+}
+
+
+// Return the first declaration in DECLS called NAME, or 0 if there is none.
+static vardecl*
+lookup_vardecl (const vector<vardecl*>& decls, const string& name)
+{
+  for (unsigned n = 0; n < decls.size(); n++)
+    if (decls[n]->name == name)
+      return decls[n];
+  return 0;
+}
+
+
+// Look NAME up in scope order: locals, then the formal arguments of the
+// current function (if any), then globals.  Returns 0 when nothing matches.
+vardecl*
+symresolution_info::find (const string& name)
+{
+  vardecl* hit = lookup_vardecl (locals, name);
+
+  if (! hit && current_function)
+    hit = lookup_vardecl (current_function->formal_args, name);
+
+  if (! hit)
+    hit = lookup_vardecl (globals, name);
+
+  return hit;
+}
+
+
+void
+symresolution_info::unresolved (const token* tok)
+{
+ num_unresolved ++;
+
+ cerr << "error: unresolved symbol for ";
+ if (tok)
+ cerr << *tok;
+ else
+ cerr << "a token";
+ cerr << endl;
+}
+
+
+// ------------------------------------------------------------------------
+// semantic processing: type resolution
+
+
+// A literal's type is fixed at construction; just check that the type the
+// context asks for (T) is either unknown or the same.
+void
+literal::resolve_types (typeresolution_info& r, exp_type t)
+{
+  assert (type == pe_long || type == pe_string);
+
+  const bool compatible = (t == pe_unknown) || (t == type);
+  if (! compatible)
+    r.mismatch (tok, type, t);
+}
+
+
+// Infer or check the type of a binary expression.  T is the type the
+// enclosing context requires (pe_unknown when it has no requirement).
+// Three operators have fixed typing rules; every other operator takes the
+// generic path in which both operands and the result share one type.
+void
+binary_expression::resolve_types (typeresolution_info& r, exp_type t)
+{
+ if (op == "<<")
+ {
+ // stats << long yields stats
+ left->resolve_types (r, pe_stats);
+ right->resolve_types (r, pe_long);
+ if (t == pe_long || t == pe_string)
+ r.mismatch (tok, t, pe_stats);
+ else if (type == pe_unknown)
+ {
+ type = pe_stats;
+ r.resolved (tok, type);
+ }
+ }
+ else if (op == ".") // string concatenation
+ {
+ // string . string yields string
+ left->resolve_types (r, pe_string);
+ right->resolve_types (r, pe_string);
+ if (t == pe_long || t == pe_stats)
+ r.mismatch (tok, t, pe_string);
+ else if (type == pe_unknown)
+ {
+ type = pe_string;
+ r.resolved (tok, type);
+ }
+ }
+ else if (op == "==") // XXX: other comparison operators
+ {
+ // operands are resolved without any constraint; the result is long
+ left->resolve_types (r, pe_unknown);
+ right->resolve_types (r, pe_unknown);
+ if (t == pe_string || t == pe_stats)
+ r.mismatch (tok, t, pe_long);
+ else if (type == pe_unknown)
+ {
+ type = pe_long;
+ r.resolved (tok, type);
+ }
+ }
+ else // general arithmetic operators?
+ {
+ // propagate type downward
+ // (prefer the caller's T; fall back to our own type if already known)
+ exp_type subtype = t;
+ if ((t == pe_unknown) && (type != pe_unknown))
+ subtype = type;
+ left->resolve_types (r, subtype);
+ right->resolve_types (r, subtype);
+
+ // Order matters below: the first matching case wins.
+ if ((t == pe_unknown) && (type != pe_unknown))
+ ; // already resolved
+ else if ((t != pe_unknown) && (type == pe_unknown))
+ {
+ // adopt the type required by the context
+ type = t;
+ r.resolved (tok, type);
+ }
+ else if ((t == pe_unknown) && (left->type != pe_unknown))
+ {
+ // no outside requirement: take the type from the left operand
+ type = left->type;
+ r.resolved (tok, type);
+ }
+ else if ((t == pe_unknown) && (right->type != pe_unknown))
+ {
+ // ... or else from the right operand
+ type = right->type;
+ r.resolved (tok, type);
+ }
+ else if (type != t)
+ r.mismatch (tok, t, type);
+ }
+}
+
+
+// Unary operators are numeric only: the operand is required to be long and
+// so is the result.  A context asking for string or stats is a mismatch.
+void
+unary_expression::resolve_types (typeresolution_info& r, exp_type t)
+{
+  operand->resolve_types (r, pe_long);
+
+  if (t == pe_unknown && type != pe_unknown)
+    return; // already resolved
+
+  if (t == pe_string || t == pe_stats)
+    {
+      r.mismatch (tok, t, pe_long);
+      return;
+    }
+
+  if (type == pe_unknown)
+    {
+      type = pe_long;
+      r.resolved (tok, type);
+    }
+}
+
+
+// Infer or check the type of "cond ? truevalue : falsevalue".  The
+// condition must be long; both branches and the result share one type.
+// T is the type required by the context (pe_unknown for no requirement).
+//
+// The expression's own `type' is recorded here.  Consumers such as
+// arrayindex::resolve_types and functioncall::resolve_types read e->type
+// after resolving a sub-expression, so a ternary that never sets its type
+// would be reported as unresolved on every pass.
+void
+ternary_expression::resolve_types (typeresolution_info& r, exp_type t)
+{
+  cond->resolve_types (r, pe_long);
+
+  // propagate type downward: prefer the caller's requirement, fall back to
+  // whatever an earlier pass already determined for this expression
+  exp_type subtype = t;
+  if (t == pe_unknown && type != pe_unknown)
+    subtype = type;
+  truevalue->resolve_types (r, subtype);
+  falsevalue->resolve_types (r, subtype);
+
+  if (t != pe_unknown && type != pe_unknown && type != t)
+    r.mismatch (tok, t, type);
+  else if (type == pe_unknown)
+    {
+      // adopt the context's type, else whichever branch is already known
+      exp_type inferred = t;
+      if (inferred == pe_unknown)
+        inferred = truevalue->type;
+      if (inferred == pe_unknown)
+        inferred = falsevalue->type;
+
+      if (inferred != pe_unknown)
+        {
+          type = inferred;
+          r.resolved (tok, type);
+          // a disagreeing branch is caught on the next pass, when this
+          // type is pushed down to both branches
+        }
+    }
+}
+
+
+// Reconcile three types: the referrer's own type (rtype), the type of the
+// declaration it refers to (ttype), and the type T required by the
+// context.  Both Referrer and Referent must expose `type' and `tok'
+// members.  At most one action is taken per call: report a mismatch,
+// propagate one known type into one unknown slot (counted via
+// r.resolved), or report the referrer as unresolved.  The case order
+// below is significant: the first matching case wins.
+template <class Referrer, class Referent>
+void resolve_2types (Referrer* referrer, Referent* referent,
+ typeresolution_info& r, exp_type t)
+{
+ // references, so that assignments below update the objects themselves
+ exp_type& rtype = referrer->type;
+ const token* rtok = referrer->tok;
+ exp_type& ttype = referent->type;
+ const token* ttok = referent->tok;
+
+ if (t != pe_unknown && rtype == t && rtype == ttype)
+ ; // do nothing: all three types in agreement
+ else if (t == pe_unknown && rtype != pe_unknown && rtype == ttype)
+ ; // do nothing: two known types in agreement
+ else if (rtype != pe_unknown && ttype != pe_unknown && rtype != ttype)
+ r.mismatch (rtok, rtype, ttype);
+ else if (rtype != pe_unknown && t != pe_unknown && rtype != t)
+ r.mismatch (rtok, rtype, t);
+ else if (ttype != pe_unknown && t != pe_unknown && ttype != t)
+ r.mismatch (ttok, ttype, t);
+ else if (rtype == pe_unknown && t != pe_unknown)
+ {
+ // propagate from upstream
+ rtype = t;
+ r.resolved (rtok, rtype);
+ // catch rtype/ttype mismatch later
+ }
+ else if (rtype == pe_unknown && ttype != pe_unknown)
+ {
+ // propagate from referent
+ rtype = ttype;
+ r.resolved (rtok, rtype);
+ // catch rtype/t mismatch later
+ }
+ else if (rtype != pe_unknown && ttype == pe_unknown)
+ {
+ // propagate to referent
+ ttype = rtype;
+ r.resolved (ttok, ttype);
+ // catch rtype/t mismatch later
+ }
+ else
+ // nothing is known yet (all three are pe_unknown)
+ r.unresolved (rtok);
+}
+
+
+// Reconcile the type of a scalar variable reference with its declaration
+// and with the type T required by the context.  Requires that symbol
+// resolution has already bound `referent'.  A bare reference to a
+// declaration that has index types (an array) is reported as unresolved.
+void
+symbol::resolve_types (typeresolution_info& r, exp_type t)
+{
+ assert (referent != 0);
+
+ if (referent->index_types.size() > 0)
+ r.unresolved (tok); // array
else
+ resolve_2types (this, referent, r, t);
+}
+
+
+// Reconcile the element type with the declaration and the context type T,
+// then resolve each index expression against the declaration's index
+// types.  A declaration with no index types yet is first designated an
+// array of matching arity.  An arity disagreement is reported as
+// unresolved; a stats-typed index is invalid; an index whose type is
+// still unknown is unresolved.
+void
+arrayindex::resolve_types (typeresolution_info& r, exp_type t)
+{
+  assert (referent != 0);
+
+  resolve_2types (this, referent, r, t);
+
+  vector<exp_type>& slots = referent->index_types;
+
+  // designate referent as array
+  if (slots.size() == 0)
+    slots.assign (indexes.size(), pe_unknown);
+
+  if (indexes.size() != slots.size())
+    {
+      r.unresolved (tok);
+      return;
+    }
+
+  // now resolve the array indexes
+  for (unsigned n = 0; n < indexes.size(); n++)
+    {
+      expression* idx = indexes[n];
+      idx->resolve_types (r, slots[n]);
+      const exp_type it = idx->type;
+      slots[n] = it;
+
+      switch (it)
+        {
+        case pe_string:
+        case pe_long:
+          break; // acceptable index types
+        case pe_stats:
+          r.invalid (idx->tok, it);
+          break;
+        default: // pe_unknown
+          r.unresolved (idx->tok);
+          break;
+        }
+    }
+}
+
+
+// Reconcile the call's type with the function's return type and the type T
+// required by the context, then resolve every actual argument against the
+// corresponding formal argument.  Requires that symbol resolution has
+// already bound `referent'.  Known long/string argument types are
+// propagated into untyped formals; stats-typed actuals or formals are
+// invalid.
+void
+functioncall::resolve_types (typeresolution_info& r, exp_type t)
+{
+  assert (referent != 0);
+
+  resolve_2types (this, referent, r, t);
+
+  if (type == pe_stats)
+    r.mismatch (tok, pe_unknown, type);
+
+  // XXX: but what about functions that return no value,
+  // and are used only as an expression-statement for side effects?
+
+  // now resolve the function parameters
+  if (args.size() != referent->formal_args.size())
+    {
+      // Arity mismatch: report it and stop.  Continuing would index
+      // formal_args[i] for every actual argument, which runs past the end
+      // of formal_args when more actuals than formals were supplied.
+      r.unresolved (tok);
+      return;
+    }
+  for (unsigned i=0; i<args.size(); i++)
{
- // parse then print just stdin
- parser p (cin);
- stapfile* f = p.parse ();
- if (f)
- f->print (cout);
+      expression* e = args[i];
+      // reference: assigning ft below updates the formal's declaration
+      exp_type& ft = referent->formal_args[i]->type;
+      const token* ftok = referent->formal_args[i]->tok;
+      e->resolve_types (r, ft);
+      exp_type at = e->type;
+
+      if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown)
+        {
+          // propagate to formal arg
+          ft = at;
+          r.resolved (ftok, ft);
+        }
+      if (at == pe_stats)
+        r.invalid (e->tok, at);
+      if (ft == pe_stats)
+        r.invalid (ftok, ft);
+      if (at != pe_unknown && ft != pe_unknown && ft != at)
+        r.mismatch (e->tok, at, ft);
+      if (at == pe_unknown)
+        r.unresolved (e->tok);
+    }
+}
+
+
+void
+block::resolve_types (typeresolution_info& r)
+{
+ for (unsigned i=0; i<statements.size(); i++)
+ statements[i]->resolve_types (r);
+}
+
+
+// The condition must be long; then resolve types in the then-branch and,
+// when present, the else-branch.
+void
+if_statement::resolve_types (typeresolution_info& r)
+{
+  condition->resolve_types (r, pe_long);
+  thenblock->resolve_types (r);
+  // elseblock is optional (if_statement::print guards it the same way), so
+  // it must not be dereferenced unconditionally.
+  if (elseblock)
+    elseblock->resolve_types (r);
+}
+
+
+// Resolve types in the loop header and body: the condition is required to
+// be long, while init and incr are resolved without a required type.
+// NOTE(review): all four members are dereferenced unconditionally; confirm
+// the parser never leaves any of them null.
+void
+for_loop::resolve_types (typeresolution_info& r)
+{
+ init->resolve_types (r, pe_unknown);
+ cond->resolve_types (r, pe_long);
+ incr->resolve_types (r, pe_unknown);
+ block->resolve_types (r);
+}
+
+
+void
+expr_statement::resolve_types (typeresolution_info& r)
+{
+ value->resolve_types (r, pe_unknown);
+}
+
+
+// Reconcile the returned expression with the enclosing function's return
+// type, much like symbol::resolve_types with the function's return value
+// as the referent.  A return outside any function is reported as
+// unresolved.
+void
+return_statement::resolve_types (typeresolution_info& r)
+{
+  // XXX: need control flow semantic checking; until then:
+  functiondecl* fn = r.current_function;
+  if (fn == 0)
+    {
+      r.unresolved (tok);
+      return;
+    }
+
+  exp_type& rettype = fn->type; // reference: assignment updates the decl
+  value->resolve_types (r, rettype);
+  const exp_type vt = value->type;
+
+  const bool both_known = (rettype != pe_unknown) && (vt != pe_unknown);
+  if (both_known && rettype != vt)
+    r.mismatch (fn->tok, rettype, vt);
+
+  if (rettype == pe_unknown && (vt == pe_long || vt == pe_string))
+    {
+      // propagate non-statistics from value
+      rettype = vt;
+      r.resolved (fn->tok, vt);
+    }
+
+  if (vt == pe_stats)
+    r.invalid (value->tok, vt);
+}
+
+
+// Count one expression whose type is still unknown.  The error message is
+// printed only when assert_resolvability is set.
+void
+typeresolution_info::unresolved (const token* tok)
+{
+ num_still_unresolved ++;
+
+ if (assert_resolvability)
+ {
+ cerr << "error: unresolved type for ";
+ if (tok)
+ cerr << *tok;
else
- rc = 1;
+ cerr << "a token";
+ cerr << endl;
}
+}
+
- return rc;
+// Count one expression whose type PE is not allowed where it appears.  The
+// error message is printed only when assert_resolvability is set.
+void
+typeresolution_info::invalid (const token* tok, exp_type pe)
+{
+  ++ num_still_unresolved;
+
+  if (! assert_resolvability)
+    return; // quiet pass: count only
+
+  cerr << "error: invalid type " << pe << " for ";
+  if (tok == 0)
+    cerr << "a token";
+  else
+    cerr << *tok;
+  cerr << endl;
+}
+
+
+void
+typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2)
+{
+ num_still_unresolved ++;
+
+ if (assert_resolvability)
+ {
+ cerr << "error: type mismatch for ";
+ if (tok)
+ cerr << *tok;
+ else
+ cerr << "a token";
+ cerr << ": " << t1 << " vs. " << t2 << endl;
+ }
+}
+
+
+void
+typeresolution_info::resolved (const token* tok, exp_type t)
+{
+ num_newly_resolved ++;
+ // cerr << "resolved " << *tok << " type " << t << endl;
}
diff --git a/staptree.h b/staptree.h
index f7eff830..80bffa7e 100644
--- a/staptree.h
+++ b/staptree.h
@@ -10,46 +10,53 @@
using namespace std;
-struct source_location
-{
- // source co-ordinates
- string lexeme;
- string source_file;
- unsigned source_line;
-};
+enum exp_type
+ {
+ pe_unknown,
+ pe_long,
+ pe_string,
+ pe_stats
+ };
+ostream& operator << (ostream& o, const exp_type& e);
+struct token;
+struct symresolution_info;
+struct typeresolution_info;
struct expression
{
- enum { pe_void, pe_unknown, pe_long, pe_string } type;
- source_location loc;
+ exp_type type;
+ const token* tok;
virtual void print (ostream& o) = 0;
+ expression ();
virtual ~expression ();
+ virtual void resolve_symbols (symresolution_info& r) = 0;
+ virtual void resolve_types (typeresolution_info& r, exp_type t) = 0;
};
+ostream& operator << (ostream& o, expression& k);
-inline ostream& operator << (ostream& o, expression& k)
-{
- k.print (o);
- return o;
-}
struct literal: public expression
{
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
};
+
struct literal_string: public literal
{
string value;
- literal_string (const string& v): value (v) {}
- void print (ostream& o) { o << '"' << value << '"'; }
+ literal_string (const string& v);
+ void print (ostream& o);
};
+
struct literal_number: public literal
{
long value;
- literal_number (long v): value(v) {}
- void print (ostream& o) { o << value; }
+ literal_number (long v);
+ void print (ostream& o);
};
@@ -58,49 +65,58 @@ struct binary_expression: public expression
expression* left;
string op;
expression* right;
- void print (ostream& o) { o << '(' << *left << ")"
- << op
- << '(' << *right << ")"; }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
};
+
struct unary_expression: public expression
{
string op;
expression* operand;
- void print (ostream& o) { o << op << '(' << *operand << ")"; }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
};
+
struct pre_crement: public unary_expression
{
};
+
struct post_crement: public unary_expression
{
- void print (ostream& o) { o << '(' << *operand << ")" << op; }
-
-
+ void print (ostream& o);
};
+
struct logical_or_expr: public binary_expression
{
};
+
struct logical_and_expr: public binary_expression
{
};
+
struct array_in: public binary_expression
{
};
+
struct comparison: public binary_expression
{
};
+
struct concatenation: public binary_expression
{
};
+
struct exponentiation: public binary_expression
{
};
@@ -111,70 +127,160 @@ struct ternary_expression: public expression
expression* cond;
expression* truevalue;
expression* falsevalue;
- void print (ostream& o) { o << "(" << *cond << ") ? ("
- << *truevalue << ") : ("
- << *falsevalue << ")"; }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
+};
+
+
+struct assignment: public binary_expression
+{
};
+// Forward declaration; uses the same class-key (struct) as the definition
+// of vardecl further down, so the two declarations agree.
+struct vardecl;
+// A reference to a scalar variable by name.  `referent' is null until
+// resolve_symbols binds it to a declaration.
struct symbol: public expression
{
string name;
- void print (ostream& o) { o << name; }
+ vardecl *referent;
+ symbol ();
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
};
-struct arrayindex: public symbol
+
+struct arrayindex: public expression
{
+ string base;
vector<expression*> indexes;
- void print (ostream& o)
- {
- symbol::print(o);
- o << "[";
- for (unsigned i=0; i<indexes.size(); i++)
- o << (i>0 ? ", " : "") << *indexes[i];
- o << "]";
- }
+ vardecl *referent;
+ arrayindex ();
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
};
-struct functioncall: public symbol
+
+
+// Forward declaration; uses the same class-key (struct) as the definition
+// of functiondecl further down, so the two declarations agree.
+struct functiondecl;
+// A call of the function named `function' with actual arguments `args'.
+// `referent' is null until resolve_symbols binds it to a declaration.
+struct functioncall: public expression
{
+ string function;
vector<expression*> args;
- void print (ostream& o)
- {
- symbol::print(o);
- o << "(";
- for (unsigned i=0; i<args.size(); i++)
- o << (i>0 ? ", " : "") << *args[i];
- o << ")";
- }
+ functiondecl *referent;
+ functioncall ();
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r, exp_type t);
};
+// ------------------------------------------------------------------------
+
+
+struct stapfile;
+struct symboldecl;
+// Context handed to every resolve_symbols() call.  It holds references to
+// the scopes being filled in or searched, plus a count of failures.
+struct symresolution_info
+{
+ vector<vardecl*>& locals; // includes incoming function parameters
+ vector<vardecl*>& globals;
+ vector<stapfile*>& files;
+ stapfile* current_file;
+ functiondecl* current_function; // null when not inside a function
+
+ // five-argument form: resolving inside function `cfun'
+ symresolution_info (vector<vardecl*>& l,
+ vector<vardecl*>& g,
+ vector<stapfile*>& f,
+ stapfile* cfil,
+ functiondecl* cfun);
+ // four-argument form: current_function is set to null
+ symresolution_info (vector<vardecl*>& l,
+ vector<vardecl*>& g,
+ vector<stapfile*>& f,
+ stapfile* cfil);
+
+ // searches locals, then current_function's formal args, then globals;
+ // returns null if NAME is not declared in any of them
+ vardecl* find (const string& name);
+
+ // reports an unresolvable symbol on cerr and increments num_unresolved
+ void unresolved (const token* tok);
+ unsigned num_unresolved;
+};
+
+
+// Context handed to every resolve_types() call: progress counters for one
+// inference pass plus the reporting mode.
+// NOTE(review): there is no constructor, so the creator of this object
+// must initialise every field before use.
+struct typeresolution_info
+{
+ unsigned num_newly_resolved; // incremented by resolved()
+ unsigned num_still_unresolved; // incremented by mismatch/unresolved/invalid
+ bool assert_resolvability; // when set, the reporters print error messages
+ functiondecl* current_function; // null when not inside a function
+
+ void mismatch (const token* tok, exp_type t1,
+ exp_type t2);
+ void unresolved (const token* tok);
+ void resolved (const token* tok, exp_type t);
+ void invalid (const token* tok, exp_type t);
+};
+
+
+struct symboldecl // unique object per (possibly implicit)
+ // symbol declaration
+{
+ const token* tok;
+ string name;
+ exp_type type;
+ symboldecl ();
+ virtual ~symboldecl ();
+ virtual void print (ostream &o) = 0;
+ virtual void printsig (ostream &o) = 0;
+};
+
+
+ostream& operator << (ostream& o, symboldecl& k);
+
+
+struct vardecl: public symboldecl
+{
+ void print (ostream& o);
+ void printsig (ostream& o);
+ vardecl ();
+ vardecl (unsigned arity);
+ vector<exp_type> index_types; // for arrays only
+};
+
+
+struct block;
+struct functiondecl: public symboldecl
+{
+ vector<vardecl*> formal_args;
+ vector<vardecl*> locals;
+ block* body;
+ functiondecl ();
+ void print (ostream& o);
+ void printsig (ostream& o);
+};
+
+
+// ------------------------------------------------------------------------
+
+
struct statement
{
- source_location loc;
virtual void print (ostream& o) = 0;
+ const token* tok;
+ statement ();
virtual ~statement ();
+ virtual void resolve_symbols (symresolution_info& r) = 0;
+ virtual void resolve_types (typeresolution_info& r) = 0;
};
-
-inline ostream& operator << (ostream& o, statement& k)
-{
- k.print (o);
- return o;
-}
+ostream& operator << (ostream& o, statement& k);
struct block: public statement
{
vector<statement*> statements;
- void print (ostream& o)
- {
- o << "{" << endl;
- for (unsigned i=0; i<statements.size(); i++)
- o << *statements [i] << ";" << endl;
- o << "}" << endl;
- }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r);
};
struct for_loop: public statement
@@ -183,54 +289,60 @@ struct for_loop: public statement
expression* cond;
expression* incr;
statement* block;
- void print (ostream& o)
- { o << "<for_loop>" << endl; }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r);
};
+
struct null_statement: public statement
{
- void print (ostream& o)
- { o << ";"; }
-
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r) {}
+ void resolve_types (typeresolution_info& r) {}
};
-struct assignment: public expression
-{
- expression* lvalue; // XXX: consider type for lvalues; see parse_variable ()
- string op;
- expression* rvalue;
-
- void print (ostream& o)
- { o << *lvalue << " " << op << " " << *rvalue; }
-};
struct expr_statement: public statement
{
expression* value; // executed for side-effects
- void print (ostream& o)
- { o << *value; }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r);
};
+
struct if_statement: public statement
{
expression* condition;
statement* thenblock;
statement* elseblock;
- void print (ostream& o)
- { o << "if (" << *condition << ") " << endl
- << *thenblock << endl;
- if (elseblock)
- o << "else " << *elseblock << endl; }
+ void print (ostream& o);
+ void resolve_symbols (symresolution_info& r);
+ void resolve_types (typeresolution_info& r);
+};
+
+
+struct return_statement: public expr_statement
+{
+ void print (ostream& o);
+ void resolve_types (typeresolution_info& r);
+};
+
+
+struct delete_statement: public expr_statement
+{
+ void print (ostream& o);
};
-struct probe;
+struct probe;
struct stapfile
{
string name;
vector<probe*> probes;
- vector<symbol*> globals;
-
+ vector<functiondecl*> functions;
+ vector<vardecl*> globals;
void print (ostream& o);
};
@@ -238,41 +350,17 @@ struct stapfile
struct probe_point_spec // inherit from something or other?
{
string functor;
+ const token* tok;
literal* arg;
-
- void print (ostream& o)
- { o << functor;
- if (arg)
- o << "(" << *arg << ")";
- }
+ void print (ostream& o);
};
struct probe
{
- // map<string,psymbol*> locals;
vector<probe_point_spec*> location;
+ const token* tok;
block* body;
-
- void print (ostream& o)
- { o << "probe " << endl;
- for(unsigned i=0; i<location.size(); i++)
- {
- o << (i>0 ? ":" : "");
- location[i]->print (o);
- }
- o << endl;
- o << *body;
- }
+ vector<vardecl*> locals;
+ void print (ostream& o);
};
-
-
-
-inline void stapfile::print (ostream& o)
-{ o << "# file " << name << endl;
- for(unsigned i=0; i<probes.size(); i++)
- {
- probes[i]->print (o);
- o << endl;
- }
- }
diff --git a/testsuite/parseko/one.stp b/testsuite/parseko/one.stp
index f288e930..149f602e 100755
--- a/testsuite/parseko/one.stp
+++ b/testsuite/parseko/one.stp
@@ -1,2 +1,2 @@
-#! stap
+#! parsetest
"not a probe"
diff --git a/testsuite/parseko/two.stp b/testsuite/parseko/two.stp
index 64a24afe..e17024ff 100755
--- a/testsuite/parseko/two.stp
+++ b/testsuite/parseko/two.stp
@@ -1,4 +1,4 @@
-#! stap
+#! parsetest
probe foo {
a +
}
diff --git a/testsuite/parseok/one.stp b/testsuite/parseok/one.stp
index b3ca32b2..5b69767d 100755
--- a/testsuite/parseok/one.stp
+++ b/testsuite/parseok/one.stp
@@ -1,2 +1,3 @@
-#! stap
+#! parsetest
# test
+function k () { }
diff --git a/testsuite/parseok/two.stp b/testsuite/parseok/two.stp
index 3776633b..6ab3823e 100755
--- a/testsuite/parseok/two.stp
+++ b/testsuite/parseok/two.stp
@@ -1,4 +1,4 @@
-#! stap
+#! parsetest
probe kernel:systemcall("foo")
{
@@ -6,7 +6,12 @@ probe kernel:systemcall("foo")
if (global > 5) { global -- } else ;
}
+function foo () {
+ delete array[4];
+ return 0;
+}
+
probe systemtap:end
{
- function("value", 4+8);
+ foo ("value", 4+8);
}
diff --git a/testsuite/semko/four.stp b/testsuite/semko/four.stp
new file mode 100755
index 00000000..e73cc88d
--- /dev/null
+++ b/testsuite/semko/four.stp
@@ -0,0 +1,12 @@
+#! semtest
+
+global a, b; # types unknown
+
+function bar ()
+{
+ # no return statement
+}
+
+probe foo {
+ a = b;
+}
diff --git a/testsuite/semko/one.stp b/testsuite/semko/one.stp
new file mode 100755
index 00000000..994bb451
--- /dev/null
+++ b/testsuite/semko/one.stp
@@ -0,0 +1,8 @@
+#! semtest
+
+function stamp (syscall)
+{
+ # no return expression => unknown function type
+}
+
+probe kernel:syscall:read { stamp ("read"); }
diff --git a/testsuite/semko/three.stp b/testsuite/semko/three.stp
new file mode 100755
index 00000000..bfdeec66
--- /dev/null
+++ b/testsuite/semko/three.stp
@@ -0,0 +1,6 @@
+#! semtest
+
+probe foo {
+ a << 2;
+ b[a] = 4; # must not index with stats variable
+}
diff --git a/testsuite/semko/two.stp b/testsuite/semko/two.stp
new file mode 100755
index 00000000..39b77f6a
--- /dev/null
+++ b/testsuite/semko/two.stp
@@ -0,0 +1,8 @@
+#! semtest
+
+function zoo (p) { p << 5; return 0 } # passing stats as function arg
+
+probe foo {
+ bar = 2 + "string"; # mixing integer+string arithmetic
+ zoo (car)
+}
diff --git a/testsuite/semok/four.stp b/testsuite/semok/four.stp
new file mode 100755
index 00000000..e11b644a
--- /dev/null
+++ b/testsuite/semok/four.stp
@@ -0,0 +1,23 @@
+#! semtest
+
+# these will ultimately be somehow associated with "providers"
+# and have a syntax of their own
+global kernel_jiffies, kernel_current_comm;
+
+function kernel_netlink(a, b) {
+ # this should be a builtin function
+ return 0
+}
+
+function stamp (syscall)
+{
+ return kernel_netlink (4, kernel_jiffies . " " . kernel_current_comm . " " . syscall)
+}
+
+# probe kernel:syscall:read = kernel:function("sys_read");
+
+
+probe kernel:syscall:read
+{
+ stamp ("read");
+}
diff --git a/testsuite/semok/one.stp b/testsuite/semok/one.stp
new file mode 100755
index 00000000..fb7483e2
--- /dev/null
+++ b/testsuite/semok/one.stp
@@ -0,0 +1,25 @@
+#! semtest
+
+# these will ultimately be somehow associated with "providers"
+# and have a syntax of their own
+global kernel_jiffies, kernel_current_comm;
+
+function kernel_netlink(a, b) {
+ # this should be a builtin function
+ return 0
+}
+
+function stamp (syscall)
+{
+ kernel_netlink (4, kernel_jiffies . " " .
+ kernel_current_comm . " " . syscall);
+ return 0
+}
+
+# probe kernel:syscall:read = kernel:function("sys_read");
+
+
+probe kernel:syscall:read
+{
+ stamp ("read");
+}
diff --git a/testsuite/semok/three.stp b/testsuite/semok/three.stp
new file mode 100755
index 00000000..6ae531a2
--- /dev/null
+++ b/testsuite/semok/three.stp
@@ -0,0 +1,6 @@
+#! semtest
+
+probe foo {
+ a << 2;
+ b[4] << 4;
+}
diff --git a/testsuite/semok/two.stp b/testsuite/semok/two.stp
new file mode 100755
index 00000000..f3c6046e
--- /dev/null
+++ b/testsuite/semok/two.stp
@@ -0,0 +1,13 @@
+#! semtest
+
+global bar, baz;
+
+function koo (p) {
+ baz [p, "p", p] ++;
+ return p + 2;
+}
+
+probe foo {
+ bar = 2 + koo (4);
+ foo = bar + koo;
+}