summary refs log tree commit diff stats
path: root/parse.cxx
diff options
context:
space:
mode:
author fche <fche> 2005-03-02 01:28:50 +0000
committer fche <fche> 2005-03-02 01:28:50 +0000
commit 56099f083d7a68722ace316be4d288d21caabaee (patch)
tree 3e67ec78134a358c1f90f701c165c4c577d62177 /parse.cxx
parent 2f1a1aead38c1dcd329a694dd8d3290b37320466 (diff)
download systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.gz
systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.tar.xz
systemtap-steved-56099f083d7a68722ace316be4d288d21caabaee.zip
* some semantic analysis
2005-03-01 Frank Ch. Eigler <fche@redhat.com>
* parse.cxx: Implement left-associativity for several types of operators. Add some more statement types. Parse functions. Be able to print tokens. Simplify error generating functions. Save tokens in all parse tree nodes.
* parse.h: Corresponding changes.
* staptree.cxx: Move tree-printing functions here. Add many new functions for symbol and type resolution.
* staptree.h: Corresponding changes.
* semtest.cxx: New semantic analysis pass & test driver.
* testsuite/sem*/*: New tests.
* parsetest.cxx: Separated parse test driver.
* testsuite/parse*/*: Adapt tests to parsetest driver.
* Makefile.am: Build semtest. Run its tests.
* Makefile.in: Regenerated.
* parse.cxx, parse.h: New files: parser.
Diffstat (limited to 'parse.cxx')
-rw-r--r-- parse.cxx 374
1 files changed, 273 insertions, 101 deletions
diff --git a/parse.cxx b/parse.cxx
index e33aee04..4238f37e 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -29,6 +29,33 @@ parser::~parser()
}
+ostream&
+operator << (ostream& o, const token& t)
+{
+  // Writes "<kind> '<content>' at <file>:<line>:<column>" to O and returns O.
+  o << (t.type == tok_junk ? "junk" :
+        t.type == tok_identifier ? "identifier" :
+        t.type == tok_operator ? "operator" :
+        t.type == tok_string ? "string" :
+        t.type == tok_number ? "number" :
+        "unknown token");
+  o << " '";
+  for (unsigned i=0; i<t.content.length(); i++)
+    {
+      // isprint() is undefined for negative values, so widen via unsigned char.
+      char c = t.content[i];
+      o << (isprint (static_cast<unsigned char> (c)) ? c : '?'); // mask unprintable bytes
+    }
+  o << "'";
+  o << " at "
+    << t.location.file << ":"
+    << t.location.line << ":"
+    << t.location.column;
+
+  return o;
+}
+
+
void
parser::print_error (const parse_error &pe)
{
@@ -36,27 +63,9 @@ parser::print_error (const parse_error &pe)
const token* t = last_t;
if (t)
- {
- cerr << "\tsaw "
- << (t->type == tok_junk ? "junk" :
- t->type == tok_identifier ? "identifier" :
- t->type == tok_operator ? "operator" :
- t->type == tok_string ? "string" :
- t->type == tok_number ? "number" :
- "unknown token") << " '";
- for (unsigned i=0; i<t->content.length(); i++)
- {
- char c = t->content[i];
- cerr << (isprint (c) ? c : '?');
- }
- cerr << "'"
- << " at "
- << t->location.file << ":"
- << t->location.line << ":"
- << t->location.column << endl;
- }
+ cerr << "\tsaw: " << *t << endl;
else
- cerr << "\tsaw " << input_name << " EOF" << endl;
+ cerr << "\tsaw: " << input_name << " EOF" << endl;
// XXX: make it possible to print the last input line,
// so as to line up an arrow with the specific error column
@@ -237,6 +246,7 @@ lexer::scan ()
(c == '<' && c2 == '<') ||
(c == '+' && c2 == '=') ||
(c == '-' && c2 == '=') ||
+ (c == ':' && c2 == ':') ||
false) // XXX: etc.
n->content.push_back((char) input_get ());
@@ -259,27 +269,36 @@ parser::parse ()
{
stapfile* f = new stapfile;
f->name = input_name;
-
+
+ bool empty = true;
+
while (1)
{
try
{
const token* t = peek ();
- if (! t) // EOF
+ if (! t) // nice clean EOF
break;
+ empty = false;
if (t->type == tok_identifier && t->content == "probe")
{
- next (); // advance
+ next ();
f->probes.push_back (parse_probe ());
}
else if (t->type == tok_identifier && t->content == "global")
{
- next (); // advance
- f->globals.push_back (parse_global ());
+ next ();
+ parse_global (f->globals);
+ }
+ else if (t->type == tok_identifier && t->content == "function")
+ {
+ next ();
+ f->functions.push_back (parse_functiondecl ());
+ // XXX: check for duplicate function decl
}
else
- throw parse_error ("expected 'probe' or 'global'");
+ throw parse_error ("expected 'probe', 'global', or 'function'");
}
catch (parse_error& pe)
{
@@ -297,11 +316,17 @@ parser::parse ()
}
}
- if (num_errors > 0)
+ if (empty)
+ {
+ cerr << "Input file '" << input_name << "' is empty or missing." << endl;
+ delete f;
+ return 0;
+ }
+ else if (num_errors > 0)
{
cerr << num_errors << " parse error(s)." << endl;
delete f;
- f = 0;
+ return 0;
}
return f;
@@ -317,12 +342,16 @@ parser::parse_probe ()
const token *t = peek ();
if (t && t->type == tok_identifier)
{
+ p->tok = t;
p->location.push_back (parse_probe_point_spec ());
- t = next ();
- if (t->type == tok_operator && t->content == ":")
- continue;
- else if (t->type == tok_operator && t->content == "{")
+ t = peek ();
+ if (t && t->type == tok_operator && t->content == ":")
+ {
+ next ();
+ continue;
+ }
+ else if (t && t->type == tok_operator && t->content == "{")
break;
else
throw parse_error ("expected ':' or '{'");
@@ -339,16 +368,21 @@ parser::parse_probe ()
block*
-parser::parse_stmt_block () // "{" already consumed
+parser::parse_stmt_block ()
{
block* pb = new block;
+ const token* t = next ();
+ if (! (t->type == tok_operator && t->content == "{"))
+ throw parse_error ("expected '{'");
+
+ pb->tok = t;
while (1)
{
try
{
// handle empty blocks
- const token* t = peek ();
+ t = peek ();
if (t && t->type == tok_operator && t->content == "}")
{
next ();
@@ -397,23 +431,22 @@ parser::parse_statement ()
return new null_statement ();
}
else if (t && t->type == tok_operator && t->content == "{")
- {
- next ();
- return parse_stmt_block ();
- }
+ return parse_stmt_block ();
else if (t && t->type == tok_identifier && t->content == "if")
- {
- next ();
- return parse_if_statement ();
- }
+ return parse_if_statement ();
+ else if (t && t->type == tok_identifier && t->content == "return")
+ return parse_return_statement ();
+ else if (t && t->type == tok_identifier && t->content == "delete")
+ return parse_delete_statement ();
// XXX: other control constructs ("for", "delete", "while", "do",
- // "break", "continue", "exit")
+ // "break", "continue", "exit", "return")
else if (t && (t->type == tok_operator || // expressions are flexible
t->type == tok_identifier ||
t->type == tok_number ||
t->type == tok_string))
{
expr_statement *es = new expr_statement;
+ es->tok = t;
es->value = parse_expression ();
return es;
}
@@ -422,10 +455,71 @@ parser::parse_statement ()
}
-symbol*
-parser::parse_global ()
+void
+parser::parse_global (vector <vardecl*>& globals) // parses NAME {"," NAME} ";" and appends one vardecl per NAME to GLOBALS
{
- throw parse_error ("cannot parse global block yet");
+ while (1) // one iteration per declared name
+ {
+ const token* t = next (); // the name being declared
+ if (! (t->type == tok_identifier))
+ throw parse_error ("expected identifier");
+
+ vardecl* d = new vardecl;
+ d->name = t->content;
+ d->tok = t; // keep the declaring token on the node
+ globals.push_back (d); // XXX: check for duplicates
+
+ t = next (); // separator or terminator
+ if (t->type == tok_operator && t->content == ";")
+ break; // ';' ends the list
+ else if (t->type == tok_operator && t->content == ",")
+ continue; // ',' introduces another name
+ else
+ throw parse_error ("expected ';' or ','");
+ }
+}
+
+
+functiondecl*
+parser::parse_functiondecl () // parses NAME "(" [ARG {"," ARG}] ")" followed by a statement block
+{
+ functiondecl *fd = new functiondecl (); // NOTE(review): not released on the throw paths below
+
+ const token* t = next (); // function name
+ if (! (t->type == tok_identifier))
+ throw parse_error ("expected identifier");
+ fd->name = t->content;
+ fd->tok = t;
+
+ t = next ();
+ if (! (t->type == tok_operator && t->content == "("))
+ throw parse_error ("expected '('");
+
+ while (1) // one iteration per formal argument
+ {
+ t = next ();
+
+ // permit zero-argument functions
+ if (t->type == tok_operator && t->content == ")") // NOTE(review): also reached right after a ',', so "f(a,)" is accepted
+ break;
+ else if (! (t->type == tok_identifier))
+ throw parse_error ("expected identifier");
+ vardecl* vd = new vardecl;
+ vd->name = t->content;
+ vd->tok = t;
+ fd->formal_args.push_back (vd);
+
+ t = next (); // separator or closing parenthesis
+ if (t->type == tok_operator && t->content == ")")
+ break;
+ if (t->type == tok_operator && t->content == ",")
+ continue;
+ else
+ throw parse_error ("expected ',' or ')'");
+ }
+
+ fd->body = parse_stmt_block (); // the function body
+ return fd;
}
@@ -438,6 +532,7 @@ parser::parse_probe_point_spec ()
if (t->type != tok_identifier)
throw parse_error ("expected identifier");
pl->functor = t->content;
+ pl->tok = t;
t = peek ();
if (t && t->type == tok_operator && t->content == "(")
@@ -457,12 +552,16 @@ literal*
parser::parse_literal ()
{
const token* t = next ();
+ literal* l;
if (t->type == tok_string)
- return new literal_string (t->content);
+ l = new literal_string (t->content);
else if (t->type == tok_number)
- return new literal_number (atol (t->content.c_str ()));
+ l = new literal_number (atol (t->content.c_str ()));
else
throw parse_error ("expected literal string or number");
+
+ l->tok = t;
+ return l;
}
@@ -470,10 +569,15 @@ if_statement*
parser::parse_if_statement ()
{
const token* t = next ();
+ if (! (t->type == tok_identifier && t->content == "if"))
+ throw parse_error ("expected 'if'");
+ if_statement* s = new if_statement;
+ s->tok = t;
+
+ t = next ();
if (! (t->type == tok_operator && t->content == "("))
throw parse_error ("expected '('");
- if_statement* s = new if_statement;
s->condition = parse_expression ();
t = next ();
@@ -493,6 +597,32 @@ parser::parse_if_statement ()
}
+return_statement*
+parser::parse_return_statement () // parses: "return" EXPRESSION
+{
+ const token* t = next (); // consume the keyword itself
+ if (! (t->type == tok_identifier && t->content == "return"))
+ throw parse_error ("expected 'return'");
+ return_statement* s = new return_statement;
+ s->tok = t; // keep the keyword token on the node
+ s->value = parse_expression (); // always parsed: no bare "return" form is handled here
+ return s;
+}
+
+
+delete_statement*
+parser::parse_delete_statement () // parses: "delete" EXPRESSION
+{
+ const token* t = next (); // consume the keyword itself
+ if (! (t->type == tok_identifier && t->content == "delete"))
+ throw parse_error ("expected 'delete'");
+ delete_statement* s = new delete_statement;
+ s->tok = t; // keep the keyword token on the node
+ s->value = parse_expression (); // operand is parsed as a general expression
+ return s;
+}
+
+
expression*
parser::parse_expression ()
{
@@ -511,21 +641,24 @@ parser::parse_assignment ()
expression* op1 = parse_ternary ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ // left-associative operators
+ while (t && t->type == tok_operator
&& (t->content == "=" ||
t->content == "<<" ||
t->content == "+=" ||
false)) // XXX: add /= etc.
{
assignment* e = new assignment;
- e->lvalue = op1;
+ e->left = op1;
e->op = t->content;
+ e->tok = t;
next ();
- e->rvalue = parse_expression ();
- return e;
+ e->right = parse_ternary ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -537,16 +670,17 @@ parser::parse_ternary ()
const token* t = peek ();
if (t && t->type == tok_operator && t->content == "?")
{
- next ();
ternary_expression* e = new ternary_expression;
+ e->tok = t;
e->cond = op1;
- e->truevalue = parse_expression ();
+ next ();
+ e->truevalue = parse_expression (); // XXX
t = next ();
if (! (t->type == tok_operator && t->content == ":"))
throw parse_error ("expected ':'");
- e->falsevalue = parse_expression ();
+ e->falsevalue = parse_expression (); // XXX
return e;
}
else
@@ -560,16 +694,19 @@ parser::parse_logical_or ()
expression* op1 = parse_logical_and ();
const token* t = peek ();
- if (t && t->type == tok_operator && t->content == "||")
+ while (t && t->type == tok_operator && t->content == "||")
{
- next ();
logical_or_expr* e = new logical_or_expr;
+ e->tok = t;
+ e->op = t->content;
e->left = op1;
- e->right = parse_expression ();
- return e;
+ next ();
+ e->right = parse_logical_and ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -579,16 +716,19 @@ parser::parse_logical_and ()
expression* op1 = parse_array_in ();
const token* t = peek ();
- if (t && t->type == tok_operator && t->content == "&&")
+ while (t && t->type == tok_operator && t->content == "&&")
{
- next ();
logical_and_expr *e = new logical_and_expr;
e->left = op1;
- e->right = parse_expression ();
- return e;
+ e->op = t->content;
+ e->tok = t;
+ next ();
+ e->right = parse_array_in ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -600,10 +740,12 @@ parser::parse_array_in ()
const token* t = peek ();
if (t && t->type == tok_identifier && t->content == "in")
{
- next ();
array_in *e = new array_in;
e->left = op1;
- e->right = parse_symbol (); // XXX: restrict to identifiers
+ e->op = t->content;
+ e->tok = t;
+ next ();
+ e->right = parse_symbol_plain ();
return e;
}
else
@@ -617,18 +759,20 @@ parser::parse_comparison ()
expression* op1 = parse_concatenation ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ while (t && t->type == tok_operator
&& (t->content == ">" || t->content == "==")) // xxx: more
{
comparison* e = new comparison;
e->left = op1;
e->op = t->content;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_concatenation ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -640,17 +784,19 @@ parser::parse_concatenation ()
const token* t = peek ();
// XXX: the actual awk string-concatenation operator is *whitespace*.
// I don't know how to easily to model that here.
- if (t && t->type == tok_operator && t->content == ".")
+ while (t && t->type == tok_operator && t->content == ".")
{
concatenation* e = new concatenation;
e->left = op1;
e->op = t->content;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_additive ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -660,18 +806,20 @@ parser::parse_additive ()
expression* op1 = parse_multiplicative ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ while (t && t->type == tok_operator
&& (t->content == "+" || t->content == "-"))
{
binary_expression* e = new binary_expression;
e->op = t->content;
e->left = op1;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_multiplicative ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -681,18 +829,20 @@ parser::parse_multiplicative ()
expression* op1 = parse_unary ();
const token* t = peek ();
- if (t && t->type == tok_operator
+ while (t && t->type == tok_operator
&& (t->content == "*" || t->content == "/" || t->content == "%"))
{
binary_expression* e = new binary_expression;
e->op = t->content;
e->left = op1;
+ e->tok = t;
next ();
- e->right = parse_expression ();
- return e;
+ e->right = parse_unary ();
+ op1 = e;
+ t = peek ();
}
- else
- return op1;
+
+ return op1;
}
@@ -705,6 +855,7 @@ parser::parse_unary ()
{
unary_expression* e = new unary_expression;
e->op = t->content;
+ e->tok = t;
next ();
e->operand = parse_expression ();
return e;
@@ -720,18 +871,20 @@ parser::parse_exponentiation ()
expression* op1 = parse_crement ();
const token* t = peek ();
+ // right associative: no loop
if (t && t->type == tok_operator
&& (t->content == "^" || t->content == "**"))
{
exponentiation* e = new exponentiation;
e->op = t->content;
e->left = op1;
+ e->tok = t;
next ();
e->right = parse_expression ();
- return e;
+ op1 = e;
}
- else
- return op1;
+
+ return op1;
}
@@ -744,6 +897,7 @@ parser::parse_crement () // as in "increment" / "decrement"
{
pre_crement* e = new pre_crement;
e->op = t->content;
+ e->tok = t;
next ();
e->operand = parse_value ();
return e;
@@ -758,6 +912,7 @@ parser::parse_crement () // as in "increment" / "decrement"
{
post_crement* e = new post_crement;
e->op = t->content;
+ e->tok = t;
next ();
e->operand = op1;
return e;
@@ -796,14 +951,16 @@ parser::parse_symbol () // var, var[index], func(parms)
const token* t = next ();
if (t->type != tok_identifier)
throw parse_error ("expected identifier");
+ const token* t2 = t;
string name = t->content;
-
+
t = peek ();
if (t && t->type == tok_operator && t->content == "[") // array
{
next ();
struct arrayindex* ai = new arrayindex;
- ai->name = name;
+ ai->tok = t2;
+ ai->base = name;
while (1)
{
ai->indexes.push_back (parse_expression ());
@@ -821,7 +978,8 @@ parser::parse_symbol () // var, var[index], func(parms)
{
next ();
struct functioncall* f = new functioncall;
- f->name = name;
+ f->tok = t2;
+ f->function = name;
while (1)
{
f->args.push_back (parse_expression ());
@@ -837,8 +995,22 @@ parser::parse_symbol () // var, var[index], func(parms)
}
else
{
- symbol *s = new symbol;
- s->name = name;
- return s;
+ symbol* sym = new symbol;
+ sym->name = name;
+ sym->tok = t2;
+ return sym;
}
}
+
+
+symbol*
+parser::parse_symbol_plain () // var only: a bare identifier, no [index] or (args)
+{
+  const token* t = next ();
+  if (t->type != tok_identifier)
+    throw parse_error ("expected identifier");
+  symbol *s = new symbol; // allocated only after validation, so the throw above cannot leak it
+  s->name = t->content;
+  s->tok = t; // keep the identifier token on the node
+  return s;
+}