diff options
author | fche <fche> | 2005-02-12 02:28:43 +0000 |
---|---|---|
committer | fche <fche> | 2005-02-12 02:28:43 +0000 |
commit | 2f1a1aead38c1dcd329a694dd8d3290b37320466 (patch) | |
tree | 3700f34d81fadb3b3f2cf850cce7eaec73d70659 /parse.cxx | |
download | systemtap-steved-2f1a1aead38c1dcd329a694dd8d3290b37320466.tar.gz systemtap-steved-2f1a1aead38c1dcd329a694dd8d3290b37320466.tar.xz systemtap-steved-2f1a1aead38c1dcd329a694dd8d3290b37320466.zip |
* parser prototype snapshot
Diffstat (limited to 'parse.cxx')
-rw-r--r-- | parse.cxx | 844 |
1 files changed, 844 insertions, 0 deletions
diff --git a/parse.cxx b/parse.cxx new file mode 100644 index 00000000..e33aee04 --- /dev/null +++ b/parse.cxx @@ -0,0 +1,844 @@ +// recursive descent parser for systemtap scripts +// Copyright 2005 Red Hat Inc. +// GPL + +#include <iostream> +#include "staptree.h" +#include "parse.h" +#include <cctype> +#include <fstream> + +using namespace std; + +// ------------------------------------------------------------------------ + +parser::parser (istream& i): + input_name ("<input>"), free_input (0), input (i, input_name), + last_t (0), next_t (0), num_errors (0) +{ } + +parser::parser (const string& fn): + input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)), + input (* free_input, input_name), + last_t (0), next_t (0), num_errors (0) +{ } + +parser::~parser() +{ + if (free_input) delete free_input; +} + + +void +parser::print_error (const parse_error &pe) +{ + cerr << "parse error: " << pe.what () << endl; + + const token* t = last_t; + if (t) + { + cerr << "\tsaw " + << (t->type == tok_junk ? "junk" : + t->type == tok_identifier ? "identifier" : + t->type == tok_operator ? "operator" : + t->type == tok_string ? "string" : + t->type == tok_number ? "number" : + "unknown token") << " '"; + for (unsigned i=0; i<t->content.length(); i++) + { + char c = t->content[i]; + cerr << (isprint (c) ? c : '?'); + } + cerr << "'" + << " at " + << t->location.file << ":" + << t->location.line << ":" + << t->location.column << endl; + } + else + cerr << "\tsaw " << input_name << " EOF" << endl; + + // XXX: make it possible to print the last input line, + // so as to line up an arrow with the specific error column + + num_errors ++; +} + + +const token* +parser::last () +{ + return last_t; +} + + +const token* +parser::next () +{ + if (! next_t) + next_t = input.scan (); + if (! next_t) + throw parse_error ("unexpected end-of-file"); + + // cerr << "[" << next_t->content << "]" << endl; + + last_t = next_t; + // advance by zeroing next_t + next_t = 0; + return last_t; +} + + +const token* +parser::peek () +{ + if (! next_t) + next_t = input.scan (); + + // cerr << "{" << (next_t ? next_t->content : "null") << "}"; + + // don't advance by zeroing next_t + last_t = next_t; + return next_t; +} + + +lexer::lexer (istream& i, const string& in): + input (i), input_name (in), cursor_line (1), cursor_column (1) +{ } + +int +lexer::input_get () +{ + int c = input.get(); + + if (! input) + return -1; + + // update source cursor + if (c == '\n') + { + cursor_line ++; + cursor_column = 1; + } + else + cursor_column ++; + + return c; +} + + +token* +lexer::scan () +{ + token* n = new token; + n->location.file = input_name; + + skip: + n->location.line = cursor_line; + n->location.column = cursor_column; + + int c = input_get(); + if (c < 0) + { + delete n; + return 0; + } + + if (isspace (c)) + goto skip; + + else if (isalpha (c)) + { + n->type = tok_identifier; + n->content = (char) c; + while (1) + { + int c2 = input.peek (); + if (! input) + break; + if ((isalnum(c2) || c2 == '_')) + { + n->content.push_back(c2); + input_get (); + } + else + break; + } + return n; + } + + else if (isdigit (c)) + { + // XXX: support 0xHEX etc. + n->type = tok_number; + n->content = c; + while (1) + { + int c2 = input.peek (); + if (! input) + break; + if (isdigit(c2)) + { + n->content.push_back(c2); + input_get (); + } + else + break; + } + return n; + } + + else if (c == '\"') + { + n->type = tok_string; + while (1) + { + c = input_get (); + + if (! input || c == '\n') + { + n->type = tok_junk; + break; + } + if (c == '\"') // closing double-quotes + break; + else if (c == '\\') + { + // XXX: handle escape sequences + } + else + n->content.push_back(c); + } + return n; + } + + else if (ispunct (c)) + { + int c2 = input.peek (); + + if (c == '#') // comment to end-of-line + { + unsigned this_line = cursor_line; + while (input && cursor_line == this_line) + input_get (); + goto skip; + } + + n->type = tok_operator; + n->content = (char) c; + + // handle two-character operators + if ((c == '=' && c2 == '=') || + (c == '+' && c2 == '+') || + (c == '-' && c2 == '-') || + (c == '|' && c2 == '|') || + (c == '&' && c2 == '&') || + (c == '<' && c2 == '<') || + (c == '+' && c2 == '=') || + (c == '-' && c2 == '=') || + false) // XXX: etc. + n->content.push_back((char) input_get ()); + + return n; + } + + else + { + n->type = tok_junk; + n->content = (char) c; + return n; + } +} + + +// ------------------------------------------------------------------------ + +stapfile* +parser::parse () +{ + stapfile* f = new stapfile; + f->name = input_name; + + while (1) + { + try + { + const token* t = peek (); + if (! t) // EOF + break; + + if (t->type == tok_identifier && t->content == "probe") + { + next (); // advance + f->probes.push_back (parse_probe ()); + } + else if (t->type == tok_identifier && t->content == "global") + { + next (); // advance + f->globals.push_back (parse_global ()); + } + else + throw parse_error ("expected 'probe' or 'global'"); + } + catch (parse_error& pe) + { + print_error (pe); + // Quietly swallow all tokens until the next '}'. + while (1) + { + const token* t = peek (); + if (! t) + break; + next (); + if (t->type == tok_operator && t->content == "}") + break; + } + } + } + + if (num_errors > 0) + { + cerr << num_errors << " parse error(s)." << endl; + delete f; + f = 0; + } + + return f; +} + + +probe* +parser::parse_probe () +{ + probe *p = new probe; + while (1) + { + const token *t = peek (); + if (t && t->type == tok_identifier) + { + p->location.push_back (parse_probe_point_spec ()); + + t = next (); + if (t->type == tok_operator && t->content == ":") + continue; + else if (t->type == tok_operator && t->content == "{") + break; + else + throw parse_error ("expected ':' or '{'"); + // XXX: unify logic with that in parse_symbol() + } + else + throw parse_error ("expected probe location specifier"); + } + + p->body = parse_stmt_block (); + + return p; +} + + +block* +parser::parse_stmt_block () // "{" already consumed +{ + block* pb = new block; + + while (1) + { + try + { + // handle empty blocks + const token* t = peek (); + if (t && t->type == tok_operator && t->content == "}") + { + next (); + break; + } + + pb->statements.push_back (parse_statement ()); + + // ';' is a statement separator in awk, not a terminator. + // Note that ';' is also a possible null statement. + t = peek (); + if (t && t->type == tok_operator && t->content == ";") + { + next (); + continue; + } + } + catch (parse_error& pe) + { + print_error (pe); + // Quietly swallow all tokens until the next ';' or '}'. + while (1) + { + const token* t = peek (); + if (! t) + return 0; + next (); + if (t->type == tok_operator && (t->content == "}" + || t->content == ";")) + break; + } + } + } + + return pb; +} + + +statement* +parser::parse_statement () +{ + const token* t = peek (); + if (t && t->type == tok_operator && t->content == ";") + { + next (); + return new null_statement (); + } + else if (t && t->type == tok_operator && t->content == "{") + { + next (); + return parse_stmt_block (); + } + else if (t && t->type == tok_identifier && t->content == "if") + { + next (); + return parse_if_statement (); + } + // XXX: other control constructs ("for", "delete", "while", "do", + // "break", "continue", "exit") + else if (t && (t->type == tok_operator || // expressions are flexible + t->type == tok_identifier || + t->type == tok_number || + t->type == tok_string)) + { + expr_statement *es = new expr_statement; + es->value = parse_expression (); + return es; + } + else + throw parse_error ("expected statement"); +} + + +symbol* +parser::parse_global () +{ + throw parse_error ("cannot parse global block yet"); +} + + +probe_point_spec* +parser::parse_probe_point_spec () +{ + probe_point_spec* pl = new probe_point_spec; + + const token* t = next (); + if (t->type != tok_identifier) + throw parse_error ("expected identifier"); + pl->functor = t->content; + + t = peek (); + if (t && t->type == tok_operator && t->content == "(") + { + next (); // consume "(" + pl->arg = parse_literal (); + const token* tt = next (); + if (! (tt->type == tok_operator && tt->content == ")")) + throw parse_error ("expected ')'"); + } + + return pl; +} + + +literal* +parser::parse_literal () +{ + const token* t = next (); + if (t->type == tok_string) + return new literal_string (t->content); + else if (t->type == tok_number) + return new literal_number (atol (t->content.c_str ())); + else + throw parse_error ("expected literal string or number"); +} + + +if_statement* +parser::parse_if_statement () +{ + const token* t = next (); + if (! (t->type == tok_operator && t->content == "(")) + throw parse_error ("expected '('"); + + if_statement* s = new if_statement; + s->condition = parse_expression (); + + t = next (); + if (! (t->type == tok_operator && t->content == ")")) + throw parse_error ("expected ')'"); + + s->thenblock = parse_statement (); + + t = peek (); + if (t && t->type == tok_identifier && t->content == "else") + { + next (); + s->elseblock = parse_statement (); + } + + return s; +} + + +expression* +parser::parse_expression () +{ + return parse_assignment (); +} + +// XXX: in all subsequent calls to parse_expression(), +// check whether operator priority / associativity +// suggests that a different expression subtype parser +// should be called instead + + +expression* +parser::parse_assignment () +{ + expression* op1 = parse_ternary (); + + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == "=" || + t->content == "<<" || + t->content == "+=" || + false)) // XXX: add /= etc. + { + assignment* e = new assignment; + e->lvalue = op1; + e->op = t->content; + next (); + e->rvalue = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_ternary () +{ + expression* op1 = parse_logical_or (); + + const token* t = peek (); + if (t && t->type == tok_operator && t->content == "?") + { + next (); + ternary_expression* e = new ternary_expression; + e->cond = op1; + e->truevalue = parse_expression (); + + t = next (); + if (! (t->type == tok_operator && t->content == ":")) + throw parse_error ("expected ':'"); + + e->falsevalue = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_logical_or () +{ + expression* op1 = parse_logical_and (); + + const token* t = peek (); + if (t && t->type == tok_operator && t->content == "||") + { + next (); + logical_or_expr* e = new logical_or_expr; + e->left = op1; + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_logical_and () +{ + expression* op1 = parse_array_in (); + + const token* t = peek (); + if (t && t->type == tok_operator && t->content == "&&") + { + next (); + logical_and_expr *e = new logical_and_expr; + e->left = op1; + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_array_in () +{ + expression* op1 = parse_comparison (); + + const token* t = peek (); + if (t && t->type == tok_identifier && t->content == "in") + { + next (); + array_in *e = new array_in; + e->left = op1; + e->right = parse_symbol (); // XXX: restrict to identifiers + return e; + } + else + return op1; +} + + +expression* +parser::parse_comparison () +{ + expression* op1 = parse_concatenation (); + + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == ">" || t->content == "==")) // xxx: more + { + comparison* e = new comparison; + e->left = op1; + e->op = t->content; + next (); + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_concatenation () +{ + expression* op1 = parse_additive (); + + const token* t = peek (); + // XXX: the actual awk string-concatenation operator is *whitespace*. + // I don't know how to easily to model that here. + if (t && t->type == tok_operator && t->content == ".") + { + concatenation* e = new concatenation; + e->left = op1; + e->op = t->content; + next (); + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_additive () +{ + expression* op1 = parse_multiplicative (); + + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == "+" || t->content == "-")) + { + binary_expression* e = new binary_expression; + e->op = t->content; + e->left = op1; + next (); + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_multiplicative () +{ + expression* op1 = parse_unary (); + + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == "*" || t->content == "/" || t->content == "%")) + { + binary_expression* e = new binary_expression; + e->op = t->content; + e->left = op1; + next (); + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_unary () +{ + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == "+" || t->content == "-" || t->content == "!")) + { + unary_expression* e = new unary_expression; + e->op = t->content; + next (); + e->operand = parse_expression (); + return e; + } + else + return parse_exponentiation (); +} + + +expression* +parser::parse_exponentiation () +{ + expression* op1 = parse_crement (); + + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == "^" || t->content == "**")) + { + exponentiation* e = new exponentiation; + e->op = t->content; + e->left = op1; + next (); + e->right = parse_expression (); + return e; + } + else + return op1; +} + + +expression* +parser::parse_crement () // as in "increment" / "decrement" +{ + const token* t = peek (); + if (t && t->type == tok_operator + && (t->content == "++" || t->content == "--")) + { + pre_crement* e = new pre_crement; + e->op = t->content; + next (); + e->operand = parse_value (); + return e; + } + + // post-crement or non-crement + expression *op1 = parse_value (); + + t = peek (); + if (t && t->type == tok_operator + && (t->content == "++" || t->content == "--")) + { + post_crement* e = new post_crement; + e->op = t->content; + next (); + e->operand = op1; + return e; + } + else + return op1; +} + + +expression* +parser::parse_value () +{ + const token* t = peek (); + if (! t) + throw parse_error ("expected value"); + + if (t->type == tok_operator && t->content == "(") + { + next (); + expression* e = parse_expression (); + t = next (); + if (! (t->type == tok_operator && t->content == ")")) + throw parse_error ("expected ')'"); + return e; + } + else if (t->type == tok_identifier) + return parse_symbol (); + else + return parse_literal (); +} + + +expression* +parser::parse_symbol () // var, var[index], func(parms) +{ + const token* t = next (); + if (t->type != tok_identifier) + throw parse_error ("expected identifier"); + string name = t->content; + + t = peek (); + if (t && t->type == tok_operator && t->content == "[") // array + { + next (); + struct arrayindex* ai = new arrayindex; + ai->name = name; + while (1) + { + ai->indexes.push_back (parse_expression ()); + t = next (); + if (t->type == tok_operator && t->content == "]") + break; + if (t->type == tok_operator && t->content == ",") + continue; + else + throw parse_error ("expected ',' or ']'"); + } + return ai; + } + else if (t && t->type == tok_operator && t->content == "(") // function call + { + next (); + struct functioncall* f = new functioncall; + f->name = name; + while (1) + { + f->args.push_back (parse_expression ()); + t = next (); + if (t->type == tok_operator && t->content == ")") + break; + if (t->type == tok_operator && t->content == ",") + continue; + else + throw parse_error ("expected ',' or ')'"); + } + return f; + } + else + { + symbol *s = new symbol; + s->name = name; + return s; + } +} |