diff options
author | Dave Brolley <brolley@redhat.com> | 2009-07-10 11:10:51 -0400 |
---|---|---|
committer | Dave Brolley <brolley@redhat.com> | 2009-07-10 11:10:51 -0400 |
commit | 1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f (patch) | |
tree | a0c10b78ad3e7142a59ffd6f6e75b75abf90d88e /parse.cxx | |
parent | 7d54db1a2c0b3831b6fbc8282f1155426c4be540 (diff) | |
parent | c728b7da8be430367aa33f9fbacda93d4add9ea2 (diff) | |
download | systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.tar.gz systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.tar.xz systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.zip |
Merge branch 'master' of ssh://sources.redhat.com/git/systemtap
Diffstat (limited to 'parse.cxx')
-rw-r--r-- | parse.cxx | 223 |
1 files changed, 102 insertions, 121 deletions
@@ -583,47 +583,68 @@ parser::peek_kw (std::string const & kw) -lexer::lexer (istream& i, const string& in, systemtap_session& s): - input (i), input_name (in), input_contents (""), - input_pointer (0), cursor_suspend_count(0), - cursor_line (1), cursor_column (1), session(s), - current_file (0) +lexer::lexer (istream& input, const string& in, systemtap_session& s): + input_name (in), input_pointer (0), input_end (0), + cursor_suspend_count(0), cursor_line (1), cursor_column (1), + session(s), current_file (0) { - char c; - while(input.get(c)) - input_contents.push_back(c); -} + getline(input, input_contents, '\0'); -std::string -lexer::get_input_contents () -{ - return input_contents; + input_pointer = input_contents.data(); + input_end = input_contents.data() + input_contents.size(); + + if (keywords.empty()) + { + keywords.insert("probe"); + keywords.insert("global"); + keywords.insert("function"); + keywords.insert("if"); + keywords.insert("else"); + keywords.insert("for"); + keywords.insert("foreach"); + keywords.insert("in"); + keywords.insert("limit"); + keywords.insert("return"); + keywords.insert("delete"); + keywords.insert("while"); + keywords.insert("break"); + keywords.insert("continue"); + keywords.insert("next"); + keywords.insert("string"); + keywords.insert("long"); + } } +set<string> lexer::keywords; + void lexer::set_current_file (stapfile* f) { current_file = f; + if (f) + { + f->file_contents = input_contents; + f->name = input_name; + } } int lexer::input_peek (unsigned n) { - if (input_contents.size() > (input_pointer + n)) - return (int)(unsigned char)input_contents[input_pointer+n]; - else - return -1; + if (input_pointer + n >= input_end) + return -1; // EOF + return (unsigned char)*(input_pointer + n); } int lexer::input_get () { - int c = input_peek (0); - input_pointer ++; - + int c = input_peek(); if (c < 0) return c; // EOF + ++input_pointer; + if (cursor_suspend_count) // Track effect of input_put: preserve previous cursor/line_column // until all of its characters are consumed. @@ -648,9 +669,12 @@ lexer::input_get () void lexer::input_put (const string& chars) { - // clog << "[put:" << chars << " @" << input_pointer << "]"; - input_contents.insert (input_contents.begin() + input_pointer, chars.begin(), chars.end()); + size_t pos = input_pointer - input_contents.data(); + // clog << "[put:" << chars << " @" << pos << "]"; + input_contents.insert (pos, chars); cursor_suspend_count += chars.size(); + input_pointer = input_contents.data() + pos; + input_end = input_contents.data() + input_contents.size(); } @@ -658,9 +682,7 @@ token* lexer::scan (bool wildcard) { token* n = new token; - n->location.file = input_name; - if (current_file) - n->location.stap_file = current_file; + n->location.file = current_file; unsigned semiskipped_p = 0; @@ -676,7 +698,6 @@ lexer::scan (bool wildcard) } int c = input_get(); - int c2 = input_peek (); // clog << "{" << (char)c << (char)c2 << "}"; if (c < 0) { @@ -687,6 +708,8 @@ lexer::scan (bool wildcard) if (isspace (c)) goto skip; + int c2 = input_peek (); + // Paste command line arguments as character streams into // the beginning of a token. $1..$999 go through as raw // characters; @1..@999 are quoted/escaped as strings. @@ -740,23 +763,7 @@ lexer::scan (bool wildcard) c2 = input_peek (); } - if (n->content == "probe" - || n->content == "global" - || n->content == "function" - || n->content == "if" - || n->content == "else" - || n->content == "for" - || n->content == "foreach" - || n->content == "in" - || n->content == "limit" - || n->content == "return" - || n->content == "delete" - || n->content == "while" - || n->content == "break" - || n->content == "continue" - || n->content == "next" - || n->content == "string" - || n->content == "long") + if (keywords.count(n->content)) n->type = tok_keyword; return n; @@ -767,23 +774,15 @@ lexer::scan (bool wildcard) n->type = tok_number; n->content = (char) c; - while (1) + while (isalnum (c2)) { - int c2 = input_peek (); - if (c2 < 0) - break; - // NB: isalnum is very permissive. We rely on strtol, called in // parser::parse_literal below, to confirm that the number string // is correctly formatted and in range. - if (isalnum (c2)) - { - n->content.push_back (c2); - input_get (); - } - else - break; + input_get (); + n->content.push_back (c2); + c2 = input_peek (); } return n; } @@ -835,25 +834,21 @@ lexer::scan (bool wildcard) else if (ispunct (c)) { - int c2 = input_peek (); int c3 = input_peek (1); - string s1 = string("") + (char) c; - string s2 = (c2 > 0 ? s1 + (char) c2 : s1); - string s3 = (c3 > 0 ? s2 + (char) c3 : s2); // NB: if we were to recognize negative numeric literals here, // we'd introduce another grammar ambiguity: // 1-1 would be parsed as tok_number(1) and tok_number(-1) // instead of tok_number(1) tok_operator('-') tok_number(1) - if (s1 == "#") // shell comment + if (c == '#') // shell comment { unsigned this_line = cursor_line; do { c = input_get (); } while (c >= 0 && cursor_line == this_line); goto skip; } - else if (s2 == "//") // C++ comment + else if ((c == '/' && c2 == '/')) // C++ comment { unsigned this_line = cursor_line; do { c = input_get (); } @@ -862,15 +857,15 @@ lexer::scan (bool wildcard) } else if (c == '/' && c2 == '*') // C comment { + (void) input_get (); // swallow '*' already in c2 + c = input_get (); c2 = input_get (); - unsigned chars = 0; while (c2 >= 0) { - chars ++; // track this to prevent "/*/" from being accepted + if (c == '*' && c2 == '/') + break; c = c2; c2 = input_get (); - if (chars > 1 && c == '*' && c2 == '/') - break; } goto skip; } @@ -878,73 +873,63 @@ lexer::scan (bool wildcard) { n->type = tok_embedded; (void) input_get (); // swallow '{' already in c2 - while (true) + c = input_get (); + c2 = input_get (); + while (c2 >= 0) { - c = input_get (); - if (c < 0) // EOF - { - n->type = tok_junk; - break; - } - if (c == '%') - { - c2 = input_peek (); - if (c2 == '}') - { - (void) input_get (); // swallow '}' too - break; - } - } + if (c == '%' && c2 == '}') + return n; n->content += c; + c = c2; + c2 = input_get (); } + n->type = tok_junk; return n; } // We're committed to recognizing at least the first character // as an operator. n->type = tok_operator; + n->content = c; // match all valid operators, in decreasing size order - if (s3 == "<<<" || - s3 == "<<=" || - s3 == ">>=") + if ((c == '<' && c2 == '<' && c3 == '<') || + (c == '<' && c2 == '<' && c3 == '=') || + (c == '>' && c2 == '>' && c3 == '=')) { - n->content = s3; + n->content += c2; + n->content += c3; input_get (); input_get (); // swallow other two characters } - else if (s2 == "==" || - s2 == "!=" || - s2 == "<=" || - s2 == ">=" || - s2 == "+=" || - s2 == "-=" || - s2 == "*=" || - s2 == "/=" || - s2 == "%=" || - s2 == "&=" || - s2 == "^=" || - s2 == "|=" || - s2 == ".=" || - s2 == "&&" || - s2 == "||" || - s2 == "++" || - s2 == "--" || - s2 == "->" || - s2 == "<<" || - s2 == ">>" || + else if ((c == '=' && c2 == '=') || + (c == '!' && c2 == '=') || + (c == '<' && c2 == '=') || + (c == '>' && c2 == '=') || + (c == '+' && c2 == '=') || + (c == '-' && c2 == '=') || + (c == '*' && c2 == '=') || + (c == '/' && c2 == '=') || + (c == '%' && c2 == '=') || + (c == '&' && c2 == '=') || + (c == '^' && c2 == '=') || + (c == '|' && c2 == '=') || + (c == '.' && c2 == '=') || + (c == '&' && c2 == '&') || + (c == '|' && c2 == '|') || + (c == '+' && c2 == '+') || + (c == '-' && c2 == '-') || + (c == '-' && c2 == '>') || + (c == '<' && c2 == '<') || + (c == '>' && c2 == '>') || // preprocessor tokens - s2 == "%(" || - s2 == "%?" || - s2 == "%:" || - s2 == "%)") + (c == '%' && c2 == '(') || + (c == '%' && c2 == '?') || + (c == '%' && c2 == ':') || + (c == '%' && c2 == ')')) { - n->content = s2; + n->content += c2; input_get (); // swallow other character } - else - { - n->content = s1; - } return n; } @@ -965,8 +950,6 @@ parser::parse () { stapfile* f = new stapfile; input.set_current_file (f); - f->file_contents = input.get_input_contents (); - f->name = input_name; bool empty = true; @@ -1034,18 +1017,16 @@ parser::parse () { cerr << "Input file '" << input_name << "' is empty or missing." << endl; delete f; - input.set_current_file (0); - return 0; + f = 0; } else if (num_errors > 0) { cerr << num_errors << " parse error(s)." << endl; delete f; - input.set_current_file (0); - return 0; + f = 0; } - input.set_current_file (0); + input.set_current_file(0); return f; } |