author     Dave Brolley <brolley@redhat.com>   2009-07-10 11:10:51 -0400
committer  Dave Brolley <brolley@redhat.com>   2009-07-10 11:10:51 -0400
commit     1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f (patch)
tree       a0c10b78ad3e7142a59ffd6f6e75b75abf90d88e /parse.cxx
parent     7d54db1a2c0b3831b6fbc8282f1155426c4be540 (diff)
parent     c728b7da8be430367aa33f9fbacda93d4add9ea2 (diff)
Merge branch 'master' of ssh://sources.redhat.com/git/systemtap
Diffstat (limited to 'parse.cxx')
-rw-r--r--   parse.cxx   223
1 file changed, 102 insertions(+), 121 deletions(-)
diff --git a/parse.cxx b/parse.cxx
index a26d594c..cfa33cb4 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -583,47 +583,68 @@ parser::peek_kw (std::string const & kw)
-lexer::lexer (istream& i, const string& in, systemtap_session& s):
- input (i), input_name (in), input_contents (""),
- input_pointer (0), cursor_suspend_count(0),
- cursor_line (1), cursor_column (1), session(s),
- current_file (0)
+lexer::lexer (istream& input, const string& in, systemtap_session& s):
+ input_name (in), input_pointer (0), input_end (0),
+ cursor_suspend_count(0), cursor_line (1), cursor_column (1),
+ session(s), current_file (0)
{
- char c;
- while(input.get(c))
- input_contents.push_back(c);
-}
+ getline(input, input_contents, '\0');
-std::string
-lexer::get_input_contents ()
-{
- return input_contents;
+ input_pointer = input_contents.data();
+ input_end = input_contents.data() + input_contents.size();
+
+ if (keywords.empty())
+ {
+ keywords.insert("probe");
+ keywords.insert("global");
+ keywords.insert("function");
+ keywords.insert("if");
+ keywords.insert("else");
+ keywords.insert("for");
+ keywords.insert("foreach");
+ keywords.insert("in");
+ keywords.insert("limit");
+ keywords.insert("return");
+ keywords.insert("delete");
+ keywords.insert("while");
+ keywords.insert("break");
+ keywords.insert("continue");
+ keywords.insert("next");
+ keywords.insert("string");
+ keywords.insert("long");
+ }
}
+set<string> lexer::keywords;
+
void
lexer::set_current_file (stapfile* f)
{
current_file = f;
+ if (f)
+ {
+ f->file_contents = input_contents;
+ f->name = input_name;
+ }
}
int
lexer::input_peek (unsigned n)
{
- if (input_contents.size() > (input_pointer + n))
- return (int)(unsigned char)input_contents[input_pointer+n];
- else
- return -1;
+ if (input_pointer + n >= input_end)
+ return -1; // EOF
+ return (unsigned char)*(input_pointer + n);
}
int
lexer::input_get ()
{
- int c = input_peek (0);
- input_pointer ++;
-
+ int c = input_peek();
if (c < 0) return c; // EOF
+ ++input_pointer;
+
if (cursor_suspend_count)
// Track effect of input_put: preserve previous cursor/line_column
// until all of its characters are consumed.
@@ -648,9 +669,12 @@ lexer::input_get ()
void
lexer::input_put (const string& chars)
{
- // clog << "[put:" << chars << " @" << input_pointer << "]";
- input_contents.insert (input_contents.begin() + input_pointer, chars.begin(), chars.end());
+ size_t pos = input_pointer - input_contents.data();
+ // clog << "[put:" << chars << " @" << pos << "]";
+ input_contents.insert (pos, chars);
cursor_suspend_count += chars.size();
+ input_pointer = input_contents.data() + pos;
+ input_end = input_contents.data() + input_contents.size();
}
@@ -658,9 +682,7 @@ token*
lexer::scan (bool wildcard)
{
token* n = new token;
- n->location.file = input_name;
- if (current_file)
- n->location.stap_file = current_file;
+ n->location.file = current_file;
unsigned semiskipped_p = 0;
@@ -676,7 +698,6 @@ lexer::scan (bool wildcard)
}
int c = input_get();
- int c2 = input_peek ();
// clog << "{" << (char)c << (char)c2 << "}";
if (c < 0)
{
@@ -687,6 +708,8 @@ lexer::scan (bool wildcard)
if (isspace (c))
goto skip;
+ int c2 = input_peek ();
+
// Paste command line arguments as character streams into
// the beginning of a token. $1..$999 go through as raw
// characters; @1..@999 are quoted/escaped as strings.
@@ -740,23 +763,7 @@ lexer::scan (bool wildcard)
c2 = input_peek ();
}
- if (n->content == "probe"
- || n->content == "global"
- || n->content == "function"
- || n->content == "if"
- || n->content == "else"
- || n->content == "for"
- || n->content == "foreach"
- || n->content == "in"
- || n->content == "limit"
- || n->content == "return"
- || n->content == "delete"
- || n->content == "while"
- || n->content == "break"
- || n->content == "continue"
- || n->content == "next"
- || n->content == "string"
- || n->content == "long")
+ if (keywords.count(n->content))
n->type = tok_keyword;
return n;
@@ -767,23 +774,15 @@ lexer::scan (bool wildcard)
n->type = tok_number;
n->content = (char) c;
- while (1)
+ while (isalnum (c2))
{
- int c2 = input_peek ();
- if (c2 < 0)
- break;
-
// NB: isalnum is very permissive. We rely on strtol, called in
// parser::parse_literal below, to confirm that the number string
// is correctly formatted and in range.
- if (isalnum (c2))
- {
- n->content.push_back (c2);
- input_get ();
- }
- else
- break;
+ input_get ();
+ n->content.push_back (c2);
+ c2 = input_peek ();
}
return n;
}
@@ -835,25 +834,21 @@ lexer::scan (bool wildcard)
else if (ispunct (c))
{
- int c2 = input_peek ();
int c3 = input_peek (1);
- string s1 = string("") + (char) c;
- string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
- string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
// NB: if we were to recognize negative numeric literals here,
// we'd introduce another grammar ambiguity:
// 1-1 would be parsed as tok_number(1) and tok_number(-1)
// instead of tok_number(1) tok_operator('-') tok_number(1)
- if (s1 == "#") // shell comment
+ if (c == '#') // shell comment
{
unsigned this_line = cursor_line;
do { c = input_get (); }
while (c >= 0 && cursor_line == this_line);
goto skip;
}
- else if (s2 == "//") // C++ comment
+ else if ((c == '/' && c2 == '/')) // C++ comment
{
unsigned this_line = cursor_line;
do { c = input_get (); }
@@ -862,15 +857,15 @@ lexer::scan (bool wildcard)
}
else if (c == '/' && c2 == '*') // C comment
{
+ (void) input_get (); // swallow '*' already in c2
+ c = input_get ();
c2 = input_get ();
- unsigned chars = 0;
while (c2 >= 0)
{
- chars ++; // track this to prevent "/*/" from being accepted
+ if (c == '*' && c2 == '/')
+ break;
c = c2;
c2 = input_get ();
- if (chars > 1 && c == '*' && c2 == '/')
- break;
}
goto skip;
}
@@ -878,73 +873,63 @@ lexer::scan (bool wildcard)
{
n->type = tok_embedded;
(void) input_get (); // swallow '{' already in c2
- while (true)
+ c = input_get ();
+ c2 = input_get ();
+ while (c2 >= 0)
{
- c = input_get ();
- if (c < 0) // EOF
- {
- n->type = tok_junk;
- break;
- }
- if (c == '%')
- {
- c2 = input_peek ();
- if (c2 == '}')
- {
- (void) input_get (); // swallow '}' too
- break;
- }
- }
+ if (c == '%' && c2 == '}')
+ return n;
n->content += c;
+ c = c2;
+ c2 = input_get ();
}
+ n->type = tok_junk;
return n;
}
// We're committed to recognizing at least the first character
// as an operator.
n->type = tok_operator;
+ n->content = c;
// match all valid operators, in decreasing size order
- if (s3 == "<<<" ||
- s3 == "<<=" ||
- s3 == ">>=")
+ if ((c == '<' && c2 == '<' && c3 == '<') ||
+ (c == '<' && c2 == '<' && c3 == '=') ||
+ (c == '>' && c2 == '>' && c3 == '='))
{
- n->content = s3;
+ n->content += c2;
+ n->content += c3;
input_get (); input_get (); // swallow other two characters
}
- else if (s2 == "==" ||
- s2 == "!=" ||
- s2 == "<=" ||
- s2 == ">=" ||
- s2 == "+=" ||
- s2 == "-=" ||
- s2 == "*=" ||
- s2 == "/=" ||
- s2 == "%=" ||
- s2 == "&=" ||
- s2 == "^=" ||
- s2 == "|=" ||
- s2 == ".=" ||
- s2 == "&&" ||
- s2 == "||" ||
- s2 == "++" ||
- s2 == "--" ||
- s2 == "->" ||
- s2 == "<<" ||
- s2 == ">>" ||
+ else if ((c == '=' && c2 == '=') ||
+ (c == '!' && c2 == '=') ||
+ (c == '<' && c2 == '=') ||
+ (c == '>' && c2 == '=') ||
+ (c == '+' && c2 == '=') ||
+ (c == '-' && c2 == '=') ||
+ (c == '*' && c2 == '=') ||
+ (c == '/' && c2 == '=') ||
+ (c == '%' && c2 == '=') ||
+ (c == '&' && c2 == '=') ||
+ (c == '^' && c2 == '=') ||
+ (c == '|' && c2 == '=') ||
+ (c == '.' && c2 == '=') ||
+ (c == '&' && c2 == '&') ||
+ (c == '|' && c2 == '|') ||
+ (c == '+' && c2 == '+') ||
+ (c == '-' && c2 == '-') ||
+ (c == '-' && c2 == '>') ||
+ (c == '<' && c2 == '<') ||
+ (c == '>' && c2 == '>') ||
// preprocessor tokens
- s2 == "%(" ||
- s2 == "%?" ||
- s2 == "%:" ||
- s2 == "%)")
+ (c == '%' && c2 == '(') ||
+ (c == '%' && c2 == '?') ||
+ (c == '%' && c2 == ':') ||
+ (c == '%' && c2 == ')'))
{
- n->content = s2;
+ n->content += c2;
input_get (); // swallow other character
}
- else
- {
- n->content = s1;
- }
return n;
}
@@ -965,8 +950,6 @@ parser::parse ()
{
stapfile* f = new stapfile;
input.set_current_file (f);
- f->file_contents = input.get_input_contents ();
- f->name = input_name;
bool empty = true;
@@ -1034,18 +1017,16 @@ parser::parse ()
{
cerr << "Input file '" << input_name << "' is empty or missing." << endl;
delete f;
- input.set_current_file (0);
- return 0;
+ f = 0;
}
else if (num_errors > 0)
{
cerr << num_errors << " parse error(s)." << endl;
delete f;
- input.set_current_file (0);
- return 0;
+ f = 0;
}
- input.set_current_file (0);
+ input.set_current_file(0);
return f;
}
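
The lexer rework merged above rests on two patterns: the whole script is buffered once with getline(input, input_contents, '\0') and then walked with raw pointers (input_pointer/input_end), and the long chain of keyword string comparisons is replaced by a lookup in a shared set<string>. The following is a minimal standalone sketch of those patterns only, not the SystemTap lexer itself; the mini_lexer type and its members are illustrative names, while the real implementation is the lexer class in parse.cxx shown in the diff.

#include <cctype>
#include <iostream>
#include <set>
#include <string>

struct mini_lexer
{
  std::string contents;        // the whole input, buffered up front
  const char* pointer;         // next character to hand out
  const char* end;             // one past the last buffered character
  static std::set<std::string> keywords;

  explicit mini_lexer (std::istream& input)
  {
    std::getline (input, contents, '\0');   // slurp the entire stream at once
    pointer = contents.data ();
    end = contents.data () + contents.size ();
    if (keywords.empty ())                  // populated once, shared by all instances
      {
        keywords.insert ("probe");
        keywords.insert ("global");
        keywords.insert ("function");
      }
  }

  int peek (unsigned n = 0) const
  {
    if (pointer + n >= end)
      return -1;                            // EOF
    return (unsigned char) *(pointer + n);
  }

  int get ()
  {
    int c = peek ();
    if (c >= 0) ++pointer;
    return c;
  }

  // Push characters back in front of the cursor.  Inserting into the
  // backing string may reallocate it, so both raw pointers must be
  // recomputed from a saved offset.
  void put (const std::string& chars)
  {
    size_t pos = pointer - contents.data ();
    contents.insert (pos, chars);
    pointer = contents.data () + pos;
    end = contents.data () + contents.size ();
  }

  bool is_keyword (const std::string& word) const
  {
    return keywords.count (word) != 0;      // one set lookup instead of a || chain
  }
};

std::set<std::string> mini_lexer::keywords;

int main ()
{
  mini_lexer lx (std::cin);
  std::string word;
  while (lx.peek () >= 0 && !std::isspace (lx.peek ()))
    word += (char) lx.get ();
  std::cout << word << (lx.is_keyword (word) ? " (keyword)" : "") << std::endl;
  return 0;
}

Note the fix-up in put(): std::string::insert can move the underlying buffer, which is why the new input_put() in the diff recomputes input_pointer and input_end from a saved offset rather than reusing the old pointer values.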