2005-06-08 Frank Ch. Eigler <fche@redhat.com>

systemtap/916
Implement all basic scalar operators, including modify-assignment.
* parse.cxx (lexer): Allow multi-character lookahead in order to scan 1/2/3-character operators.
(parse_boolean_or, parse_boolean_xor, parse_boolean_and, parse_shift): New routines.
* translate.cxx (visit_assignment, visit_binary_expression, visit_*_crement): Generally rewrote.
(visit_*): Added more parentheses in output.
(emit_module_init): Initialize globals.
* staptree.h, elaborate.cxx, elaborate.h: Remove exponentiation.
* main.cxx (main): Add an end-of-line to output file.
* testsuite/*: Several new tests.
author: fche <fche> 2005-06-08 22:02:09 +0000
committer: fche <fche> 2005-06-08 22:02:09 +0000
commit: bb2e3076ea20631d4606050550bc9664204f2c62 (patch)
tree: ef9cfb841ddb001c1f3aa266523f1ff6f56b21b5 /parse.cxx
parent: 22f4623195facb4cbc1b50c45c0bd689f6958a9d (diff)
download: systemtap-steved-bb2e3076ea20631d4606050550bc9664204f2c62.tar.gz
systemtap-steved-bb2e3076ea20631d4606050550bc9664204f2c62.tar.xz
systemtap-steved-bb2e3076ea20631d4606050550bc9664204f2c62.zip
1 file changed, 180 insertions, 78 deletions
diff --git a/parse.cxx b/parse.cxx
index d1c0b954..d6908ed0 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -20,6 +20,8 @@ using namespace std;
 
 // ------------------------------------------------------------------------
 
+
+
 parser::parser (istream& i):
   input_name ("<input>"), free_input (0), input (i, input_name),
   last_t (0), next_t (0), num_errors (0)
@@ -138,14 +140,27 @@ lexer::lexer (istream& i, const string& in):
   input (i), input_name (in), cursor_line (1), cursor_column (1)
 { }
 
+
+int
+lexer::input_peek (unsigned n)
+{
+  while (lookahead.size() <= n)
+    {
+      int c = input.get ();
+      lookahead.push_back (input ? c : -1);
+    }
+  return lookahead[n];
+}
+
+
 int 
 lexer::input_get ()
 {
-  int c = input.get();
-  
-  if (! input)
-    return -1;
-  
+  int c = input_peek (0);
+  lookahead.erase (lookahead.begin ());
+
+  if (c < 0) return c; // EOF
+
   // update source cursor
   if (c == '\n')
     {
@@ -185,7 +200,7 @@ lexer::scan ()
       n->content = (char) c;
       while (1)
 	{
-	  int c2 = input.peek ();
+	  int c2 = input_peek ();
 	  if (! input)
 	    break;
 	  if ((isalnum(c2) || c2 == '_' || c2 == '$'))
@@ -206,7 +221,7 @@ lexer::scan ()
 
       while (1)
 	{
-	  int c2 = input.peek ();
+	  int c2 = input_peek ();
 	  if (! input)
 	    break;
 
@@ -251,68 +266,79 @@ lexer::scan ()
 
   else if (ispunct (c))
     {
-      int c2 = input.peek ();
+      int c2 = input_peek ();
+      int c3 = input_peek (1);
+      string s1 = string("") + (char) c;
+      string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
+      string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
 
-      if (c == '#') // shell comment
+      if (s1 == "#") // shell comment
         {
           unsigned this_line = cursor_line;
-          while (input && cursor_line == this_line)
-            input_get ();
+          do { c = input_get (); }
+          while (c >= 0 && cursor_line == this_line);
           goto skip;
         }
-      else if (c == '/' && c2 == '/') // C++ comment
+      else if (s2 == "//") // C++ comment
         {
           unsigned this_line = cursor_line;
-          while (input && cursor_line == this_line)
-            input_get ();
+          do { c = input_get (); }
+          while (c >= 0 && cursor_line == this_line);
           goto skip;
         }
       else if (c == '/' && c2 == '*') // C comment
 	{
           c2 = input_get ();
           unsigned chars = 0;
-          while (input)
+          while (c2 >= 0)
             {
               chars ++; // track this to prevent "/*/" from being accepted
               c = c2;
               c2 = input_get ();
               if (chars > 1 && c == '*' && c2 == '/')
-                goto skip;
+                break;
             }
+          goto skip;
 	}
 
+      // We're committed to recognizing at least the first character
+      // as an operator.
       n->type = tok_operator;
-      n->content = (char) c;
 
-      // handle two-character operators
-      if ((c == '=' && c2 == '=') ||
-          (c == '!' && c2 == '=') ||
-          (c == '<' && c2 == '=') ||
-          (c == '>' && c2 == '=') ||
-          (c == '+' && c2 == '+') ||
-          (c == '-' && c2 == '-') ||
-          (c == '|' && c2 == '|') ||
-          (c == '&' && c2 == '&') ||
-          // (c == '<' && c2 == '<') ||
-          // (c == '>' && c2 == '>') ||
-          (c == '+' && c2 == '=') ||
-          (c == '-' && c2 == '=') ||
-          (c == '-' && c2 == '>') ||
-	  false) // XXX: etc.
-        n->content.push_back ((char) input_get ());
-
-      // handle three-character operator
-      if (c == '<' && c2 == '<')
+      // match all valid operators, in decreasing size order
+      if (s3 == "<<<" ||
+          s3 == "<<=" ||
+          s3 == ">>=")
         {
-          input_get (); // swallow c2
-          int c3 = input.peek ();
-          if (c3 == '<')
-            {
-              input_get (); // swallow c3
-              n->content = "<<<";
-            }
-          else
-            n->content = "<<";
+          n->content = s3;
+          input_get (); input_get (); // swallow other two characters
+        }
+      else if (s2 == "==" ||
+               s2 == "!=" ||
+               s2 == "<=" ||
+               s2 == ">=" ||
+               s2 == "+=" ||
+               s2 == "-=" ||
+               s2 == "*=" ||
+               s2 == "/=" ||
+               s2 == "%=" ||
+               s2 == "&=" ||
+               s2 == "^=" ||
+               s2 == "|=" ||
+               s2 == "&&" ||
+               s2 == "||" ||
+               s2 == "++" ||
+               s2 == "--" ||
+               s2 == "->" ||
+               s2 == "<<" ||
+               s2 == ">>")
+        {
+          n->content = s2;
+          input_get (); // swallow other character
+        }   
+      else
+        {
+          n->content = s1;
         }
 
       return n;
@@ -990,9 +1016,18 @@ parser::parse_assignment ()
       && (t->content == "=" ||
 	  t->content == "<<<" ||
 	  t->content == "+=" ||
-	  false)) // XXX: add /= etc.
+	  t->content == "-=" ||
+	  t->content == "*=" ||
+	  t->content == "/=" ||
+	  t->content == "%=" ||
+	  t->content == "<<=" ||
+	  t->content == ">>=" ||
+	  t->content == "&=" ||
+	  t->content == "^=" ||
+	  t->content == "|=" ||
+	  false)) 
     {
-      // NB: lvalueness is checked during translation / elaboration
+      // NB: lvalueness is checked during elaboration / translation
       assignment* e = new assignment;
       e->left = op1;
       e->op = t->content;
@@ -1000,8 +1035,6 @@ parser::parse_assignment ()
       next ();
       e->right = parse_expression ();
       op1 = e;
-      // XXX: map assign/accumulate operators like +=, /=
-      // to ordinary assignment + nested binary_expression
     }
 
   return op1;
@@ -1059,7 +1092,7 @@ parser::parse_logical_or ()
 expression*
 parser::parse_logical_and ()
 {
-  expression* op1 = parse_array_in ();
+  expression* op1 = parse_boolean_or ();
 
   const token* t = peek ();
   while (t && t->type == tok_operator && t->content == "&&")
@@ -1069,6 +1102,72 @@ parser::parse_logical_and ()
       e->op = t->content;
       e->tok = t;
       next ();
+      e->right = parse_boolean_or ();
+      op1 = e;
+      t = peek ();
+    }
+
+  return op1;
+}
+
+
+expression*
+parser::parse_boolean_or ()
+{
+  expression* op1 = parse_boolean_xor ();
+
+  const token* t = peek ();
+  while (t && t->type == tok_operator && t->content == "|")
+    {
+      binary_expression* e = new binary_expression;
+      e->left = op1;
+      e->op = t->content;
+      e->tok = t;
+      next ();
+      e->right = parse_boolean_xor ();
+      op1 = e;
+      t = peek ();
+    }
+
+  return op1;
+}
+
+
+expression*
+parser::parse_boolean_xor ()
+{
+  expression* op1 = parse_boolean_and ();
+
+  const token* t = peek ();
+  while (t && t->type == tok_operator && t->content == "^")
+    {
+      binary_expression* e = new binary_expression;
+      e->left = op1;
+      e->op = t->content;
+      e->tok = t;
+      next ();
+      e->right = parse_boolean_and ();
+      op1 = e;
+      t = peek ();
+    }
+
+  return op1;
+}
+
+
+expression*
+parser::parse_boolean_and ()
+{
+  expression* op1 = parse_array_in ();
+
+  const token* t = peek ();
+  while (t && t->type == tok_operator && t->content == "&")
+    {
+      binary_expression* e = new binary_expression;
+      e->left = op1;
+      e->op = t->content;
+      e->tok = t;
+      next ();
       e->right = parse_array_in ();
       op1 = e;
       t = peek ();
@@ -1147,7 +1246,7 @@ parser::parse_array_in ()
 expression*
 parser::parse_comparison ()
 {
-  expression* op1 = parse_concatenation ();
+  expression* op1 = parse_shift ();
 
   const token* t = peek ();
   while (t && t->type == tok_operator 
@@ -1156,14 +1255,36 @@ parser::parse_comparison ()
           t->content == "==" ||
           t->content == "!=" ||
           t->content == "<=" ||
-          t->content == ">=" ||
-          false )) // xxx: more
+          t->content == ">="))
     {
       comparison* e = new comparison;
       e->left = op1;
       e->op = t->content;
       e->tok = t;
       next ();
+      e->right = parse_shift ();
+      op1 = e;
+      t = peek ();
+    }
+
+  return op1;
+}
+
+
+expression*
+parser::parse_shift ()
+{
+  expression* op1 = parse_concatenation ();
+
+  const token* t = peek ();
+  while (t && t->type == tok_operator && 
+         (t->content == "<<" || t->content == ">>"))
+    {
+      binary_expression* e = new binary_expression;
+      e->left = op1;
+      e->op = t->content;
+      e->tok = t;
+      next ();
       e->right = parse_concatenation ();
       op1 = e;
       t = peek ();
@@ -1248,7 +1369,11 @@ parser::parse_unary ()
 {
   const token* t = peek ();
   if (t && t->type == tok_operator 
-      && (t->content == "+" || t->content == "-" || t->content == "!"))
+      && (t->content == "+" || 
+          t->content == "-" || 
+          t->content == "!" ||
+          t->content == "~" ||
+          false))
     {
       unary_expression* e = new unary_expression;
       e->op = t->content;
@@ -1258,30 +1383,7 @@ parser::parse_unary ()
       return e;
     }
   else
-    return parse_exponentiation ();
-}
-
-
-expression*
-parser::parse_exponentiation ()
-{
-  expression* op1 = parse_crement ();
-
-  const token* t = peek ();
-  // right associative: no loop
-  if (t && t->type == tok_operator 
-      && (t->content == "^" || t->content == "**"))
-    {
-      exponentiation* e = new exponentiation;
-      e->op = t->content;
-      e->left = op1;
-      e->tok = t;
-      next ();
-      e->right = parse_expression ();
-      op1 = e;
-    }
-
-  return op1;
+    return parse_crement ();
 }
author	fche <fche>	2005-06-08 22:02:09 +0000
committer	fche <fche>	2005-06-08 22:02:09 +0000
commit	bb2e3076ea20631d4606050550bc9664204f2c62 (patch)
tree	ef9cfb841ddb001c1f3aa266523f1ff6f56b21b5 /parse.cxx
parent	22f4623195facb4cbc1b50c45c0bd689f6958a9d (diff)
download	systemtap-steved-bb2e3076ea20631d4606050550bc9664204f2c62.tar.gz systemtap-steved-bb2e3076ea20631d4606050550bc9664204f2c62.tar.xz systemtap-steved-bb2e3076ea20631d4606050550bc9664204f2c62.zip