summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: fche <fche> 2007-08-08 03:36:25 +0000
committer: fche <fche> 2007-08-08 03:36:25 +0000
commit: 3f99432cc11977a4345c881bed3aa60a1a614238 (patch)
tree: 70dc99d601a65cec051658402531ba39fa2b2dad
parent: 98be953834c60aaa2ae7504890f1cf11815e558a (diff)
download: systemtap-steved-3f99432cc11977a4345c881bed3aa60a1a614238.tar.gz
systemtap-steved-3f99432cc11977a4345c881bed3aa60a1a614238.tar.xz
systemtap-steved-3f99432cc11977a4345c881bed3aa60a1a614238.zip
2007-08-07 Frank Ch. Eigler <fche@redhat.com>
PR 4846
* parse.cxx (input_put): New function, sort of like stdio ungetc.
(input_get): Skip cursor position changing for input_put strings.
(scan): Rework $.../@... substitution into character pasting.
* parse.h: Corresponding changes.
* util.h (lex_cast_qstring): Octal-quote unprintable characters.
* stap.1.in, NEWS: Document new behaviour.

2007-08-07 Frank Ch. Eigler <fche@redhat.com>

PR 4846
* parseko/preprocess13.stp, parseok/nineteen.stp,
semok/twentyfive.stp: New tests.
-rw-r--r-- ChangeLog 10
-rw-r--r-- NEWS 16
-rw-r--r-- parse.cxx 192
-rw-r--r-- parse.h 3
-rw-r--r-- stap.1.in 14
-rw-r--r-- testsuite/ChangeLog 6
-rwxr-xr-x testsuite/parseko/preprocess13.stp 4
-rwxr-xr-x testsuite/parseok/nineteen.stp 3
-rwxr-xr-x testsuite/semok/twentyfive.stp 8
-rw-r--r-- util.h 21
10 files changed, 185 insertions, 92 deletions
diff --git a/ChangeLog b/ChangeLog
index e0020586..afbb1b74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2007-08-07 Frank Ch. Eigler <fche@redhat.com>
+
+ PR 4846
+ * parse.cxx (input_put): New function, sort of like stdio ungetc.
+ (input_get): Skip cursor position changing for input_put strings.
+ (scan): Rework $.../@... substitution into character pasting.
+ * parse.h: Corresponding changes.
+ * util.h (lex_cast_qstring): Octal-quote unprintable characters.
+ * stap.1.in, NEWS: Document new behaviour.
+
2007-07-26 David Smith <dsmith@redhat.com>
PR 4295
diff --git a/NEWS b/NEWS
index 6762901f..77597c9b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,21 @@
* What's new since version 0.5.14?
+- The way in which command line arguments for scripts are substituted has
+ changed. Previously, $1 etc. would interpret the corresponding command
+ line argument as a numeric literal, and @1 as a string literal. Now,
+ the command line arguments are pasted uninterpreted wherever $1 etc.
+ appears at the beginning of a token. @1 is similar, but is quoted as
+ a string. This change does not modify old scripts, but has the effect
+ of permitting substitution of arbitrary token sequences.
+
+ # This worked before, and still does:
+ % stap -e 'probe timer.s($1) {}' 5
+ # Now this also works:
+ % stap -e 'probe syscall.$1 {log(@1)}' open
+ # This won't crash, just signal a recursion error:
+ % stap -e '$1' '$1'
+ # As before, $1... is recognized only at the beginning of a token
+ % stap -e 'probe begin {foo$1=5}'
* What's new since version 0.5.13?
diff --git a/parse.cxx b/parse.cxx
index de6d1385..61d0352a 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -12,6 +12,8 @@
#include "staptree.h"
#include "parse.h"
#include "session.h"
+#include "util.h"
+
#include <iostream>
#include <fstream>
#include <cctype>
@@ -21,6 +23,7 @@
#include <climits>
#include <sstream>
#include <cstring>
+#include <cctype>
using namespace std;
@@ -488,7 +491,8 @@ parser::peek_kw (std::string const & kw)
lexer::lexer (istream& i, const string& in, systemtap_session& s):
- input (i), input_name (in), cursor_line (1), cursor_column (1), session(s)
+ input (i), input_name (in), cursor_suspend_count(0),
+ cursor_line (1), cursor_column (1), session(s)
{ }
@@ -512,30 +516,61 @@ lexer::input_get ()
if (c < 0) return c; // EOF
- // update source cursor
- if (c == '\n')
+ if (cursor_suspend_count)
+ // Track effect of input_put: preserve previous cursor/line_column
+ // until all of its characters are consumed.
+ cursor_suspend_count --;
+ else
{
- cursor_line ++;
- cursor_column = 1;
+ // update source cursor
+ if (c == '\n')
+ {
+ cursor_line ++;
+ cursor_column = 1;
+ }
+ else
+ cursor_column ++;
}
- else
- cursor_column ++;
return c;
}
+void
+lexer::input_put (const string& chars)
+{
+ // clog << "[put:" << chars << "]";
+ for (int i=chars.size()-1; i>=0; i--)
+ {
+ int c = chars[i];
+ lookahead.insert (lookahead.begin(), c);
+ cursor_suspend_count ++;
+ }
+}
+
+
token*
lexer::scan (bool expand_args)
{
token* n = new token;
n->location.file = input_name;
+ unsigned semiskipped_p = 0;
+
skip:
n->location.line = cursor_line;
n->location.column = cursor_column;
+ semiskip:
+ if (semiskipped_p > 1)
+ {
+ input_get ();
+ throw parse_error ("invalid nested substitution of command line arguments");
+ }
+
int c = input_get();
+ int c2 = input_peek ();
+ // clog << "{" << (char)c << (char)c2 << "}";
if (c < 0)
{
delete n;
@@ -545,81 +580,77 @@ lexer::scan (bool expand_args)
if (isspace (c))
goto skip;
+ // Paste command line arguments as character streams into
+ // the beginning of a token. $1..$999 go through as raw
+ // characters; @1..@999 are quoted/escaped as strings.
+ // $# and @# expand to the number of arguments, similarly
+ // raw or quoted.
+ if (expand_args &&
+ (c == '$' || c == '@') &&
+ (c2 == '#'))
+ {
+ input_get(); // swallow '#'
+ stringstream converter;
+ converter << session.args.size ();
+ if (c == '$') input_put (converter.str());
+ else input_put (lex_cast_qstring (converter.str()));
+ semiskipped_p ++;
+ goto semiskip;
+ }
+ else if (expand_args &&
+ (c == '$' || c == '@') &&
+ (isdigit (c2)))
+ {
+ unsigned idx = 0;
+ do
+ {
+ input_get ();
+ idx = (idx * 10) + (c2 - '0');
+ c2 = input_peek ();
+ } while (c2 > 0 &&
+ isdigit (c2) &&
+ idx <= session.args.size()); // prevent overflow
+ if (idx == 0 ||
+ idx-1 >= session.args.size())
+ throw parse_error ("command line argument index invalid or out of range", n);
+
+ string arg = session.args[idx-1];
+ if (c == '$') input_put (arg);
+ else input_put (lex_cast_qstring (arg));
+ semiskipped_p ++;
+ goto semiskip;
+ }
+
else if (isalpha (c) || c == '$' || c == '@' || c == '_')
{
n->type = tok_identifier;
n->content = (char) c;
- while (1)
+ while (isalnum (c2) || c2 == '_' || c2 == '$')
{
- int c2 = input_peek ();
- if (! input)
- break;
- if ((isalnum(c2) || c2 == '_' || c2 == '$' || c2 == '#' ))
- {
- n->content.push_back(c2);
- input_get ();
- }
- else
- break;
- }
-
- // Expand command line arguments to literals. $1 .. $999 as
- // numbers and @1 .. @999 as strings.
- if (n->content[0] == '@' || n->content[0] == '$')
- {
- if (!expand_args)
- return n;
- if (n->content[1] == '#')
- {
- stringstream converter;
- converter << session.args.size ();
- n->type = (n->content[0] == '@') ? tok_string : tok_number;
- n->content = converter.str();
- }
- else
- {
- string idxstr = n->content.substr(1);
- const char* startp = idxstr.c_str();
- char *endp;
- errno = 0;
- unsigned long idx = strtoul (startp, &endp, 10);
- if (endp == startp)
- ; // no numbers at all - leave alone as identifier
- else
- {
- // Use @1/$1 as the base, not @0/$0. Thus the idx-1.
- if (errno == ERANGE || errno == EINVAL || *endp != '\0' ||
- idx == 0 || idx-1 >= session.args.size ())
- throw parse_error ("command line argument index invalid or out of range", n);
-
- string arg = session.args[idx-1];
- n->type = (n->content[0] == '@') ? tok_string : tok_number;
- n->content = arg;
- }
- }
- }
- else
- {
- if (n->content == "probe"
- || n->content == "global"
- || n->content == "function"
- || n->content == "if"
- || n->content == "else"
- || n->content == "for"
- || n->content == "foreach"
- || n->content == "in"
- || n->content == "limit"
- || n->content == "return"
- || n->content == "delete"
- || n->content == "while"
- || n->content == "break"
- || n->content == "continue"
- || n->content == "next"
- || n->content == "string"
- || n->content == "long")
- n->type = tok_keyword;
+ input_get ();
+ n->content.push_back (c2);
+ c2 = input_peek ();
}
+ if (n->content == "probe"
+ || n->content == "global"
+ || n->content == "function"
+ || n->content == "if"
+ || n->content == "else"
+ || n->content == "for"
+ || n->content == "foreach"
+ || n->content == "in"
+ || n->content == "limit"
+ || n->content == "return"
+ || n->content == "delete"
+ || n->content == "while"
+ || n->content == "break"
+ || n->content == "continue"
+ || n->content == "next"
+ || n->content == "string"
+ || n->content == "long")
+ n->type = tok_keyword;
+
return n;
}
@@ -631,7 +662,7 @@ lexer::scan (bool expand_args)
while (1)
{
int c2 = input_peek ();
- if (! input)
+ if (c2 < 0)
break;
// NB: isalnum is very permissive. We rely on strtol, called in
@@ -656,14 +687,14 @@ lexer::scan (bool expand_args)
{
c = input_get ();
- if (! input || c == '\n')
+ if (c < 0 || c == '\n')
{
n->type = tok_junk;
break;
}
if (c == '\"') // closing double-quotes
break;
- else if (c == '\\')
+ else if (c == '\\') // see also input_put
{
c = input_get ();
switch (c)
@@ -677,14 +708,13 @@ lexer::scan (bool expand_args)
case 'r':
case '0' ... '7': // NB: need only match the first digit
case '\\':
-
// Pass these escapes through to the string value
- // beign parsed; it will be emitted into a C literal.
+ // being parsed; it will be emitted into a C literal.
n->content.push_back('\\');
+ // fall through
default:
-
n->content.push_back(c);
break;
}
diff --git a/parse.h b/parse.h
index ba90374d..c81559dd 100644
--- a/parse.h
+++ b/parse.h
@@ -74,10 +74,13 @@ public:
private:
int input_get ();
+ void input_put (int);
+ void input_put (const std::string&);
int input_peek (unsigned n=0);
std::istream& input;
std::string input_name;
std::vector<int> lookahead;
+ unsigned cursor_suspend_count;
unsigned cursor_line;
unsigned cursor_column;
systemtap_session& session;
diff --git a/stap.1.in b/stap.1.in
index a35a6ca7..a68a030b 100644
--- a/stap.1.in
+++ b/stap.1.in
@@ -168,7 +168,7 @@ precedence.
Whitespace is ignored. Three forms of comments are supported:
.RS
.br
-.BR # " ... shell style, to the end of line"
+.BR # " ... shell style, to the end of line, except for $# and @#"
.br
.BR // " ... C++ style, to the end of line"
.br
@@ -182,17 +182,17 @@ Integers are 64-bit signed quantities, although the parser also accepts
(and wraps around) values above positive 2**63.
.PP
In addition, script arguments given at the end of the command line may
-be expanded as literals. Use
+be inserted. Use
.B $1 ... $<NN>
-for casting as a numeric literal and
+for insertion unquoted,
.B @1 ... @<NN>
-for casting as string literal. The number of arguments may be accessed
+for insertion as a string literal. The number of arguments may be accessed
through
.B $#
-(as a numeric literal) or through
+(as an unquoted number) or through
.B @#
-(as a string literal). These may be used in all contexts where literals
-are accepted, including preprocessing stage. Reference to an argument
+(as a quoted number). These may be used at any place a token may begin,
+including within the preprocessing stage. Reference to an argument
number beyond what was actually given is an error.
.SS PREPROCESSING
diff --git a/testsuite/ChangeLog b/testsuite/ChangeLog
index 8a95ff1e..ee107c22 100644
--- a/testsuite/ChangeLog
+++ b/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2007-08-07 Frank Ch. Eigler <fche@redhat.com>
+
+ PR 4846
+ * parseko/preprocess13.stp, parseok/nineteen.stp,
+ semok/twentyfive.stp: New tests.
+
2007-08-07 David Smith <dsmith@redhat.com>
PR 4736 (partial fix)
diff --git a/testsuite/parseko/preprocess13.stp b/testsuite/parseko/preprocess13.stp
new file mode 100755
index 00000000..9ef34f59
--- /dev/null
+++ b/testsuite/parseko/preprocess13.stp
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# recursive
+stap -p1 -e '$1' '$1'
diff --git a/testsuite/parseok/nineteen.stp b/testsuite/parseok/nineteen.stp
new file mode 100755
index 00000000..1d36b590
--- /dev/null
+++ b/testsuite/parseok/nineteen.stp
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+stap -p1 -e '$1 {log (@1.@2)}' 'probe foo' ' ' # <- control codes
diff --git a/testsuite/semok/twentyfive.stp b/testsuite/semok/twentyfive.stp
new file mode 100755
index 00000000..fadca484
--- /dev/null
+++ b/testsuite/semok/twentyfive.stp
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+set -e
+stap -p2 -e 'probe begin {foo$1$2$3}' # $-expansion only at token head
+stap -p2 -e 'probe $1 $2' 'syscall.open,begin' '{log ("hello\n")}'
+stap -p2 -e 'probe begin {@1 @2}' 'syscall.open,begin' '{log ("hello\n")}'
+stap -p2 -e 'probe begin {log (@1.@2)}' 'syscall.open,begin' '{log ("hello\n")}'
+
diff --git a/util.h b/util.h
index a076a1a3..f9c298ec 100644
--- a/util.h
+++ b/util.h
@@ -3,7 +3,7 @@
#include <iostream>
#include <sstream>
#include <stdexcept>
-
+#include <cctype>
const char *get_home_directory(void);
int copy_file(const char *src, const char *dest);
@@ -65,9 +65,22 @@ lex_cast_qstring(IN const & in)
out2 += '"';
for (unsigned i=0; i<out.length(); i++)
{
- if (out[i] == '"' || out[i] == '\\') // XXX others?
- out2 += '\\';
- out2 += out[i];
+ char c = out[i];
+ if (! isprint(c))
+ {
+ out2 += '\\';
+ // quick & dirty octal converter
+ out2 += "01234567" [(c >> 6) & 0x07];
+ out2 += "01234567" [(c >> 3) & 0x07];
+ out2 += "01234567" [(c >> 0) & 0x07];
+ }
+ else if (c == '"' || c == '\\')
+ {
+ out2 += '\\';
+ out2 += c;
+ }
+ else
+ out2 += c;
}
out2 += '"';
return out2;